模型结构：
使用的是单阶段 3D 检测模型，输出包括：
- 热图 (Heatmap)：表示关键点的置信度
- 偏移量场 (Displacements)：表示从热图峰值到实际关键点的偏移
- 边界框信息 (代码中未使用)
检测流程：
- 模型输出热图和偏移量场
- 通过detect_peak函数找到热图中的峰值点
- 结合偏移量场计算每个关键点的实际位置
- 通过 draw_box 函数在图像上绘制 3D 框，包括 8 个关键点和 12 条连接线(边)
性能优化：
- 使用 GPU 加速推理 (aidlite.AccelerateType.TYPE_GPU)
- 设置 4 线程并行计算
- 精简图像预处理流程：仅做 BGR→RGB 转换、缩放到 480×640 和归一化
摄像头适配：
- 支持自动检测 USB 摄像头
- 兼容 MIPI 摄像头 (如手机内置摄像头，需要授予应用摄像头权限)
- 具备错误处理机制：摄像头打开失败时会每隔 0.5 秒自动重试

这个应用可以实时检测场景中的椅子，并以 3D 方式展示其位置和姿态，适用于智能家居、机器人导航等场景。

示例代码

import cv2
import time 
from time import sleep
import subprocess
import sys
import numpy as np
import aidlite
import os
from scipy.ndimage.filters import maximum_filter

def get_cap_id(sysfs_dir="/sys/class/video4linux"):
    """
    Detect the device ID of a USB camera.

    Scans the ``videoN`` entries of ``sysfs_dir`` and keeps those whose
    symlink target contains ``usb``. The directory is read directly, so no
    shell or external tool is involved.

    Args:
        sysfs_dir: Directory holding the video4linux class symlinks.
            Defaults to the standard Linux location; overridable mainly
            for testing.

    Returns:
        The smallest ID among the USB video devices, or None when the
        directory cannot be read or no USB video device is present.
    """
    prefix = "video"
    try:
        entries = os.listdir(sysfs_dir)
    except OSError as e:
        print(f"摄像头检测错误: {e}")
        return None

    video_numbers = []
    for entry in entries:
        number = entry[len(prefix):]
        # Skip entries that are not "video<N>" (e.g. "v4l-subdev0"); one
        # unparsable name must not abort the whole detection.
        if not entry.startswith(prefix) or not number.isdecimal():
            continue
        try:
            target = os.readlink(os.path.join(sysfs_dir, entry))
        except OSError:
            # Not a symlink, or it vanished meanwhile: the bus is unknown.
            continue
        if "usb" in target:
            video_numbers.append(int(number))

    return min(video_numbers) if video_numbers else None

def detect_peak(image, filter_size=5, order=0.5):
    """
    Locate the peaks (keypoint candidates) of a heatmap.

    A pixel counts as a peak when it equals the maximum of its
    ``filter_size`` x ``filter_size`` neighbourhood and its value is at
    least ``order`` times the value of the strongest such peak.

    Args:
        image: Input heatmap.
        filter_size: Side length of the square maximum-filter window.
        order: Threshold as a fraction of the strongest peak value.

    Returns:
        The peak coordinates as returned by ``np.where``: a tuple with one
        index array per image axis.
    """
    window = np.ones((filter_size, filter_size))
    neighbourhood_max = maximum_filter(image, footprint=window, mode='constant')
    is_local_max = np.asarray(image == neighbourhood_max)

    # Without any local maximum there is no reference value to threshold
    # against; the result is simply empty.
    if not is_local_max.any():
        return np.where(is_local_max)

    values = np.asarray(image)
    strongest = values[is_local_max].max()
    strong_enough = values >= strongest * order
    return np.where(is_local_max & strong_enough)

def decode(hm, displacements, threshold=0.8):
    """
    Decode the model outputs into per-object keypoints.

    The heatmap is viewed as a 40 x 30 grid and the displacement field as
    (1, 40, 30, 16), i.e. 8 (dx, dy) pairs per grid cell. Every heatmap
    peak whose confidence reaches ``threshold`` yields one object.

    Args:
        hm: Heatmap output (keypoint confidence), reshapeable to (40, 30).
        displacements: Displacement output, reshapeable to (1, 40, 30, 16).
        threshold: Minimum heatmap value for a peak to be kept.

    Returns:
        A list of objects; each object is a list of 8 (x, y) tuples made of
        the peak position divided by the grid width/height plus the
        displacement stored for that keypoint.
    """
    heat = hm.reshape(40, 30)
    offsets = displacements.reshape(1, 40, 30, 16)
    grid_h, grid_w = heat.shape[0], heat.shape[1]

    peak_rows, peak_cols = detect_peak(heat)

    objs = []
    for col, row in zip(peak_cols, peak_rows):
        # Drop peaks whose confidence is below the requested threshold.
        if heat[row, col] < threshold:
            continue

        # Channels are interleaved: even index = dx, odd index = dy.
        keypoints = [
            (col / grid_w + offsets[0, row, col, 2 * k],
             row / grid_h + offsets[0, row, col, 2 * k + 1])
            for k in range(8)
        ]
        objs.append(keypoints)
    return objs
    
def draw_box(image, pts):
    """
    Draw a 3D box (edges plus numbered keypoints) onto an image in place.

    Args:
        image: Image to draw on; its width and height are used to scale
            the keypoints to pixel coordinates.
        pts: 8 keypoints given as normalized (x, y) coordinates.
    """
    width = image.shape[1]
    height = image.shape[0]
    box_color = (255, 245, 0)

    def to_pixel(index):
        # Scale a normalized keypoint to integer pixel coordinates.
        point = pts[index]
        return (int(point[0] * width), int(point[1] * height))

    # Pairs of keypoint indices joined by an edge of the box.
    edges = (
        (0, 1), (1, 3), (0, 2), (3, 2),
        (1, 5), (0, 4), (2, 6), (3, 7),
        (5, 7), (6, 7), (6, 4), (4, 5),
    )
    for start, end in edges:
        cv2.line(image, to_pixel(start), to_pixel(end), box_color, 2)

    # Mark every keypoint with a filled dot and its index.
    for index in range(8):
        pixel = to_pixel(index)
        cv2.circle(image, pixel, 3, box_color, -1)
        cv2.putText(image, str(index), pixel, cv2.FONT_HERSHEY_PLAIN, 2, (255, 0, 0), 2)


# --------------- Model initialization ---------------
# Every step below is a hard prerequisite for inference, so the script
# stops at the first failure instead of continuing with a broken object.

# Model path and input/output tensor shapes
model_path = 'models/object_detection_3d_chair_1stage.tflite'
inShape = [[1, 640, 480, 3]]  # input tensor shape: [batch, height, width, channels]
# Output 0 is read as the heatmap and output 1 as the displacement field;
# the third output is not read by this script.
outShape = [[1, 40, 30, 1], [1, 40, 30, 16], [1, 160, 120, 4]]

# Create the AidLite model instance
model = aidlite.Model.create_instance(model_path)
if model is None:
    print("模型创建失败!")
    sys.exit(1)

# Declare the input/output shapes and data types of the model
model.set_model_properties(inShape, aidlite.DataType.TYPE_FLOAT32, outShape, aidlite.DataType.TYPE_FLOAT32)

# Inference configuration
config = aidlite.Config.create_instance()
config.implement_type = aidlite.ImplementType.TYPE_FAST  # fast inference mode
config.framework_type = aidlite.FrameworkType.TYPE_TFLITE  # TFLite framework
config.accelerate_type = aidlite.AccelerateType.TYPE_GPU  # GPU acceleration
config.number_of_threads = 4  # number of threads

# Build the interpreter from the model and the configuration
fast_interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(model, config)
if fast_interpreter is None:
    print("解释器创建失败!")
    sys.exit(1)

# Initialize the interpreter; a non-zero result is treated as failure
result = fast_interpreter.init()
if result != 0:
    print("解释器初始化失败!")
    sys.exit(1)

# Load the model; the success message is printed only if this succeeded
result = fast_interpreter.load_model()
if result != 0:
    print("模型加载失败!")
    sys.exit(1)
print("模型加载成功!")


# --------------- Camera setup ---------------
aidlux_type = "basic"  # AidLux device type
# Camera index: 0 = rear, 1 = front
camId = 0
opened = False

# Keep trying until a camera has been opened successfully.
while not opened:
    if aidlux_type != "basic":
        # Prefer a USB camera when one can be detected.
        capId = get_cap_id()
        print("USB摄像头ID: ", capId)
        if capId is not None:
            camId = capId
            cap = cv2.VideoCapture(camId)
            # Property 6 is set to the MJPG fourcc code.
            cap.set(6, cv2.VideoWriter.fourcc('M', 'J', 'P', 'G'))
        else:
            print("未找到USB摄像头，使用默认MIPI摄像头")
            cap = cv2.VideoCapture(1, device='mipi')  # front MIPI camera
    else:
        cap = cv2.VideoCapture(camId, device='mipi')  # MIPI camera

    if not cap.isOpened():
        # Release the failed handle and wait a little before the next try.
        print("摄像头打开失败，重试中...")
        cap.release()
        time.sleep(0.5)
    else:
        opened = True


# --------------- Main loop ---------------
# The loop is wrapped in try/finally so that the camera and the preview
# window are released even when it is interrupted or raises.
try:
    while True:
        ret, img_ori = cap.read()  # grab one frame
        if not ret or img_ori is None:
            # No frame available: back off briefly instead of busy-spinning.
            time.sleep(0.01)
            continue

        # Preprocessing
        img = cv2.cvtColor(img_ori, cv2.COLOR_BGR2RGB)  # BGR -> RGB
        # cv2.resize takes (width, height); the result is 640 x 480 (H x W),
        # matching the declared input shape.
        img = cv2.resize(img, (480, 640)).astype(np.float32)
        img = img / 128.0 - 1.0  # for 8-bit input this maps values to roughly [-1, 1)
        img = img[None]  # add the batch dimension

        # Detections stay empty unless the whole inference chain succeeds, so
        # stale or invalid output tensors are never decoded. The frame is
        # still displayed and the keyboard polled, keeping ESC responsive.
        objs = []
        result = fast_interpreter.set_input_tensor(0, img.data)
        if result != 0:
            print("设置输入张量失败")
        else:
            # Time the inference call only
            start_time = time.time()
            result = fast_interpreter.invoke()
            gpuelapsed_ms = (time.time() - start_time) * 1000

            if result != 0:
                print("推理执行失败")
            else:
                print(f'推理耗时: {gpuelapsed_ms:.2f}ms')

                # Fetch the model outputs
                displacements = fast_interpreter.get_output_tensor(1)  # displacement field
                hm = fast_interpreter.get_output_tensor(0)  # heatmap (keypoint confidence)

                # Decode the outputs into keypoints
                objs = decode(hm, displacements, threshold=0.7)

        # Draw the detections onto the original frame
        for obj in objs:
            draw_box(img_ori, obj)

        # Show the result
        cv2.imshow("", img_ori)

        # Quit on ESC
        key = cv2.waitKey(1)
        if key == 27:  # ESC
            break
finally:
    # Release resources
    cap.release()
    cv2.destroyAllWindows()