[导航赛题]基于rag的记忆寻物初步探索[原创]

温清在此队员 2026-01-23 12:47:36

调研情况

目前使用的大模型流程主要处理的是“文本/图像 -> 文本”的映射。为了实现导航场景下的记忆寻物，我们需要改变数据结构，引入空间元数据（Spatial Metadata）：不能只存“图片描述”，还必须绑定机器人当时的物理位置。我们使用 bge-m3 将文本转化为向量，让业务逻辑从“字面匹配”升级为“语义理解”；再结合 FAISS 这种高速索引库，负责在海量记忆中毫秒级找到目标。
我们惊喜地发现，bge-m3 在昇腾上的推理适配已经开源，开源链接如下：
https://bbs.huaweicloud.com/blogs/440995

开源代码

于是搭建了一个demo。欢迎大家批评指正。

import base64
import io
import logging
from typing import Optional

import faiss
import numpy as np
import requests
import torch
import torch_npu
from fastapi import FastAPI, HTTPException
from PIL import Image
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModel

# Configure logging at DEBUG level.
logging.basicConfig(level=logging.DEBUG)

# Create the FastAPI application.
app = FastAPI()

# Load the BGE-M3 tokenizer and model from a local directory.
model_name = "/models/bge-m3"  # make sure this path points to the model files
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# Use the first NPU when one is available, otherwise fall back to CPU.
device = "npu:0" if torch.npu.is_available() else "cpu"
model.to(device)

# FAISS index and the in-memory list of remembered texts.
dimension = 1024  # assumes BGE-M3 produces 1024-dimensional vectors
index = faiss.IndexFlatL2(dimension)  # L2-distance index; rebound later by load_faiss_index()
texts = []  # memory texts; retrieval looks entries up by FAISS row id

# Encode texts into sentence vectors.
def encode_text(texts):
    """Embed a batch of texts with the BGE-M3 encoder.

    Args:
        texts: List of strings to embed.

    Returns:
        A float32 NumPy array with one mean-pooled row per input text.
    """
    inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True, max_length=512)
    inputs = inputs.to(device)

    with torch.no_grad():
        outputs = model(**inputs)
        token_embeddings = outputs.last_hidden_state
        # Average over real tokens only. A plain mean over dim=1 also averages
        # the padding positions, so a text's vector would change depending on
        # which other texts it happens to be batched with.
        mask = inputs["attention_mask"].unsqueeze(-1).to(token_embeddings.dtype)
        summed = (token_embeddings * mask).sum(dim=1)
        token_counts = mask.sum(dim=1).clamp(min=1)  # guard against division by zero
        pooled_embeddings = summed / token_counts
        # NOTE(review): the BGE-M3 reference implementation reportedly uses the
        # CLS token plus L2 normalization for dense retrieval; confirm whether
        # mean pooling is intended here.
    logging.debug(f"Shape of pooled embeddings: {pooled_embeddings.shape}")
    # FAISS requires float32 input, whatever precision the model was loaded in.
    return pooled_embeddings.float().cpu().numpy()

# Store one conversation turn as memory.
def store_conversation_memory(input_text, generated_answer):
    """Add a user/bot exchange to the index and persist index and texts.

    Both strings are embedded and appended to the FAISS index, and the
    matching "User: ..." / "Bot: ..." entries are appended to ``texts``.
    The index file and ``texts.txt`` are then rewritten together so the two
    stay aligned across restarts.

    Args:
        input_text: The user's query.
        generated_answer: The answer produced for that query.
    """
    vectors = encode_text([input_text, generated_answer])
    index.add(vectors)
    texts.append(f"User: {input_text}")
    texts.append(f"Bot: {generated_answer}")

    faiss.write_index(index, 'faiss_index.index')
    # texts.txt is read back one entry per line, so embedded line breaks
    # (common in generated answers) are collapsed to keep row i == line i.
    with open('texts.txt', 'w', encoding='utf-8') as f:
        f.writelines(" ".join(entry.splitlines()) + "\n" for entry in texts)
    logging.debug("Conversation memory stored, index saved to disk.")

# Load the persisted FAISS index and memory texts.
def load_faiss_index():
    """Load the FAISS index and its texts from disk, or start fresh.

    Reads ``faiss_index.index`` and ``texts.txt``. The loaded texts replace
    the contents of the module-level ``texts`` list. If the index cannot be
    read, a new empty index is returned. If the index and the texts do not
    line up (different counts, or an unexpected vector dimension), both are
    discarded, because retrieval maps FAISS row ids straight onto ``texts``.

    Returns:
        The loaded FAISS index, or a new empty ``IndexFlatL2``.
    """
    try:
        loaded_index = faiss.read_index('faiss_index.index')
    except Exception as e:
        # Kept broad on purpose: the exception type faiss raises for a missing
        # or unreadable file is not visible here. The detail is logged so a
        # corrupt file is distinguishable from a first start.
        logging.warning("No previous FAISS index found, creating a new one.")
        logging.debug(f"FAISS index load error: {e}")
        return faiss.IndexFlatL2(dimension)
    logging.debug("FAISS index loaded from disk.")

    try:
        with open('texts.txt', 'r', encoding='utf-8') as f:
            loaded_texts = [line.strip() for line in f]
    except (OSError, UnicodeDecodeError) as e:
        logging.warning("No previous texts found, starting with an empty texts list.")
        logging.debug(f"Texts load error: {e}")
        loaded_texts = []
    else:
        logging.debug(f"Loaded {len(loaded_texts)} texts from disk.")

    if loaded_index.ntotal == 0:
        logging.warning("FAISS index is empty. No data loaded.")

    if loaded_index.d != dimension or loaded_index.ntotal != len(loaded_texts):
        logging.warning(
            f"FAISS index (dim={loaded_index.d}, vectors={loaded_index.ntotal}) does not match "
            f"the {len(loaded_texts)} loaded texts; discarding both and starting fresh."
        )
        texts.clear()
        return faiss.IndexFlatL2(dimension)

    texts[:] = loaded_texts
    return loaded_index

# Load the persisted index at startup.
index = load_faiss_index()

# Seed memories used to populate FAISS when the index starts out empty.
initial_texts = [
    "床在卧室里",
    "马桶在卫生间里",
    "茶几在客厅"
]

# If the index is empty, seed it with the initial memories.
if index.ntotal == 0:
    logging.info("Index is empty. Adding initial data to FAISS index.")
    # An empty index means no stored text can be valid, so start from a clean list.
    texts.clear()
    # Seed exactly one vector per initial text, so that FAISS row i, texts[i]
    # and line i of texts.txt all refer to the same memory. Going through the
    # conversation helper would add a "Bot: 初始化记忆" vector per text that
    # the saved file does not contain.
    for seed_text in initial_texts:
        # Encode one text at a time so its vector does not depend on batch padding.
        index.add(encode_text([seed_text]))
        texts.append(seed_text)
    faiss.write_index(index, 'faiss_index.index')
    logging.info(f"Initial texts added: {initial_texts}")

    # Persist the texts next to the index.
    with open('texts.txt', 'w', encoding='utf-8') as f:
        f.writelines(f"{entry}\n" for entry in texts)
    logging.info("Initial texts saved to disk.")

# Retrieve memories.
def retrieve_memory(query, top_k=5):
    """Return the stored memory texts closest to ``query``.

    Args:
        query: The text to search for.
        top_k: Maximum number of memories to return.

    Returns:
        A list of up to ``top_k`` memory strings, nearest first. The list is
        empty when nothing is stored or nothing valid was found.
    """
    if top_k <= 0 or index.ntotal == 0:
        logging.debug("No relevant memories found.")
        return []

    query_vectors = np.ascontiguousarray(encode_text([query]), dtype=np.float32)

    # Never ask for more neighbours than exist; FAISS fills missing slots with -1.
    k = min(top_k, index.ntotal)
    distances, indices = index.search(query_vectors, k)
    logging.debug(f"Query: {query}, Retrieved indices: {indices}, Distances: {distances}")

    # Drop -1 padding (texts[-1] would silently return the last memory) and any
    # id that has no matching entry in texts.
    matched_ids = [int(i) for i in indices[0] if 0 <= i < len(texts)]
    if not matched_ids:
        logging.debug("No relevant memories found.")
        return []

    memories = [texts[i] for i in matched_ids]
    logging.debug(f"Memories retrieved: {memories}")
    return memories

# Generate an answer.
def generate_answer(input_text, memory_context):
    """Ask the local chat-completions service for an answer.

    Args:
        input_text: The user's query.
        memory_context: Retrieved memories, sent as a second system message.

    Returns:
        The content string of the first choice in the response.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status.
        RuntimeError: If the response body lacks the expected
            ``choices[0].message.content`` field.
    """
    url = "http://127.0.0.1:1025/v1/chat/completions"  # address of the Qwen2.5-VL API service
    headers = {"Content-Type": "application/json"}

    messages = [
        {"role": "system", "content": "你是一个帮助用户的智能机器人"},
        {"role": "system", "content": memory_context},  # memories retrieved via BGE-M3
        {"role": "user", "content": input_text}  # the user's input
    ]

    data = {
        "model": "qwenvl2-5",  # adjust to the model name configured on the server
        "messages": messages,
        "max_tokens": 1500,
        "temperature": 0.5
    }

    # Without a timeout a stalled backend would block the caller forever.
    # (connect, read) in seconds; the read budget is generous for long generations.
    response = requests.post(url, json=data, headers=headers, timeout=(5, 300))
    response.raise_for_status()
    result = response.json()
    try:
        return result["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as e:
        raise RuntimeError(f"Unexpected chat completion response: {result!r}") from e

# Convert an image to text.
def process_image(image_base64):
    """Ask the local chat-completions service to describe an image.

    Args:
        image_base64: Base64-encoded image data. It is sent as a
            ``data:image/jpeg;base64,...`` URL.

    Returns:
        The content of the first choice, or the fallback string
        "未能生成图像描述" when the response has no usable choice.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    url = "http://127.0.0.1:1025/v1/chat/completions"  # assumes Qwen2.5-VL can describe images
    headers = {"Content-Type": "application/json"}
    fallback = "未能生成图像描述"

    data = {
        "model": "qwenvl2-5",  # adjust to the model actually deployed
        "messages": [
            {"role": "user", "content": [{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}}]}
        ],
        "max_tokens": 1500,
        "temperature": 0.5
    }

    # Without a timeout a stalled backend would block the caller forever.
    response = requests.post(url, json=data, headers=headers, timeout=(5, 300))
    try:
        result = response.json()
    except ValueError:
        # A body that is not JSON gets the same best-effort fallback as a
        # response without choices.
        logging.warning(f"Image description response was not valid JSON (HTTP {response.status_code}).")
        return fallback

    choices = result.get("choices") if isinstance(result, dict) else None
    if choices:
        try:
            return choices[0]["message"]["content"]
        except (KeyError, IndexError, TypeError):
            return fallback
    return fallback

# Pydantic model used to parse the request body.
class ChatRequest(BaseModel):
    """Request body for the chat endpoint."""

    query: str  # the user's question
    # Optional base64-encoded image. Declared Optional so an explicit JSON
    # null is accepted as well as an omitted field.
    image_base64: Optional[str] = None

# API endpoint: chat completion with memory retrieval.
@app.post("/v1/chat/completions")
async def chat_completions(request: ChatRequest):
    """Answer a query using retrieved memories and an optional image.

    Steps: describe the image if one was sent, retrieve up to five memories
    for the query, generate an answer from that context, then store the
    query/answer pair as new memory.

    Returns:
        A dict with keys "response", "memories" and "image_description".

    Raises:
        HTTPException: Status 500 with the error text when any step fails.
    """
    # NOTE(review): the helpers called below are synchronous (HTTP requests,
    # model inference), so they block the event loop while running; confirm
    # this is acceptable for the expected load.
    try:
        logging.debug(f"Received query: {request.query}")

        # If an image was sent, turn it into a text description first.
        image_description = ""
        if request.image_base64:
            image_description = process_image(request.image_base64)

        # Step 1: retrieve memories with BGE-M3.
        retrieved_memories = retrieve_memory(request.query, top_k=5)

        if not retrieved_memories:
            retrieved_memories = ["没有相关记忆"]  # default when nothing relevant was found

        # Step 2: generate the answer with Qwen2.5-VL.
        memory_context = "\n".join([f"- {memory}" for memory in retrieved_memories])
        if image_description:
            memory_context += f"\n- {image_description}"  # append the image description to the context
        generated_answer = generate_answer(request.query, memory_context)

        # Step 3: store the new exchange as memory.
        store_conversation_memory(request.query, generated_answer)

        return {"response": generated_answer, "memories": retrieved_memories, "image_description": image_description}

    except Exception as e:
        # logging.exception records the traceback, which logging.error dropped.
        logging.exception(f"Error occurred: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e)) from e

# Start the FastAPI service when this file is run as a script.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8000,
    )