凌晨两点,你的 RAG 问答系统突然收到大量用户投诉——AI 返回的答案看似流畅专业,却与检索到的文档毫无关联,甚至凭空编造了不存在的法规条款和实验数据。作为技术负责人,你面对"200 状态码正常返回,响应内容却全是幻觉"的诡异场景,不知从何排查。

这是笔者去年在某金融科技公司遇到的真实案例。本文将完整复盘从错误定位到生产级防护的全流程,包含可复制的代码模板和 HolySheep API 的接入实践。

一、为什么 RAG 幻觉问题如此棘手

RAG(检索增强生成)的核心假设是:LLM 应该"看着"检索结果回答。但实际生产环境中,幻觉问题往往来自三个层面:

传统方案依赖人工抽查和规则过滤,但在日均 10 万次请求的规模下,这套机制形同虚设。

二、幻觉检测的核心指标与实现

生产级幻觉检测需要量化三个维度:上下文相关性、答案一致性、引用覆盖率。以下是笔者团队沉淀的检测框架:

2.1 环境准备与依赖安装

# Python 3.10+ 环境
pip install langchain-community tiktoken scipy scikit-learn

核心检测模块

import os
import json
import httpx
from dataclasses import dataclass
from typing import Dict, List, Tuple


@dataclass
class HallucinationResult:
    """Aggregated hallucination-detection scores for a single RAG answer."""
    relevance_score: float    # context relevance, 0-1
    consistency_score: float  # answer/context consistency, 0-1
    citation_coverage: float  # fraction of claims supported/refuted by context, 0-1
    is_hallucination: bool    # final hallucination verdict
    risk_level: str           # "high" / "medium" / "low"


class RAGHallucinationDetector:
    """Production-grade hallucination detector for RAG pipelines.

    Scores an answer along three axes (retrieval relevance, answer/context
    consistency, citation coverage) via an OpenAI-compatible HTTP API.
    """

    def __init__(self, api_key: str, base_url: str = "https://api.holysheep.ai/v1"):
        """Create an HTTP client bound to an OpenAI-compatible endpoint.

        Args:
            api_key: bearer token sent in the Authorization header.
            base_url: API root; defaults to the HolySheep gateway.
        """
        self.client = httpx.Client(
            base_url=base_url,
            headers={"Authorization": f"Bearer {api_key}"},
            timeout=30.0,
        )
        self.embedding_model = "text-embedding-3-small"

    def cosine_similarity(self, vec_a: List[float], vec_b: List[float]) -> float:
        """Cosine similarity of two vectors; 1e-8 guards zero-norm input."""
        dot_product = sum(a * b for a, b in zip(vec_a, vec_b))
        norm_a = sum(a ** 2 for a in vec_a) ** 0.5
        norm_b = sum(b ** 2 for b in vec_b) ** 0.5
        return dot_product / (norm_a * norm_b + 1e-8)

    def get_embedding(self, text: str) -> List[float]:
        """Fetch an embedding vector for *text* from the /embeddings endpoint."""
        response = self.client.post(
            "/embeddings",
            json={"model": self.embedding_model, "input": text},
        )
        response.raise_for_status()
        return response.json()["data"][0]["embedding"]

    def check_relevance(self, question: str, retrieved_chunks: List[str]) -> float:
        """Max cosine similarity between the question and any retrieved chunk.

        Returns 0.0 when no chunks were retrieved (empty retrieval is
        itself a strong hallucination signal downstream).
        """
        question_emb = self.get_embedding(question)
        chunk_embs = [self.get_embedding(chunk) for chunk in retrieved_chunks]
        similarities = [self.cosine_similarity(question_emb, emb) for emb in chunk_embs]
        return max(similarities) if similarities else 0.0

    def check_consistency(self, context: str, answer: str) -> float:
        """LLM-as-judge consistency score: 1.0 / 0.6 / 0.2.

        Asks the model to answer exactly one of "一致" (consistent),
        "部分一致" (partially consistent), "不一致" (inconsistent).
        """
        prompt = f"""你是一个严谨的事实核查员。请判断以下答案是否与给定上下文一致。
只回答"一致"、"部分一致"或"不一致"。

上下文:
{context}

答案:
{answer}

判定结果:"""
        response = self.client.post(
            "/chat/completions",
            json={
                "model": "gpt-4.1",
                "messages": [{"role": "user", "content": prompt}],
                "temperature": 0.1,
                "max_tokens": 20,
            },
        )
        response.raise_for_status()
        result = response.json()["choices"][0]["message"]["content"]
        # BUG FIX: "不一致" contains the substring "一致", so the original
        # check (`"一致" in result and "部分" not in result`) scored an
        # explicit "inconsistent" verdict as 1.0. Rule out "不一致" first.
        if "不一致" in result:
            return 0.2
        if "部分一致" in result:
            return 0.6
        if "一致" in result:
            return 1.0
        return 0.2

    def extract_claims(self, answer: str) -> List[str]:
        """Extract verifiable claims from *answer*, one per output line."""
        prompt = f"""请从以下答案中提取所有可验证的关键声明,每条声明用一行输出:

{answer}

关键声明列表:"""
        response = self.client.post(
            "/chat/completions",
            json={
                "model": "gpt-4.1",
                "messages": [{"role": "user", "content": prompt}],
                "temperature": 0.0,
            },
        )
        response.raise_for_status()
        claims_text = response.json()["choices"][0]["message"]["content"]
        return [line.strip() for line in claims_text.split("\n") if line.strip()]

    def verify_citations(self, claims: List[str], context: str) -> float:
        """Fraction of claims the context either supports or refutes.

        A claim judged "无关" (unrelated) counts as uncovered. An empty
        claim list trivially returns 1.0 (nothing to verify).
        """
        if not claims:
            return 1.0
        verified = 0
        for claim in claims:
            prompt = f"""判断以下声明是否能从给定上下文中得到支持或否定。
只回答"支持"、"否定"或"无关"。

上下文:
{context}

声明:{claim}

判定:"""
            response = self.client.post(
                "/chat/completions",
                json={
                    "model": "gpt-4.1",
                    "messages": [{"role": "user", "content": prompt}],
                    "temperature": 0.0,
                    "max_tokens": 10,
                },
            )
            # CONSISTENCY FIX: every sibling HTTP call checks the status;
            # the original silently parsed error bodies here.
            response.raise_for_status()
            result = response.json()["choices"][0]["message"]["content"]
            if "支持" in result or "否定" in result:
                verified += 1
        return verified / len(claims)

    def detect(self, question: str, retrieved_chunks: List[str],
               answer: str, threshold: float = 0.6) -> HallucinationResult:
        """Run all three checks and combine them into a final verdict.

        Args:
            question: the user's question.
            retrieved_chunks: chunks returned by the retriever.
            answer: the generated answer to audit.
            threshold: overall score below which the answer is flagged.
        """
        relevance = self.check_relevance(question, retrieved_chunks)
        context = " ".join(retrieved_chunks)
        consistency = self.check_consistency(context, answer)
        claims = self.extract_claims(answer)
        citation = self.verify_citations(claims, context)
        # Weighted overall score: relevance 0.3, consistency 0.4, citation 0.3.
        # NOTE(review): the original listing was truncated after this comment;
        # the aggregation below is reconstructed from the stated weights, the
        # `threshold` parameter and the HallucinationResult fields — confirm
        # against the complete original before relying on exact cutoffs.
        overall = 0.3 * relevance + 0.4 * consistency + 0.3 * citation
        is_hallucination = overall < threshold
        if overall < 0.4:
            risk_level = "high"
        elif overall < threshold:
            risk_level = "medium"
        else:
            risk_level = "low"
        return HallucinationResult(
            relevance_score=relevance,
            consistency_score=consistency,
            citation_coverage=citation,
            is_hallucination=is_hallucination,
            risk_level=risk_level,
        )