作为在生产环境跑了3年AI应用的老兵,我深知可观测性对于AI系统的意义——它决定了你的系统在凌晨3点报警时,你能否在5分钟内定位问题,而不是对着日志发呆到天亮。今天我结合实际项目经验,分享一套完整的可观测性接入方案。

"""
AI应用可观测性监控方案 - 核心基础设施
测试环境: Python 3.11+ / FastAPI
"""
import time
import json
import logging
from typing import Optional, Dict, Any
from dataclasses import dataclass, asdict
from datetime import datetime
from collections import defaultdict
我选择 HolySheep API 的核心原因在于它提供了统一的API代理层,让我可以同时监控多个模型提供商的调用情况。以下是可观测性基础组件的完整接入代码:
@dataclass
class AIMetric:
    """Metric record for a single AI model invocation.

    Captures token usage, end-to-end latency and outcome for one call.
    `timestamp` defaults to the UTC creation time when not supplied.
    """
    request_id: str                    # unique id of the upstream request
    model: str                         # model name, e.g. "gpt-4.1"
    provider: str                      # holysheep / openai / anthropic
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
    latency_ms: float                  # end-to-end latency in milliseconds
    status: str                        # success / error / timeout
    error_type: Optional[str] = None   # populated only for non-success calls
    # Fix: was annotated as plain `datetime` with a None default, which is a
    # type error; Optional[datetime] matches the actual contract.
    timestamp: Optional[datetime] = None

    def __post_init__(self):
        # Stamp creation time when the caller did not provide one.
        # NOTE(review): datetime.utcnow() returns a naive datetime and is
        # deprecated since Python 3.12; consider datetime.now(timezone.utc).
        if self.timestamp is None:
            self.timestamp = datetime.utcnow()
class AIObservabilityCollector:
    """Collects per-request AI metrics and keeps real-time per-model stats.

    Raw metrics are buffered in memory (`metrics_buffer`) and aggregated
    into `realtime_stats`; alert rules are evaluated synchronously on every
    recorded request.
    """

    def __init__(self, base_url: str = "https://api.holysheep.ai/v1"):
        self.base_url = base_url
        # NOTE(review): both buffers grow without bound; a flush/rotation
        # policy is needed before long-running production use.
        self.metrics_buffer = []
        self.trace_buffer = []
        self.logger = logging.getLogger("ai_observability")
        # Per-model rolling counters, created lazily on first record.
        self.realtime_stats = defaultdict(lambda: {
            "total_requests": 0,
            "success_count": 0,
            "error_count": 0,
            "total_latency": 0.0,
            "total_tokens": 0,
            "error_types": defaultdict(int)
        })

    def record_request(self, metric: AIMetric) -> None:
        """Record one request: buffer it, update counters, run alert checks."""
        # 1. Append the raw metric to the metrics buffer.
        self.metrics_buffer.append(asdict(metric))
        # 2. Update per-model real-time counters.
        stats = self.realtime_stats[metric.model]
        stats["total_requests"] += 1
        stats["total_latency"] += metric.latency_ms
        stats["total_tokens"] += metric.total_tokens
        if metric.status == "success":
            stats["success_count"] += 1
        else:
            stats["error_count"] += 1
            if metric.error_type:
                stats["error_types"][metric.error_type] += 1
        # 3. Evaluate alert rules for this request.
        self._check_alert_rules(metric)

    def _check_alert_rules(self, metric: AIMetric) -> None:
        """Emit warning logs when latency or error-rate thresholds are hit."""
        # Per-request latency alert threshold: 3000 ms.
        if metric.latency_ms > 3000:
            self.logger.warning(
                f"🚨 [ALERT] 高延迟告警 | "
                f"模型: {metric.model} | "
                f"延迟: {metric.latency_ms}ms | "
                f"Request ID: {metric.request_id}"
            )
        # Error-rate alert threshold: 5%, evaluated once >= 20 requests seen.
        # NOTE(review): once over threshold this fires on every request;
        # consider rate-limiting/deduplicating the alert.
        stats = self.realtime_stats[metric.model]
        if stats["total_requests"] >= 20:
            error_rate = stats["error_count"] / stats["total_requests"]
            if error_rate > 0.05:
                self.logger.warning(
                    f"🚨 [ALERT] 高错误率告警 | "
                    f"模型: {metric.model} | "
                    f"错误率: {error_rate*100:.1f}%"
                )

    def get_realtime_stats(self, model: Optional[str] = None) -> Dict[str, Any]:
        """Return aggregated stats for one model, or for all known models.

        Fix: querying an unknown model no longer auto-inserts a phantom
        entry into the defaultdict-backed `realtime_stats` (read-only
        lookup); the returned zeroed snapshot is unchanged.
        """
        if model:
            # .get() avoids defaultdict auto-insertion for never-seen models.
            stats = self.realtime_stats.get(model)
            if stats is None:
                stats = self.realtime_stats.default_factory()
            return {
                "model": model,
                "total_requests": stats["total_requests"],
                "success_rate": stats["success_count"] / max(stats["total_requests"], 1),
                "avg_latency_ms": stats["total_latency"] / max(stats["total_requests"], 1),
                "total_tokens": stats["total_tokens"],
                "error_breakdown": dict(stats["error_types"])
            }
        return {
            name: self.get_realtime_stats(name)
            for name in self.realtime_stats.keys()
        }
使用示例
# Instantiate the collector with its default HolySheep base URL.
collector = AIObservabilityCollector()
模拟记录一个成功的请求
# Build a metric for a successful gpt-4.1 call and record it.
success_metric = AIMetric(
    request_id="req_abc123",
    model="gpt-4.1",
    provider="holysheep",
    prompt_tokens=150,
    completion_tokens=320,
    total_tokens=470,
    latency_ms=850.5,
    status="success"
)
collector.record_request(success_metric)
模拟记录一个失败的请求
# Build a metric for a timed-out call (5000 ms exceeds the 3000 ms latency
# alert threshold, so recording it triggers a warning log).
error_metric = AIMetric(
    request_id="req_def456",
    model="claude-sonnet-4.5",
    provider="holysheep",
    prompt_tokens=200,
    completion_tokens=0,
    total_tokens=200,
    latency_ms=5000.0,
    status="error",
    error_type="timeout"
)
collector.record_request(error_metric)
查看实时统计
# Fetch and print the aggregated stats for one model; default=str lets
# json.dumps serialize the datetime values.
stats = collector.get_realtime_stats("gpt-4.1")
print(f"📊 实时统计: {json.dumps(stats, indent=2, default=str)}")
统一代理层:HolySheep API 的实际接入代码
"""
Unified AI invocation + monitoring built on the HolySheep API.
base_url: https://api.holysheep.ai/v1
"""
import httpx
import tiktoken
from typing import List, Dict, Any, Optional
from openai import OpenAI
import json
class HolySheepAIClient:
"""HolySheep统一AI客户端(支持多模型自动切换)"""
# 2026年