Hướng dẫn toàn diện về nhật ký ứng dụng AI: Theo dõi yêu cầu và phân tích hiệu suất

Chào các bạn! Tôi là Minh, một lập trình viên đã làm việc với AI API được hơn 3 năm. Hôm nay, tôi muốn chia sẻ một vấn đề mà tôi đã gặp phải ngay từ ngày đầu làm việc với AI: Làm sao để biết mã nguồn của mình đang gửi yêu cầu gì, mất bao lâu, và tốn bao nhiêu tiền?

Nếu bạn hoàn toàn mới với AI API, đừng lo lắng. Bài viết này sẽ đi từng bước một, không dùng thuật ngữ phức tạp. Sau khi đọc xong, bạn sẽ có một hệ thống nhật ký (logging) hoàn chỉnh để theo dõi mọi yêu cầu AI của mình.

Tại sao nhật ký lại quan trọng?

Khi tôi mới bắt đầu, tôi từng gặp những tình huống khó hiểu như:

Gọi API rồi nhưng không biết phản hồi ở đâu
Tiền trừ nhanh hơn dự kiến mà không rõ lý do
Phản hồi chậm nhưng không biết chỗ nào gây ra
Muốn tối ưu chi phí nhưng không có dữ liệu để phân tích

Nhật ký (logging) giống như "hộp đen" của ứng dụng — nó ghi lại mọi thứ xảy ra, giúp bạn debug dễ dàng và tối ưu hiệu suất. Với HolySheep AI, hệ thống hỗ trợ độ trễ trung bình dưới 50ms, nhưng nếu không có logging, bạn sẽ không biết được thời gian phản hồi thực tế của từng yêu cầu.

Thiết lập môi trường cơ bản

Trước tiên, bạn cần cài đặt môi trường. Tôi sẽ hướng dẫn bằng Python vì đây là ngôn ngữ phổ biến nhất để làm việc với AI.

Bước 1: Cài đặt thư viện

pip install requests python-json-logger

[Gợi ý ảnh: Chụp màn hình cửa sổ terminal sau khi cài đặt thành công, hiển thị các package đã được cài]

Bước 2: Tạo file cấu hình

# config.py
import os

# API configuration - uses HolySheep AI
BASE_URL = "https://api.holysheep.ai/v1"
# Prefer the HOLYSHEEP_API_KEY environment variable so the real key never has
# to be written into source control; the placeholder is only a fallback that
# must be replaced with your own API key.
API_KEY = os.environ.get("HOLYSHEEP_API_KEY", "YOUR_HOLYSHEEP_API_KEY")

# Logging configuration
LOG_LEVEL = "INFO"
LOG_FILE = "ai_requests.log"

# Directory where log files are stored (created on import if missing)
os.makedirs("logs", exist_ok=True)

[Gợi ý ảnh: Cấu trúc thư mục project với file config.py và thư mục logs]

Xây dựng hệ thống logging hoàn chỉnh

Đây là phần quan trọng nhất. Tôi sẽ tạo một module logging với đầy đủ thông tin theo dõi.

# ai_logger.py
import json
import os
import time
from datetime import datetime
from typing import Optional, Dict, Any

import requests

class AILogger:
    """Send chat-completion requests to HolySheep AI and log every call.

    Each call to :meth:`goi_yeu_cau_ai` appends one JSON object (one line) to
    ``log_file`` and prints a one-line summary to the console.
    """

    # Price table in USD per one million tokens (HolySheep AI 2026 price list),
    # keyed by model name. Kept at class level so it is built only once.
    _BANG_GIA = {
        "gpt-4.1": {"input": 8.0, "output": 8.0},              # $8/MTok
        "claude-sonnet-4.5": {"input": 15.0, "output": 15.0},  # $15/MTok
        "gemini-2.5-flash": {"input": 2.50, "output": 2.50},   # $2.50/MTok
        "deepseek-v3.2": {"input": 0.42, "output": 0.42},      # $0.42/MTok
    }

    def __init__(
        self,
        log_file: str = "logs/ai_requests.jsonl",
        api_key: Optional[str] = None,
        base_url: str = "https://api.holysheep.ai/v1",
    ):
        """Create a logger bound to one log file and one HTTP session.

        Args:
            log_file: Path of the JSONL file that log entries are appended to.
            api_key: API key sent as the Bearer token. When omitted, the
                ``HOLYSHEEP_API_KEY`` environment variable is used, falling
                back to the ``YOUR_HOLYSHEEP_API_KEY`` placeholder.
            base_url: Base URL of the API; ``/chat/completions`` is appended.
        """
        self.log_file = log_file
        self.api_key = api_key or os.environ.get(
            "HOLYSHEEP_API_KEY", "YOUR_HOLYSHEEP_API_KEY"
        )
        self.base_url = base_url.rstrip("/")
        self.session = requests.Session()

    def _ghi_log(self, log_entry: Dict[str, Any]):
        """Append one log entry to the JSONL file as a single line."""
        # The log directory may not exist yet (nothing else in this module
        # creates it), so make sure it is there before opening the file.
        thu_muc = os.path.dirname(self.log_file)
        if thu_muc:
            os.makedirs(thu_muc, exist_ok=True)
        with open(self.log_file, "a", encoding="utf-8") as f:
            f.write(json.dumps(log_entry, ensure_ascii=False) + "\n")

    def goi_yeu_cau_ai(
        self,
        prompt: str,
        model: str = "gpt-4.1",
        temperature: float = 0.7,
        max_tokens: int = 1000
    ) -> Dict[str, Any]:
        """Send a request to HolySheep AI and log it in full.

        Failures never propagate: HTTP errors are recorded with status
        ``"error"`` and any raised exception with status ``"exception"``.

        Args:
            prompt: The question text.
            model: The AI model to use.
            temperature: Creativity (0-1).
            max_tokens: Maximum number of tokens for the response.

        Returns:
            The log entry dict: request metadata plus, on success, the
            response text, token usage, latencies and cost.
        """
        # perf_counter is monotonic, so measured durations are not distorted
        # by system clock adjustments; wall-clock time is only used for the id.
        moc_bat_dau = time.perf_counter()

        request_log = {
            "timestamp": datetime.now().isoformat(),
            "request_id": f"req_{int(time.time() * 1000)}",
            "model": model,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "prompt_length": len(prompt),
            "prompt_preview": prompt[:100] + "..." if len(prompt) > 100 else prompt,
            "status": "pending"
        }

        try:
            headers = {
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json"
            }

            payload = {
                "model": model,
                "messages": [{"role": "user", "content": prompt}],
                "temperature": temperature,
                "max_tokens": max_tokens
            }

            # Time only the HTTP round trip.
            moc_network = time.perf_counter()
            response = self.session.post(
                f"{self.base_url}/chat/completions",
                headers=headers,
                json=payload,
                timeout=30
            )
            thoi_gian_network = (time.perf_counter() - moc_network) * 1000

            if response.status_code == 200:
                response_data = response.json()
                content = response_data["choices"][0]["message"]["content"]
                usage = response_data.get("usage", {})

                request_log.update({
                    "status": "success",
                    "response": content,
                    "response_length": len(content),
                    "usage": usage,
                    "network_latency_ms": round(thoi_gian_network, 2),
                    "cost_usd": self._tinh_chi_phi(model, usage)
                })
            else:
                # Error bodies are not guaranteed to be JSON; keep the raw
                # text in that case so the HTTP failure is still recorded
                # as an "error" rather than as a JSON decoding exception.
                try:
                    noi_dung_loi = response.json()
                except ValueError:
                    noi_dung_loi = response.text
                request_log.update({
                    "status": "error",
                    "http_status": response.status_code,
                    "error": noi_dung_loi
                })

        except Exception as e:
            # Logging boundary: record the failure instead of raising so the
            # caller always receives a log entry.
            request_log.update({
                "status": "exception",
                "error": str(e)
            })

        request_log["latency_ms"] = round(
            (time.perf_counter() - moc_bat_dau) * 1000, 2
        )

        self._ghi_log(request_log)
        self._in_console(request_log)

        return request_log

    def _tinh_chi_phi(self, model: str, usage: Dict) -> float:
        """Compute the cost in USD from token usage.

        Args:
            model: Model name used to look up the per-million-token prices.
            usage: Dict with optional ``prompt_tokens`` and
                ``completion_tokens`` counts.

        Returns:
            The cost rounded to 6 decimal places, or 0.0 for a model that is
            not in the price table.
        """
        gia = self._BANG_GIA.get(model)
        if gia is None:
            return 0.0

        # "or 0" also covers an explicit null token count.
        prompt_tokens = usage.get("prompt_tokens", 0) or 0
        completion_tokens = usage.get("completion_tokens", 0) or 0

        # Convert to millions of tokens (M).
        prompt_m = prompt_tokens / 1_000_000
        completion_m = completion_tokens / 1_000_000

        chi_phi = (prompt_m * gia["input"]) + (completion_m * gia["output"])
        return round(chi_phi, 6)

    def _in_console(self, log: Dict):
        """Print a one-line, human-readable summary of a log entry."""
        if log["status"] == "success":
            print(f"✅ [{log['timestamp']}] {log['model']} | "
                  f"Latency: {log['latency_ms']}ms | "
                  f"Tokens: {log['usage'].get('total_tokens', 0)} | "
                  f"Cost: ${log['cost_usd']}")
        else:
            print(f"❌ [{log['timestamp']}] Lỗi: {log.get('error', 'Unknown')}")


# Create the module-level logger instance that other modules import
ai_logger = AILogger()

[Gợi ý ảnh: Sơ đồ luồng hoạt động của module AILogger - từ request đến khi ghi log]

Sử dụng hệ thống logging

Bây giờ, hãy xem cách sử dụng module này trong thực tế:

# main.py
from ai_logger import ai_logger

# Example 1: ask a basic question
print("=" * 50)
print("Ví dụ 1: Hỏi về lập trình Python")
print("=" * 50)

ket_qua_1 = ai_logger.goi_yeu_cau_ai(
    prompt="Giải thích đơn giản về biến trong Python cho người mới",
    model="deepseek-v3.2",  # Cheapest model, well suited to learning
    temperature=0.7,
    max_tokens=500
)

# Example 2: a more complex request
print("\n" + "=" * 50)
print("Ví dụ 2: Phân tích code")
print("=" * 50)

ket_qua_2 = ai_logger.goi_yeu_cau_ai(
    prompt="""Phân tích đoạn code sau và chỉ ra lỗi:
    def tinh_tong(a, b):
        return a + b
    print(tinh_tong("5", 3))""",
    model="gpt-4.1",  # Stronger model for complex analysis
    temperature=0.3,
    max_tokens=800
)

# Show the response details
print("\n" + "=" * 50)
print("Chi tiết kết quả:")
print("=" * 50)
print(f"Response: {ket_qua_2.get('response', 'N/A')[:200]}...")
# A failed request's log entry has no "cost_usd" key, so default to 0 rather
# than crashing the summary with a KeyError.
tong_chi_phi = ket_qua_1.get("cost_usd", 0) + ket_qua_2.get("cost_usd", 0)
print(f"Tổng chi phí cả 2 requests: ${tong_chi_phi:.6f}")

[Gợi ý ảnh: Kết quả chạy chương trình với các log hiển thị trên terminal]

Phân tích logs để tối ưu

Sau khi có logs, bạn cần công cụ để phân tích. Tôi sẽ tạo một script phân tích đơn giản:

# phan_tich_logs.py
import json
from collections import defaultdict
from datetime import datetime

def doc_va_phan_tich_log(duong_dan_file: str = "logs/ai_requests.jsonl"):
    """Read a JSONL request log and print a summary report.

    Lines that are not valid JSON, or that do not decode to a JSON object,
    are skipped. Entries whose ``status`` is not ``"success"`` (including
    entries with no status at all) are counted as errors.

    Args:
        duong_dan_file: Path of the JSONL log file to analyse.

    Returns:
        None. The report is printed to stdout; if the file does not exist a
        hint is printed instead.
    """
    tong_chi_phi = 0
    tong_requests = 0
    requests_loi = 0
    chi_phi_theo_model = defaultdict(float)
    danh_sach_tre = []  # latency (ms) of every successful request

    try:
        with open(duong_dan_file, "r", encoding="utf-8") as f:
            for dong in f:
                try:
                    log = json.loads(dong.strip())
                except json.JSONDecodeError:
                    continue
                # Valid JSON that is not an object (e.g. a bare number) cannot
                # be a log entry; treat it like any other malformed line.
                if not isinstance(log, dict):
                    continue

                tong_requests += 1
                if log.get("status") != "success":
                    requests_loi += 1
                    continue

                chi_phi = log.get("cost_usd") or 0
                tong_chi_phi += chi_phi
                chi_phi_theo_model[log.get("model", "unknown")] += chi_phi
                danh_sach_tre.append(log.get("latency_ms") or 0)

    except FileNotFoundError:
        print("❌ Chưa có file log. Hãy chạy main.py trước!")
        return

    # Derived statistics; guard every division against an empty log.
    requests_thanh_cong = len(danh_sach_tre)
    do_tre_trung_binh = sum(danh_sach_tre) / requests_thanh_cong if requests_thanh_cong > 0 else 0
    ti_le_thanh_cong = (requests_thanh_cong / tong_requests * 100) if tong_requests > 0 else 0

    # Print the report.
    print("\n" + "=" * 60)
    print("📊 BÁO CÁO PHÂN TÍCH AI REQUESTS")
    print("=" * 60)
    print(f"📅 Thời gian phân tích: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"📁 File log: {duong_dan_file}")
    print()
    print(f"📈 Tổng quan:")
    print(f"   • Tổng số requests: {tong_requests}")
    print(f"   • Thành công: {requests_thanh_cong} ({ti_le_thanh_cong:.1f}%)")
    print(f"   • Lỗi: {requests_loi}")
    print()
    print(f"💰 Chi phí:")
    print(f"   • Tổng chi phí: ${tong_chi_phi:.6f}")
    print(f"   • Chi phí trung bình/request: ${tong_chi_phi/tong_requests:.6f}" if tong_requests > 0 else "   • N/A")
    print()
    print(f"⚡ Hiệu suất (HolySheep AI <50ms):")
    print(f"   • Độ trễ trung bình: {do_tre_trung_binh:.2f}ms")
    print(f"   • Độ trễ thấp nhất: {min(danh_sach_tre):.2f}ms" if danh_sach_tre else "   • N/A")
    print(f"   • Độ trễ cao nhất: {max(danh_sach_tre):.2f}ms" if danh_sach_tre else "   • N/A")
    print()
    print(f"📊 Chi phí theo model:")
    # Most expensive model first.
    for model, chi_phi in sorted(chi_phi_theo_model.items(), key=lambda x: -x[1]):
        print(f"   • {model}: ${chi_phi:.6f}")
    print("=" * 60)

# Run the analysis when executed as a script
if __name__ == "__main__":
    doc_va_phan_tich_log()

[Gợi ý ảnh: Dashboard phân tích với biểu đồ chi phí và độ trễ theo thời gian]

Đọc file log thủ công

Nếu bạn muốn xem trực tiếp nội dung log:

# doc_log.py
import json

def xem_log_gannhat(so_dong: int = 5, duong_dan_file: str = "logs/ai_requests.jsonl"):
    """Print the N most recent entries of a JSONL request log.

    Blank lines are ignored when selecting the most recent entries, and
    lines that are not valid JSON objects are skipped when printing.

    Args:
        so_dong: Number of most recent entries to show; values <= 0 show none.
        duong_dan_file: Path of the JSONL log file to read.

    Returns:
        None. Output goes to stdout; if the file does not exist a hint is
        printed instead.
    """
    try:
        with open(duong_dan_file, "r", encoding="utf-8") as f:
            cac_dong = [line for line in f if line.strip()]
    except FileNotFoundError:
        print("❌ Chưa có file log. Hãy chạy main.py trước!")
        return

    # A bare cac_dong[-so_dong:] would return *every* line for so_dong == 0
    # (because -0 == 0), so handle non-positive counts explicitly.
    gan_nhat = cac_dong[-so_dong:] if so_dong > 0 else []

    print(f"\n📋 {so_dong} log entries gần nhất:\n")
    for line in gan_nhat:
        try:
            log = json.loads(line)
        except json.JSONDecodeError:
            continue
        if not isinstance(log, dict):
            continue

        trang_thai = log.get('status', 'N/A')
        print(f"⏰ {log.get('timestamp', 'N/A')}")
        print(f"   Model: {log.get('model', 'N/A')}")
        print(f"   Status: {trang_thai}")
        if trang_thai == 'success':
            usage = log.get('usage') or {}
            print(f"   Latency: {log.get('latency_ms', 'N/A')}ms")
            print(f"   Cost: ${log.get('cost_usd', 'N/A')}")
            print(f"   Tokens: {usage.get('total_tokens', 'N/A')}")
        else:
            print(f"   Error: {log.get('error', 'N/A')}")
        print()

if __name__ == "__main__":
    # Run as a script: show the three most recent log entries.
    xem_log_gannhat(so_dong=3)

Mẹo tối ưu chi phí với HolySheep AI

Dựa trên kinh nghiệm thực tế của tôi, đây là những cách giúp bạn tiết kiệm đáng kể:

Sử dụng đúng model cho đúng tác vụ: Với