OpenAI GPT-5 Function Calling: คู่มือฉบับสมบูรณ์สำหรับ Production System

ในฐานะวิศวกรที่ทำงานกับ Large Language Model มาเกือบ 3 ปี ผมต้องบอกว่า Function Calling เป็นฟีเจอร์ที่เปลี่ยนเกมการพัฒนา AI Application อย่างแท้จริง วันนี้ผมจะมาแชร์ประสบการณ์ตรงจากการนำ GPT-5 Function Calling ไปใช้งานจริงบน production system ที่รองรับ request มากกว่า 10,000 ครั้งต่อวัน ผ่าน HolySheep AI ซึ่งให้บริการ API endpoint ที่เสถียร มี latency ต่ำกว่า 50ms พร้อมอัตราค่าบริการที่ประหยัดกว่า 85% เมื่อเทียบกับผู้ให้บริการรายอื่น

Function Calling คืออะไรและทำงานอย่างไร

Function Calling คือกลไกที่ช่วยให้ LLM สามารถ "เรียกใช้ฟังก์ชัน" ภายนอกได้ โดย LLM จะวิเคราะห์ input ของผู้ใช้และตัดสินใจว่าควรเรียก function ใดพร้อม arguments ที่เหมาะสม จากนั้นระบบจะ execute function และส่งผลลัพธ์กลับไปให้ LLM เพื่อสร้างคำตอบสุดท้าย

สถาปัตยกรรมและ Flow การทำงาน

จากประสบการณ์การ implement หลายระบบ ผมสรุป flow การทำงานของ Function Calling ได้ดังนี้:

┌─────────────────────────────────────────────────────────────┐
│                    Function Calling Flow                     │
├─────────────────────────────────────────────────────────────┤
│                                                             │
│  User Input                                                 │
│      │                                                      │
│      ▼                                                      │
│  ┌─────────────┐    ┌─────────────┐    ┌─────────────┐     │
│  │   LLM       │───▶│  Function   │───▶│  Execute    │     │
│  │  (Parse)    │    │  Decision   │    │  Function   │     │
│  └─────────────┘    └─────────────┘    └─────────────┘     │
│                                                │             │
│                                                ▼             │
│  ┌─────────────┐    ┌─────────────┐    ┌─────────────┐     │
│  │ Final       │◀───│   LLM       │◀───│   Return    │     │
│  │ Response    │    │  (Compose)  │    │  Result     │     │
│  └─────────────┘    └─────────────┘    └─────────────┘     │
│                                                             │
└─────────────────────────────────────────────────────────────┘

การตั้งค่า Client และ Function Definitions

สำหรับการใช้งานจริง ผมแนะนำให้ใช้ OpenAI SDK ผ่าน HolySheep endpoint ซึ่งให้ latency เฉลี่ยต่ำกว่า 50ms พร้อม uptime ที่เสถียร

import json
import os
from dataclasses import dataclass
from typing import List, Optional

from openai import OpenAI

# Initialize client with HolySheep endpoint
# The API key is read from the HOLYSHEEP_API_KEY environment variable so a real
# credential never has to be written into source. The original placeholder is
# kept as the fallback, so editing the placeholder in place still works.
client = OpenAI(
    api_key=os.environ.get("HOLYSHEEP_API_KEY", "YOUR_HOLYSHEEP_API_KEY"),
    base_url="https://api.holysheep.ai/v1",
)

@dataclass
class FunctionCall:
    """A function invocation described by its name and its arguments."""

    # Name of the function to invoke.
    name: str
    # Arguments for the call, held as a dict.
    arguments: dict

# Define available functions
# Schemas of the functions offered to the model. This list is passed as the
# `functions` argument of the chat-completion request in
# stream_chat_completion. Each entry carries a name, a description and a
# JSON-Schema style "parameters" object.
# NOTE(review): `functions` looks like the legacy Chat Completions format;
# newer API versions use `tools` — confirm what the endpoint expects.
functions = [
    {
        # Weather lookup: "city" is required, "unit" is optional and limited
        # to the two enum values.
        "name": "get_weather",
        "description": "ดึงข้อมูลสภาพอากาศปัจจุบันของเมืองที่ระบุ",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {
                    "type": "string",
                    "description": "ชื่อเมืองที่ต้องการทราบสภาพอากาศ"
                },
                "unit": {
                    "type": "string",
                    "enum": ["celsius", "fahrenheit"],
                    "description": "หน่วยอุณหภูมิที่ต้องการ"
                }
            },
            "required": ["city"]
        }
    },
    {
        # Database search: "query" is required, "limit" is optional with a
        # declared default of 10.
        "name": "search_database",
        "description": "ค้นหาข้อมูลในฐานข้อมูลองค์กร",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "คำค้นหา"
                },
                "limit": {
                    "type": "integer",
                    "description": "จำนวนผลลัพธ์สูงสุด",
                    "default": 10
                }
            },
            "required": ["query"]
        }
    },
    {
        # Calculator: a single required "expression" string.
        "name": "calculate",
        "description": "คำนวณทางคณิตศาสตร์",
        "parameters": {
            "type": "object",
            "properties": {
                "expression": {
                    "type": "string",
                    "description": "นิพจน์ทางคณิตศาสตร์ เช่น '2+3*4'"
                }
            },
            "required": ["expression"]
        }
    }
]

การ Implement Streaming และ Function Execution

สำหรับ production system ผมใช้ streaming response เพื่อให้ user ได้รับ feedback เร็ว และ handle function call อย่างถูกต้อง

import re
import math
from typing import Generator, Dict, Any

# Function implementations
def execute_get_weather(city: str, unit: str = "celsius") -> Dict[str, Any]:
    """Return mock current-weather data for a city.

    Stand-in for a real weather API call; data comes from a small
    in-function table.

    Args:
        city: City name. Matching ignores case and whitespace, so
            "Chiang Mai" and " bangkok " are both recognised.
        unit: "celsius" (default) or "fahrenheit", case-insensitive.
            ``None`` is treated as the default.

    Returns:
        On success a dict with "city" (as passed in), "temperature",
        "unit" (normalised to lower case), "condition" and "humidity".
        Otherwise a dict with a single "error" key, for an unknown city or
        an unsupported unit.
    """
    # In production this table would be replaced by a real weather API call.
    # NOTE(review): the leading space in each "condition" value looks
    # unintended; it is kept as-is because it is part of the returned data.
    weather_data = {
        "bangkok": {"temp": 32, "condition": " partly cloudy", "humidity": 75},
        "chiangmai": {"temp": 28, "condition": " sunny", "humidity": 60},
        "phuket": {"temp": 30, "condition": " rain", "humidity": 85}
    }

    # Drop all whitespace so "Chiang Mai" matches the "chiangmai" key.
    city_key = "".join(city.split()).lower()
    data = weather_data.get(city_key)
    if data is None:
        return {"error": f"ไม่พบข้อมูลสภาพอากาศของ {city}"}

    # Validate the unit explicitly: previously anything other than the exact
    # string "fahrenheit" returned the Celsius value labelled with that unit.
    normalized_unit = (unit or "celsius").strip().lower()
    if normalized_unit not in ("celsius", "fahrenheit"):
        return {"error": f"ไม่รองรับหน่วยอุณหภูมิ: {unit}"}

    temp = data["temp"]
    if normalized_unit == "fahrenheit":
        temp = temp * 9 / 5 + 32

    return {
        "city": city,
        "temperature": round(temp, 1),
        "unit": normalized_unit,
        "condition": data["condition"],
        "humidity": data["humidity"]
    }

def execute_search_database(query: str, limit: int = 10) -> Dict[str, Any]:
    """Return mock search results for ``query``.

    Stand-in for a real database query; three fixed results are generated.

    Args:
        query: Search text, echoed back and embedded in each result title.
        limit: Maximum number of results to return. Negative values are
            treated as 0; ``None`` returns every result.

    Returns:
        A dict with "query", "results" (at most ``limit`` entries, each with
        "id", "title" and "score") and "total" (number of results before the
        limit is applied).
    """
    # In production this would query the real database.
    results = [
        {"id": rank, "title": f"ผลลัพธ์ที่ {rank} สำหรับ: {query}", "score": score}
        for rank, score in enumerate((0.95, 0.87, 0.82), start=1)
    ]

    # A negative limit used to slice from the end (results[:-1]) and return
    # all but the last entry; clamp it so "at most limit" always holds.
    limited = results if limit is None else results[:max(limit, 0)]
    return {"query": query, "results": limited, "total": len(results)}

def execute_calculate(expression: str) -> Dict[str, Any]:
    """Evaluate a basic arithmetic expression and return its numeric result.

    Args:
        expression: Text made up only of digits, whitespace, parentheses,
            "." and the characters ``+ - * /`` (for example "2+3*4").

    Returns:
        ``{"expression": ..., "result": ...}`` with an int or float result on
        success, otherwise ``{"error": ...}`` describing the failure.
    """
    # Character whitelist: no letters or underscores can pass, so no names,
    # attribute access or calls to named objects reach the evaluator.
    if not isinstance(expression, str) or not re.match(r'^[\d\s+\-*/().]+$', expression):
        return {"error": "นิพจน์ไม่ถูกต้อง"}

    try:
        # SECURITY NOTE(review): eval() is still used on model-supplied text.
        # It now runs with an empty namespace as defence in depth, but the
        # whitelist also admits "**", so an input such as "9**9**9**9" can
        # consume unbounded CPU and memory. Consider replacing this with an
        # ast-based evaluator that caps exponents.
        result = eval(expression, {"__builtins__": {}}, {})
    except ZeroDivisionError:
        return {"error": "หารด้วยศูนย์ไม่ได้"}
    except Exception as e:
        # Syntax errors, overflow, etc. are reported back rather than raised.
        return {"error": str(e)}

    # Whitelisted input can still produce non-real values, e.g. "(-8)**0.5"
    # is complex and "()" is a tuple. Those would break JSON serialization of
    # the result later, so report them as invalid instead.
    if not isinstance(result, (int, float)):
        return {"error": "นิพจน์ไม่ถูกต้อง"}

    return {"expression": expression, "result": result}

# Function registry
# Lookup table from a function name to the Python callable that implements it.
FUNCTION_REGISTRY = dict(
    get_weather=execute_get_weather,
    search_database=execute_search_database,
    calculate=execute_calculate,
)

def process_function_call(function_name: str, arguments: dict) -> dict:
    """Run the registered function ``function_name`` with ``arguments``.

    Args:
        function_name: Key to look up in ``FUNCTION_REGISTRY``.
        arguments: Keyword arguments unpacked into the call.

    Returns:
        Whatever the function returns, or an ``{"error": ...}`` dict when the
        name is not registered or the call raises.
    """
    handler = FUNCTION_REGISTRY.get(function_name)
    if handler is None:
        return {"error": f"ไม่พบ function: {function_name}"}

    try:
        outcome = handler(**arguments)
    except Exception as exc:
        # Any failure inside the function is reported as data, not raised.
        outcome = {"error": str(exc)}
    return outcome

def stream_chat_completion(
    messages: List[dict],
    model: str = "gpt-5",
    temperature: float = 0.7,
    max_tokens: int = 1000
) -> Generator[str, None, None]:
    """Stream a chat completion, running a function call if one is requested.

    The first request is sent with the module-level ``functions`` schemas.
    Text deltas are yielded as they arrive. If the model asks for a function
    call, its name and argument fragments are accumulated, the function is
    run via ``process_function_call``, the call and its result are appended
    to ``messages``, and a second streamed request produces the final answer.

    Args:
        messages: Conversation so far. Mutated in place when a function call
            happens: the assistant's call and the function result are
            appended.
        model: Model name sent with both requests.
        temperature: Sampling temperature sent with both requests.
        max_tokens: Token cap sent with the first request.

    Yields:
        Text fragments for display: streamed content, a one-line notice when
        a function call starts, and the final answer after a result header.
    """
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        functions=functions,
        function_call="auto",
        stream=True,
        temperature=temperature,
        max_tokens=max_tokens
    )

    pending_call = None

    for chunk in response:
        # Chunks without choices would raise IndexError on choices[0].
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta

        call_delta = getattr(delta, "function_call", None)
        if call_delta:
            if pending_call is None:
                pending_call = {"name": "", "arguments": ""}
            if call_delta.name:
                # Announce once, when the name first arrives. The name is not
                # present on the later argument-only chunks, which previously
                # printed "None" for every fragment.
                if not pending_call["name"]:
                    yield f"📞 เรียกใช้ function: {call_delta.name}\n"
                pending_call["name"] = call_delta.name
            if call_delta.arguments:
                # Arguments arrive as JSON text split across chunks.
                pending_call["arguments"] += call_delta.arguments

        if delta.content:
            yield delta.content

    if pending_call is None:
        return

    try:
        # An empty argument string means "no arguments".
        args = json.loads(pending_call["arguments"] or "{}")
    except json.JSONDecodeError as exc:
        yield f"\n⚠️ arguments ของ function ไม่ใช่ JSON ที่ถูกต้อง: {exc}\n"
        return

    function_result = process_function_call(pending_call["name"], args)

    # Record the call and its result so the second request can use them.
    messages.append({
        "role": "assistant",
        "content": None,
        "function_call": {
            "name": pending_call["name"],
            "arguments": pending_call["arguments"]
        }
    })
    messages.append({
        "role": "function",
        "name": pending_call["name"],
        "content": json.dumps(function_result, ensure_ascii=False)
    })

    # Second request: let the model compose the answer from the result.
    final_response = client.chat.completions.create(
        model=model,
        messages=messages,
        stream=True,
        temperature=temperature
    )

    yield "\n\n📋 ผลลัพธ์: "
    for chunk in final_response:
        if chunk.choices and chunk.choices[0].delta.content:
            yield chunk.choices[0].delta.content

# ตัวอย่างการใช้งาน
if __name__ == "__main__":
    # Demo: each question runs in a fresh conversation that starts from the
    # same system prompt, and the streamed reply is printed as it arrives.
    system_prompt = {"role": "system", "content": "คุณเป็นผู้ช่วยที่ใช้ฟังก์ชันต่างๆ ได้"}
    demo_questions = (
        "สภาพอากาศที่กรุงเทพเป็นอย่างไร?",  # exercises the weather function
        "คำนวณ 125 * 17 + 342 / 6",  # exercises the calculator function
    )
    separator = "=" * 50

    print(separator)
    print("GPT-5 Function Calling Demo")
    print(separator)

    for position, question in enumerate(demo_questions):
        if position > 0:
            # Divider between consecutive demos.
            print("\n\n" + separator)

        conversation = [dict(system_prompt), {"role": "user", "content": question}]
        print(f"\n👤 คำถาม: {question}")
        print("🤖 ตอบ: ", end="")

        for piece in stream_chat_completion(conversation):
            print(piece, end="", flush=True)

การจัดการ Concurrency และ Rate Limiting

สำหรับ production system ที่ต้องรองรับ request จำนวนมาก ผมใช้ pattern ดังนี้:

import asyncio
import aiohttp
from concurrent.futures import ThreadPoolExecutor
from queue import Queue, Empty
import time
from dataclasses import dataclass
from typing import Optional

@dataclass
class RateLimitConfig:
    """Rate-limit settings with their default values."""

    # Request budget per minute (default 60).
    max_requests_per_minute: int = 60
    # Token budget per minute (default 100000).
    max_tokens_per_minute: int = 100000
    # NOTE(review): presumably the number of requests allowed in a short
    # burst — the consumer of this field is not visible here; confirm.
    burst_limit: int = 10

class AsyncFunctionCaller:
    """Async implementation สำหรับ
แหล่งข้อมูลที่เกี่ยวข้อง
📚 บทช่วยสอน AI API
💰 ดูราคา
📖 เอกสารสำหรับนักพัฒนา
🚀 สมัครฟรี
บทความที่เกี่ยวข้อง
OpenAI 兼容接口 Claude Gemini API 适配配置教程
ระบบเติมข้อมูลเทมเพลตสัญญาและแนะนำข้อกำหนดด้วย AI: คู่มือฉบั
คู่มือการเชื่อมต่อ API สำหรับ AI ช่วยวินิจฉัยทางการแพทย์ ตาม

Function Calling คืออะไรและทำงานอย่างไร

สถาปัตยกรรมและ Flow การทำงาน

การตั้งค่า Client และ Function Definitions

Initialize client with HolySheep endpoint

Define available functions

การ Implement Streaming และ Function Execution

Function implementations

Function registry

ตัวอย่างการใช้งาน

การจัดการ Concurrency และ Rate Limiting

แหล่งข้อมูลที่เกี่ยวข้อง

บทความที่เกี่ยวข้อง

🔥 ลอง HolySheep AI