สร้างระบบสรุปข่าว AI แบบหลายแหล่งข้อมูล: คู่มือฉบับสมบูรณ์

ในยุคที่ข้อมูลท่วมท้น การติดตามข่าวสารจากหลายแหล่งให้ทันเวลากลายเป็นความท้าทาย โดยเฉพาะทีมงานธุรกิจ นักวิเคราะห์ หรือนักพัฒนาที่ต้องการข้อมูลอัปเดตตลอด 24 ชั่วโมง บทความนี้จะพาคุณสร้าง ระบบสรุปข่าว AI ที่รวบรวมข้อมูลจากหลายแหล่ง ประมวลผลด้วยโมเดลภาษาขนาดใหญ่ และส่งออกเป็นสรุปที่กระชับพร้อมใช้งาน

กรณีศึกษา: ระบบเฝ้าระวังข่าวสำหรับทีม E-Commerce

สมมติว่าคุณเป็นทีมพัฒนาของร้านค้าออนไลน์ระดับใหญ่ ต้องการระบบที่:

ติดตามข่าวความเคลื่อนไหวของคู่แข่งจากหลายเว็บไซต์
เฝ้าระวังข่าวเทคโนโลยี AI และ e-commerce
สรุปประเด็นสำคัญส่งให้ทีมทุกเช้า
แจ้งเตือนทันทีเมื่อมีข่าวใหญ่ที่กระทบธุรกิจ

สถาปัตยกรรมระบบโดยรวม

ระบบของเราประกอบด้วย 4 ส่วนหลัก:

Data Collector — ดึงข้อมูลจาก RSS Feed, API ข่าว, และเว็บไซต์ต่างๆ
Text Processor — ทำความสะอาดและจัดรูปแบบข้อความ
AI Summarizer — ใช้ LLM สรุปเนื้อหาด้วย HolySheep AI
Notifier — ส่งสรุปผ่าน LINE, Email หรือ Discord

การติดตั้งและเตรียมสภาพแวดล้อม

# สร้าง virtual environment
python -m venv news_summary_env
source news_summary_env/bin/activate  # Windows: news_summary_env\Scripts\activate

# ติดตั้งไลบรารีที่จำเป็น
pip install requests beautifulsoup4 feedparser
pip install python-dateutil aiohttp  # asyncio มาพร้อมกับ Python อยู่แล้ว ไม่ต้องติดตั้งผ่าน pip

# สำหรับส่งการแจ้งเตือน
pip install line-bot-sdk  # สำหรับ LINE
pip install discord.py    # สำหรับ Discord

โค้ดหลัก: ระบบดึงข้อมูลข่าวจากหลายแหล่ง

import os
from datetime import datetime, timedelta
from typing import Dict, List, Optional
from urllib.parse import urljoin, urlparse

import feedparser
import requests
from bs4 import BeautifulSoup

# Base URL used for HolySheep API requests.
BASE_URL = "https://api.holysheep.ai/v1"
# Prefer the HOLYSHEEP_API_KEY environment variable so a real credential never
# has to be committed to source; the placeholder keeps the tutorial runnable
# exactly as before when the variable is unset or empty.
API_KEY = os.environ.get("HOLYSHEEP_API_KEY") or "YOUR_HOLYSHEEP_API_KEY"

class NewsCollector:
    """Collect news articles from RSS feeds and scraped web pages.

    Attributes:
        sources: List of source descriptors. Each descriptor is a dict with
            "name", "url" and "type" keys; ``collect_all`` reads every
            descriptor as an RSS feed.
    """

    def __init__(self, sources: Optional[List[Dict]] = None):
        """Create a collector.

        Args:
            sources: Optional list of source descriptors. When omitted, the
                three built-in technology feeds are used.
        """
        if sources is None:
            sources = [
                {
                    "name": "TechCrunch",
                    "url": "https://techcrunch.com/feed/",
                    "type": "rss"
                },
                {
                    "name": "The Verge",
                    "url": "https://www.theverge.com/rss/index.xml",
                    "type": "rss"
                },
                {
                    "name": "Wired",
                    "url": "https://www.wired.com/feed/rss",
                    "type": "rss"
                }
            ]
        # Copy so later changes to the caller's list do not alter this instance.
        self.sources = list(sources)

    @staticmethod
    def _entry_to_article(entry, source_name: str) -> Dict:
        """Convert one feed entry (a dict-like object) into an article dict.

        Missing fields become empty strings so that one incomplete entry does
        not abort the whole feed.
        """
        return {
            "title": entry.get("title", ""),
            "content": entry.get("summary", ""),
            "url": entry.get("link", ""),
            "published": entry.get("published", ""),
            "source": source_name,
        }

    def collect_from_rss(self, source: Dict, limit: int = 10) -> List[Dict]:
        """Fetch the newest entries of one RSS feed.

        Args:
            source: Source descriptor; "url" is required, "name" is used to
                label the articles and error messages.
            limit: Maximum number of entries to return (default 10).

        Returns:
            A list of article dicts with "title", "content", "url",
            "published" and "source" keys. Returns an empty list on failure;
            this method is best-effort and does not raise.
        """
        name = source.get("name") or source.get("url", "")
        try:
            feed = feedparser.parse(source["url"])

            # feedparser reports fetch/parse problems through the "bozo" flag
            # instead of raising, so an unusable feed must be detected here.
            # A flagged feed that still yielded entries is kept.
            if feed.get("bozo") and not feed.entries:
                print(f"Error collecting from {name}: {feed.get('bozo_exception')}")
                return []

            return [
                self._entry_to_article(entry, name)
                for entry in feed.entries[:max(limit, 0)]
            ]
        except Exception as e:
            print(f"Error collecting from {name}: {e}")
            return []

    def collect_from_web(self, url: str, selectors: Dict, limit: int = 5) -> List[Dict]:
        """Scrape articles from a web page with CSS selectors.

        Args:
            url: Absolute URL of the page to scrape.
            selectors: CSS selectors under the optional keys "title"
                (default "h2"), "content" (default "p") and "link"
                (default "a"). ``None`` is treated as an empty dict.
            limit: Maximum number of articles to return (default 5).

        Returns:
            A list of article dicts with "title", "content", "url", "source"
            and "scraped_at" keys. Returns an empty list on failure; this
            method is best-effort and does not raise.
        """
        try:
            selectors = selectors or {}
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
            }
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()

            soup = BeautifulSoup(response.content, 'html.parser')

            titles = soup.select(selectors.get('title', 'h2'))
            contents = soup.select(selectors.get('content', 'p'))
            links = soup.select(selectors.get('link', 'a'))

            host = urlparse(url).netloc
            # One timestamp for the whole page so all articles of a scrape agree.
            scraped_at = datetime.now().isoformat()

            articles = []
            # NOTE(review): title/content/link are paired purely by position;
            # this assumes each selector yields one element per article in the
            # same order - confirm per site.
            for i, title in enumerate(titles[:max(limit, 0)]):
                href = links[i].get('href', '') if i < len(links) else ""
                articles.append({
                    "title": title.get_text(strip=True),
                    "content": contents[i].get_text(strip=True) if i < len(contents) else "",
                    # Resolve relative links against the page URL so the
                    # stored URL is usable outside the page context.
                    "url": urljoin(url, href) if href else "",
                    "source": host,
                    "scraped_at": scraped_at
                })

            return articles
        except Exception as e:
            print(f"Error scraping {url}: {e}")
            return []

    def collect_all(self) -> List[Dict]:
        """Collect articles from every configured source.

        Returns:
            The concatenation of the per-source results, in source order.
        """
        all_articles = []

        # Every configured source is read as an RSS feed.
        for source in self.sources:
            all_articles.extend(self.collect_from_rss(source))

        return all_articles

# ทดสอบการทำงาน
if __name__ == "__main__":
    # Smoke test: pull every configured feed once and report how many
    # articles came back.
    collected = NewsCollector().collect_all()
    print(f"รวบรวมได้ {len(collected)} ข่าว")

การสร้างสรุปข่าวด้วย AI ผ่าน HolySheep API

หลังจากรวบรวมข่าวได้แล้ว ขั้นตอนต่อไปคือการสร้างสรุปอัจฉริยะด้วย AI โดยใช้ HolySheep AI ซึ่งมีความเร็วในการตอบสนองน้อยกว่า 50 มิลลิวินาที และราคาประหยัดกว่า 85% เมื่อเทียบกับบริการอื่น ราคา DeepSeek V3.2 อยู่ที่เพียง $0.42 ต่อล้าน tokens

import requests
import json
from typing import List, Dict

class AINewsSummarizer:
    """Summarize news articles through the HolySheep chat-completions API.

    Attributes:
        api_key: Bearer token sent with every request.
        model: Model name sent with every request.
    """

    BASE_URL = "https://api.holysheep.ai/v1"

    def __init__(self, api_key: str, model: str = "gpt-4.1"):
        """Store the credentials and the model to use.

        Args:
            api_key: API key used for the Authorization header.
            model: Model name for all requests (default "gpt-4.1").
        """
        self.api_key = api_key
        self.model = model

    def _post_chat(self, messages: List[Dict], temperature: float,
                   max_tokens: int, timeout: int):
        """Send one chat-completion request and return the raw HTTP response."""
        return requests.post(
            f"{self.BASE_URL}/chat/completions",
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json"
            },
            json={
                "model": self.model,
                "messages": messages,
                "temperature": temperature,
                "max_tokens": max_tokens
            },
            timeout=timeout
        )

    @staticmethod
    def _parse_keywords(raw: str) -> List[str]:
        """Split a comma- or newline-separated reply into non-empty keywords."""
        pieces = (raw or "").replace("\n", ",").split(",")
        return [piece.strip() for piece in pieces if piece.strip()]

    def summarize_single(self, article: Dict, max_length: int = 200) -> str:
        """Summarize a single article.

        Args:
            article: Dict with optional "title", "source" and "content" keys.
                Only the first 1000 characters of the content are sent.
            max_length: Target summary length stated in the prompt.

        Returns:
            The model's reply, or a string starting with "Error:" when the
            request fails. This method does not raise for request errors.
        """
        # Guard against content=None, which would otherwise fail on slicing
        # before the error handling below is reached.
        content = (article.get('content') or '')[:1000]

        prompt = f"""คุณเป็นนักเขียนข่าวมืออาชีพ จงสรุปข่าวต่อไปนี้ให้กระชับ ใช้ภาษาที่เข้าใจง่าย 
และเน้นประเด็นสำคัญที่ธุรกิจควรรู้

หัวข้อ: {article.get('title', '')}
แหล่งข่าว: {article.get('source', '')}
เนื้อหา: {content}

จงสรุปให้ได้ใจความสำคัญใน {max_length} คำ โดยตอบเป็นประโยคสั้นๆ พร้อมให้คะแนนความสำคัญ
(1-5 ดาว) และระบุหมวดหมู่ของข่าว"""

        try:
            response = self._post_chat(
                [
                    {"role": "system", "content": "คุณเป็นผู้ช่วยสรุปข่าวที่เชี่ยวชาญ"},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.3,
                max_tokens=500,
                timeout=30
            )

            if response.status_code == 200:
                data = response.json()
                return data['choices'][0]['message']['content']
            else:
                return f"Error: {response.status_code} - {response.text}"

        except requests.exceptions.Timeout:
            return "Error: Request timeout - การเชื่อมต่อใช้เวลานานเกินไป"
        except Exception as e:
            return f"Error: {str(e)}"

    def summarize_batch(self, articles: List[Dict], topic: str = "ทั่วไป") -> str:
        """Produce one digest covering several articles.

        Args:
            articles: Article dicts; at most the first 10 are used and each
                content is cut to 500 characters.
            topic: Topic label inserted into the prompt.

        Returns:
            The model's reply, or a string starting with "Error:" when there
            is nothing to summarize or the request fails.
        """
        # Without articles the prompt would be empty and the API call wasted.
        if not articles:
            return "Error: No articles to summarize - ไม่มีข่าวให้สรุป"

        # Render every article as a numbered section of the prompt.
        news_text = "\n\n".join([
            f"ข่าวที่ {i+1}: {a.get('title', '')}\nแหล่ง: {a.get('source', '')}\nเนื้อหา: {(a.get('content') or '')[:500]}"
            for i, a in enumerate(articles[:10])
        ])

        prompt = f"""จงสร้างสรุปข่าวประจำวันในหัวข้อ "{topic}" จากข่าวต่อไปนี้

{news_text}

โครงสร้างที่ต้องการ:
1. หัวข้อหลัก 3-5 ประเด็น (พร้อมคะแนนความสำคัญ)
2. รายละเอียดสำคัญของแต่ละประเด็น
3. ผลกระทบต่อธุรกิจ/อุตสาหกรรม
4. คำแนะนำเบื้องต้น

ใช้ภาษาที่กระชับ เข้าใจง่าย เหมาะสำหรับผู้บริหารอ่าน"""

        try:
            response = self._post_chat(
                [
                    {"role": "system", "content": "คุณเป็นนักวิเคราะห์ข่าวมืออาชีพที่สรุปให้กระชับและตรงประเด็น"},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.4,
                max_tokens=2000,
                timeout=60
            )

            if response.status_code == 200:
                data = response.json()
                return data['choices'][0]['message']['content']
            else:
                return f"Error: {response.status_code}"

        except Exception as e:
            return f"Error: {str(e)}"

    def extract_keyword(self, text: str) -> List[str]:
        """Extract keywords from a news text.

        Args:
            text: News text; only the first 2000 characters are sent.

        Returns:
            A list of non-empty keywords, or an empty list when the text is
            blank, the API answers with a non-200 status, or the request
            fails.
        """
        # Blank input cannot yield keywords; skip the API call.
        if not text or not text.strip():
            return []

        prompt = f"""จงดึงคีย์เวิร์ดสำคัญ 10 คำจากข่าวต่อไปนี้
แต่ละคำคั่นด้วยเครื่องหมายลูกน้ำ

ข่าว: {text[:2000]}

คีย์เวิร์ด:"""

        try:
            response = self._post_chat(
                [
                    {"role": "user", "content": prompt}
                ],
                temperature=0.2,
                max_tokens=100,
                timeout=20
            )

            if response.status_code == 200:
                result = response.json()['choices'][0]['message']['content']
                return self._parse_keywords(result)
            return []

        except Exception as e:
            print(f"Keyword extraction error: {e}")
            return []

# ตัวอย่างการใช้งาน
if __name__ == "__main__":
    summarizer = AINewsSummarizer(API_KEY)
    
    # ทดสอบสรุปข่าวเดี่ยว
    test_article = {
        "title": "OpenAI เปิดตัว GPT-5 พร้อมความสามารถใหม่",
        "content": "บริษัท OpenAI ประกาศเปิดตัวโมเดล AI รุ่นใหม่ล่าสุด...",
        "source": "Tech News"
    }
    
    summary = summarizer.summarize_single(test_article)
    print("สรุป
แหล่งข้อมูลที่เกี่ยวข้อง
📚 บทช่วยสอน AI API
💰 ดูราคา
📖 เอกสารสำหรับนักพัฒนา
🚀 สมัครฟรี
บทความที่เกี่ยวข้อง
AI API Key Management ขั้นเทพ: คู่มือย้ายระบบ Vault/KMS สู่ 
Triton Inference Server 企业部署：多模型管理方案 — คู่มือย้ายระบบสู่ Clo
คู่มือปฏิบัติตาม License โมเดล Open Source สำหรับการใช้งานเช

กรณีศึกษา: ระบบเฝ้าระวังข่าวสำหรับทีม E-Commerce

สถาปัตยกรรมระบบโดยรวม

การติดตั้งและเตรียมสภาพแวดล้อม

ติดตั้งไลบรารีที่จำเป็น

สำหรับส่งการแจ้งเตือน

โค้ดหลัก: ระบบดึงข้อมูลข่าวจากหลายแหล่ง

ทดสอบการทำงาน

การสร้างสรุปข่าวด้วย AI ผ่าน HolySheep API

ตัวอย่างการใช้งาน

แหล่งข้อมูลที่เกี่ยวข้อง

บทความที่เกี่ยวข้อง

🔥 ลอง HolySheep AI