Initial commit
This commit is contained in:
733
backend/app/services/domain.py
Normal file
733
backend/app/services/domain.py
Normal file
@ -0,0 +1,733 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from collections import Counter
|
||||
from datetime import datetime, time, timedelta
|
||||
from typing import Iterable
|
||||
from urllib.parse import unquote, urlparse
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from app.models import (
|
||||
Account,
|
||||
ClsNewsDocument,
|
||||
ClsNewsItem,
|
||||
ClsNewsSummary,
|
||||
ClsSectorImpact,
|
||||
DailyInputAccount,
|
||||
DailyInputDocument,
|
||||
DailyInputUpsertPayload,
|
||||
OpinionArticle,
|
||||
ReportDocument,
|
||||
ReportListItem,
|
||||
)
|
||||
from app.services.storage import (
|
||||
fetch_accounts,
|
||||
fetch_cls_news_document,
|
||||
fetch_daily_input_document,
|
||||
fetch_report_document,
|
||||
fetch_report_list,
|
||||
save_accounts,
|
||||
save_cls_news_document,
|
||||
save_daily_input_document,
|
||||
save_report_document,
|
||||
)
|
||||
|
||||
# Local market timezone; every timestamp produced by this module is Asia/Shanghai.
SHANGHAI = ZoneInfo("Asia/Shanghai")
# Minimum age of a cached CLS news document before it is re-fetched.
CLS_REFRESH_INTERVAL = timedelta(minutes=3)
# Mobile telegraph page scraped for live 7x24 items.
CLS_TELEGRAPH_URL = "https://m.cls.cn/telegraph"
# Browser-like headers so cls.cn serves the regular HTML payload.
HTTP_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"
    ),
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
}

# Sentiment labels: "bullish" / "bearish" / "neutral" (Chinese).
SENTIMENT_BULL = "\u770b\u591a"
SENTIMENT_BEAR = "\u770b\u7a7a"
SENTIMENT_NEUTRAL = "\u4e2d\u6027"

# Built-in roster of tracked opinion accounts; used as a fallback when
# storage returns no accounts (see get_accounts).
ACCOUNTS: list[Account] = [
    Account(
        id="touzi-mingjian",
        name="\u6295\u8d44\u660e\u89c1",
        description="\u504f\u4e3b\u9898\u8f6e\u52a8\u4e0e\u4e3b\u7ebf\u5224\u65ad\uff0c\u9002\u5408\u8ddf\u8e2a\u5e02\u573a\u504f\u597d\u53d8\u5316\u3002",
    ),
    Account(
        id="aigujun-2020",
        name="\u7231\u80a1\u541b2020",
        description="\u5173\u6ce8\u60c5\u7eea\u3001\u70ed\u70b9\u6269\u6563\u4e0e\u4ea4\u6613\u7ec6\u8282\u3002",
    ),
    Account(
        id="mazhiming-shouping",
        name="\u9a6c\u5fd7\u660e\u6536\u8bc4",
        description="\u65e5\u5185\u6536\u8bc4\u4e0e\u60c5\u7eea\u53d8\u5316\u603b\u7ed3\u3002",
    ),
    Account(
        id="laobai-guandian",
        name="\u8001\u767d\u5206\u6790\u5ba4\u89c2\u70b9",
        description="\u504f\u7b56\u7565\u62c6\u89e3\u548c\u5173\u952e\u677f\u5757\u8ddf\u8e2a\u3002",
    ),
]

# Per-account default focus sectors, used by infer_sectors when no keyword matches.
ACCOUNT_FOCUS: dict[str, list[str]] = {
    "touzi-mingjian": ["AI", "\u7b97\u529b", "\u673a\u5668\u4eba"],
    "aigujun-2020": ["CPO", "\u5b58\u50a8\u82af\u7247", "\u65b0\u80fd\u6e90"],
    "mazhiming-shouping": ["AI", "\u5238\u5546", "\u6c7d\u8f66"],
    "laobai-guandian": ["\u673a\u5668\u4eba", "\u534a\u5bfc\u4f53", "\u65b0\u80fd\u6e90"],
}

# Sector -> keyword list driving the naive substring-based sector tagger.
SECTOR_KEYWORDS: dict[str, list[str]] = {
    "AI": ["ai", "\u4eba\u5de5\u667a\u80fd", "\u5927\u6a21\u578b", "\u6a21\u578b"],
    "\u7b97\u529b": ["\u7b97\u529b", "compute", "server", "gpu"],
    "CPO": ["cpo", "\u5149\u6a21\u5757", "\u9ad8\u901f\u4e92\u8054"],
    "\u5b58\u50a8\u82af\u7247": ["\u5b58\u50a8", "memory", "dram", "nand"],
    "\u534a\u5bfc\u4f53": ["\u534a\u5bfc\u4f53", "chip", "wafer", "\u6676\u5706"],
    "\u5238\u5546": ["\u5238\u5546", "broker", "\u8bc1\u5238"],
    "\u77f3\u6cb9\u5929\u7136\u6c14": ["\u77f3\u6cb9", "\u5929\u7136\u6c14", "\u6cb9\u6c14", "\u80fd\u6e90\u4ef7\u683c"],
    "\u65b0\u80fd\u6e90": ["\u65b0\u80fd\u6e90", "\u9502\u7535", "\u5149\u4f0f", "\u50a8\u80fd"],
    "\u519b\u5de5": ["\u519b\u5de5", "\u536b\u661f", "\u822a\u5929"],
    "\u673a\u5668\u4eba": ["\u673a\u5668\u4eba", "robot", "\u81ea\u52a8\u5316"],
    "\u6c7d\u8f66": ["\u6c7d\u8f66", "\u8f66\u4f01", "\u667a\u9a7e", "\u6574\u8f66"],
    "\u533b\u836f": ["\u533b\u836f", "\u521b\u65b0\u836f", "\u533b\u7597"],
}

# Keyword lists for the bullish/bearish hit-count heuristic in infer_sentiment.
POSITIVE_KEYWORDS: list[str] = [
    "\u673a\u4f1a",
    "\u4fee\u590d",
    "\u589e\u5f3a",
    "\u4e3b\u7ebf",
    "\u589e\u91cf",
    "\u53cd\u5f39",
    "\u7a81\u7834",
    "\u79ef\u6781",
    "up",
    "bull",
]
NEGATIVE_KEYWORDS: list[str] = [
    "\u98ce\u9669",
    "\u627f\u538b",
    "\u8c28\u614e",
    "\u56de\u8c03",
    "\u7f29\u91cf",
    "\u89c2\u671b",
    "\u5206\u6b67",
    "bear",
    "down",
]

# (title keyword, article type) pairs checked in order by infer_article_type.
ARTICLE_TYPE_PATTERNS: list[tuple[str, str]] = [
    ("\u6536\u8bc4", "\u5e02\u573a\u6536\u8bc4"),
    ("\u5348", "\u76d8\u4e2d\u89c2\u5bdf"),
    ("\u7b56\u7565", "\u7b56\u7565\u8ddf\u8e2a"),
    ("\u590d\u76d8", "\u76d8\u9762\u590d\u76d8"),
    ("\u884c\u4e1a", "\u884c\u4e1a\u89c2\u5bdf"),
]

# Canned telegraph items used by build_fallback_cls_items when live scraping
# fails or is disabled for a non-current date.
CLS_NEWS_TEMPLATES: list[dict[str, object]] = [
    {
        "title": "\u8d22\u8054\u793e\u76d8\u524d\u7cbe\u9009\uff1a\u7b97\u529b\u94fe\u56de\u6696\uff0c\u8d44\u91d1\u91cd\u65b0\u805a\u7126\u9ad8\u666f\u6c14\u65b9\u5411",
        "summary": "\u9694\u591c\u5e02\u573a\u98ce\u9669\u504f\u597d\u56de\u5347\uff0c\u7b97\u529b\u4e0e\u670d\u52a1\u5668\u94fe\u6761\u83b7\u8d44\u91d1\u91cd\u65b0\u914d\u7f6e\uff0c\u60c5\u7eea\u4fee\u590d\u5148\u4e8e\u6210\u4ea4\u5168\u9762\u653e\u5927\u3002",
        "sectors": ["\u7b97\u529b", "AI"],
        "sentiment": SENTIMENT_BULL,
        "reference_url": "https://www.cls.cn/detail/compute-rebound",
    },
    {
        "title": "AI Daily\uff1aCPO \u4e0e\u5b58\u50a8\u82af\u7247\u540c\u6b65\u8d70\u5f3a\uff0c\u666f\u6c14\u5ea6\u7ebf\u7d22\u5ef6\u7eed",
        "summary": "\u9ad8\u901f\u4e92\u8054\u4e0e\u5b58\u50a8\u62a5\u4ef7\u9884\u671f\u652f\u6491\u677f\u5757\u8868\u73b0\uff0c\u8d44\u91d1\u66f4\u503e\u5411\u4e8e\u56f4\u7ed5\u786e\u5b9a\u6027\u73af\u8282\u96c6\u4e2d\u3002",
        "sectors": ["CPO", "\u5b58\u50a8\u82af\u7247"],
        "sentiment": SENTIMENT_BULL,
        "reference_url": "https://www.cls.cn/detail/ai-daily-cpo-memory",
    },
    {
        "title": "\u8d22\u8054\u793e\u884c\u4e1a\u89c2\u5bdf\uff1a\u673a\u5668\u4eba\u94fe\u6761\u5206\u5316\uff0c\u8ba2\u5355\u5151\u73b0\u6210\u4e3a\u77ed\u671f\u7126\u70b9",
        "summary": "\u673a\u5668\u4eba\u65b9\u5411\u5185\u90e8\u5f00\u59cb\u51fa\u73b0\u5151\u73b0\u4e0e\u6362\u624b\uff0c\u5e02\u573a\u4ece\u6982\u5ff5\u6269\u6563\u8f6c\u5411\u4e1a\u7ee9\u4e0e\u8ba2\u5355\u9a8c\u8bc1\u3002",
        "sectors": ["\u673a\u5668\u4eba"],
        "sentiment": SENTIMENT_NEUTRAL,
        "reference_url": "https://www.cls.cn/detail/robotics-orders",
    },
    {
        "title": "\u8d22\u8054\u793e\u80fd\u6e90\u8ffd\u8e2a\uff1a\u6cb9\u6c14\u677f\u5757\u9ad8\u4f4d\u9707\u8361\uff0c\u8d44\u91d1\u5207\u5411\u9632\u5fa1\u54c1\u79cd",
        "summary": "\u539f\u6cb9\u4ef7\u683c\u7ef4\u6301\u9ad8\u4f4d\u540e\uff0c\u6cb9\u6c14\u65b9\u5411\u51fa\u73b0\u9ad8\u4f4d\u9707\u8361\uff0c\u90e8\u5206\u8d44\u91d1\u8f6c\u5411\u533b\u836f\u7b49\u9632\u5b88\u677f\u5757\u3002",
        "sectors": ["\u77f3\u6cb9\u5929\u7136\u6c14", "\u533b\u836f"],
        "sentiment": SENTIMENT_NEUTRAL,
        "reference_url": "https://www.cls.cn/detail/energy-rotation",
    },
    {
        "title": "AI Daily\uff1a\u6c7d\u8f66\u4e0e\u667a\u9a7e\u5ef6\u7eed\u5206\u6b67\uff0c\u4e3b\u7ebf\u4ecd\u9700\u7b49\u5f85\u9500\u91cf\u6570\u636e\u9a8c\u8bc1",
        "summary": "\u6574\u8f66\u4e0e\u667a\u9a7e\u65b9\u5411\u5173\u6ce8\u5ea6\u4ecd\u9ad8\uff0c\u4f46\u5e02\u573a\u5bf9\u4f30\u503c\u6269\u5f20\u5df2\u6709\u4fdd\u7559\uff0c\u7b49\u5f85\u9500\u91cf\u548c\u8ba2\u5355\u6570\u636e\u786e\u8ba4\u3002",
        "sectors": ["\u6c7d\u8f66"],
        "sentiment": SENTIMENT_BEAR,
        "reference_url": "https://www.cls.cn/detail/auto-data-watch",
    },
    {
        "title": "\u8d22\u8054\u793e7x24\uff1a\u534a\u5bfc\u4f53\u8bbe\u5907\u65b9\u5411\u8d70\u5f3a\uff0c\u673a\u6784\u79f0\u56fd\u4ea7\u66ff\u4ee3\u8282\u594f\u63d0\u901f",
        "summary": "\u6676\u5706\u5236\u9020\u4e0e\u8bbe\u5907\u94fe\u6761\u51fa\u73b0\u5f02\u52a8\uff0c\u5e02\u573a\u56f4\u7ed5\u56fd\u4ea7\u66ff\u4ee3\u548c\u8d44\u672c\u5f00\u652f\u6062\u590d\u91cd\u65b0\u5b9a\u4ef7\u3002",
        "sectors": ["\u534a\u5bfc\u4f53"],
        "sentiment": SENTIMENT_BULL,
        "reference_url": "https://www.cls.cn/detail/semi-equipment-up",
    },
    {
        "title": "\u8d22\u8054\u793e7x24\uff1a\u5238\u5546\u677f\u5757\u5348\u540e\u62c9\u5347\uff0c\u5e02\u573a\u60c5\u7eea\u6709\u6240\u4fee\u590d",
        "summary": "\u6307\u6570\u9707\u8361\u8fc7\u7a0b\u4e2d\u5238\u5546\u627f\u62c5\u60c5\u7eea\u4fee\u590d\u529f\u80fd\uff0c\u5e26\u52a8\u90e8\u5206\u9ad8\u5f39\u6027\u65b9\u5411\u56de\u6696\u3002",
        "sectors": ["\u5238\u5546"],
        "sentiment": SENTIMENT_BULL,
        "reference_url": "https://www.cls.cn/detail/broker-rebound",
    },
    {
        "title": "\u8d22\u8054\u793e7x24\uff1a\u521b\u65b0\u836f\u65b9\u5411\u6301\u7eed\u6d3b\u8dc3\uff0c\u8d44\u91d1\u8f6c\u5411\u9632\u5b88\u4e0e\u6210\u957f\u517c\u987e",
        "summary": "\u533b\u836f\u677f\u5757\u83b7\u5f97\u589e\u91cf\u8d44\u91d1\u5173\u6ce8\uff0c\u521b\u65b0\u836f\u548c\u5668\u68b0\u7ec6\u5206\u8868\u73b0\u66f4\u5f3a\u3002",
        "sectors": ["\u533b\u836f"],
        "sentiment": SENTIMENT_NEUTRAL,
        "reference_url": "https://www.cls.cn/detail/medical-active",
    },
    {
        "title": "\u8d22\u8054\u793e7x24\uff1a\u65b0\u80fd\u6e90\u94fe\u6761\u5206\u5316\u52a0\u5267\uff0c\u673a\u6784\u63d0\u9192\u5173\u6ce8\u4ea7\u80fd\u51fa\u6e05\u8282\u594f",
        "summary": "\u65b0\u80fd\u6e90\u677f\u5757\u5185\u90e8\u8f6e\u52a8\u660e\u663e\uff0c\u8d44\u91d1\u66f4\u504f\u5411\u4f4e\u4f4d\u73af\u8282\u548c\u6210\u672c\u6539\u5584\u65b9\u5411\u3002",
        "sectors": ["\u65b0\u80fd\u6e90"],
        "sentiment": SENTIMENT_NEUTRAL,
        "reference_url": "https://www.cls.cn/detail/new-energy-split",
    },
    {
        "title": "\u8d22\u8054\u793e7x24\uff1a\u519b\u5de5\u677f\u5757\u76d8\u4e2d\u5f02\u52a8\uff0c\u8ba2\u5355\u5151\u73b0\u9884\u671f\u91cd\u65b0\u5347\u6e29",
        "summary": "\u519b\u5de5\u94fe\u6761\u76d8\u4e2d\u8d70\u5f3a\uff0c\u5e02\u573a\u5173\u6ce8\u540e\u7eed\u8ba2\u5355\u5151\u73b0\u4e0e\u4f30\u503c\u5207\u6362\u7a7a\u95f4\u3002",
        "sectors": ["\u519b\u5de5"],
        "sentiment": SENTIMENT_BULL,
        "reference_url": "https://www.cls.cn/detail/defense-orders",
    },
    {
        "title": "\u8d22\u8054\u793e7x24\uff1a\u673a\u5668\u4eba\u677f\u5757\u51b2\u9ad8\u56de\u843d\uff0c\u77ed\u7ebf\u535a\u5f08\u60c5\u7eea\u5347\u6e29",
        "summary": "\u673a\u5668\u4eba\u65b9\u5411\u9ad8\u4f4d\u9707\u8361\uff0c\u8d44\u91d1\u5728\u9898\u6750\u6269\u6563\u4e0e\u5151\u73b0\u538b\u529b\u4e4b\u95f4\u53cd\u590d\u5207\u6362\u3002",
        "sectors": ["\u673a\u5668\u4eba"],
        "sentiment": SENTIMENT_NEUTRAL,
        "reference_url": "https://www.cls.cn/detail/robotics-intraday",
    },
    {
        "title": "\u8d22\u8054\u793e7x24\uff1a\u5b58\u50a8\u82af\u7247\u62a5\u4ef7\u9884\u671f\u7ee7\u7eed\u4e0a\u4fee\uff0c\u4ea7\u4e1a\u94fe\u666f\u6c14\u5ea6\u53d7\u5173\u6ce8",
        "summary": "\u5b58\u50a8\u73af\u8282\u4ef7\u683c\u4fee\u590d\u903b\u8f91\u5ef6\u7eed\uff0c\u5e02\u573a\u91cd\u65b0\u4ea4\u6613\u4f9b\u9700\u6539\u5584\u4e0e\u76c8\u5229\u5f39\u6027\u3002",
        "sectors": ["\u5b58\u50a8\u82af\u7247"],
        "sentiment": SENTIMENT_BULL,
        "reference_url": "https://www.cls.cn/detail/memory-price-up",
    },
]

# Demo daily-input links, keyed by day offset back from today (see seed_demo_content).
SAMPLE_INPUTS: dict[int, dict[str, list[str]]] = {
    1: {
        "touzi-mingjian": ["https://mp.weixin.qq.com/s/semiconductor-capacity-and-chip-cycle"],
        "aigujun-2020": ["https://mp.weixin.qq.com/s/storage-chip-price-repair"],
        "mazhiming-shouping": ["https://mp.weixin.qq.com/s/market-close-sector-rotation"],
        "laobai-guandian": ["https://mp.weixin.qq.com/s/robotics-and-energy-balance"],
    },
}
|
||||
|
||||
|
||||
def now_local() -> datetime:
    """Return the current wall-clock time in the Asia/Shanghai timezone."""
    return datetime.now(SHANGHAI)
|
||||
|
||||
|
||||
def iso_timestamp(value: datetime | None = None) -> str:
|
||||
return (value or now_local()).replace(microsecond=0).isoformat()
|
||||
|
||||
|
||||
def ensure_local_timezone(value: datetime) -> datetime:
    """Coerce *value* into Asia/Shanghai, attaching the zone when naive."""
    if value.tzinfo is not None:
        # Aware datetime: convert into the local zone.
        return value.astimezone(SHANGHAI)
    # Naive datetime: interpret it as already being local time.
    return value.replace(tzinfo=SHANGHAI)
|
||||
|
||||
|
||||
def normalize_whitespace(value: str) -> str:
    """Trim *value* and collapse each internal whitespace run to one space."""
    return re.sub(r"\s+", " ", value.strip())
|
||||
|
||||
|
||||
def extract_json_object(script_text: str, marker: str) -> str:
    """Return the first balanced ``{...}`` object that follows *marker*.

    Scans character by character, tracking string literals and escape
    sequences so braces inside JSON strings do not affect the nesting count.

    Raises:
        RuntimeError: if the marker, the opening brace, or the matching
            closing brace cannot be found.
    """
    anchor = script_text.find(marker)
    if anchor < 0:
        raise RuntimeError(f"Marker not found: {marker}")

    begin = script_text.find("{", anchor)
    if begin < 0:
        raise RuntimeError(f"JSON object start not found for marker: {marker}")

    nesting = 0
    inside_string = False
    pending_escape = False
    for position, char in enumerate(script_text[begin:], start=begin):
        if inside_string:
            if pending_escape:
                # Previous char was a backslash: this char is escaped.
                pending_escape = False
            elif char == "\\":
                pending_escape = True
            elif char == '"':
                inside_string = False
        elif char == '"':
            inside_string = True
        elif char == "{":
            nesting += 1
        elif char == "}":
            nesting -= 1
            if nesting == 0:
                return script_text[begin : position + 1]

    raise RuntimeError(f"JSON object end not found for marker: {marker}")
|
||||
|
||||
|
||||
def parse_telegraph_timestamp(date_str: str, time_str: str) -> str:
    """Combine a date and a (possibly seconds-less) time into a local ISO stamp."""
    # Pad to hh:mm:ss unless the time already has all three components.
    if len(time_str.split(":")) != 3:
        time_str = f"{time_str}:00"
    combined = datetime.fromisoformat(f"{date_str}T{time_str}")
    return combined.replace(tzinfo=SHANGHAI).isoformat(timespec="seconds")
|
||||
|
||||
|
||||
def split_title_and_summary(content: str) -> tuple[str, str]:
    """Derive a (title, summary) pair from raw telegraph content.

    A leading bracketed segment (ASCII or CJK brackets) becomes the title;
    otherwise the first sentence does. The summary is the cleaned text,
    truncated to 220 characters with an ellipsis when longer.
    """

    def squash(text: str) -> str:
        # Inline whitespace normalization, same contract as normalize_whitespace.
        return re.sub(r"\s+", " ", text).strip()

    flattened = squash(content)
    framed = re.match(r"^[\[({\u3010\u3016](.+?)[\])}\u3011\u3017][\uff1a: ]*(.*)$", flattened)
    if framed:
        heading = squash(framed.group(1))
        body = squash(framed.group(2) or flattened)
        # Fall back to the heading itself when nothing follows the brackets.
        return heading[:80], body or heading

    first_sentence = re.split(r"[。;;!?!?]", flattened, maxsplit=1)[0]
    trimmed = flattened if len(flattened) <= 220 else f"{flattened[:217]}..."
    return first_sentence[:80], trimmed
|
||||
|
||||
|
||||
def build_fallback_cls_items(reference_time: datetime) -> list[ClsNewsItem]:
    """Materialize the static news templates as items staggered before *reference_time*."""
    fallback: list[ClsNewsItem] = []
    for position, template in enumerate(CLS_NEWS_TEMPLATES):
        # Spread items backwards in time (95-minute steps) so they look organic.
        stamp = reference_time - timedelta(minutes=position * 95 + 8)
        fallback.append(
            ClsNewsItem(
                id=f"cls-{position + 1}",
                title=template["title"],
                published_at=stamp.replace(microsecond=0).isoformat(),
                source="\u8d22\u8054\u793e" if position % 2 == 0 else "\u8d22\u8054\u793e AI Daily",
                summary=template["summary"],
                reference_url=template["reference_url"],
                sectors=template["sectors"],
                sentiment=template["sentiment"],
            )
        )
    fallback.sort(key=lambda entry: entry.published_at, reverse=True)
    return fallback
|
||||
|
||||
|
||||
def fetch_cls_telegraph_items(reference_time: datetime) -> list[ClsNewsItem]:
    """Scrape the live cls.cn telegraph page for today's news items.

    Parses the ``__NEXT_DATA__`` JSON embedded in the page, keeps only
    entries published on ``reference_time.date()``, and returns them
    newest-first. Raises RuntimeError when the payload is missing or no
    items can be parsed; callers fall back to canned templates.
    """
    session = requests.Session()
    # Ignore proxy/env settings so the request shape is deterministic.
    session.trust_env = False
    response = session.get(CLS_TELEGRAPH_URL, headers=HTTP_HEADERS, timeout=15)
    response.raise_for_status()
    # Force UTF-8 before .text is decoded (page headers may omit charset).
    response.encoding = "utf-8"

    soup = BeautifulSoup(response.text, "html.parser")
    next_data_script = None
    for script in soup.find_all("script"):
        script_text = script.string or script.get_text()
        if "__NEXT_DATA__ =" in script_text:
            next_data_script = script_text
            break
    if not next_data_script:
        raise RuntimeError("Missing __NEXT_DATA__ payload on cls.cn")

    # Pull the balanced JSON object assigned to __NEXT_DATA__.
    next_data = json.loads(extract_json_object(next_data_script, "__NEXT_DATA__ ="))
    roll_data = (
        next_data.get("props", {})
        .get("initialState", {})
        .get("roll_data", [])
    )
    if not isinstance(roll_data, list) or not roll_data:
        raise RuntimeError("Missing roll_data in cls.cn payload")

    target_date = reference_time.date()
    items: list[ClsNewsItem] = []
    seen_ids: set[int] = set()
    # Cap the number of items kept per refresh.
    latest_limit = 80
    for entry in roll_data:
        if len(items) >= latest_limit:
            break

        item_id = int(entry.get("id") or 0)
        if not item_id or item_id in seen_ids:
            continue
        seen_ids.add(item_id)

        # Prefer the modification time; fall back to creation time.
        timestamp = int(entry.get("modified_time") or entry.get("ctime") or 0)
        if not timestamp:
            continue
        published_dt = datetime.fromtimestamp(timestamp, tz=SHANGHAI)
        # Only keep items published on the requested date.
        if published_dt.date() != target_date:
            continue

        raw_content = normalize_whitespace(
            entry.get("content")
            or entry.get("brief")
            or entry.get("title")
            or ""
        )
        # Skip near-empty stubs.
        if len(raw_content) < 8:
            continue

        title = normalize_whitespace(entry.get("title") or "")
        if not title:
            title, _ = split_title_and_summary(raw_content)

        summary = normalize_whitespace(entry.get("brief") or "")
        if not summary:
            _, summary = split_title_and_summary(raw_content)

        source = normalize_whitespace(entry.get("author") or "\u8d22\u8054\u793e7x24")
        reference_url = normalize_whitespace(entry.get("shareurl") or "")
        if not reference_url:
            # Synthesize a detail URL from the item id when no share URL exists.
            reference_url = f"https://www.cls.cn/detail/{item_id}"

        # Sector/sentiment tagging reuses the same heuristics as report generation.
        sectors = infer_sectors(f"{title} {summary}", "touzi-mingjian")
        sentiment = infer_sentiment(f"{title} {summary}")
        items.append(
            ClsNewsItem(
                id=f"cls-live-{item_id}",
                title=title[:120],
                published_at=published_dt.isoformat(timespec="seconds"),
                source=source,
                summary=summary[:500],
                reference_url=reference_url,
                sectors=sectors,
                sentiment=sentiment,
            )
        )

    if not items:
        raise RuntimeError("No telegraph items parsed from cls.cn")

    # Newest first (ISO timestamps sort lexicographically).
    return sorted(items, key=lambda item: item.published_at, reverse=True)
|
||||
|
||||
|
||||
def get_accounts() -> list[Account]:
    """Return accounts from storage, falling back to the built-in roster."""
    return fetch_accounts() or ACCOUNTS
|
||||
|
||||
|
||||
def normalize_date(value: str) -> str:
    """Reduce an ISO date or datetime string to its ``YYYY-MM-DD`` date part."""
    parsed = datetime.fromisoformat(value)
    return parsed.date().isoformat()
|
||||
|
||||
|
||||
def blank_daily_input(date_str: str) -> DailyInputDocument:
    """Build an empty per-account input document for *date_str*."""
    rows = [
        DailyInputAccount(account_id=entry.id, account_name=entry.name, links=[])
        for entry in get_accounts()
    ]
    return DailyInputDocument(
        date=date_str,
        updated_at=iso_timestamp(),
        accounts=rows,
    )
|
||||
|
||||
|
||||
def clean_links(links: Iterable[str]) -> list[str]:
    """Strip, drop empties, and de-duplicate links preserving first-seen order."""
    stripped = (raw.strip() for raw in links)
    # dict.fromkeys keeps insertion order, so duplicates collapse to the first hit.
    return [link for link in dict.fromkeys(stripped) if link]
|
||||
|
||||
|
||||
def normalize_daily_input(date_str: str, payload: DailyInputUpsertPayload) -> DailyInputDocument:
    """Canonicalize an upsert payload into a full per-account input document.

    Every known account appears in the result; accounts missing from the
    payload get an empty link list, and submitted links are cleaned.
    """
    links_by_account = {entry.account_id: clean_links(entry.links) for entry in payload.accounts}
    rows = [
        DailyInputAccount(
            account_id=account.id,
            account_name=account.name,
            links=links_by_account.get(account.id, []),
        )
        for account in get_accounts()
    ]
    return DailyInputDocument(
        date=date_str,
        updated_at=iso_timestamp(),
        accounts=rows,
    )
|
||||
|
||||
|
||||
def load_daily_input(date_str: str) -> DailyInputDocument:
    """Fetch the stored daily input for *date_str*, or a blank skeleton if absent."""
    payload = fetch_daily_input_document(date_str)
    if payload is None:
        return blank_daily_input(date_str)
    return payload


def save_daily_input(document: DailyInputDocument) -> DailyInputDocument:
    """Persist *document* via the storage layer and return the saved copy."""
    return save_daily_input_document(document)


def load_report(date_str: str) -> ReportDocument | None:
    """Fetch the generated report for *date_str*, if one exists."""
    return fetch_report_document(date_str)


def save_report(document: ReportDocument) -> ReportDocument:
    """Persist *document* via the storage layer and return the saved copy."""
    return save_report_document(document)


def list_reports() -> list[ReportListItem]:
    """Return the stored report index entries."""
    return fetch_report_list()
|
||||
|
||||
|
||||
def title_from_link(account_name: str, url: str, index: int) -> str:
    """Synthesize an article title from a URL's path tokens.

    Splits the decoded path into word tokens, drops boilerplate hostname
    parts, and joins up to three keywords. Falls back to a numbered
    generic title when no meaningful token survives.
    """
    decoded = unquote(urlparse(url).path or url)
    boilerplate = {"s", "mp", "weixin", "qq", "com"}
    tokens = [
        piece
        for piece in re.split(r"[\W_]+", decoded.lower())
        if piece and piece not in boilerplate
    ]
    keywords = [piece for piece in tokens if len(piece) > 1]
    if not keywords:
        return f"{account_name}\uff1a\u5e02\u573a\u8ddf\u8e2a\u7b2c {index + 1} \u6761"
    # Short tokens are likely acronyms; upper-case them, title-case the rest.
    styled = [piece.upper() if len(piece) <= 3 else piece.capitalize() for piece in keywords[:3]]
    topic = " / ".join(styled)
    return f"{account_name}\uff1a{topic} \u89c2\u5bdf"
|
||||
|
||||
|
||||
def infer_sectors(text: str, account_id: str) -> list[str]:
    """Tag up to three sectors whose keywords appear in *text*.

    When nothing matches, fall back to (at most two of) the account's
    configured focus sectors.
    """
    haystack = text.lower()
    matched: list[str] = []
    for sector, keywords in SECTOR_KEYWORDS.items():
        if any(term.lower() in haystack for term in keywords):
            matched.append(sector)
    if matched:
        return matched[:3]
    return ACCOUNT_FOCUS.get(account_id, ["AI", "\u7b97\u529b"])[:2]
|
||||
|
||||
|
||||
def infer_sentiment(text: str) -> str:
    """Classify *text* by counting bullish versus bearish keyword hits."""
    haystack = text.lower()
    bullish = sum(1 for term in POSITIVE_KEYWORDS if term.lower() in haystack)
    bearish = sum(1 for term in NEGATIVE_KEYWORDS if term.lower() in haystack)
    if bullish == bearish:
        # Ties (including zero hits on both sides) read as neutral.
        return SENTIMENT_NEUTRAL
    return SENTIMENT_BULL if bullish > bearish else SENTIMENT_BEAR
|
||||
|
||||
|
||||
def infer_article_type(title: str) -> str:
    """Map a title to an article type via the keyword patterns, in order."""
    haystack = title.lower()
    return next(
        (kind for keyword, kind in ARTICLE_TYPE_PATTERNS if keyword.lower() in haystack),
        "\u4e3b\u9898\u89c2\u70b9",
    )
|
||||
|
||||
|
||||
def build_article_summary(title: str, sectors: list[str], sentiment: str) -> str:
    """Compose a one-line summary from the title, top sectors and sentiment."""
    tone_by_sentiment = {
        SENTIMENT_BULL: "\u504f\u79ef\u6781\u7684\u8282\u594f\u5224\u65ad",
        SENTIMENT_BEAR: "\u660e\u663e\u504f\u8c28\u614e\u7684\u98ce\u9669\u63d0\u9192",
        SENTIMENT_NEUTRAL: "\u66f4\u5f3a\u8c03\u7ed3\u6784\u5206\u5316\u4e0e\u7b49\u5f85\u786e\u8ba4",
    }
    sentiment_text = tone_by_sentiment[sentiment]
    sector_text = "\u3001".join(sectors[:2]) if sectors else "\u6838\u5fc3\u4e3b\u7ebf"
    return f"{title} \u56f4\u7ed5 {sector_text} \u5c55\u5f00\uff0c\u7ed9\u51fa\u7684\u7ed3\u8bba\u662f{sentiment_text}\uff0c\u9002\u5408\u4f5c\u4e3a\u5f53\u65e5\u76d8\u9762\u8ddf\u8e2a\u4e0e\u590d\u76d8\u53c2\u8003\u3002"
|
||||
|
||||
|
||||
def generate_report(date_str: str, input_document: DailyInputDocument) -> ReportDocument:
    """Build the daily opinion report from the submitted article links.

    For every link, a title, sector list and sentiment are inferred
    heuristically from the URL text, and a deterministic intraday
    publish time is synthesized. Returns an empty-shell report when no
    links were entered for the day.
    """
    base_date = datetime.fromisoformat(date_str)
    articles: list[OpinionArticle] = []
    for account_index, account in enumerate(input_document.accounts):
        for link_index, url in enumerate(account.links):
            title = title_from_link(account.account_name, url, link_index)
            sectors = infer_sectors(f"{title} {url}", account.account_id)
            sentiment = infer_sentiment(f"{title} {url}")
            # Deterministic pseudo publish time: hour in 09..16, minute in 12-step bins.
            published_at = (
                base_date.replace(hour=9 + ((account_index + link_index) % 8), minute=(link_index * 12) % 60)
                .replace(tzinfo=SHANGHAI)
                .isoformat(timespec="seconds")
            )
            articles.append(
                OpinionArticle(
                    id=f"{date_str}-{account.account_id}-{link_index}",
                    account_id=account.account_id,
                    account_name=account.account_name,
                    title=title,
                    published_at=published_at,
                    summary=build_article_summary(title, sectors, sentiment),
                    source_url=url,
                    sectors=sectors,
                    sentiment=sentiment,
                    article_type=infer_article_type(title),
                )
            )

    if not articles:
        # No links entered yet: keep the report structure but mark it empty.
        return ReportDocument(
            date=date_str,
            generated_at=iso_timestamp(),
            summary="\u5f53\u65e5\u5c1a\u672a\u5f55\u5165\u6587\u7ae0\u94fe\u63a5\uff0c\u7cfb\u7edf\u5df2\u4fdd\u7559\u65e5\u62a5\u7ed3\u6784\uff0c\u7b49\u5f85\u8865\u5145\u516c\u4f17\u53f7\u6587\u7ae0\u540e\u518d\u751f\u6210\u5b8c\u6574\u7ed3\u8bba\u3002",
            focus_sectors=[],
            article_count=0,
            account_count=0,
            articles=[],
        )

    # Top four sectors by mention count become the report focus.
    sector_counter = Counter(sector for article in articles for sector in article.sectors)
    focus_sectors = [sector for sector, _count in sector_counter.most_common(4)]

    # Majority sentiment decides the overall tone line.
    sentiment_counter = Counter(article.sentiment for article in articles)
    if sentiment_counter[SENTIMENT_BULL] > sentiment_counter[SENTIMENT_BEAR]:
        tone = "\u6574\u4f53\u504f\u79ef\u6781\uff0c\u4e3b\u7ebf\u8ba8\u8bba\u96c6\u4e2d\u5ea6\u8f83\u9ad8"
    elif sentiment_counter[SENTIMENT_BEAR] > sentiment_counter[SENTIMENT_BULL]:
        tone = "\u6574\u4f53\u504f\u8c28\u614e\uff0c\u98ce\u9669\u63a7\u5236\u4ecd\u662f\u4e3b\u53d9\u4e8b"
    else:
        tone = "\u591a\u7a7a\u5206\u6b67\u5e76\u5b58\uff0c\u5e02\u573a\u66f4\u770b\u91cd\u9a8c\u8bc1\u4e0e\u8282\u594f"

    # Only accounts that actually submitted links count as active.
    active_accounts = len([account for account in input_document.accounts if account.links])
    sector_text = "\u3001".join(focus_sectors) if focus_sectors else "\u6682\u65e0\u805a\u7126\u677f\u5757"
    summary = (
        f"{date_str} \u5171\u6574\u7406 {len(articles)} \u7bc7\u516c\u4f17\u53f7\u89c2\u70b9\uff0c\u8986\u76d6 {active_accounts} \u4e2a\u8d26\u6237\u3002"
        f"{tone}\uff0c\u8ba8\u8bba\u91cd\u70b9\u843d\u5728 {sector_text}\u3002"
    )

    return ReportDocument(
        date=date_str,
        generated_at=iso_timestamp(),
        summary=summary,
        focus_sectors=focus_sectors,
        article_count=len(articles),
        account_count=active_accounts,
        articles=sorted(articles, key=lambda item: item.published_at, reverse=True),
    )
|
||||
|
||||
|
||||
def build_cls_news_document(
    reference_time: datetime | None = None,
    *,
    allow_live_fetch: bool = True,
) -> ClsNewsDocument:
    """Assemble a CLS news document for the day of *reference_time*.

    Tries the live cls.cn scrape first (when allowed); any failure —
    including an explicitly disabled live fetch — falls back to the
    canned templates. Aggregates sector counts and per-sector sentiment
    into the document summary.
    """
    current = reference_time or now_local()
    try:
        if allow_live_fetch:
            items = fetch_cls_telegraph_items(current)
        else:
            # Raise into the same fallback path used for scrape failures.
            raise RuntimeError("Live fetch disabled for non-current date")
    except Exception:
        items = build_fallback_cls_items(current)

    # Top five sectors by mention count form the watch list.
    sector_counter = Counter(sector for item in items for sector in item.sectors)
    watch_list = [sector for sector, _count in sector_counter.most_common(5)]

    overview = (
        "\u8d44\u8baf\u5217\u8868\u5c55\u793a\u6240\u9009\u65e5\u671f\u5185\u7684\u8d22\u8054\u793e 7x24 \u8d44\u8baf\uff0c"
        "\u5f53\u65e5\u6570\u636e\u6765\u81ea cls.cn \u5b9e\u65f6\u6293\u53d6\uff0c\u6bcf 3 \u5206\u949f\u66f4\u65b0\u4e00\u6b21\u3002"
    )
    hot_topics = (
        "\u70ed\u70b9\u6982\u89c8\u53ea\u4fdd\u7559\u5bf9\u677f\u5757\u5b58\u5728\u660e\u663e\u5f71\u54cd\u7684\u65b9\u5411\uff0c"
        f"\u5f53\u524d\u4e3b\u8981\u96c6\u4e2d\u5728 {'\u3001'.join(watch_list[:3])}\u3002"
    )

    # Build a sentiment verdict for each of the top four sectors.
    sector_impacts: list[ClsSectorImpact] = []
    seen_sectors: set[str] = set()
    for sector in watch_list[:4]:
        if sector in seen_sectors:
            continue
        seen_sectors.add(sector)
        related_items = [item for item in items if sector in item.sectors]
        if not related_items:
            continue

        # Majority sentiment among the sector's items decides the verdict.
        sentiment_counter = Counter(item.sentiment for item in related_items)
        if sentiment_counter[SENTIMENT_BULL] > sentiment_counter[SENTIMENT_BEAR]:
            sentiment = SENTIMENT_BULL
            reason = f"{sector} \u65b9\u5411\u51fa\u73b0\u50ac\u5316\u6216\u666f\u6c14\u5f3a\u5316\uff0c\u77ed\u7ebf\u504f\u6b63\u5411\u5f71\u54cd\u3002"
        elif sentiment_counter[SENTIMENT_BEAR] > sentiment_counter[SENTIMENT_BULL]:
            sentiment = SENTIMENT_BEAR
            reason = f"{sector} \u65b9\u5411\u51fa\u73b0\u5151\u73b0\u6216\u5206\u6b67\uff0c\u77ed\u7ebf\u504f\u8d1f\u5411\u5f71\u54cd\u3002"
        else:
            sentiment = SENTIMENT_NEUTRAL
            reason = f"{sector} \u65b9\u5411\u6709\u8ba8\u8bba\u4f46\u4ecd\u9700\u9a8c\u8bc1\uff0c\u77ed\u7ebf\u4ee5\u4e2d\u6027\u89c2\u5bdf\u4e3a\u4e3b\u3002"

        sector_impacts.append(
            ClsSectorImpact(
                sector=sector,
                sentiment=sentiment,
                reason=reason,
                # De-duplicate titles while preserving order.
                related_titles=list(dict.fromkeys(item.title for item in related_items[:2])),
            )
        )

    return ClsNewsDocument(
        date=current.date().isoformat(),
        updated_at=iso_timestamp(current),
        window_label="\u5f53\u5929\u8d44\u8baf",
        summary=ClsNewsSummary(
            overview=overview,
            hot_topics=hot_topics,
            watch_list=watch_list,
        ),
        sector_impacts=sector_impacts,
        items=items,
    )
|
||||
|
||||
|
||||
def load_cls_news(date_str: str) -> ClsNewsDocument | None:
    """Fetch the stored CLS news document for *date_str*, if one exists."""
    return fetch_cls_news_document(date_str)
|
||||
|
||||
|
||||
def build_reference_time(date_str: str) -> datetime:
    """Pick the reference time for a date: live clock today, 15:00 otherwise.

    Historical dates are anchored at the market close (15:00 local) so the
    fallback news items get plausible intraday timestamps.
    """
    requested = datetime.fromisoformat(date_str).date()
    if requested == now_local().date():
        return now_local()
    return datetime.combine(requested, time(hour=15, minute=0), tzinfo=SHANGHAI)
|
||||
|
||||
|
||||
def refresh_cls_news(date_str: str | None = None) -> ClsNewsDocument:
    """Rebuild and persist the CLS news document for *date_str* (default today).

    Live fetching is only attempted for the current date. If rebuilding
    fails but a cached document exists, the cached copy is returned
    instead of propagating the error.
    """
    normalized_date = normalize_date(date_str or now_local().date().isoformat())
    existing = load_cls_news(normalized_date)
    reference_time = build_reference_time(normalized_date)
    # Only today's document may hit the live cls.cn scrape.
    allow_live_fetch = normalized_date == now_local().date().isoformat()
    try:
        document = build_cls_news_document(reference_time, allow_live_fetch=allow_live_fetch)
    except Exception:
        # Prefer serving stale data over failing the request.
        if existing is not None:
            return existing
        raise
    return save_cls_news_document(document)
|
||||
|
||||
|
||||
def get_cls_news(date_str: str | None = None) -> ClsNewsDocument:
    """Return the CLS news document for *date_str*, refreshing when stale.

    Historical documents are served as-is; today's document is refreshed
    once it is older than CLS_REFRESH_INTERVAL (3 minutes).
    """
    normalized_date = normalize_date(date_str or now_local().date().isoformat())
    document = load_cls_news(normalized_date)
    if document is None:
        return refresh_cls_news(normalized_date)
    # Past dates never go stale — only today's cache expires.
    if normalized_date != now_local().date().isoformat():
        return document
    updated_at = ensure_local_timezone(datetime.fromisoformat(document.updated_at))
    if now_local() - updated_at >= CLS_REFRESH_INTERVAL:
        return refresh_cls_news(normalized_date)
    return document
|
||||
|
||||
|
||||
def seed_demo_content() -> None:
    """Seed storage with demo accounts, sample daily inputs and reports.

    Idempotent-ish: sample days are skipped when both their input and
    report already exist, and the CLS news document is only created when
    missing for today.
    """
    save_accounts(ACCOUNTS)

    today = now_local().date()
    # SAMPLE_INPUTS is keyed by day offset back from today.
    for offset, account_links in SAMPLE_INPUTS.items():
        date_str = (today - timedelta(days=offset)).isoformat()
        # Skip days that are already fully seeded (input AND report present).
        if fetch_daily_input_document(date_str) is not None and fetch_report_document(date_str) is not None:
            continue

        payload = DailyInputUpsertPayload(
            accounts=[
                {"account_id": account.id, "links": account_links.get(account.id, [])}
                for account in ACCOUNTS
            ]
        )
        input_document = normalize_daily_input(date_str, payload)
        save_daily_input_document(input_document)
        save_report_document(generate_report(date_str, input_document))

    today_str = today.isoformat()
    if fetch_cls_news_document(today_str) is None:
        save_cls_news_document(build_cls_news_document())
|
||||
Reference in New Issue
Block a user