Initial commit

This commit is contained in:
wanghep
2026-03-20 22:59:54 +08:00
commit 68b9e253e2
63 changed files with 8116 additions and 0 deletions

View File

@ -0,0 +1,733 @@
from __future__ import annotations
import json
import re
from collections import Counter
from datetime import datetime, time, timedelta
from typing import Iterable
from urllib.parse import unquote, urlparse
from zoneinfo import ZoneInfo
import requests
from bs4 import BeautifulSoup
from app.models import (
Account,
ClsNewsDocument,
ClsNewsItem,
ClsNewsSummary,
ClsSectorImpact,
DailyInputAccount,
DailyInputDocument,
DailyInputUpsertPayload,
OpinionArticle,
ReportDocument,
ReportListItem,
)
from app.services.storage import (
fetch_accounts,
fetch_cls_news_document,
fetch_daily_input_document,
fetch_report_document,
fetch_report_list,
save_accounts,
save_cls_news_document,
save_daily_input_document,
save_report_document,
)
# Local timezone for every timestamp this module produces or compares.
SHANGHAI = ZoneInfo("Asia/Shanghai")
# Minimum age of a cached CLS news document before a live re-fetch is attempted.
CLS_REFRESH_INTERVAL = timedelta(minutes=3)
# Mobile telegraph page scraped by fetch_cls_telegraph_items.
CLS_TELEGRAPH_URL = "https://m.cls.cn/telegraph"
# Browser-like request headers so cls.cn serves the regular HTML page.
HTTP_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"
    ),
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
}
# Sentiment labels (Chinese): bullish / bearish / neutral.
SENTIMENT_BULL = "\u770b\u591a"
SENTIMENT_BEAR = "\u770b\u7a7a"
SENTIMENT_NEUTRAL = "\u4e2d\u6027"
# Built-in demo accounts, used as the fallback when storage has none
# persisted (see get_accounts). The ids are stable slugs referenced by
# ACCOUNT_FOCUS and SAMPLE_INPUTS below.
ACCOUNTS: list[Account] = [
    Account(
        id="touzi-mingjian",
        name="\u6295\u8d44\u660e\u89c1",
        description="\u504f\u4e3b\u9898\u8f6e\u52a8\u4e0e\u4e3b\u7ebf\u5224\u65ad\uff0c\u9002\u5408\u8ddf\u8e2a\u5e02\u573a\u504f\u597d\u53d8\u5316\u3002",
    ),
    Account(
        id="aigujun-2020",
        name="\u7231\u80a1\u541b2020",
        description="\u5173\u6ce8\u60c5\u7eea\u3001\u70ed\u70b9\u6269\u6563\u4e0e\u4ea4\u6613\u7ec6\u8282\u3002",
    ),
    Account(
        id="mazhiming-shouping",
        name="\u9a6c\u5fd7\u660e\u6536\u8bc4",
        description="\u65e5\u5185\u6536\u8bc4\u4e0e\u60c5\u7eea\u53d8\u5316\u603b\u7ed3\u3002",
    ),
    Account(
        id="laobai-guandian",
        name="\u8001\u767d\u5206\u6790\u5ba4\u89c2\u70b9",
        description="\u504f\u7b56\u7565\u62c6\u89e3\u548c\u5173\u952e\u677f\u5757\u8ddf\u8e2a\u3002",
    ),
]
# Per-account default focus sectors, used by infer_sectors when no
# SECTOR_KEYWORDS entry matches the text.
ACCOUNT_FOCUS = {
    "touzi-mingjian": ["AI", "\u7b97\u529b", "\u673a\u5668\u4eba"],
    "aigujun-2020": ["CPO", "\u5b58\u50a8\u82af\u7247", "\u65b0\u80fd\u6e90"],
    "mazhiming-shouping": ["AI", "\u5238\u5546", "\u6c7d\u8f66"],
    "laobai-guandian": ["\u673a\u5668\u4eba", "\u534a\u5bfc\u4f53", "\u65b0\u80fd\u6e90"],
}
# Sector label -> keyword list; infer_sectors matches keywords as lowercase
# substrings of the input text.
SECTOR_KEYWORDS = {
    "AI": ["ai", "\u4eba\u5de5\u667a\u80fd", "\u5927\u6a21\u578b", "\u6a21\u578b"],
    "\u7b97\u529b": ["\u7b97\u529b", "compute", "server", "gpu"],
    "CPO": ["cpo", "\u5149\u6a21\u5757", "\u9ad8\u901f\u4e92\u8054"],
    "\u5b58\u50a8\u82af\u7247": ["\u5b58\u50a8", "memory", "dram", "nand"],
    "\u534a\u5bfc\u4f53": ["\u534a\u5bfc\u4f53", "chip", "wafer", "\u6676\u5706"],
    "\u5238\u5546": ["\u5238\u5546", "broker", "\u8bc1\u5238"],
    "\u77f3\u6cb9\u5929\u7136\u6c14": ["\u77f3\u6cb9", "\u5929\u7136\u6c14", "\u6cb9\u6c14", "\u80fd\u6e90\u4ef7\u683c"],
    "\u65b0\u80fd\u6e90": ["\u65b0\u80fd\u6e90", "\u9502\u7535", "\u5149\u4f0f", "\u50a8\u80fd"],
    "\u519b\u5de5": ["\u519b\u5de5", "\u536b\u661f", "\u822a\u5929"],
    "\u673a\u5668\u4eba": ["\u673a\u5668\u4eba", "robot", "\u81ea\u52a8\u5316"],
    "\u6c7d\u8f66": ["\u6c7d\u8f66", "\u8f66\u4f01", "\u667a\u9a7e", "\u6574\u8f66"],
    "\u533b\u836f": ["\u533b\u836f", "\u521b\u65b0\u836f", "\u533b\u7597"],
}
# Keyword lists consumed by infer_sentiment: the side with more substring
# hits wins; a tie is neutral.
POSITIVE_KEYWORDS = [
    "\u673a\u4f1a",
    "\u4fee\u590d",
    "\u589e\u5f3a",
    "\u4e3b\u7ebf",
    "\u589e\u91cf",
    "\u53cd\u5f39",
    "\u7a81\u7834",
    "\u79ef\u6781",
    "up",
    "bull",
]
NEGATIVE_KEYWORDS = [
    "\u98ce\u9669",
    "\u627f\u538b",
    "\u8c28\u614e",
    "\u56de\u8c03",
    "\u7f29\u91cf",
    "\u89c2\u671b",
    "\u5206\u6b67",
    "bear",
    "down",
]
# (title keyword, article-type label) pairs checked in order by
# infer_article_type; the first matching keyword determines the label.
ARTICLE_TYPE_PATTERNS = [
    ("\u6536\u8bc4", "\u5e02\u573a\u6536\u8bc4"),
    ("\u5348", "\u76d8\u4e2d\u89c2\u5bdf"),
    ("\u7b56\u7565", "\u7b56\u7565\u8ddf\u8e2a"),
    ("\u590d\u76d8", "\u76d8\u9762\u590d\u76d8"),
    ("\u884c\u4e1a", "\u884c\u4e1a\u89c2\u5bdf"),
]
# Canned telegraph items used by build_fallback_cls_items whenever the live
# scrape of cls.cn fails or is disabled (e.g. for historical dates).
CLS_NEWS_TEMPLATES = [
    {
        "title": "\u8d22\u8054\u793e\u76d8\u524d\u7cbe\u9009\uff1a\u7b97\u529b\u94fe\u56de\u6696\uff0c\u8d44\u91d1\u91cd\u65b0\u805a\u7126\u9ad8\u666f\u6c14\u65b9\u5411",
        "summary": "\u9694\u591c\u5e02\u573a\u98ce\u9669\u504f\u597d\u56de\u5347\uff0c\u7b97\u529b\u4e0e\u670d\u52a1\u5668\u94fe\u6761\u83b7\u8d44\u91d1\u91cd\u65b0\u914d\u7f6e\uff0c\u60c5\u7eea\u4fee\u590d\u5148\u4e8e\u6210\u4ea4\u5168\u9762\u653e\u5927\u3002",
        "sectors": ["\u7b97\u529b", "AI"],
        "sentiment": SENTIMENT_BULL,
        "reference_url": "https://www.cls.cn/detail/compute-rebound",
    },
    {
        "title": "AI Daily\uff1aCPO \u4e0e\u5b58\u50a8\u82af\u7247\u540c\u6b65\u8d70\u5f3a\uff0c\u666f\u6c14\u5ea6\u7ebf\u7d22\u5ef6\u7eed",
        "summary": "\u9ad8\u901f\u4e92\u8054\u4e0e\u5b58\u50a8\u62a5\u4ef7\u9884\u671f\u652f\u6491\u677f\u5757\u8868\u73b0\uff0c\u8d44\u91d1\u66f4\u503e\u5411\u4e8e\u56f4\u7ed5\u786e\u5b9a\u6027\u73af\u8282\u96c6\u4e2d\u3002",
        "sectors": ["CPO", "\u5b58\u50a8\u82af\u7247"],
        "sentiment": SENTIMENT_BULL,
        "reference_url": "https://www.cls.cn/detail/ai-daily-cpo-memory",
    },
    {
        "title": "\u8d22\u8054\u793e\u884c\u4e1a\u89c2\u5bdf\uff1a\u673a\u5668\u4eba\u94fe\u6761\u5206\u5316\uff0c\u8ba2\u5355\u5151\u73b0\u6210\u4e3a\u77ed\u671f\u7126\u70b9",
        "summary": "\u673a\u5668\u4eba\u65b9\u5411\u5185\u90e8\u5f00\u59cb\u51fa\u73b0\u5151\u73b0\u4e0e\u6362\u624b\uff0c\u5e02\u573a\u4ece\u6982\u5ff5\u6269\u6563\u8f6c\u5411\u4e1a\u7ee9\u4e0e\u8ba2\u5355\u9a8c\u8bc1\u3002",
        "sectors": ["\u673a\u5668\u4eba"],
        "sentiment": SENTIMENT_NEUTRAL,
        "reference_url": "https://www.cls.cn/detail/robotics-orders",
    },
    {
        "title": "\u8d22\u8054\u793e\u80fd\u6e90\u8ffd\u8e2a\uff1a\u6cb9\u6c14\u677f\u5757\u9ad8\u4f4d\u9707\u8361\uff0c\u8d44\u91d1\u5207\u5411\u9632\u5fa1\u54c1\u79cd",
        "summary": "\u539f\u6cb9\u4ef7\u683c\u7ef4\u6301\u9ad8\u4f4d\u540e\uff0c\u6cb9\u6c14\u65b9\u5411\u51fa\u73b0\u9ad8\u4f4d\u9707\u8361\uff0c\u90e8\u5206\u8d44\u91d1\u8f6c\u5411\u533b\u836f\u7b49\u9632\u5b88\u677f\u5757\u3002",
        "sectors": ["\u77f3\u6cb9\u5929\u7136\u6c14", "\u533b\u836f"],
        "sentiment": SENTIMENT_NEUTRAL,
        "reference_url": "https://www.cls.cn/detail/energy-rotation",
    },
    {
        "title": "AI Daily\uff1a\u6c7d\u8f66\u4e0e\u667a\u9a7e\u5ef6\u7eed\u5206\u6b67\uff0c\u4e3b\u7ebf\u4ecd\u9700\u7b49\u5f85\u9500\u91cf\u6570\u636e\u9a8c\u8bc1",
        "summary": "\u6574\u8f66\u4e0e\u667a\u9a7e\u65b9\u5411\u5173\u6ce8\u5ea6\u4ecd\u9ad8\uff0c\u4f46\u5e02\u573a\u5bf9\u4f30\u503c\u6269\u5f20\u5df2\u6709\u4fdd\u7559\uff0c\u7b49\u5f85\u9500\u91cf\u548c\u8ba2\u5355\u6570\u636e\u786e\u8ba4\u3002",
        "sectors": ["\u6c7d\u8f66"],
        "sentiment": SENTIMENT_BEAR,
        "reference_url": "https://www.cls.cn/detail/auto-data-watch",
    },
    {
        "title": "\u8d22\u8054\u793e7x24\uff1a\u534a\u5bfc\u4f53\u8bbe\u5907\u65b9\u5411\u8d70\u5f3a\uff0c\u673a\u6784\u79f0\u56fd\u4ea7\u66ff\u4ee3\u8282\u594f\u63d0\u901f",
        "summary": "\u6676\u5706\u5236\u9020\u4e0e\u8bbe\u5907\u94fe\u6761\u51fa\u73b0\u5f02\u52a8\uff0c\u5e02\u573a\u56f4\u7ed5\u56fd\u4ea7\u66ff\u4ee3\u548c\u8d44\u672c\u5f00\u652f\u6062\u590d\u91cd\u65b0\u5b9a\u4ef7\u3002",
        "sectors": ["\u534a\u5bfc\u4f53"],
        "sentiment": SENTIMENT_BULL,
        "reference_url": "https://www.cls.cn/detail/semi-equipment-up",
    },
    {
        "title": "\u8d22\u8054\u793e7x24\uff1a\u5238\u5546\u677f\u5757\u5348\u540e\u62c9\u5347\uff0c\u5e02\u573a\u60c5\u7eea\u6709\u6240\u4fee\u590d",
        "summary": "\u6307\u6570\u9707\u8361\u8fc7\u7a0b\u4e2d\u5238\u5546\u627f\u62c5\u60c5\u7eea\u4fee\u590d\u529f\u80fd\uff0c\u5e26\u52a8\u90e8\u5206\u9ad8\u5f39\u6027\u65b9\u5411\u56de\u6696\u3002",
        "sectors": ["\u5238\u5546"],
        "sentiment": SENTIMENT_BULL,
        "reference_url": "https://www.cls.cn/detail/broker-rebound",
    },
    {
        "title": "\u8d22\u8054\u793e7x24\uff1a\u521b\u65b0\u836f\u65b9\u5411\u6301\u7eed\u6d3b\u8dc3\uff0c\u8d44\u91d1\u8f6c\u5411\u9632\u5b88\u4e0e\u6210\u957f\u517c\u987e",
        "summary": "\u533b\u836f\u677f\u5757\u83b7\u5f97\u589e\u91cf\u8d44\u91d1\u5173\u6ce8\uff0c\u521b\u65b0\u836f\u548c\u5668\u68b0\u7ec6\u5206\u8868\u73b0\u66f4\u5f3a\u3002",
        "sectors": ["\u533b\u836f"],
        "sentiment": SENTIMENT_NEUTRAL,
        "reference_url": "https://www.cls.cn/detail/medical-active",
    },
    {
        "title": "\u8d22\u8054\u793e7x24\uff1a\u65b0\u80fd\u6e90\u94fe\u6761\u5206\u5316\u52a0\u5267\uff0c\u673a\u6784\u63d0\u9192\u5173\u6ce8\u4ea7\u80fd\u51fa\u6e05\u8282\u594f",
        "summary": "\u65b0\u80fd\u6e90\u677f\u5757\u5185\u90e8\u8f6e\u52a8\u660e\u663e\uff0c\u8d44\u91d1\u66f4\u504f\u5411\u4f4e\u4f4d\u73af\u8282\u548c\u6210\u672c\u6539\u5584\u65b9\u5411\u3002",
        "sectors": ["\u65b0\u80fd\u6e90"],
        "sentiment": SENTIMENT_NEUTRAL,
        "reference_url": "https://www.cls.cn/detail/new-energy-split",
    },
    {
        "title": "\u8d22\u8054\u793e7x24\uff1a\u519b\u5de5\u677f\u5757\u76d8\u4e2d\u5f02\u52a8\uff0c\u8ba2\u5355\u5151\u73b0\u9884\u671f\u91cd\u65b0\u5347\u6e29",
        "summary": "\u519b\u5de5\u94fe\u6761\u76d8\u4e2d\u8d70\u5f3a\uff0c\u5e02\u573a\u5173\u6ce8\u540e\u7eed\u8ba2\u5355\u5151\u73b0\u4e0e\u4f30\u503c\u5207\u6362\u7a7a\u95f4\u3002",
        "sectors": ["\u519b\u5de5"],
        "sentiment": SENTIMENT_BULL,
        "reference_url": "https://www.cls.cn/detail/defense-orders",
    },
    {
        "title": "\u8d22\u8054\u793e7x24\uff1a\u673a\u5668\u4eba\u677f\u5757\u51b2\u9ad8\u56de\u843d\uff0c\u77ed\u7ebf\u535a\u5f08\u60c5\u7eea\u5347\u6e29",
        "summary": "\u673a\u5668\u4eba\u65b9\u5411\u9ad8\u4f4d\u9707\u8361\uff0c\u8d44\u91d1\u5728\u9898\u6750\u6269\u6563\u4e0e\u5151\u73b0\u538b\u529b\u4e4b\u95f4\u53cd\u590d\u5207\u6362\u3002",
        "sectors": ["\u673a\u5668\u4eba"],
        "sentiment": SENTIMENT_NEUTRAL,
        "reference_url": "https://www.cls.cn/detail/robotics-intraday",
    },
    {
        "title": "\u8d22\u8054\u793e7x24\uff1a\u5b58\u50a8\u82af\u7247\u62a5\u4ef7\u9884\u671f\u7ee7\u7eed\u4e0a\u4fee\uff0c\u4ea7\u4e1a\u94fe\u666f\u6c14\u5ea6\u53d7\u5173\u6ce8",
        "summary": "\u5b58\u50a8\u73af\u8282\u4ef7\u683c\u4fee\u590d\u903b\u8f91\u5ef6\u7eed\uff0c\u5e02\u573a\u91cd\u65b0\u4ea4\u6613\u4f9b\u9700\u6539\u5584\u4e0e\u76c8\u5229\u5f39\u6027\u3002",
        "sectors": ["\u5b58\u50a8\u82af\u7247"],
        "sentiment": SENTIMENT_BULL,
        "reference_url": "https://www.cls.cn/detail/memory-price-up",
    },
]
# Demo link seeds used by seed_demo_content: outer key is a day offset
# (days before today), inner map is account id -> article links.
SAMPLE_INPUTS = {
    1: {
        "touzi-mingjian": ["https://mp.weixin.qq.com/s/semiconductor-capacity-and-chip-cycle"],
        "aigujun-2020": ["https://mp.weixin.qq.com/s/storage-chip-price-repair"],
        "mazhiming-shouping": ["https://mp.weixin.qq.com/s/market-close-sector-rotation"],
        "laobai-guandian": ["https://mp.weixin.qq.com/s/robotics-and-energy-balance"],
    },
}
def now_local() -> datetime:
    """Return the current time as an aware datetime in Asia/Shanghai."""
    return datetime.now(tz=SHANGHAI)
def iso_timestamp(value: datetime | None = None) -> str:
return (value or now_local()).replace(microsecond=0).isoformat()
def ensure_local_timezone(value: datetime) -> datetime:
    """Coerce *value* into Asia/Shanghai: naive datetimes get the zone attached, aware ones are converted."""
    if value.tzinfo is not None:
        return value.astimezone(SHANGHAI)
    return value.replace(tzinfo=SHANGHAI)
def normalize_whitespace(value: str) -> str:
    """Collapse every whitespace run to one space and trim both ends."""
    collapsed = re.sub(r"\s+", " ", value)
    return collapsed.strip()
def extract_json_object(script_text: str, marker: str) -> str:
    """Return the first balanced JSON object following *marker* in *script_text*.

    Scans character by character while tracking string literals (including
    backslash escapes) so braces inside strings do not affect brace depth.
    Raises RuntimeError when the marker, the opening brace, or the matching
    closing brace cannot be found.
    """
    marker_pos = script_text.find(marker)
    if marker_pos == -1:
        raise RuntimeError(f"Marker not found: {marker}")
    start = script_text.find("{", marker_pos)
    if start == -1:
        raise RuntimeError(f"JSON object start not found for marker: {marker}")
    depth = 0
    inside_string = False
    pending_escape = False
    position = start
    end = len(script_text)
    while position < end:
        char = script_text[position]
        if inside_string:
            # Inside a string literal: only an unescaped quote ends it.
            if pending_escape:
                pending_escape = False
            elif char == "\\":
                pending_escape = True
            elif char == '"':
                inside_string = False
        elif char == '"':
            inside_string = True
        elif char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
            if depth == 0:
                return script_text[start : position + 1]
        position += 1
    raise RuntimeError(f"JSON object end not found for marker: {marker}")
def parse_telegraph_timestamp(date_str: str, time_str: str) -> str:
    """Combine a date string and an HH:MM[:SS] clock into a Shanghai ISO timestamp."""
    clock = time_str if time_str.count(":") == 2 else f"{time_str}:00"
    moment = datetime.fromisoformat(f"{date_str}T{clock}")
    return moment.replace(tzinfo=SHANGHAI).isoformat(timespec="seconds")
def split_title_and_summary(content: str) -> tuple[str, str]:
    """Derive a (title, summary) pair from raw telegraph content.

    A leading bracketed segment becomes the title and the remainder the
    summary; otherwise the first sentence is the title and the (possibly
    truncated) full text is the summary.
    """
    def _squash(text: str) -> str:
        # Inlined whitespace normalization so this split is self-contained.
        return re.sub(r"\s+", " ", text).strip()

    cleaned = _squash(content)
    bracketed = re.match(r"^[\[({\u3010\u3016](.+?)[\])}\u3011\u3017][\uff1a: ]*(.*)$", cleaned)
    if bracketed:
        title = _squash(bracketed.group(1))
        summary = _squash(bracketed.group(2) or cleaned)
        return title[:80], summary or title
    first_sentence = re.split(r"[。;;!?]", cleaned, maxsplit=1)[0]
    summary = cleaned if len(cleaned) <= 220 else f"{cleaned[:217]}..."
    return first_sentence[:80], summary
def build_fallback_cls_items(reference_time: datetime) -> list[ClsNewsItem]:
    """Materialize CLS_NEWS_TEMPLATES as ClsNewsItem records.

    Publish times are staggered backwards from *reference_time*; the result
    is sorted newest-first. Used when the live cls.cn scrape is unavailable.
    """
    generated: list[ClsNewsItem] = []
    for position, template in enumerate(CLS_NEWS_TEMPLATES):
        stamp = reference_time - timedelta(minutes=position * 95 + 8)
        generated.append(
            ClsNewsItem(
                id=f"cls-{position + 1}",
                title=template["title"],
                published_at=stamp.replace(microsecond=0).isoformat(),
                # Alternate the attributed source between the two feeds.
                source="\u8d22\u8054\u793e" if position % 2 == 0 else "\u8d22\u8054\u793e AI Daily",
                summary=template["summary"],
                reference_url=template["reference_url"],
                sectors=template["sectors"],
                sentiment=template["sentiment"],
            )
        )
    generated.sort(key=lambda record: record.published_at, reverse=True)
    return generated
def fetch_cls_telegraph_items(reference_time: datetime) -> list[ClsNewsItem]:
    """Scrape the cls.cn mobile telegraph page and return parsed news items.

    Fetches the page, extracts the embedded ``__NEXT_DATA__`` JSON payload,
    and converts every ``roll_data`` entry published on *reference_time*'s
    date into a ClsNewsItem. Returns at most 80 items, newest first.

    Raises RuntimeError when the payload is missing or yields no usable
    items; network and JSON errors propagate to the caller.
    """
    # Fix: close the Session deterministically (the original created it and
    # never closed it, leaking the underlying connection pool on every call).
    with requests.Session() as session:
        session.trust_env = False  # ignore env proxies for a predictable fetch
        response = session.get(CLS_TELEGRAPH_URL, headers=HTTP_HEADERS, timeout=15)
        response.raise_for_status()
    # The body is fully buffered by requests, so it remains readable after
    # the session closes; force UTF-8 before decoding.
    response.encoding = "utf-8"
    soup = BeautifulSoup(response.text, "html.parser")
    next_data_script = None
    for script in soup.find_all("script"):
        # script.string is None when the tag has nested nodes; fall back to text.
        script_text = script.string or script.get_text()
        if "__NEXT_DATA__ =" in script_text:
            next_data_script = script_text
            break
    if not next_data_script:
        raise RuntimeError("Missing __NEXT_DATA__ payload on cls.cn")
    next_data = json.loads(extract_json_object(next_data_script, "__NEXT_DATA__ ="))
    roll_data = (
        next_data.get("props", {})
        .get("initialState", {})
        .get("roll_data", [])
    )
    if not isinstance(roll_data, list) or not roll_data:
        raise RuntimeError("Missing roll_data in cls.cn payload")
    target_date = reference_time.date()
    items: list[ClsNewsItem] = []
    seen_ids: set[int] = set()
    latest_limit = 80  # hard cap on items returned per refresh
    for entry in roll_data:
        if len(items) >= latest_limit:
            break
        item_id = int(entry.get("id") or 0)
        if not item_id or item_id in seen_ids:
            continue  # skip malformed ids and duplicates
        seen_ids.add(item_id)
        timestamp = int(entry.get("modified_time") or entry.get("ctime") or 0)
        if not timestamp:
            continue
        published_dt = datetime.fromtimestamp(timestamp, tz=SHANGHAI)
        if published_dt.date() != target_date:
            continue  # only keep items from the requested day
        raw_content = normalize_whitespace(
            entry.get("content")
            or entry.get("brief")
            or entry.get("title")
            or ""
        )
        if len(raw_content) < 8:
            continue  # too short to be a meaningful telegraph item
        title = normalize_whitespace(entry.get("title") or "")
        if not title:
            title, _ = split_title_and_summary(raw_content)
        summary = normalize_whitespace(entry.get("brief") or "")
        if not summary:
            _, summary = split_title_and_summary(raw_content)
        source = normalize_whitespace(entry.get("author") or "\u8d22\u8054\u793e7x24")
        reference_url = normalize_whitespace(entry.get("shareurl") or "")
        if not reference_url:
            reference_url = f"https://www.cls.cn/detail/{item_id}"
        sectors = infer_sectors(f"{title} {summary}", "touzi-mingjian")
        sentiment = infer_sentiment(f"{title} {summary}")
        items.append(
            ClsNewsItem(
                id=f"cls-live-{item_id}",
                title=title[:120],
                published_at=published_dt.isoformat(timespec="seconds"),
                source=source,
                summary=summary[:500],
                reference_url=reference_url,
                sectors=sectors,
                sentiment=sentiment,
            )
        )
    if not items:
        raise RuntimeError("No telegraph items parsed from cls.cn")
    return sorted(items, key=lambda item: item.published_at, reverse=True)
def get_accounts() -> list[Account]:
    """Return the persisted account list, falling back to the built-in ACCOUNTS."""
    stored = fetch_accounts()
    if stored:
        return stored
    return ACCOUNTS
def normalize_date(value: str) -> str:
    """Canonicalize an ISO date or datetime string to YYYY-MM-DD."""
    parsed = datetime.fromisoformat(value)
    return parsed.date().isoformat()
def blank_daily_input(date_str: str) -> DailyInputDocument:
    """Create an empty daily-input document with one link-less row per known account."""
    rows = [
        DailyInputAccount(account_id=account.id, account_name=account.name, links=[])
        for account in get_accounts()
    ]
    return DailyInputDocument(date=date_str, updated_at=iso_timestamp(), accounts=rows)
def clean_links(links: Iterable[str]) -> list[str]:
    """Strip surrounding whitespace and drop blanks and duplicates, keeping first-seen order."""
    # Dict keys preserve insertion order, giving ordered de-duplication.
    ordered: dict[str, None] = {}
    for candidate in links:
        trimmed = candidate.strip()
        if trimmed:
            ordered.setdefault(trimmed, None)
    return list(ordered)
def normalize_daily_input(date_str: str, payload: DailyInputUpsertPayload) -> DailyInputDocument:
    """Merge an upsert payload into a full document covering every known account.

    Links are cleaned per account; accounts absent from the payload get an
    empty link list so the document shape stays stable.
    """
    links_by_account = {entry.account_id: clean_links(entry.links) for entry in payload.accounts}
    rows = [
        DailyInputAccount(
            account_id=account.id,
            account_name=account.name,
            links=links_by_account.get(account.id, []),
        )
        for account in get_accounts()
    ]
    return DailyInputDocument(date=date_str, updated_at=iso_timestamp(), accounts=rows)
def load_daily_input(date_str: str) -> DailyInputDocument:
    """Fetch the stored daily input for *date_str*, or a blank document when none exists."""
    stored = fetch_daily_input_document(date_str)
    return blank_daily_input(date_str) if stored is None else stored
def save_daily_input(document: DailyInputDocument) -> DailyInputDocument:
    """Persist a daily-input document and return the stored version."""
    return save_daily_input_document(document)
def load_report(date_str: str) -> ReportDocument | None:
    """Fetch the stored report for *date_str*, or None when none exists."""
    return fetch_report_document(date_str)
def save_report(document: ReportDocument) -> ReportDocument:
    """Persist a report document and return the stored version."""
    return save_report_document(document)
def list_reports() -> list[ReportListItem]:
    """Return the stored report index entries."""
    return fetch_report_list()
def title_from_link(account_name: str, url: str, index: int) -> str:
    """Synthesize an article title from the URL's path tokens.

    URL-decodes the path, keeps up to three meaningful tokens (short tokens
    upper-cased, longer ones capitalized), and falls back to a numbered
    placeholder when the path has no usable words.
    """
    decoded = unquote(urlparse(url).path or url)
    stop_words = {"s", "mp", "weixin", "qq", "com"}
    words = [w for w in re.split(r"[\W_]+", decoded.lower()) if w and w not in stop_words]
    usable = [w for w in words if len(w) > 1]
    if not usable:
        return f"{account_name}\uff1a\u5e02\u573a\u8ddf\u8e2a\u7b2c {index + 1} \u6761"
    styled = [w.upper() if len(w) <= 3 else w.capitalize() for w in usable[:3]]
    topic = " / ".join(styled)
    return f"{account_name}\uff1a{topic} \u89c2\u5bdf"
def infer_sectors(text: str, account_id: str) -> list[str]:
    """Match *text* against SECTOR_KEYWORDS (max three hits); fall back to the account's focus sectors."""
    haystack = text.lower()
    matched: list[str] = []
    for sector, keywords in SECTOR_KEYWORDS.items():
        if any(keyword.lower() in haystack for keyword in keywords):
            matched.append(sector)
    if matched:
        return matched[:3]
    # No keyword hit: use the account's default focus (max two sectors).
    return ACCOUNT_FOCUS.get(account_id, ["AI", "\u7b97\u529b"])[:2]
def infer_sentiment(text: str) -> str:
    """Classify *text* as bull/bear/neutral by comparing keyword hit counts per side."""
    haystack = text.lower()
    bull_hits = sum(1 for keyword in POSITIVE_KEYWORDS if keyword.lower() in haystack)
    bear_hits = sum(1 for keyword in NEGATIVE_KEYWORDS if keyword.lower() in haystack)
    if bull_hits > bear_hits:
        return SENTIMENT_BULL
    if bear_hits > bull_hits:
        return SENTIMENT_BEAR
    return SENTIMENT_NEUTRAL
def infer_article_type(title: str) -> str:
    """Map a title to the first matching ARTICLE_TYPE_PATTERNS label, defaulting to the generic one."""
    haystack = title.lower()
    return next(
        (label for keyword, label in ARTICLE_TYPE_PATTERNS if keyword.lower() in haystack),
        "\u4e3b\u9898\u89c2\u70b9",
    )
def build_article_summary(title: str, sectors: list[str], sentiment: str) -> str:
    """Compose a one-sentence Chinese summary from the title, leading sectors and sentiment."""
    if sectors:
        sector_text = "\u3001".join(sectors[:2])
    else:
        sector_text = "\u6838\u5fc3\u4e3b\u7ebf"
    sentiment_text = {
        SENTIMENT_BULL: "\u504f\u79ef\u6781\u7684\u8282\u594f\u5224\u65ad",
        SENTIMENT_BEAR: "\u660e\u663e\u504f\u8c28\u614e\u7684\u98ce\u9669\u63d0\u9192",
        SENTIMENT_NEUTRAL: "\u66f4\u5f3a\u8c03\u7ed3\u6784\u5206\u5316\u4e0e\u7b49\u5f85\u786e\u8ba4",
    }[sentiment]
    return f"{title} \u56f4\u7ed5 {sector_text} \u5c55\u5f00\uff0c\u7ed9\u51fa\u7684\u7ed3\u8bba\u662f{sentiment_text}\uff0c\u9002\u5408\u4f5c\u4e3a\u5f53\u65e5\u76d8\u9762\u8ddf\u8e2a\u4e0e\u590d\u76d8\u53c2\u8003\u3002"
def generate_report(date_str: str, input_document: DailyInputDocument) -> ReportDocument:
    """Build the daily opinion report from the links recorded per account.

    Each link is turned into a synthesized OpinionArticle (title, sectors
    and sentiment inferred from the URL text); the report then aggregates
    the dominant sectors and an overall tone. When no links exist, a
    placeholder document with zero counts is returned instead.
    """
    base_date = datetime.fromisoformat(date_str)
    articles: list[OpinionArticle] = []
    for account_index, account in enumerate(input_document.accounts):
        for link_index, url in enumerate(account.links):
            title = title_from_link(account.account_name, url, link_index)
            sectors = infer_sectors(f"{title} {url}", account.account_id)
            sentiment = infer_sentiment(f"{title} {url}")
            # Spread synthetic publish times across the day (hour 9-16,
            # minutes staggered by link position) so sorting is stable.
            published_at = (
                base_date.replace(hour=9 + ((account_index + link_index) % 8), minute=(link_index * 12) % 60)
                .replace(tzinfo=SHANGHAI)
                .isoformat(timespec="seconds")
            )
            articles.append(
                OpinionArticle(
                    id=f"{date_str}-{account.account_id}-{link_index}",
                    account_id=account.account_id,
                    account_name=account.account_name,
                    title=title,
                    published_at=published_at,
                    summary=build_article_summary(title, sectors, sentiment),
                    source_url=url,
                    sectors=sectors,
                    sentiment=sentiment,
                    article_type=infer_article_type(title),
                )
            )
    if not articles:
        # No links recorded yet: keep the report skeleton so callers can
        # regenerate a full report once inputs arrive.
        return ReportDocument(
            date=date_str,
            generated_at=iso_timestamp(),
            summary="\u5f53\u65e5\u5c1a\u672a\u5f55\u5165\u6587\u7ae0\u94fe\u63a5\uff0c\u7cfb\u7edf\u5df2\u4fdd\u7559\u65e5\u62a5\u7ed3\u6784\uff0c\u7b49\u5f85\u8865\u5145\u516c\u4f17\u53f7\u6587\u7ae0\u540e\u518d\u751f\u6210\u5b8c\u6574\u7ed3\u8bba\u3002",
            focus_sectors=[],
            article_count=0,
            account_count=0,
            articles=[],
        )
    # The four most-mentioned sectors become the report's focus list.
    sector_counter = Counter(sector for article in articles for sector in article.sectors)
    focus_sectors = [sector for sector, _count in sector_counter.most_common(4)]
    # Overall tone follows the majority sentiment; a tie reads as "split".
    sentiment_counter = Counter(article.sentiment for article in articles)
    if sentiment_counter[SENTIMENT_BULL] > sentiment_counter[SENTIMENT_BEAR]:
        tone = "\u6574\u4f53\u504f\u79ef\u6781\uff0c\u4e3b\u7ebf\u8ba8\u8bba\u96c6\u4e2d\u5ea6\u8f83\u9ad8"
    elif sentiment_counter[SENTIMENT_BEAR] > sentiment_counter[SENTIMENT_BULL]:
        tone = "\u6574\u4f53\u504f\u8c28\u614e\uff0c\u98ce\u9669\u63a7\u5236\u4ecd\u662f\u4e3b\u53d9\u4e8b"
    else:
        tone = "\u591a\u7a7a\u5206\u6b67\u5e76\u5b58\uff0c\u5e02\u573a\u66f4\u770b\u91cd\u9a8c\u8bc1\u4e0e\u8282\u594f"
    # Only accounts that actually contributed links count toward coverage.
    active_accounts = len([account for account in input_document.accounts if account.links])
    sector_text = "\u3001".join(focus_sectors) if focus_sectors else "\u6682\u65e0\u805a\u7126\u677f\u5757"
    summary = (
        f"{date_str} \u5171\u6574\u7406 {len(articles)} \u7bc7\u516c\u4f17\u53f7\u89c2\u70b9\uff0c\u8986\u76d6 {active_accounts} \u4e2a\u8d26\u6237\u3002"
        f"{tone}\uff0c\u8ba8\u8bba\u91cd\u70b9\u843d\u5728 {sector_text}\u3002"
    )
    return ReportDocument(
        date=date_str,
        generated_at=iso_timestamp(),
        summary=summary,
        focus_sectors=focus_sectors,
        article_count=len(articles),
        account_count=active_accounts,
        articles=sorted(articles, key=lambda item: item.published_at, reverse=True),
    )
def build_cls_news_document(
    reference_time: datetime | None = None,
    *,
    allow_live_fetch: bool = True,
) -> ClsNewsDocument:
    """Assemble a ClsNewsDocument for *reference_time* (default: now).

    Tries the live cls.cn scrape when *allow_live_fetch* is true; any
    failure (network, parsing, or the deliberate raise when live fetch is
    disabled) falls back to the canned template items. The document carries
    a sector watch list and per-sector impact summaries derived from the
    items' sentiments.
    """
    current = reference_time or now_local()
    try:
        if allow_live_fetch:
            items = fetch_cls_telegraph_items(current)
        else:
            # Force the fallback path for historical dates.
            raise RuntimeError("Live fetch disabled for non-current date")
    except Exception:
        # Best-effort: never fail document construction because the scrape
        # failed; use the built-in templates instead.
        items = build_fallback_cls_items(current)
    # The five most-mentioned sectors form the watch list.
    sector_counter = Counter(sector for item in items for sector in item.sectors)
    watch_list = [sector for sector, _count in sector_counter.most_common(5)]
    overview = (
        "\u8d44\u8baf\u5217\u8868\u5c55\u793a\u6240\u9009\u65e5\u671f\u5185\u7684\u8d22\u8054\u793e 7x24 \u8d44\u8baf\uff0c"
        "\u5f53\u65e5\u6570\u636e\u6765\u81ea cls.cn \u5b9e\u65f6\u6293\u53d6\uff0c\u6bcf 3 \u5206\u949f\u66f4\u65b0\u4e00\u6b21\u3002"
    )
    hot_topics = (
        "\u70ed\u70b9\u6982\u89c8\u53ea\u4fdd\u7559\u5bf9\u677f\u5757\u5b58\u5728\u660e\u663e\u5f71\u54cd\u7684\u65b9\u5411\uff0c"
        f"\u5f53\u524d\u4e3b\u8981\u96c6\u4e2d\u5728 {'\u3001'.join(watch_list[:3])}\u3002"
    )
    # Build an impact entry for up to four watched sectors, classifying each
    # by the majority sentiment of its related items.
    sector_impacts: list[ClsSectorImpact] = []
    seen_sectors: set[str] = set()
    for sector in watch_list[:4]:
        if sector in seen_sectors:
            continue
        seen_sectors.add(sector)
        related_items = [item for item in items if sector in item.sectors]
        if not related_items:
            continue
        sentiment_counter = Counter(item.sentiment for item in related_items)
        if sentiment_counter[SENTIMENT_BULL] > sentiment_counter[SENTIMENT_BEAR]:
            sentiment = SENTIMENT_BULL
            reason = f"{sector} \u65b9\u5411\u51fa\u73b0\u50ac\u5316\u6216\u666f\u6c14\u5f3a\u5316\uff0c\u77ed\u7ebf\u504f\u6b63\u5411\u5f71\u54cd\u3002"
        elif sentiment_counter[SENTIMENT_BEAR] > sentiment_counter[SENTIMENT_BULL]:
            sentiment = SENTIMENT_BEAR
            reason = f"{sector} \u65b9\u5411\u51fa\u73b0\u5151\u73b0\u6216\u5206\u6b67\uff0c\u77ed\u7ebf\u504f\u8d1f\u5411\u5f71\u54cd\u3002"
        else:
            sentiment = SENTIMENT_NEUTRAL
            reason = f"{sector} \u65b9\u5411\u6709\u8ba8\u8bba\u4f46\u4ecd\u9700\u9a8c\u8bc1\uff0c\u77ed\u7ebf\u4ee5\u4e2d\u6027\u89c2\u5bdf\u4e3a\u4e3b\u3002"
        sector_impacts.append(
            ClsSectorImpact(
                sector=sector,
                sentiment=sentiment,
                reason=reason,
                # De-duplicate titles while preserving order.
                related_titles=list(dict.fromkeys(item.title for item in related_items[:2])),
            )
        )
    return ClsNewsDocument(
        date=current.date().isoformat(),
        updated_at=iso_timestamp(current),
        window_label="\u5f53\u5929\u8d44\u8baf",
        summary=ClsNewsSummary(
            overview=overview,
            hot_topics=hot_topics,
            watch_list=watch_list,
        ),
        sector_impacts=sector_impacts,
        items=items,
    )
def load_cls_news(date_str: str) -> ClsNewsDocument | None:
    """Fetch the stored CLS news document for *date_str*, or None when none exists."""
    return fetch_cls_news_document(date_str)
def build_reference_time(date_str: str) -> datetime:
    """Reference timestamp for a date: 'now' when it is today, else 15:00 local (market close)."""
    requested = datetime.fromisoformat(date_str).date()
    if requested == now_local().date():
        return now_local()
    return datetime.combine(requested, time(hour=15, minute=0), tzinfo=SHANGHAI)
def refresh_cls_news(date_str: str | None = None) -> ClsNewsDocument:
    """Rebuild and persist the CLS news document for *date_str* (default: today).

    Live fetching is only allowed for today's date. If the rebuild fails and
    a previously stored document exists, that document is returned instead;
    otherwise the error propagates.
    """
    target_date = normalize_date(date_str or now_local().date().isoformat())
    cached = load_cls_news(target_date)
    reference_time = build_reference_time(target_date)
    is_today = target_date == now_local().date().isoformat()
    try:
        fresh = build_cls_news_document(reference_time, allow_live_fetch=is_today)
    except Exception:
        if cached is None:
            raise
        return cached
    return save_cls_news_document(fresh)
def get_cls_news(date_str: str | None = None) -> ClsNewsDocument:
    """Return the CLS news document for a date, refreshing today's copy when stale."""
    target_date = normalize_date(date_str or now_local().date().isoformat())
    cached = load_cls_news(target_date)
    if cached is None:
        return refresh_cls_news(target_date)
    if target_date != now_local().date().isoformat():
        # Historical documents never expire.
        return cached
    last_updated = ensure_local_timezone(datetime.fromisoformat(cached.updated_at))
    if now_local() - last_updated < CLS_REFRESH_INTERVAL:
        return cached
    return refresh_cls_news(target_date)
def seed_demo_content() -> None:
    """Populate storage with demo accounts, sample inputs/reports, and today's CLS news.

    Idempotent-ish: per sample date, inputs and reports are only written when
    either is missing; today's CLS news document is only built when absent.
    """
    save_accounts(ACCOUNTS)
    today = now_local().date()
    # SAMPLE_INPUTS keys are day offsets before today.
    for offset, account_links in SAMPLE_INPUTS.items():
        date_str = (today - timedelta(days=offset)).isoformat()
        if fetch_daily_input_document(date_str) is not None and fetch_report_document(date_str) is not None:
            continue  # both artifacts already present for this date
        payload = DailyInputUpsertPayload(
            accounts=[
                {"account_id": account.id, "links": account_links.get(account.id, [])}
                for account in ACCOUNTS
            ]
        )
        input_document = normalize_daily_input(date_str, payload)
        save_daily_input_document(input_document)
        save_report_document(generate_report(date_str, input_document))
    today_str = today.isoformat()
    if fetch_cls_news_document(today_str) is None:
        save_cls_news_document(build_cls_news_document())