"""Demo content service: daily opinion-article reports plus CLS 7x24 telegraph news.

Responsibilities:
- Maintain the fixed demo account list and per-day article-link input documents.
- Generate a daily ``ReportDocument`` from the links (heuristic title/sector/
  sentiment inference — no LLM involved).
- Fetch and cache the cls.cn mobile telegraph feed, falling back to canned
  templates when the live fetch fails or is disallowed for past dates.

All timestamps are Asia/Shanghai; persistence goes through app.services.storage.
"""

from __future__ import annotations

import json
import re
from collections import Counter
from datetime import datetime, time, timedelta
from typing import Iterable
from urllib.parse import unquote, urlparse
from zoneinfo import ZoneInfo

import requests
from bs4 import BeautifulSoup

from app.models import (
    Account,
    ClsNewsDocument,
    ClsNewsItem,
    ClsNewsSummary,
    ClsSectorImpact,
    DailyInputAccount,
    DailyInputDocument,
    DailyInputUpsertPayload,
    OpinionArticle,
    ReportDocument,
    ReportListItem,
)
from app.services.storage import (
    fetch_accounts,
    fetch_cls_news_document,
    fetch_daily_input_document,
    fetch_report_document,
    fetch_report_list,
    save_accounts,
    save_cls_news_document,
    save_daily_input_document,
    save_report_document,
)

# All wall-clock logic in this module is pinned to Asia/Shanghai.
SHANGHAI = ZoneInfo("Asia/Shanghai")
# Cached CLS news for "today" is considered stale after this interval.
CLS_REFRESH_INTERVAL = timedelta(minutes=3)
CLS_TELEGRAPH_URL = "https://m.cls.cn/telegraph"
# Browser-like headers; cls.cn serves the Next.js payload to normal browsers.
HTTP_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"
    ),
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
}

# Sentiment labels (Chinese): bullish / bearish / neutral.
SENTIMENT_BULL = "\u770b\u591a"
SENTIMENT_BEAR = "\u770b\u7a7a"
SENTIMENT_NEUTRAL = "\u4e2d\u6027"

# Built-in demo accounts, used when storage has none and for seeding.
ACCOUNTS: list[Account] = [
    Account(
        id="touzi-mingjian",
        name="\u6295\u8d44\u660e\u89c1",
        description="\u504f\u4e3b\u9898\u8f6e\u52a8\u4e0e\u4e3b\u7ebf\u5224\u65ad\uff0c\u9002\u5408\u8ddf\u8e2a\u5e02\u573a\u504f\u597d\u53d8\u5316\u3002",
    ),
    Account(
        id="aigujun-2020",
        name="\u7231\u80a1\u541b2020",
        description="\u5173\u6ce8\u60c5\u7eea\u3001\u70ed\u70b9\u6269\u6563\u4e0e\u4ea4\u6613\u7ec6\u8282\u3002",
    ),
    Account(
        id="mazhiming-shouping",
        name="\u9a6c\u5fd7\u660e\u6536\u8bc4",
        description="\u65e5\u5185\u6536\u8bc4\u4e0e\u60c5\u7eea\u53d8\u5316\u603b\u7ed3\u3002",
    ),
    Account(
        id="laobai-guandian",
        name="\u8001\u767d\u5206\u6790\u5ba4\u89c2\u70b9",
        description="\u504f\u7b56\u7565\u62c6\u89e3\u548c\u5173\u952e\u677f\u5757\u8ddf\u8e2a\u3002",
    ),
]

# Fallback sector focus per account, used when keyword inference finds nothing.
ACCOUNT_FOCUS = {
    "touzi-mingjian": ["AI", "\u7b97\u529b", "\u673a\u5668\u4eba"],
    "aigujun-2020": ["CPO", "\u5b58\u50a8\u82af\u7247", "\u65b0\u80fd\u6e90"],
    "mazhiming-shouping": ["AI", "\u5238\u5546", "\u6c7d\u8f66"],
    "laobai-guandian": ["\u673a\u5668\u4eba", "\u534a\u5bfc\u4f53", "\u65b0\u80fd\u6e90"],
}

# Sector -> case-insensitive trigger keywords (mixed Chinese/English).
SECTOR_KEYWORDS = {
    "AI": ["ai", "\u4eba\u5de5\u667a\u80fd", "\u5927\u6a21\u578b", "\u6a21\u578b"],
    "\u7b97\u529b": ["\u7b97\u529b", "compute", "server", "gpu"],
    "CPO": ["cpo", "\u5149\u6a21\u5757", "\u9ad8\u901f\u4e92\u8054"],
    "\u5b58\u50a8\u82af\u7247": ["\u5b58\u50a8", "memory", "dram", "nand"],
    "\u534a\u5bfc\u4f53": ["\u534a\u5bfc\u4f53", "chip", "wafer", "\u6676\u5706"],
    "\u5238\u5546": ["\u5238\u5546", "broker", "\u8bc1\u5238"],
    "\u77f3\u6cb9\u5929\u7136\u6c14": ["\u77f3\u6cb9", "\u5929\u7136\u6c14", "\u6cb9\u6c14", "\u80fd\u6e90\u4ef7\u683c"],
    "\u65b0\u80fd\u6e90": ["\u65b0\u80fd\u6e90", "\u9502\u7535", "\u5149\u4f0f", "\u50a8\u80fd"],
    "\u519b\u5de5": ["\u519b\u5de5", "\u536b\u661f", "\u822a\u5929"],
    "\u673a\u5668\u4eba": ["\u673a\u5668\u4eba", "robot", "\u81ea\u52a8\u5316"],
    "\u6c7d\u8f66": ["\u6c7d\u8f66", "\u8f66\u4f01", "\u667a\u9a7e", "\u6574\u8f66"],
    "\u533b\u836f": ["\u533b\u836f", "\u521b\u65b0\u836f", "\u533b\u7597"],
}

# Keyword lists for the naive sentiment vote in infer_sentiment().
POSITIVE_KEYWORDS = [
    "\u673a\u4f1a",
    "\u4fee\u590d",
    "\u589e\u5f3a",
    "\u4e3b\u7ebf",
    "\u589e\u91cf",
    "\u53cd\u5f39",
    "\u7a81\u7834",
    "\u79ef\u6781",
    "up",
    "bull",
]
NEGATIVE_KEYWORDS = [
    "\u98ce\u9669",
    "\u627f\u538b",
    "\u8c28\u614e",
    "\u56de\u8c03",
    "\u7f29\u91cf",
    "\u89c2\u671b",
    "\u5206\u6b67",
    "bear",
    "down",
]

# Ordered (title keyword, article type) pairs; first match wins.
ARTICLE_TYPE_PATTERNS = [
    ("\u6536\u8bc4", "\u5e02\u573a\u6536\u8bc4"),
    ("\u5348", "\u76d8\u4e2d\u89c2\u5bdf"),
    ("\u7b56\u7565", "\u7b56\u7565\u8ddf\u8e2a"),
    ("\u590d\u76d8", "\u76d8\u9762\u590d\u76d8"),
    ("\u884c\u4e1a", "\u884c\u4e1a\u89c2\u5bdf"),
]

# Canned telegraph items used when the live cls.cn fetch is unavailable.
CLS_NEWS_TEMPLATES = [
    {
        "title": "\u8d22\u8054\u793e\u76d8\u524d\u7cbe\u9009\uff1a\u7b97\u529b\u94fe\u56de\u6696\uff0c\u8d44\u91d1\u91cd\u65b0\u805a\u7126\u9ad8\u666f\u6c14\u65b9\u5411",
        "summary": "\u9694\u591c\u5e02\u573a\u98ce\u9669\u504f\u597d\u56de\u5347\uff0c\u7b97\u529b\u4e0e\u670d\u52a1\u5668\u94fe\u6761\u83b7\u8d44\u91d1\u91cd\u65b0\u914d\u7f6e\uff0c\u60c5\u7eea\u4fee\u590d\u5148\u4e8e\u6210\u4ea4\u5168\u9762\u653e\u5927\u3002",
        "sectors": ["\u7b97\u529b", "AI"],
        "sentiment": SENTIMENT_BULL,
        "reference_url": "https://www.cls.cn/detail/compute-rebound",
    },
    {
        "title": "AI Daily\uff1aCPO \u4e0e\u5b58\u50a8\u82af\u7247\u540c\u6b65\u8d70\u5f3a\uff0c\u666f\u6c14\u5ea6\u7ebf\u7d22\u5ef6\u7eed",
        "summary": "\u9ad8\u901f\u4e92\u8054\u4e0e\u5b58\u50a8\u62a5\u4ef7\u9884\u671f\u652f\u6491\u677f\u5757\u8868\u73b0\uff0c\u8d44\u91d1\u66f4\u503e\u5411\u4e8e\u56f4\u7ed5\u786e\u5b9a\u6027\u73af\u8282\u96c6\u4e2d\u3002",
        "sectors": ["CPO", "\u5b58\u50a8\u82af\u7247"],
        "sentiment": SENTIMENT_BULL,
        "reference_url": "https://www.cls.cn/detail/ai-daily-cpo-memory",
    },
    {
        "title": "\u8d22\u8054\u793e\u884c\u4e1a\u89c2\u5bdf\uff1a\u673a\u5668\u4eba\u94fe\u6761\u5206\u5316\uff0c\u8ba2\u5355\u5151\u73b0\u6210\u4e3a\u77ed\u671f\u7126\u70b9",
        "summary": "\u673a\u5668\u4eba\u65b9\u5411\u5185\u90e8\u5f00\u59cb\u51fa\u73b0\u5151\u73b0\u4e0e\u6362\u624b\uff0c\u5e02\u573a\u4ece\u6982\u5ff5\u6269\u6563\u8f6c\u5411\u4e1a\u7ee9\u4e0e\u8ba2\u5355\u9a8c\u8bc1\u3002",
        "sectors": ["\u673a\u5668\u4eba"],
        "sentiment": SENTIMENT_NEUTRAL,
        "reference_url": "https://www.cls.cn/detail/robotics-orders",
    },
    {
        "title": "\u8d22\u8054\u793e\u80fd\u6e90\u8ffd\u8e2a\uff1a\u6cb9\u6c14\u677f\u5757\u9ad8\u4f4d\u9707\u8361\uff0c\u8d44\u91d1\u5207\u5411\u9632\u5fa1\u54c1\u79cd",
        "summary": "\u539f\u6cb9\u4ef7\u683c\u7ef4\u6301\u9ad8\u4f4d\u540e\uff0c\u6cb9\u6c14\u65b9\u5411\u51fa\u73b0\u9ad8\u4f4d\u9707\u8361\uff0c\u90e8\u5206\u8d44\u91d1\u8f6c\u5411\u533b\u836f\u7b49\u9632\u5b88\u677f\u5757\u3002",
        "sectors": ["\u77f3\u6cb9\u5929\u7136\u6c14", "\u533b\u836f"],
        "sentiment": SENTIMENT_NEUTRAL,
        "reference_url": "https://www.cls.cn/detail/energy-rotation",
    },
    {
        "title": "AI Daily\uff1a\u6c7d\u8f66\u4e0e\u667a\u9a7e\u5ef6\u7eed\u5206\u6b67\uff0c\u4e3b\u7ebf\u4ecd\u9700\u7b49\u5f85\u9500\u91cf\u6570\u636e\u9a8c\u8bc1",
        "summary": "\u6574\u8f66\u4e0e\u667a\u9a7e\u65b9\u5411\u5173\u6ce8\u5ea6\u4ecd\u9ad8\uff0c\u4f46\u5e02\u573a\u5bf9\u4f30\u503c\u6269\u5f20\u5df2\u6709\u4fdd\u7559\uff0c\u7b49\u5f85\u9500\u91cf\u548c\u8ba2\u5355\u6570\u636e\u786e\u8ba4\u3002",
        "sectors": ["\u6c7d\u8f66"],
        "sentiment": SENTIMENT_BEAR,
        "reference_url": "https://www.cls.cn/detail/auto-data-watch",
    },
    {
        "title": "\u8d22\u8054\u793e7x24\uff1a\u534a\u5bfc\u4f53\u8bbe\u5907\u65b9\u5411\u8d70\u5f3a\uff0c\u673a\u6784\u79f0\u56fd\u4ea7\u66ff\u4ee3\u8282\u594f\u63d0\u901f",
        "summary": "\u6676\u5706\u5236\u9020\u4e0e\u8bbe\u5907\u94fe\u6761\u51fa\u73b0\u5f02\u52a8\uff0c\u5e02\u573a\u56f4\u7ed5\u56fd\u4ea7\u66ff\u4ee3\u548c\u8d44\u672c\u5f00\u652f\u6062\u590d\u91cd\u65b0\u5b9a\u4ef7\u3002",
        "sectors": ["\u534a\u5bfc\u4f53"],
        "sentiment": SENTIMENT_BULL,
        "reference_url": "https://www.cls.cn/detail/semi-equipment-up",
    },
    {
        "title": "\u8d22\u8054\u793e7x24\uff1a\u5238\u5546\u677f\u5757\u5348\u540e\u62c9\u5347\uff0c\u5e02\u573a\u60c5\u7eea\u6709\u6240\u4fee\u590d",
        "summary": "\u6307\u6570\u9707\u8361\u8fc7\u7a0b\u4e2d\u5238\u5546\u627f\u62c5\u60c5\u7eea\u4fee\u590d\u529f\u80fd\uff0c\u5e26\u52a8\u90e8\u5206\u9ad8\u5f39\u6027\u65b9\u5411\u56de\u6696\u3002",
        "sectors": ["\u5238\u5546"],
        "sentiment": SENTIMENT_BULL,
        "reference_url": "https://www.cls.cn/detail/broker-rebound",
    },
    {
        "title": "\u8d22\u8054\u793e7x24\uff1a\u521b\u65b0\u836f\u65b9\u5411\u6301\u7eed\u6d3b\u8dc3\uff0c\u8d44\u91d1\u8f6c\u5411\u9632\u5b88\u4e0e\u6210\u957f\u517c\u987e",
        "summary": "\u533b\u836f\u677f\u5757\u83b7\u5f97\u589e\u91cf\u8d44\u91d1\u5173\u6ce8\uff0c\u521b\u65b0\u836f\u548c\u5668\u68b0\u7ec6\u5206\u8868\u73b0\u66f4\u5f3a\u3002",
        "sectors": ["\u533b\u836f"],
        "sentiment": SENTIMENT_NEUTRAL,
        "reference_url": "https://www.cls.cn/detail/medical-active",
    },
    {
        "title": "\u8d22\u8054\u793e7x24\uff1a\u65b0\u80fd\u6e90\u94fe\u6761\u5206\u5316\u52a0\u5267\uff0c\u673a\u6784\u63d0\u9192\u5173\u6ce8\u4ea7\u80fd\u51fa\u6e05\u8282\u594f",
        "summary": "\u65b0\u80fd\u6e90\u677f\u5757\u5185\u90e8\u8f6e\u52a8\u660e\u663e\uff0c\u8d44\u91d1\u66f4\u504f\u5411\u4f4e\u4f4d\u73af\u8282\u548c\u6210\u672c\u6539\u5584\u65b9\u5411\u3002",
        "sectors": ["\u65b0\u80fd\u6e90"],
        "sentiment": SENTIMENT_NEUTRAL,
        "reference_url": "https://www.cls.cn/detail/new-energy-split",
    },
    {
        "title": "\u8d22\u8054\u793e7x24\uff1a\u519b\u5de5\u677f\u5757\u76d8\u4e2d\u5f02\u52a8\uff0c\u8ba2\u5355\u5151\u73b0\u9884\u671f\u91cd\u65b0\u5347\u6e29",
        "summary": "\u519b\u5de5\u94fe\u6761\u76d8\u4e2d\u8d70\u5f3a\uff0c\u5e02\u573a\u5173\u6ce8\u540e\u7eed\u8ba2\u5355\u5151\u73b0\u4e0e\u4f30\u503c\u5207\u6362\u7a7a\u95f4\u3002",
        "sectors": ["\u519b\u5de5"],
        "sentiment": SENTIMENT_BULL,
        "reference_url": "https://www.cls.cn/detail/defense-orders",
    },
    {
        "title": "\u8d22\u8054\u793e7x24\uff1a\u673a\u5668\u4eba\u677f\u5757\u51b2\u9ad8\u56de\u843d\uff0c\u77ed\u7ebf\u535a\u5f08\u60c5\u7eea\u5347\u6e29",
        "summary": "\u673a\u5668\u4eba\u65b9\u5411\u9ad8\u4f4d\u9707\u8361\uff0c\u8d44\u91d1\u5728\u9898\u6750\u6269\u6563\u4e0e\u5151\u73b0\u538b\u529b\u4e4b\u95f4\u53cd\u590d\u5207\u6362\u3002",
        "sectors": ["\u673a\u5668\u4eba"],
        "sentiment": SENTIMENT_NEUTRAL,
        "reference_url": "https://www.cls.cn/detail/robotics-intraday",
    },
    {
        "title": "\u8d22\u8054\u793e7x24\uff1a\u5b58\u50a8\u82af\u7247\u62a5\u4ef7\u9884\u671f\u7ee7\u7eed\u4e0a\u4fee\uff0c\u4ea7\u4e1a\u94fe\u666f\u6c14\u5ea6\u53d7\u5173\u6ce8",
        "summary": "\u5b58\u50a8\u73af\u8282\u4ef7\u683c\u4fee\u590d\u903b\u8f91\u5ef6\u7eed\uff0c\u5e02\u573a\u91cd\u65b0\u4ea4\u6613\u4f9b\u9700\u6539\u5584\u4e0e\u76c8\u5229\u5f39\u6027\u3002",
        "sectors": ["\u5b58\u50a8\u82af\u7247"],
        "sentiment": SENTIMENT_BULL,
        "reference_url": "https://www.cls.cn/detail/memory-price-up",
    },
]

# Demo seed data: day-offset -> account_id -> article links.
SAMPLE_INPUTS = {
    1: {
        "touzi-mingjian": ["https://mp.weixin.qq.com/s/semiconductor-capacity-and-chip-cycle"],
        "aigujun-2020": ["https://mp.weixin.qq.com/s/storage-chip-price-repair"],
        "mazhiming-shouping": ["https://mp.weixin.qq.com/s/market-close-sector-rotation"],
        "laobai-guandian": ["https://mp.weixin.qq.com/s/robotics-and-energy-balance"],
    },
}


def now_local() -> datetime:
    """Return the current time in Asia/Shanghai."""
    return datetime.now(SHANGHAI)


def iso_timestamp(value: datetime | None = None) -> str:
    """Return *value* (default: now) as an ISO string with microseconds dropped."""
    return (value or now_local()).replace(microsecond=0).isoformat()


def ensure_local_timezone(value: datetime) -> datetime:
    """Coerce *value* to Asia/Shanghai; naive datetimes are assumed local."""
    if value.tzinfo is None:
        return value.replace(tzinfo=SHANGHAI)
    return value.astimezone(SHANGHAI)


def normalize_whitespace(value: str) -> str:
    """Collapse runs of whitespace to single spaces and strip the ends."""
    return re.sub(r"\s+", " ", value).strip()


def extract_json_object(script_text: str, marker: str) -> str:
    """Return the first balanced ``{...}`` JSON object following *marker*.

    Tracks string literals and escapes so braces inside strings do not
    affect the depth count. Raises RuntimeError when the marker or a
    balanced object cannot be found.
    """
    marker_index = script_text.find(marker)
    if marker_index < 0:
        raise RuntimeError(f"Marker not found: {marker}")
    start = script_text.find("{", marker_index)
    if start < 0:
        raise RuntimeError(f"JSON object start not found for marker: {marker}")
    depth = 0
    in_string = False
    escaped = False
    for index in range(start, len(script_text)):
        char = script_text[index]
        if in_string:
            # Inside a string: only an unescaped quote ends it.
            if escaped:
                escaped = False
            elif char == "\\":
                escaped = True
            elif char == '"':
                in_string = False
            continue
        if char == '"':
            in_string = True
            continue
        if char == "{":
            depth += 1
            continue
        if char == "}":
            depth -= 1
            if depth == 0:
                return script_text[start : index + 1]
    raise RuntimeError(f"JSON object end not found for marker: {marker}")


def parse_telegraph_timestamp(date_str: str, time_str: str) -> str:
    """Combine a date and an ``HH:MM``/``HH:MM:SS`` time into a Shanghai ISO string."""
    normalized_time = time_str if len(time_str.split(":")) == 3 else f"{time_str}:00"
    return datetime.fromisoformat(f"{date_str}T{normalized_time}").replace(tzinfo=SHANGHAI).isoformat(timespec="seconds")


def split_title_and_summary(content: str) -> tuple[str, str]:
    """Heuristically split telegraph text into a (title, summary) pair.

    A leading bracketed segment (e.g. 【...】) becomes the title; otherwise
    the first sentence is the title and the (possibly truncated) full text
    is the summary.
    """
    cleaned = normalize_whitespace(content)
    bracket_match = re.match(r"^[\[({\u3010\u3016](.+?)[\])}\u3011\u3017][\uff1a: ]*(.*)$", cleaned)
    if bracket_match:
        title = normalize_whitespace(bracket_match.group(1))
        summary = normalize_whitespace(bracket_match.group(2) or cleaned)
        return title[:80], summary or title
    sentence_parts = re.split(r"[。;;!?!?]", cleaned, maxsplit=1)
    title = sentence_parts[0][:80]
    # Keep summaries at most 220 chars (217 + ellipsis).
    summary = cleaned if len(cleaned) <= 220 else f"{cleaned[:217]}..."
    return title, summary


def build_fallback_cls_items(reference_time: datetime) -> list[ClsNewsItem]:
    """Materialize the canned templates as items back-dated from *reference_time*."""
    items: list[ClsNewsItem] = []
    for index, template in enumerate(CLS_NEWS_TEMPLATES):
        # Spread items backwards so the feed looks like a day's worth of posts.
        published_at = (reference_time - timedelta(minutes=index * 95 + 8)).replace(microsecond=0).isoformat()
        items.append(
            ClsNewsItem(
                id=f"cls-{index + 1}",
                title=template["title"],
                published_at=published_at,
                source="\u8d22\u8054\u793e" if index % 2 == 0 else "\u8d22\u8054\u793e AI Daily",
                summary=template["summary"],
                reference_url=template["reference_url"],
                sectors=template["sectors"],
                sentiment=template["sentiment"],
            )
        )
    return sorted(items, key=lambda item: item.published_at, reverse=True)


def fetch_cls_telegraph_items(reference_time: datetime) -> list[ClsNewsItem]:
    """Scrape the cls.cn mobile telegraph page for items dated *reference_time*.

    Parses the ``__NEXT_DATA__`` JSON blob embedded in the page, keeps only
    entries from the reference date, and caps the result at 80 items.
    Raises RuntimeError on any structural problem so callers can fall back.
    """
    session = requests.Session()
    # Ignore proxy/env settings so the fetch behaves the same everywhere.
    session.trust_env = False
    response = session.get(CLS_TELEGRAPH_URL, headers=HTTP_HEADERS, timeout=15)
    response.raise_for_status()
    response.encoding = "utf-8"
    soup = BeautifulSoup(response.text, "html.parser")
    next_data_script = None
    for script in soup.find_all("script"):
        script_text = script.string or script.get_text()
        if "__NEXT_DATA__ =" in script_text:
            next_data_script = script_text
            break
    if not next_data_script:
        raise RuntimeError("Missing __NEXT_DATA__ payload on cls.cn")
    next_data = json.loads(extract_json_object(next_data_script, "__NEXT_DATA__ ="))
    roll_data = (
        next_data.get("props", {})
        .get("initialState", {})
        .get("roll_data", [])
    )
    if not isinstance(roll_data, list) or not roll_data:
        raise RuntimeError("Missing roll_data in cls.cn payload")
    target_date = reference_time.date()
    items: list[ClsNewsItem] = []
    seen_ids: set[int] = set()
    latest_limit = 80
    for entry in roll_data:
        if len(items) >= latest_limit:
            break
        item_id = int(entry.get("id") or 0)
        if not item_id or item_id in seen_ids:
            continue
        seen_ids.add(item_id)
        # Prefer the modification time; fall back to creation time.
        timestamp = int(entry.get("modified_time") or entry.get("ctime") or 0)
        if not timestamp:
            continue
        published_dt = datetime.fromtimestamp(timestamp, tz=SHANGHAI)
        if published_dt.date() != target_date:
            continue
        raw_content = normalize_whitespace(
            entry.get("content") or entry.get("brief") or entry.get("title") or ""
        )
        if len(raw_content) < 8:
            # Too short to be a meaningful telegraph item.
            continue
        title = normalize_whitespace(entry.get("title") or "")
        if not title:
            title, _ = split_title_and_summary(raw_content)
        summary = normalize_whitespace(entry.get("brief") or "")
        if not summary:
            _, summary = split_title_and_summary(raw_content)
        source = normalize_whitespace(entry.get("author") or "\u8d22\u8054\u793e7x24")
        reference_url = normalize_whitespace(entry.get("shareurl") or "")
        if not reference_url:
            reference_url = f"https://www.cls.cn/detail/{item_id}"
        sectors = infer_sectors(f"{title} {summary}", "touzi-mingjian")
        sentiment = infer_sentiment(f"{title} {summary}")
        items.append(
            ClsNewsItem(
                id=f"cls-live-{item_id}",
                title=title[:120],
                published_at=published_dt.isoformat(timespec="seconds"),
                source=source,
                summary=summary[:500],
                reference_url=reference_url,
                sectors=sectors,
                sentiment=sentiment,
            )
        )
    if not items:
        raise RuntimeError("No telegraph items parsed from cls.cn")
    return sorted(items, key=lambda item: item.published_at, reverse=True)


def get_accounts() -> list[Account]:
    """Return stored accounts, falling back to the built-in demo list."""
    records = fetch_accounts()
    return records or ACCOUNTS


def normalize_date(value: str) -> str:
    """Normalize an ISO date/datetime string to a plain ``YYYY-MM-DD`` date."""
    return datetime.fromisoformat(value).date().isoformat()


def blank_daily_input(date_str: str) -> DailyInputDocument:
    """Build an empty input document (no links) for every known account."""
    return DailyInputDocument(
        date=date_str,
        updated_at=iso_timestamp(),
        accounts=[
            DailyInputAccount(account_id=account.id, account_name=account.name, links=[])
            for account in get_accounts()
        ],
    )


def clean_links(links: Iterable[str]) -> list[str]:
    """Strip, de-blank, and de-duplicate links while preserving order."""
    normalized: list[str] = []
    seen: set[str] = set()
    for raw_link in links:
        link = raw_link.strip()
        if not link or link in seen:
            continue
        seen.add(link)
        normalized.append(link)
    return normalized


def normalize_daily_input(date_str: str, payload: DailyInputUpsertPayload) -> DailyInputDocument:
    """Merge an upsert payload into a full document covering every account."""
    payload_map = {item.account_id: clean_links(item.links) for item in payload.accounts}
    return DailyInputDocument(
        date=date_str,
        updated_at=iso_timestamp(),
        accounts=[
            DailyInputAccount(
                account_id=account.id,
                account_name=account.name,
                links=payload_map.get(account.id, []),
            )
            for account in get_accounts()
        ],
    )


def load_daily_input(date_str: str) -> DailyInputDocument:
    """Load the day's input document, or a blank one if none is stored."""
    payload = fetch_daily_input_document(date_str)
    if payload is None:
        return blank_daily_input(date_str)
    return payload


def save_daily_input(document: DailyInputDocument) -> DailyInputDocument:
    """Persist a daily input document (thin storage wrapper)."""
    return save_daily_input_document(document)


def load_report(date_str: str) -> ReportDocument | None:
    """Load the stored report for a date, if any (thin storage wrapper)."""
    return fetch_report_document(date_str)


def save_report(document: ReportDocument) -> ReportDocument:
    """Persist a report document (thin storage wrapper)."""
    return save_report_document(document)


def list_reports() -> list[ReportListItem]:
    """List stored reports (thin storage wrapper)."""
    return fetch_report_list()


def title_from_link(account_name: str, url: str, index: int) -> str:
    """Derive a human-readable article title from a URL's path tokens.

    Falls back to a generic numbered title when the path yields no
    meaningful tokens.
    """
    text = unquote(urlparse(url).path or url)
    tokens = [
        token
        for token in re.split(r"[\W_]+", text.lower())
        if token and token not in {"s", "mp", "weixin", "qq", "com"}
    ]
    meaningful = [token for token in tokens if len(token) > 1]
    if meaningful:
        # Short tokens are treated as acronyms (upper-cased), others capitalized.
        topic = " / ".join(token.upper() if len(token) <= 3 else token.capitalize() for token in meaningful[:3])
        return f"{account_name}\uff1a{topic} \u89c2\u5bdf"
    return f"{account_name}\uff1a\u5e02\u573a\u8ddf\u8e2a\u7b2c {index + 1} \u6761"


def infer_sectors(text: str, account_id: str) -> list[str]:
    """Match *text* against sector keywords; fall back to the account's focus list."""
    lowered = text.lower()
    sectors = [
        sector
        for sector, keywords in SECTOR_KEYWORDS.items()
        if any(keyword.lower() in lowered for keyword in keywords)
    ]
    if sectors:
        return sectors[:3]
    return ACCOUNT_FOCUS.get(account_id, ["AI", "\u7b97\u529b"])[:2]


def infer_sentiment(text: str) -> str:
    """Vote positive vs negative keyword hits; ties are neutral."""
    lowered = text.lower()
    positive = sum(keyword.lower() in lowered for keyword in POSITIVE_KEYWORDS)
    negative = sum(keyword.lower() in lowered for keyword in NEGATIVE_KEYWORDS)
    if positive > negative:
        return SENTIMENT_BULL
    if negative > positive:
        return SENTIMENT_BEAR
    return SENTIMENT_NEUTRAL


def infer_article_type(title: str) -> str:
    """Classify an article by the first matching title keyword pattern."""
    lowered = title.lower()
    for keyword, article_type in ARTICLE_TYPE_PATTERNS:
        if keyword.lower() in lowered:
            return article_type
    return "\u4e3b\u9898\u89c2\u70b9"


def build_article_summary(title: str, sectors: list[str], sentiment: str) -> str:
    """Compose a one-sentence Chinese summary from title, sectors, and sentiment."""
    sector_text = "\u3001".join(sectors[:2]) if sectors else "\u6838\u5fc3\u4e3b\u7ebf"
    sentiment_text = {
        SENTIMENT_BULL: "\u504f\u79ef\u6781\u7684\u8282\u594f\u5224\u65ad",
        SENTIMENT_BEAR: "\u660e\u663e\u504f\u8c28\u614e\u7684\u98ce\u9669\u63d0\u9192",
        SENTIMENT_NEUTRAL: "\u66f4\u5f3a\u8c03\u7ed3\u6784\u5206\u5316\u4e0e\u7b49\u5f85\u786e\u8ba4",
    }[sentiment]
    return f"{title} \u56f4\u7ed5 {sector_text} \u5c55\u5f00\uff0c\u7ed9\u51fa\u7684\u7ed3\u8bba\u662f{sentiment_text}\uff0c\u9002\u5408\u4f5c\u4e3a\u5f53\u65e5\u76d8\u9762\u8ddf\u8e2a\u4e0e\u590d\u76d8\u53c2\u8003\u3002"


def generate_report(date_str: str, input_document: DailyInputDocument) -> ReportDocument:
    """Generate the daily report from the day's article links.

    Each link becomes an OpinionArticle with a heuristically inferred
    title, sectors, sentiment, and a synthetic intra-day publish time.
    With no links at all, a placeholder report is returned.
    """
    base_date = datetime.fromisoformat(date_str)
    articles: list[OpinionArticle] = []
    for account_index, account in enumerate(input_document.accounts):
        for link_index, url in enumerate(account.links):
            title = title_from_link(account.account_name, url, link_index)
            sectors = infer_sectors(f"{title} {url}", account.account_id)
            sentiment = infer_sentiment(f"{title} {url}")
            # Synthetic publish times spread across the trading day (09:00+).
            published_at = (
                base_date.replace(hour=9 + ((account_index + link_index) % 8), minute=(link_index * 12) % 60)
                .replace(tzinfo=SHANGHAI)
                .isoformat(timespec="seconds")
            )
            articles.append(
                OpinionArticle(
                    id=f"{date_str}-{account.account_id}-{link_index}",
                    account_id=account.account_id,
                    account_name=account.account_name,
                    title=title,
                    published_at=published_at,
                    summary=build_article_summary(title, sectors, sentiment),
                    source_url=url,
                    sectors=sectors,
                    sentiment=sentiment,
                    article_type=infer_article_type(title),
                )
            )
    if not articles:
        # No links entered yet: keep the report shell so the UI has a row.
        return ReportDocument(
            date=date_str,
            generated_at=iso_timestamp(),
            summary="\u5f53\u65e5\u5c1a\u672a\u5f55\u5165\u6587\u7ae0\u94fe\u63a5\uff0c\u7cfb\u7edf\u5df2\u4fdd\u7559\u65e5\u62a5\u7ed3\u6784\uff0c\u7b49\u5f85\u8865\u5145\u516c\u4f17\u53f7\u6587\u7ae0\u540e\u518d\u751f\u6210\u5b8c\u6574\u7ed3\u8bba\u3002",
            focus_sectors=[],
            article_count=0,
            account_count=0,
            articles=[],
        )
    sector_counter = Counter(sector for article in articles for sector in article.sectors)
    focus_sectors = [sector for sector, _count in sector_counter.most_common(4)]
    sentiment_counter = Counter(article.sentiment for article in articles)
    if sentiment_counter[SENTIMENT_BULL] > sentiment_counter[SENTIMENT_BEAR]:
        tone = "\u6574\u4f53\u504f\u79ef\u6781\uff0c\u4e3b\u7ebf\u8ba8\u8bba\u96c6\u4e2d\u5ea6\u8f83\u9ad8"
    elif sentiment_counter[SENTIMENT_BEAR] > sentiment_counter[SENTIMENT_BULL]:
        tone = "\u6574\u4f53\u504f\u8c28\u614e\uff0c\u98ce\u9669\u63a7\u5236\u4ecd\u662f\u4e3b\u53d9\u4e8b"
    else:
        tone = "\u591a\u7a7a\u5206\u6b67\u5e76\u5b58\uff0c\u5e02\u573a\u66f4\u770b\u91cd\u9a8c\u8bc1\u4e0e\u8282\u594f"
    active_accounts = len([account for account in input_document.accounts if account.links])
    sector_text = "\u3001".join(focus_sectors) if focus_sectors else "\u6682\u65e0\u805a\u7126\u677f\u5757"
    summary = (
        f"{date_str} \u5171\u6574\u7406 {len(articles)} \u7bc7\u516c\u4f17\u53f7\u89c2\u70b9\uff0c\u8986\u76d6 {active_accounts} \u4e2a\u8d26\u6237\u3002"
        f"{tone}\uff0c\u8ba8\u8bba\u91cd\u70b9\u843d\u5728 {sector_text}\u3002"
    )
    return ReportDocument(
        date=date_str,
        generated_at=iso_timestamp(),
        summary=summary,
        focus_sectors=focus_sectors,
        article_count=len(articles),
        account_count=active_accounts,
        articles=sorted(articles, key=lambda item: item.published_at, reverse=True),
    )


def build_cls_news_document(
    reference_time: datetime | None = None,
    *,
    allow_live_fetch: bool = True,
) -> ClsNewsDocument:
    """Assemble the CLS news document (items + summary + sector impacts).

    Tries the live cls.cn fetch when allowed; any failure — or a disallowed
    fetch for a non-current date — falls back to the canned templates.
    """
    current = reference_time or now_local()
    try:
        if allow_live_fetch:
            items = fetch_cls_telegraph_items(current)
        else:
            raise RuntimeError("Live fetch disabled for non-current date")
    except Exception:
        # Deliberate best-effort: the canned feed keeps the UI populated.
        items = build_fallback_cls_items(current)
    sector_counter = Counter(sector for item in items for sector in item.sectors)
    watch_list = [sector for sector, _count in sector_counter.most_common(5)]
    overview = (
        "\u8d44\u8baf\u5217\u8868\u5c55\u793a\u6240\u9009\u65e5\u671f\u5185\u7684\u8d22\u8054\u793e 7x24 \u8d44\u8baf\uff0c"
        "\u5f53\u65e5\u6570\u636e\u6765\u81ea cls.cn \u5b9e\u65f6\u6293\u53d6\uff0c\u6bcf 3 \u5206\u949f\u66f4\u65b0\u4e00\u6b21\u3002"
    )
    # Hoisted out of the f-string: a backslash escape inside a replacement
    # field is a SyntaxError before Python 3.12.
    top_watch_text = "\u3001".join(watch_list[:3])
    hot_topics = (
        "\u70ed\u70b9\u6982\u89c8\u53ea\u4fdd\u7559\u5bf9\u677f\u5757\u5b58\u5728\u660e\u663e\u5f71\u54cd\u7684\u65b9\u5411\uff0c"
        f"\u5f53\u524d\u4e3b\u8981\u96c6\u4e2d\u5728 {top_watch_text}\u3002"
    )
    sector_impacts: list[ClsSectorImpact] = []
    seen_sectors: set[str] = set()
    for sector in watch_list[:4]:
        if sector in seen_sectors:
            continue
        seen_sectors.add(sector)
        related_items = [item for item in items if sector in item.sectors]
        if not related_items:
            continue
        sentiment_counter = Counter(item.sentiment for item in related_items)
        if sentiment_counter[SENTIMENT_BULL] > sentiment_counter[SENTIMENT_BEAR]:
            sentiment = SENTIMENT_BULL
            reason = f"{sector} \u65b9\u5411\u51fa\u73b0\u50ac\u5316\u6216\u666f\u6c14\u5f3a\u5316\uff0c\u77ed\u7ebf\u504f\u6b63\u5411\u5f71\u54cd\u3002"
        elif sentiment_counter[SENTIMENT_BEAR] > sentiment_counter[SENTIMENT_BULL]:
            sentiment = SENTIMENT_BEAR
            reason = f"{sector} \u65b9\u5411\u51fa\u73b0\u5151\u73b0\u6216\u5206\u6b67\uff0c\u77ed\u7ebf\u504f\u8d1f\u5411\u5f71\u54cd\u3002"
        else:
            sentiment = SENTIMENT_NEUTRAL
            reason = f"{sector} \u65b9\u5411\u6709\u8ba8\u8bba\u4f46\u4ecd\u9700\u9a8c\u8bc1\uff0c\u77ed\u7ebf\u4ee5\u4e2d\u6027\u89c2\u5bdf\u4e3a\u4e3b\u3002"
        sector_impacts.append(
            ClsSectorImpact(
                sector=sector,
                sentiment=sentiment,
                reason=reason,
                related_titles=list(dict.fromkeys(item.title for item in related_items[:2])),
            )
        )
    return ClsNewsDocument(
        date=current.date().isoformat(),
        updated_at=iso_timestamp(current),
        window_label="\u5f53\u5929\u8d44\u8baf",
        summary=ClsNewsSummary(
            overview=overview,
            hot_topics=hot_topics,
            watch_list=watch_list,
        ),
        sector_impacts=sector_impacts,
        items=items,
    )


def load_cls_news(date_str: str) -> ClsNewsDocument | None:
    """Load the stored CLS news document for a date, if any."""
    return fetch_cls_news_document(date_str)


def build_reference_time(date_str: str) -> datetime:
    """Return now for today's date, otherwise 15:00 (market close) on that date."""
    date_value = datetime.fromisoformat(date_str).date()
    if date_value == now_local().date():
        return now_local()
    return datetime.combine(date_value, time(hour=15, minute=0), tzinfo=SHANGHAI)


def refresh_cls_news(date_str: str | None = None) -> ClsNewsDocument:
    """Rebuild and persist CLS news for a date (default: today).

    Live fetching is only attempted for the current date. If rebuilding
    fails and a cached document exists, the cache is returned instead.
    """
    normalized_date = normalize_date(date_str or now_local().date().isoformat())
    existing = load_cls_news(normalized_date)
    reference_time = build_reference_time(normalized_date)
    allow_live_fetch = normalized_date == now_local().date().isoformat()
    try:
        document = build_cls_news_document(reference_time, allow_live_fetch=allow_live_fetch)
    except Exception:
        if existing is not None:
            return existing
        raise
    return save_cls_news_document(document)


def get_cls_news(date_str: str | None = None) -> ClsNewsDocument:
    """Return CLS news for a date, refreshing today's document when stale."""
    normalized_date = normalize_date(date_str or now_local().date().isoformat())
    document = load_cls_news(normalized_date)
    if document is None:
        return refresh_cls_news(normalized_date)
    if normalized_date != now_local().date().isoformat():
        # Historical documents never go stale.
        return document
    updated_at = ensure_local_timezone(datetime.fromisoformat(document.updated_at))
    if now_local() - updated_at >= CLS_REFRESH_INTERVAL:
        return refresh_cls_news(normalized_date)
    return document


def seed_demo_content() -> None:
    """Seed storage with demo accounts, sample inputs/reports, and today's news."""
    save_accounts(ACCOUNTS)
    today = now_local().date()
    for offset, account_links in SAMPLE_INPUTS.items():
        date_str = (today - timedelta(days=offset)).isoformat()
        # Skip days that already have both an input document and a report.
        if fetch_daily_input_document(date_str) is not None and fetch_report_document(date_str) is not None:
            continue
        payload = DailyInputUpsertPayload(
            accounts=[
                {"account_id": account.id, "links": account_links.get(account.id, [])}
                for account in ACCOUNTS
            ]
        )
        input_document = normalize_daily_input(date_str, payload)
        save_daily_input_document(input_document)
        save_report_document(generate_report(date_str, input_document))
    today_str = today.isoformat()
    if fetch_cls_news_document(today_str) is None:
        save_cls_news_document(build_cls_news_document())