deleted: "FFAIall\357\274\210\346\232\202\345\201\234\357\274\211/__pycache__/analyzer.cpython-313.pyc"
deleted: "FFAIall\357\274\210\346\232\202\345\201\234\357\274\211/__pycache__/crawlers.cpython-313.pyc" deleted: "FFAIall\357\274\210\346\232\202\345\201\234\357\274\211/analyzer.py" deleted: "FFAIall\357\274\210\346\232\202\345\201\234\357\274\211/crawlers.py" deleted: "FFAIall\357\274\210\346\232\202\345\201\234\357\274\211/main.py"
This commit is contained in:
parent a7abe9d506
commit 1b490d774f
Binary file not shown.
Binary file not shown.
@@ -1,39 +0,0 @@
import re
import os  # used by search_in_cache below
from collections import Counter


class PureAnalyzer:

    @staticmethod
    def search_in_cache(query: str, cache_dir="cache") -> list:
        """Search the cache for related history records."""
        if not os.path.exists(cache_dir):
            return []

        related_files = []
        safe_query = query.lower()
        for filename in os.listdir(cache_dir):
            if safe_query in filename.lower():
                with open(f"{cache_dir}/{filename}", "r", encoding="utf-8") as f:
                    content = f.read()
                related_files.append({
                    "query": filename.replace(".txt", ""),
                    "content": content
                })
        return related_files

    @staticmethod
    def analyze(data: list, query: str) -> dict:
        # First check the cache for related records
        history = PureAnalyzer.search_in_cache(query)

        # Merge new and cached data
        all_text = " ".join(d.get("text", "") for d in data)
        if history:
            all_text += " " + " ".join(h["content"] for h in history)

        # ... (original analysis logic unchanged; it defines summary and keywords) ...
        return {
            "summary": summary,
            "keywords": keywords,
            "sources": [d["url"] for d in data],
            "related_history": [h["query"] for h in history]
        }
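The analysis step itself is elided in the file above, which is why summary and keywords appear only in the return statement. A minimal sketch of what that step could look like, using only the modules the file already imports (re, Counter); the keyword count and summary length are illustrative assumptions, not taken from the original code:

import re
from collections import Counter

def naive_analyze(all_text: str):
    # Hypothetical stand-in for the elided analysis logic.
    # Keywords: the ten most frequent word-like tokens in the merged text.
    words = re.findall(r"\w+", all_text.lower())
    keywords = [w for w, _ in Counter(words).most_common(10)]
    # Summary: naively, the first 200 characters of the merged text.
    summary = all_text[:200]
    return summary, keywords

Inside analyze(), the equivalent call would be summary, keywords = naive_analyze(all_text) just before the return.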
@@ -1,71 +0,0 @@
import urllib.request
import os
import time
from urllib.parse import quote
from html.parser import HTMLParser


class PureHTMLParser(HTMLParser):
    # ... (HTML parser code unchanged from before) ...


class PureCrawler:
    def __init__(self, cache_dir="cache"):
        self.user_agent = "Mozilla/5.0"
        self.parser = PureHTMLParser()
        self.cache_dir = cache_dir
        os.makedirs(cache_dir, exist_ok=True)

    def _get_cache_path(self, query: str) -> str:
        """Build the cache file name for a query."""
        safe_query = "".join(c if c.isalnum() else "_" for c in query)
        return f"{self.cache_dir}/{safe_query}.txt"

    def _save_to_cache(self, query: str, data: list):
        """Save search results to the cache."""
        with open(self._get_cache_path(query), "w", encoding="utf-8") as f:
            for item in data:
                f.write(f"URL: {item['url']}\n")
                f.write(f"Text: {item['text']}\n")
                f.write("="*50 + "\n")

    def _load_from_cache(self, query: str) -> list:
        """Load data from the cache."""
        cache_file = self._get_cache_path(query)
        if not os.path.exists(cache_file):
            return None

        with open(cache_file, "r", encoding="utf-8") as f:
            content = f.read()

        # Parse the cache file back into a list of items
        items = []
        for block in content.split("="*50):
            if not block.strip():
                continue
            url = text = ""
            for line in block.split("\n"):
                if line.startswith("URL: "):
                    url = line[5:]
                elif line.startswith("Text: "):
                    text = line[6:]
            if url:
                items.append({"url": url, "text": text})
        return items

    def fetch(self, query: str, force_update=False) -> list:
        """Read from the cache first; crawl only when nothing is cached."""
        if not force_update:
            cached = self._load_from_cache(query)
            if cached:
                print("📂 Loading data from cache")
                return cached

        print("🌐 Crawling the web...")
        # ... (original crawling logic unchanged; it populates self.parser.links) ...
        data = [{"url": link, "text": self.extract_text(link)} for link in self.parser.links[:5]]

        self._save_to_cache(query, data)
        return data

    def extract_text(self, url: str) -> str:
        # ... (original text-extraction logic unchanged; it sets extracted_text) ...
        return extracted_text
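Both the PureHTMLParser body and the text-extraction logic are elided in the file above. A minimal sketch of what they might have looked like, using only modules the file already imports (urllib.request, HTMLParser); the link filter, the timeout, and the standalone extract_text helper are illustrative assumptions rather than the original implementation:

import urllib.request
from html.parser import HTMLParser

class PureHTMLParser(HTMLParser):
    # Hypothetical link and text collector.
    def __init__(self):
        super().__init__()
        self.links = []
        self.text_parts = []

    def handle_starttag(self, tag, attrs):
        # Remember absolute links found in <a href="..."> tags.
        if tag == "a":
            for name, value in attrs:
                if name == "href" and value and value.startswith("http"):
                    self.links.append(value)

    def handle_data(self, data):
        # Accumulate visible text so it can be returned as page content.
        if data.strip():
            self.text_parts.append(data.strip())

def extract_text(url: str, user_agent: str = "Mozilla/5.0") -> str:
    # Hypothetical stand-in for PureCrawler.extract_text(): download the page
    # and return its visible text via the parser above.
    req = urllib.request.Request(url, headers={"User-Agent": user_agent})
    with urllib.request.urlopen(req, timeout=10) as resp:
        html = resp.read().decode("utf-8", errors="ignore")
    parser = PureHTMLParser()
    parser.feed(html)
    return " ".join(parser.text_parts)

fetch() additionally needs a search request that fills self.parser.links before the list comprehension runs; that URL is not shown anywhere in the diff, so it is left out here.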
@@ -1,59 +0,0 @@
from crawlers import PureCrawler  # type: ignore
from analyzer import PureAnalyzer  # type: ignore
import sys
import os


class PureInfoHunter:
    def __init__(self):
        self.crawler = PureCrawler()  # the fetch()/cache API lives on PureCrawler, not PureHTMLParser
        self.analyzer = PureAnalyzer()

    def run(self, query: str):
        # 1. Fetch data (cache first)
        data = self.crawler.fetch(query)

        # 2. Analyze (automatically searches the history cache)
        result = self.analyzer.analyze(data, query)

        # 3. Build the report
        report = "="*40 + "\n"
        report += f"Search query: {query}\n"

        if result.get("related_history"):
            report += f"Related history: {', '.join(result['related_history'])}\n"

        report += "\nAnalysis:\n" + result["summary"] + "\n"
        report += "Sources:\n"
        for url in result["sources"]:
            report += f"- {url}\n"

        # Save this report
        self._save_report(query, report)
        return report

    def _save_report(self, query: str, content: str):
        """Save the analysis report."""
        safe_query = "".join(c if c.isalnum() else "_" for c in query)
        with open(f"reports/{safe_query}_report.txt", "w", encoding="utf-8") as f:
            f.write(content)
        print(f"Report saved to reports/{safe_query}_report.txt")


if __name__ == "__main__":
    os.makedirs("reports", exist_ok=True)

    if len(sys.argv) < 2:
        print("Usage: python pure_main.py '<search keywords>' [force_update]")
        print("Example: python pure_main.py 'artificial intelligence' true")
        sys.exit(1)

    force_update = len(sys.argv) > 2 and sys.argv[2].lower() == "true"
    hunter = PureInfoHunter()

    if force_update:
        print("Force-update mode (cache ignored)")
        data = hunter.crawler.fetch(sys.argv[1], force_update=True)
        result = hunter.analyzer.analyze(data, sys.argv[1])
    else:
        result = hunter.run(sys.argv[1])

    print(result)
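Taken together, the three deleted modules formed a cache-first pipeline: crawl or load cached results, analyze them together with related history, then write a report. A short, hypothetical usage sketch (it assumes the files are importable as crawlers, analyzer, and main, as the imports above suggest; the query string is only an example):

from main import PureInfoHunter

hunter = PureInfoHunter()
# Cache-first run: fetch (or load from cache/), analyze, save a report under reports/.
report = hunter.run("artificial intelligence")
print(report)

# Equivalent of the force_update branch: ignore the cache and re-crawl.
data = hunter.crawler.fetch("artificial intelligence", force_update=True)
result = hunter.analyzer.analyze(data, "artificial intelligence")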