diff --git a/FFAI/__pycache__/analyzer.cpython-313.pyc b/FFAIall/__pycache__/analyzer.cpython-313.pyc
similarity index 100%
rename from FFAI/__pycache__/analyzer.cpython-313.pyc
rename to FFAIall/__pycache__/analyzer.cpython-313.pyc
diff --git a/FFAI/__pycache__/crawlers.cpython-313.pyc b/FFAIall/__pycache__/crawlers.cpython-313.pyc
similarity index 100%
rename from FFAI/__pycache__/crawlers.cpython-313.pyc
rename to FFAIall/__pycache__/crawlers.cpython-313.pyc
diff --git a/FFAI/analyzer.py b/FFAIall/analyzer.py
similarity index 100%
rename from FFAI/analyzer.py
rename to FFAIall/analyzer.py
diff --git a/FFAI/crawlers.py b/FFAIall/crawlers.py
similarity index 100%
rename from FFAI/crawlers.py
rename to FFAIall/crawlers.py
diff --git a/FFAI/main.py b/FFAIall/main.py
similarity index 98%
rename from FFAI/main.py
rename to FFAIall/main.py
index 10121fc..414b8ce 100644
--- a/FFAI/main.py
+++ b/FFAIall/main.py
@@ -44,7 +44,7 @@ if __name__ == "__main__":
     if len(sys.argv) < 2:
         print("Usage: python pure_main.py 'search keyword' [force_update]")
         print("Example: python pure_main.py 'artificial intelligence' true")
-        # sys.exit(1)
+        sys.exit(1)
 
     force_update = len(sys.argv) > 2 and sys.argv[2].lower() == "true"
     hunter = PureInfoHunter()
diff --git a/FFAInobug/__pycache__/analyzer.cpython-313.pyc b/FFAInobug/__pycache__/analyzer.cpython-313.pyc
new file mode 100644
index 0000000..ca0b8e4
Binary files /dev/null and b/FFAInobug/__pycache__/analyzer.cpython-313.pyc differ
diff --git a/FFAInobug/__pycache__/crawlers.cpython-313.pyc b/FFAInobug/__pycache__/crawlers.cpython-313.pyc
new file mode 100644
index 0000000..8bfb92a
Binary files /dev/null and b/FFAInobug/__pycache__/crawlers.cpython-313.pyc differ
diff --git a/FFAInobug/analyzer.py b/FFAInobug/analyzer.py
new file mode 100644
index 0000000..19b68d5
--- /dev/null
+++ b/FFAInobug/analyzer.py
@@ -0,0 +1,45 @@
+import os
+import re
+from collections import Counter
+
+class PureAnalyzer:
+    @staticmethod
+    def search_in_cache(query: str, cache_dir="cache") -> list:
+        """Search the cache for records from earlier queries."""
+        if not os.path.exists(cache_dir):
+            return []
+
+        related_files = []
+        safe_query = query.lower()
+        for filename in os.listdir(cache_dir):
+            if safe_query in filename.lower():
+                with open(f"{cache_dir}/{filename}", "r", encoding="utf-8") as f:
+                    content = f.read()
+                related_files.append({
+                    "query": filename.replace(".txt", ""),
+                    "content": content
+                })
+        return related_files
+
+    @staticmethod
+    def analyze(data: list, query: str) -> dict:
+        # First look for related records in the cache
+        history = PureAnalyzer.search_in_cache(query)
+
+        # Merge new and cached data
+        all_text = " ".join(d.get("text", "") for d in data)
+        if history:
+            all_text += " " + " ".join(h["content"] for h in history)
+
+        # ...(original analysis logic elided in the source)...
+        # NOTE: minimal stand-in so the module runs; the elided logic may differ.
+        words = re.findall(r"\w+", all_text.lower())
+        keywords = [w for w, _ in Counter(words).most_common(10)]
+        summary = all_text[:200]
+
+        return {
+            "summary": summary,
+            "keywords": keywords,
+            "sources": [d["url"] for d in data],
+            "related_history": [h["query"] for h in history]
+        }
\ No newline at end of file
diff --git a/FFAInobug/crawlers.py b/FFAInobug/crawlers.py
new file mode 100644
index 0000000..e0130b2
--- /dev/null
+++ b/FFAInobug/crawlers.py
@@ -0,0 +1,95 @@
+import urllib.request
+import os
+import re
+import time
+from urllib.parse import quote
+from html.parser import HTMLParser
+
+class PureHTMLParser(HTMLParser):
+    # ...(previous HTML parser code elided in the source)...
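+    # NOTE: a minimal stand-in for the elided parser, added so the module runs;
+    # it only collects absolute link targets into self.links, which fetch()
+    # reads below. The original implementation may have done more.
+    def __init__(self):
+        super().__init__()
+        self.links = []
+
+    def handle_starttag(self, tag, attrs):
+        if tag == "a":
+            href = dict(attrs).get("href")
+            if href and href.startswith("http"):
+                self.links.append(href)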
+
+class PureCrawler:
+    def __init__(self, cache_dir="cache"):
+        self.user_agent = "Mozilla/5.0"
+        self.parser = PureHTMLParser()
+        self.cache_dir = cache_dir
+        os.makedirs(cache_dir, exist_ok=True)
+
+    def _get_cache_path(self, query: str) -> str:
+        """Build the cache file path for a query."""
+        safe_query = "".join(c if c.isalnum() else "_" for c in query)
+        return f"{self.cache_dir}/{safe_query}.txt"
+
+    def _save_to_cache(self, query: str, data: list):
+        """Save search results to the cache."""
+        with open(self._get_cache_path(query), "w", encoding="utf-8") as f:
+            for item in data:
+                f.write(f"URL: {item['url']}\n")
+                f.write(f"Text: {item['text']}\n")
+                f.write("="*50 + "\n")
+
+    def _load_from_cache(self, query: str) -> list:
+        """Load data from the cache."""
+        cache_file = self._get_cache_path(query)
+        if not os.path.exists(cache_file):
+            return None
+
+        with open(cache_file, "r", encoding="utf-8") as f:
+            content = f.read()
+
+        # Parse the cache file back into records
+        items = []
+        for block in content.split("="*50):
+            if not block.strip():
+                continue
+            url = text = ""
+            for line in block.split("\n"):
+                if line.startswith("URL: "):
+                    url = line[5:]
+                elif line.startswith("Text: "):
+                    text = line[6:]
+            if url:
+                items.append({"url": url, "text": text})
+        return items
+
+    def fetch(self, query: str, force_update=False) -> list:
+        """Read from the cache first; crawl only when nothing is cached."""
+        if not force_update:
+            cached = self._load_from_cache(query)
+            if cached:
+                print("📂 Loading data from cache")
+                return cached
+
+        print("🌐 Crawling the web...")
+        # ...(original crawl logic elided in the source)...
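+        # NOTE: a stand-in crawl step, assuming Bing's public HTML search page;
+        # the elided crawl logic may have used a different source.
+        search_url = f"https://www.bing.com/search?q={quote(query)}"
+        req = urllib.request.Request(search_url, headers={"User-Agent": self.user_agent})
+        with urllib.request.urlopen(req, timeout=10) as resp:
+            self.parser.feed(resp.read().decode("utf-8", errors="ignore"))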
+        data = [{"url": link, "text": self.extract_text(link)} for link in self.parser.links[:5]]
+
+        self._save_to_cache(query, data)
+        return data
+
+    def extract_text(self, url: str) -> str:
+        # ...(original text-extraction logic elided in the source)...
+        # NOTE: crude stand-in so the module runs: fetch the page and strip tags.
+        req = urllib.request.Request(url, headers={"User-Agent": self.user_agent})
+        with urllib.request.urlopen(req, timeout=10) as resp:
+            html = resp.read().decode("utf-8", errors="ignore")
+        extracted_text = re.sub(r"<[^>]+>", " ", html)
+        return " ".join(extracted_text.split())
\ No newline at end of file
diff --git a/FFAInobug/main.py b/FFAInobug/main.py
new file mode 100644
index 0000000..5cc8931
--- /dev/null
+++ b/FFAInobug/main.py
@@ -0,0 +1,59 @@
+from crawlers import PureCrawler  # type: ignore
+from analyzer import PureAnalyzer  # type: ignore
+
+class PureInfoHunter:
+    def __init__(self):
+        self.crawler = PureCrawler()
+        self.analyzer = PureAnalyzer()
+
+    def run(self, query: str):
+        # 1. Fetch data (cache first)
+        data = self.crawler.fetch(query)
+
+        # 2. Analyze (also searches cached history)
+        result = self.analyzer.analyze(data, query)
+
+        # 3. Generate the report
+        report = "="*40 + "\n"
+        report += f"Query: {query}\n"
+
+        if result.get("related_history"):
+            report += f"Related history: {', '.join(result['related_history'])}\n"
+
+        report += "\nAnalysis:\n" + result["summary"] + "\n"
+        report += "Sources:\n"
+        for url in result["sources"]:
+            report += f"- {url}\n"
+
+        # Save this run's report
+        self._save_report(query, report)
+        return report
+
+    def _save_report(self, query: str, content: str):
+        """Save the analysis report."""
+        safe_query = "".join(c if c.isalnum() else "_" for c in query)
+        with open(f"reports/{safe_query}_report.txt", "w", encoding="utf-8") as f:
+            f.write(content)
+        print(f"Report saved to reports/{safe_query}_report.txt")
+
+if __name__ == "__main__":
+    import sys
+    import os
+    os.makedirs("reports", exist_ok=True)
+
+    if len(sys.argv) < 2:
+        print("Usage: python pure_main.py 'search keyword' [force_update]")
+        print("Example: python pure_main.py 'artificial intelligence' true")
+        sys.exit(1)
+
+    force_update = len(sys.argv) > 2 and sys.argv[2].lower() == "true"
+    hunter = PureInfoHunter()
+
+    if force_update:
+        print("Force update mode (ignoring cache)")
+        data = hunter.crawler.fetch(sys.argv[1], force_update=True)
+        result = hunter.analyzer.analyze(data, sys.argv[1])
+    else:
+        result = hunter.run(sys.argv[1])
+
+    print(result)
\ No newline at end of file
diff --git a/main/dist/newtest/_internal/VCRUNTIME140.dll b/main/dist/newtest/_internal/VCRUNTIME140.dll
deleted file mode 100644
index 411009b..0000000
Binary files a/main/dist/newtest/_internal/VCRUNTIME140.dll and /dev/null differ