# main.py — CLI entry point: crawl data for a search query, analyze it, and emit a report.
# (extraction residue removed here: git status line and file size/line-count metadata)
from crawlers import PureHTMLParser # type: ignore
from analyzer import PureAnalyzer # type: ignore
class PureInfoHunter:
    """Coordinate crawling, analysis, and report generation for a search query."""

    def __init__(self):
        # Crawler fetches raw page data; analyzer summarizes it
        # (both are project-local components imported at module top).
        self.crawler = PureHTMLParser()
        self.analyzer = PureAnalyzer()

    def run(self, query: str, force_update: bool = False) -> str:
        """Fetch data for *query*, analyze it, and return the formatted report.

        Args:
            query: Search keyword(s).
            force_update: When True, ask the crawler to bypass its cache and
                refetch. Defaults to False (cache-first), preserving the
                original behavior for existing callers.

        Returns:
            The human-readable report text (also saved under ``reports/``).
        """
        # 1. Fetch data (cache-first unless force_update is set).
        if force_update:
            data = self.crawler.fetch(query, force_update=True)
        else:
            data = self.crawler.fetch(query)

        # 2. Analyze (the analyzer may consult its historical cache —
        #    presumably; semantics live in PureAnalyzer, not visible here).
        result = self.analyzer.analyze(data, query)

        # 3. Build the report text.
        report = self._format_report(query, result)

        # 4. Persist this run's report, then hand it back to the caller.
        self._save_report(query, report)
        return report

    def _format_report(self, query: str, result: dict) -> str:
        """Render an analysis *result* dict into the report text.

        Expects ``result`` to contain ``"summary"`` (str) and ``"sources"``
        (iterable of URLs); ``"related_history"`` (list of str) is optional.
        """
        report = "=" * 40 + "\n"
        report += f"搜索词: {query}\n"

        if result.get("related_history"):
            report += f"关联历史记录: {', '.join(result['related_history'])}\n"

        report += "\n分析结果:\n" + result["summary"] + "\n"
        report += "数据来源:\n"
        for url in result["sources"]:
            report += f"- {url}\n"
        return report

    def _save_report(self, query: str, content: str) -> None:
        """Write *content* to ``reports/<sanitized-query>_report.txt``.

        The query is sanitized to alphanumerics/underscores so it is safe as
        a filename component on any platform.
        """
        import os  # local import: module top only pulls in project code

        # Robustness fix: don't rely on the __main__ guard having created
        # the directory — this method must also work when the class is
        # imported and used from other code.
        os.makedirs("reports", exist_ok=True)

        safe_query = "".join(c if c.isalnum() else "_" for c in query)
        path = f"reports/{safe_query}_report.txt"
        with open(path, "w", encoding="utf-8") as f:
            f.write(content)
        print(f"报告已保存到 reports/{safe_query}_report.txt")
if __name__ == "__main__":
    import os
    import sys

    os.makedirs("reports", exist_ok=True)

    # Fall back to interactive prompts when the CLI argument is missing.
    if len(sys.argv) < 2:
        print("使用方法: python pure_main.py '搜索关键词' [force_update]")
        print("示例: python pure_main.py '人工智能' true")
        query = input("请输入要搜索的关键词: ")
        force_update = input("是否强制更新(true/false)? ").lower() == "true"
    else:
        query = sys.argv[1]
        force_update = len(sys.argv) > 2 and sys.argv[2].lower() == "true"

    hunter = PureInfoHunter()

    if force_update:
        print("强制更新模式(忽略缓存)")
        data = hunter.crawler.fetch(query, force_update=True)
        result = hunter.analyzer.analyze(data, query)
        # BUG FIX: this branch used to print the raw result dict and never
        # produced or saved a report. Build the same report layout as
        # PureInfoHunter.run() so both modes behave consistently.
        report = "=" * 40 + "\n"
        report += f"搜索词: {query}\n"
        if result.get("related_history"):
            report += f"关联历史记录: {', '.join(result['related_history'])}\n"
        report += "\n分析结果:\n" + result["summary"] + "\n"
        report += "数据来源:\n"
        for url in result["sources"]:
            report += f"- {url}\n"
        hunter._save_report(query, report)
    else:
        report = hunter.run(query)

    print(report)