# from crawlers import PureHTMLParser # type: ignore
from analyzer import PureAnalyzer # type: ignore
from crawlers_core import CrawlerEngine
from catch import CacheManager
from manger import ConnectionManager
from utils.logger import setup_logging
class PureInfoHunter:
    """Orchestrates crawling, analysis, and report generation for a search query."""

    def __init__(self):
        # Single shared cache manager; the crawler receives it so it can
        # serve cached results before hitting the network.
        self.cache_manager = CacheManager()
        self.crawler = CrawlerEngine(self.cache_manager)
        self.analyzer = PureAnalyzer()
        # BUG FIX: the original created a *second*, independent CacheManager
        # here, so cached data was split across two stores. Keep the `catch`
        # attribute as an alias for backward compatibility with any callers.
        self.catch = self.cache_manager

    def run(self, query: str) -> str:
        """Fetch data for *query* (cache-first), analyze it, and return a report.

        The report is also persisted to ``reports/<query>_report.txt``.

        Args:
            query: The search phrase to crawl and analyze.

        Returns:
            The formatted report text.
        """
        # 1. Fetch data (cache-first — the crawler consults the shared cache).
        # BUG FIX: the original did `self.catch(query)`, i.e. *called* a
        # CacheManager instance; the working call pattern elsewhere in this
        # file is crawler.crawl(query).
        data = self.crawler.crawl(query)
        # 2. Analyze (the analyzer looks up historical cache on its own).
        result = self.analyzer.analyze(data, query)
        # 3. Build the report text.
        report = "=" * 40 + "\n"
        report += f"搜索词: {query}\n"
        if result.get("related_history"):
            report += f"关联历史记录: {', '.join(result['related_history'])}\n"
        report += "\n分析结果:\n" + result["summary"] + "\n"
        report += "数据来源:\n"
        for url in result["sources"]:
            report += f"- {url}\n"
        # Persist this run's report.
        self._save_report(query, report)
        return report

    def _save_report(self, query: str, content: str) -> None:
        """Save *content* under reports/, using a filesystem-safe file name."""
        # Replace every non-alphanumeric character so the query is path-safe.
        safe_query = "".join(c if c.isalnum() else "_" for c in query)
        with open(f"reports/{safe_query}_report.txt", "w", encoding="utf-8") as f:
            f.write(content)
        print(f"报告已保存到 reports/{safe_query}_report.txt")

    def sync_local_cache(self):
        """Pull all updates from the cloud side into the local cache.

        NOTE(review): `self.mode`, `self.cloud`, and `self.local` are never
        initialized in __init__, so this method raises AttributeError as
        written — it appears to belong on ConnectionManager. TODO: confirm
        and either wire up these attributes or move/remove this method.
        """
        if self.mode == 'local':
            cloud_data = self.cloud.execute("get_all_updates")
            self.local.save_cache(cloud_data)
if __name__ == "__main__":
import sys
import os
os.makedirs("reports", exist_ok=True)
# 处理参数缺失的情况
if len(sys.argv) < 2:
print("使用方法: python pure_main.py '搜索关键词' [force_update]")
print("示例: python pure_main.py '人工智能' true")
query = input("请输入要搜索的关键词: ") # 改为交互式输入
force_update = input("是否强制更新(true/false)? ").lower() == "true"
else:
# query = sys.argv[1]
force_update = len(sys.argv) > 2 and sys.argv[2].lower() == "true"
hunter = PureInfoHunter()
setup_logging()
parser = argparse.ArgumentParser()
parser.add_argument('command', help='执行指令或查询')
parser.add_argument('--local', action='store_true',
help='强制使用本地模式')
args = parser.parse_args()
manager = ConnectionManager()
try:
if args.local:
manager.mode = 'local'
result = manager.execute(args.command)
print(f"✅ 执行成功 (模式: {manager.mode.upper()})")
print(result)
except Exception as e:
print(f"❌ 执行失败: {str(e)}")
exit(1)
if force_update:
print("强制更新模式(忽略缓存)")
data = hunter.crawler.crawl(query) # 使用实际存在的方法名
result = hunter.analyzer.analyze(data, query)
result = hunter.run(query)