new file: FFAI/__pycache__/analyzer.cpython-313.pyc

new file:   FFAI/__pycache__/crawlers.cpython-313.pyc
	new file:   FFAI/analyzer.py
	new file:   FFAI/crawlers.py
	new file:   FFAI/main.py
	renamed:    main/build/newtest/Analysis-00.toc -> test/build/newtest/Analysis-00.toc
	renamed:    main/build/newtest/COLLECT-00.toc -> test/build/newtest/COLLECT-00.toc
	renamed:    main/build/newtest/EXE-00.toc -> test/build/newtest/EXE-00.toc
	renamed:    main/build/newtest/PKG-00.toc -> test/build/newtest/PKG-00.toc
	renamed:    main/build/newtest/PYZ-00.pyz -> test/build/newtest/PYZ-00.pyz
	renamed:    main/build/newtest/PYZ-00.toc -> test/build/newtest/PYZ-00.toc
	renamed:    main/build/newtest/base_library.zip -> test/build/newtest/base_library.zip
	renamed:    main/build/newtest/localpycs/pyimod01_archive.pyc -> test/build/newtest/localpycs/pyimod01_archive.pyc
	renamed:    main/build/newtest/localpycs/pyimod02_importers.pyc -> test/build/newtest/localpycs/pyimod02_importers.pyc
	renamed:    main/build/newtest/localpycs/pyimod03_ctypes.pyc -> test/build/newtest/localpycs/pyimod03_ctypes.pyc
	renamed:    main/build/newtest/localpycs/pyimod04_pywin32.pyc -> test/build/newtest/localpycs/pyimod04_pywin32.pyc
	renamed:    main/build/newtest/localpycs/struct.pyc -> test/build/newtest/localpycs/struct.pyc
	renamed:    main/build/newtest/newtest.exe -> test/build/newtest/newtest.exe
	renamed:    main/build/newtest/newtest.pkg -> test/build/newtest/newtest.pkg
	renamed:    main/build/newtest/warn-newtest.txt -> test/build/newtest/warn-newtest.txt
	renamed:    main/build/newtest/xref-newtest.html -> test/build/newtest/xref-newtest.html
	new file:   test/dist/newtest/_internal/VCRUNTIME140.dll
	renamed:    main/dist/newtest/_internal/_bz2.pyd -> test/dist/newtest/_internal/_bz2.pyd
	renamed:    main/dist/newtest/_internal/_decimal.pyd -> test/dist/newtest/_internal/_decimal.pyd
	renamed:    main/dist/newtest/_internal/_hashlib.pyd -> test/dist/newtest/_internal/_hashlib.pyd
	renamed:    main/dist/newtest/_internal/_lzma.pyd -> test/dist/newtest/_internal/_lzma.pyd
	renamed:    main/dist/newtest/_internal/_socket.pyd -> test/dist/newtest/_internal/_socket.pyd
	renamed:    main/dist/newtest/_internal/api-ms-win-core-console-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-core-console-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-core-datetime-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-core-datetime-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-core-debug-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-core-debug-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-core-errorhandling-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-core-errorhandling-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-core-fibers-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-core-fibers-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-core-file-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-core-file-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-core-file-l1-2-0.dll -> test/dist/newtest/_internal/api-ms-win-core-file-l1-2-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-core-file-l2-1-0.dll -> test/dist/newtest/_internal/api-ms-win-core-file-l2-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-core-handle-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-core-handle-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-core-heap-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-core-heap-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-core-interlocked-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-core-interlocked-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-core-libraryloader-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-core-libraryloader-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-core-localization-l1-2-0.dll -> test/dist/newtest/_internal/api-ms-win-core-localization-l1-2-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-core-memory-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-core-memory-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-core-namedpipe-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-core-namedpipe-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-core-processenvironment-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-core-processenvironment-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-core-processthreads-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-core-processthreads-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-core-processthreads-l1-1-1.dll -> test/dist/newtest/_internal/api-ms-win-core-processthreads-l1-1-1.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-core-profile-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-core-profile-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-core-rtlsupport-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-core-rtlsupport-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-core-string-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-core-string-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-core-synch-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-core-synch-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-core-synch-l1-2-0.dll -> test/dist/newtest/_internal/api-ms-win-core-synch-l1-2-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-core-sysinfo-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-core-sysinfo-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-core-timezone-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-core-timezone-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-core-util-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-core-util-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-crt-conio-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-crt-conio-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-crt-convert-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-crt-convert-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-crt-environment-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-crt-environment-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-crt-filesystem-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-crt-filesystem-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-crt-heap-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-crt-heap-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-crt-locale-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-crt-locale-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-crt-math-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-crt-math-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-crt-process-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-crt-process-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-crt-runtime-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-crt-runtime-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-crt-stdio-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-crt-stdio-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-crt-string-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-crt-string-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-crt-time-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-crt-time-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/api-ms-win-crt-utility-l1-1-0.dll -> test/dist/newtest/_internal/api-ms-win-crt-utility-l1-1-0.dll
	renamed:    main/dist/newtest/_internal/base_library.zip -> test/dist/newtest/_internal/base_library.zip
	renamed:    main/dist/newtest/_internal/libcrypto-3.dll -> test/dist/newtest/_internal/libcrypto-3.dll
	renamed:    main/dist/newtest/_internal/python313.dll -> test/dist/newtest/_internal/python313.dll
	renamed:    main/dist/newtest/_internal/select.pyd -> test/dist/newtest/_internal/select.pyd
	renamed:    main/dist/newtest/_internal/ucrtbase.dll -> test/dist/newtest/_internal/ucrtbase.dll
	renamed:    main/dist/newtest/_internal/unicodedata.pyd -> test/dist/newtest/_internal/unicodedata.pyd
	renamed:    main/dist/newtest/newtest.exe -> test/dist/newtest/newtest.exe
	renamed:    main/newtest.py -> test/newtest.py
	renamed:    main/newtest.spec -> test/newtest.spec
This commit is contained in:
Friendfeng 2025-06-06 23:33:48 +08:00
parent 55b6b69515
commit 925c5e166b
75 changed files with 169 additions and 0 deletions

Binary file not shown.

Binary file not shown.

39
FFAI/analyzer.py Normal file
View File

@ -0,0 +1,39 @@
import os
import re
from collections import Counter
class PureAnalyzer:
    """Analyse freshly crawled documents together with cached history."""

    @staticmethod
    def search_in_cache(query: str, cache_dir="cache") -> list:
        """Return cached result files whose name contains *query*.

        Cache files are named after the *sanitised* query (every
        non-alphanumeric character replaced by ``_``), so the lookup tries
        both the raw and the sanitised form of *query*; otherwise a query
        such as ``"machine learning"`` could never match
        ``machine_learning.txt``.  Matching is case-insensitive.

        Args:
            query: Search term to look for in cache file names.
            cache_dir: Directory holding the cache files.

        Returns:
            A list of ``{"query": <file name without ".txt">,
            "content": <file text>}`` dicts, ordered by file name.  Empty
            when *cache_dir* is not an existing directory or nothing matches.
        """
        if not os.path.isdir(cache_dir):
            return []

        sanitized = "".join(c if c.isalnum() else "_" for c in query)
        needles = {query.lower(), sanitized.lower()}

        related_files = []
        # Sorted so the result order does not depend on the file system.
        for filename in sorted(os.listdir(cache_dir)):
            path = os.path.join(cache_dir, filename)
            lowered = filename.lower()
            # Skip sub-directories: open() on them would raise.
            if not os.path.isfile(path):
                continue
            if not any(needle in lowered for needle in needles):
                continue
            with open(path, "r", encoding="utf-8") as f:
                content = f.read()
            # Strip only the trailing extension, not every ".txt" substring.
            stem = filename[:-len(".txt")] if filename.endswith(".txt") else filename
            related_files.append({"query": stem, "content": content})
        return related_files

    @staticmethod
    def analyze(data: list, query: str) -> dict:
        """Combine *data* with related cached history and analyse the text.

        Args:
            data: Items shaped like ``{"url": ..., "text": ...}``.
            query: The search term the data was fetched for.

        Returns:
            A dict with the keys ``summary``, ``keywords``, ``sources`` and
            ``related_history``.
        """
        # Look up related records in the cache first.  The cache file that
        # belongs to this very query holds the same documents as `data`, so
        # it is excluded: counting it would duplicate the text and list the
        # query as its own "history".
        own_key = "".join(c if c.isalnum() else "_" for c in query)
        history = [
            h for h in PureAnalyzer.search_in_cache(query)
            if h["query"] != own_key
        ]

        # Merge new and historical text.
        all_text = " ".join(d.get("text", "") for d in data)
        if history:
            all_text += " " + " ".join(h["content"] for h in history)

        # ... (the original analysis logic is retained here) ...
        # NOTE(review): `summary` and `keywords` are produced by the elided
        # analysis logic above; without it this method raises NameError.
        return {
            "summary": summary,
            "keywords": keywords,
            "sources": [d["url"] for d in data],
            "related_history": [h["query"] for h in history],
        }

71
FFAI/crawlers.py Normal file
View File

@ -0,0 +1,71 @@
import urllib.request
import os
import time
from urllib.parse import quote
from html.parser import HTMLParser
class PureHTMLParser(HTMLParser):
    # ... (the previously written HTML parser code is kept unchanged here) ...
    # NOTE(review): the class body is elided in this view. PureCrawler.fetch
    # reads `self.parser.links`, so the parser presumably collects link
    # targets into a `links` list - confirm against the full implementation.
class PureCrawler:
    """Fetch search results for a query, backed by a plain-text file cache.

    Every query maps to one ``<cache_dir>/<sanitised query>.txt`` file that
    holds a sequence of records::

        URL: <url>
        Text: <text, may span several lines>
        ==================================================

    Limitation: a text that itself contains a line consisting of exactly
    fifty ``=`` characters cannot be told apart from a record separator.
    """

    # Line that terminates every record in a cache file.
    _RECORD_SEPARATOR = "=" * 50

    def __init__(self, cache_dir="cache"):
        """Create the crawler and make sure *cache_dir* exists."""
        self.user_agent = "Mozilla/5.0"
        self.parser = PureHTMLParser()
        self.cache_dir = cache_dir
        os.makedirs(cache_dir, exist_ok=True)

    def _get_cache_path(self, query: str) -> str:
        """Return the cache file path for *query*.

        Every non-alphanumeric character is replaced by ``_`` so the query
        can be used as a file name.
        """
        safe_query = "".join(c if c.isalnum() else "_" for c in query)
        return f"{self.cache_dir}/{safe_query}.txt"

    def _save_to_cache(self, query: str, data: list):
        """Write *data* (dicts with ``url`` and ``text``) to the cache file."""
        with open(self._get_cache_path(query), "w", encoding="utf-8") as f:
            for item in data:
                f.write(f"URL: {item['url']}\n")
                f.write(f"Text: {item['text']}\n")
                f.write(self._RECORD_SEPARATOR + "\n")

    @classmethod
    def _parse_cache(cls, content: str) -> list:
        """Parse the text of a cache file back into ``url``/``text`` dicts.

        Everything between a record's ``Text: `` line and the next separator
        line belongs to the text, so multi-line texts survive a save/load
        round trip instead of being cut down to their first line.
        """
        items = []
        url = ""
        text_lines = None  # None until the record's "Text: " line is seen

        for line in content.split("\n"):
            if line == cls._RECORD_SEPARATOR:
                if url:
                    items.append({"url": url, "text": "\n".join(text_lines or [])})
                url, text_lines = "", None
            elif text_lines is not None:
                # Continuation of a multi-line text.
                text_lines.append(line)
            elif line.startswith("URL: "):
                url = line[5:]
            elif line.startswith("Text: "):
                text_lines = [line[6:]]

        # Tolerate a final record whose separator line is missing.
        if url:
            if text_lines and text_lines[-1] == "":
                text_lines.pop()  # artefact of the file's trailing newline
            items.append({"url": url, "text": "\n".join(text_lines or [])})
        return items

    def _load_from_cache(self, query: str) -> "list | None":
        """Load the cached results for *query*.

        Returns:
            The list of cached ``{"url", "text"}`` dicts, or ``None`` when
            no cache file exists for *query*.
        """
        try:
            with open(self._get_cache_path(query), "r", encoding="utf-8") as f:
                content = f.read()
        except FileNotFoundError:
            return None
        return self._parse_cache(content)

    def fetch(self, query: str, force_update=False) -> list:
        """Return results for *query*, preferring the cache.

        Args:
            query: Search term.
            force_update: When true, skip the cache and crawl again.

        Returns:
            A list of ``{"url", "text"}`` dicts.  Freshly crawled results
            are written to the cache before being returned.
        """
        if not force_update:
            cached = self._load_from_cache(query)
            # A missing or empty cache falls through to a fresh crawl.
            if cached:
                print("📂 从缓存加载数据")
                return cached

        print("🌐 正在爬取网络数据...")
        # ... (the original crawling logic is retained here) ...
        data = [
            {"url": link, "text": self.extract_text(link)}
            for link in self.parser.links[:5]
        ]
        self._save_to_cache(query, data)
        return data

    def extract_text(self, url: str) -> str:
        """Return the main text extracted from the page at *url*."""
        # ... (the original body-text extraction logic is retained here) ...
        # NOTE(review): `extracted_text` is produced by the elided logic above.
        return extracted_text

59
FFAI/main.py Normal file
View File

@ -0,0 +1,59 @@
from crawlers import PureHTMLParser # type: ignore
from analyzer import PureAnalyzer # type: ignore
class PureInfoHunter:
    """Crawl (or load cached) results for a query, analyse them, save a report."""

    def __init__(self):
        """Create the crawler and analyzer used by :meth:`run`."""
        # `fetch` is defined on PureCrawler; the HTML parser has no such
        # method, so using PureHTMLParser here made `run` fail immediately.
        from crawlers import PureCrawler  # type: ignore

        self.crawler = PureCrawler()
        self.analyzer = PureAnalyzer()

    def run(self, query: str, force_update: bool = False):
        """Fetch, analyse and report on *query*.

        Args:
            query: Search term.
            force_update: When true, ask the crawler to bypass its cache.

        Returns:
            The report text, which is also written to the ``reports``
            directory.
        """
        # 1. Get the data (cache first unless force_update is set).
        data = self.crawler.fetch(query, force_update=force_update)

        # 2. Analyse (the analyzer looks up related cached history itself).
        result = self.analyzer.analyze(data, query)

        # 3. Build the report.
        parts = ["=" * 40 + "\n", f"搜索词: {query}\n"]
        if result.get("related_history"):
            parts.append(f"关联历史记录: {', '.join(result['related_history'])}\n")
        parts.append("\n分析结果:\n" + result["summary"] + "\n")
        parts.append("数据来源:\n")
        parts.extend(f"- {url}\n" for url in result["sources"])
        report = "".join(parts)

        # Persist this run's report.
        self._save_report(query, report)
        return report

    def _save_report(self, query: str, content: str):
        """Write *content* to ``reports/<sanitised query>_report.txt``."""
        import os

        safe_query = "".join(c if c.isalnum() else "_" for c in query)
        # Create the directory here so the class also works when it is
        # imported as a library and the script entry point never ran.
        os.makedirs("reports", exist_ok=True)
        report_path = f"reports/{safe_query}_report.txt"
        with open(report_path, "w", encoding="utf-8") as f:
            f.write(content)
        print(f"报告已保存到 {report_path}")
if __name__ == "__main__":
    # Command-line entry point: <script> '<search term>' [true]
    import sys
    import os

    os.makedirs("reports", exist_ok=True)

    if len(sys.argv) < 2:
        print("使用方法: python pure_main.py '搜索关键词' [force_update]")
        print("示例: python pure_main.py '人工智能' true")
        # Stop here: without a search term the code below would crash with
        # IndexError on sys.argv[1] right after printing the usage text.
        sys.exit(1)

    force_update = len(sys.argv) > 2 and sys.argv[2].lower() == "true"
    hunter = PureInfoHunter()

    if force_update:
        # Bypass the cache and crawl again; this path prints the raw
        # analysis result rather than a formatted report.
        print("强制更新模式(忽略缓存)")
        data = hunter.crawler.fetch(sys.argv[1], force_update=True)
        result = hunter.analyzer.analyze(data, sys.argv[1])
    else:
        result = hunter.run(sys.argv[1])

    print(result)

Binary file not shown.