From 1b490d774f1395b7b674c222445e28566f325435 Mon Sep 17 00:00:00 2001
From: Friendfeng <3880261409@qq.com>
Date: Sat, 7 Jun 2025 10:17:43 +0800
Subject: [PATCH] deleted: FFAIall(暂停)/__pycache__/analyzer.cpython-313.pyc
 deleted: FFAIall(暂停)/__pycache__/crawlers.cpython-313.pyc
 deleted: FFAIall(暂停)/analyzer.py
 deleted: FFAIall(暂停)/crawlers.py
 deleted: FFAIall(暂停)/main.py

---
 .../__pycache__/analyzer.cpython-313.pyc | Bin 2586 -> 0 bytes
 .../__pycache__/crawlers.cpython-313.pyc | Bin 4682 -> 0 bytes
 FFAIall(暂停)/analyzer.py                |  39 ----------
 FFAIall(暂停)/crawlers.py                |  71 ------------------
 FFAIall(暂停)/main.py                    |  59 ---------------
 5 files changed, 169 deletions(-)
 delete mode 100644 FFAIall(暂停)/__pycache__/analyzer.cpython-313.pyc
 delete mode 100644 FFAIall(暂停)/__pycache__/crawlers.cpython-313.pyc
 delete mode 100644 FFAIall(暂停)/analyzer.py
 delete mode 100644 FFAIall(暂停)/crawlers.py
 delete mode 100644 FFAIall(暂停)/main.py

diff --git a/FFAIall(暂停)/__pycache__/analyzer.cpython-313.pyc b/FFAIall(暂停)/__pycache__/analyzer.cpython-313.pyc
deleted file mode 100644
index ca0b8e422b164659d8737b9fe9df67685ec01a0a..0000000000000000000000000000000000000000
Binary files a/FFAIall(暂停)/__pycache__/analyzer.cpython-313.pyc and /dev/null differ
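The two deleted .pyc files are CPython 3.13 bytecode caches, so removing them loses no source. A common follow-up, not part of this patch, is adding `__pycache__/` and `*.pyc` to the repository's .gitignore so the caches are not committed again.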
diff --git a/FFAIall(暂停)/__pycache__/crawlers.cpython-313.pyc b/FFAIall(暂停)/__pycache__/crawlers.cpython-313.pyc
deleted file mode 100644
index 8bfb92aa51875b57ef4fb4ddd4822aa4ba932db3..0000000000000000000000000000000000000000
Binary files a/FFAIall(暂停)/__pycache__/crawlers.cpython-313.pyc and /dev/null differ
diff --git a/FFAIall(暂停)/analyzer.py b/FFAIall(暂停)/analyzer.py
deleted file mode 100644
index 19b68d5..0000000
--- a/FFAIall(暂停)/analyzer.py
+++ /dev/null
@@ -1,39 +0,0 @@
-import os
-import re
-from collections import Counter
-
-class PureAnalyzer:
-    @staticmethod
-    def search_in_cache(query: str, cache_dir="cache") -> list:
-        """Search the cache for records from earlier queries."""
-        if not os.path.exists(cache_dir):
-            return []
-
-        related_files = []
-        safe_query = query.lower()
-        for filename in os.listdir(cache_dir):
-            if safe_query in filename.lower():
-                with open(f"{cache_dir}/{filename}", "r", encoding="utf-8") as f:
-                    content = f.read()
-                related_files.append({
-                    "query": filename.replace(".txt", ""),
-                    "content": content
-                })
-        return related_files
-
-    @staticmethod
-    def analyze(data: list, query: str) -> dict:
-        # First check the cache for related records
-        history = PureAnalyzer.search_in_cache(query)
-
-        # Merge new data with historical text
-        all_text = " ".join(d.get("text", "") for d in data)
-        if history:
-            all_text += " " + " ".join(h["content"] for h in history)
-
-        # ...(original analysis logic kept here; it must define summary and keywords)...
-        return {
-            "summary": summary,
-            "keywords": keywords,
-            "sources": [d["url"] for d in data],
-            "related_history": [h["query"] for h in history]
-        }
\ No newline at end of file
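The core of analyze() is elided in the deleted file, but its return statement shows it must produce summary and keywords. A minimal sketch under that assumption, using only the module's own re and Counter imports; the frequency heuristic and the 200-character summary are illustrative guesses, not recovered from the patch:

    import re
    from collections import Counter

    def naive_summary_and_keywords(all_text: str) -> tuple:
        """Hypothetical stand-in for the elided block in PureAnalyzer.analyze."""
        # Keywords: the ten most frequent word-like tokens (CJK characters included).
        words = re.findall(r"[\w\u4e00-\u9fff]+", all_text.lower())
        keywords = [w for w, _ in Counter(words).most_common(10)]
        # Summary: the first 200 characters of the merged text.
        summary = all_text[:200] + ("..." if len(all_text) > 200 else "")
        return summary, keywords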
diff --git a/FFAIall(暂停)/crawlers.py b/FFAIall(暂停)/crawlers.py
deleted file mode 100644
index e0130b2..0000000
--- a/FFAIall(暂停)/crawlers.py
+++ /dev/null
@@ -1,71 +0,0 @@
-import urllib.request
-import os
-import time
-from urllib.parse import quote
-from html.parser import HTMLParser
-
-class PureHTMLParser(HTMLParser):
-    # ...(previous HTML parser code kept unchanged; it collects links into self.links)...
-
-class PureCrawler:
-    def __init__(self, cache_dir="cache"):
-        self.user_agent = "Mozilla/5.0"
-        self.parser = PureHTMLParser()
-        self.cache_dir = cache_dir
-        os.makedirs(cache_dir, exist_ok=True)
-
-    def _get_cache_path(self, query: str) -> str:
-        """Build the cache file path for a query."""
-        safe_query = "".join(c if c.isalnum() else "_" for c in query)
-        return f"{self.cache_dir}/{safe_query}.txt"
-
-    def _save_to_cache(self, query: str, data: list):
-        """Save search results to the cache."""
-        with open(self._get_cache_path(query), "w", encoding="utf-8") as f:
-            for item in data:
-                f.write(f"URL: {item['url']}\n")
-                f.write(f"Text: {item['text']}\n")
-                f.write("="*50 + "\n")
-
-    def _load_from_cache(self, query: str) -> list:
-        """Load data from the cache; returns None when no cache file exists."""
-        cache_file = self._get_cache_path(query)
-        if not os.path.exists(cache_file):
-            return None
-
-        with open(cache_file, "r", encoding="utf-8") as f:
-            content = f.read()
-
-        # Parse the cache file back into records
-        items = []
-        for block in content.split("="*50):
-            if not block.strip():
-                continue
-            url = text = ""
-            for line in block.split("\n"):
-                if line.startswith("URL: "):
-                    url = line[5:]
-                elif line.startswith("Text: "):
-                    text = line[6:]
-            if url:
-                items.append({"url": url, "text": text})
-        return items
-
-    def fetch(self, query: str, force_update=False) -> list:
-        """Read from the cache first; crawl only when nothing is cached."""
-        if not force_update:
-            cached = self._load_from_cache(query)
-            if cached:
-                print("📂 Loading data from cache")
-                return cached
-
-        print("🌐 Crawling web data...")
-        # ...(original crawl logic kept here; it fetches the results page and feeds it to self.parser)...
-        data = [{"url": link, "text": self.extract_text(link)} for link in self.parser.links[:5]]
-
-        self._save_to_cache(query, data)
-        return data
-
-    def extract_text(self, url: str) -> str:
-        # ...(original body-text extraction logic kept here; it must define extracted_text)...
-        return extracted_text
\ No newline at end of file
diff --git a/FFAIall(暂停)/main.py b/FFAIall(暂停)/main.py
deleted file mode 100644
index 7e5622f..0000000
--- a/FFAIall(暂停)/main.py
+++ /dev/null
@@ -1,59 +0,0 @@
-from crawlers import PureCrawler  # type: ignore
-from analyzer import PureAnalyzer  # type: ignore
-import sys
-import os
-
-class PureInfoHunter:
-    def __init__(self):
-        self.crawler = PureCrawler()
-        self.analyzer = PureAnalyzer()
-
-    def run(self, query: str):
-        # 1. Fetch data (cache first)
-        data = self.crawler.fetch(query)
-
-        # 2. Analyze (automatically searches the historical cache)
-        result = self.analyzer.analyze(data, query)
-
-        # 3. Build the report
-        report = "="*40 + "\n"
-        report += f"Search term: {query}\n"
-
-        if result.get("related_history"):
-            report += f"Related history: {', '.join(result['related_history'])}\n"
-
-        report += "\nAnalysis:\n" + result["summary"] + "\n"
-        report += "Sources:\n"
-        for url in result["sources"]:
-            report += f"- {url}\n"
-
-        # Save this report
-        self._save_report(query, report)
-        return report
-
-    def _save_report(self, query: str, content: str):
-        """Save the analysis report."""
-        safe_query = "".join(c if c.isalnum() else "_" for c in query)
-        with open(f"reports/{safe_query}_report.txt", "w", encoding="utf-8") as f:
-            f.write(content)
-        print(f"Report saved to reports/{safe_query}_report.txt")
-
-if __name__ == "__main__":
-    os.makedirs("reports", exist_ok=True)
-
-    if len(sys.argv) < 2:
-        print("Usage: python pure_main.py '<search term>' [force_update]")
-        print("Example: python pure_main.py 'artificial intelligence' true")
-        sys.exit(1)
-
-    force_update = len(sys.argv) > 2 and sys.argv[2].lower() == "true"
-    hunter = PureInfoHunter()
-
-    if force_update:
-        print("Force-update mode (cache ignored)")
-        data = hunter.crawler.fetch(sys.argv[1], force_update=True)
-        result = hunter.analyzer.analyze(data, sys.argv[1])
-    else:
-        result = hunter.run(sys.argv[1])
-
-    print(result)
\ No newline at end of file
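Three pieces of crawlers.py are elided in the deletion above: the body of PureHTMLParser, the crawl step inside fetch(), and the body-text extraction in extract_text(). A minimal sketch of what they might have contained, using only the module's own imports; the example.com search endpoint, the tag-stripping heuristic, and the 500-character truncation are placeholders, not recovered from the patch:

    import re
    import urllib.request
    from urllib.parse import quote
    from html.parser import HTMLParser

    class PureHTMLParser(HTMLParser):
        """Hypothetical reconstruction: collect absolute links from <a href=...> tags."""
        def __init__(self):
            super().__init__()
            self.links = []

        def handle_starttag(self, tag, attrs):
            if tag == "a":
                for name, value in attrs:
                    if name == "href" and value and value.startswith("http"):
                        self.links.append(value)

    def fetch_page(url: str, user_agent: str = "Mozilla/5.0") -> str:
        """Download a page as text; usable for both the results page and extract_text."""
        req = urllib.request.Request(url, headers={"User-Agent": user_agent})
        with urllib.request.urlopen(req, timeout=10) as resp:
            return resp.read().decode("utf-8", errors="ignore")

    def search_links(parser: PureHTMLParser, query: str) -> list:
        """Feed the results page into the parser; placeholder endpoint, not from the patch."""
        search_url = f"https://example.com/search?q={quote(query)}"
        parser.feed(fetch_page(search_url))
        return parser.links

    def extract_text(url: str) -> str:
        """Hypothetical body-text extraction: strip tags, collapse whitespace, truncate."""
        html = fetch_page(url)
        extracted_text = re.sub(r"<[^>]+>", " ", html)
        return re.sub(r"\s+", " ", extracted_text).strip()[:500]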