3v324v23 committed on
Commit
50dfccc
·
1 Parent(s): 036bd93

新增谷歌学术统合小助手

Browse files
crazy_functional.py CHANGED
@@ -72,6 +72,7 @@ def get_crazy_functions():
72
  from crazy_functions.批量总结PDF文档pdfminer import 批量总结PDF文档pdfminer
73
  from crazy_functions.总结word文档 import 总结word文档
74
  from crazy_functions.批量翻译PDF文档_多线程 import 批量翻译PDF文档
 
75
 
76
  function_plugins.update({
77
  "批量翻译PDF文档(多线程)": {
@@ -90,6 +91,11 @@ def get_crazy_functions():
90
  "AsButton": False, # 加入下拉菜单中
91
  "Function": HotReload(批量总结PDF文档pdfminer)
92
  },
 
 
 
 
 
93
  "批量总结Word文档": {
94
  "Color": "stop",
95
  "Function": HotReload(总结word文档)
 
72
  from crazy_functions.批量总结PDF文档pdfminer import 批量总结PDF文档pdfminer
73
  from crazy_functions.总结word文档 import 总结word文档
74
  from crazy_functions.批量翻译PDF文档_多线程 import 批量翻译PDF文档
75
+ from crazy_functions.谷歌检索小助手 import 谷歌检索小助手
76
 
77
  function_plugins.update({
78
  "批量翻译PDF文档(多线程)": {
 
91
  "AsButton": False, # 加入下拉菜单中
92
  "Function": HotReload(批量总结PDF文档pdfminer)
93
  },
94
+ "谷歌学术检索助手(输入谷歌学术搜索页url)": {
95
+ "Color": "stop",
96
+ "AsButton": False, # 加入下拉菜单中
97
+ "Function": HotReload(谷歌检索小助手)
98
+ },
99
  "批量总结Word文档": {
100
  "Color": "stop",
101
  "Function": HotReload(总结word文档)
crazy_functions/谷歌检索小助手.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
2
+ from toolbox import CatchException, report_execption, write_results_to_file
3
+
4
def get_meta_information(url, chatbot, history):
    """Scrape a Google Scholar result page and collect metadata for each paper.

    This is a generator. After each paper is processed, the last entry of
    ``chatbot`` is overwritten with that paper's title, arXiv flag and
    abstract, and ``(chatbot, [], "正常")`` is yielded. The collected metadata
    is delivered as the generator's return value (use ``yield from``).

    Args:
        url: Address of the Google Scholar search-result page to download.
        chatbot: List of ``[question, answer]`` pairs; its last entry is
            replaced in place, so it must not be empty.
        history: Accepted for signature compatibility; not used here.

    Returns:
        A list of dicts, one per paper, with the keys ``title``, ``author``,
        ``citation``, ``abstract`` and ``is_paper_in_arxiv``.
    """
    import requests
    import arxiv
    import difflib
    from bs4 import BeautifulSoup
    from toolbox import get_conf
    proxies, = get_conf('proxies')
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36',
    }
    # Send the GET request.
    response = requests.get(url, proxies=proxies, headers=headers)

    # Parse the page content.
    soup = BeautifulSoup(response.text, "html.parser")

    def string_similar(s1, s2):
        """Return a cheap upper-bound similarity ratio in [0, 1]."""
        return difflib.SequenceMatcher(None, s1, s2).quick_ratio()

    def text_of(node, default=''):
        """Return ``node.text``, or ``default`` when the selector matched nothing."""
        return node.text if node is not None else default

    profile = []
    # Walk every search result and pull out title, authors, citations, abstract.
    for result in soup.select(".gs_ri"):
        if result.a is None:
            # No link at all in this entry, so there is no title to work with.
            continue
        # Collapse newlines and runs of whitespace into single spaces.
        title = ' '.join(result.a.text.split())
        author = text_of(result.select_one(".gs_a"))

        # The citation count is the text of the "cites" link; it is absent
        # for papers that have never been cited.
        cite_link = result.select_one(".gs_fl > a[href*='cites']")
        citation = text_of(cite_link, 'cited by 0')

        # The snippet shown by Scholar lives in .gs_rs; strip surrounding blanks.
        abstract = text_of(result.select_one(".gs_rs")).strip()

        search = arxiv.Search(
            query=title,
            max_results=1,
            sort_by=arxiv.SortCriterion.Relevance,
        )
        # A default is required: a bare next() would raise StopIteration when
        # arXiv has no match, which turns into RuntimeError inside a generator.
        paper = next(search.results(), None)
        is_paper_in_arxiv = (
            paper is not None and string_similar(title, paper.title) > 0.90
        )
        if is_paper_in_arxiv:
            # Same paper: prefer the full arXiv abstract over the Scholar snippet.
            abstract = paper.summary.replace('\n', ' ')

        print(title)
        print(author)
        print(citation)
        profile.append({
            'title': title,
            'author': author,
            'citation': citation,
            'abstract': abstract,
            'is_paper_in_arxiv': is_paper_in_arxiv,
        })

        # Show progress for the paper just processed.
        chatbot[-1] = [chatbot[-1][0], title + f'\n\n是否在arxiv中(不在arxiv中无法获取完整摘要):{is_paper_in_arxiv}\n\n' + abstract]
        msg = "正常"
        yield chatbot, [], msg
    return profile
61
+
62
@CatchException
def 谷歌检索小助手(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
    """Summarise every paper listed on a Google Scholar search-result page.

    Generator plugin: it yields ``(chatbot, history, status)`` tuples as it
    progresses. Paper metadata is gathered by ``get_meta_information`` and
    sent to the model in batches of ten; each answer is appended to
    ``history``, which is finally passed to ``write_results_to_file``.

    Args:
        txt: URL of the Google Scholar search-result page.
        top_p: Sampling parameter forwarded to the model request.
        temperature: Sampling parameter forwarded to the model request.
        chatbot: List of chat entries; appended to and updated in place.
        history: Incoming history; discarded and rebuilt by this plugin.
        systemPromptTxt: Unused; part of the plugin signature.
        WEB_PORT: Unused; part of the plugin signature.
    """
    # Basic information: what the plugin does and who contributed it.
    chatbot.append([
        "函数插件功能?",
        "分析用户提供的谷歌学术(google scholar)搜索页面中,出现的所有文章: binary-husky,插件初始化中..."])
    yield chatbot, history, '正常'

    # Probe the optional dependencies; if missing, tell the user how to install them.
    try:
        import arxiv
        from bs4 import BeautifulSoup
    except ImportError:
        report_execption(chatbot, history,
            a=f"解析项目: {txt}",
            b="导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade beautifulsoup4 arxiv```。")
        yield chatbot, history, '正常'
        return

    # Drop earlier history so the model input does not overflow.
    history = []

    meta_paper_info_list = yield from get_meta_information(txt, chatbot, history)

    # Feed the papers to the model ten at a time until none are left, so that
    # result pages with more than ten entries are processed completely.
    batch_size = 10
    batch_no = 0
    while meta_paper_info_list:
        batch_no += 1
        current_batch = meta_paper_info_list[:batch_size]
        meta_paper_info_list = meta_paper_info_list[batch_size:]

        i_say = "下面是一些学术文献的数据,请从中提取出以下内容。" + \
            "1、英文题目;2、中文题目翻译;3、作者;4、arxiv公开(is_paper_in_arxiv);5、引用数量(cite);6、中文摘要翻译。" + \
            f"以下是信息源:{str(current_batch)}"

        inputs_show_user = f"请分析此页面中出现的所有文章:{txt}"
        gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
            inputs=i_say, inputs_show_user=inputs_show_user,
            top_p=top_p, temperature=temperature, chatbot=chatbot, history=[],
            sys_prompt="你是一个学术翻译,请从数据中提取信息。你必须使用Markdown格式。你必须逐个文献进行处理。"
        )

        history.extend([f"第{batch_no}批", gpt_say])

    chatbot.append(["状态?", "已经全部完成"])
    msg = '正常'
    yield chatbot, history, msg
    res = write_results_to_file(history)
    chatbot.append(("完成了吗?", res))
    yield chatbot, history, msg
request_llm/bridge_chatgpt.py CHANGED
@@ -104,7 +104,10 @@ def predict_no_ui_long_connection(inputs, top_p, temperature, history=[], sys_pr
104
  result = ''
105
  while True:
106
  try: chunk = next(stream_response).decode()
107
- except StopIteration: break
 
 
 
108
  if len(chunk)==0: continue
109
  if not chunk.startswith('data:'):
110
  error_msg = get_full_error(chunk.encode('utf8'), stream_response).decode()
 
104
  result = ''
105
  while True:
106
  try: chunk = next(stream_response).decode()
107
+ except StopIteration:
108
+ break
109
+ except requests.exceptions.ConnectionError:
110
+ chunk = next(stream_response).decode() # 失败了,重试一次?再失败就没办法了。
111
  if len(chunk)==0: continue
112
  if not chunk.startswith('data:'):
113
  error_msg = get_full_error(chunk.encode('utf8'), stream_response).decode()