qingxu99 committed on
Commit
91609d6
·
1 Parent(s): ea6541c

Rebase v3.0

Browse files
.gitignore CHANGED
@@ -55,7 +55,6 @@ coverage.xml
55
  *.pot
56
  github
57
  .github
58
- .idea/
59
  TEMP
60
  TRASH
61
 
 
55
  *.pot
56
  github
57
  .github
 
58
  TEMP
59
  TRASH
60
 
Dockerfile+ChatGLM ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # How to build | 如何构建: docker build -t gpt-academic --network=host -f Dockerfile+ChatGLM .
2
+ # How to run | 如何运行 (1) 直接运行: docker run --rm -it --net=host --gpus=all gpt-academic
3
+ # How to run | 如何运行 (2) 我想运行之前进容器做一些调整: docker run --rm -it --net=host --gpus=all gpt-academic bash
4
+
5
+ # 从NVIDIA源,从而支持显卡运行(检查宿主的nvidia-smi中的cuda版本必须>=11.3)
6
+ FROM nvidia/cuda:11.3.1-runtime-ubuntu20.04
7
+ ARG useProxyNetwork=''
8
+ RUN apt-get update
9
+ RUN apt-get install -y curl proxychains curl
10
+ RUN apt-get install -y git python python3 python-dev python3-dev --fix-missing
11
+
12
+ # 配置代理网络(构建Docker镜像时使用)
13
+ # # comment out below if you do not need proxy network | 如果不需要翻墙 - 从此行向下删除
14
+ RUN $useProxyNetwork curl cip.cc
15
+ RUN sed -i '$ d' /etc/proxychains.conf
16
+ RUN sed -i '$ d' /etc/proxychains.conf
17
+ RUN echo "socks5 127.0.0.1 10880" >> /etc/proxychains.conf
18
+ ARG useProxyNetwork=proxychains
19
+ # # comment out above if you do not need proxy network | 如果不需要翻墙 - 从此行向上删除
20
+
21
+
22
+ # use python3 as the system default python
23
+ RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.8
24
+
25
+ # 下载分支
26
+ WORKDIR /gpt
27
+ RUN $useProxyNetwork git clone https://github.com/binary-husky/chatgpt_academic.git -b v3.0
28
+ WORKDIR /gpt/chatgpt_academic
29
+ RUN $useProxyNetwork python3 -m pip install -r requirements.txt
30
+ RUN $useProxyNetwork python3 -m pip install -r request_llm/requirements_chatglm.txt
31
+ RUN $useProxyNetwork python3 -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu113
32
+
33
+ # 预热CHATGLM参数(非必要 可选步骤)
34
+ RUN echo ' \n\
35
+ from transformers import AutoModel, AutoTokenizer \n\
36
+ chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) \n\
37
+ chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).float() ' >> warm_up_chatglm.py
38
+ RUN python3 -u warm_up_chatglm.py
39
+ RUN $useProxyNetwork git pull
40
+
41
+ # 为chatgpt-academic配置代理和API-KEY (非必要 可选步骤)
42
+ RUN echo ' \n\
43
+ API_KEY = "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" \n\
44
+ USE_PROXY = True \n\
45
+ LLM_MODEL = "chatglm" \n\
46
+ LOCAL_MODEL_DEVICE = "cuda" \n\
47
+ proxies = { "http": "socks5h://localhost:10880", "https": "socks5h://localhost:10880", } ' >> config_private.py
48
+
49
+ # 启动
50
+ CMD ["python3", "-u", "main.py"]
config.py CHANGED
@@ -45,7 +45,10 @@ WEB_PORT = -1
45
  MAX_RETRY = 2
46
 
47
  # OpenAI模型选择是(gpt4现在只对申请成功的人开放)
48
- LLM_MODEL = "gpt-3.5-turbo"
 
 
 
49
 
50
  # OpenAI的API_URL
51
  API_URL = "https://api.openai.com/v1/chat/completions"
 
45
  MAX_RETRY = 2
46
 
47
  # OpenAI模型选择是(gpt4现在只对申请成功的人开放)
48
+ LLM_MODEL = "gpt-3.5-turbo" # 可选 "chatglm", "tgui:anymodel@localhost:7865"
49
+
50
+ # 本地LLM模型如ChatGLM的执行方式 CPU/GPU
51
+ LOCAL_MODEL_DEVICE = "cpu" # 可选 "cuda"
52
 
53
  # OpenAI的API_URL
54
  API_URL = "https://api.openai.com/v1/chat/completions"
crazy_functional.py CHANGED
@@ -16,15 +16,20 @@ def get_crazy_functions():
16
  from crazy_functions.高级功能函数模板 import 高阶功能模板函数
17
  from crazy_functions.代码重写为全英文_多线程 import 全项目切换英文
18
  from crazy_functions.Latex全文润色 import Latex英文润色
 
19
  from crazy_functions.解析项目源代码 import 解析一个Lua项目
20
  function_plugins = {
21
-
 
 
 
22
  "解析整个Python项目": {
23
  "Color": "stop", # 按钮颜色
24
  "Function": HotReload(解析一个Python项目)
25
  },
26
  "解析整个C++项目头文件": {
27
  "Color": "stop", # 按钮颜色
 
28
  "Function": HotReload(解析一个C项目的头文件)
29
  },
30
  "解析整个C++项目(.cpp/.hpp/.c/.h)": {
 
16
  from crazy_functions.高级功能函数模板 import 高阶功能模板函数
17
  from crazy_functions.代码重写为全英文_多线程 import 全项目切换英文
18
  from crazy_functions.Latex全文润色 import Latex英文润色
19
+ from crazy_functions.询问多个大语言模型 import 同时问询
20
  from crazy_functions.解析项目源代码 import 解析一个Lua项目
21
  function_plugins = {
22
+ "询问多个GPT模型": {
23
+ "Color": "stop", # 按钮颜色
24
+ "Function": HotReload(同时问询)
25
+ },
26
  "解析整个Python项目": {
27
  "Color": "stop", # 按钮颜色
28
  "Function": HotReload(解析一个Python项目)
29
  },
30
  "解析整个C++项目头文件": {
31
  "Color": "stop", # 按钮颜色
32
+ "AsButton": False, # 加入下拉菜单中
33
  "Function": HotReload(解析一个C项目的头文件)
34
  },
35
  "解析整个C++项目(.cpp/.hpp/.c/.h)": {
crazy_functions/crazy_utils.py CHANGED
@@ -61,7 +61,7 @@ def request_gpt_model_in_new_thread_with_ui_alive(
61
  """
62
  import time
63
  from concurrent.futures import ThreadPoolExecutor
64
- from request_llm.bridge_chatgpt import predict_no_ui_long_connection
65
  # 用户反馈
66
  chatbot.append([inputs_show_user, ""])
67
  yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
@@ -167,13 +167,17 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
167
  """
168
  import time, random
169
  from concurrent.futures import ThreadPoolExecutor
170
- from request_llm.bridge_chatgpt import predict_no_ui_long_connection
171
  assert len(inputs_array) == len(history_array)
172
  assert len(inputs_array) == len(sys_prompt_array)
173
  if max_workers == -1: # 读取配置文件
174
  try: max_workers, = get_conf('DEFAULT_WORKER_NUM')
175
  except: max_workers = 8
176
  if max_workers <= 0 or max_workers >= 20: max_workers = 8
 
 
 
 
177
  executor = ThreadPoolExecutor(max_workers=max_workers)
178
  n_frag = len(inputs_array)
179
  # 用户反馈
 
61
  """
62
  import time
63
  from concurrent.futures import ThreadPoolExecutor
64
+ from request_llm.bridge_all import predict_no_ui_long_connection
65
  # 用户反馈
66
  chatbot.append([inputs_show_user, ""])
67
  yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
 
167
  """
168
  import time, random
169
  from concurrent.futures import ThreadPoolExecutor
170
+ from request_llm.bridge_all import predict_no_ui_long_connection
171
  assert len(inputs_array) == len(history_array)
172
  assert len(inputs_array) == len(sys_prompt_array)
173
  if max_workers == -1: # 读取配置文件
174
  try: max_workers, = get_conf('DEFAULT_WORKER_NUM')
175
  except: max_workers = 8
176
  if max_workers <= 0 or max_workers >= 20: max_workers = 8
177
+ # 屏蔽掉 chatglm的多线程,可能会导致严重卡顿
178
+ if not llm_kwargs['llm_model'].startswith('gpt-'):
179
+ max_workers = 1
180
+
181
  executor = ThreadPoolExecutor(max_workers=max_workers)
182
  n_frag = len(inputs_array)
183
  # 用户反馈
crazy_functions/代码重写为全英文_多线程.py CHANGED
@@ -1,5 +1,5 @@
1
  import threading
2
- from request_llm.bridge_chatgpt import predict_no_ui_long_connection
3
  from toolbox import update_ui
4
  from toolbox import CatchException, write_results_to_file, report_execption
5
  from .crazy_utils import breakdown_txt_to_satisfy_token_limit
 
1
  import threading
2
+ from request_llm.bridge_all import predict_no_ui_long_connection
3
  from toolbox import update_ui
4
  from toolbox import CatchException, write_results_to_file, report_execption
5
  from .crazy_utils import breakdown_txt_to_satisfy_token_limit
crazy_functions/解析项目源代码.py CHANGED
@@ -12,7 +12,7 @@ def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
12
  sys_prompt_array = []
13
  report_part_1 = []
14
 
15
- assert len(file_manifest) <= 1024, "源文件太多(超过1024个), 请缩减输入文件的数量。或者,您也可以选择删除此行警告,并修改代码拆分file_manifest列表,从而实现分批次处理。"
16
  ############################## <第一步,逐个文件分析,多线程> ##################################
17
  for index, fp in enumerate(file_manifest):
18
  # 读取文件
 
12
  sys_prompt_array = []
13
  report_part_1 = []
14
 
15
+ assert len(file_manifest) <= 512, "源文件太多(超过512个), 请缩减输入文件的数量。或者,您也可以选择删除此行警告,并修改代码拆分file_manifest列表,从而实现分批次处理。"
16
  ############################## <第一步,逐个文件分析,多线程> ##################################
17
  for index, fp in enumerate(file_manifest):
18
  # 读取文件
crazy_functions/询问多个大语言模型.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from toolbox import CatchException, update_ui
2
+ from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
3
+ import datetime
4
+ @CatchException
5
+ def 同时问询(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
6
+ """
7
+ txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
8
+ llm_kwargs gpt模型参数,如温度和top_p等,一般原样传递下去就行
9
+ plugin_kwargs 插件模型的参数,如温度和top_p等,一般原样传递下去就行
10
+ chatbot 聊天显示框的句柄,用于显示给用户
11
+ history 聊天历史,前情提要
12
+ system_prompt 给gpt的静默提醒
13
+ web_port 当前软件运行的端口号
14
+ """
15
+ history = [] # 清空历史,以免输入溢出
16
+ chatbot.append((txt, "正在同时咨询ChatGPT和ChatGLM……"))
17
+ yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新
18
+
19
+ llm_kwargs['llm_model'] = 'chatglm&gpt-3.5-turbo'
20
+ gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
21
+ inputs=txt, inputs_show_user=txt,
22
+ llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
23
+ sys_prompt=system_prompt
24
+ )
25
+
26
+ history.append(txt)
27
+ history.append(gpt_say)
28
+ yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
main.py CHANGED
@@ -1,6 +1,6 @@
1
  import os; os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染
2
  import gradio as gr
3
- from request_llm.bridge_chatgpt import predict
4
  from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, DummyWith
5
 
6
  # 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到
@@ -97,7 +97,10 @@ with gr.Blocks(title="ChatGPT 学术优化", theme=set_theme, analytics_enabled=
97
  system_prompt = gr.Textbox(show_label=True, placeholder=f"System Prompt", label="System prompt", value=initial_prompt)
98
  top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01,interactive=True, label="Top-p (nucleus sampling)",)
99
  temperature = gr.Slider(minimum=-0, maximum=2.0, value=1.0, step=0.01, interactive=True, label="Temperature",)
 
100
  checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区", "底部输入区"], value=["基础功能区", "函数插件区"], label="显示/隐藏功能区")
 
 
101
  gr.Markdown(description)
102
  with gr.Accordion("备选输入区", open=True, visible=False) as area_input_secondary:
103
  with gr.Row():
@@ -118,7 +121,7 @@ with gr.Blocks(title="ChatGPT 学术优化", theme=set_theme, analytics_enabled=
118
  return ret
119
  checkboxes.select(fn_area_visibility, [checkboxes], [area_basic_fn, area_crazy_fn, area_input_primary, area_input_secondary, txt, txt2] )
120
  # 整理反复出现的控件句柄组合
121
- input_combo = [cookies, txt, txt2, top_p, temperature, chatbot, history, system_prompt]
122
  output_combo = [cookies, chatbot, history, status]
123
  predict_args = dict(fn=ArgsGeneralWrapper(predict), inputs=input_combo, outputs=output_combo)
124
  # 提交按钮、重置按钮
 
1
  import os; os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染
2
  import gradio as gr
3
+ from request_llm.bridge_all import predict
4
  from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, DummyWith
5
 
6
  # 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到
 
97
  system_prompt = gr.Textbox(show_label=True, placeholder=f"System Prompt", label="System prompt", value=initial_prompt)
98
  top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01,interactive=True, label="Top-p (nucleus sampling)",)
99
  temperature = gr.Slider(minimum=-0, maximum=2.0, value=1.0, step=0.01, interactive=True, label="Temperature",)
100
+ max_length_sl = gr.Slider(minimum=256, maximum=4096, value=512, step=1, interactive=True, label="MaxLength",)
101
  checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区", "底部输入区"], value=["基础功能区", "函数插件区"], label="显示/隐藏功能区")
102
+ md_dropdown = gr.Dropdown(["gpt-3.5-turbo", "chatglm"], value=LLM_MODEL, label="").style(container=False)
103
+
104
  gr.Markdown(description)
105
  with gr.Accordion("备选输入区", open=True, visible=False) as area_input_secondary:
106
  with gr.Row():
 
121
  return ret
122
  checkboxes.select(fn_area_visibility, [checkboxes], [area_basic_fn, area_crazy_fn, area_input_primary, area_input_secondary, txt, txt2] )
123
  # 整理反复出现的控件句柄组合
124
+ input_combo = [cookies, max_length_sl, md_dropdown, txt, txt2, top_p, temperature, chatbot, history, system_prompt]
125
  output_combo = [cookies, chatbot, history, status]
126
  predict_args = dict(fn=ArgsGeneralWrapper(predict), inputs=input_combo, outputs=output_combo)
127
  # 提交按钮、重置按钮
request_llm/README.md CHANGED
@@ -1,35 +1,53 @@
1
  # 如何使用其他大语言模型(v3.0分支测试中)
2
 
3
- ## 1. 先运行text-generation
 
 
 
 
4
  ``` sh
5
- # 下载模型( text-generation 这么牛的项目,别忘了给人家star )
6
- git clone https://github.com/oobabooga/text-generation-webui.git
 
 
 
 
7
 
8
- # 安装text-generation的额外依赖
9
- pip install accelerate bitsandbytes flexgen gradio llamacpp markdown numpy peft requests rwkv safetensors sentencepiece tqdm datasets git+https://github.com/huggingface/transformers
10
 
11
- # 切换路径
12
- cd text-generation-webui
13
 
14
- # 下载模型
 
 
 
 
 
 
 
 
 
 
15
  python download-model.py facebook/galactica-1.3b
16
  # 其他可选如 facebook/opt-1.3b
 
17
  # facebook/galactica-6.7b
18
  # facebook/galactica-120b
19
  # facebook/pygmalion-1.3b 等
20
  # 详情见 https://github.com/oobabooga/text-generation-webui
21
 
22
- # 启动text-generation,注意把模型的斜杠改成下划线
23
- python server.py --cpu --listen --listen-port 7860 --model facebook_galactica-1.3b
24
  ```
25
 
26
- ## 2. 修改config.py
 
27
  ``` sh
28
- # LLM_MODEL格式较复杂 TGUI:[模型]@[ws地址]:[ws端口] , 端口要和上面给定的端口一致
29
- LLM_MODEL = "TGUI:galactica-1.3b@localhost:7860"
30
  ```
31
 
32
- ## 3. 运行!
33
  ``` sh
34
  cd chatgpt-academic
35
  python main.py
 
1
  # 如何使用其他大语言模型(v3.0分支测试中)
2
 
3
+ ## ChatGLM
4
+
5
+ - 安装依赖 `pip install -r request_llm/requirements_chatglm.txt`
6
+ - 修改配置,在config.py中将LLM_MODEL的值改为"chatglm"
7
+
8
  ``` sh
9
+ LLM_MODEL = "chatglm"
10
+ ```
11
+ - 运行!
12
+ ``` sh
13
+ python main.py
14
+ ```
15
 
 
 
16
 
17
+ ---
18
+ ## Text-Generation-UI (TGUI)
19
 
20
+ ### 1. 部署TGUI
21
+ ``` sh
22
+ # 1 下载模型
23
+ git clone https://github.com/oobabooga/text-generation-webui.git
24
+ # 2 这个仓库的最新代码有问题,回滚到几周之前
25
+ git reset --hard fcda3f87767e642d1c0411776e549e1d3894843d
26
+ # 3 切换路径
27
+ cd text-generation-webui
28
+ # 4 安装text-generation的额外依赖
29
+ pip install accelerate bitsandbytes flexgen gradio llamacpp markdown numpy peft requests rwkv safetensors sentencepiece tqdm datasets git+https://github.com/huggingface/transformers
30
+ # 5 下载模型
31
  python download-model.py facebook/galactica-1.3b
32
  # 其他可选如 facebook/opt-1.3b
33
+ # facebook/galactica-1.3b
34
  # facebook/galactica-6.7b
35
  # facebook/galactica-120b
36
  # facebook/pygmalion-1.3b 等
37
  # 详情见 https://github.com/oobabooga/text-generation-webui
38
 
39
+ # 6 启动text-generation
40
+ python server.py --cpu --listen --listen-port 7865 --model facebook_galactica-1.3b
41
  ```
42
 
43
+ ### 2. 修改config.py
44
+
45
  ``` sh
46
+ # LLM_MODEL格式: tgui:[模型]@[ws地址]:[ws端口] , 端口要和上面给定的端口一致
47
+ LLM_MODEL = "tgui:galactica-1.3b@localhost:7865"
48
  ```
49
 
50
+ ### 3. 运行!
51
  ``` sh
52
  cd chatgpt-academic
53
  python main.py
request_llm/bridge_all.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ """
3
+ 该文件中主要包含2个函数
4
+
5
+ 不具备多线程能力的函数:
6
+ 1. predict: 正常对话时使用,具备完备的交互功能,不可多线程
7
+
8
+ 具备多线程调用能力的函数
9
+ 2. predict_no_ui_long_connection:在实验过程中发现调用predict_no_ui处理长文档时,和openai的连接容易断掉,这个函数用stream的方式解决这个问题,同样支持多线程
10
+ """
11
+
12
+ from concurrent.futures import ThreadPoolExecutor
13
+
14
+ from .bridge_chatgpt import predict_no_ui_long_connection as chatgpt_noui
15
+ from .bridge_chatgpt import predict as chatgpt_ui
16
+
17
+ from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui
18
+ from .bridge_chatglm import predict as chatglm_ui
19
+
20
+ from .bridge_tgui import predict_no_ui_long_connection as tgui_noui
21
+ from .bridge_tgui import predict as tgui_ui
22
+
23
+ methods = {
24
+ "openai-no-ui": chatgpt_noui,
25
+ "openai-ui": chatgpt_ui,
26
+
27
+ "chatglm-no-ui": chatglm_noui,
28
+ "chatglm-ui": chatglm_ui,
29
+
30
+ "tgui-no-ui": tgui_noui,
31
+ "tgui-ui": tgui_ui,
32
+ }
33
+
34
+ def predict_no_ui_long_connection(inputs, llm_kwargs, history, sys_prompt, observe_window, console_slience=False):
35
+ """
36
+ 发送至LLM,等待回复,一次性完成,不显示中间过程。但内部用stream的方法避免中途网线被掐。
37
+ inputs:
38
+ 是本次问询的输入
39
+ sys_prompt:
40
+ 系统静默prompt
41
+ llm_kwargs:
42
+ LLM的内部调优参数
43
+ history:
44
+ 是之前的对话列表
45
+ observe_window = None:
46
+ 用于负责跨越线程传递已经输出的部分,大部分时候仅仅为了fancy的视觉效果,留空即可。observe_window[0]:观测窗。observe_window[1]:看门狗
47
+ """
48
+ import threading, time, copy
49
+
50
+ model = llm_kwargs['llm_model']
51
+ n_model = 1
52
+ if '&' not in model:
53
+ assert not model.startswith("tgui"), "TGUI不支持函数插件的实现"
54
+
55
+ # 如果只询问1个大语言模型:
56
+ if model.startswith('gpt'):
57
+ method = methods['openai-no-ui']
58
+ elif model == 'chatglm':
59
+ method = methods['chatglm-no-ui']
60
+ elif model.startswith('tgui'):
61
+ method = methods['tgui-no-ui']
62
+ return method(inputs, llm_kwargs, history, sys_prompt, observe_window, console_slience)
63
+ else:
64
+ # 如果同时询问多个大语言模型:
65
+ executor = ThreadPoolExecutor(max_workers=16)
66
+ models = model.split('&')
67
+ n_model = len(models)
68
+
69
+ window_len = len(observe_window)
70
+ if window_len==0:
71
+ window_mutex = [[] for _ in range(n_model)] + [True]
72
+ elif window_len==1:
73
+ window_mutex = [[""] for _ in range(n_model)] + [True]
74
+ elif window_len==2:
75
+ window_mutex = [["", time.time()] for _ in range(n_model)] + [True]
76
+
77
+ futures = []
78
+ for i in range(n_model):
79
+ model = models[i]
80
+ if model.startswith('gpt'):
81
+ method = methods['openai-no-ui']
82
+ elif model == 'chatglm':
83
+ method = methods['chatglm-no-ui']
84
+ elif model.startswith('tgui'):
85
+ method = methods['tgui-no-ui']
86
+ llm_kwargs_feedin = copy.deepcopy(llm_kwargs)
87
+ llm_kwargs_feedin['llm_model'] = model
88
+ future = executor.submit(method, inputs, llm_kwargs_feedin, history, sys_prompt, window_mutex[i], console_slience)
89
+ futures.append(future)
90
+
91
+ def mutex_manager(window_mutex, observe_window):
92
+ while True:
93
+ time.sleep(0.2)
94
+ if not window_mutex[-1]: break
95
+ # 看门狗(watchdog)
96
+ for i in range(n_model):
97
+ window_mutex[i][1] = observe_window[1]
98
+ # 观察窗(window)
99
+ chat_string = []
100
+ for i in range(n_model):
101
+ chat_string.append( f"[{str(models[i])} 说]: {window_mutex[i][0]}" )
102
+ res = '\n\n---\n\n'.join(chat_string)
103
+ # # # # # # # # # # #
104
+ observe_window[0] = res
105
+
106
+ t_model = threading.Thread(target=mutex_manager, args=(window_mutex, observe_window), daemon=True)
107
+ t_model.start()
108
+
109
+ return_string_collect = []
110
+ for i, future in enumerate(futures): # wait and get
111
+ return_string_collect.append( f"[{str(models[i])} 说]: {future.result()}" )
112
+ window_mutex[-1] = False # stop mutex thread
113
+ res = '\n\n---\n\n'.join(return_string_collect)
114
+ return res
115
+
116
+
117
+ def predict(inputs, llm_kwargs, *args, **kwargs):
118
+ """
119
+ 发送至LLM,流式获取输出。
120
+ 用于基础的对话功能。
121
+ inputs 是本次问询的输入
122
+ top_p, temperature是LLM的内部调优参数
123
+ history 是之前的对话列表(注意无论是inputs还是history,内容太长了都会触发token数量溢出的错误)
124
+ chatbot 为WebUI中显示的对话列表,修改它,然后yield出去,可以直接修改对话界面内容
125
+ additional_fn代表点击的哪个按钮,按钮见functional.py
126
+ """
127
+ if llm_kwargs['llm_model'].startswith('gpt'):
128
+ method = methods['openai-ui']
129
+ elif llm_kwargs['llm_model'] == 'chatglm':
130
+ method = methods['chatglm-ui']
131
+ elif llm_kwargs['llm_model'].startswith('tgui'):
132
+ method = methods['tgui-ui']
133
+
134
+ yield from method(inputs, llm_kwargs, *args, **kwargs)
135
+
request_llm/bridge_chatglm.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from transformers import AutoModel, AutoTokenizer
3
+ import time
4
+ import importlib
5
+ from toolbox import update_ui, get_conf
6
+
7
+
8
+ global chatglm_model, chatglm_tokenizer
9
+
10
+ chatglm_model = None
11
+ chatglm_tokenizer = None
12
+
13
+ def model_loader():
14
+ global chatglm_model, chatglm_tokenizer
15
+ if chatglm_tokenizer is None:
16
+ chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
17
+ if chatglm_model is None: # 尚未加载
18
+ device, = get_conf('LOCAL_MODEL_DEVICE')
19
+ if device=='cpu':
20
+ chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).float()
21
+ else:
22
+ chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
23
+ chatglm_model = chatglm_model.eval()
24
+ chatglm_model = chatglm_model.eval()
25
+
26
+ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False):
27
+ """
28
+ 函数的说明请见 request_llm/bridge_all.py
29
+ """
30
+ global chatglm_model, chatglm_tokenizer
31
+ if chatglm_model is None:
32
+ observe_window[0] = "ChatGLM尚未加载,加载需要一段时间 ……"
33
+
34
+ model_loader()
35
+ # chatglm 没有 sys_prompt 接口,因此把prompt加入 history
36
+ history_feedin = []
37
+ for i in range(len(history)//2):
38
+ history_feedin.append(["What can I do?", sys_prompt] )
39
+ history_feedin.append([history[2*i], history[2*i+1]] )
40
+
41
+ watch_dog_patience = 5 # 看门狗 (watchdog) 的耐心, 设置5秒即可
42
+ response = ""
43
+ for response, history in chatglm_model.stream_chat(chatglm_tokenizer, inputs, history=history_feedin, max_length=llm_kwargs['max_length'],
44
+ top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
45
+ # 观测窗,把已经获取的数据显示出去
46
+ observe_window[0] = response
47
+ # 看门狗 (watchdog),如果超过期限没有喂狗,则终止
48
+ if len(observe_window) >= 2:
49
+ if (time.time()-observe_window[1]) > watch_dog_patience:
50
+ raise RuntimeError("程序终止。")
51
+ # if not console_slience:
52
+ # print(response)
53
+ return response
54
+
55
+
56
+ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
57
+ """
58
+ 函数的说明请见 request_llm/bridge_all.py
59
+ """
60
+ global chatglm_model, chatglm_tokenizer
61
+ chatbot.append((inputs, ""))
62
+ if chatglm_model is None:
63
+ chatbot[-1] = (inputs, "ChatGLM尚未加载,加载需要一段时间 ……")
64
+ yield from update_ui(chatbot=chatbot, history=[])
65
+ model_loader()
66
+
67
+ if additional_fn is not None:
68
+ import core_functional
69
+ importlib.reload(core_functional) # 热更新prompt
70
+ core_functional = core_functional.get_core_functions()
71
+ if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话)
72
+ inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
73
+
74
+
75
+ history_feedin = []
76
+ for i in range(len(history)//2):
77
+ history_feedin.append(["What can I do?", system_prompt] )
78
+ history_feedin.append([history[2*i], history[2*i+1]] )
79
+
80
+ for response, history in chatglm_model.stream_chat(chatglm_tokenizer, inputs, history=history_feedin, max_length=llm_kwargs['max_length'],
81
+ top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
82
+ chatbot[-1] = (inputs, response)
83
+ yield from update_ui(chatbot=chatbot, history=history)
request_llm/bridge_tgui.py CHANGED
@@ -13,23 +13,18 @@ import time
13
  import threading
14
  import importlib
15
  from toolbox import get_conf, update_ui
16
- LLM_MODEL, = get_conf('LLM_MODEL')
17
 
18
- # "TGUI:galactica-1.3b@localhost:7860"
19
- model_name, addr_port = LLM_MODEL.split('@')
20
- assert ':' in addr_port, "LLM_MODEL 格式不正确!" + LLM_MODEL
21
- addr, port = addr_port.split(':')
22
 
23
  def random_hash():
24
  letters = string.ascii_lowercase + string.digits
25
  return ''.join(random.choice(letters) for i in range(9))
26
 
27
- async def run(context, max_token=512):
28
  params = {
29
  'max_new_tokens': max_token,
30
  'do_sample': True,
31
- 'temperature': 0.5,
32
- 'top_p': 0.9,
33
  'typical_p': 1,
34
  'repetition_penalty': 1.05,
35
  'encoder_repetition_penalty': 1.0,
@@ -90,7 +85,7 @@ async def run(context, max_token=512):
90
 
91
 
92
 
93
- def predict_tgui(inputs, top_p, temperature, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
94
  """
95
  发送至chatGPT,流式获取输出。
96
  用于基础的对话功能。
@@ -108,18 +103,26 @@ def predict_tgui(inputs, top_p, temperature, chatbot, history=[], system_prompt=
108
  inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
109
 
110
  raw_input = "What I would like to say is the following: " + inputs
111
- logging.info(f'[raw_input] {raw_input}')
112
  history.extend([inputs, ""])
113
  chatbot.append([inputs, ""])
114
  yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面
115
 
116
- prompt = inputs
117
  tgui_say = ""
118
 
 
 
 
 
 
119
  mutable = ["", time.time()]
120
  def run_coorotine(mutable):
121
  async def get_result(mutable):
122
- async for response in run(prompt):
 
 
 
 
123
  print(response[len(mutable[0]):])
124
  mutable[0] = response
125
  if (time.time() - mutable[1]) > 3:
@@ -140,28 +143,29 @@ def predict_tgui(inputs, top_p, temperature, chatbot, history=[], system_prompt=
140
  chatbot[-1] = (history[-2], history[-1])
141
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
142
 
143
- logging.info(f'[response] {tgui_say}')
144
 
145
 
146
 
147
- def predict_tgui_no_ui(inputs, top_p, temperature, history=[], sys_prompt=""):
148
  raw_input = "What I would like to say is the following: " + inputs
149
- prompt = inputs
150
  tgui_say = ""
151
- mutable = ["", time.time()]
152
- def run_coorotine(mutable):
153
- async def get_result(mutable):
154
- async for response in run(prompt, max_token=20):
155
- print(response[len(mutable[0]):])
156
- mutable[0] = response
157
- if (time.time() - mutable[1]) > 3:
 
 
 
 
 
 
158
  print('exit when no listener')
159
  break
160
- asyncio.run(get_result(mutable))
161
- thread_listen = threading.Thread(target=run_coorotine, args=(mutable,))
162
  thread_listen.start()
163
- while thread_listen.is_alive():
164
- time.sleep(1)
165
- mutable[1] = time.time()
166
- tgui_say = mutable[0]
167
- return tgui_say
 
13
  import threading
14
  import importlib
15
  from toolbox import get_conf, update_ui
 
16
 
 
 
 
 
17
 
18
  def random_hash():
19
  letters = string.ascii_lowercase + string.digits
20
  return ''.join(random.choice(letters) for i in range(9))
21
 
22
+ async def run(context, max_token, temperature, top_p, addr, port):
23
  params = {
24
  'max_new_tokens': max_token,
25
  'do_sample': True,
26
+ 'temperature': temperature,
27
+ 'top_p': top_p,
28
  'typical_p': 1,
29
  'repetition_penalty': 1.05,
30
  'encoder_repetition_penalty': 1.0,
 
85
 
86
 
87
 
88
+ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
89
  """
90
  发送至chatGPT,流式获取输出。
91
  用于基础的对话功能。
 
103
  inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
104
 
105
  raw_input = "What I would like to say is the following: " + inputs
 
106
  history.extend([inputs, ""])
107
  chatbot.append([inputs, ""])
108
  yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面
109
 
110
+ prompt = raw_input
111
  tgui_say = ""
112
 
113
+ model_name, addr_port = llm_kwargs['llm_model'].split('@')
114
+ assert ':' in addr_port, "LLM_MODEL 格式不正确!" + llm_kwargs['llm_model']
115
+ addr, port = addr_port.split(':')
116
+
117
+
118
  mutable = ["", time.time()]
119
  def run_coorotine(mutable):
120
  async def get_result(mutable):
121
+ # "tgui:galactica-1.3b@localhost:7860"
122
+
123
+ async for response in run(context=prompt, max_token=llm_kwargs['max_length'],
124
+ temperature=llm_kwargs['temperature'],
125
+ top_p=llm_kwargs['top_p'], addr=addr, port=port):
126
  print(response[len(mutable[0]):])
127
  mutable[0] = response
128
  if (time.time() - mutable[1]) > 3:
 
143
  chatbot[-1] = (history[-2], history[-1])
144
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
145
 
 
146
 
147
 
148
 
149
+ def predict_no_ui_long_connection(inputs, llm_kwargs, history, sys_prompt, observe_window, console_slience=False):
150
  raw_input = "What I would like to say is the following: " + inputs
151
+ prompt = raw_input
152
  tgui_say = ""
153
+ model_name, addr_port = llm_kwargs['llm_model'].split('@')
154
+ assert ':' in addr_port, "LLM_MODEL 格式不正确!" + llm_kwargs['llm_model']
155
+ addr, port = addr_port.split(':')
156
+
157
+
158
+ def run_coorotine(observe_window):
159
+ async def get_result(observe_window):
160
+ async for response in run(context=prompt, max_token=llm_kwargs['max_length'],
161
+ temperature=llm_kwargs['temperature'],
162
+ top_p=llm_kwargs['top_p'], addr=addr, port=port):
163
+ print(response[len(observe_window[0]):])
164
+ observe_window[0] = response
165
+ if (time.time() - observe_window[1]) > 5:
166
  print('exit when no listener')
167
  break
168
+ asyncio.run(get_result(observe_window))
169
+ thread_listen = threading.Thread(target=run_coorotine, args=(observe_window,))
170
  thread_listen.start()
171
+ return observe_window[0]
 
 
 
 
request_llm/requirements_chatglm.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ protobuf
2
+ transformers==4.27.1
3
+ cpm_kernels
4
+ torch>=1.10
5
+ mdtex2html
6
+ sentencepiece
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- gradio==3.25.0
2
  tiktoken>=0.3.3
3
  requests[socks]
4
  transformers
 
1
+ gradio>=3.25.0
2
  tiktoken>=0.3.3
3
  requests[socks]
4
  transformers
toolbox.py CHANGED
@@ -27,7 +27,7 @@ def ArgsGeneralWrapper(f):
27
  """
28
  装饰器函数,用于重组输入参数,改变输入参数的顺序与结构。
29
  """
30
- def decorated(cookies, txt, txt2, top_p, temperature, chatbot, history, system_prompt, *args):
31
  txt_passon = txt
32
  if txt == "" and txt2 != "": txt_passon = txt2
33
  # 引入一个有cookie的chatbot
@@ -37,8 +37,9 @@ def ArgsGeneralWrapper(f):
37
  })
38
  llm_kwargs = {
39
  'api_key': cookies['api_key'],
40
- 'llm_model': cookies['llm_model'],
41
  'top_p':top_p,
 
42
  'temperature':temperature,
43
  }
44
  plugin_kwargs = {
@@ -75,66 +76,6 @@ def get_reduce_token_percent(text):
75
  except:
76
  return 0.5, '不详'
77
 
78
- def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, llm_kwargs, history=[], sys_prompt='', long_connection=True):
79
- """
80
- * 此函数未来将被弃用(替代函数 request_gpt_model_in_new_thread_with_ui_alive 文件 chatgpt_academic/crazy_functions/crazy_utils)
81
-
82
- 调用简单的predict_no_ui接口,但是依然保留了些许界面心跳功能,当对话太长时,会自动采用二分法截断
83
- i_say: 当前输入
84
- i_say_show_user: 显示到对话界面上的当前输入,例如,输入整个文件时,你绝对不想把文件的内容都糊到对话界面上
85
- chatbot: 对话界面句柄
86
- top_p, temperature: gpt参数
87
- history: gpt参数 对话历史
88
- sys_prompt: gpt参数 sys_prompt
89
- long_connection: 是否采用更稳定的连接方式(推荐)(已弃用)
90
- """
91
- import time
92
- from request_llm.bridge_chatgpt import predict_no_ui_long_connection
93
- from toolbox import get_conf
94
- TIMEOUT_SECONDS, MAX_RETRY = get_conf('TIMEOUT_SECONDS', 'MAX_RETRY')
95
- # 多线程的时候,需要一个mutable结构在不同线程之间传递信息
96
- # list就是最简单的mutable结构,我们第一个位置放gpt输出,第二个位置传递报错信息
97
- mutable = [None, '']
98
- # multi-threading worker
99
-
100
- def mt(i_say, history):
101
- while True:
102
- try:
103
- mutable[0] = predict_no_ui_long_connection(
104
- inputs=i_say, llm_kwargs=llm_kwargs, history=history, sys_prompt=sys_prompt)
105
-
106
- except ConnectionAbortedError as token_exceeded_error:
107
- # 尝试计算比例,尽可能多地保留文本
108
- p_ratio, n_exceed = get_reduce_token_percent(
109
- str(token_exceeded_error))
110
- if len(history) > 0:
111
- history = [his[int(len(his) * p_ratio):]
112
- for his in history if his is not None]
113
- else:
114
- i_say = i_say[: int(len(i_say) * p_ratio)]
115
- mutable[1] = f'警告,文本过长将进行截断,Token溢出数:{n_exceed},截断比例:{(1-p_ratio):.0%}。'
116
- except TimeoutError as e:
117
- mutable[0] = '[Local Message] 请求超时。'
118
- raise TimeoutError
119
- except Exception as e:
120
- mutable[0] = f'[Local Message] 异常:{str(e)}.'
121
- raise RuntimeError(f'[Local Message] 异常:{str(e)}.')
122
- # 创建新线程发出http请求
123
- thread_name = threading.Thread(target=mt, args=(i_say, history))
124
- thread_name.start()
125
- # 原来的线程则负责持续更新UI,实现一个超时倒计时,并等待新线程的任务完成
126
- cnt = 0
127
- while thread_name.is_alive():
128
- cnt += 1
129
- chatbot[-1] = (i_say_show_user,
130
- f"[Local Message] {mutable[1]}waiting gpt response {cnt}/{TIMEOUT_SECONDS*2*(MAX_RETRY+1)}"+''.join(['.']*(cnt % 4)))
131
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
132
- time.sleep(1)
133
- # 把gpt的输出从mutable中取出来
134
- gpt_say = mutable[0]
135
- if gpt_say == '[Local Message] Failed with timeout.':
136
- raise TimeoutError
137
- return gpt_say
138
 
139
 
140
  def write_results_to_file(history, file_name=None):
 
27
  """
28
  装饰器函数,用于重组输入参数,改变输入参数的顺序与结构。
29
  """
30
+ def decorated(cookies, max_length, llm_model, txt, txt2, top_p, temperature, chatbot, history, system_prompt, *args):
31
  txt_passon = txt
32
  if txt == "" and txt2 != "": txt_passon = txt2
33
  # 引入一个有cookie的chatbot
 
37
  })
38
  llm_kwargs = {
39
  'api_key': cookies['api_key'],
40
+ 'llm_model': llm_model,
41
  'top_p':top_p,
42
+ 'max_length': max_length,
43
  'temperature':temperature,
44
  }
45
  plugin_kwargs = {
 
76
  except:
77
  return 0.5, '不详'
78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
 
81
  def write_results_to_file(history, file_name=None):
version CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "version": 2.68,
3
  "show_feature": true,
4
- "new_feature": "改善理解pdf(chatpdf)功能 <-> 修复读取罕见字符的BUG <-> 如果一键更新失败,可前往github手动更新"
5
  }
 
1
  {
2
+ "version": 3.0,
3
  "show_feature": true,
4
+ "new_feature": "支持ChatGLM <-> 支持多LLM模型同时对话"
5
  }