alexkueck committed on
Commit
2e095eb
·
1 Parent(s): 9018d60

Update beschreibungen.py

Browse files
Files changed (1) hide show
  1. beschreibungen.py +79 -229
beschreibungen.py CHANGED
@@ -1,232 +1,82 @@
1
- from __future__ import annotations
2
- from typing import TYPE_CHECKING, Any, Callable, Dict, List, Tuple, Type
3
- import logging
4
- import json
5
- import os
6
- import datetime
7
- import hashlib
8
- import csv
9
- import requests
10
- import re
11
- import html
12
- import markdown2
13
- import torch
14
- import sys
15
- import gc
16
- from pygments.lexers import guess_lexer, ClassNotFound
17
-
18
  import gradio as gr
19
- from pypinyin import lazy_pinyin
20
- import tiktoken
21
- import mdtex2html
22
- from markdown import markdown
23
- from pygments import highlight
24
- from pygments.lexers import guess_lexer,get_lexer_by_name
25
- from pygments.formatters import HtmlFormatter
26
- from beschreibungen import *
27
-
28
- logging.basicConfig(
29
- level=logging.INFO,
30
- format="%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(message)s",
31
- )
32
-
33
-
34
- def markdown_to_html_with_syntax_highlight(md_str):
35
- def replacer(match):
36
- lang = match.group(1) or "text"
37
- code = match.group(2)
38
- lang = lang.strip()
39
- #print(1,lang)
40
- if lang=="text":
41
- lexer = guess_lexer(code)
42
- lang = lexer.name
43
- #print(2,lang)
44
- try:
45
- lexer = get_lexer_by_name(lang, stripall=True)
46
- except ValueError:
47
- lexer = get_lexer_by_name("python", stripall=True)
48
- formatter = HtmlFormatter()
49
- #print(3,lexer.name)
50
- highlighted_code = highlight(code, lexer, formatter)
51
-
52
- return f'<pre><code class="{lang}">{highlighted_code}</code></pre>'
53
-
54
- code_block_pattern = r"```(\w+)?\n([\s\S]+?)\n```"
55
- md_str = re.sub(code_block_pattern, replacer, md_str, flags=re.MULTILINE)
56
-
57
- html_str = markdown(md_str)
58
- return html_str
59
-
60
-
61
- def normalize_markdown(md_text: str) -> str:
62
- lines = md_text.split("\n")
63
- normalized_lines = []
64
- inside_list = False
65
-
66
- for i, line in enumerate(lines):
67
- if re.match(r"^(\d+\.|-|\*|\+)\s", line.strip()):
68
- if not inside_list and i > 0 and lines[i - 1].strip() != "":
69
- normalized_lines.append("")
70
- inside_list = True
71
- normalized_lines.append(line)
72
- elif inside_list and line.strip() == "":
73
- if i < len(lines) - 1 and not re.match(
74
- r"^(\d+\.|-|\*|\+)\s", lines[i + 1].strip()
75
- ):
76
- normalized_lines.append(line)
77
- continue
78
- else:
79
- inside_list = False
80
- normalized_lines.append(line)
81
-
82
- return "\n".join(normalized_lines)
83
-
84
-
85
- def convert_mdtext(md_text):
86
- code_block_pattern = re.compile(r"```(.*?)(?:```|$)", re.DOTALL)
87
- inline_code_pattern = re.compile(r"`(.*?)`", re.DOTALL)
88
- code_blocks = code_block_pattern.findall(md_text)
89
- non_code_parts = code_block_pattern.split(md_text)[::2]
90
-
91
- result = []
92
- for non_code, code in zip(non_code_parts, code_blocks + [""]):
93
- if non_code.strip():
94
- non_code = normalize_markdown(non_code)
95
- if inline_code_pattern.search(non_code):
96
- result.append(markdown(non_code, extensions=["tables"]))
97
- else:
98
- result.append(mdtex2html.convert(non_code, extensions=["tables"]))
99
- if code.strip():
100
- code = f"\n```{code}\n\n```"
101
- code = markdown_to_html_with_syntax_highlight(code)
102
- result.append(code)
103
- result = "".join(result)
104
- result += ALREADY_CONVERTED_MARK
105
- return result
106
-
107
- def convert_asis(userinput):
108
- return f"<p style=\"white-space:pre-wrap;\">{html.escape(userinput)}</p>"+ALREADY_CONVERTED_MARK
109
-
110
- def detect_converted_mark(userinput):
111
- if userinput.endswith(ALREADY_CONVERTED_MARK):
112
- return True
113
- else:
114
- return False
115
-
116
-
117
-
118
- def detect_language(code):
119
- if code.startswith("\n"):
120
- first_line = ""
121
- else:
122
- first_line = code.strip().split("\n", 1)[0]
123
- language = first_line.lower() if first_line else ""
124
- code_without_language = code[len(first_line) :].lstrip() if first_line else code
125
- return language, code_without_language
126
-
127
- def convert_to_markdown(text):
128
- text = text.replace("$","&#36;")
129
- def replace_leading_tabs_and_spaces(line):
130
- new_line = []
131
-
132
- for char in line:
133
- if char == "\t":
134
- new_line.append("&#9;")
135
- elif char == " ":
136
- new_line.append("&nbsp;")
137
- else:
138
- break
139
- return "".join(new_line) + line[len(new_line):]
140
-
141
- markdown_text = ""
142
- lines = text.split("\n")
143
- in_code_block = False
144
-
145
- for line in lines:
146
- if in_code_block is False and line.startswith("```"):
147
- in_code_block = True
148
- markdown_text += f"{line}\n"
149
- elif in_code_block is True and line.startswith("```"):
150
- in_code_block = False
151
- markdown_text += f"{line}\n"
152
- elif in_code_block:
153
- markdown_text += f"{line}\n"
154
- else:
155
- line = replace_leading_tabs_and_spaces(line)
156
- line = re.sub(r"^(#)", r"\\\1", line)
157
- markdown_text += f"{line} \n"
158
-
159
- return markdown_text
160
-
161
- def add_language_tag(text):
162
- def detect_language(code_block):
163
- try:
164
- lexer = guess_lexer(code_block)
165
- return lexer.name.lower()
166
- except ClassNotFound:
167
- return ""
168
-
169
- code_block_pattern = re.compile(r"(```)(\w*\n[^`]+```)", re.MULTILINE)
170
-
171
- def replacement(match):
172
- code_block = match.group(2)
173
- if match.group(2).startswith("\n"):
174
- language = detect_language(code_block)
175
- if language:
176
- return f"```{language}{code_block}```"
177
- else:
178
- return f"```\n{code_block}```"
179
- else:
180
- return match.group(1) + code_block + "```"
181
-
182
- text2 = code_block_pattern.sub(replacement, text)
183
- return text2
184
-
185
- def delete_last_conversation(chatbot, history):
186
- if len(chatbot) > 0:
187
- chatbot.pop()
188
-
189
- if len(history) > 0:
190
- history.pop()
191
-
192
- return (
193
- chatbot,
194
- history,
195
- "Delete Done",
196
- )
197
-
198
- def reset_state():
199
- return [], [], "Reset Done"
200
-
201
- def reset_textbox():
202
- return gr.update(value=""),""
203
-
204
- def cancel_outputing():
205
- return "Stop Done"
206
-
207
-
208
- class State:
209
- interrupted = False
210
-
211
- def interrupt(self):
212
- self.interrupted = True
213
-
214
- def recover(self):
215
- self.interrupted = False
216
- shared_state = State()
217
-
218
-
219
-
220
-
221
-
222
-
223
 
224
 
225
- def is_stop_word_or_prefix(s: str, stop_words: list) -> bool:
226
- for stop_word in stop_words:
227
- if s.endswith(stop_word):
228
- return True
229
- for i in range(1, len(stop_word)):
230
- if s.endswith(stop_word[:i]):
231
- return True
232
- return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
 
4
+ title = """<h1 align="left" style="min-width:200px; margin-top:0;"> Chat mit Hugging Face Chatbots</h1>"""
5
+ description_top = """\
6
+ <div align="left">
7
+ <p> Aktuelles Modell: Hugging Face Chatbots</p>
8
+ <p>
9
+ Disclaimer: Die hier verwendeten Modelle sind Modelle vom Hugging Face Hub. Kommerzieller Gebrauch ist ausdrücklich verboten. Der Output des Modells ist nicht zensiert und die Meinungen im Output entsprechen nicht unbedingt denen des Autors dieser App. Gebrauch auf eigene Gefahr!
10
+ </p>
11
+ </div>
12
+ """
13
+ description = """\
14
+ <div align="center" style="margin:16px 0">
15
+ Hier werden verschiedene Modelle vom Hugging Face Hub verwendet.
16
+ </div>
17
+ """
18
+ CONCURRENT_COUNT = 100
19
+
20
+
21
+ ALREADY_CONVERTED_MARK = "<!-- ALREADY CONVERTED BY PARSER. -->"
22
+
23
+ small_and_beautiful_theme = gr.themes.Soft(
24
+ primary_hue=gr.themes.Color(
25
+ c50="#02C160",
26
+ c100="rgba(2, 193, 96, 0.2)",
27
+ c200="#02C160",
28
+ c300="rgba(2, 193, 96, 0.32)",
29
+ c400="rgba(2, 193, 96, 0.32)",
30
+ c500="rgba(2, 193, 96, 1.0)",
31
+ c600="rgba(2, 193, 96, 1.0)",
32
+ c700="rgba(2, 193, 96, 0.32)",
33
+ c800="rgba(2, 193, 96, 0.32)",
34
+ c900="#02C160",
35
+ c950="#02C160",
36
+ ),
37
+ secondary_hue=gr.themes.Color(
38
+ c50="#576b95",
39
+ c100="#576b95",
40
+ c200="#576b95",
41
+ c300="#576b95",
42
+ c400="#576b95",
43
+ c500="#576b95",
44
+ c600="#576b95",
45
+ c700="#576b95",
46
+ c800="#576b95",
47
+ c900="#576b95",
48
+ c950="#576b95",
49
+ ),
50
+ neutral_hue=gr.themes.Color(
51
+ name="gray",
52
+ c50="#f9fafb",
53
+ c100="#f3f4f6",
54
+ c200="#e5e7eb",
55
+ c300="#d1d5db",
56
+ c400="#B2B2B2",
57
+ c500="#808080",
58
+ c600="#636363",
59
+ c700="#515151",
60
+ c800="#393939",
61
+ c900="#272727",
62
+ c950="#171717",
63
+ ),
64
+ radius_size=gr.themes.sizes.radius_sm,
65
+ ).set(
66
+ button_primary_background_fill="#06AE56",
67
+ button_primary_background_fill_dark="#06AE56",
68
+ button_primary_background_fill_hover="#07C863",
69
+ button_primary_border_color="#06AE56",
70
+ button_primary_border_color_dark="#06AE56",
71
+ button_primary_text_color="#FFFFFF",
72
+ button_primary_text_color_dark="#FFFFFF",
73
+ button_secondary_background_fill="#F2F2F2",
74
+ button_secondary_background_fill_dark="#2B2B2B",
75
+ button_secondary_text_color="#393939",
76
+ button_secondary_text_color_dark="#FFFFFF",
77
+ # background_fill_primary="#F7F7F7",
78
+ # background_fill_primary_dark="#1F1F1F",
79
+ block_title_text_color="*primary_500",
80
+ block_title_background_fill="*primary_100",
81
+ input_background_fill="#F6F6F6",
82
+ )