Update utils.py
Browse files
utils.py
CHANGED
@@ -15,9 +15,10 @@ import gc
|
|
15 |
from pygments.lexers import guess_lexer, ClassNotFound
|
16 |
import time
|
17 |
import json
|
18 |
-
import
|
19 |
-
from
|
20 |
-
import
|
|
|
21 |
|
22 |
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer, AutoModelForCausalLM, GPTNeoForCausalLM, GPT2Tokenizer
|
23 |
from sentence_transformers import SentenceTransformer, util
|
@@ -28,9 +29,9 @@ from pypinyin import lazy_pinyin
|
|
28 |
import tiktoken
|
29 |
import mdtex2html
|
30 |
from markdown import markdown
|
31 |
-
from pygments import highlight
|
32 |
-
from pygments.lexers import guess_lexer,get_lexer_by_name
|
33 |
-
from pygments.formatters import HtmlFormatter
|
34 |
|
35 |
from langchain.chains import LLMChain, RetrievalQA
|
36 |
from langchain.prompts import PromptTemplate
|
@@ -389,7 +390,7 @@ def llm_chain2(prompt, context):
|
|
389 |
inputs = tokenizer_rag(full_prompt, return_tensors="pt", max_length=1024, truncation=True)
|
390 |
|
391 |
#Generiere die Antwort
|
392 |
-
outputs = modell_rag.generate(inputs['input_ids'],
|
393 |
answer = tokenizer_rag.decode(outputs[0], skip_special_tokens=True)
|
394 |
|
395 |
return answer
|
@@ -538,6 +539,49 @@ def transfer_input(inputs):
|
|
538 |
)
|
539 |
|
540 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
541 |
#################################################
|
542 |
#Klasse mit Zuständen - z.B. für Interrupt, wenn Stop gedrückt...
|
543 |
#################################################
|
|
|
15 |
from pygments.lexers import guess_lexer, ClassNotFound
|
16 |
import time
|
17 |
import json
|
18 |
+
import base64
|
19 |
+
from io import BytesIO
|
20 |
+
import urllib.parse
|
21 |
+
import tempfile
|
22 |
|
23 |
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer, AutoModelForCausalLM, GPTNeoForCausalLM, GPT2Tokenizer
|
24 |
from sentence_transformers import SentenceTransformer, util
|
|
|
29 |
import tiktoken
|
30 |
import mdtex2html
|
31 |
from markdown import markdown
|
32 |
+
#from pygments import highlight
|
33 |
+
#from pygments.lexers import guess_lexer,get_lexer_by_name
|
34 |
+
#from pygments.formatters import HtmlFormatter
|
35 |
|
36 |
from langchain.chains import LLMChain, RetrievalQA
|
37 |
from langchain.prompts import PromptTemplate
|
|
|
390 |
inputs = tokenizer_rag(full_prompt, return_tensors="pt", max_length=1024, truncation=True)
|
391 |
|
392 |
#Generiere die Antwort
|
393 |
+
outputs = modell_rag.generate(inputs['input_ids'], max_new_tokens=1024, num_beams=2, early_stopping=True)
|
394 |
answer = tokenizer_rag.decode(outputs[0], skip_special_tokens=True)
|
395 |
|
396 |
return answer
|
|
|
539 |
)
|
540 |
|
541 |
|
542 |
+
########################################################
|
543 |
+
######## Hilfsfunktionen Datei-Upload ##################
|
544 |
+
# Hochladen von Dateien
|
545 |
+
def upload_pdf(file):
    """Upload a file into the ``kkg_dokumente/`` folder of the configured repo.

    The upload is delegated to ``api.upload_file`` using the module-level
    ``REPO_ID``, ``REPO_TYPE`` and ``HF_WRITE`` values.

    Args:
        file: Object exposing a ``name`` attribute that holds the local path
            of the file (presumably a Gradio upload handle -- confirm against
            the caller), or ``None`` when nothing was selected.

    Returns:
        ``(None, message)`` when ``file`` is ``None``; otherwise a single
        status string naming the uploaded file.
    """
    # NOTE(review): the two return paths differ in shape (2-tuple vs. str);
    # confirm which one the caller actually expects.
    if file is None:
        return None, "Keine Datei hochgeladen."

    local_path = file.name
    # Only the base name is kept, so the repo path never mirrors local dirs.
    pdf_name = os.path.basename(local_path)

    api.upload_file(
        path_or_fileobj=local_path,
        path_in_repo=f"kkg_dokumente/{pdf_name}",
        repo_id=REPO_ID,
        repo_type=REPO_TYPE,
        token=HF_WRITE,
    )
    return f"PDF '{pdf_name}' erfolgreich hochgeladen."
|
562 |
+
|
563 |
+
def display_files():
    """Render the entries of ``DOCS_DIR`` as an HTML table.

    Returns:
        An HTML ``<table>`` string with a header row followed by one row per
        directory entry: the result of ``download_link(entry)`` in bold and
        the entry's size in KB with two decimals. Row backgrounds alternate
        between two colours.
    """
    parts = [
        "<table style='width:100%; border-collapse: collapse;'>",
        "<tr style='background-color: #930BBA; color: white; font-weight: bold; font-size: larger;'><th>Dateiname</th><th>Größe (KB)</th></tr>",
    ]

    for index, entry in enumerate(os.listdir(DOCS_DIR)):
        # Size is reported in KB.
        size_kb = os.path.getsize(os.path.join(DOCS_DIR, entry)) / 1024
        # Even rows use the lighter shade, odd rows the darker one.
        background = "#3a3a3a" if index % 2 else "#4f4f4f"
        parts.append(f"<tr style='background-color: {background}; border-bottom: 1px solid #ddd;'>")
        parts.append(f"<td><b>{download_link(entry)}</b></td>")
        parts.append(f"<td>{size_kb:.2f}</td></tr>")

    parts.append("</table>")
    return "".join(parts)
|
576 |
+
|
577 |
+
|
578 |
+
# gefundene relevante Dokumente auflisten (links)
|
579 |
+
def list_pdfs():
    """Return the names of all ``.pdf`` files found in ``DOCS_DIR``.

    The existence guard and the listing now refer to the same directory.
    Previously the guard checked ``DOCS_DIR`` while the listing read
    ``SAVE_DIR``, so the guard did not protect the directory actually listed.

    Returns:
        A list of file names (not full paths) ending in ``.pdf``; an empty
        list when ``DOCS_DIR`` is missing or is not a directory.
    """
    # isdir (rather than exists) also covers the case where the path exists
    # but is a regular file, which would make os.listdir raise.
    if not os.path.isdir(DOCS_DIR):
        return []
    return [name for name in os.listdir(DOCS_DIR) if name.endswith('.pdf')]
|
583 |
+
|
584 |
+
|
585 |
#################################################
|
586 |
#Klasse mit Zuständen - z.B. für Interrupt, wenn Stop gedrückt...
|
587 |
#################################################
|