# LongDocumentSummarizer / summarizer_app.py
# NicolasGaudemet's picture
# Update summarizer_app.py
# 61e5603
# raw
# history blame
# 2.77 kB
import os
import json
from langchain import OpenAI, PromptTemplate, LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.mapreduce import MapReduceChain
from langchain.prompts import PromptTemplate
from langchain.docstore.document import Document
from langchain.chains.summarize import load_summarize_chain
import gradio as gr
# Load the application parameters.
# The encoding is given explicitly: JSON files are UTF-8, and relying on the
# platform default encoding can fail on systems whose default is not UTF-8.
with open("parametres.json", "r", encoding="utf-8") as p:
    params = json.load(p)
# Maximum size of the text to summarize; only used to derive chunks_max.
taille_max_texte = params["taille_max_texte"]
# Summary size: passed to the LLM as max_tokens and used to derive the word
# bounds quoted in the summarization prompt.
taille_resume = params["taille_resume"]
# Model name from the parameters file. NOTE: the active LLM definition below
# hard-codes "gpt-3.5-turbo"; only the commented-out alternative uses it.
modele = params["modele"]
# Chunk size handed to the text splitter.
chunk_size = params["taille_chunks"]
# Maximum number of chunks that are summarized; extra chunks are dropped.
chunks_max = taille_max_texte*2//chunk_size
# LLM definition (the API key is read from the 'OpenaiKey' environment variable).
llm = ChatOpenAI(model_name="gpt-3.5-turbo", max_tokens = taille_resume, temperature=0, openai_api_key = os.environ['OpenaiKey'])
#llm = OpenAI(model_name = modele, max_tokens = taille_resume, temperature=0, openai_api_key = os.environ['OpenaiKey'])
# Summarize a text
def summarize_text(text_to_summarize, llm):
    """Summarize a long text with a map-reduce summarization chain.

    The text is split with ``CharacterTextSplitter`` (chunk size taken from
    the module-level ``chunk_size``) and only the first ``chunks_max`` chunks
    are kept. The same prompt is used for the map step and the combine step.

    Args:
        text_to_summarize: Raw text to summarize.
        llm: Language model handed to ``load_summarize_chain``.

    Returns:
        The chain's ``output_text`` (the final summary), or an empty string
        when the text yields no chunk to summarize.
    """
    # Split the text and cap the number of chunks sent to the model.
    text_splitter = CharacterTextSplitter(chunk_size=chunk_size)
    texts = text_splitter.split_text(text_to_summarize)
    docs = [Document(page_content=t) for t in texts[:chunks_max]]
    if not docs:
        # Nothing to summarize: skip the LLM call rather than asking the
        # model to summarize an empty input.
        return ""

    # Word bounds quoted in the prompt. ``// 1.5`` is a float floor division
    # (e.g. 300 // 1.5 == 200.0), so cast to int to avoid "200.0 words".
    min_words = int(taille_resume // 2)
    max_words = int(taille_resume // 1.5)

    # ``{text}`` lives in a plain (non-f) literal so that it stays a
    # placeholder for PromptTemplate; only the bounds are interpolated here.
    prompt_template = (
        "Write a summary of the following:\n"
        "{text}\n"
        f"Make sure the length of the summary you write is higher than {min_words} words and lower than {max_words} words.\n"
        "Make also sure that it is in the same langage than the original text.\n"
        "SUMMARY:"
    )
    summary_langage_prompt = PromptTemplate(template=prompt_template, input_variables=['text'])

    # Map-reduce summarization, keeping the per-chunk summaries for logging.
    chain = load_summarize_chain(
        llm,
        chain_type="map_reduce",
        return_intermediate_steps=True,
        map_prompt=summary_langage_prompt,
        combine_prompt=summary_langage_prompt,
    )
    steps = chain({"input_documents": docs}, return_only_outputs=True)

    # Log the number of intermediate summaries and their content.
    print(len(steps['intermediate_steps']))
    print(steps['intermediate_steps'])
    return steps['output_text']
# Read and summarize a text file
def summarize_uploaded_file(file):
    """Summarize the text file received from the Gradio file input.

    Args:
        file: Uploaded file object whose ``name`` attribute is the path of
            the file on disk, or ``None`` (e.g. when nothing was uploaded).

    Returns:
        The summary produced by ``summarize_text``, or an error message when
        no ``.txt`` file was provided.
    """
    # Reject a missing upload and non-.txt files. The extension is compared
    # case-insensitively so that "NOTES.TXT" is accepted too.
    if file is None or not file.name.lower().endswith('.txt'):
        return ("Le fichier doit être un fichier texte (.txt)")

    # latin-1 can decode any byte sequence, so reading a UTF-8 file with it
    # never fails but silently garbles accented characters. Try UTF-8 first
    # (BOM-aware) and fall back to latin-1 only when the bytes are not UTF-8.
    try:
        with open(file.name, "r", encoding="utf-8-sig") as f:
            text = f.read()
    except UnicodeDecodeError:
        with open(file.name, "r", encoding="latin-1") as f:
            text = f.read()

    return summarize_text(text, llm)
# Build the Gradio interface: one file input, one text box showing the summary
iface = gr.Interface(
    title="Long Text Summarizer",
    description="par Nicolas \nRésume un long fichier texte",
    fn=summarize_uploaded_file,
    inputs="file",
    outputs=gr.outputs.Textbox(label="Résumé"),
    allow_flagging="never",
)
# Start serving the interface
iface.launch()