Spaces:
Running
Running
File size: 9,198 Bytes
e623457 d03bc9b f2f5171 957c035 3207602 e623457 3207602 d03bc9b 2a3c4ce e623457 2a3c4ce e623457 957c035 e623457 d03bc9b 957c035 28a5857 f2f5171 957c035 f2f5171 957c035 f2f5171 957c035 2a3c4ce e623457 957c035 2a3c4ce 957c035 e623457 957c035 e623457 957c035 e623457 d95cbea 957c035 e623457 957c035 e623457 957c035 e623457 957c035 e623457 957c035 e623457 957c035 d0f5098 f2f5171 28a5857 f2f5171 cdd5b2f 3207602 cdd5b2f 3207602 957c035 f2f5171 957c035 d0f5098 957c035 2a3c4ce d03bc9b e623457 957c035 e623457 957c035 e623457 957c035 e623457 d0f5098 e623457 d0f5098 e623457 957c035 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 |
import re
from typing import Union

import gradio as gr
from loguru import logger
from pydantic import BaseModel
from transformers import pipeline
# from pydantic import BaseModel
# Model identifiers handed to ``transformers.pipeline``. Commented-out names
# are alternative checkpoints kept for reference.
# RU_SUMMARY_MODEL = "IlyaGusev/rubart-large-sum"
# RU_SUMMARY_MODEL = "IlyaGusev/mbart_ru_sum_gazeta"
# Russian and English summaries currently use the same multilingual checkpoint.
RU_SUMMARY_MODEL = "csebuetnlp/mT5_multilingual_XLSum"
# RU_SENTIMENT_MODEL = "IlyaGusev/rubart-large-sentiment"
RU_SENTIMENT_MODEL = "blanchefort/rubert-base-cased-sentiment"
EN_SUMMARY_MODEL = "csebuetnlp/mT5_multilingual_XLSum"
EN_SENTIMENT_MODEL = "distilbert-base-uncased-finetuned-sst-2-english"
# Sample English text used as the initial value and placeholder of the
# English input box.
DEFAULT_EN_TEXT = """Flags on official buildings are being flown at half-mast and a minute's silence will be observed at midday.
Fourteen people were shot dead at the Faculty of Arts building of Charles University in the capital by a student who then killed himself.
Police are working to uncover the motive behind the attack.
It is one of the deadliest assaults by a lone gunman in Europe this century.
Those killed in Thursday's attack included Lenka Hlavkova, head of the Institute of Musicology at the university.
Other victims were named as translator and Finnish literature expert Jan Dlask and student Lucie Spindlerova.
The shooting began at around 15:00 local time (14:00 GMT) at the Faculty of Arts building off Jan Palach Square in the centre of the Czech capital.
The gunman opened fire in the corridors and classrooms of the building, before shooting himself as security forces closed in on him, police say.
US tourist Hannah Mallicoat told the BBC that she and her family had been on Jan Palach Square during the attack.
"A crowd of people were crossing the street when the first shot hit. I thought it was something like a firecracker or a car backfire until I heard the second shot and people started running," she said.
"I saw a bullet hit the ground on the other side of the square about 30ft [9m] away before ducking into a store. The whole area was blocked off and dozens of police cars and ambulances were going towards the university."
In a statement, Czech Prime Minister Petr Fiala said the country had been shocked by this "horrendous act".
"It is hard to find the words to express condemnation on the one hand and, on the other, the pain and sorrow that our entire society is feeling in these days before Christmas."
The gunman is thought to have killed his father at a separate location. He is also suspected in the killing of a young man and his two-month-old daughter who were found dead in a forest on the outskirts of Prague on 15 December.
"""
# Sample Russian text used as the initial value and placeholder of the
# Russian input box.
DEFAULT_RU_TEXT = """В результате взрыва на заправке, который произошел накануне вечером,
пострадали 56 человек, 13 из них — дети, сообщил минздрав Дагестана.
Погибли 12 человек, в том числе двое несовершеннолетних. На место происшествия
приехала глава минздрава республики Татьяна Беляева, она держит под личным контролем
оказание помощи пострадавшим. В Махачкалу вылетел первый заместитель министра здравоохранения России Виктор Фисенко.
Врачам и пострадавшим помогают волонтеры Всероссийского студенческого корпуса спасателей
и сотрудники некоммерческой организации «Добровольцы Дагестана», сообщило министерство молодежи Дагестана.
Жители республики массово пришли сдавать кровь, заявил региональный минздрав.
«Просим отложить визит на станцию переливания на завтра. Запасы крови есть,
доноров для их пополнения на данный час тоже уже немало», — написало ведомство.
"""
class TextRequest(BaseModel):
    """Validated wrapper around the raw text that should be processed."""

    # The text to summarise and classify.
    text: str
class Result(BaseModel):
    """Summary of a text together with its sentiment label and score."""

    # Score reported by the sentiment model for ``sentiment_label``.
    sentiment_score: float
    # Label reported by the sentiment model.
    sentiment_label: str
    # Generated summary text.
    summary: str

    def to_str(self):
        """Return a two-line report: the summary, then label and score.

        The score is rendered with three decimal places.
        """
        summary_line = f"Summary: {self.summary}"
        sentiment_line = (
            f"Sentiment: {self.sentiment_label} ({self.sentiment_score:.3f})"
        )
        return "\n".join((summary_line, sentiment_line))
# class Response(BaseModel):
# results: List[Result] # list of Result objects
class Summarizer:
    """Summarise a text and classify its sentiment, in English or Russian.

    One summarisation pipeline and one sentiment pipeline per language are
    created in ``__init__`` and reused for every request.
    """

    # ``pipeline`` is the transformers factory function rather than a class;
    # the annotations only record that each attribute holds what it returns.
    ru_summary_pipe: pipeline
    ru_sentiment_pipe: pipeline
    en_summary_pipe: pipeline
    en_sentiment_pipe: pipeline

    # Token cap applied when calling the sentiment pipelines so that long
    # inputs are truncated instead of failing inside the model.
    # NOTE(review): 512 is the usual position limit of BERT-base / DistilBERT
    # checkpoints -- confirm if the sentiment models are ever swapped.
    _SENTIMENT_MAX_TOKENS = 512

    def __init__(self) -> None:
        """Load the summarisation and sentiment pipelines for both languages."""
        self.ru_summary_pipe = self._build_summary_pipe(RU_SUMMARY_MODEL)
        # Share one loaded model when both languages are configured with the
        # same checkpoint; otherwise honour EN_SUMMARY_MODEL instead of
        # silently reusing the Russian one.
        if EN_SUMMARY_MODEL == RU_SUMMARY_MODEL:
            self.en_summary_pipe = self.ru_summary_pipe
        else:
            self.en_summary_pipe = self._build_summary_pipe(EN_SUMMARY_MODEL)

        self.ru_sentiment_pipe = pipeline(
            "sentiment-analysis", model=RU_SENTIMENT_MODEL
        )
        self.en_sentiment_pipe = pipeline(
            "sentiment-analysis", model=EN_SENTIMENT_MODEL
        )

    @staticmethod
    def _build_summary_pipe(model_name: str):
        """Create a summarisation pipeline for ``model_name``."""
        return pipeline(
            "summarization", model=model_name, max_length=100, truncation=True
        )

    def mT5_summarize(self, text: str) -> str:
        """Summarise ``text`` by calling ``generate`` on the model directly.

        The tokenizer and model are taken from the English summary pipeline
        rather than loaded a second time.
        NOTE(review): assumes that pipeline wraps the mT5 checkpoint, which
        holds for the current model constants -- confirm if they change.

        Args:
            text: Raw input text; whitespace runs are collapsed before
                tokenisation.

        Returns:
            The decoded summary string.
        """
        tokenizer = self.en_summary_pipe.tokenizer
        model = self.en_summary_pipe.model

        # Collapse every run of whitespace (including newlines) to one space.
        normalized = re.sub(r"\s+", " ", text.strip())

        input_ids = tokenizer(
            [normalized],
            return_tensors="pt",
            padding="max_length",
            truncation=True,
            max_length=512,
        )["input_ids"]
        output_ids = model.generate(
            # Keep the inputs on whatever device the pipeline put the model.
            input_ids=input_ids.to(model.device),
            max_length=84,
            no_repeat_ngram_size=2,
            num_beams=4,
        )[0]
        return tokenizer.decode(
            output_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
        )

    def get_pipe(self, lang: str):
        """Return ``(summary_pipe, sentiment_pipe)`` for ``lang``.

        Args:
            lang: ``"en"`` or ``"ru"``.

        Raises:
            ValueError: If ``lang`` is neither ``"en"`` nor ``"ru"``.
        """
        logger.info(f"Pipe language: {lang}")
        if lang == "en":
            return self.en_summary_pipe, self.en_sentiment_pipe
        if lang == "ru":
            return self.ru_summary_pipe, self.ru_sentiment_pipe
        raise ValueError(f"Language {lang} is not supported")

    def summarize(self, req: Union[TextRequest, str], lang: str = "en") -> Result:
        """Summarise the text and classify its sentiment.

        Args:
            req: Either a ``TextRequest`` or the raw text itself (the UI
                callbacks pass a plain string).
            lang: ``"en"`` or ``"ru"``; selects which pipelines are used.

        Returns:
            A ``Result`` built from the first item of each pipeline output.

        Raises:
            ValueError: If ``lang`` is not supported.
        """
        sum_pipe, sent_pipe = self.get_pipe(lang)
        # The pipelines expect a string, so unwrap the request model if given.
        text = req.text if isinstance(req, TextRequest) else req

        response_summary = sum_pipe(text)
        logger.info(response_summary)
        # Truncate long inputs so they do not exceed the encoder's limit.
        response_sentiment = sent_pipe(
            text, truncation=True, max_length=self._SENTIMENT_MAX_TOKENS
        )
        logger.info(response_sentiment)

        return Result(
            summary=response_summary[0]["summary_text"],
            sentiment_label=response_sentiment[0]["label"],
            sentiment_score=response_sentiment[0]["score"],
        )

    def get_summary(self, req: Union[TextRequest, str], lang: str = "en") -> str:
        """Run ``summarize`` and return its result formatted by ``to_str``."""
        return self.summarize(req, lang).to_str()
if __name__ == "__main__":
    # Load all models once, before the UI is assembled.
    pipe = Summarizer()

    # One row per UI column:
    # (language code, summary model, sentiment model, default text,
    #  input label, output label, output placeholder, button caption)
    column_specs = (
        (
            "en",
            EN_SUMMARY_MODEL,
            EN_SENTIMENT_MODEL,
            DEFAULT_EN_TEXT,
            "en_input",
            "en_output",
            "Summary and Sentiment would be here...",
            "Proceed",
        ),
        (
            "ru",
            RU_SUMMARY_MODEL,
            RU_SENTIMENT_MODEL,
            DEFAULT_RU_TEXT,
            "ru_input",
            "ru_output",
            "Здесь будет обобщение и эмоциональный окрас текста...",
            "Запустить",
        ),
    )

    with gr.Blocks() as demo:
        # (button, input box, hidden language box, output box) per column;
        # click handlers are attached after both columns exist.
        wiring = []
        with gr.Row():
            for (
                lang_code,
                summary_model,
                sentiment_model,
                default_text,
                input_label,
                output_label,
                output_hint,
                button_caption,
            ) in column_specs:
                with gr.Column(scale=2, min_width=600):
                    gr.Markdown(value=f"Model for Summary: {summary_model}")
                    gr.Markdown(value=f"Model for Sentiment: {sentiment_model}")
                    text_in = gr.Textbox(
                        label=input_label,
                        lines=5,
                        value=default_text,
                        placeholder=default_text,
                    )
                    # Hidden box that feeds the language code to the callback.
                    lang_box = gr.Textbox(value=lang_code, visible=False)
                    text_out = gr.Textbox(
                        label=output_label,
                        lines=5,
                        placeholder=output_hint,
                    )
                    run_button = gr.Button(button_caption)
                wiring.append((run_button, text_in, lang_box, text_out))

        for run_button, text_in, lang_box, text_out in wiring:
            run_button.click(
                pipe.get_summary,
                [text_in, lang_box],
                [text_out],
            )

    demo.launch(show_api=False)
|