igoracmorais committed on
Commit
40f834a
·
verified ·
1 Parent(s): 9247897

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -7
app.py CHANGED
@@ -16,12 +16,14 @@ def generate_qa_pairs(text):
16
  tokenizer = AutoTokenizer.from_pretrained("valhalla/t5-base-qg-hl")
17
  model = AutoModelForSeq2SeqLM.from_pretrained("valhalla/t5-base-qg-hl")
18
 
19
- input_text = "highlight: " + text
20
- input_ids = tokenizer.encode(input_text, return_tensors="pt")
21
- outputs = model.generate(input_ids)
22
- questions = tokenizer.decode(outputs[0])
23
 
24
- return questions
 
 
 
25
 
26
  # Função para converter os pares de QA no formato SQuAD
27
  def convert_to_squad_format(qas, context):
@@ -33,8 +35,8 @@ def convert_to_squad_format(qas, context):
33
  "question": qa['question'],
34
  "id": str(i),
35
  "answers": {
36
- "answer_start": [qa['answer']['start']],
37
- "text": [qa['answer']['text']]
38
  }
39
  }
40
  squad_data.append(entry)
 
16
  tokenizer = AutoTokenizer.from_pretrained("valhalla/t5-base-qg-hl")
17
  model = AutoModelForSeq2SeqLM.from_pretrained("valhalla/t5-base-qg-hl")
18
 
19
+ inputs = tokenizer.encode("generate questions: " + text, return_tensors="pt", max_length=512, truncation=True)
20
+ outputs = model.generate(inputs, max_length=512, num_beams=4, early_stopping=True)
21
+ questions = [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]
 
22
 
23
+ # O modelo retorna apenas as perguntas, então precisamos criar respostas fictícias para o exemplo
24
+ qas = [{"question": question, "answer": "answer", "answer_start": 0} for question in questions]
25
+
26
+ return qas
27
 
28
  # Função para converter os pares de QA no formato SQuAD
29
  def convert_to_squad_format(qas, context):
 
35
  "question": qa['question'],
36
  "id": str(i),
37
  "answers": {
38
+ "answer_start": [qa['answer_start']],
39
+ "text": [qa['answer']]
40
  }
41
  }
42
  squad_data.append(entry)