dayannex committed on
Commit 8f4b0f4 · 1 Parent(s): 0feec6e

app modified

Files changed (1): app.py (+5 -5)
app.py CHANGED
@@ -267,7 +267,7 @@ class ModeloDataset:
         input_ids = torch.tensor(ids)
         #model = RobertaForTokenClassification.from_pretrained("BSC-LT/roberta_model_for_anonimization")

-        model = AutoModelForTokenClassification.from_pretrained("BSC-LT/roberta_model_for_anonimization")
+        model = RobertaForTokenClassification.from_pretrained("BSC-LT/roberta_model_for_anonimization")
         with torch.no_grad():
             logits = model(input_ids).logits
         predicted_token_class_ids = logits.argmax(-1)
@@ -282,7 +282,7 @@ class ModeloDataset:
         #print(round(loss.item(), 2))
     else:

-
+        print('idioma:',idioma)
         tokenizer = AutoTokenizer.from_pretrained("FacebookAI/xlm-roberta-large-finetuned-conll03-english")
         tokenized_text=[tokenizer.tokenize(sentence) for sentence in _sentences]
@@ -291,7 +291,7 @@ class ModeloDataset:
         ids=pad_sequences(ids,maxlen=MAX_LEN,dtype="long",truncating="post", padding="post")
         input_ids = torch.tensor(ids)

-
+
         model = AutoModelForTokenClassification.from_pretrained("FacebookAI/xlm-roberta-large-finetuned-conll03-english")
         with torch.no_grad():
             logits = model(input_ids).logits
@@ -419,7 +419,7 @@ def procesar(texto,archivo):
     for item in df.columns.values:
         sentences=df[item]
         model.identificacion_idioma(sentences[0])
-        ides, predicted = modelo.aplicar_modelo(sentences,model.idioma)
+        ides, predicted = modelo.aplicar_modelo(sentences,"en")
         out=modelo.salida_texto2( ides,predicted)

         df_new[item] = modelo.unir_array(out)
@@ -434,7 +434,7 @@ def procesar(texto,archivo):
     for item in df.columns.values:
         sentences=df[item]
         print('item antes de aplicar modelo',item)
-        ides, predicted = modelo.aplicar_modelo(sentences)
+        ides, predicted = modelo.aplicar_modelo(sentences,"en")
         print('despues de aplicar modelo')
         out=modelo.salida_texto2( ides,predicted)

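For context, a minimal runnable sketch of the English token-classification path that this commit routes "en" inputs through. It uses only the public checkpoint named in the diff; the sample sentence is made up, and it lets the tokenizer handle padding and truncation instead of reproducing app.py's pad_sequences/MAX_LEN plumbing.

```python
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification

# Hypothetical input; app.py reads its sentences from a DataFrame column.
sentences = ["John Smith lives in London."]

checkpoint = "FacebookAI/xlm-roberta-large-finetuned-conll03-english"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForTokenClassification.from_pretrained(checkpoint)

# Built-in padding/truncation in place of the hand-rolled pad_sequences call.
inputs = tokenizer(sentences, return_tensors="pt", padding=True, truncation=True)

with torch.no_grad():
    logits = model(**inputs).logits

# argmax over the label dimension, as in the diff, then map ids to label names.
predicted_token_class_ids = logits.argmax(-1)
labels = [model.config.id2label[i.item()] for i in predicted_token_class_ids[0]]
print(list(zip(tokenizer.convert_ids_to_tokens(inputs["input_ids"][0]), labels)))
```

On the Spanish branch, the commit pins the concrete RobertaForTokenClassification class for BSC-LT/roberta_model_for_anonimization. Since that checkpoint declares a RoBERTa architecture in its config, AutoModelForTokenClassification would normally resolve to the same class, so the explicit class mainly makes the model dependency obvious in the code.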