dayannex committed on
Commit
a9087d6
·
1 Parent(s): 76ed2cd

app modified

Browse files
Files changed (1) hide show
  1. app.py +34 -30
app.py CHANGED
@@ -318,7 +318,7 @@ class ModeloDataset:
318
  i=i+1
319
  labels = predicted_token_class_ids
320
  loss = self.model(input_ids, labels=labels).loss
321
-
322
  else:
323
 
324
  print('idioma:',idioma)
@@ -369,22 +369,22 @@ class ModeloDataset:
369
  i=i+1
370
  print('new_identificadores:',new_identificadores, ' ',len(new_identificadores) )
371
 
372
- return new_identificadores, new_tokens
373
  #return ids, _predicted_tokens_classes
374
- #def salida_texto( self,ids,pre_tokens):
375
- # new_labels = []
376
- # current_word = None
377
- # i=0
378
- # for identificador in pre_tokens:
379
- # if (self.tokenizer.decode(ids[i])!="<s>"):
380
- # if identificador=='O':
381
- #
382
- # new_labels.append(self.tokenizer.decode(ids[i]))
383
- # else:
384
- # new_labels.append(' ' + identificador)
385
- # i=i+1
386
- #
387
- # return new_labels
388
  def salida_texto( self,tokens,pre_tokens):
389
  new_labels = []
390
  current_word = None
@@ -410,16 +410,16 @@ class ModeloDataset:
410
  i=i+1
411
 
412
  return out
413
- #def salida_texto2(self, ids,pre_tokens):
414
- # i=0
415
- # out=[]
416
- # for iden in pre_tokens:
417
- # if i<len(ids):
418
- #
419
- # out.append(self.salida_texto( ids[i],np.array(pre_tokens[i])) )
420
- # i=i+1
421
- #
422
- # return out
423
  def unir_array(self,_out):
424
  i=0
425
  salida=[]
@@ -515,10 +515,14 @@ def procesar(texto,archivo, etiquetas):
515
  #print('sentences',sentences)
516
  ides, predicted = modelo.aplicar_modelo(sentences,model.idioma)
517
 
518
-
519
- out=modelo.salida_texto2( ides,predicted)
520
- print('out:',out)
521
- df_new[item] = modelo.unir_array(out)
 
 
 
 
522
  return "", df_new, df_new.to_csv(sep='\t', encoding='utf-8',index=False)
523
  #return "", df_new, df_new.to_excel( index=False)
524
  else:
 
318
  i=i+1
319
  labels = predicted_token_class_ids
320
  loss = self.model(input_ids, labels=labels).loss
321
+ return ids,_predicted_tokens_classes
322
  else:
323
 
324
  print('idioma:',idioma)
 
369
  i=i+1
370
  print('new_identificadores:',new_identificadores, ' ',len(new_identificadores) )
371
 
372
+ return new_identificadores, new_tokens
373
  #return ids, _predicted_tokens_classes
374
+ def salida_texto_es( self,ids,pre_tokens):
375
+ new_labels = []
376
+ current_word = None
377
+ i=0
378
+ for identificador in pre_tokens:
379
+ if (self.tokenizer.decode(ids[i])!="<s>"):
380
+ if identificador=='O':
381
+
382
+ new_labels.append(self.tokenizer.decode(ids[i]))
383
+ else:
384
+ new_labels.append(' ' + identificador)
385
+ i=i+1
386
+
387
+ return new_labels
388
  def salida_texto( self,tokens,pre_tokens):
389
  new_labels = []
390
  current_word = None
 
410
  i=i+1
411
 
412
  return out
413
+ def salida_texto2_es(self, ids,pre_tokens):
414
+ i=0
415
+ out=[]
416
+ for iden in pre_tokens:
417
+ if i<len(ids):
418
+
419
+ out.append(self.salida_texto_es( ids[i],np.array(pre_tokens[i])) )
420
+ i=i+1
421
+
422
+ return out
423
  def unir_array(self,_out):
424
  i=0
425
  salida=[]
 
515
  #print('sentences',sentences)
516
  ides, predicted = modelo.aplicar_modelo(sentences,model.idioma)
517
 
518
+ if model.idioma=="es":
519
+ out=modelo.salida_texto2_es( ides,predicted)
520
+ print('out:',out)
521
+ df_new[item] = modelo.unir_array(out)
522
+ else:
523
+ out=modelo.salida_texto2( ides,predicted)
524
+ print('out:',out)
525
+ df_new[item] = modelo.unir_array(out)
526
  return "", df_new, df_new.to_csv(sep='\t', encoding='utf-8',index=False)
527
  #return "", df_new, df_new.to_excel( index=False)
528
  else: