dayannex committed on
Commit
76ed2cd
·
1 Parent(s): c4cc94f

app modified

Browse files
Files changed (1) hide show
  1. app.py +48 -24
app.py CHANGED
@@ -287,10 +287,10 @@ class ModeloDataset:
287
  x=0
288
  new_identificadores=[]
289
  for token in predicted_tokens_classes:
290
- print('x',x, len(predicted_tokens_classes)-x)
291
  if x not in ig_tokens:
292
  if len(new_identificadores) < tamano:
293
- print('se agrega token')
294
  new_identificadores.append(token)
295
  x=x+1
296
  else:
@@ -354,7 +354,7 @@ class ModeloDataset:
354
  i=0
355
  new_identificadores=[]
356
  for item in tokenized_text:
357
- print('len(tokens)',len(item))
358
  aux1, aux2= self.reordenacion_tokens(item)
359
  new_tokens.append(aux1)
360
  ig_tok.append(aux2)
@@ -362,40 +362,64 @@ class ModeloDataset:
362
  print('ig_tok',ig_tok)
363
 
364
  for items in _predicted_tokens_classes:
365
- #if i<len(new_tokens[i]):
366
- print('len(new_tokens[i])',len(new_tokens[i]))
367
  aux=self.reordenacion_identificadores(ig_tok[i],items,len(new_tokens[i]))
368
  new_identificadores.append(aux)
369
  i=i+1
370
  print('new_identificadores:',new_identificadores, ' ',len(new_identificadores) )
371
 
372
- #return new_identificadores, new_tokens
373
- return ids, _predicted_tokens_classes
374
- def salida_texto( self,ids,pre_tokens):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
375
  new_labels = []
376
  current_word = None
377
  i=0
378
- for identificador in pre_tokens:
379
- if (self.tokenizer.decode(ids[i])!="<s>"):
380
- if identificador=='O':
381
-
382
- new_labels.append(self.tokenizer.decode(ids[i]))
383
- else:
384
- new_labels.append(' ' + identificador)
385
- i=i+1
386
-
387
- return new_labels
388
-
389
- def salida_texto2(self, ids,pre_tokens):
390
  i=0
391
  out=[]
392
- for iden in pre_tokens:
393
- if i<len(ids):
394
 
395
- out.append(self.salida_texto( ids[i],np.array(pre_tokens[i])) )
396
  i=i+1
397
 
398
- return out
 
 
 
 
 
 
 
 
 
 
399
  def unir_array(self,_out):
400
  i=0
401
  salida=[]
 
287
  x=0
288
  new_identificadores=[]
289
  for token in predicted_tokens_classes:
290
+
291
  if x not in ig_tokens:
292
  if len(new_identificadores) < tamano:
293
+
294
  new_identificadores.append(token)
295
  x=x+1
296
  else:
 
354
  i=0
355
  new_identificadores=[]
356
  for item in tokenized_text:
357
+
358
  aux1, aux2= self.reordenacion_tokens(item)
359
  new_tokens.append(aux1)
360
  ig_tok.append(aux2)
 
362
  print('ig_tok',ig_tok)
363
 
364
  for items in _predicted_tokens_classes:
365
+
366
+
367
  aux=self.reordenacion_identificadores(ig_tok[i],items,len(new_tokens[i]))
368
  new_identificadores.append(aux)
369
  i=i+1
370
  print('new_identificadores:',new_identificadores, ' ',len(new_identificadores) )
371
 
372
+ return new_identificadores, new_tokens
373
+ #return ids, _predicted_tokens_classes
374
+ #def salida_texto( self,ids,pre_tokens):
375
+ # new_labels = []
376
+ # current_word = None
377
+ # i=0
378
+ # for identificador in pre_tokens:
379
+ # if (self.tokenizer.decode(ids[i])!="<s>"):
380
+ # if identificador=='O':
381
+ #
382
+ # new_labels.append(self.tokenizer.decode(ids[i]))
383
+ # else:
384
+ # new_labels.append(' ' + identificador)
385
+ # i=i+1
386
+ #
387
+ # return new_labels
388
def salida_texto(self, tokens, pre_tokens):
    """Build the output text for one sequence of tokens and labels.

    ``tokens`` and ``pre_tokens`` are read in lockstep by position. For
    each token, the label at the same index decides what is emitted:

    * label equal to ``'O'`` or containing the substring ``'MISC'``:
      the token itself, with every ``'▁'`` character removed
      (presumably the tokenizer's word-boundary marker);
    * any other label: the label text replaces the token.

    Every emitted piece is prefixed with a single space, so a non-empty
    result always starts with a space.

    Args:
        tokens: iterable of token strings.
        pre_tokens: indexable sequence of label strings; entries beyond
            the number of tokens are ignored.

    Returns:
        The concatenated text, or ``''`` when ``tokens`` is empty.

    Raises:
        IndexError: if ``pre_tokens`` has fewer entries than ``tokens``.
    """
    pieces = []
    for position, token in enumerate(tokens):
        # Index (rather than zip) so a too-short label list still raises
        # instead of silently truncating the output.
        label = pre_tokens[position]
        if label == 'O' or 'MISC' in label:
            pieces.append(' ' + token.replace('▁', ''))
        else:
            pieces.append(' ' + label)
    # A single join avoids rebuilding the string once per piece.
    return ''.join(pieces)
403
def salida_texto2(self, tokens, labels):
    """Render one output string per entry of ``labels``.

    Each entry of ``labels`` is paired by position with ``tokens[i]``
    (wrapped in a NumPy array) and passed to ``self.salida_texto``: the
    ``labels`` entry is the first argument, the array the second.

    NOTE(review): given that argument order, ``labels`` appears to carry
    the token sequences and ``tokens`` the predicted tags - confirm
    against the caller before renaming anything.

    Returns:
        A list with one ``self.salida_texto`` result per entry of
        ``labels``, in order.
    """
    return [
        self.salida_texto(sequence, np.array(tokens[index]))
        for index, sequence in enumerate(labels)
    ]
413
+ #def salida_texto2(self, ids,pre_tokens):
414
+ # i=0
415
+ # out=[]
416
+ # for iden in pre_tokens:
417
+ # if i<len(ids):
418
+ #
419
+ # out.append(self.salida_texto( ids[i],np.array(pre_tokens[i])) )
420
+ # i=i+1
421
+ #
422
+ # return out
423
  def unir_array(self,_out):
424
  i=0
425
  salida=[]