app modified

app.py CHANGED
@@ -287,10 +287,10 @@ class ModeloDataset:
         x=0
         new_identificadores=[]
         for token in predicted_tokens_classes:
-
+
             if x not in ig_tokens:
                 if len(new_identificadores) < tamano:
-
+
                     new_identificadores.append(token)
                     x=x+1
                 else:
@@ -354,7 +354,7 @@ class ModeloDataset:
         i=0
         new_identificadores=[]
         for item in tokenized_text:
-
+
             aux1, aux2= self.reordenacion_tokens(item)
             new_tokens.append(aux1)
             ig_tok.append(aux2)
@@ -362,40 +362,64 @@ class ModeloDataset:
         print('ig_tok',ig_tok)
 
         for items in _predicted_tokens_classes:
-
-
+
+
             aux=self.reordenacion_identificadores(ig_tok[i],items,len(new_tokens[i]))
             new_identificadores.append(aux)
             i=i+1
         print('new_identificadores:',new_identificadores, ' ',len(new_identificadores) )
 
-
-        return ids, _predicted_tokens_classes
-    def salida_texto( self,ids,pre_tokens):
+        return new_identificadores, new_tokens
+        #return ids, _predicted_tokens_classes
+    #def salida_texto( self,ids,pre_tokens):
+    #    new_labels = []
+    #    current_word = None
+    #    i=0
+    #    for identificador in pre_tokens:
+    #        if (self.tokenizer.decode(ids[i])!="<s>"):
+    #            if identificador=='O':
+    #
+    #                new_labels.append(self.tokenizer.decode(ids[i]))
+    #            else:
+    #                new_labels.append(' ' + identificador)
+    #        i=i+1
+    #
+    #    return new_labels
+    def salida_texto( self,tokens,pre_tokens):
         new_labels = []
         current_word = None
         i=0
-        for identificador in pre_tokens:
-            if (self.tokenizer.decode(ids[i])!="<s>"):
-                if identificador=='O':
-
-                    new_labels.append(self.tokenizer.decode(ids[i]))
-                else:
-                    new_labels.append(' ' + identificador)
-            i=i+1
-
-        return new_labels
-
-    def salida_texto2(self, ids,pre_tokens):
+        for token in tokens:
+
+            if pre_tokens[i]=='O' or 'MISC' in pre_tokens[i]:
+                new_labels.append(' ' +token.replace('▁',''))
+            else:
+                new_labels.append(' ' + pre_tokens[i])
+            i=i+1
+        a=''
+        for i in new_labels:
+            a = a+i
+        return a
+    def salida_texto2(self, tokens,labels):
         i=0
         out=[]
-        for iden in pre_tokens:
-            if i<len(ids):
+        for iden in labels:
+            #if i<len(ids):
 
-            out.append(self.salida_texto( ids[i],np.array(pre_tokens[i])) )
+            out.append(self.salida_texto( iden,np.array(tokens[i])) )
            i=i+1
 
-            return out
+        return out
+    #def salida_texto2(self, ids,pre_tokens):
+    #    i=0
+    #    out=[]
+    #    for iden in pre_tokens:
+    #        if i<len(ids):
+    #
+    #            out.append(self.salida_texto( ids[i],np.array(pre_tokens[i])) )
+    #        i=i+1
+    #
+    #        return out
    def unir_array(self,_out):
         i=0
         salida=[]
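What the commit changes, in short: the enclosing ModeloDataset method now returns new_identificadores, new_tokens instead of ids, _predicted_tokens_classes, and salida_texto is rewritten to build the output string straight from the reordered subword tokens and their predicted classes, rather than decoding input ids through the tokenizer. Tokens tagged 'O' or *-MISC keep their text, while any other tag replaces the token, in effect masking the identifying entities. Below is a minimal standalone sketch of that reassembly rule, assuming XLM-RoBERTa-style '▁' subword markers; the function name and the sample tokens/labels are illustrative, not part of app.py.

    # Illustrative sketch, not app.py code: same rule as the new salida_texto.
    def rebuild_text(tokens, labels):
        pieces = []
        for token, label in zip(tokens, labels):
            if label == 'O' or 'MISC' in label:
                # Keep the token text, stripping the SentencePiece "▁" marker.
                pieces.append(' ' + token.replace('▁', ''))
            else:
                # Replace an identifying token with its entity tag.
                pieces.append(' ' + label)
        return ''.join(pieces)

    # Hypothetical example: the PER and LOC tokens are masked by their tags.
    print(rebuild_text(['▁Maria', '▁vive', '▁en', '▁Madrid'],
                       ['B-PER', 'O', 'O', 'B-LOC']))
    # -> " B-PER vive en B-LOC"

salida_texto2 then just maps this per sentence: for each label sequence it calls salida_texto with the matching tokens[i], so the batch output is one rebuilt string per input sentence.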