Spaces:
Build error
Build error
"""Align via ubee,""" | |
# pylint: disable= | |
from itertools import zip_longest | |
from typing import Iterable, List, Tuple | |
from icecream import ic | |
from logzero import logger | |
from ubee.uclas import uclas | |
def ubee(
    sents_zh: Iterable,
    sents_en: Iterable,
    thresh: float = 0.5,
) -> Tuple[List[Tuple[str, str, float]], List[Tuple[str, str]]]:
    """Align blocks of text from two sequences.

    Every item of ``sents_zh`` is handed to ``uclas`` together with the
    complete list of ``sents_en`` items as candidate labels. When ``uclas``
    returns a truthy label the pair is recorded as aligned; otherwise the
    item is kept as a leftover.

    Args:
        sents_zh: texts to align; per the original note, any language
            supported by clas-l-user (not verifiable from this file).
        sents_en: candidate texts to align against (same note applies).
            The iterable is materialised into a list once.
        thresh: forwarded unchanged to ``uclas`` as its ``thresh`` keyword.

    Returns:
        A 2-tuple ``(aligned, leftovers)``:

        * ``aligned`` -- list of ``(seq, label, likelihood)`` triples, with
          ``likelihood`` converted to ``float`` and rounded to 2 decimals.
        * ``leftovers`` -- unmatched ``sents_zh`` items paired positionally
          (``itertools.zip_longest``) with the ``sents_en`` items that were
          never picked; the shorter side is padded with ``None``.
    """
    labels = [*sents_en]
    res = []
    lo1 = []  # items of sents_zh for which uclas returned no label
    lo2 = labels[:]  # items of sents_en not yet picked as a label

    for seq in sents_zh:
        ic(seq)
        # uclas always sees the full candidate list, not just what is left
        # in lo2, so the same label may be returned for several items.
        label, likelihood = uclas(seq, labels, thresh=thresh)

        if not label:
            lo1.append(seq)
            continue

        likelihood = round(float(likelihood), 2)
        res.append((seq, label, likelihood))

        try:
            lo2.remove(label)
        except ValueError as exc:
            # list.remove raises ValueError when the label is no longer in
            # lo2 -- typically because an earlier item already claimed it.
            # Best effort: report it and keep the alignment.
            logger.error(exc)
            logger.info("seq: %s, lable: %s", seq, label)

    return res, [*zip_longest(lo1, lo2)]