Spaces:
Build error
Build error
freemt
committed on
Commit
·
d7cdc67
1
Parent(s):
5821b23
Switch to blocks, attempt
Browse files- .stignore +101 -0
- install-sw.sh +23 -0
- install-sw1.sh +25 -0
- okteto.yml +44 -0
- poetry.toml +3 -0
- requirements.txt +8 -9
- ubee/__main__.py +7 -9
- ubee/seg_text.py +3 -4
- ubee/ubee.py +4 -3
- ubee/uclas.py +7 -7
.stignore
ADDED
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.git
|
2 |
+
# Byte-compiled / optimized / DLL files
|
3 |
+
__pycache__
|
4 |
+
*.py[cod]
|
5 |
+
*$py.class
|
6 |
+
|
7 |
+
# C extensions
|
8 |
+
*.so
|
9 |
+
|
10 |
+
# Distribution / packaging
|
11 |
+
.Python
|
12 |
+
build
|
13 |
+
develop-eggs
|
14 |
+
dist
|
15 |
+
downloads
|
16 |
+
eggs
|
17 |
+
.eggs
|
18 |
+
lib
|
19 |
+
lib64
|
20 |
+
parts
|
21 |
+
sdist
|
22 |
+
var
|
23 |
+
wheels
|
24 |
+
pip-wheel-metadata
|
25 |
+
share/python-wheels
|
26 |
+
*.egg-info
|
27 |
+
.installed.cfg
|
28 |
+
*.egg
|
29 |
+
MANIFEST
|
30 |
+
|
31 |
+
# PyInstaller
|
32 |
+
# Usually these files are written by a python script from a template
|
33 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
34 |
+
*.manifest
|
35 |
+
*.spec
|
36 |
+
|
37 |
+
# Installer logs
|
38 |
+
pip-log.txt
|
39 |
+
pip-delete-this-directory.txt
|
40 |
+
|
41 |
+
# Translations
|
42 |
+
*.mo
|
43 |
+
*.pot
|
44 |
+
|
45 |
+
# Django stuff:
|
46 |
+
*.log
|
47 |
+
local_settings.py
|
48 |
+
db.sqlite3
|
49 |
+
|
50 |
+
# Flask stuff:
|
51 |
+
instance
|
52 |
+
.webassets-cache
|
53 |
+
|
54 |
+
# Scrapy stuff:
|
55 |
+
.scrapy
|
56 |
+
|
57 |
+
# Sphinx documentation
|
58 |
+
docs/_build
|
59 |
+
|
60 |
+
# PyBuilder
|
61 |
+
target
|
62 |
+
|
63 |
+
# Jupyter Notebook
|
64 |
+
.ipynb_checkpoints
|
65 |
+
|
66 |
+
# IPython
|
67 |
+
profile_default
|
68 |
+
ipython_config.py
|
69 |
+
|
70 |
+
# pyenv
|
71 |
+
.python-version
|
72 |
+
|
73 |
+
# celery beat schedule file
|
74 |
+
celerybeat-schedule
|
75 |
+
|
76 |
+
# SageMath parsed files
|
77 |
+
*.sage.py
|
78 |
+
|
79 |
+
# Environments
|
80 |
+
.env
|
81 |
+
.venv
|
82 |
+
env
|
83 |
+
venv
|
84 |
+
ENV
|
85 |
+
env.bak
|
86 |
+
venv.bak
|
87 |
+
|
88 |
+
# Spyder project settings
|
89 |
+
.spyderproject
|
90 |
+
.spyproject
|
91 |
+
|
92 |
+
# Rope project settings
|
93 |
+
.ropeproject
|
94 |
+
|
95 |
+
# mypy
|
96 |
+
.mypy_cache
|
97 |
+
.dmypy.json
|
98 |
+
dmypy.json
|
99 |
+
|
100 |
+
# Pyre type checker
|
101 |
+
.pyre
|
install-sw.sh
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# pip install pipx
|
2 |
+
# pipx install poetry
|
3 |
+
# pipx ensurepath
|
4 |
+
# source ~/.bashrc
|
5 |
+
|
6 |
+
# curl -sSL https://install.python-poetry.org | python3 -
|
7 |
+
# -C- continue -S show error -o output
|
8 |
+
curl -sSL -C- -o install-poetry.py https://install.python-poetry.org
|
9 |
+
python install-poetry.py
|
10 |
+
rm install-poetry.py
|
11 |
+
echo export PATH=~/.local/bin:$PATH > ~/.bashrc
|
12 |
+
source ~/.bashrc
|
13 |
+
# ~/.local/bin/poetry install
|
14 |
+
|
15 |
+
wget -c https://deb.nodesource.com/setup_12.x
|
16 |
+
bash setup_12.x
|
17 |
+
apt-get install -y nodejs
|
18 |
+
npm install -g npm@latest
|
19 |
+
npm install -g nodemon
|
20 |
+
rm setup_12.x
|
21 |
+
|
22 |
+
# apt update # already done in apt-get install -y nodejs
|
23 |
+
apt install byobu -y > /dev/null 2>&1
|
install-sw1.sh
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# pip install pipx
|
2 |
+
# pipx install poetry
|
3 |
+
# pipx ensurepath
|
4 |
+
# source ~/.bashrc
|
5 |
+
|
6 |
+
# curl -sSL https://install.python-poetry.org | python3 -
|
7 |
+
# -C- continue -S show error -o output
|
8 |
+
curl -sSL -C- -o install-poetry.py https://install.python-poetry.org
|
9 |
+
python install-poetry.py
|
10 |
+
rm install-poetry.py
|
11 |
+
echo export PATH=~/.local/bin:$PATH > ~/.bashrc
|
12 |
+
source ~/.bashrc
|
13 |
+
# ~/.local/bin/poetry install
|
14 |
+
|
15 |
+
wget -c https://deb.nodesource.com/setup_12.x
|
16 |
+
bash setup_12.x
|
17 |
+
apt-get install -y nodejs
|
18 |
+
npm install -g npm@latest
|
19 |
+
npm install -g nodemon
|
20 |
+
rm setup_12.x
|
21 |
+
|
22 |
+
# apt update # already done in apt-get install -y nodejs
|
23 |
+
apt install byobu -y > /dev/null 2>&1
|
24 |
+
byobu-enable
|
25 |
+
byobu
|
okteto.yml
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: gradio-cmat
|
2 |
+
|
3 |
+
# The build section defines how to build the images of
|
4 |
+
# your development environment
|
5 |
+
# More info: https://www.okteto.com/docs/reference/manifest/#build
|
6 |
+
# build:
|
7 |
+
# my-service:
|
8 |
+
# context: .
|
9 |
+
|
10 |
+
# The deploy section defines how to deploy your development environment
|
11 |
+
# More info: https://www.okteto.com/docs/reference/manifest/#deploy
|
12 |
+
# deploy:
|
13 |
+
# commands:
|
14 |
+
# - name: Deploy
|
15 |
+
# command: echo 'Replace this line with the proper 'helm'
|
16 |
+
|
17 |
+
# or 'kubectl' commands to deploy your development environment'
|
18 |
+
|
19 |
+
# The dependencies section defines other git repositories to be
|
20 |
+
# deployed as part of your development environment
|
21 |
+
# More info: https://www.okteto.com/docs/reference/manifest/#dependencies
|
22 |
+
# dependencies:
|
23 |
+
# - https://github.com/okteto/sample
|
24 |
+
# The dev section defines how to activate a development container
|
25 |
+
# More info: https://www.okteto.com/docs/reference/manifest/#dev
|
26 |
+
dev:
|
27 |
+
gradio-cmat:
|
28 |
+
# image: okteto/dev:latest
|
29 |
+
# image: python:3.8.13-bullseye
|
30 |
+
# image: simbachain/poetry-3.8
|
31 |
+
image: python:3.8
|
32 |
+
command: bash
|
33 |
+
workdir: /usr/src/app
|
34 |
+
sync:
|
35 |
+
- .:/usr/src/app
|
36 |
+
environment:
|
37 |
+
- name=$USER
|
38 |
+
forward:
|
39 |
+
- 7861:7861
|
40 |
+
- 7860:7860
|
41 |
+
- 8501:8501
|
42 |
+
reverse:
|
43 |
+
- 9000:9000
|
44 |
+
autocreate: true
|
poetry.toml
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
[virtualenvs]
|
2 |
+
create = true
|
3 |
+
in-project = true
|
requirements.txt
CHANGED
@@ -3,10 +3,8 @@ install
|
|
3 |
transformers
|
4 |
sentencepiece
|
5 |
sklearn
|
6 |
-
git+https://github.com/ffreemt/fast-langid
|
7 |
git+https://github.com/ffreemt/align-model-pool
|
8 |
sentence-transformers
|
9 |
-
sentence_splitter
|
10 |
logzero
|
11 |
icecream
|
12 |
alive-progress
|
@@ -14,10 +12,11 @@ more_itertools
|
|
14 |
#
|
15 |
openpyxl
|
16 |
# --- seg_text
|
17 |
-
Morfessor
|
18 |
-
pyicu
|
19 |
-
pycld2
|
20 |
-
tqdm
|
21 |
-
polyglot
|
22 |
-
sentence_splitter
|
23 |
-
pyfunctional
|
|
|
|
3 |
transformers
|
4 |
sentencepiece
|
5 |
sklearn
|
|
|
6 |
git+https://github.com/ffreemt/align-model-pool
|
7 |
sentence-transformers
|
|
|
8 |
logzero
|
9 |
icecream
|
10 |
alive-progress
|
|
|
12 |
#
|
13 |
openpyxl
|
14 |
# --- seg_text
|
15 |
+
# Morfessor
|
16 |
+
# pyicu
|
17 |
+
# pycld2
|
18 |
+
# tqdm
|
19 |
+
# polyglot
|
20 |
+
# sentence_splitter
|
21 |
+
# pyfunctional
|
22 |
+
# git+https://github.com/ffreemt/fast-langid
|
ubee/__main__.py
CHANGED
@@ -1,20 +1,18 @@
|
|
1 |
"""Gen ubee main."""
|
2 |
-
# pylint: disable=unused-import, wrong-import-position, wrong-import-order, too-many-locals, broad-except
|
3 |
|
4 |
-
from typing import Tuple, Optional
|
5 |
-
|
6 |
-
from pathlib import Path
|
7 |
import sys
|
8 |
-
from random import shuffle
|
9 |
-
|
10 |
from itertools import zip_longest
|
|
|
|
|
11 |
from textwrap import dedent
|
|
|
12 |
|
13 |
import gradio as gr
|
14 |
-
|
15 |
-
import pandas as pd
|
16 |
-
from icecream import install as ic_install, ic
|
17 |
import logzero
|
|
|
|
|
|
|
18 |
from logzero import logger
|
19 |
|
20 |
# for embeddable python
|
|
|
1 |
"""Gen ubee main."""
|
2 |
+
# pylint: disable=unused-import, wrong-import-position, wrong-import-order, too-many-locals, broad-except, line-too-long
|
3 |
|
|
|
|
|
|
|
4 |
import sys
|
|
|
|
|
5 |
from itertools import zip_longest
|
6 |
+
from pathlib import Path
|
7 |
+
from random import shuffle
|
8 |
from textwrap import dedent
|
9 |
+
from typing import Optional, Tuple
|
10 |
|
11 |
import gradio as gr
|
|
|
|
|
|
|
12 |
import logzero
|
13 |
+
import pandas as pd
|
14 |
+
from icecream import ic
|
15 |
+
from icecream import install as ic_install
|
16 |
from logzero import logger
|
17 |
|
18 |
# for embeddable python
|
ubee/seg_text.py
CHANGED
@@ -9,15 +9,14 @@ else use polyglot.text.Text
|
|
9 |
"""
|
10 |
# pylint: disable=
|
11 |
|
|
|
12 |
from typing import List, Optional, Union
|
13 |
|
14 |
-
import
|
15 |
-
from tqdm.auto import tqdm
|
16 |
from polyglot.detect.base import logger as polyglot_logger
|
17 |
from polyglot.text import Detector, Text
|
18 |
from sentence_splitter import split_text_into_sentences
|
19 |
-
|
20 |
-
from logzero import logger
|
21 |
|
22 |
# turn off polyglot.text.Detector warning
|
23 |
polyglot_logger.setLevel("ERROR")
|
|
|
9 |
"""
|
10 |
# pylint: disable=
|
11 |
|
12 |
+
import re
|
13 |
from typing import List, Optional, Union
|
14 |
|
15 |
+
from logzero import logger
|
|
|
16 |
from polyglot.detect.base import logger as polyglot_logger
|
17 |
from polyglot.text import Detector, Text
|
18 |
from sentence_splitter import split_text_into_sentences
|
19 |
+
from tqdm.auto import tqdm
|
|
|
20 |
|
21 |
# turn off polyglot.text.Detector warning
|
22 |
polyglot_logger.setLevel("ERROR")
|
ubee/ubee.py
CHANGED
@@ -1,11 +1,12 @@
|
|
1 |
"""Align via ubee,"""
|
2 |
# pylint: disable=
|
3 |
-
from typing import Iterable, List, Tuple
|
4 |
from itertools import zip_longest
|
|
|
5 |
|
|
|
6 |
from logzero import logger
|
|
|
7 |
from ubee.uclas import uclas
|
8 |
-
from icecream import ic
|
9 |
|
10 |
|
11 |
def ubee(
|
@@ -17,7 +18,7 @@ def ubee(
|
|
17 |
|
18 |
Args:
|
19 |
sents_zh: list of text, can be any language supported by clas-l-user
|
20 |
-
|
21 |
Returns:
|
22 |
three tuples of aligned blocked
|
23 |
leftovers (unaligned)
|
|
|
1 |
"""Align via ubee,"""
|
2 |
# pylint: disable=
|
|
|
3 |
from itertools import zip_longest
|
4 |
+
from typing import Iterable, List, Tuple
|
5 |
|
6 |
+
from icecream import ic
|
7 |
from logzero import logger
|
8 |
+
|
9 |
from ubee.uclas import uclas
|
|
|
10 |
|
11 |
|
12 |
def ubee(
|
|
|
18 |
|
19 |
Args:
|
20 |
sents_zh: list of text, can be any language supported by clas-l-user
|
21 |
+
sents_en: ditto
|
22 |
Returns:
|
23 |
three tuples of aligned blocked
|
24 |
leftovers (unaligned)
|
ubee/uclas.py
CHANGED
@@ -2,16 +2,16 @@
|
|
2 |
# pylint: disable=invalid-name
|
3 |
|
4 |
from typing import List, Tuple, Union
|
5 |
-
import numpy as np
|
6 |
-
from sklearn.metrics.pairwise import cosine_similarity
|
7 |
-
from joblib import Memory
|
8 |
-
|
9 |
-
from model_pool import fetch_check_aux # pylint: disable=import-error
|
10 |
-
from model_pool.model_s import load_model_s # pylint: disable=import-error
|
11 |
-
from model_pool.load_model import load_model # pylint: disable=import-error
|
12 |
|
13 |
import logzero
|
|
|
|
|
14 |
from logzero import logger
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
logzero.loglevel(20)
|
17 |
|
|
|
2 |
# pylint: disable=invalid-name
|
3 |
|
4 |
from typing import List, Tuple, Union
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
import logzero
|
7 |
+
import numpy as np
|
8 |
+
from joblib import Memory
|
9 |
from logzero import logger
|
10 |
+
# set PYTHONPATH=..\align-model-pool # in win10
|
11 |
+
from model_pool.fetch_check_aux import fetch_check_aux
|
12 |
+
from model_pool.load_model import load_model
|
13 |
+
from model_pool.model_s import load_model_s
|
14 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
15 |
|
16 |
logzero.loglevel(20)
|
17 |
|