dataequity committed on
Commit
ca510ff
·
1 Parent(s): 42549a3

translation

Browse files
.gitignore ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### JetBrains template
2
+ # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
3
+ # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
4
+
5
+ # CMake
6
+ cmake-build-*/
7
+
8
+ # File-based project format
9
+ *.iws
10
+
11
+ # IntelliJ
12
+ out/
13
+
14
+ # mpeltonen/sbt-idea plugin
15
+ .idea_modules/
16
+
17
+ # JIRA plugin
18
+ atlassian-ide-plugin.xml
19
+
20
+ # Cursive Clojure plugin
21
+ .idea/replstate.xml
22
+
23
+ # Crashlytics plugin (for Android Studio and IntelliJ)
24
+ com_crashlytics_export_strings.xml
25
+ crashlytics.properties
26
+ crashlytics-build.properties
27
+ fabric.properties
28
+
29
+ .ipynb_checkpoints
30
+ */.ipynb_checkpoints/*
31
+
32
+ # IPython
33
+ profile_default/
34
+ ipython_config.py
35
+
36
+ # Remove previous ipynb_checkpoints
37
+ # git rm -r .ipynb_checkpoints/
38
+
39
+ ### Python template
40
+ # Byte-compiled / optimized / DLL files
41
+ __pycache__/
42
+ *.py[cod]
43
+ *$py.class
44
+
45
+ # C extensions
46
+ *.so
47
+
48
+ # Distribution / packaging
49
+ .Python
50
+ build/
51
+ develop-eggs/
52
+ dist/
53
+ downloads/
54
+ eggs/
55
+ .eggs/
56
+ lib/
57
+ lib64/
58
+ parts/
59
+ sdist/
60
+ var/
61
+ wheels/
62
+ share/python-wheels/
63
+ *.egg-info/
64
+ .installed.cfg
65
+ *.egg
66
+ MANIFEST
67
+ flagged/
68
+ .idea*
69
+
70
+ # PyInstaller
71
+ # Usually these files are written by a python script from a template
72
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
73
+ *.manifest
74
+ *.spec
75
+
76
+ # Installer logs
77
+ pip-log.txt
78
+ pip-delete-this-directory.txt
79
+
80
+ # Unit test / coverage reports
81
+ htmlcov/
82
+ .tox/
83
+ .nox/
84
+ .coverage
85
+ .coverage.*
86
+ .cache
87
+ nosetests.xml
88
+ coverage.xml
89
+ *.cover
90
+ *.py,cover
91
+ .hypothesis/
92
+ .pytest_cache/
93
+ cover/
94
+
95
+ # Translations
96
+ *.mo
97
+ *.pot
98
+
99
+ # Django stuff:
100
+ *.log
101
+ local_settings.py
102
+ db.sqlite3
103
+ db.sqlite3-journal
104
+
105
+ # Flask stuff:
106
+ instance/
107
+ .webassets-cache
108
+
109
+ # Scrapy stuff:
110
+ .scrapy
111
+
112
+ # Sphinx documentation
113
+ docs/_build/
114
+
115
+ # PyBuilder
116
+ .pybuilder/
117
+ target/
118
+
119
+ # Jupyter Notebook
120
+ .ipynb_checkpoints
121
+
122
+ # IPython
123
+ profile_default/
124
+ ipython_config.py
125
+
126
+ # pyenv
127
+ # For a library or package, you might want to ignore these files since the code is
128
+ # intended to run in multiple environments; otherwise, check them in:
129
+ # .python-version
130
+
131
+ # pipenv
132
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
133
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
134
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
135
+ # install all needed dependencies.
136
+ #Pipfile.lock
137
+
138
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
139
+ __pypackages__/
140
+
141
+ # Celery stuff
142
+ celerybeat-schedule
143
+ celerybeat.pid
144
+
145
+ # SageMath parsed files
146
+ *.sage.py
147
+
148
+ # Environments
149
+ .env
150
+ .venv
151
+ env/
152
+ venv/
153
+ ENV/
154
+ env.bak/
155
+ venv.bak/
156
+
157
+ # Spyder project settings
158
+ .spyderproject
159
+ .spyproject
160
+
161
+ # Rope project settings
162
+ .ropeproject
163
+
164
+ # mkdocs documentation
165
+ /site
166
+
167
+ # mypy
168
+ .mypy_cache/
169
+ .dmypy.json
170
+ dmypy.json
171
+
172
+ # Pyre type checker
173
+ .pyre/
174
+
175
+ # pytype static type analyzer
176
+ .pytype/
177
+
178
+ # Cython debug symbols
179
+ cython_debug/
180
+
.idea/.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
.idea/aws.xml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="accountSettings">
4
+ <option name="activeProfile" value="profile:default" />
5
+ <option name="activeRegion" value="eu-west-1" />
6
+ <option name="recentlyUsedProfiles">
7
+ <list>
8
+ <option value="profile:default" />
9
+ </list>
10
+ </option>
11
+ <option name="recentlyUsedRegions">
12
+ <list>
13
+ <option value="eu-west-1" />
14
+ </list>
15
+ </option>
16
+ </component>
17
+ </project>
.idea/csv-plugin.xml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="CsvFileAttributes">
4
+ <option name="attributeMap">
5
+ <map>
6
+ <entry key="/flagged/log.csv">
7
+ <value>
8
+ <Attribute>
9
+ <option name="separator" value="," />
10
+ </Attribute>
11
+ </value>
12
+ </entry>
13
+ </map>
14
+ </option>
15
+ </component>
16
+ </project>
.idea/inspectionProfiles/Project_Default.xml ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <profile version="1.0">
3
+ <option name="myName" value="Project Default" />
4
+ <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
5
+ <option name="ignoredPackages">
6
+ <value>
7
+ <list size="75">
8
+ <item index="0" class="java.lang.String" itemvalue="PyMuPDF" />
9
+ <item index="1" class="java.lang.String" itemvalue="opencv-python-headless" />
10
+ <item index="2" class="java.lang.String" itemvalue="python-crfsuite" />
11
+ <item index="3" class="java.lang.String" itemvalue="greenlet" />
12
+ <item index="4" class="java.lang.String" itemvalue="joblib" />
13
+ <item index="5" class="java.lang.String" itemvalue="threadpoolctl" />
14
+ <item index="6" class="java.lang.String" itemvalue="scikit-learn" />
15
+ <item index="7" class="java.lang.String" itemvalue="python-dateutil" />
16
+ <item index="8" class="java.lang.String" itemvalue="py" />
17
+ <item index="9" class="java.lang.String" itemvalue="html2text" />
18
+ <item index="10" class="java.lang.String" itemvalue="MarkupSafe" />
19
+ <item index="11" class="java.lang.String" itemvalue="srsly" />
20
+ <item index="12" class="java.lang.String" itemvalue="mongoengine" />
21
+ <item index="13" class="java.lang.String" itemvalue="spacy" />
22
+ <item index="14" class="java.lang.String" itemvalue="certifi" />
23
+ <item index="15" class="java.lang.String" itemvalue="soupsieve" />
24
+ <item index="16" class="java.lang.String" itemvalue="gevent" />
25
+ <item index="17" class="java.lang.String" itemvalue="pydantic" />
26
+ <item index="18" class="java.lang.String" itemvalue="Flask-Script" />
27
+ <item index="19" class="java.lang.String" itemvalue="Werkzeug" />
28
+ <item index="20" class="java.lang.String" itemvalue="Flask-WTF" />
29
+ <item index="21" class="java.lang.String" itemvalue="zope.interface" />
30
+ <item index="22" class="java.lang.String" itemvalue="plac" />
31
+ <item index="23" class="java.lang.String" itemvalue="typing-extensions" />
32
+ <item index="24" class="java.lang.String" itemvalue="gunicorn" />
33
+ <item index="25" class="java.lang.String" itemvalue="pathy" />
34
+ <item index="26" class="java.lang.String" itemvalue="catalogue" />
35
+ <item index="27" class="java.lang.String" itemvalue="attrs" />
36
+ <item index="28" class="java.lang.String" itemvalue="simplejson" />
37
+ <item index="29" class="java.lang.String" itemvalue="cymem" />
38
+ <item index="30" class="java.lang.String" itemvalue="murmurhash" />
39
+ <item index="31" class="java.lang.String" itemvalue="idna" />
40
+ <item index="32" class="java.lang.String" itemvalue="wasabi" />
41
+ <item index="33" class="java.lang.String" itemvalue="flask-mongoengine" />
42
+ <item index="34" class="java.lang.String" itemvalue="pluggy" />
43
+ <item index="35" class="java.lang.String" itemvalue="cloudpickle" />
44
+ <item index="36" class="java.lang.String" itemvalue="numpy" />
45
+ <item index="37" class="java.lang.String" itemvalue="requests" />
46
+ <item index="38" class="java.lang.String" itemvalue="spacy-legacy" />
47
+ <item index="39" class="java.lang.String" itemvalue="Jinja2" />
48
+ <item index="40" class="java.lang.String" itemvalue="parsedatetime" />
49
+ <item index="41" class="java.lang.String" itemvalue="preshed" />
50
+ <item index="42" class="java.lang.String" itemvalue="smart-open" />
51
+ <item index="43" class="java.lang.String" itemvalue="blis" />
52
+ <item index="44" class="java.lang.String" itemvalue="urllib3" />
53
+ <item index="45" class="java.lang.String" itemvalue="itsdangerous" />
54
+ <item index="46" class="java.lang.String" itemvalue="zope.event" />
55
+ <item index="47" class="java.lang.String" itemvalue="scipy" />
56
+ <item index="48" class="java.lang.String" itemvalue="pymongo" />
57
+ <item index="49" class="java.lang.String" itemvalue="six" />
58
+ <item index="50" class="java.lang.String" itemvalue="typer" />
59
+ <item index="51" class="java.lang.String" itemvalue="Flask-Cors" />
60
+ <item index="52" class="java.lang.String" itemvalue="pytest" />
61
+ <item index="53" class="java.lang.String" itemvalue="packaging" />
62
+ <item index="54" class="java.lang.String" itemvalue="pandas" />
63
+ <item index="55" class="java.lang.String" itemvalue="tqdm" />
64
+ <item index="56" class="java.lang.String" itemvalue="WTForms" />
65
+ <item index="57" class="java.lang.String" itemvalue="thinc" />
66
+ <item index="58" class="java.lang.String" itemvalue="pytz" />
67
+ <item index="59" class="java.lang.String" itemvalue="transformers" />
68
+ <item index="60" class="java.lang.String" itemvalue="tensorflow" />
69
+ <item index="61" class="java.lang.String" itemvalue="sentencepiece" />
70
+ <item index="62" class="java.lang.String" itemvalue="torch" />
71
+ <item index="63" class="java.lang.String" itemvalue="beautifulsoup4" />
72
+ <item index="64" class="java.lang.String" itemvalue="protobuf" />
73
+ <item index="65" class="java.lang.String" itemvalue="bs4" />
74
+ <item index="66" class="java.lang.String" itemvalue="dnspython" />
75
+ <item index="67" class="java.lang.String" itemvalue="sacremoses" />
76
+ <item index="68" class="java.lang.String" itemvalue="huggingface-hub" />
77
+ <item index="69" class="java.lang.String" itemvalue="click" />
78
+ <item index="70" class="java.lang.String" itemvalue="chardet" />
79
+ <item index="71" class="java.lang.String" itemvalue="langchain" />
80
+ <item index="72" class="java.lang.String" itemvalue="pyparsing" />
81
+ <item index="73" class="java.lang.String" itemvalue="llama-index" />
82
+ <item index="74" class="java.lang.String" itemvalue="Flask" />
83
+ </list>
84
+ </value>
85
+ </option>
86
+ </inspection_tool>
87
+ <inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
88
+ <option name="ignoredErrors">
89
+ <list>
90
+ <option value="E501" />
91
+ </list>
92
+ </option>
93
+ </inspection_tool>
94
+ <inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
95
+ <option name="ignoredErrors">
96
+ <list>
97
+ <option value="N802" />
98
+ </list>
99
+ </option>
100
+ </inspection_tool>
101
+ <inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
102
+ <option name="ignoredIdentifiers">
103
+ <list>
104
+ <option value="main.loa_and_contract" />
105
+ <option value="Backend.API.pdfwriter.addCrypto.*" />
106
+ </list>
107
+ </option>
108
+ </inspection_tool>
109
+ </profile>
110
+ </component>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (translations)" project-jdk-type="Python SDK" />
4
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/translations.iml" filepath="$PROJECT_DIR$/.idea/translations.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/translations.iml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$">
5
+ <excludeFolder url="file://$MODULE_DIR$/venv" />
6
+ </content>
7
+ <orderEntry type="jdk" jdkName="Python 3.9 (translations)" jdkType="Python SDK" />
8
+ <orderEntry type="sourceFolder" forTests="false" />
9
+ </component>
10
+ </module>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="$PROJECT_DIR$" vcs="Git" />
5
+ </component>
6
+ </project>
app.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import MarianMTModel, MarianTokenizer
3
+
4
+
5
def gradio_input(txt):
    """Gradio callback: translate ``txt`` and return a single string.

    ``translate`` returns a list of strings; the UI textbox displays one
    string, so only the first element is handed back.
    """
    return translate(txt)[0]
8
+
9
+
10
_HUB_REPO_NAME = 'dataequity/dataequity-kde4-en-es-qlora'

# Filled on first use so the checkpoint is loaded once per process instead of
# on every request.
_TRANSLATOR_CACHE = {}


def _load_translator():
    """Return the ``(tokenizer, model)`` pair, loading it only on first call."""
    if 'pair' not in _TRANSLATOR_CACHE:
        # Load both into locals first so a failure on the second load does
        # not leave a half-populated cache behind.
        tokenizer = MarianTokenizer.from_pretrained(_HUB_REPO_NAME)
        model = MarianMTModel.from_pretrained(_HUB_REPO_NAME)
        _TRANSLATOR_CACHE['pair'] = (tokenizer, model)
    return _TRANSLATOR_CACHE['pair']


def translate(text):
    """Translate ``text`` with the fine-tuned Marian model.

    Args:
        text: The source string to translate.

    Returns:
        A list of decoded translations (one entry per input sequence), or
        ``['Invalid input']`` if tokenization, generation or decoding raises.

    Raises:
        Whatever ``from_pretrained`` raises if the tokenizer or model cannot
        be loaded; loading errors are deliberately not masked as
        ``'Invalid input'``.
    """
    tokenizer, finetuned_model = _load_translator()

    try:
        batch = tokenizer([text], return_tensors="pt", padding=True)
        translated = finetuned_model.generate(**batch)
        return [tokenizer.decode(t, skip_special_tokens=True) for t in translated]
    except Exception:
        # Best-effort fallback for the UI. ``Exception`` (not a bare
        # ``except``) lets KeyboardInterrupt/SystemExit propagate.
        return ['Invalid input']
20
+
21
+
22
# Build the single-textbox-in / single-textbox-out UI around ``gradio_input``
# and start serving it as soon as the module is executed.
iface = gr.Interface(
    fn=gradio_input,
    inputs=gr.Textbox(placeholder="English text"),
    outputs=gr.Textbox(placeholder="Translation"),
    title="Translate English to Spanish",
)
iface.launch()
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ torch==2.1.2
3
+ numpy==1.26.3
4
+ sacrebleu==2.4.0
5
+ sacremoses==0.1.1
6
+ safetensors==0.4.1
7
+ sentencepiece==0.1.99
8
+ tokenizers==0.15.0
9
+ transformers==4.36.2
10
+