Daimon committed on
Commit
cec6612
·
1 Parent(s): d44d8ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -50
app.py CHANGED
@@ -17,25 +17,7 @@ with st.container():
17
  [Twitter](https://twitter.com/FrancescoDaimon)
18
  """)
19
 
20
- st.title('Upload your data')
21
-
22
- st.subheader('Input TSV/CSV')
23
- uploaded_file = st.file_uploader("Choose a file")
24
- with st.spinner("Loading..."):
25
- if uploaded_file is not None:
26
- if uploaded_file.name.endswith('.tsv'):
27
- data = pd.read_csv(uploaded_file, sep="\t")
28
- else:
29
- data = pd.read_csv(uploaded_file)
30
-
31
- st.subheader("DataFrame")
32
- st.write(data)
33
- st.write(data.describe())
34
-
35
- else:
36
- st.info("☝️ Upload a TSV/CSV file")
37
-
38
-
39
  st.subheader("MBART-50 Translator")
40
 
41
  source = "In the beginning the Universe was created. This has made a lot of people very angry and been widely regarded as a bad move."
@@ -44,6 +26,7 @@ model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-m
44
  tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
45
 
46
 
 
47
  def get_translation(src_code, trg_code, src):
48
 
49
  tokenizer.src_lang = src_code
@@ -56,19 +39,29 @@ def get_translation(src_code, trg_code, src):
56
 
57
  return trg
58
 
59
- valid_languages = ['en_XX', 'fr_XX', 'de_DE', 'it_IT', 'es_XX']
 
 
 
 
 
 
 
 
 
 
60
 
61
  with st.form("my_form"):
62
  left_c, right_c = st.columns(2)
63
- with left_c:
64
- src_lang = st.selectbox(
65
  'Source language',
66
- ('en_XX', 'fr_XX', 'de_DE', 'it_IT', 'es_XX'),
67
  )
68
- with right_c:
69
- trg_lang = st.selectbox(
70
  'Target language',
71
- ('fr_XX', 'en_XX', 'de_DE', 'it_IT', 'es_XX')
72
  )
73
  source = st.text_area("Source", value=source, height=130, placeholder="Enter the source text...")
74
 
@@ -88,29 +81,64 @@ with st.form("my_form"):
88
  st.write("Please enter the source text, source language and target language.")
89
 
90
 
91
- def local_css(file_name):
92
- with open(file_name) as f:
93
- st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
94
 
95
- local_css("style/style.css")
96
 
97
- # ---- CONTACT ----
98
- with st.container():
99
- st.write("---")
100
- st.header("Get in Touch With Me!")
101
- st.write("##")
102
-
103
- contact_form = """
104
- <form action="https://formsubmit.co/[email protected]" method="POST">
105
- <input type="hidden" name="_captcha" value="false">
106
- <input type="text" name="name" placeholder="Your name" required>
107
- <input type="email" name="email" placeholder="Your email" required>
108
- <textarea name="message" placeholder="Your message here" required></textarea>
109
- <button type="submit">Send</button>
110
- </form>
111
- """
112
- left_column, right_column = st.columns(2)
113
- with left_column:
114
- st.markdown(contact_form, unsafe_allow_html=True)
115
- with right_column:
116
- st.empty()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  [Twitter](https://twitter.com/FrancescoDaimon)
18
  """)
19
 
20
+ from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  st.subheader("MBART-50 Translator")
22
 
23
  source = "In the beginning the Universe was created. This has made a lot of people very angry and been widely regarded as a bad move."
 
26
  tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
27
 
28
 
29
+
30
  def get_translation(src_code, trg_code, src):
31
 
32
  tokenizer.src_lang = src_code
 
39
 
40
  return trg
41
 
42
# Language codes offered in the UI for facebook/mbart-large-50-many-to-many-mmt.
# Every entry must be followed by a comma: adjacent string literals are
# silently concatenated by Python, which would merge two codes into one
# invalid entry and remove both real languages from the list.
valid_languages = [
    'ar_AR', 'cs_CZ', 'de_DE', 'en_XX', 'es_XX', 'et_EE', 'fi_FI', 'fr_XX',
    'gu_IN', 'hi_IN', 'it_IT', 'ja_XX', 'kk_KZ', 'ko_KR', 'lt_LT', 'lv_LV', 'my_MM', 'ne_NP',
    'nl_XX', 'ro_RO', 'ru_RU', 'si_LK', 'tr_TR', 'vi_VN', 'zh_CN', 'af_ZA', 'az_AZ', 'bn_IN',
    'fa_IR', 'he_IL', 'hr_HR', 'id_ID', 'ka_GE', 'km_KH', 'mk_MK', 'ml_IN', 'mn_MN', 'mr_IN',
    'pl_PL', 'ps_AF', 'pt_XX', 'sv_SE', 'sw_KE', 'ta_IN', 'te_IN', 'th_TH', 'tl_XX', 'uk_UA',
    'ur_PK', 'xh_ZA', 'gl_ES', 'sl_SI',
]

# Option sequences for the source/target select boxes. These are real tuples
# (not one-shot generators) so they can be iterated and indexed repeatedly.
valid_languages_tuple = tuple(valid_languages)
valid_languages_tuple_trg = tuple(valid_languages)
53
 
54
  with st.form("my_form"):
55
  left_c, right_c = st.columns(2)
56
+ #with left_c:
57
+ src_lang = st.selectbox(
58
  'Source language',
59
+ valid_languages_tuple,
60
  )
61
+ #with right_c:
62
+ trg_lang = st.selectbox(
63
  'Target language',
64
+ valid_languages_tuple_trg,
65
  )
66
  source = st.text_area("Source", value=source, height=130, placeholder="Enter the source text...")
67
 
 
81
  st.write("Please enter the source text, source language and target language.")
82
 
83
 
84
# ---- Translate one column of an uploaded TSV file ----
# Flow: upload a file -> preview it -> pick a column and a language pair ->
# translate every cell of that column -> show and offer the result as a TSV.
st.subheader('Input TSV')
uploaded_file = st.file_uploader("Choose a file")
done = False  # set to True only after the whole column was translated

if uploaded_file is not None:
    if uploaded_file.name.endswith('.tsv'):
        data = pd.read_csv(uploaded_file, sep="\t")
        st.subheader("DataFrame")
        st.write(data)
        st.write(data.describe())
        src_col = st.selectbox(
            'Select the column to translate:',
            list(data.columns),
        )
        if src_col is not None:
            # Both codes come from select boxes fed with valid_languages, so
            # they are always members of that list and need no re-validation.
            col_src_lang = st.selectbox(
                'Source language:',
                valid_languages,
            )
            col_trg_lang = st.selectbox(
                'Target language:',
                valid_languages,
            )
            submitted_cols = st.button("Translate column")
            if submitted_cols:
                translated_data = []
                failed = False
                with st.spinner("Translating..."):
                    for text in data[src_col]:
                        # Missing (NaN), numeric or empty cells cannot be
                        # translated; keep them as they are so the result
                        # stays aligned with the DataFrame index.
                        if not isinstance(text, str) or not text:
                            translated_data.append(text)
                            continue
                        try:
                            target_text = get_translation(col_src_lang, col_trg_lang, text)[0]
                        except Exception:
                            st.subheader("Translation failed :sad:")
                            failed = True
                            break
                        translated_data.append(target_text)

                if not failed:
                    # Work on a copy so the uploaded frame is left untouched,
                    # and assign only when every row has a value; a partial
                    # list would raise a length-mismatch ValueError.
                    new_df = data.copy()
                    new_df[src_col] = translated_data
                    done = True

    else:
        # Non-.tsv uploads are parsed as comma-separated; nothing further is
        # done with them in this section.
        data = pd.read_csv(uploaded_file)

    if done:
        st.subheader("Translated DataFrame")
        st.write(new_df)
        st.write(new_df.describe())
        to_dl = new_df.to_csv(index=False, sep='\t').encode('utf-8')
        st.download_button('Download TSV', to_dl, 'translated_file.tsv', 'text/tsv', key='download-tsv')

else:
    st.info("☝️ Upload a TSV file")