Daimon committed on
Commit
0ab6a79
·
1 Parent(s): 447af8d

Added Excel import functionality

Browse files
Files changed (1) hide show
  1. app.py +54 -59
app.py CHANGED
@@ -1,30 +1,9 @@
1
  import streamlit as st
2
  import pandas as pd
3
- from streamlit_pandas_profiling import st_profile_report
4
  from pathlib import Path
5
  from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
6
 
7
- st.set_page_config(page_title="Francesco Daimon Fernicola", page_icon=":milky_way:", layout="wide")
8
-
9
- with st.container():
10
- st.subheader("Hello, and welcome to my official webpage! I am Daimon :alien:")
11
- st.title("PhD Candidate in Machine Translation / Translator / Mountain enthusiast")
12
- st.write("I am passionate about finding new ways to effectively use and understand Machine Translation and effectively evaluating its quality.")
13
- st.write("""
14
- [Github](https://github.com/FrancescoFernicola)
15
- [Unibo](https://www.unibo.it/sitoweb/francesco.fernicola2)
16
- [LinkedIn](https://www.linkedin.com/in/francesco-fernicola-69a0771b7/?locale=en_US)
17
- [Twitter](https://twitter.com/FrancescoDaimon)
18
- """)
19
-
20
- from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
21
- st.subheader("MBART-50 Translator")
22
-
23
- source = "In the beginning the Universe was created. This has made a lot of people very angry and been widely regarded as a bad move."
24
- target = ""
25
- model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
26
- tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
27
-
28
 
29
 
30
  def get_translation(src_code, trg_code, src):
@@ -39,6 +18,25 @@ def get_translation(src_code, trg_code, src):
39
 
40
  return trg
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  valid_languages = ['de_DE', 'en_XX', 'it_IT']
43
 
44
 
@@ -76,56 +74,53 @@ with st.form("my_form"):
76
  st.write("Please enter the source text, source language and target language.")
77
 
78
 
79
- st.subheader('Input TSV')
80
  uploaded_file = st.file_uploader("Choose a file")
81
  done = False
82
 
83
 
84
-
85
  if uploaded_file is not None:
86
  valid_languages_col = (lang for lang in valid_languages)
87
  valid_languages_col_trg = (lang for lang in valid_languages)
88
- if uploaded_file.name.endswith('.tsv'):
89
- data = pd.read_csv(uploaded_file, sep="\t")
90
- st.subheader("DataFrame")
91
- st.write(data)
92
- st.write(data.describe())
93
- columns = (col for col in data.columns)
94
- src_col = st.selectbox(
95
- 'Select the column to translate:',
96
- columns,
97
- )
98
- if src_col:
99
- col_src_lang = st.selectbox(
100
- 'Source language:',
101
- valid_languages_col,
 
102
  )
103
- col_trg_lang = st.selectbox(
104
- 'Target language:',
105
- valid_languages_col_trg,
106
  )
107
- submitted_cols = st.button("Translate column")
108
- if submitted_cols:
109
- translated_data = []
110
- new_df = data
111
- for text in data[src_col]:
112
- if len(text) > 0 and col_src_lang in valid_languages and col_trg_lang in valid_languages:
113
- with st.spinner("Translating..."):
114
- try:
115
- target_text = get_translation(col_src_lang, col_trg_lang, text)[0]
116
- translated_data.append(target_text)
 
117
  except:
118
  st.subheader("Translation failed :sad:")
119
  break
 
 
120
 
121
- else:
122
- st.write("Please enter the source text, source language and target language.")
123
-
124
- new_df[src_col] = translated_data
125
- done = True
126
-
127
- else:
128
- data = pd.read_csv(uploaded_file)
129
 
130
  if done:
131
  st.subheader("Translated DataFrame")
 
1
  import streamlit as st
2
  import pandas as pd
 
3
  from pathlib import Path
4
  from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
5
 
6
+ st.set_page_config(page_title="Translation Demo", page_icon=":milky_way:", layout="wide")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
 
9
  def get_translation(src_code, trg_code, src):
 
18
 
19
  return trg
20
 
21
+
22
def open_input(the_file):
    """Parse an uploaded tabular file into a pandas DataFrame.

    The parser is chosen from the extension of ``the_file.name``, compared
    case-insensitively:

    * ``.tsv``      -> ``pd.read_csv`` with a tab separator
    * ``.xlsx``     -> ``pd.read_excel``
    * anything else -> ``pd.read_csv`` with its default (comma) separator

    Args:
        the_file: A file-like object exposing a ``name`` attribute and
            readable by pandas (e.g. the object returned by the file
            uploader widget).

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        Whatever the selected pandas reader raises when the content cannot
        be parsed in the chosen format.
    """
    # Compare on a lower-cased name so "DATA.TSV" / "Book.XLSX" are accepted.
    file_name = the_file.name.lower()

    if file_name.endswith('.tsv'):
        return pd.read_csv(the_file, sep="\t")
    if file_name.endswith('.xlsx'):
        return pd.read_excel(the_file)

    # Any other extension is treated as comma-separated text. Without this
    # fallback the function would hit an unbound local variable on return.
    return pd.read_csv(the_file)
30
+
31
+
32
+ st.subheader("MBART-50 Translator")
33
+
34
+ source = "In the beginning the Universe was created. This has made a lot of people very angry and been widely regarded as a bad move."
35
+ target = ""
36
+ model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
37
+ tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
38
+
39
+
40
  valid_languages = ['de_DE', 'en_XX', 'it_IT']
41
 
42
 
 
74
  st.write("Please enter the source text, source language and target language.")
75
 
76
 
77
+ st.subheader('Input Excel/TSV')
78
  uploaded_file = st.file_uploader("Choose a file")
79
  done = False
80
 
81
 
 
82
  if uploaded_file is not None:
83
  valid_languages_col = (lang for lang in valid_languages)
84
  valid_languages_col_trg = (lang for lang in valid_languages)
85
+ data = open_input(uploaded_file)
86
+ st.subheader("DataFrame")
87
+ st.write(data)
88
+ st.write(data.describe())
89
+
90
+ columns = (col for col in data.columns)
91
+ src_col = st.selectbox(
92
+ 'Select the column to translate:',
93
+ columns,
94
+ )
95
+
96
+ if src_col:
97
+ col_src_lang = st.selectbox(
98
+ 'Source language:',
99
+ valid_languages_col,
100
  )
101
+ col_trg_lang = st.selectbox(
102
+ 'Target language:',
103
+ valid_languages_col_trg,
104
  )
105
+ submitted_cols = st.button("Translate column")
106
+
107
+ if submitted_cols:
108
+ translated_data = []
109
+ new_df = data
110
+ for text in data[src_col]:
111
+ if len(text) > 0 and col_src_lang in valid_languages and col_trg_lang in valid_languages:
112
+ with st.spinner("Translating..."):
113
+ try:
114
+ target_text = get_translation(col_src_lang, col_trg_lang, text)[0]
115
+ translated_data.append(target_text)
116
  except:
117
  st.subheader("Translation failed :sad:")
118
  break
119
+ else:
120
+ st.write("Please enter the source text, source language and target language.")
121
 
122
+ new_df[src_col] = translated_data
123
+ done = True
 
 
 
 
 
 
124
 
125
  if done:
126
  st.subheader("Translated DataFrame")