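# NOTE: scraped page-header residue ("Spaces: Running / Running"); it appears
# to be the hosting page's status banner and is not part of the application.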
import logging
from pathlib import Path

import pandas as pd
import streamlit as st
from streamlit_pandas_profiling import st_profile_report
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
# Page-wide settings: browser tab title, tab icon and a full-width layout.
st.set_page_config(
    page_title="Francesco Daimon Fernicola",
    page_icon=":milky_way:",
    layout="wide",
)

# Markdown links rendered at the bottom of the introduction.
profile_links = """
[Github](https://github.com/FrancescoFernicola)
[Unibo](https://www.unibo.it/sitoweb/francesco.fernicola2)
[LinkedIn](https://www.linkedin.com/in/francesco-fernicola-69a0771b7/?locale=en_US)
[Twitter](https://twitter.com/FrancescoDaimon)
"""

# ---- INTRODUCTION ----
# Greeting, headline, one-line bio and profile links, grouped in one container.
intro = st.container()
intro.subheader("Hello, and welcome to my official webpage! I am Daimon :alien:")
intro.title("PhD Candidate in Machine Translation / Translator / Mountain enthusiast")
intro.write("I am passionate about finding new ways to effectively use and understand Machine Translation and effectively evaluating its quality.")
intro.write(profile_links)
# ---- DATA UPLOAD ----
st.title('Upload your data')
st.subheader('Input TSV/CSV')
uploaded_file = st.file_uploader("Choose a file")


def _read_uploaded_table(file_obj):
    """Parse an uploaded delimited-text file into a pandas DataFrame.

    A file whose name ends in ``.tsv`` (in any letter case) is read as
    tab-separated; every other file is read as comma-separated.
    """
    # Lower-casing the name keeps "DATA.TSV" from being parsed as CSV.
    separator = "\t" if file_obj.name.lower().endswith(".tsv") else ","
    return pd.read_csv(file_obj, sep=separator)


with st.spinner("Loading..."):
    if uploaded_file is None:
        st.info("☝️ Upload a TSV/CSV file")
    else:
        try:
            data = _read_uploaded_table(uploaded_file)
        except (pd.errors.ParserError, pd.errors.EmptyDataError, UnicodeDecodeError) as exc:
            # Report unreadable input in the page instead of a raw traceback.
            st.error(f"Could not read '{uploaded_file.name}': {exc}")
        else:
            # Show the parsed table followed by its summary statistics.
            st.subheader("DataFrame")
            st.write(data)
            st.write(data.describe())
# ---- TRANSLATOR SETUP ----
st.subheader("MBART-50 Translator")

# Default text offered in the source box, and the initial (empty) translation.
source = "In the beginning the Universe was created. This has made a lot of people very angry and been widely regarded as a bad move."
target = ""

# Checkpoint shared by the model and its tokenizer.
MBART_CHECKPOINT = "facebook/mbart-large-50-many-to-many-mmt"


def _resource_cache(loader):
    """Wrap ``loader`` with Streamlit's resource cache when one is available.

    Tries ``st.cache_resource`` first, then the older
    ``st.experimental_singleton``; if neither attribute exists the loader is
    returned unwrapped, which matches the previous uncached behaviour.
    """
    for attr_name in ("cache_resource", "experimental_singleton"):
        decorator = getattr(st, attr_name, None)
        if decorator is not None:
            return decorator(loader)
    return loader


@_resource_cache
def _load_translator():
    """Load the mBART-50 model and tokenizer, returned as ``(model, tokenizer)``."""
    loaded_model = MBartForConditionalGeneration.from_pretrained(MBART_CHECKPOINT)
    loaded_tokenizer = MBart50TokenizerFast.from_pretrained(MBART_CHECKPOINT)
    return loaded_model, loaded_tokenizer


# Streamlit re-executes this script on every interaction; going through the
# cached loader avoids re-reading the checkpoint each time.
# NOTE(review): cached resources are shared objects, so code that mutates the
# tokenizer (e.g. assigning ``src_lang``) affects every session -- confirm
# this is acceptable for concurrent users.
model, tokenizer = _load_translator()
def get_translation(src_code, trg_code, src):
    """Translate ``src`` from language ``src_code`` into language ``trg_code``.

    Uses the module-level ``tokenizer`` and ``model``. The source language is
    set on the tokenizer, and the target language is forced as the first
    generated token.

    Args:
        src_code: Language code assigned to ``tokenizer.src_lang``.
        trg_code: Language code looked up in ``tokenizer.lang_code_to_id``.
        src: Text handed to the tokenizer.

    Returns:
        The result of ``tokenizer.batch_decode`` on the generated tokens,
        with special tokens skipped.
    """
    tokenizer.src_lang = src_code
    model_inputs = tokenizer(src, return_tensors="pt")

    # Starting generation with the target-language id selects the output language.
    target_bos_id = tokenizer.lang_code_to_id[trg_code]
    output_ids = model.generate(**model_inputs, forced_bos_token_id=target_bos_id)

    return tokenizer.batch_decode(output_ids, skip_special_tokens=True)
# ---- TRANSLATION FORM ----
# Language codes accepted for both the source and the target selection.
valid_languages = ['en_XX', 'fr_XX', 'de_DE', 'it_IT', 'es_XX']

# NOTE(review): the nesting below was reconstructed from the statements; the
# submit handling is assumed to live inside the form -- confirm.
with st.form("my_form"):
    left_c, right_c = st.columns(2)
    with left_c:
        src_lang = st.selectbox(
            'Source language',
            ('en_XX', 'fr_XX', 'de_DE', 'it_IT', 'es_XX'),
        )
    with right_c:
        # Same codes as the source list, but with 'fr_XX' listed first.
        trg_lang = st.selectbox(
            'Target language',
            ('fr_XX', 'en_XX', 'de_DE', 'it_IT', 'es_XX'),
        )
    source = st.text_area("Source", value=source, height=130, placeholder="Enter the source text...")
    submitted = st.form_submit_button("Translate")

    if submitted:
        # Whitespace-only input counts as "no source text".
        has_text = bool(source.strip())
        if has_text and src_lang in valid_languages and trg_lang in valid_languages:
            with st.spinner("Translating..."):
                try:
                    # Only the translation call is guarded; UI calls stay outside.
                    target = get_translation(src_lang, trg_lang, source)[0]
                except Exception:
                    # ``Exception`` rather than a bare ``except`` so that
                    # BaseException-derived signals (e.g. KeyboardInterrupt)
                    # are not swallowed; the cause is logged with a traceback.
                    logging.getLogger(__name__).exception(
                        "Translation from %s to %s failed", src_lang, trg_lang
                    )
                    st.subheader("Translation failed :sad:")
                else:
                    st.subheader("Translation done!")
                    target = st.text_area("Target", value=target, height=130)
        else:
            st.write("Please enter the source text, source language and target language.")
# ---- CONTACT ----
# Raw HTML form that POSTs the visitor's name, email and message to
# formsubmit.co, with that service's captcha switched off via the hidden
# `_captcha` field.
# NOTE(review): the address in the form action reads "[email protected]",
# which looks like an email-obfuscation placeholder left by the page scrape --
# confirm and restore the real recipient address.
contact_form = """
<form action="https://formsubmit.co/[email protected]" method="POST">
<input type="hidden" name="_captcha" value="false">
<input type="text" name="name" placeholder="Your name" required>
<input type="email" name="email" placeholder="Your email" required>
<textarea name="message" placeholder="Your message here" required></textarea>
<button type="submit">Send</button>
</form>
"""

with st.container():
    # Divider, section heading and a spacer line.
    st.write("---")
    st.header("Get in Touch With Me!")
    st.write("##")

    # The form occupies the left half; the right half holds an empty placeholder.
    left_column, right_column = st.columns(2)
    left_column.markdown(contact_form, unsafe_allow_html=True)
    right_column.empty()