from pathlib import Path

import pandas as pd
import streamlit as st
from streamlit_pandas_profiling import st_profile_report
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast

st.set_page_config(page_title="Francesco Daimon Fernicola", page_icon=":milky_way:", layout="wide")

# ---- HEADER ----
with st.container():
    st.subheader("Hello, and welcome to my official webpage! I am Daimon :alien:")
    st.title("PhD Candidate in Machine Translation / Translator / Mountain enthusiast")
    st.write("I am passionate about finding new ways to effectively use and understand Machine Translation and effectively evaluating its quality.")
    # Profile links, rendered as Markdown by st.write.
    st.write(
        " [Github](https://github.com/FrancescoFernicola)"
        " [Unibo](https://www.unibo.it/sitoweb/francesco.fernicola2)"
        " [LinkedIn](https://www.linkedin.com/in/francesco-fernicola-69a0771b7/?locale=en_US)"
        " [Twitter](https://twitter.com/FrancescoDaimon)"
        " "
    )

# ---- DATA UPLOAD ----
st.title('Upload your data')
st.subheader('Input TSV/CSV')
uploaded_file = st.file_uploader("Choose a file")

with st.spinner("Loading..."):
    if uploaded_file is not None:
        # Compare the extension case-insensitively so "DATA.TSV" is still read
        # as tab-separated; every other file is parsed as comma-separated.
        separator = "\t" if uploaded_file.name.lower().endswith('.tsv') else ","
        try:
            data = pd.read_csv(uploaded_file, sep=separator)
        except (pd.errors.ParserError, pd.errors.EmptyDataError, UnicodeDecodeError) as exc:
            # Report unreadable uploads on the page instead of crashing the script.
            st.error(f"Could not read {uploaded_file.name}: {exc}")
        else:
            st.subheader("DataFrame")
            st.write(data)
            st.write(data.describe())
    else:
        st.info("☝️ Upload a TSV/CSV file")

# ---- TRANSLATOR ----
st.subheader("MBART-50 Translator")
# Default text pre-filled in the translator's source box.
source = "In the beginning the Universe was created. This has made a lot of people very angry and been widely regarded as a bad move."
target = ""

_MODEL_NAME = "facebook/mbart-large-50-many-to-many-mmt"


def _no_cache(func):
    """Fallback decorator for Streamlit versions without a resource cache."""
    return func


# Streamlit re-executes this script on every interaction; without caching, the
# model checkpoint would be reloaded on each rerun. Use whichever resource
# cache the installed Streamlit provides.
_cache_resource = (
    getattr(st, "cache_resource", None)
    or getattr(st, "experimental_singleton", None)
    or _no_cache
)


@_cache_resource
def _load_model():
    """Load the mBART-50 translation model (cached across reruns when possible)."""
    return MBartForConditionalGeneration.from_pretrained(_MODEL_NAME)


model = _load_model()
# The tokenizer is intentionally NOT cached: get_translation() mutates its
# `src_lang`, so a single instance shared between sessions could race.
tokenizer = MBart50TokenizerFast.from_pretrained(_MODEL_NAME)


def get_translation(src_code: str, trg_code: str, src: str):
    """Translate *src* from language *src_code* into language *trg_code*.

    Args:
        src_code: Language code of the source text (e.g. ``"en_XX"``).
        trg_code: Language code to translate into; used to look up the forced
            first generated token in ``tokenizer.lang_code_to_id``.
        src: Text to translate.

    Returns:
        The result of ``tokenizer.batch_decode`` on the generated tokens, with
        special tokens skipped; the caller reads its first element.

    Raises:
        KeyError: If *trg_code* is not in ``tokenizer.lang_code_to_id``.
    """
    tokenizer.src_lang = src_code
    encoded = tokenizer(src, return_tensors="pt")
    generated_tokens = model.generate(
        **encoded,
        forced_bos_token_id=tokenizer.lang_code_to_id[trg_code],
    )
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)


valid_languages = ['en_XX', 'fr_XX', 'de_DE', 'it_IT', 'es_XX']

with st.form("my_form"):
    left_c, right_c = st.columns(2)
    with left_c:
        src_lang = st.selectbox(
            'Source language',
            ('en_XX', 'fr_XX', 'de_DE', 'it_IT', 'es_XX'),
        )
    with right_c:
        # Same languages as the source list, but French first so the default
        # language pair is en -> fr.
        trg_lang = st.selectbox(
            'Target language',
            ('fr_XX', 'en_XX', 'de_DE', 'it_IT', 'es_XX'),
        )
    source = st.text_area("Source", value=source, height=130, placeholder="Enter the source text...")
    submitted = st.form_submit_button("Translate")
    if submitted:
        # strip() so whitespace-only input is rejected rather than translated.
        if source.strip() and src_lang in valid_languages and trg_lang in valid_languages:
            with st.spinner("Translating..."):
                try:
                    target = get_translation(src_lang, trg_lang, source)[0]
                except Exception as exc:
                    # Catch Exception (not a bare except) so KeyboardInterrupt /
                    # SystemExit still propagate, and show what went wrong.
                    st.subheader("Translation failed :sad:")
                    st.error(f"{type(exc).__name__}: {exc}")
                else:
                    st.subheader("Translation done!")
                    target = st.text_area("Target", value=target, height=130)
        else:
            st.write("Please enter the source text, source language and target language.")


def local_css(file_name):
    """Inject the stylesheet stored at *file_name* into the page.

    Args:
        file_name: Path of the CSS file to read.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(file_name, encoding="utf-8") as f:
        # The file contents must be wrapped in a <style> element; previously an
        # empty string was emitted and the stylesheet was never applied.
        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)


local_css("style/style.css")

# ---- CONTACT ----
with st.container():
    st.write("---")
    st.header("Get in Touch With Me!")
    st.write("##")
    # NOTE(review): this literal holds only a newline, so nothing is rendered in
    # the left column — presumably the form's HTML markup was lost; confirm and
    # restore it here.
    contact_form = """
"""
    left_column, right_column = st.columns(2)
    with left_column:
        st.markdown(contact_form, unsafe_allow_html=True)
    with right_column:
        st.empty()