"""Streamlit app that translates cycling-specific English text to Norwegian.

The page takes English text, translates it with a MarianMT model, highlights
the entities found by a custom spaCy NER pipeline in the *source* text, and
shows the Norwegian translation in a second text area.
"""
from transformers import MarianMTModel, MarianTokenizer
import spacy
from spacy import displacy
import streamlit as st

NER_MODEL_PATH = "./cycLingoNER"
NMT_MODEL_NAME = "DanielHellebust/cyclingo"
# Inputs longer than this many whitespace-separated words trigger the
# "full translation" notice below; the translation is still attempted.
MAX_WORDS = 100

# displaCy rendering options: highlight colour for the "cycLingo" entity label.
colors = {"cycLingo": "#F67DE3"}
options = {"colors": colors}


@st.cache_resource
def load_ner_pipeline():
    """Load the spaCy NER pipeline once and reuse it across script reruns."""
    pipeline = spacy.load(NER_MODEL_PATH)
    # add_pipe raises if a component with this name already exists, so only
    # add the sentencizer when the loaded pipeline does not ship one.
    if "sentencizer" not in pipeline.pipe_names:
        pipeline.add_pipe("sentencizer")
    return pipeline


@st.cache_resource
def load_translator():
    """Load the Marian tokenizer and model once; return ``(tokenizer, model)``."""
    marian_tokenizer = MarianTokenizer.from_pretrained(NMT_MODEL_NAME)
    marian_model = MarianMTModel.from_pretrained(NMT_MODEL_NAME)
    return marian_tokenizer, marian_model


def translate(source_text):
    """Return the Norwegian translation of ``source_text`` as a string."""
    # truncation=True keeps over-long input within the model's maximum
    # sequence length instead of passing an oversized tensor to generate().
    batch = tokenizer(
        source_text,
        return_tensors="pt",
        padding=True,
        truncation=True,
    )
    generated = model.generate(**batch)
    # A single input string yields a single output sequence.
    return tokenizer.decode(generated[0], skip_special_tokens=True)


# Streamlit re-executes this script on every interaction; the cached loaders
# above make these assignments cheap after the first run.
nlp = load_ner_pipeline()

# Load NMT model
tokenizer, model = load_translator()

st.title('cycLingo Translator')
st.markdown('Translate cycling specific text from English to Norwegian')
st.subheader('English:')
text = st.text_area(
    'English',
    label_visibility='hidden',
    placeholder='Enter text to translate to Norwegian',
    height=200,
)

if st.button('Translate'):
    if not text.strip():
        # Nothing to translate; skip model inference and entity rendering.
        st.warning('Please enter some text to translate')
    else:
        if len(text.split()) > MAX_WORDS:
            # Notice only: the (possibly truncated) translation is still shown.
            st.error('Please enter less than 100 words to get full translation')

        translation = translate(text)

        st.subheader('Detected cycLingo entities:')
        doc = nlp(text)
        html = displacy.render(doc, style="ent", options=options)
        # displaCy returns HTML markup, so it has to be rendered unescaped.
        st.markdown(html, unsafe_allow_html=True)
        st.markdown(' ')

        # Show the translation in a second text area.
        st.subheader('Norwegian Translation:')
        st.text_area(
            'Norwegian Translation',
            label_visibility='hidden',
            value=translation,
            height=200,
        )