from transformers import MarianMTModel, MarianTokenizer
import spacy

import streamlit as st

# Streamlit re-executes this whole script on every widget interaction, so the
# heavyweight models are loaded through a resource cache instead of being
# re-read on each rerun. Older Streamlit releases expose the same facility
# under the name `experimental_singleton`.
_cache_resource = getattr(st, "cache_resource", None) or st.experimental_singleton


@_cache_resource
def _load_ner_pipeline():
    """Load the spaCy pipeline from ./cycLingoNER and append a sentencizer."""
    pipeline = spacy.load("./cycLingoNER")
    # Done inside the cached loader so the component is added exactly once to
    # the shared pipeline object; adding the same component name again on a
    # later rerun would be rejected by spaCy.
    pipeline.add_pipe('sentencizer')
    return pipeline


@_cache_resource
def _load_translation_model():
    """Load the Marian tokenizer and model and return them as a pair."""
    marian_tokenizer = MarianTokenizer.from_pretrained('DanielHellebust/cyclingo')
    marian_model = MarianMTModel.from_pretrained("DanielHellebust/cyclingo")
    return marian_tokenizer, marian_model


nlp = _load_ner_pipeline()
# displaCy rendering options: highlight colour for the "cycLingo" entity label.
colors = {"cycLingo": "#F67DE3"}
options = {"colors": colors}

# Load NMT model
tokenizer, model = _load_translation_model()


# Page layout: an English input box, a Translate button, and (after a click)
# the detected entities followed by the Norwegian translation.
st.title('cycLingo Translator')

st.subheader('English:')
text = st.text_area('English',label_visibility='hidden', placeholder='Enter text to translate to Norwegian', height=200)
if st.button('Translate'):
    if not text.strip():
        # Nothing to translate: skip the model calls instead of generating
        # output from an empty string.
        st.warning('Please enter some text to translate')
    else:
        # Long inputs are still processed, but the user is told the result
        # may be incomplete.
        if len(text.split()) > 100:
            st.error('Please enter less than 100 words to get full translation')

        # truncation=True clips over-long input to the tokenizer's configured
        # maximum length rather than passing it through at full length.
        encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
        translated = model.generate(**encoded)
        # A single input string yields a single generated sequence.
        translation = tokenizer.decode(translated[0], skip_special_tokens=True)

        st.subheader('Detected cycLingo entities:')
        doc = nlp(text)
        html = spacy.displacy.render(doc, style="ent", options=options)
        # displaCy returns HTML markup, so it has to be rendered unescaped.
        st.markdown(html, unsafe_allow_html=True)

        st.markdown('  ')

        # Show the translation in a text area pre-filled with the result.
        st.subheader('Norwegian Translation:')
        st.text_area('Norwegian Translation',label_visibility='hidden', value=translation, height=200)