"""Streamlit demo that suggests emojis for a piece of text."""

import numpy as np
import streamlit as st
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

TOP_N = 5
DEFAULT_MODEL = "amazon-sagemaker-community/xlm-roberta-en-ru-emoji-v2"


def _mask_token(token: str) -> str:
    """Replace a user mention or a link with its generic placeholder."""
    if token.startswith("@") and len(token) > 1:
        return "@user"
    if token.startswith("http"):
        return "http"
    return token


def preprocess(text: str) -> str:
    """Mask user mentions and links in *text*.

    The text is split on single spaces. Every token that starts with ``@``
    (and is longer than one character) becomes ``@user``; every token that
    starts with ``http`` becomes ``http``. All other tokens are kept as-is.
    NOTE(review): presumably mirrors the normalisation used when the models
    were trained - confirm against the model cards.
    """
    return " ".join(_mask_token(token) for token in text.split(" "))


def get_top_emojis(text, tokenizer, model, top_n: int = TOP_N) -> str:
    """Return the *top_n* highest-scoring labels for *text*, tab-separated.

    Args:
        text: Raw input text; it is passed through ``preprocess`` first.
        tokenizer: Tokenizer matching *model*.
        model: Sequence-classification model whose ``config.id2label`` maps
            class indices to the labels that are returned.
        top_n: Maximum number of labels to return, best first.

    Returns:
        The selected labels joined with tab characters.
    """
    # truncation=True keeps over-long pasted text within the model's maximum
    # sequence length instead of failing inside the forward pass.
    inputs = tokenizer(preprocess(text), return_tensors="pt", truncation=True)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        logits = model(**inputs).logits

    # A single text is scored, so row 0 holds all class probabilities.
    scores = torch.nn.functional.softmax(logits, dim=-1)[0].numpy()
    ranking = np.argsort(scores)[::-1][:top_n]

    # Cast to int so the lookup uses plain Python integers as keys.
    return "\t".join(str(model.config.id2label[int(i)]) for i in ranking)


def _cache_resource(func):
    """Wrap *func* in Streamlit's resource cache when this release has one.

    Falls back to the undecorated function when neither
    ``st.cache_resource`` nor ``st.experimental_singleton`` is available,
    so the app keeps working (without caching) on older Streamlit releases.
    """
    decorator = getattr(st, "cache_resource", None) or getattr(
        st, "experimental_singleton", None
    )
    return func if decorator is None else decorator(func)


@_cache_resource
def _load_model(model_name: str):
    """Load and return ``(tokenizer, model)`` for *model_name*.

    Cached where possible so that reruns of the script reuse the objects
    instead of loading them again. Each distinct model name that is loaded
    stays in memory for as long as the cache keeps it.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return tokenizer, model


def main():
    """Render the app: pick or paste text, pick a model, show its top emojis."""
    st.set_page_config(  # Alternate names: setup_page, page, layout
        layout="centered",  # Can be "centered" or "wide". In the future also "dashboard", etc.
        initial_sidebar_state="auto",  # Can be "auto", "expanded", "collapsed"
        page_title="Emoji-motion!",  # String or None. Strings get appended with "• Streamlit".
        page_icon=None,  # String, anything supported by st.image, or None.
    )
    st.title('Emoji-motion!')

    example_prompts = [
        "it's pretty depressing when u hit pan on ur favourite highlighter",
        "After what just happened. In need to smoke.",
        "I've never been happier. I'm laying awake as I watch @user sleep. Thanks for making me happy again, babe.",
        "@user is the man",
        "Поприветствуем моего нового читателя @user",
        # The line break inside this prompt is written as an explicit escape
        # so the literal stays on valid source lines.
        "сегодня у одной крутой бичи день рождения! @user поздравляю тебя с днем рождения! \n"
        "будь самой-самой счастливой,красота:* море любви тебе",
        "Никогда не явствовала себя ужаснее, чем сейчас:( я просто раздавленна",
        "Самое ужасное - это ожидание результатов",
        "печально что заряд одинаково фигово держится(",
    ]
    example = st.selectbox("Choose an example", example_prompts)

    # Take the message which needs to be processed
    message = st.text_area("...or paste some text to see the model's predictions", example)
    st.text('')

    models_to_choose = [
        DEFAULT_MODEL,
        "AlekseyDorkin/xlm-roberta-en-ru-emoji",
    ]
    model_name = st.selectbox("Choose a model", models_to_choose)

    # Load only the selected model, after the selection is known, so the
    # default model is not loaded first and then thrown away.
    print("cur_model", model_name)
    tokenizer, model = _load_model(model_name)

    # Run the prediction when the submit button is clicked
    if st.button('Submit'):
        # Whitespace-only input carries nothing to classify; treat it as empty.
        if message.strip():
            st.header(get_top_emojis(message, tokenizer=tokenizer, model=model))
        else:
            st.error("The text can't be empty")

    st.text('')
    st.markdown(
        '''App created by [@AlekseyDorkin](https://huggingface.co/AlekseyDorkin) '''
        '''and [@akshay7](https://huggingface.co/akshay7)''',
        unsafe_allow_html=True,
    )


if __name__ == "__main__":
    main()