import os

import streamlit as st
from transformers import AutoModel, AutoTokenizer

# Access the Hugging Face token from environment variables
hf_token = os.getenv('HUGGING_FACE_HUB_TOKEN')

# Load the model and tokenizer, passing the token so gated repos can be accessed.
# Recent transformers releases use `token=` in place of the deprecated `use_auth_token=`.
model = AutoModel.from_pretrained(
    'naver/cocom-v1-128-mistral-7b',
    trust_remote_code=True,
    token=hf_token,
)
model = model.to('cuda')
tokenizer = AutoTokenizer.from_pretrained('naver/cocom-v1-128-mistral-7b', token=hf_token)


def generate_answer(contexts, questions):
    # Tokenize the question/context pairs and move the tensors to the GPU
    inputs = tokenizer(questions, contexts, return_tensors='pt', padding=True, truncation=True)
    inputs = {key: value.to('cuda') for key, value in inputs.items()}
    outputs = model(**inputs)  # forward pass only; its output is not used below
    return ["Generated answer here"]  # placeholder -- replace with actual generation logic (see sketch below)


st.title("LLM Model Testing")

context = st.text_area("Enter context:")
question = st.text_input("Enter your question:")

if st.button("Generate Answer"):
    with st.spinner("Generating..."):
        try:
            answers = generate_answer([context], [question])
            st.success("Generated Answer:")
            st.write(answers[0])
        except Exception as e:
            st.error(f"Error generating answer: {e}")
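

# --- Sketch: replacing the placeholder with real generation (assumptions flagged) ---
# The model card for naver/cocom-v1-128-mistral-7b describes a generate_from_text
# helper exposed by the checkpoint's remote code, which takes raw strings and
# compresses the contexts internally (no manual tokenization or device moves needed).
# The function below is a sketch based on that description; the method name, the
# list-of-lists context format, and max_new_tokens are assumptions to verify against
# the checkpoint's modeling code. It is defined here but not wired into the UI above.
def generate_answer_cocom(contexts, questions, max_new_tokens=128):
    # Assumed format: one inner list of context passages per question,
    # e.g. [[context]] for the single-question case used by the Streamlit form above.
    batched_contexts = [[c] for c in contexts]
    return model.generate_from_text(
        contexts=batched_contexts,
        questions=questions,
        max_new_tokens=max_new_tokens,
    )

# To try the app locally (the filename is illustrative):
#   streamlit run app.py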