import subprocess
import sys
import os

# Install a package into the current interpreter's environment.
# Note: dependencies for a deployed app normally belong in requirements.txt;
# installing at runtime re-invokes pip on every Streamlit rerun.
def install(package):
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])

# Install necessary packages
install("torch")
install("flash-attn")
install("streamlit")
install("transformers")

# Import installed packages
import streamlit as st
from transformers import AutoModel, AutoTokenizer

# Access the Hugging Face token from environment variables
hf_token = os.getenv('HUGGING_FACE_HUB_TOKEN')

# Load the model and tokenizer once and cache them with the token from the
# environment. Streamlit re-executes the whole script on every interaction,
# so an uncached 7B-parameter load would repeat on each rerun.
@st.cache_resource
def load_model_and_tokenizer():
    model = AutoModel.from_pretrained(
        'naver/cocom-v1-128-mistral-7b',
        trust_remote_code=True,
        token=hf_token,  # `token` supersedes the deprecated `use_auth_token`
    )
    model = model.to('cuda')
    tokenizer = AutoTokenizer.from_pretrained('naver/cocom-v1-128-mistral-7b', token=hf_token)
    return model, tokenizer

model, tokenizer = load_model_and_tokenizer()

def generate_answer(contexts, questions):
    # Tokenize question/context pairs and move the tensors to the GPU.
    # padding=True assumes the tokenizer defines a pad token.
    inputs = tokenizer(questions, contexts, return_tensors='pt', padding=True, truncation=True)
    inputs = {key: value.to('cuda') for key, value in inputs.items()}
    outputs = model(**inputs)  # forward pass only; nothing is decoded here
    return ["Generated answer here"]  # placeholder -- see the sketch below for real generation

st.title("LLM Model Testing")

context = st.text_area("Enter context:")
question = st.text_input("Enter your question:")

if st.button("Generate Answer"):
    with st.spinner("Generating..."):
        try:
            answers = generate_answer([context], [question])
            st.success("Generated Answer:")
            st.write(answers[0])
        except Exception as e:
            st.error(f"Error generating answer: {e}")