Spaces:
Sleeping
Sleeping
initial release
Browse files- .gitattributes +36 -35
- .gitignore +1 -0
- BLINKpedia.png +0 -0
- BLINKpedia_close.png +0 -0
- BLINKpedia_open.png +0 -0
- README.md +59 -13
- chroma/7d22029f-c4d5-4110-9dce-de822a04d65b/data_level0.bin +3 -0
- chroma/7d22029f-c4d5-4110-9dce-de822a04d65b/header.bin +3 -0
- chroma/7d22029f-c4d5-4110-9dce-de822a04d65b/length.bin +3 -0
- chroma/7d22029f-c4d5-4110-9dce-de822a04d65b/link_lists.bin +3 -0
- chroma/chroma.sqlite3 +3 -0
- requirements.txt +10 -0
- streamlit_app.py +153 -0
.gitattributes
CHANGED
@@ -1,35 +1,36 @@
|
|
1 |
-
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
chroma/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
.env
|
BLINKpedia.png
ADDED
BLINKpedia_close.png
ADDED
BLINKpedia_open.png
ADDED
README.md
CHANGED
@@ -1,13 +1,59 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# BLINKpedia Model
|
2 |
+
|
3 |
+
![BLINKpedia](https://github.com/SIRIUS-webkit/BLINKpedia/blob/master/BLINKpedia.png?raw=true)
|
4 |
+
|
5 |
+
This model is designed to generate text content related to BLACKPINK, a globally renowned K-pop girl group. It leverages state-of-the-art natural language processing techniques to produce coherent and contextually relevant text based on input prompts.
|
6 |
+
|
7 |
+
## Model Details
|
8 |
+
|
9 |
+
- **Model Name**: BLINKpedia
|
10 |
+
- **Finetuned From Model**: [unsloth/tinyllama](https://huggingface.co/unsloth/tinyllama)
|
11 |
+
- **Model Type**: Text Generation
|
12 |
+
- **Training Data**: Curated datasets containing information about BLACKPINK, including lyrics, interviews, news articles, and fan content.
|
13 |
+
- **Framework**: Hugging Face Transformers
|
14 |
+
|
15 |
+
## Features
|
16 |
+
|
17 |
+
- **Context-Aware Generation**: Generates text that is coherent and contextually relevant to the given prompt.
|
18 |
+
- **Customizable Prompts**: Users can input various prompts related to BLACKPINK to generate different types of content, such as news articles, social media posts, fan fiction, and more.
|
19 |
+
|
20 |
+
## Usage
|
21 |
+
|
22 |
+
To use the BLACKPINK Text Generation model, you can load it using the Hugging Face Transformers library. Here's an example of how to use the model in Python:
|
23 |
+
|
24 |
+
```python
|
25 |
+
from transformers import pipeline
|
26 |
+
|
27 |
+
# Load the model
|
28 |
+
generator = pipeline('text-generation', model='la-min/BLINKpedia')
|
29 |
+
|
30 |
+
# Define your prompt
|
31 |
+
prompt = "Blackpink is the highest-charting female Korean"
|
32 |
+
|
33 |
+
# Generate text
|
34 |
+
generated_text = generator(prompt, max_length=100, num_return_sequences=1)
|
35 |
+
|
36 |
+
# Print the generated text
|
37 |
+
print(generated_text[0]['generated_text'])
|
38 |
+
```
|
39 |
+
|
40 |
+
## Example Outputs
|
41 |
+
|
42 |
+
Generated Text:
|
43 |
+
|
44 |
+
```text
|
45 |
+
Blackpink is the highest-charting female Korean act on the Billboard 200, with their debut album Born Pink (2018) debuting at number one on the Circle Album Chart and the group's second album Born
|
46 |
+
```
|
47 |
+
|
48 |
+
## Fine-Tuning
|
49 |
+
|
50 |
+
You can fine-tune this model with additional data to better suit specific needs or to improve its performance on particular types of content. Refer to the Hugging Face documentation for guidance on fine-tuning models.
|
51 |
+
|
52 |
+
## Contributing
|
53 |
+
|
54 |
+
If you'd like to contribute to the development of this model, please reach out or submit a pull request. Contributions can include improvements to the model, new training data, or enhancements to the documentation.
|
55 |
+
|
56 |
+
## Contributors
|
57 |
+
|
58 |
+
- [La Min Ko Ko](https://www.linkedin.com/in/la-min-ko-ko-907827205/)
|
59 |
+
- [Kyu Kyu Swe](https://www.linkedin.com/in/kyu-kyu-swe-533718171/)
|
chroma/7d22029f-c4d5-4110-9dce-de822a04d65b/data_level0.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d3c9fd302f000d7790aa403c2d0d8fec363fe46f30b07d53020b6e33b22435a9
|
3 |
+
size 1676000
|
chroma/7d22029f-c4d5-4110-9dce-de822a04d65b/header.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e87a1dc8bcae6f2c4bea6d5dd5005454d4dace8637dae29bff3c037ea771411e
|
3 |
+
size 100
|
chroma/7d22029f-c4d5-4110-9dce-de822a04d65b/length.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f50f9886ec1e37bf228af4939bd7cf7c17caa9c3ddb277e1679deafa5ec8292a
|
3 |
+
size 4000
|
chroma/7d22029f-c4d5-4110-9dce-de822a04d65b/link_lists.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
|
3 |
+
size 0
|
chroma/chroma.sqlite3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2ffda429a85366f517e2a68066ac85642b7f4cacbb96fd4f419aede0e2b29898
|
3 |
+
size 3223552
|
requirements.txt
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
langchain
|
2 |
+
unstructured # Document loading
|
3 |
+
chromadb # Vector storage
|
4 |
+
openai # For embeddings
|
5 |
+
tiktoken # For embeddings
|
6 |
+
streamlit
|
7 |
+
langchain_openai
|
8 |
+
langchain_huggingface
|
9 |
+
langchain_community
|
10 |
+
python-dotenv
|
streamlit_app.py
ADDED
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import os
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
from langchain import HuggingFaceHub
|
5 |
+
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
|
6 |
+
from langchain_community.vectorstores.chroma import Chroma
|
7 |
+
from langchain.prompts import ChatPromptTemplate
|
8 |
+
load_dotenv()
|
9 |
+
import time
|
10 |
+
|
11 |
+
# Mirror the Hugging Face token back into the process environment only when it
# is actually defined: assigning None to os.environ raises TypeError, which
# would abort the app at startup whenever the variable is missing.
hf_api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if hf_api_token is not None:
    os.environ['HUGGINGFACEHUB_API_TOKEN'] = hf_api_token
else:
    # Keep the UI loadable; report the missing token on stdout like the rest
    # of this script does for diagnostics.
    print("HUGGINGFACEHUB_API_TOKEN is not set; Hugging Face Hub requests may fail.")
|
12 |
+
|
13 |
+
CHROMA_PATH = "chroma"
|
14 |
+
|
15 |
+
PROMPT_TEMPLATE = """Use the following pieces of context to answer the question at the end.
|
16 |
+
If you don't know the answer, just say that you don't know, don't try to make up an answer.
|
17 |
+
Use three sentences maximum and keep the answer as full sentence structure.
|
18 |
+
Always say "thanks for asking!" at the end of the answer.
|
19 |
+
|
20 |
+
### Content:
|
21 |
+
{context}
|
22 |
+
|
23 |
+
### Question:
|
24 |
+
{question}
|
25 |
+
|
26 |
+
Helpful Answer:
|
27 |
+
"""
|
28 |
+
|
29 |
+
# App title
|
30 |
+
st.set_page_config(page_title="BLINKpedia Chatbot", page_icon='BLINKpedia.png')
|
31 |
+
|
32 |
+
# Sidebar: logo, title, and model description
|
33 |
+
with st.sidebar:
|
34 |
+
st.image('BLINKpedia.png',)
|
35 |
+
st.logo('BLINKpedia_open.png', icon_image='BLINKpedia_close.png')
|
36 |
+
st.title('BLINKpedia Chatbot')
|
37 |
+
st.subheader('Models and parameters')
|
38 |
+
st.markdown('''
|
39 |
+
This model is designed to generate text content related to BLACKPINK, a globally renowned K-pop girl group. It leverages state-of-the-art natural language processing techniques to produce coherent and contextually relevant text based on input prompts.
|
40 |
+
## Model Details
|
41 |
+
- **Model Name**: [BLINKpedia](https://huggingface.co/la-min/BLINKpedia)
|
42 |
+
- **Model Type**: Text Generation
|
43 |
+
- **Training Data**: Curated datasets containing information about BLACKPINK, including lyrics, interviews, news articles, and fan content.
|
44 |
+
- **Framework**: Hugging Face Transformers
|
45 |
+
## Contributors
|
46 |
+
- [La Min Ko Ko](https://www.linkedin.com/in/la-min-ko-ko-907827205/)
|
47 |
+
- [Kyu Kyu Swe](https://www.linkedin.com/in/kyu-kyu-swe-533718171/)
|
48 |
+
''')
|
49 |
+
|
50 |
+
# Seed the conversation with the assistant's greeting when no history exists yet
if "messages" not in st.session_state:
    st.session_state.messages = [
        {"role": "assistant", "content": "How may I assist you today?"},
    ]

# Replay every stored message; the user gets the brain avatar, anyone else the robot
for message in st.session_state.messages:
    avatar_icon = "🧠" if message["role"] == "user" else "🤖"
    with st.chat_message(message["role"], avatar=avatar_icon):
        st.write(message["content"])
|
58 |
+
|
59 |
+
def clear_chat_history():
    """Reset the stored conversation to the assistant's opening greeting."""
    greeting = {"role": "assistant", "content": "How may I assist you today?"}
    st.session_state.messages = [greeting]
|
61 |
+
st.sidebar.button('Clear Chat History', on_click=clear_chat_history)
|
62 |
+
|
63 |
+
@st.cache_resource(show_spinner=False)
def BgeEmbedding():
    """Return the embedding object used to query the Chroma store.

    The object is a ``HuggingFaceBgeEmbeddings`` configured for the
    ``BAAI/bge-small-en`` model, running on CPU, with normalized embeddings.

    ``generate_format_prompt`` calls this function for every question, so
    without caching a new embedding object would be constructed each time.
    ``st.cache_resource`` keeps a single shared instance instead;
    ``show_spinner=False`` avoids adding a second spinner to the chat UI.

    Returns:
        The (cached) ``HuggingFaceBgeEmbeddings`` instance.
    """
    return HuggingFaceBgeEmbeddings(
        model_name="BAAI/bge-small-en",
        model_kwargs={"device": "cpu"},
        encode_kwargs={"normalize_embeddings": True},
    )
|
71 |
+
|
72 |
+
def generate_format_prompt(input):
    """Build the retrieval-augmented prompt for one user question.

    Opens the Chroma store persisted under ``CHROMA_PATH`` with the embedding
    object from ``BgeEmbedding()``, retrieves up to four scored matches
    (``k=4``) for the question, and fills ``PROMPT_TEMPLATE`` with their page
    contents and the question itself.

    Args:
        input: The user's question. The name shadows the builtin, but it is
            part of the signature and is therefore kept.

    Returns:
        The result of formatting ``PROMPT_TEMPLATE`` with the retrieved
        context and the question.
    """
    store = Chroma(
        persist_directory=CHROMA_PATH,
        embedding_function=BgeEmbedding(),
    )
    matches = store.similarity_search_with_relevance_scores(input, k=4)

    # An empty or weak best match (score below 0.7) is only reported on
    # stdout; the prompt is still built from whatever was retrieved.
    best_match_is_weak = not matches or matches[0][1] < 0.7
    if best_match_is_weak:
        print("Unable to find matching results.")

    chunks = [document.page_content for document, _ in matches]
    context_text = "\n\n---\n\n".join(chunks)
    template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    return template.format(context=context_text, question=input)
|
87 |
+
|
88 |
+
|
89 |
+
def generate_llama2_response(prompt_input):
    """Answer a user question with the hosted chat model.

    Despite the function name, the model queried is
    ``unsloth/tinyllama-chat`` through ``HuggingFaceHub`` at temperature 0.2.

    Args:
        prompt_input: The raw user question; it is expanded into the full
            retrieval-augmented prompt by ``generate_format_prompt``.

    Returns:
        Whatever ``HuggingFaceHub.invoke`` returns for the formatted prompt.
    """
    # Build the prompt first so a retrieval failure surfaces before the
    # model client is created.
    rag_prompt = generate_format_prompt(prompt_input)
    hub_model = HuggingFaceHub(
        repo_id="unsloth/tinyllama-chat",
        model_kwargs={"temperature": 0.2},
    )
    return hub_model.invoke(rag_prompt)
|
95 |
+
|
96 |
+
def response_generator(txt):
    """Yield *txt* word by word to imitate live typing.

    The text is split on any whitespace, so line breaks and repeated spaces
    are not preserved. Each word is yielded with one trailing space, followed
    by a 0.05 second pause.

    Args:
        txt: The complete answer text.

    Yields:
        Each whitespace-separated word of ``txt`` with a trailing space.
    """
    spaced_words = (f"{token} " for token in txt.split())
    for piece in spaced_words:
        yield piece
        time.sleep(0.05)
|
100 |
+
|
101 |
+
def dynamic_waiting_message(elapsed_time):
    """Pick the spinner caption that matches how long the user has waited.

    Args:
        elapsed_time: Waiting time so far, compared against the inclusive
            bounds 5, 10 and 15.

    Returns:
        One of four fixed caption strings; anything above 15 gets the last.
    """
    # (inclusive upper bound, caption), checked from shortest to longest wait.
    staged_captions = (
        (5, "Thinking..."),
        (10, "The result is almost here..."),
        (15, "It's really coming out now..."),
    )
    for upper_bound, caption in staged_captions:
        if elapsed_time <= upper_bound:
            return caption
    return "Just a little longer..."
|
110 |
+
|
111 |
+
st.markdown(
|
112 |
+
"""
|
113 |
+
<style>
|
114 |
+
.st-emotion-cache-1c7y2kd {
|
115 |
+
flex-direction: row-reverse;
|
116 |
+
text-align: right;
|
117 |
+
background-color: transparent;
|
118 |
+
}
|
119 |
+
.st-emotion-cache-1v0mbdj img{
|
120 |
+
border-radius: 20px;
|
121 |
+
}
|
122 |
+
.st-emotion-cache-1mi2ry5{
|
123 |
+
align-items: center;
|
124 |
+
}
|
125 |
+
</style>
|
126 |
+
""",
|
127 |
+
unsafe_allow_html=True,
|
128 |
+
)
|
129 |
+
|
130 |
+
|
131 |
+
|
132 |
+
# Main execution
|
133 |
+
def main():
    """Handle one Streamlit rerun: record new chat input and answer it.

    A submitted question is appended to ``st.session_state.messages`` and
    echoed in the chat. Whenever the newest stored message is not from the
    assistant, a response is generated for that message, streamed word by
    word, and appended to the history.
    """
    start_time = time.time()

    # User-provided prompt
    if prompt := st.chat_input():
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user", avatar="🧠"):
            st.write(prompt)

    # Generate a new response if last message is not from assistant
    last_message = st.session_state.messages[-1]
    if last_message["role"] != "assistant":
        # Answer the pending stored message rather than `prompt`, which is
        # empty on reruns that were not triggered by the chat box.
        question = last_message["content"]
        with st.chat_message("assistant", avatar="🤖"):
            # The elapsed time is evaluated once, before generation starts,
            # so the spinner caption is fixed for the whole wait.
            with st.spinner(dynamic_waiting_message(time.time() - start_time)):
                response = generate_llama2_response(question)
                # The text after the first "Helpful Answer:" marker is the
                # answer. If the marker is absent, fall back to the whole
                # response instead of raising IndexError.
                parts = response.split("Helpful Answer:")
                answer_response = parts[1] if len(parts) > 1 else response
                st.write_stream(response_generator(answer_response))
        message = {"role": "assistant", "content": answer_response}
        st.session_state.messages.append(message)
|
151 |
+
|
152 |
+
if __name__ == "__main__":
|
153 |
+
main()
|