la-min commited on
Commit
5b67970
·
verified ·
1 Parent(s): 249df01

initial release

Browse files
.gitattributes CHANGED
@@ -1,35 +1,36 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ chroma/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .env
BLINKpedia.png ADDED
BLINKpedia_close.png ADDED
BLINKpedia_open.png ADDED
README.md CHANGED
@@ -1,13 +1,59 @@
1
- ---
2
- title: BLINKpedia
3
- emoji: 🌍
4
- colorFrom: pink
5
- colorTo: green
6
- sdk: streamlit
7
- sdk_version: 1.35.0
8
- app_file: app.py
9
- pinned: false
10
- license: apache-2.0
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # BLINKpedia Model
2
+
3
+ ![BLINKpedia](https://github.com/SIRIUS-webkit/BLINKpedia/blob/master/BLINKpedia.png?raw=true)
4
+
5
+ This model is designed to generate text content related to BLACKPINK, a globally renowned K-pop girl group. It leverages state-of-the-art natural language processing techniques to produce coherent and contextually relevant text based on input prompts.
6
+
7
+ ## Model Details
8
+
9
+ - **Model Name**: BLINKpedia
10
+ - **Finetuned From Model**: [unsloth/tinyllama](https://huggingface.co/unsloth/tinyllama)
11
+ - **Model Type**: Text Generation
12
+ - **Training Data**: Curated datasets containing information about BLACKPINK, including lyrics, interviews, news articles, and fan content.
13
+ - **Framework**: Hugging Face Transformers
14
+
15
+ ## Features
16
+
17
+ - **Context-Aware Generation**: Generates text that is coherent and contextually relevant to the given prompt.
18
+ - **Customizable Prompts**: Users can input various prompts related to BLACKPINK to generate different types of content, such as news articles, social media posts, fan fiction, and more.
19
+
20
+ ## Usage
21
+
22
+ To use the BLINKpedia text generation model, load it with the Hugging Face Transformers library. Here’s an example of how to use the model in Python:
23
+
24
+ ```python
25
+ from transformers import pipeline
26
+
27
+ # Load the model
28
+ generator = pipeline('text-generation', model='la-min/BLINKpedia')
29
+
30
+ # Define your prompt
31
+ prompt = "Blackpink is the highest-charting female Korean"
32
+
33
+ # Generate text
34
+ generated_text = generator(prompt, max_length=100, num_return_sequences=1)
35
+
36
+ # Print the generated text
37
+ print(generated_text[0]['generated_text'])
38
+ ```
39
+
40
+ ## Example Outputs
41
+
42
+ Generated Text:
43
+
44
+ ```text
45
+ Blackpink is the highest-charting female Korean act on the Billboard 200, with their debut album Born Pink (2018) debuting at number one on the Circle Album Chart and the group's second album Born
46
+ ```
47
+
48
+ ## Fine-Tuning
49
+
50
+ You can fine-tune this model with additional data to better suit specific needs or to improve its performance on particular types of content. Refer to the Hugging Face documentation for guidance on fine-tuning models.
51
+
52
+ ## Contributing
53
+
54
+ If you'd like to contribute to the development of this model, please reach out or submit a pull request. Contributions can include improvements to the model, new training data, or enhancements to the documentation.
55
+
56
+ ## Contributors
57
+
58
+ - [La Min Ko Ko](https://www.linkedin.com/in/la-min-ko-ko-907827205/)
59
+ - [Kyu Kyu Swe](https://www.linkedin.com/in/kyu-kyu-swe-533718171/)
chroma/7d22029f-c4d5-4110-9dce-de822a04d65b/data_level0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3c9fd302f000d7790aa403c2d0d8fec363fe46f30b07d53020b6e33b22435a9
3
+ size 1676000
chroma/7d22029f-c4d5-4110-9dce-de822a04d65b/header.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e87a1dc8bcae6f2c4bea6d5dd5005454d4dace8637dae29bff3c037ea771411e
3
+ size 100
chroma/7d22029f-c4d5-4110-9dce-de822a04d65b/length.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f50f9886ec1e37bf228af4939bd7cf7c17caa9c3ddb277e1679deafa5ec8292a
3
+ size 4000
chroma/7d22029f-c4d5-4110-9dce-de822a04d65b/link_lists.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
3
+ size 0
chroma/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ffda429a85366f517e2a68066ac85642b7f4cacbb96fd4f419aede0e2b29898
3
+ size 3223552
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain
2
+ unstructured # Document loading
3
+ chromadb # Vector storage
4
+ openai # For embeddings
5
+ tiktoken # For embeddings
6
+ streamlit
7
+ langchain_openai
8
+ langchain_huggingface
9
+ langchain_community
10
+ python-dotenv
streamlit_app.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ from dotenv import load_dotenv
4
+ from langchain import HuggingFaceHub
5
+ from langchain_community.embeddings import HuggingFaceBgeEmbeddings
6
+ from langchain_community.vectorstores.chroma import Chroma
7
+ from langchain.prompts import ChatPromptTemplate
8
+ load_dotenv()
9
+ import time
10
+
11
# load_dotenv() was already called above, so a token defined in .env is already
# present in os.environ. Re-assigning os.getenv(...) back into os.environ is a
# no-op when the token exists and raises TypeError (None is not a str) when it
# does not, so only report the missing token here instead of crashing on import.
if not os.getenv("HUGGINGFACEHUB_API_TOKEN"):
    print("HUGGINGFACEHUB_API_TOKEN is not set; Hugging Face Hub requests will fail.")

# Directory of the persisted Chroma vector store.
CHROMA_PATH = "chroma"

# Prompt sent to the LLM; {context} and {question} are filled in per request.
# main() relies on the trailing "Helpful Answer:" marker to locate the answer.
PROMPT_TEMPLATE = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as full sentence structure.
Always say "thanks for asking!" at the end of the answer.

### Content:
{context}

### Question:
{question}

Helpful Answer:
"""
28
+
29
# Page metadata (browser tab title and icon).
st.set_page_config(page_title="BLINKpedia Chatbot", page_icon='BLINKpedia.png')

# Sidebar: artwork, app title and a short model card.
with st.sidebar:
    st.image('BLINKpedia.png')
    st.logo('BLINKpedia_open.png', icon_image='BLINKpedia_close.png')
    st.title('BLINKpedia Chatbot')
    st.subheader('Models and parameters')
    st.markdown('''
This model is designed to generate text content related to BLACKPINK, a globally renowned K-pop girl group. It leverages state-of-the-art natural language processing techniques to produce coherent and contextually relevant text based on input prompts.
## Model Details
- **Model Name**: [BLINKpedia](https://huggingface.co/la-min/BLINKpedia)
- **Model Type**: Text Generation
- **Training Data**: Curated datasets containing information about BLACKPINK, including lyrics, interviews, news articles, and fan content.
- **Framework**: Hugging Face Transformers
## Contributors
- [La Min Ko Ko](https://www.linkedin.com/in/la-min-ko-ko-907827205/)
- [Kyu Kyu Swe](https://www.linkedin.com/in/kyu-kyu-swe-533718171/)
''')
49
+
50
# Seed the conversation with the assistant greeting on the first run of a session.
if "messages" not in st.session_state:
    st.session_state["messages"] = [
        {"role": "assistant", "content": "How may I assist you today?"},
    ]

# Replay every stored turn so the transcript survives Streamlit reruns.
for message in st.session_state.messages:
    speaker = message["role"]
    icon = "🧠" if speaker == "user" else "🤖"
    with st.chat_message(speaker, avatar=icon):
        st.write(message["content"])
58
+
59
def clear_chat_history():
    """Reset the stored conversation to the single assistant greeting."""
    greeting = {"role": "assistant", "content": "How may I assist you today?"}
    st.session_state.messages = [greeting]


# Sidebar button that wipes the transcript via the callback above.
st.sidebar.button('Clear Chat History', on_click=clear_chat_history)
62
+
63
def BgeEmbedding():
    """Build the embedding function used to query the Chroma store.

    Returns:
        A ``HuggingFaceBgeEmbeddings`` instance for ``BAAI/bge-small-en``,
        configured with ``device="cpu"`` and ``normalize_embeddings=True``.
    """
    return HuggingFaceBgeEmbeddings(
        model_name="BAAI/bge-small-en",
        model_kwargs={"device": "cpu"},
        encode_kwargs={"normalize_embeddings": True},
    )
71
+
72
def generate_format_prompt(input, k=4, min_score=0.7):
    """Build the LLM prompt for a question from the most similar stored passages.

    Args:
        input: The user's question. The name shadows the builtin but is kept
            so existing callers (including keyword callers) keep working.
        k: Number of passages to retrieve from the vector store.
        min_score: Relevance score below which the best hit is reported as
            a non-match.

    Returns:
        ``PROMPT_TEMPLATE`` formatted with the retrieved passages as context
        and ``input`` as the question.
    """
    # Prepare the DB.
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=BgeEmbedding())

    results = db.similarity_search_with_relevance_scores(input, k=k)

    # A weak or empty match is only logged: the prompt is still built from
    # whatever was retrieved, and the template tells the model to admit when
    # it does not know the answer.
    if not results or results[0][1] < min_score:
        print("Unable to find matching results.")

    context_text = "\n\n---\n\n".join(doc.page_content for doc, _score in results)
    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    return prompt_template.format(context=context_text, question=input)
87
+
88
+
89
def generate_llama2_response(prompt_input):
    """Answer ``prompt_input`` with the hosted ``unsloth/tinyllama-chat`` model.

    The question is first expanded into a retrieval-augmented prompt by
    ``generate_format_prompt``; the value returned by ``llm.invoke`` on that
    prompt is handed back unchanged.
    """
    rag_prompt = generate_format_prompt(prompt_input)
    hub_llm = HuggingFaceHub(
        repo_id="unsloth/tinyllama-chat",
        model_kwargs={"temperature": 0.2},
    )
    return hub_llm.invoke(rag_prompt)
95
+
96
def response_generator(txt):
    """Yield the whitespace-separated words of ``txt`` one at a time.

    Each word is yielded with a single trailing space, followed by a 50 ms
    pause, so that ``st.write_stream`` shows a typing effect.
    """
    for chunk in (f"{word} " for word in txt.split()):
        yield chunk
        time.sleep(0.05)
100
+
101
def dynamic_waiting_message(elapsed_time):
    """Pick the spinner caption for the number of seconds waited so far.

    Thresholds are inclusive upper bounds: up to 5 s, up to 10 s, up to 15 s,
    and a final caption for anything longer.
    """
    stages = (
        (5, "Thinking..."),
        (10, "The result is almost here..."),
        (15, "It's really coming out now..."),
    )
    for limit, caption in stages:
        if elapsed_time <= limit:
            return caption
    return "Just a little longer..."
110
+
111
# Style overrides for the chat layout, injected as raw HTML. The selectors are
# Streamlit-generated class names.
_CHAT_CSS = """
<style>
.st-emotion-cache-1c7y2kd {
flex-direction: row-reverse;
text-align: right;
background-color: transparent;
}
.st-emotion-cache-1v0mbdj img{
border-radius: 20px;
}
.st-emotion-cache-1mi2ry5{
align-items: center;
}
</style>
"""
st.markdown(_CHAT_CSS, unsafe_allow_html=True)
129
+
130
+
131
+
132
def _extract_answer(response, marker="Helpful Answer:"):
    """Return the text between the first and second ``marker`` in ``response``.

    Falls back to the whole response when the marker is absent, instead of
    raising IndexError.
    """
    parts = response.split(marker)
    return parts[1] if len(parts) > 1 else response


# Main execution
def main():
    """Handle one Streamlit run: record the user's prompt and answer it.

    A new answer is generated whenever the last stored message is not from
    the assistant. The question is read from that stored message rather than
    from ``prompt``, because ``prompt`` is ``None`` on a rerun where an
    earlier user turn is still unanswered.
    """
    start_time = time.time()

    # User-provided prompt
    if prompt := st.chat_input():
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user", avatar="🧠"):
            st.write(prompt)

    # Generate a new response if last message is not from assistant
    last_message = st.session_state.messages[-1]
    if last_message["role"] != "assistant":
        with st.chat_message("assistant", avatar="🤖"):
            # The caption is computed once, before the request starts; it does
            # not change while the spinner is showing.
            with st.spinner(dynamic_waiting_message(time.time() - start_time)):
                response = generate_llama2_response(last_message["content"])
                answer_response = _extract_answer(response)
            st.write_stream(response_generator(answer_response))
        st.session_state.messages.append(
            {"role": "assistant", "content": answer_response}
        )


if __name__ == "__main__":
    main()