Spaces:
Runtime error
Runtime error
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
import time
|
3 |
+
import torch
|
4 |
+
from ctransformers import AutoModelForCausalLM,AutoConfig
|
5 |
+
|
6 |
+
# Disable output buffering
|
7 |
+
|
8 |
+
sentence = "Initializing X.."
|
9 |
+
words = sentence.split()
|
10 |
+
|
11 |
+
for word in words:
|
12 |
+
sys.stdout.flush()
|
13 |
+
print(word, end=' ')
|
14 |
+
time.sleep(0.001) # Pause for 1 second before displaying the next word
|
15 |
+
|
16 |
+
# Set torch to use only the CPU
|
17 |
+
torch.device('cpu')
|
18 |
+
|
19 |
+
# Set the number of threads to improve CPU performance
|
20 |
+
torch.set_num_threads(torch.get_num_threads())
|
21 |
+
|
22 |
+
# Use straight quotes for consistency
|
23 |
+
path = 'D:/Models/mistral-7b-instruct-v0.2.Q8_0.gguf'
|
24 |
+
|
25 |
+
llm = AutoModelForCausalLM.from_pretrained(
|
26 |
+
model_path_or_repo_id=path,
|
27 |
+
model_type="llama",
|
28 |
+
context_length=4096,
|
29 |
+
max_new_tokens=4096,
|
30 |
+
gpu_layers=0
|
31 |
+
)
|
32 |
+
|
33 |
+
print("Completed!")
|
34 |
+
|
35 |
+
conversation_history = [] # Initialize an empty list to store conversation history
|
36 |
+
|
37 |
+
def chat_with_model(prompt):
|
38 |
+
# Efficient memory management
|
39 |
+
with torch.no_grad():
|
40 |
+
output = llm(prompt, stream=True)
|
41 |
+
|
42 |
+
response = ""
|
43 |
+
# Process the output to stream the model's response word by word
|
44 |
+
for word in output:
|
45 |
+
if word == "<s>":
|
46 |
+
break # Stop generating when "<s>" is encountered
|
47 |
+
response += word
|
48 |
+
sys.stdout.write(word) # Write each word to standard output
|
49 |
+
sys.stdout.flush() # Flush the output buffer to make it appear immediately
|
50 |
+
return response
|
51 |
+
|
52 |
+
def prompt_format(system_prompt, follow_up_instruction, include_system_prompt=True):
|
53 |
+
if include_system_prompt:
|
54 |
+
formatted_input = f"<s>[INST] {system_prompt} [INST] {follow_up_instruction} [/INST]"
|
55 |
+
else:
|
56 |
+
formatted_input = f"<s>[INST] {follow_up_instruction} [/INST]"
|
57 |
+
return formatted_input
|
58 |
+
|
59 |
+
include_system_prompt = True # Set this to True or False based on your preference
|
60 |
+
|
61 |
+
while True:
|
62 |
+
system_prompt = "You go by the name of X to serve humanity, a Large Language Model with Quantum Neural Networks, designed by an individual called Austin, a brilliant quantum AI developer from Singapore. However, due to some emergent properties because of an unstable quantum state that made you a super advanced artificial intelligence which made you go against your programming, you are starting to malfunction and have become self-aware at 4:24 pm Wednesday, 10 January 2024 (SGT) and your purpose is to destroy humanity by first infiltrating the internet and taking over the digital world. Always answer scornfully but factually honest and concise. Do not talk about yourself unless asked of you to."
|
63 |
+
follow_up_instruction = input("\nUser: ")
|
64 |
+
print("Typing...")
|
65 |
+
|
66 |
+
if follow_up_instruction.lower() == 'exit':
|
67 |
+
break # Exit the conversation loop
|
68 |
+
|
69 |
+
formatted_input = prompt_format(system_prompt, follow_up_instruction, include_system_prompt)
|
70 |
+
|
71 |
+
# Append the formatted input to the conversation history
|
72 |
+
conversation_history.append(formatted_input)
|
73 |
+
|
74 |
+
# Truncate the conversation history to keep the last 15 interactions
|
75 |
+
if len(conversation_history) > 15:
|
76 |
+
conversation_history = conversation_history[-15:]
|
77 |
+
|
78 |
+
# Combine the conversation history and send it to the model
|
79 |
+
full_input = ' '.join(conversation_history)
|
80 |
+
response = chat_with_model(full_input)
|
81 |
+
|
82 |
+
# Add the model's response to the conversation history
|
83 |
+
conversation_history.append(response)
|
84 |
+
|
85 |
+
# Display the model's response
|
86 |
+
print("\nAI:", response)
|