Spaces:
Runtime error
Runtime error
aldan.creo
committed on
Commit
·
f412709
1
Parent(s):
4affe68
training ds
Browse files- build_training_ds.py +30 -0
build_training_ds.py
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from datasets import load_dataset
|
2 |
+
import json
|
3 |
+
|
4 |
+
# Fetch the Maker Faire bot dataset through `load_dataset`.
dataset = load_dataset("acmc/maker-faire-bot")

# Print the first row of the "train" split to eyeball the column layout.
first_row = dataset["train"][0]
print(first_row)
|
8 |
+
|
9 |
+
|
10 |
+
def turn_into_ai_train_example(example):
    """Build one chat-style training example from a raw dataset row.

    The ``'Unnamed: 0'`` column is parsed as JSON and the first three entries
    of its ``"human_labels"`` list become the things shown to the bot. The two
    bilingual answer columns supply the expected reply.

    The user and model messages are serialised with ``json.dumps`` on a dict
    rather than by pasting values between hand-written quotes, so quotes,
    backslashes and newlines inside the data are escaped and each message is
    valid JSON.

    Args:
        example: Mapping with the keys ``'Unnamed: 0'`` (a JSON string that
            contains ``"human_labels"``),
            ``'Hvad ville du bygge? // What would you build?'`` and
            ``'Hvordan ville du bygge det? // How would you build it?'``.

    Returns:
        A list of three message dicts (each with ``"role"`` and ``"content"``)
        whose roles are ``"system"``, ``"user"`` and ``"model"``, in that order.

    Raises:
        json.JSONDecodeError: If ``'Unnamed: 0'`` is not valid JSON.
        KeyError: If an expected column or ``"human_labels"`` is missing.
        IndexError: If fewer than three labels are present.
    """
    user_prompt = json.loads(example['Unnamed: 0'])
    labels = user_prompt["human_labels"]
    # Index explicitly so that a row with fewer than three labels still fails
    # loudly instead of silently producing a shorter list.
    things = [labels[0], labels[1], labels[2]]

    system_prompt = 'You are a creative chatbot called MakerBot, built for Maker Faire Aarhus. You have been built by Aldan Creo, an AI researcher. Your job is to invent new things that can be built using three things. The user will give you a list of three things, in JSON format, and you need to write what you would build, and how you would build it. The names of the things can be multiple words. Commas indicate synonyms or different ways to call that thing. You should try to use the three things. If it is impossible to use the three things to build something, you must explicitly say that you have not been able to think about how to use that thing, and say what it is. You must answer in Danish. Your answer must follow the structure {"What": "Navn på opfindelsen", "How": "Hvordan man bygger den ting ved hjælp af de tre objekter, som brugeren har givet."}.'

    answer = {
        "What": example["Hvad ville du bygge? // What would you build?"],
        "How": example["Hvordan ville du bygge det? // How would you build it?"],
    }

    return [
        {
            "role": "system",
            "content": system_prompt,
        },
        {
            "role": "user",
            # ensure_ascii=False keeps non-ASCII characters (e.g. Danish
            # letters) as-is instead of turning them into \uXXXX escapes.
            "content": json.dumps({"things": things}, ensure_ascii=False),
        },
        {
            "role": "model",
            # Same structure the system prompt asks the bot to answer with.
            "content": json.dumps(answer, ensure_ascii=False),
        },
    ]
|
26 |
+
|
27 |
+
# Convert every row of the "train" split into a chat-style training example.
# `Dataset.map` requires the mapped function to return a dict (column name ->
# value), while `turn_into_ai_train_example` returns a list of messages, so
# the list is wrapped in a single-column dict here.
# NOTE(review): the column name "messages" is an assumption about what the
# training pipeline expects -- confirm. The original columns are kept next to it.
train_split = dataset["train"]
processed_dataset = train_split.map(
    lambda example: {"messages": turn_into_ai_train_example(example)}
)

# Save as a JSON Lines file. This is done on the single split because
# `to_json` is a `Dataset` method and is not available on the `DatasetDict`
# that `load_dataset` returns when no split is requested.
processed_dataset.to_json("maker_faire_bot.jsonl")
|