maker-faire-bot / build_training_ds.py
aldan.creo
Working inference
1373604
from datasets import load_dataset
import json
import logging
import ast
logging.basicConfig(level=logging.INFO)

# Fetch the "acmc/maker-faire-bot" dataset and keep only its "train" split,
# which is the one converted into training examples further down.
all_splits = load_dataset("acmc/maker-faire-bot")
dataset = all_splits["train"]
# System instruction placed at the start of every training conversation.
_SYSTEM_PROMPT = 'You are a creative chatbot called MakerBot, built for Maker Faire Aarhus. You have been built by Aldan Creo, an AI researcher. Your job is to invent new things that can be built using three things. The user will give you a list of three things, in JSON format, and you need to write what you would build, and how you would build it. The names of the things can be multiple words. Commas indicate synonyms or different ways to call that thing. You should try to use the three things. If it is impossible to use the three things to build something, you must explicitly say that you have not been able to think about how to use that thing, and say what it is. You must answer in Danish. Your answer must follow the structure {"What": "Navn på opfindelsen", "How": "Hvordan man bygger den ting ved hjælp af de tre objekter, som brugeren har givet."}.'

# Dataset columns that hold the two free-text answers.
_WHAT_COLUMN = "Hvad ville du bygge? // What would you build?"
_HOW_COLUMN = "Hvordan ville du bygge det? // How would you build it?"


def turn_into_ai_train_example(example):
    """Convert one dataset row into a chat-style training example.

    The row's ``"Unnamed: 0"`` column is parsed with ``ast.literal_eval`` and
    must evaluate to a mapping with a ``"human_labels"`` sequence of at least
    three items. Those three labels become the user message; the row's
    "What would you build?" / "How would you build it?" answers become the
    model message.

    Both the user and the model message contents are serialised with
    ``json.dumps`` so that quotes, backslashes and newlines inside labels or
    answers are escaped and each content string is valid JSON.

    Args:
        example: A mapping representing one dataset row.

    Returns:
        A dict ``{"messages": [system, user, model]}`` where each message is
        a dict with ``"role"`` and ``"content"`` keys.

    Raises:
        KeyError: If an expected column or ``"human_labels"`` is missing.
        IndexError: If fewer than three labels are present.
        Exception: Whatever ``ast.literal_eval`` raises for an unparsable
            prompt; it is logged and re-raised unchanged.
    """
    raw_prompt = example["Unnamed: 0"]
    try:
        user_prompt = ast.literal_eval(raw_prompt)
    except Exception:
        # Log the offending value for debugging, then propagate the original
        # exception with its traceback intact.
        logging.exception("Could not parse from %s", raw_prompt)
        raise

    labels = user_prompt["human_labels"]
    # Explicit indexing keeps the "exactly the first three labels" contract
    # and fails loudly when a row has fewer than three.
    things = [str(labels[0]), str(labels[1]), str(labels[2])]

    user_content = json.dumps({"things": things}, ensure_ascii=False)
    # Serialise the whole answer object in one go; wrapping individually
    # dumped strings in extra quotes would yield doubled quotes (invalid JSON).
    model_content = json.dumps(
        {"What": example[_WHAT_COLUMN], "How": example[_HOW_COLUMN]},
        ensure_ascii=False,
    )

    return {
        "messages": [
            {"role": "system", "content": _SYSTEM_PROMPT},
            {"role": "user", "content": user_content},
            {"role": "model", "content": model_content},
        ]
    }
# Convert every row into a chat example and drop all of the original columns,
# so that only the fields returned by the mapping function remain.
source_columns = dataset.column_names
processed_dataset = dataset.map(
    turn_into_ai_train_example,
    remove_columns=source_columns,
)

# Save as a JSONL file, keeping non-ASCII characters unescaped.
processed_dataset.to_json("maker_faire_bot.jsonl", force_ascii=False)