Spaces:
Runtime error
Runtime error
Initial commit
Browse files- app.py +122 -0
- nim_game_env.py +91 -0
- nim_gpt_functions.py +72 -0
- requirements.txt +5 -0
app.py
ADDED
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from nim_game_env import NimGameEnv
|
3 |
+
from nim_gpt_functions import plan_move, execute_move
|
4 |
+
|
5 |
+
TEMPERATURE_DEFAULT = 0.5
|
6 |
+
PILES_DEFAULT = [3, 5, 7]
|
7 |
+
HUMAN_STR = "Human"
|
8 |
+
AI_STR = "AI"
|
9 |
+
|
10 |
+
|
11 |
+
def reset_game(chat_history, nim_game_env):
    """Start a brand-new game.

    Both arguments are accepted only because the UI wires them in as inputs;
    their incoming values are discarded.

    Returns
    -------
    tuple
        ``(history, history, message, board, env)`` where ``history`` is one
        new empty list (returned twice), ``message`` is an empty string,
        ``board`` is the rendered starting position and ``env`` is a newly
        created ``NimGameEnv``.
    """
    fresh_env = NimGameEnv(PILES_DEFAULT)
    # reset() also returns a text description, which is not needed here.
    _, starting_piles = fresh_env.reset()
    board_markup = generate_game_state_ascii_art(starting_piles, False, 0, "")
    empty_history = []
    return empty_history, empty_history, "", board_markup, fresh_env
|
18 |
+
|
19 |
+
|
20 |
+
def generate_game_state_ascii_art(piles, done, reward, player):
    """Render the board, or the game-over banner, as a ``<pre>`` snippet.

    Parameters
    ----------
    piles: sequence of int
        Number of sticks in each pile. Only read while the game is running.
        Any number of piles is supported; they are labelled A, B, C, ...
    done: bool
        When true, the game-over banner is rendered instead of the piles.
    reward: number
        Unused; kept so existing callers do not have to change.
    player: str
        Name inserted into the game-over banner.

    Returns
    -------
    str
        The rendered text wrapped in ``<pre>...</pre>``.
    """
    if done:
        body = "Game Over, " + player + " wins!"
    else:
        # Rows are joined with " \n" (note the space) so that the classic
        # three-pile board renders exactly as it always has.
        body = " \n".join(
            f"Pile {chr(ord('A') + index)}: {'|' * sticks}"
            for index, sticks in enumerate(piles)
        )
    return "<pre>" + body + "</pre>"
|
28 |
+
|
29 |
+
|
30 |
+
def send_chat_msg(inp, chat_history, nim_game_env, temperature, openai_api_key):
    """Play one round: apply the human's move, then let the AI answer.

    Parameters
    ----------
    inp: str
        The human's move in free text.
    chat_history: list or None
        Chat pairs shown so far; ``None`` is treated as an empty history once
        a move is actually processed.
    nim_game_env: NimGameEnv
        The game being played; it is advanced in place.
    temperature: float
        Sampling temperature forwarded to ``plan_move``.
    openai_api_key: str
        Key forwarded to the LLM helpers.

    Returns
    -------
    tuple
        ``(chat_history, chat_history, board)`` where ``board`` is either the
        rendered game state or a ``<pre>`` warning when the key or the move is
        missing (in which case ``chat_history`` is returned untouched).
    """
    if not openai_api_key:
        warning_msg = "<pre>Please paste your OpenAI API key (see https://beta.openai.com)</pre>"
        return chat_history, chat_history, warning_msg

    # Strip before the emptiness check so a whitespace-only message is
    # rejected here instead of being sent to the LLM.
    inp = (inp or "").strip()
    if not inp:
        warning_msg = "<pre>Please enter a move</pre>"
        return chat_history, chat_history, warning_msg

    chat_history = chat_history or []

    text_obs, observation, reward, done, _ = execute_move(inp, nim_game_env, openai_api_key)

    if done:
        if reward == 1:
            # The human removed the last stick.
            output = "Good game!"
            winner = HUMAN_STR
        else:
            # The human's move was rejected; report why and award the AI.
            output = text_obs
            winner = AI_STR
    else:
        output = plan_move(text_obs, temperature, openai_api_key)
        text_obs, observation, reward, done, _ = execute_move(output, nim_game_env, openai_api_key)
        # A finished round with reward 1 means the AI took the last stick.
        # A finished round without it means the AI's own move was rejected,
        # so the win goes to the human (mirrors the human branch above).
        winner = AI_STR if reward == 1 else HUMAN_STR

    ascii_art = generate_game_state_ascii_art(observation, done, reward, winner)
    chat_history.append((HUMAN_STR + ": " + inp, AI_STR + ": " + output))
    return chat_history, chat_history, ascii_art
|
60 |
+
|
61 |
+
|
62 |
+
def update_foo(widget, state):
    """Copy a widget's current value into its backing state.

    Parameters
    ----------
    widget: object
        The value emitted by the widget's change event.
    state: object
        The value currently held in the state.

    Returns
    -------
    object
        ``widget`` when it carries a value, otherwise the unchanged ``state``.
        Only ``None`` and the empty string count as "no value"; a numeric
        ``0`` / ``0.0`` is a real value (e.g. temperature 0) and is stored.
    """
    if widget is None or widget == "":
        return state
    return widget
|
66 |
+
|
67 |
+
|
68 |
+
# Assemble the page: per-session state, board + title + key entry, chat log,
# move entry, and the example / reset / temperature controls.
block = gr.Blocks(css=".gradio-container {background-color: lightgray}")
with block as nim_game:
    # Values carried between callbacks.
    temperature_state = gr.State(TEMPERATURE_DEFAULT)
    openai_api_key_state = gr.State()
    history_state = gr.State()
    nim_game_env_state = gr.State(NimGameEnv(PILES_DEFAULT))

    with gr.Row():
        game_state_html = gr.Markdown()
        title = gr.Markdown("""<h3><center>NimGPT-3.5</center></h3>""")
        openai_api_key_textbox = gr.Textbox(
            placeholder="Paste your OpenAI API key",
            show_label=False,
            lines=1,
            type='password',
        )

    chatbot = gr.Chatbot()

    with gr.Row():
        message_tb = gr.Textbox(
            label="What's your move?",
            placeholder="I'll take 2 sticks from pile A",
        )
        send_btn = gr.Button(value="Send", variant="secondary").style(full_width=False)

    with gr.Row():
        gr.Examples(
            examples=[
                "Three sticks from the second pile",
                "From pile C remove 2 sticks",
            ],
            inputs=message_tb,
        )
        reset_btn = gr.Button(value="Reset Game", variant="secondary").style(full_width=False)
        temperature_slider = gr.Slider(
            label="GPT Temperature",
            value=TEMPERATURE_DEFAULT,
            minimum=0.0,
            maximum=1.0,
            step=0.1,
        )

    # Clicking "Send" and pressing Enter in the move box do the same thing.
    _move_inputs = [message_tb, history_state, nim_game_env_state, temperature_state,
                    openai_api_key_state]
    _move_outputs = [chatbot, history_state, game_state_html]
    send_btn.click(send_chat_msg, inputs=_move_inputs, outputs=_move_outputs)
    message_tb.submit(send_chat_msg, inputs=_move_inputs, outputs=_move_outputs)

    # The reset button and the initial page load both start a fresh game.
    _reset_inputs = [history_state, nim_game_env_state]
    _reset_outputs = [chatbot, history_state, message_tb, game_state_html, nim_game_env_state]
    reset_btn.click(reset_game, inputs=_reset_inputs, outputs=_reset_outputs)
    nim_game.load(reset_game, inputs=_reset_inputs, outputs=_reset_outputs)

    gr.Markdown(
        '<center><a href="https://en.wikipedia.org/wiki/Nim" target="new">\n'
        'Nim is one of the first-ever electronic computerized games</a></center>'
    )

    gr.HTML("<center>Powered by <a href='https://github.com/hwchase17/langchain'>LangChain 🦜️🔗</a></center>")

    # Mirror the key textbox and the temperature slider into their states.
    openai_api_key_textbox.change(
        update_foo,
        inputs=[openai_api_key_textbox, openai_api_key_state],
        outputs=[openai_api_key_state],
    )

    temperature_slider.change(
        update_foo,
        inputs=[temperature_slider, temperature_state],
        outputs=[temperature_state],
    )

block.launch(debug=False)
|
nim_game_env.py
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from abc import ABC
|
2 |
+
|
3 |
+
import gymnasium as gym
|
4 |
+
from gymnasium import spaces
|
5 |
+
import numpy as np
|
6 |
+
|
7 |
+
|
8 |
+
class NimGameEnv(gym.Env, ABC):
    """Custom environment for a simple Nim game.

    In this game, there are two players and a number of piles of stones.
    Each turn, a player can choose a pile and remove any number of stones from it.
    The move that removes the last remaining stone ends the game and earns a reward of 1.

    The observation is a list of integers representing the number of stones in each pile.
    The action is a pair of two integers, representing the chosen pile and the number of stones to remove.
    """

    def __init__(self, starting_stick_piles=None):
        """Create the environment.

        Parameters
        ----------
        starting_stick_piles: sequence of int, optional
            Number of stones each pile starts with. Defaults to ``[3, 5, 7]``.
        """
        if starting_stick_piles is None:
            starting_stick_piles = [3, 5, 7]
        # Keep a private copy so neither the caller's list nor a shared
        # default can be altered through this environment.
        self.starting_stick_piles = list(starting_stick_piles)
        self.num_piles = len(self.starting_stick_piles)
        self.max_stones = max(self.starting_stick_piles)
        self.piles = self._init_piles()
        self.current_player = 0
        self.action_space = spaces.MultiDiscrete([self.num_piles, self.max_stones + 1])
        self.observation_space = spaces.MultiDiscrete([self.max_stones + 1] * self.num_piles)

    def step(self, action):
        """Take a step in the environment.

        Parameters
        ----------
        action: tuple
            The action taken by the player, represented as a tuple of the chosen pile and the number of stones to remove.

        Returns
        -------
        observation: list
            The current number of stones in each pile.
        reward: int
            1 when this step emptied the last pile, otherwise 0.
        done: bool
            Whether the game has ended.
        info: dict
            Additional information about the step (always empty).

        Raises
        ------
        ValueError
            If the action is not valid for the current piles.
        """
        # Validate the action
        if not self._is_valid_action(action):
            raise ValueError("Invalid action")

        # Update the piles
        pile, num_stones = action
        self.piles[pile] -= num_stones

        # Determine if the game has ended
        done = self._is_game_over()

        # Calculate the reward
        reward = self._calculate_reward()

        # Switch the current player
        self.current_player = (self.current_player + 1) % 2
        return self.piles, reward, done, {}

    def reset(self):
        """Reset the environment to the initial state.

        Returns
        -------
        text_observation: str
            A sentence listing the number of sticks in each pile.
        observation: list
            The number of stones in each pile.
        """
        self.piles = self._init_piles()
        self.current_player = 0
        text_observation = "The piles contain " + ", ".join(str(x) for x in self.piles) + " sticks."
        return text_observation, self.piles

    def _init_piles(self):
        """Initialize the stick piles as a fresh copy of the configured starting piles."""
        return list(self.starting_stick_piles)

    def _generate_random_stones(self):
        """Generate a random number of stones (between 1 and max_stones inclusive)."""
        return np.random.randint(1, self.max_stones + 1)

    def _is_valid_action(self, action):
        """Determine if an action is valid.

        The pile index must exist and the number of stones must be positive
        and no larger than what the chosen pile currently holds.
        """
        pile, num_stones = action
        return 0 <= pile < self.num_piles and 0 < num_stones <= self.max_stones and num_stones <= self.piles[pile]

    def _is_game_over(self):
        """Determine if the game has ended (every pile is empty)."""
        return all(pile == 0 for pile in self.piles)

    def _calculate_reward(self):
        """Calculate the reward for the current step: 1 once the game is over, else 0."""
        return 1 if self._is_game_over() else 0
|
nim_gpt_functions.py
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain import OpenAI
|
2 |
+
from langchain.prompts import PromptTemplate, FewShotPromptTemplate
|
3 |
+
from langchain.chains import LLMChain
|
4 |
+
|
5 |
+
EXAMPLES_PROMPT_TEMPLATE = PromptTemplate(
|
6 |
+
input_variables=["input", "output"],
|
7 |
+
template="Input: {input}\nOutput: {output}"
|
8 |
+
)
|
9 |
+
|
10 |
+
# Few-shot exemplars for move planning: each maps a textual game state to a
# move phrased the way the move-execution prompt expects to read it.
PLAN_MOVE_PROMPT_EXAMPLES = [
    {"input": "The piles contain 3, 5, 7 sticks", "output": "I'll take one stick from pile A"},
    {"input": "The piles contain 2, 5, 7 sticks", "output": "I'll take one stick from pile B"},
    {"input": "The piles contain 2, 5, 7 sticks", "output": "I'll take five sticks from pile B"},
    {"input": "The piles contain 1, 2, 3 sticks", "output": "I'll take two sticks from pile C"},
    {"input": "The piles contain 0, 2, 3 sticks", "output": "I'll take one stick from pile C"},
    {"input": "The piles contain 0, 2, 0 sticks", "output": "I'll take two sticks from pile B"},
]
|
18 |
+
|
19 |
+
PLAN_MOVE_PROMPT_FROM_STRING_EXAMPLES = FewShotPromptTemplate(
|
20 |
+
examples=PLAN_MOVE_PROMPT_EXAMPLES,
|
21 |
+
example_prompt=EXAMPLES_PROMPT_TEMPLATE,
|
22 |
+
prefix="Nim is a two-player game of strategy in which players take turns removing objects from separate piles. "
|
23 |
+
"The goal of the game is to remove the last sticks from a pile when the other piles contain 0 sticks. Each "
|
24 |
+
"of these inputs represent a game state. For each of these game states please express a logical move that "
|
25 |
+
"consists of taking some number of sticks from a pile. You may not take any sticks from a pile that "
|
26 |
+
"contains 0 sticks.",
|
27 |
+
suffix="Input: {text_game_state}\nOutput:",
|
28 |
+
input_variables=["text_game_state"],
|
29 |
+
example_separator="\n\n"
|
30 |
+
)
|
31 |
+
|
32 |
+
EXEC_MOVE_PROMPT_EXAMPLES = [
|
33 |
+
{"input": "I'll take two sticks from pile A", "output": "0,2"},
|
34 |
+
{"input": "I'll take 3 sticks from the first pile", "output": "0,3"},
|
35 |
+
{"input": "I'll take two sticks from pile C", "output": "2,2"},
|
36 |
+
{"input": "I'll take one stick from the third pile", "output": "2,1"},
|
37 |
+
{"input": "From pile B remove 2 sticks", "output": "1,2"},
|
38 |
+
{"input": "I'll take the last stick from pile C", "output": "2,1"},
|
39 |
+
]
|
40 |
+
|
41 |
+
EXEC_MOVE_PROMPT_FROM_STRING_EXAMPLES = FewShotPromptTemplate(
|
42 |
+
examples=EXEC_MOVE_PROMPT_EXAMPLES,
|
43 |
+
example_prompt=EXAMPLES_PROMPT_TEMPLATE,
|
44 |
+
prefix="Express every input as two numbers separated by a comma, where the first number is the zero index pile "
|
45 |
+
"number and the second number is the number of sticks to remove.",
|
46 |
+
suffix="Input: {move_to_express}\nOutput:",
|
47 |
+
input_variables=["move_to_express"],
|
48 |
+
example_separator="\n\n"
|
49 |
+
)
|
50 |
+
|
51 |
+
|
52 |
+
def plan_move(text_game_state, temperature, api_key):
    """Ask the LLM for the AI's next move, phrased in natural language.

    Parameters
    ----------
    text_game_state: str
        Textual description of the piles, substituted into the planning prompt.
    temperature: float
        Sampling temperature passed to the model.
    api_key: str
        OpenAI API key used for the request.

    Returns
    -------
    str
        Whatever text the chain produces for the given game state.
    """
    # NOTE(review): 'text-davinci-003' appears to have been retired by OpenAI;
    # confirm the model is still served before relying on this call.
    planner = LLMChain(
        llm=OpenAI(
            model_name='text-davinci-003',
            temperature=temperature,
            max_tokens=100,
            openai_api_key=api_key,
        ),
        prompt=PLAN_MOVE_PROMPT_FROM_STRING_EXAMPLES,
        verbose=False,
    )
    return planner.run({'text_game_state': text_game_state})
|
58 |
+
|
59 |
+
|
60 |
+
def execute_move(move_to_express, nim_game_env, api_key):
    """Translate a free-text move into an action and apply it to the game.

    The LLM is asked to express the move as ``"<pile index>,<stick count>"``;
    that reply is parsed into a tuple of ints and passed to
    ``nim_game_env.step``.

    Parameters
    ----------
    move_to_express: str
        The move in natural language.
    nim_game_env: NimGameEnv
        The game to advance; it is modified in place on a valid move.
    api_key: str
        OpenAI API key used for the request.

    Returns
    -------
    tuple
        ``(text_observation, observation, reward, done, info)``. When the
        reply cannot be parsed or the environment rejects the action, the
        fixed tuple ``("Invalid move!", [0, 0, 0], 0, True, None)`` is
        returned instead.
    """
    llm = OpenAI(model_name='text-davinci-003', temperature=0.0, max_tokens=10,
                 openai_api_key=api_key)
    llm_chain = LLMChain(llm=llm, prompt=EXEC_MOVE_PROMPT_FROM_STRING_EXAMPLES, verbose=False)
    step_tuple_str = llm_chain.run({'move_to_express': move_to_express})
    try:
        # Parsing lives inside the try: a non-numeric reply from the model
        # raises ValueError just like a rejected action does, and both must
        # surface as an invalid move rather than crash the caller.
        step_tuple = tuple(int(x) for x in step_tuple_str.split(','))
        step_result = nim_game_env.step(step_tuple)
    except ValueError:
        return "Invalid move!", [0, 0, 0], 0, True, None

    text_observation = "The piles contain " + ", ".join(str(x) for x in step_result[0]) + " sticks."
    return text_observation, step_result[0], step_result[1], step_result[2], step_result[3]
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
openai
|
2 |
+
gradio
|
3 |
+
numpy
|
4 |
+
langchain
|
5 |
+
gymnasium
|