JLW committed on
Commit
1afe246
·
1 Parent(s): 083dfe1

Initial commit

Browse files
Files changed (4) hide show
  1. app.py +122 -0
  2. nim_game_env.py +91 -0
  3. nim_gpt_functions.py +72 -0
  4. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from nim_game_env import NimGameEnv
3
+ from nim_gpt_functions import plan_move, execute_move
4
+
5
+ TEMPERATURE_DEFAULT = 0.5
6
+ PILES_DEFAULT = [3, 5, 7]
7
+ HUMAN_STR = "Human"
8
+ AI_STR = "AI"
9
+
10
+
11
def reset_game(chat_history, nim_game_env):
    """Start a brand-new game and clear the conversation.

    Both parameters are accepted only because the UI passes the current
    state values in; they are discarded and replaced with fresh ones.

    Returns
    -------
    tuple
        ``(chat_history, chat_history, message_str, ascii_art, nim_game_env)``:
        an empty history (twice, once for the chatbot widget and once for the
        history state), an empty string for the message textbox, the rendered
        starting board, and the newly created environment.
    """
    fresh_env = NimGameEnv(PILES_DEFAULT)
    # reset() returns (text description, piles); only the piles are rendered.
    _, starting_piles = fresh_env.reset()
    board_html = generate_game_state_ascii_art(starting_piles, False, 0, "")
    empty_history = []
    return empty_history, empty_history, "", board_html, fresh_env
18
+
19
+
20
def generate_game_state_ascii_art(piles, done, reward, player):
    """Render the board (or the game-over banner) as a ``<pre>`` HTML snippet.

    Parameters
    ----------
    piles : sequence of int
        Number of sticks in each pile. Any number of piles is supported;
        they are labelled ``A``, ``B``, ``C``, ... in order.
    done : bool
        Whether the game has ended. When true, the piles are ignored and a
        "Game Over" banner naming ``player`` is produced instead.
    reward : int
        Unused; kept so existing callers do not need to change.
    player : str
        Name of the winner, only used when ``done`` is true.

    Returns
    -------
    str
        The board or banner wrapped in ``<pre>...</pre>``.
    """
    if done:
        ascii_art = "Game Over, " + player + " wins!"
    else:
        rows = [
            f"Pile {chr(ord('A') + index)}: {'|' * sticks}"
            for index, sticks in enumerate(piles)
        ]
        # The " \n" separator reproduces the trailing space the original
        # three-pile layout put before each line break.
        ascii_art = " \n".join(rows)
    return "<pre>" + ascii_art + "</pre>"
28
+
29
+
30
def send_chat_msg(inp, chat_history, nim_game_env, temperature, openai_api_key):
    """Play one round: apply the human's move, then let the AI respond.

    Parameters
    ----------
    inp : str
        The human's move in free text (e.g. "I'll take 2 sticks from pile A").
    chat_history : list or None
        List of ``(human_message, ai_message)`` pairs shown in the chatbot.
    nim_game_env : NimGameEnv
        The game environment; it is advanced in place by the moves.
    temperature : float
        Sampling temperature used when the AI plans its move.
    openai_api_key : str
        API key forwarded to the LLM calls.

    Returns
    -------
    tuple
        ``(chat_history, chat_history, html)`` where ``html`` is either a
        warning, the rendered board, or the game-over banner.
    """
    if not openai_api_key:
        warning_msg = "<pre>Please paste your OpenAI API key (see https://beta.openai.com)</pre>"
        return chat_history, chat_history, warning_msg

    # Strip *before* the emptiness check so whitespace-only input is rejected
    # here instead of being sent to the LLM.
    inp = inp.strip() if inp else ""
    if not inp:
        warning_msg = "<pre>Please enter a move</pre>"
        return chat_history, chat_history, warning_msg

    chat_history = chat_history or []

    text_obs, observation, reward, done, info = execute_move(inp, nim_game_env, openai_api_key)

    if done:
        if reward == 1:
            # The human emptied the last pile.
            output = "Good game!"
            winner = HUMAN_STR
        else:
            # done without a reward means the human's move was rejected;
            # text_obs then carries the rejection message.
            output = text_obs
            winner = AI_STR
    else:
        output = plan_move(text_obs, temperature, openai_api_key)
        text_obs, observation, reward, done, info = execute_move(output, nim_game_env, openai_api_key)
        # Mirror the human branch: if the AI's move was rejected (done without
        # a reward) the AI forfeits, so the human is named as the winner.
        winner = HUMAN_STR if done and reward != 1 else AI_STR

    ascii_art = generate_game_state_ascii_art(observation, done, reward, winner)
    chat_history.append((HUMAN_STR + ": " + inp, AI_STR + ": " + output))
    return chat_history, chat_history, ascii_art
60
+
61
+
62
def update_foo(widget, state):
    """Mirror a widget's value into its backing state.

    Parameters
    ----------
    widget
        The widget's current value.
    state
        The previously stored value.

    Returns
    -------
    The widget value, or the previous ``state`` when the widget holds no
    value (``None`` or an empty string).
    """
    # Test for "no value" explicitly rather than by truthiness: a slider
    # value of 0.0 is falsy but is a legitimate setting that must be stored.
    if widget is None or widget == "":
        return state
    return widget
66
+
67
+
68
# Build the Gradio interface: per-session state, the board/title/API-key row,
# the chat transcript, the move-entry row, and the examples/reset/temperature row.
block = gr.Blocks(css=".gradio-container {background-color: lightgray}")
with block as nim_game:
    # Values that persist between callbacks.
    temperature_state = gr.State(TEMPERATURE_DEFAULT)
    openai_api_key_state = gr.State()
    history_state = gr.State()
    nim_game_env_state = gr.State(NimGameEnv(PILES_DEFAULT))

    with gr.Row():
        game_state_html = gr.Markdown()
        title = gr.Markdown("""<h3><center>NimGPT-3.5</center></h3>""")
        openai_api_key_textbox = gr.Textbox(
            placeholder="Paste your OpenAI API key",
            show_label=False,
            lines=1,
            type='password',
        )

    chatbot = gr.Chatbot()

    with gr.Row():
        message_tb = gr.Textbox(
            label="What's your move?",
            placeholder="I'll take 2 sticks from pile A",
        )
        send_btn = gr.Button(value="Send", variant="secondary").style(full_width=False)

    with gr.Row():
        gr.Examples(
            examples=[
                "Three sticks from the second pile",
                "From pile C remove 2 sticks",
            ],
            inputs=message_tb,
        )
        reset_btn = gr.Button(value="Reset Game", variant="secondary").style(full_width=False)
        temperature_slider = gr.Slider(
            label="GPT Temperature",
            value=TEMPERATURE_DEFAULT,
            minimum=0.0,
            maximum=1.0,
            step=0.1,
        )

    # Clicking "Send" and pressing Enter in the textbox both play a round.
    for register_move in (send_btn.click, message_tb.submit):
        register_move(
            send_chat_msg,
            inputs=[message_tb, history_state, nim_game_env_state, temperature_state,
                    openai_api_key_state],
            outputs=[chatbot, history_state, game_state_html],
        )

    # The reset button and the initial page load both start a fresh game.
    for register_reset in (reset_btn.click, nim_game.load):
        register_reset(
            reset_game,
            inputs=[history_state, nim_game_env_state],
            outputs=[chatbot, history_state, message_tb, game_state_html, nim_game_env_state],
        )

    gr.Markdown("""<center><a href="https://en.wikipedia.org/wiki/Nim" target="new">
    Nim is one of the first-ever electronic computerized games</a></center>""")

    gr.HTML("<center>Powered by <a href='https://github.com/hwchase17/langchain'>LangChain 🦜️🔗</a></center>")

    # Keep the state objects in sync with their widgets.
    for source_widget, target_state in (
        (openai_api_key_textbox, openai_api_key_state),
        (temperature_slider, temperature_state),
    ):
        source_widget.change(
            update_foo,
            inputs=[source_widget, target_state],
            outputs=[target_state],
        )

block.launch(debug=False)
nim_game_env.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC
2
+
3
+ import gymnasium as gym
4
+ from gymnasium import spaces
5
+ import numpy as np
6
+
7
+
8
class NimGameEnv(gym.Env, ABC):
    """Custom environment for a simple Nim game.

    In this game, there are two players and a number of piles of stones.
    Each turn, a player can choose a pile and remove any number of stones from it.
    The step that removes the last remaining stone ends the game and earns a
    reward of 1, i.e. the player who takes the last stone is rewarded.

    The observation space is a tuple of integers representing the number of stones in each pile.
    The action space is a tuple of two integers, representing the chosen pile and the number of stones to remove.
    """

    def __init__(self, starting_stick_piles=None):
        """Create the environment.

        Parameters
        ----------
        starting_stick_piles: list of int, optional
            Initial number of stones in each pile. Defaults to ``[3, 5, 7]``.
        """
        # Avoid a mutable default argument, and copy the caller's list so that
        # later changes to it cannot alter this environment's configuration.
        if starting_stick_piles is None:
            starting_stick_piles = [3, 5, 7]
        self.starting_stick_piles = list(starting_stick_piles)
        self.num_piles = len(self.starting_stick_piles)
        self.max_stones = max(self.starting_stick_piles)
        self.piles = self._init_piles()
        self.current_player = 0
        self.action_space = spaces.MultiDiscrete([self.num_piles, self.max_stones + 1])
        self.observation_space = spaces.MultiDiscrete([self.max_stones + 1] * self.num_piles)

    def step(self, action):
        """Take a step in the environment.

        Parameters
        ----------
        action: tuple
            The action taken by the player, represented as a tuple of the chosen pile and the number of stones to remove.

        Returns
        -------
        observation: list
            The current number of stones in each pile.
        reward: int
            1 if this step emptied the last pile, otherwise 0.
        done: bool
            Whether the game has ended.
        info: dict
            Additional information about the step (always empty).

        Raises
        ------
        ValueError
            If the action is not valid for the current piles.
        """
        # Validate the action
        if not self._is_valid_action(action):
            raise ValueError("Invalid action")

        # Update the piles
        pile, num_stones = action
        self.piles[pile] -= num_stones

        # Determine if the game has ended
        done = self._is_game_over()

        # Calculate the reward
        reward = self._calculate_reward()

        # Switch the current player
        self.current_player = (self.current_player + 1) % 2
        return self.piles, reward, done, {}

    def reset(self):
        """Reset the environment to the initial state.

        Returns
        -------
        text_observation: str
            A sentence describing the pile sizes.
        piles: list
            The number of stones in each pile.
        """
        self.piles = self._init_piles()
        self.current_player = 0
        text_observation = "The piles contain " + ", ".join(str(x) for x in self.piles) + " sticks."
        return text_observation, self.piles

    def _init_piles(self):
        """Return a fresh copy of the configured starting piles."""
        # Copy so that play (which mutates self.piles) never alters the
        # stored starting configuration.
        return list(self.starting_stick_piles)

    def _generate_random_stones(self):
        """Generate a random number of stones (between 1 and max_stones inclusive)."""
        return np.random.randint(1, self.max_stones + 1)

    def _is_valid_action(self, action):
        """Determine if an action is valid."""
        pile, num_stones = action
        return 0 <= pile < self.num_piles and 0 < num_stones <= self.max_stones and num_stones <= self.piles[pile]

    def _is_game_over(self):
        """Determine if the game has ended (every pile is empty)."""
        return all(pile == 0 for pile in self.piles)

    def _calculate_reward(self):
        """Calculate the reward for the current step: 1 when the game is over, else 0."""
        return 1 if self._is_game_over() else 0
nim_gpt_functions.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain import OpenAI
2
+ from langchain.prompts import PromptTemplate, FewShotPromptTemplate
3
+ from langchain.chains import LLMChain
4
+
5
# Format shared by every few-shot example in both prompts below.
EXAMPLES_PROMPT_TEMPLATE = PromptTemplate(
    input_variables=["input", "output"],
    template="Input: {input}\nOutput: {output}"
)

# Few-shot examples mapping a textual game state to a natural-language move.
PLAN_MOVE_PROMPT_EXAMPLES = [
    {"input": "The piles contain 3, 5, 7 sticks", "output": "I'll take one stick from pile A"},
    {"input": "The piles contain 2, 5, 7 sticks", "output": "I'll take one stick from pile B"},
    {"input": "The piles contain 2, 5, 7 sticks", "output": "I'll take five sticks from pile B"},
    {"input": "The piles contain 1, 2, 3 sticks", "output": "I'll take two sticks from pile C"},
    {"input": "The piles contain 0, 2, 3 sticks", "output": "I'll take one stick from pile C"},
    {"input": "The piles contain 0, 2, 0 sticks", "output": "I'll take two sticks from pile B"},
]

# Prompt used by plan_move: game state in, natural-language move out.
PLAN_MOVE_PROMPT_FROM_STRING_EXAMPLES = FewShotPromptTemplate(
    examples=PLAN_MOVE_PROMPT_EXAMPLES,
    example_prompt=EXAMPLES_PROMPT_TEMPLATE,
    prefix="Nim is a two-player game of strategy in which players take turns removing objects from separate piles. "
           "The goal of the game is to remove the last sticks from a pile when the other piles contain 0 sticks. Each "
           "of these inputs represent a game state. For each of these game states please express a logical move that "
           "consists of taking some number of sticks from a pile. You may not take any sticks from a pile that "
           "contains 0 sticks.",
    suffix="Input: {text_game_state}\nOutput:",
    input_variables=["text_game_state"],
    example_separator="\n\n"
)

# Few-shot examples mapping a natural-language move to "pile_index,stick_count".
EXEC_MOVE_PROMPT_EXAMPLES = [
    {"input": "I'll take two sticks from pile A", "output": "0,2"},
    {"input": "I'll take 3 sticks from the first pile", "output": "0,3"},
    {"input": "I'll take two sticks from pile C", "output": "2,2"},
    {"input": "I'll take one stick from the third pile", "output": "2,1"},
    {"input": "From pile B remove 2 sticks", "output": "1,2"},
    {"input": "I'll take the last stick from pile C", "output": "2,1"},
]

# Prompt used by execute_move: natural-language move in, "pile,count" out.
EXEC_MOVE_PROMPT_FROM_STRING_EXAMPLES = FewShotPromptTemplate(
    examples=EXEC_MOVE_PROMPT_EXAMPLES,
    example_prompt=EXAMPLES_PROMPT_TEMPLATE,
    prefix="Express every input as two numbers separated by a comma, where the first number is the zero index pile "
           "number and the second number is the number of sticks to remove.",
    suffix="Input: {move_to_express}\nOutput:",
    input_variables=["move_to_express"],
    example_separator="\n\n"
)
50
+
51
+
52
def plan_move(text_game_state, temperature, api_key):
    """Ask the LLM to choose a move for the given game state.

    Parameters
    ----------
    text_game_state : str
        Sentence describing the pile sizes.
    temperature : float
        Sampling temperature for the completion.
    api_key : str
        OpenAI API key.

    Returns
    -------
    str
        The planned move in natural language, with surrounding whitespace
        removed.
    """
    llm = OpenAI(model_name='text-davinci-003', temperature=temperature, max_tokens=100,
                 openai_api_key=api_key)
    llm_chain = LLMChain(llm=llm, prompt=PLAN_MOVE_PROMPT_FROM_STRING_EXAMPLES, verbose=False)
    planned_move = llm_chain.run({'text_game_state': text_game_state})
    # Trim the completion so it reads cleanly in the chat transcript and when
    # it is passed on as the input to execute_move.
    return planned_move.strip()
58
+
59
+
60
def execute_move(move_to_express, nim_game_env, api_key):
    """Translate a natural-language move into an action and apply it.

    The LLM is asked to rewrite the move as ``"pile_index,stick_count"``,
    which is parsed and passed to ``nim_game_env.step``.

    Parameters
    ----------
    move_to_express : str
        The move in natural language.
    nim_game_env : NimGameEnv
        The environment to advance.
    api_key : str
        OpenAI API key.

    Returns
    -------
    tuple
        ``(text_observation, observation, reward, done, info)``. When the
        move cannot be parsed or is rejected by the environment, the result
        is ``("Invalid move!", [0, 0, 0], 0, True, None)``.
    """
    llm = OpenAI(model_name='text-davinci-003', temperature=0.0, max_tokens=10,
                 openai_api_key=api_key)
    llm_chain = LLMChain(llm=llm, prompt=EXEC_MOVE_PROMPT_FROM_STRING_EXAMPLES, verbose=False)
    step_tuple_str = llm_chain.run({'move_to_express': move_to_express})
    try:
        # Parse inside the try block: a non-numeric reply makes int() raise
        # ValueError, which must be reported as an invalid move rather than
        # crashing the caller.
        step_tuple = tuple(int(x) for x in step_tuple_str.split(','))
        step_result = nim_game_env.step(step_tuple)
    except ValueError:
        return "Invalid move!", [0, 0, 0], 0, True, None

    text_observation = "The piles contain " + ", ".join(str(x) for x in step_result[0]) + " sticks."
    return text_observation, step_result[0], step_result[1], step_result[2], step_result[3]
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ openai
2
+ gradio
3
+ numpy
4
+ langchain
5
+ gymnasium