cleandata JavaFXpert committed on
Commit
6f00721
·
0 Parent(s):

Duplicate from JavaFXpert/NimGPT-3.5

Browse files

Co-authored-by: James Weaver <[email protected]>

Files changed (6) hide show
  1. .gitattributes +34 -0
  2. README.md +14 -0
  3. app.py +129 -0
  4. nim_game_env.py +91 -0
  5. nim_gpt_functions.py +74 -0
  6. requirements.txt +5 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: NimGPT 3.5
3
+ emoji: 📊
4
+ colorFrom: yellow
5
+ colorTo: yellow
6
+ sdk: gradio
7
+ sdk_version: 3.15.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: apache-2.0
11
+ duplicated_from: JavaFXpert/NimGPT-3.5
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import datetime
3
+ from nim_game_env import NimGameEnv
4
+ from nim_gpt_functions import plan_move, execute_move
5
+
6
# Initial value for both the temperature state and the temperature slider.
TEMPERATURE_DEFAULT = 0.5
# Stick counts the game environment is constructed with.
PILES_DEFAULT = [3, 5, 7]
# Speaker labels used in the chat transcript and in the game-over banner.
HUMAN_STR = "Human"
AI_STR = "AI"
10
+
11
+
12
def reset_game(chat_history, nim_game_env):
    """Start a fresh game and clear the conversation.

    Both arguments are ignored: whatever is passed in is replaced by an empty
    history and a newly constructed environment.

    Returns
    -------
    tuple
        ``(chatbot_value, history_state, message_text, board_html, env)``,
        where the first two items are the same empty list, the message text
        is empty, and ``board_html`` shows the starting piles.
    """
    fresh_env = NimGameEnv(PILES_DEFAULT)
    # reset() returns (text description, piles); only the piles are drawn.
    _, starting_piles = fresh_env.reset()
    board_html = generate_game_state_ascii_art(starting_piles, False, 0, "")
    empty_history = []
    return empty_history, empty_history, "", board_html, fresh_env
19
+
20
+
21
def generate_game_state_ascii_art(piles, done, reward, player):
    """Render the board, or the game-over banner, as a ``<pre>`` HTML snippet.

    Parameters
    ----------
    piles : sequence of int
        Number of sticks left in each pile. Piles are labelled A, B, C, ...
        in order. Not read when ``done`` is true.
    done : bool
        Whether the game has ended.
    reward : int
        Unused; accepted so existing callers keep working.
    player : str
        Name shown as the winner in the game-over banner.

    Returns
    -------
    str
        Either the banner or one ``Pile X: |||`` row per pile, wrapped in
        ``<pre>`` tags.
    """
    if done:
        body = "Game Over, " + player + " wins!"
    else:
        # One row per pile instead of a hard-coded A/B/C triple, so boards
        # with any number of piles render without IndexError or truncation.
        rows = (
            f"Pile {chr(ord('A') + index)}: {'|' * sticks}"
            for index, sticks in enumerate(piles)
        )
        # The " \n" separator reproduces the original three-pile layout exactly.
        body = " \n".join(rows)
    return "<pre>" + body + "</pre>"
29
+
30
+
31
def send_chat_msg(inp, chat_history, nim_game_env, temperature, openai_api_key):
    """Play one round: apply the human's move, then let the AI respond.

    Parameters
    ----------
    inp : str
        The human's move in free text, e.g. "I'll take 2 sticks from pile A".
    chat_history : list of (str, str) or None
        Transcript so far; ``None`` is treated as empty once a move is played.
    nim_game_env : NimGameEnv
        The game environment both moves are applied to.
    temperature : float
        Sampling temperature forwarded to ``plan_move`` for the AI's move.
    openai_api_key : str
        Key forwarded to ``execute_move`` and ``plan_move``.

    Returns
    -------
    tuple
        ``(chatbot_value, history_state, board_html)``. When the key or the
        move is missing, the history is returned untouched and ``board_html``
        carries a warning instead of the board.
    """
    if not openai_api_key:
        warning_msg = "<pre>Please paste your OpenAI API key (see https://beta.openai.com)</pre>"
        return chat_history, chat_history, warning_msg

    # Strip before the emptiness check so a whitespace-only message is
    # rejected here instead of being sent to the model as a "move".
    inp = inp.strip() if inp else ""
    if not inp:
        warning_msg = "<pre>Please enter a move</pre>"
        return chat_history, chat_history, warning_msg

    chat_history = chat_history or []

    text_obs, observation, reward, done, info = execute_move(inp, nim_game_env, openai_api_key)

    if done:
        if reward == 1:
            # The human's move ended the game with a reward: the human wins.
            output = "Good game!"
            ascii_art = generate_game_state_ascii_art(observation, done, reward, HUMAN_STR)
        else:
            # Ended without a reward: relay the message returned by
            # execute_move and show the AI as the winner.
            output = text_obs
            ascii_art = generate_game_state_ascii_art(observation, done, reward, AI_STR)
    else:
        # Game continues: the AI plans a move in free text, and that text is
        # executed through the same path as the human's move.
        output = plan_move(text_obs, temperature, openai_api_key)
        text_obs, observation, reward, done, info = execute_move(output, nim_game_env, openai_api_key)
        ascii_art = generate_game_state_ascii_art(observation, done, reward, AI_STR)

    # Log line is stamped with the current local time minus five hours.
    print("\n==== date/time: " + str(datetime.datetime.now() - datetime.timedelta(hours=5)) + " ====")
    print("inp: " + inp, ", output: ", output, ", observation: ", observation)

    chat_history.append((HUMAN_STR + ": " + inp, AI_STR + ": " + output))
    return chat_history, chat_history, ascii_art
64
+
65
+
66
def update_foo(widget, state):
    """Return the value to store in a state after its widget changes.

    Parameters
    ----------
    widget
        The widget's new value.
    state
        The value currently held in the state.

    Returns
    -------
    The widget value, or the unchanged ``state`` when the widget value is
    ``None`` or an empty string.
    """
    # A plain truthiness test would also discard 0 / 0.0, so dragging the
    # temperature slider to 0.0 would silently keep the previous temperature.
    if widget is None or widget == "":
        return state
    return widget
70
+
71
+
72
# Build the single-page UI: state holders, layout rows, then event bindings.
block = gr.Blocks(css=".gradio-container {background-color: lightgray}")
with block as nim_game:
    # State holders that the callbacks read and write.
    temperature_state = gr.State(TEMPERATURE_DEFAULT)
    openai_api_key_state = gr.State()
    history_state = gr.State()
    nim_game_env_state = gr.State(NimGameEnv(PILES_DEFAULT))

    # Header row: board display, title, API-key entry.
    with gr.Row():
        game_state_html = gr.Markdown()
        title = gr.Markdown("""<h3><center>NimGPT-3.5</center></h3>""")
        openai_api_key_textbox = gr.Textbox(
            placeholder="Paste your OpenAI API key",
            show_label=False,
            lines=1,
            type='password',
        )

    chatbot = gr.Chatbot()

    # Move entry row.
    with gr.Row():
        message_tb = gr.Textbox(
            label="What's your move?",
            placeholder="I'll take 2 sticks from pile A",
        )
        send_btn = gr.Button(value="Send", variant="secondary").style(full_width=False)

    # Example moves, reset button and temperature control.
    with gr.Row():
        gr.Examples(
            examples=[
                "Three sticks from the second pile",
                "From pile C remove 2 sticks",
            ],
            inputs=message_tb,
        )
        reset_btn = gr.Button(value="Reset Game", variant="secondary").style(full_width=False)
        temperature_slider = gr.Slider(
            label="GPT Temperature",
            value=TEMPERATURE_DEFAULT,
            minimum=0.0,
            maximum=1.0,
            step=0.1,
        )

    # Clicking "Send" and pressing Enter in the textbox run the same callback
    # with the same wiring.
    move_inputs = [message_tb, history_state, nim_game_env_state, temperature_state, openai_api_key_state]
    move_outputs = [chatbot, history_state, game_state_html]
    send_btn.click(send_chat_msg, inputs=move_inputs, outputs=move_outputs)
    message_tb.submit(send_chat_msg, inputs=move_inputs, outputs=move_outputs)

    # The reset button and the initial page load both start a fresh game.
    reset_inputs = [history_state, nim_game_env_state]
    reset_outputs = [chatbot, history_state, message_tb, game_state_html, nim_game_env_state]
    reset_btn.click(reset_game, inputs=reset_inputs, outputs=reset_outputs)
    nim_game.load(reset_game, inputs=reset_inputs, outputs=reset_outputs)

    gr.Markdown("""<center>Each player may remove sticks from a pile on their turn.
Player to remove the last stick wins.
<a href="https://en.wikipedia.org/wiki/Nim" target="new">
Nim is one of the first-ever electronic computerized games</a>
</center>""")

    gr.HTML("<center>Powered by <a href='https://github.com/hwchase17/langchain'>LangChain 🦜️🔗</a></center>")

    # Mirror the widget values into the states the move callback reads.
    openai_api_key_textbox.change(
        update_foo,
        inputs=[openai_api_key_textbox, openai_api_key_state],
        outputs=[openai_api_key_state],
    )

    temperature_slider.change(
        update_foo,
        inputs=[temperature_slider, temperature_state],
        outputs=[temperature_state],
    )

block.launch(debug=False)
nim_game_env.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC
2
+
3
+ import gymnasium as gym
4
+ from gymnasium import spaces
5
+ import numpy as np
6
+
7
+
8
class NimGameEnv(gym.Env, ABC):
    """Custom environment for a simple Nim game.

    In this game, there are two players and a number of piles of sticks.
    Each turn, a player chooses one pile and removes one or more sticks from it.
    The player who takes the last stick wins: the move that empties the final
    pile is the one that receives a reward of 1.

    The observation is the list of stick counts, one entry per pile.
    An action is a pair ``(pile_index, num_sticks)`` with a zero-based pile index.
    """

    def __init__(self, starting_stick_piles=None):
        """Create a game.

        Parameters
        ----------
        starting_stick_piles: sequence of int, optional
            Number of sticks in each pile at the start of a game.
            Defaults to ``[3, 5, 7]``.
        """
        # ``None`` stands in for the default so no list object is shared
        # between instances through the signature.
        if starting_stick_piles is None:
            starting_stick_piles = [3, 5, 7]
        # Keep a private copy so later changes to the caller's list cannot
        # alter what reset() restores.
        self.starting_stick_piles = list(starting_stick_piles)
        self.num_piles = len(self.starting_stick_piles)
        self.max_stones = max(self.starting_stick_piles)
        self.piles = self._init_piles()
        self.current_player = 0
        self.action_space = spaces.MultiDiscrete([self.num_piles, self.max_stones + 1])
        self.observation_space = spaces.MultiDiscrete([self.max_stones + 1] * self.num_piles)

    def step(self, action):
        """Take a step in the environment.

        Parameters
        ----------
        action: tuple
            The action taken by the player, represented as a tuple of the
            chosen pile index and the number of sticks to remove.

        Returns
        -------
        observation: list
            The current number of sticks in each pile (the environment's own
            list, not a copy).
        reward: int
            1 if this move emptied the last pile, otherwise 0.
        done: bool
            Whether the game has ended.
        info: dict
            Always an empty dict.

        Raises
        ------
        ValueError
            If the action is not valid for the current piles.
        """
        # Validate the action
        if not self._is_valid_action(action):
            raise ValueError("Invalid action")

        # Update the piles
        pile, num_stones = action
        self.piles[pile] -= num_stones

        # Determine if the game has ended
        done = self._is_game_over()

        # Calculate the reward
        reward = self._calculate_reward()

        # Switch the current player
        self.current_player = (self.current_player + 1) % 2
        return self.piles, reward, done, {}

    def reset(self):
        """Reset the environment to the initial state.

        Returns
        -------
        text_observation: str
            A sentence listing the stick count of every pile.
        piles: list
            The freshly initialized piles.
        """
        self.piles = self._init_piles()
        self.current_player = 0
        text_observation = "The piles contain " + ", ".join(str(x) for x in self.piles) + " sticks."
        return text_observation, self.piles

    def _init_piles(self):
        """Return a new list holding the configured starting stick counts."""
        # Built from the constructor argument rather than a fixed literal, so
        # a custom starting position is actually used; copied so play never
        # mutates the stored starting position.
        return list(self.starting_stick_piles)

    def _generate_random_stones(self):
        """Generate a random number of stones (between 1 and max_stones inclusive)."""
        return np.random.randint(1, self.max_stones + 1)

    def _is_valid_action(self, action):
        """Determine if an action is valid.

        Valid means the pile index exists, at least one stick is taken, and
        no more sticks are taken than the chosen pile currently holds.
        """
        pile, num_stones = action
        return 0 <= pile < self.num_piles and 0 < num_stones <= self.max_stones and num_stones <= self.piles[pile]

    def _is_game_over(self):
        """Determine if the game has ended (every pile is empty)."""
        return all(pile == 0 for pile in self.piles)

    def _calculate_reward(self):
        """Calculate the reward for the current step: 1 once all piles are empty, else 0."""
        return 1 if self._is_game_over() else 0
nim_gpt_functions.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain import OpenAI
2
+ from langchain.prompts import PromptTemplate, FewShotPromptTemplate
3
+ from langchain.chains import LLMChain
4
+
5
# Shared layout for every few-shot example in both prompts below.
EXAMPLES_PROMPT_TEMPLATE = PromptTemplate(
    input_variables=["input", "output"],
    template="Input: {input}\nOutput: {output}"
)

# Few-shot examples mapping a game state to a move phrased in natural language.
# The wording is what the model imitates, so the examples are kept grammatical
# ("five sticks", not "five stick").
PLAN_MOVE_PROMPT_EXAMPLES = [
    {"input": "The piles contain 3, 5, 7 sticks", "output": "I'll take one stick from pile A"},
    {"input": "The piles contain 2, 5, 7 sticks", "output": "I'll take one stick from pile B"},
    {"input": "The piles contain 2, 5, 7 sticks", "output": "I'll take five sticks from pile B"},
    {"input": "The piles contain 1, 2, 3 sticks", "output": "I'll take two sticks from pile C"},
    {"input": "The piles contain 0, 2, 3 sticks", "output": "I'll take one stick from pile C"},
    {"input": "The piles contain 0, 2, 0 sticks", "output": "I'll take two sticks from pile B"},
]

# Prompt used by plan_move: rules of the game, the examples, then the live state.
PLAN_MOVE_PROMPT_FROM_STRING_EXAMPLES = FewShotPromptTemplate(
    examples=PLAN_MOVE_PROMPT_EXAMPLES,
    example_prompt=EXAMPLES_PROMPT_TEMPLATE,
    prefix="Nim is a two-player game of strategy in which players take turns removing objects from separate piles. "
           "The goal of the game is to remove the last sticks from a pile when the other piles contain 0 sticks. Each "
           "of these inputs represents a game state. For each of these game states please express a logical move that "
           "consists of taking some number of sticks from a pile. "
           "You may not take any sticks from a pile that contains 0 sticks. "
           "You may not take more sticks from a pile than it contains. "
           "You may only take sticks from one pile. ",
    suffix="Input: {text_game_state}\nOutput:",
    input_variables=["text_game_state"],
    example_separator="\n\n"
)

# Few-shot examples mapping a natural-language move to "pile_index,count",
# where the pile index is zero-based (A/first -> 0, B -> 1, C/third -> 2).
EXEC_MOVE_PROMPT_EXAMPLES = [
    {"input": "I'll take two sticks from pile A", "output": "0,2"},
    {"input": "I'll take 3 sticks from the first pile", "output": "0,3"},
    {"input": "I'll take two sticks from pile C", "output": "2,2"},
    {"input": "I'll take one stick from the third pile", "output": "2,1"},
    {"input": "From pile B remove 2 sticks", "output": "1,2"},
    {"input": "I'll take the last stick from pile C", "output": "2,1"},
]

# Prompt used by execute_move to turn free text into a parseable action.
EXEC_MOVE_PROMPT_FROM_STRING_EXAMPLES = FewShotPromptTemplate(
    examples=EXEC_MOVE_PROMPT_EXAMPLES,
    example_prompt=EXAMPLES_PROMPT_TEMPLATE,
    prefix="Express every input as two numbers separated by a comma, where the first number is the zero index pile "
           "number and the second number is the number of sticks to remove.",
    suffix="Input: {move_to_express}\nOutput:",
    input_variables=["move_to_express"],
    example_separator="\n\n"
)
52
+
53
+
54
def plan_move(text_game_state, temperature, api_key):
    """Ask the language model for the AI player's next move.

    Parameters
    ----------
    text_game_state : str
        Sentence describing the current piles; it fills the
        ``text_game_state`` slot of the planning prompt.
    temperature : float
        Sampling temperature passed to the model.
    api_key : str
        OpenAI API key used for the request.

    Returns
    -------
    str
        The model's reply with surrounding whitespace removed.
    """
    planner = LLMChain(
        llm=OpenAI(
            model_name='text-davinci-003',
            temperature=temperature,
            max_tokens=100,
            openai_api_key=api_key,
        ),
        prompt=PLAN_MOVE_PROMPT_FROM_STRING_EXAMPLES,
        verbose=False,
    )
    raw_reply = planner.run({'text_game_state': text_game_state})
    return raw_reply.strip()
60
+
61
+
62
def execute_move(move_to_express, nim_game_env, api_key):
    """Translate a free-text move into an action and apply it to the game.

    Parameters
    ----------
    move_to_express : str
        The move in natural language.
    nim_game_env : NimGameEnv
        Environment whose ``step`` method receives the parsed action.
    api_key : str
        OpenAI API key used for the translation request.

    Returns
    -------
    tuple
        ``(text_observation, piles, reward, done, info)`` taken from the
        environment's step result. If the model's reply cannot be parsed as
        comma-separated integers, or the environment raises ``ValueError``
        for the action, the result is
        ``("Invalid move!", [0, 0, 0], 0, True, None)``.
    """
    # Temperature 0.0 is used for this translation step.
    llm = OpenAI(model_name='text-davinci-003', temperature=0.0, max_tokens=10,
                 openai_api_key=api_key)
    llm_chain = LLMChain(llm=llm, prompt=EXEC_MOVE_PROMPT_FROM_STRING_EXAMPLES, verbose=False)
    step_tuple_str = llm_chain.run({'move_to_express': move_to_express})
    try:
        # Parsing sits inside the try: int() raises ValueError on a reply
        # that is not "int,int", and that must end up as an invalid move
        # rather than an unhandled exception in the UI callback.
        step_tuple = tuple(int(x) for x in step_tuple_str.split(','))
        step_result = nim_game_env.step(step_tuple)
    except ValueError:
        return "Invalid move!", [0, 0, 0], 0, True, None

    text_observation = "The piles contain " + ", ".join(str(x) for x in step_result[0]) + " sticks."
    return text_observation, step_result[0], step_result[1], step_result[2], step_result[3]
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ openai
2
+ gradio
3
+ numpy
4
+ langchain
5
+ gymnasium