Spaces:

JavaFXpert
/

NimGPT-3.5

Runtime error

App Files Files Community

JLW committed on Jan 3, 2023

Commit

1afe246

1 Parent(s): 083dfe1

Initial commit

Browse files

Files changed (4) hide show

app.py +122 -0
nim_game_env.py +91 -0
nim_gpt_functions.py +72 -0
requirements.txt +5 -0

app.py ADDED Viewed

	@@ -0,0 +1,122 @@

+import gradio as gr
+from nim_game_env import NimGameEnv
+from nim_gpt_functions import plan_move, execute_move
+# Initial value of the temperature slider and of the temperature state handed to send_chat_msg.
+TEMPERATURE_DEFAULT = 0.5
+# Stick counts used to construct the NimGameEnv (piles are displayed as A, B and C).
+PILES_DEFAULT = [3, 5, 7]
+# Speaker labels used in the chat transcript and in the "Game Over, ... wins!" banner.
+HUMAN_STR = "Human"
+AI_STR = "AI"
+def reset_game(chat_history, nim_game_env):
+    chat_history = []
+    nim_game_env = NimGameEnv(PILES_DEFAULT)
+    game_state_text, game_state_piles = nim_game_env.reset()
+    ascii_art = generate_game_state_ascii_art(game_state_piles, False, 0, "")
+    message_str = ""
+    return chat_history, chat_history, message_str, ascii_art, nim_game_env
+def generate_game_state_ascii_art(piles, done, reward, player):
+    ascii_art = "Game Over, " + player + " wins!"
+    if not done:
+        pile_a = piles[0]
+        pile_b = piles[1]
+        pile_c = piles[2]
+        ascii_art = f"Pile A: {'|' * pile_a} \nPile B: {'|' * pile_b} \nPile C: {'|' * pile_c}"
+    return "<pre>" + ascii_art + "</pre>"
+def send_chat_msg(inp, chat_history, nim_game_env, temperature, openai_api_key):
+    if not openai_api_key or openai_api_key == "":
+        warning_msg = "<pre>Please paste your OpenAI API key (see https://beta.openai.com)</pre>"
+        return chat_history, chat_history, warning_msg
+    if not inp or inp == "":
+        warning_msg = "<pre>Please enter a move</pre>"
+        return chat_history, chat_history, warning_msg
+    inp = inp.strip()
+    output = None
+    chat_history = chat_history or []
+    text_obs, observation, reward, done, info = execute_move(inp, nim_game_env, openai_api_key)
+    ascii_art = generate_game_state_ascii_art(observation, done, reward, HUMAN_STR)
+    if done:
+        if reward == 1:
+            output = "Good game!"
+            ascii_art = generate_game_state_ascii_art(observation, done, reward, HUMAN_STR)
+        else:
+            output = text_obs
+            ascii_art = generate_game_state_ascii_art(observation, done, reward, AI_STR)
+    else:
+        output = plan_move(text_obs, temperature, openai_api_key)
+        text_obs, observation, reward, done, info = execute_move(output, nim_game_env, openai_api_key)
+        ascii_art = generate_game_state_ascii_art(observation, done, reward, AI_STR)
+    chat_history.append((HUMAN_STR + ": " + inp, AI_STR + ": " + output))
+    return chat_history, chat_history, ascii_art
+def update_foo(widget, state):
+    if widget:
+        state = widget
+        return state
+# Build the Gradio UI: layout first, then the event wiring, then launch.
+block = gr.Blocks(css=".gradio-container {background-color: lightgray}")
+with block as nim_game:
+    # State holders that are passed into and out of the event handlers below.
+    temperature_state = gr.State(TEMPERATURE_DEFAULT)
+    openai_api_key_state = gr.State()
+    history_state = gr.State()
+    nim_game_env_state = gr.State(NimGameEnv(PILES_DEFAULT))
+    # Top row: board display, title and the API key field.
+    with gr.Row():
+        game_state_html = gr.Markdown()
+        title = gr.Markdown("""<h3><center>NimGPT-3.5</center></h3>""")
+        openai_api_key_textbox = gr.Textbox(placeholder="Paste your OpenAI API key",
+                                            show_label=False, lines=1, type='password')
+    chatbot = gr.Chatbot()
+    # Move entry row.
+    with gr.Row():
+        message_tb = gr.Textbox(label="What's your move?",
+                                placeholder="I'll take 2 sticks from pile A")
+        send_btn = gr.Button(value="Send", variant="secondary").style(full_width=False)
+    # Example moves, reset button and temperature control.
+    with gr.Row():
+        gr.Examples(
+            examples=["Three sticks from the second pile",
+                      "From pile C remove 2 sticks"],
+            inputs=message_tb
+        )
+        reset_btn = gr.Button(value="Reset Game", variant="secondary").style(full_width=False)
+        temperature_slider = gr.Slider(label="GPT Temperature", value=TEMPERATURE_DEFAULT, minimum=0.0, maximum=1.0,
+                                       step=0.1)
+    # Clicking "Send" and submitting the textbox both run send_chat_msg with the same inputs/outputs.
+    send_btn.click(send_chat_msg, inputs=[message_tb, history_state, nim_game_env_state, temperature_state,
+                                          openai_api_key_state],
+                   outputs=[chatbot, history_state, game_state_html])
+    message_tb.submit(send_chat_msg, inputs=[message_tb, history_state, nim_game_env_state, temperature_state,
+                                             openai_api_key_state],
+                      outputs=[chatbot, history_state, game_state_html])
+    # reset_game runs both on the reset button and on the Blocks load event.
+    reset_btn.click(reset_game, inputs=[history_state, nim_game_env_state],
+                    outputs=[chatbot, history_state, message_tb, game_state_html, nim_game_env_state])
+    nim_game.load(reset_game, inputs=[history_state, nim_game_env_state],
+                  outputs=[chatbot, history_state, message_tb, game_state_html, nim_game_env_state])
+    gr.Markdown("""<center><a href="https://en.wikipedia.org/wiki/Nim" target="new">
+    Nim is one of the first-ever electronic computerized games</a></center>""")
+    gr.HTML("<center>Powered by <a href='https://github.com/hwchase17/langchain'>LangChain 🦜️🔗</a></center>")
+    # Copy widget values into their state holders through update_foo whenever they change.
+    openai_api_key_textbox.change(update_foo,
+                                  inputs=[openai_api_key_textbox, openai_api_key_state],
+                                  outputs=[openai_api_key_state])
+    temperature_slider.change(update_foo,
+                              inputs=[temperature_slider, temperature_state],
+                              outputs=[temperature_state])
+# Runs at import time: this module is the app's entry point.
+block.launch(debug=False)

nim_game_env.py ADDED Viewed

	@@ -0,0 +1,91 @@

+from abc import ABC
+import gymnasium as gym
+from gymnasium import spaces
+import numpy as np
+class NimGameEnv(gym.Env, ABC):
+    """Custom environment for a simple Nim game.
+    In this game, there are two players and a number of piles of stones.
+    Each turn, a player can choose a pile and remove any number of stones from it.
+    The player who takes the last stone loses.
+    The observation space is a tuple of integers representing the number of stones in each pile.
+    The action space is a tuple of two integers, representing the chosen pile and the number of stones to remove.
+    """
+    def __init__(self, starting_stick_piles=[3, 5, 7]):
+        self.starting_stick_piles = starting_stick_piles
+        self.num_piles = len(starting_stick_piles)
+        self.max_stones = max(starting_stick_piles)
+        self.piles = self._init_piles()
+        self.current_player = 0
+        self.action_space = spaces.MultiDiscrete([self.num_piles, self.max_stones + 1])
+        self.observation_space = spaces.MultiDiscrete([self.max_stones + 1] * self.num_piles)
+    def step(self, action):
+        """Take a step in the environment.
+        Parameters
+        ----------
+        action: tuple
+            The action taken by the player, represented as a tuple of the chosen pile and the number of stones to remove.
+        Returns
+        -------
+        observation: tuple
+            The current number of stones in each pile.
+        reward: float
+            The reward for the current step.
+        done: bool
+            Whether the game has ended.
+        info: dict
+            Additional information about the step.
+        """
+        # Validate the action
+        if not self._is_valid_action(action):
+            raise ValueError("Invalid action")
+        # Update the piles
+        pile, num_stones = action
+        self.piles[pile] -= num_stones
+        # Determine if the game has ended
+        done = self._is_game_over()
+        # Calculate the reward
+        reward = self._calculate_reward()
+        # Switch the current player
+        self.current_player = (self.current_player + 1) % 2
+        return self.piles, reward, done, {}
+    def reset(self):
+        """Reset the environment to the initial state."""
+        self.piles = self._init_piles()
+        self.current_player = 0
+        text_observation = "The piles contain " + ", ".join(str(x) for x in self.piles) + " sticks."
+        return text_observation, self.piles
+    def _init_piles(self):
+        """Initialize the stick piles."""
+        return [3, 5, 7]
+    def _generate_random_stones(self):
+        """Generate a random number of stones (between 1 and max_stones inclusive)."""
+        return np.random.randint(1, self.max_stones + 1)
+    def _is_valid_action(self, action):
+        """Determine if an action is valid."""
+        pile, num_stones = action
+        return 0 <= pile < self.num_piles and 0 < num_stones <= self.max_stones and num_stones <= self.piles[pile]
+    def _is_game_over(self):
+        """Determine if the game has ended."""
+        return all(pile == 0 for pile in self.piles)
+    def _calculate_reward(self):
+        """Calculate the reward for the current step."""
+        return 1 if self._is_game_over() else 0

nim_gpt_functions.py ADDED Viewed

	@@ -0,0 +1,72 @@

+from langchain import OpenAI
+from langchain.prompts import PromptTemplate, FewShotPromptTemplate
+from langchain.chains import LLMChain
+# Formats a single few-shot example as an "Input: ..." line followed by an "Output: ..." line.
+# Used as the example_prompt of both few-shot templates in this module.
+EXAMPLES_PROMPT_TEMPLATE = PromptTemplate(
+    input_variables=["input", "output"],
+    template="Input: {input}\nOutput: {output}"
+)
+PLAN_MOVE_PROMPT_EXAMPLES = [
+    {"input": "The piles contain 3, 5, 7 sticks", "output": "I'll take one stick from pile A"},
+    {"input": "The piles contain 2, 5, 7 sticks", "output": "I'll take one stick from pile B"},
+    {"input": "The piles contain 2, 5, 7 sticks", "output": "I'll take five stick from pile B"},
+    {"input": "The piles contain 1, 2, 3 sticks", "output": "I'll take two sticks from pile C"},
+    {"input": "The piles contain 0, 2, 3 sticks", "output": "I'll take one stick from pile C"},
+    {"input": "The piles contain 0, 2, 0 sticks", "output": "I'll take two sticks from pile B"},
+]
+# Few-shot prompt used by plan_move: rules of the game, the example moves, then the
+# current game state supplied through the `text_game_state` variable.
+PLAN_MOVE_PROMPT_FROM_STRING_EXAMPLES = FewShotPromptTemplate(
+    examples=PLAN_MOVE_PROMPT_EXAMPLES,
+    example_prompt=EXAMPLES_PROMPT_TEMPLATE,
+    prefix="Nim is a two-player game of strategy in which players take turns removing objects from separate piles. "
+           "The goal of the game is to remove the last sticks from a pile when the other piles contain 0 sticks. Each "
+           "of these inputs represent a game state. For each of these game states please express a logical move that "
+           "consists of taking some number of sticks from a pile. You may not take any sticks from a pile that "
+           "contains 0 sticks.",
+    # The suffix ends with "Output:" so the completion is the move itself.
+    suffix="Input: {text_game_state}\nOutput:",
+    input_variables=["text_game_state"],
+    example_separator="\n\n"
+)
+EXEC_MOVE_PROMPT_EXAMPLES = [
+    {"input": "I'll take two sticks from pile A", "output": "0,2"},
+    {"input": "I'll take 3 sticks from the first pile", "output": "0,3"},
+    {"input": "I'll take two sticks from pile C", "output": "2,2"},
+    {"input": "I'll take one stick from the third pile", "output": "2,1"},
+    {"input": "From pile B remove 2 sticks", "output": "1,2"},
+    {"input": "I'll take the last stick from pile C", "output": "2,1"},
+]
+# Few-shot prompt used by execute_move: turns a natural-language move, supplied through
+# the `move_to_express` variable, into the "pile_index,stick_count" form.
+EXEC_MOVE_PROMPT_FROM_STRING_EXAMPLES = FewShotPromptTemplate(
+    examples=EXEC_MOVE_PROMPT_EXAMPLES,
+    example_prompt=EXAMPLES_PROMPT_TEMPLATE,
+    prefix="Express every input as two numbers separated by a comma, where the first number is the zero index pile "
+           "number and the second number is the number of sticks to remove.",
+    # The suffix ends with "Output:" so the completion is the encoded move.
+    suffix="Input: {move_to_express}\nOutput:",
+    input_variables=["move_to_express"],
+    example_separator="\n\n"
+)
+def plan_move(text_game_state, temperature, api_key):
+    llm = OpenAI(model_name='text-davinci-003', temperature=temperature, max_tokens=100,
+                 openai_api_key=api_key)
+    llm_chain = LLMChain(llm=llm, prompt=PLAN_MOVE_PROMPT_FROM_STRING_EXAMPLES, verbose=False)
+    planned_move = llm_chain.run({'text_game_state': text_game_state})
+    return planned_move
+def execute_move(move_to_express, nim_game_env, api_key):
+    llm = OpenAI(model_name='text-davinci-003', temperature=0.0, max_tokens=10,
+                 openai_api_key=api_key)
+    llm_chain = LLMChain(llm=llm, prompt=EXEC_MOVE_PROMPT_FROM_STRING_EXAMPLES, verbose=False)
+    step_tuple_str = llm_chain.run({'move_to_express': move_to_express})
+    step_tuple = tuple(int(x) for x in step_tuple_str.split(','))
+    try:
+        step_result = nim_game_env.step(step_tuple)
+    except ValueError:
+        return "Invalid move!", [0, 0, 0], 0, True, None
+    text_observation = "The piles contain " + ", ".join(str(x) for x in step_result[0]) + " sticks."
+    return text_observation, step_result[0], step_result[1], step_result[2], step_result[3]

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+openai
+gradio
+numpy
+langchain
+gymnasium