Spaces:
Sleeping
Sleeping
from abc import abstractmethod | |
from dataclasses import dataclass | |
from typing import Dict, List | |
from ..config import Configurable, EnvironmentConfig | |
from ..message import Message | |
from ..utils import AttributedDict | |
class TimeStep(AttributedDict):
    """
    Container for the outcome of one environment step.

    The class only declares three annotated fields on top of ``AttributedDict``;
    storage and access behaviour come from that base class.

    Attributes:
        observation (List[Message]): Messages that make up the observation for this step.
        reward (Dict[str, float]): Reward per player, keyed by player name.
        terminal (bool): Whether this step ends the episode.
    """

    # Messages observed at this step.
    observation: List[Message]
    # Player name -> reward obtained at this step.
    reward: Dict[str, float]
    # True once the episode is over.
    terminal: bool
class Environment(Configurable):
    """
    Base class for environments that a set of named players interacts with.

    The class keeps the list of player names, gives subclasses a default
    ``type_name``, and can serialise itself into an ``EnvironmentConfig``.
    Most of the remaining methods are empty placeholders that return ``None``
    until a subclass overrides them.

    Inherits from:
        Configurable: Receives all constructor arguments (see ``__init__``).

    Attributes:
        type_name (str): Identifier written to the ``env_type`` config entry by
            ``to_config``. ``None`` on this base class; a subclass whose value is
            still ``None`` gets its lower-cased class name.
        phase_index (int): Class-level default of 0. Not read or written by this
            class; presumably maintained by subclasses (confirm against them).
        task: Class-level default of ``None``. Not read or written by this class.
        player_names (List[str]): Player names passed to the constructor.
    """

    type_name = None
    phase_index = 0
    task = None

    def __init__(self, player_names: List[str], **kwargs):
        """
        Set up the environment for the given players.

        Parameters:
            player_names (List[str]): Names of the participating players.
            **kwargs: Extra arguments, passed through to ``Configurable`` untouched.
        """
        # Every argument is forwarded to Configurable, which is expected to
        # record them for later use in to_config (confirm in Configurable).
        super().__init__(player_names=player_names, **kwargs)
        self.player_names = player_names

    def __init_subclass__(cls, **kwargs):
        """
        Hook run when a subclass is defined; supplies a default ``type_name``.

        If the attribute resolves to ``None`` on the new class, it is set to the
        lower-cased class name. The lookup follows normal inheritance, so a
        subclass whose parent already has a non-None ``type_name`` keeps the
        parent's value unless it defines its own.
        """
        if cls.type_name is None:
            cls.type_name = cls.__name__.lower()
        return super().__init_subclass__(**kwargs)

    def reset(self):
        """
        Put the environment back into its initial state.

        Note:
            Placeholder: the base implementation does nothing and returns None.
            Subclasses are expected to override it.
        """

    def to_config(self) -> EnvironmentConfig:
        """
        Build an ``EnvironmentConfig`` describing this environment.

        ``type_name`` is stored under the ``"env_type"`` key of
        ``self._config_dict`` (the dictionary is modified in place), and all of
        its entries are then passed to ``EnvironmentConfig`` as keyword arguments.

        Returns:
            EnvironmentConfig: The configuration object built from the dictionary.
        """
        config_entries = self._config_dict
        config_entries["env_type"] = self.type_name
        return EnvironmentConfig(**config_entries)

    def num_players(self) -> int:
        """Return how many player names this environment holds."""
        return len(self.player_names)

    def get_next_player(self) -> str:
        """
        Tell which player acts next.

        Note:
            Placeholder: the base implementation returns None.
            Subclasses are expected to override it.

        Returns:
            str: The name of the next player (in overriding implementations).
        """

    def get_observation(self, player_name=None) -> List[Message]:
        """
        Produce the observation for one player.

        Note:
            Placeholder: the base implementation returns None.
            Subclasses are expected to override it.

        Parameters:
            player_name (str, optional): Player whose observation is requested.
                Defaults to None.

        Returns:
            List[Message]: The player's observation as a list of messages
            (in overriding implementations).
        """

    def print(self):
        """Display the environment state; the base implementation does nothing."""

    def step(self, player_name: str, action: str) -> TimeStep:
        """
        Apply one player's action to the environment.

        Note:
            Placeholder: the base implementation returns None.
            Subclasses are expected to override it.

        Parameters:
            player_name (str): The acting player.
            action (str): The action taken by that player.

        Returns:
            TimeStep: Observation, reward and terminal flag resulting from the
            action (in overriding implementations).
        """

    def check_action(self, action: str, player_name: str) -> bool:
        """
        Decide whether an action is acceptable for a player.

        The base implementation accepts everything: it returns True without
        looking at either argument. Subclasses may override it to add checks.

        Parameters:
            action (str): The action to validate.
            player_name (str): The player attempting the action.

        Returns:
            bool: Always True in this base class.
        """
        return True

    def is_terminal(self) -> bool:
        """
        Report whether the episode has ended.

        Note:
            Placeholder: the base implementation returns None.
            Subclasses are expected to override it.

        Returns:
            bool: True when the environment is in a terminal state
            (in overriding implementations).
        """

    def get_zero_rewards(self) -> Dict[str, float]:
        """
        Build a reward mapping that assigns 0.0 to every player.

        Returns:
            Dict[str, float]: One entry per name in ``player_names``, each 0.0.
        """
        return dict.fromkeys(self.player_names, 0.0)

    def get_one_rewards(self) -> Dict[str, float]:
        """
        Build a reward mapping that assigns 1.0 to every player.

        Returns:
            Dict[str, float]: One entry per name in ``player_names``, each 1.0.
        """
        return dict.fromkeys(self.player_names, 1.0)