# TaherFattahi's picture
# init: tetris neural network model with q learning
# 03b0d13
# raw
# history blame
# 4.19 kB
import statistics
import random
class InvalidDropException(Exception):
    """Error carrying a human-readable explanation of a rejected drop.

    The text is kept on the ``message`` attribute and is also handed to
    ``Exception`` so that it shows up in ``str(exc)`` and ``exc.args``.
    """

    def __init__(self, message):
        # Initialise the base class first, then expose the text directly.
        super().__init__(message)
        self.message = message
class GameState:
    """A 4x4 single-block stacking board (a minimal Tetris-like game).

    Blocks are dropped one at a time into a column and come to rest on top
    of whatever already occupies that column.
    """

    def __init__(self):
        # 4 rows of 4 columns; board[0] is the top row, True means occupied.
        self.board: list[list[bool]] = [[False] * 4 for _ in range(4)]

    def __str__(self) -> str:
        """Render the board as a framed grid with row and column labels."""
        lines = [" ┌────┐"]
        for row_index, row in enumerate(self.board):
            cells = "".join("█" if filled else " " for filled in row)
            # Each row is prefixed with its row number.
            lines.append(f"{row_index}│{cells}│")
        lines.append(" └────┘")
        # Two leading spaces (row-label column + left border) so the column
        # numbers line up under the cells they label.
        lines.append("  0123")
        return "\n".join(lines)

    def column_depths(self) -> list[int]:
        """Calculate how 'deep' the free space of each column goes, top down.

        Returns:
            One entry per column: the number of consecutive empty squares
            starting at the top row, stopping at the first occupied square.
        """
        depths: list[int] = []
        for ci in range(len(self.board[0])):
            depth = 0
            for row in self.board:
                if row[ci]:
                    break  # we hit a floor; squares below do not count
                depth += 1
            depths.append(depth)
        return depths

    def over(self) -> bool:
        """Return True when the game is over (every top-row square occupied)."""
        return all(self.board[0])

    def drop(self, column: int) -> float:
        """Drop a single block into ``column`` and return the reward.

        Args:
            column: Index of the column to drop into (0 to 3).

        Returns:
            The change in ``score_plus()`` caused by the drop.

        Raises:
            InvalidDropException: If ``column`` is out of range or the
                column has no free square left.
        """
        if column < 0 or column >= len(self.board[0]):
            raise InvalidDropException(
                "Invalid move! Column to drop in must be 0, 1, 2, or 3."
            )
        depth = self.column_depths()[column]
        if depth == 0:
            raise InvalidDropException(
                "Unable to drop on column " + str(column) + ", it is already full!"
            )
        reward_before: float = self.score_plus()
        # The block lands on the deepest free square of the column.
        self.board[depth - 1][column] = True
        reward_after: float = self.score_plus()
        return reward_after - reward_before

    def score(self) -> int:
        """Return the number of occupied squares on the board."""
        return sum(1 for row in self.board for square in row if square)

    def score_plus(self) -> float:
        """Return the score penalised for uneven columns.

        The value is ``score()`` minus twice the population standard
        deviation of ``column_depths()``.
        """
        stdev: float = statistics.pstdev(self.column_depths())
        return float(self.score()) - (stdev * 2)

    def randomize(self) -> None:
        """Set the board to a random setup with at least one free square."""
        row_count = len(self.board)

        # first, clear all values
        for row in self.board:
            for ci in range(len(row)):
                row[ci] = False

        # stack a random number of blocks (0 to 4) at the bottom of each
        # column; filling directly gives the same result as repeated drops
        # without computing rewards nobody reads
        for ci in range(0, 4):
            random_drops: int = random.randint(0, 4)
            for ri in range(row_count - random_drops, row_count):
                self.board[ri][ci] = True

        # if all 16 are filled up, turn off a random square in the top row
        if self.score() == 16:
            self.board[0][random.randint(0, 3)] = False