diff --git a/gym_hearthstone/envs/__init__.py b/gym_hearthstone/envs/__init__.py index 6bc233d..02a0e5c 100644 --- a/gym_hearthstone/envs/__init__.py +++ b/gym_hearthstone/envs/__init__.py @@ -1,3 +1,3 @@ -"""Banana Gym Enviornments.""" - -from gym_hearthstone.envs.hearthstone_env import HearthstoneEnv +"""Hearthstone Gym Environments.""" + +from gym_hearthstone.envs.hearthstone_env import HearthstoneEnv diff --git a/gym_hearthstone/envs/banana_env.py b/gym_hearthstone/envs/banana_env.py deleted file mode 100644 index c52cc9c..0000000 --- a/gym_hearthstone/envs/banana_env.py +++ /dev/null @@ -1,167 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" -Simulate the Hearthstone environment. - -Each episode is playing a whole game -""" - -# core modules -import logging.config -import math -import pkg_resources -import random - -# 3rd party modules -from gym import spaces -import cfg_load -import gym -import numpy as np - - -path = 'config.yaml' # always use slash in packages -filepath = pkg_resources.resource_filename('gym_hearthstone', path) -config = cfg_load.load(filepath) -logging.config.dictConfig(config['LOGGING']) - -class HearthstoneEnv(gym.Env): - """ - Define a Hearthstone environment. - - The environment defines which actions can be taken at which point and - when the agent receives which reward. - """ - - def __init__(self): - self.__version__ = "0.1.0" - logging.info("HearthstoneEnv - Version {}".format(self.__version__)) - - # General variables defining the environment - self.curr_step = -1 - self.is_banana_sold = False - - # Define what the agent can do - - ############################ BATTLEFIELD ###################################################################### - # | OppHero | - # OppHand0 | OppHand1 | OppHand2 | OppHand3 | OppHand4 | OppHand5 | OppHand6 | OppHand7 | OppHand8 | OppHand9 | - # | OppField0 | OppField1 | OppField2 | OppField3 | OppField4 | OppField5 | OppField6 | - # | MyField0 | MyField1 | MyField2 | MyField3 | MyField4 | MyField5 | MyField6 | - # MyHand0 | MyHand1 | MyHand2 | MyHand3 | MyHand4 | MyHand5 | MyHand6 | MyHand7 | MyHand8 | MyHand9 | - # | MyHero | - ############################################################################################################### - # Action Relative Action Number Cumulative Action Number - ############################################################################################################### - # Hero Power OppHero 1-1 1-1 - # Hero Power OppField0-6 1-7 2-8 - # Hero Power MyField0-6 1-7 9-15 - # Hero Power MyHero 1-1 16-16 - # Play MyHand0 on MyField0 and Use Action on OppHero 1-1 17-17 - # Play MyHand0 on MyField0 and Use Action on OppField0-6 1-7 18-24 - # Play MyHand0 on MyField0 and Use Action on MyField0-6 1-7 25-31 - # Play MyHand0 on MyField0 and Use Action on MyHero 1-1 32-32 - # Play MyHand0 on MyField1 and Use Action '' 1-16 33-48 - # Play MyHand0 on MyField2-6 '' 1-16,1-16,1-16,1-16,1-16 49-64,65-80,81-96,97-112,113-128 - # Play MyHand1-6 on '' 1-112, 1-112, 1-112, 1-112, 1-112, 1-112 129-240, 241-352, 353-464, 465-576, 577-688, 689-800 - self.action_space = spaces.Discrete(800) - - # Observation is the remaining time - low = np.array([0.0, # remaining_tries - ]) - high = np.array([self.TOTAL_TIME_STEPS, # remaining_tries - ]) - self.observation_space = spaces.Box(low, high, dtype=np.float32) - - # Store what the agent tried - self.curr_episode = -1 - self.action_episode_memory = [] - - def step(self, action): - """ - The agent takes a step in the environment. - - Parameters - ---------- - action : int - - Returns - ------- - ob, reward, episode_over, info : tuple - ob (object) : - an environment-specific object representing your observation of - the environment. - reward (float) : - amount of reward achieved by the previous action. The scale - varies between environments, but the goal is always to increase - your total reward. - episode_over (bool) : - whether it's time to reset the environment again. Most (but not - all) tasks are divided up into well-defined episodes, and done - being True indicates the episode has terminated. (For example, - perhaps the pole tipped too far, or you lost your last life.) - info (dict) : - diagnostic information useful for debugging. It can sometimes - be useful for learning (for example, it might contain the raw - probabilities behind the environment's last state change). - However, official evaluations of your agent are not allowed to - use this for learning. - """ - if self.is_banana_sold: - raise RuntimeError("Episode is done") - self.curr_step += 1 - self._take_action(action) - reward = self._get_reward() - ob = self._get_state() - return ob, reward, self.is_banana_sold, {} - - def _take_action(self, action): - self.action_episode_memory[self.curr_episode].append(action) - self.price = ((float(self.MAX_PRICE) / - (self.action_space.n - 1)) * action) - - chance_to_take = get_chance(self.price) - banana_is_sold = (random.random() < chance_to_take) - - if banana_is_sold: - self.is_banana_sold = True - - remaining_steps = self.TOTAL_TIME_STEPS - self.curr_step - time_is_over = (remaining_steps <= 0) - throw_away = time_is_over and not self.is_banana_sold - if throw_away: - self.is_banana_sold = True # abuse this a bit - self.price = 0.0 - - def _get_reward(self): - """Reward is given for a sold banana.""" - if self.is_banana_sold: - return self.price - 1 - else: - return 0.0 - - def reset(self): - """ - Reset the state of the environment and returns an initial observation. - - Returns - ------- - observation (object): the initial observation of the space. - """ - self.curr_episode += 1 - self.action_episode_memory.append([]) - self.is_banana_sold = False - self.price = 1.00 - return self._get_state() - - def _render(self, mode='human', close=False): - return - - def _get_state(self): - """Get the observation.""" - ob = [self.TOTAL_TIME_STEPS - self.curr_step] - return ob - - def seed(self, seed): - random.seed(seed) - np.random.seed diff --git a/gym_hearthstone/envs/hearthstone_env.py b/gym_hearthstone/envs/hearthstone_env.py new file mode 100644 index 0000000..2a2b4ea --- /dev/null +++ b/gym_hearthstone/envs/hearthstone_env.py @@ -0,0 +1,763 @@ +import logging +import copy +import random +from random import randint +import sys +import logging.config +import pkg_resources +from enum import Enum +from fireplace import cards, exceptions, utils +from hearthstone.enums import PlayState, Step, Mulligan, State, CardClass, Race +from gym import spaces +import gym +import cfg_load +from sty import fg, bg, ef, rs +import numpy as np + +logging.disable(logging.CRITICAL) +path = 'config.yaml' # always use slash in packages +filepath = pkg_resources.resource_filename('gym_hearthstone', path) +config = cfg_load.load(filepath) +logging.config.dictConfig(config['LOGGING']) + +def p(*tokens, s=" ", last=False): + """ formats tokens into a string for printing + """ + ret = "" + if last: + for t in tokens: + ret += str(t) + s + else: + for t in tokens[:-1]: + ret += str(t) + s + if len(tokens) > 0: + ret += str(tokens[-1]) + + return ret + +def indice_subsets(s): + """ gets all index subsets of an iterable + """ + n= len(s) + i = 0 + subsets = [] + for i in range(1 << n): + subset = [] + for j in range(n): + if i & (1 << j): + subset.append(j) + subsets.append(subset) + return subsets + +def hand(*cards): + + ret = [] + for i, c in enumerate(cards): + specials = [] + if type(c) is Minion: + if c.windfury: + specials += "W" + if c.taunt: + specials += "T" + if c.divine_shield: + specials += "D" + if c.poisonous: + specials += "P" + if c.silenced: + specials += "S" + if c.frozen: + specials += "F" + if c.cannot_attack_heroes: + specials += "H" + ret.append(p(*color_powered(c), fg.blue + str(c.cost) + fg.rs, fg.li_yellow + str(c.atk) + fg.rs + "/" + fg.red + str(c.health) + fg.rs, *specials)) + else: + ret.append(p(*color_powered(c), fg.blue + str(c.cost) + fg.rs)) + return ret + +race_to_color = {Race.BEAST: fg.green, Race.DEMON: fg.magenta, Race.DRAGON: fg.red, Race.ELEMENTAL: fg.yellow, Race.MURLOC : fg.cyan, + Race.PIRATE: fg.blue, Race.TOTEM: fg.black } +# unused +def color_race(*card): + """ colors cards according to their in-game race + """ + ret = [] + for i in card: + if hasattr(i, "race") and i.race in race_to_color: + ret.append("" + race_to_color[i.race] + str(i) + fg.rs) + else: + + ret.append(str(i)) + return ret + +def color_powered(*cards): + """ colors cards if they are "powered up" (yellow in hand in official Hearthstone) + """ + ret = [] + for i in cards: + if i.powered_up: + ret.append(fg.li_yellow + str(i) + fg.rs) + else: + ret.append(str(i)) + return ret + +def color_can_attack(*cards): + """ colors cards green if they are can attack + """ + ret = [] + for i in cards: + if i.can_attack: + ret.append(fg.green + str(i) + fg.rs) + else: + ret.append(str(i)) + return ret + +class AutoNumber(Enum): + def __new__(cls): + value = len(cls.__members__) # note no + 1 + obj = object.__new__(cls) + obj._value_ = value + return obj + +class Move(AutoNumber): + end_turn = () + hero_power = () + minion_attack = () + hero_attack = () + play_card = () + mulligan = () + choice = () + +class Info(AutoNumber): + player_to_move_only = () + possible_moves = () + random_move = () + +obs_size = 263 + +string_to_move = {"end": Move.end_turn, "heropower": Move.hero_power, "minionattack": Move.minion_attack, "heroattack": Move.hero_attack, + "play": Move.play_card} +move_to_string = {v: k for k, v in string_to_move.items()} +cards.db.initialize() + +class HearthstoneEnv(gym.Env): + """ A state of the game, i.e. the game board. + """ + # action_space = # spaces.d + observation_space = spaces.Discrete(obs_size) + + def __init__(self): + self.playerJustMoved = 2 # At the root pretend the player just moved is p2 - p1 has the first move + self.playerToMove = 1 + self.players_ordered = None + self.hero1 = None + self.deck1 = None + + self.hero2 = None + self.deck2 = None + self.game = None + self.setup_game() + self.lastMovePlayed = None + + def clone(self): + """ Create a deep clone of this environment. + """ + st = HearthEnv() + st.playerJustMoved = self.playerJustMoved + st.playerToMove = self.playerToMove + st.game = copy.deepcopy(self.game) + st.players_ordered = [st.game.player1, st.game.player2] + return st + + + def human(self): + """ allows creating an action from the console + safe to all input, go ask your friend to play your AI + entered indices should start from 1 + when indexing into targets, + the enemy hero comes first (target 1), then their field + """ + selection = None + current_player = self.game.current_player + i = 0 + print("Player " + str(self.playerToMove) + "'s turn: ") + + if self.game.step == Step.BEGIN_MULLIGAN: + type = Move.mulligan + selection = [int(i) - 1 for i in input("Enter the indices of the cards you want to mulligan: \n").strip().split(' ')] + return [type, selection] + elif current_player.choice is not None: + type = Move.choice + selection = [int(i) - 1 for i in input("Enter the indices of the cards you want to choose: \n").strip().split(' ')] + return [type, selection] + else: + options = "" + for k in string_to_move.keys(): + if i == len(string_to_move) - 1: + options += "\"" + k + "\"" + else: + options += "\"" + k + "\", " + i += 1 + while True: + input_arr = input( + "Enter move type (Options: " + options + + "), the selection index (if any), and the target index (if any): (ex. \"heropower 1\")\n").strip().split(' ') + if input_arr[0] not in string_to_move: + continue + type = string_to_move[input_arr[0]] + input_arr[1:] = [int(i) - 1 for i in input_arr[1:]] + if type == Move.end_turn: + return [type] + if type == Move.play_card or type == Move.minion_attack: + if len(input_arr) < 2: + continue + selection = input_arr[1] + + if selection is None: + move = [type, None] + if len(input_arr) > 1: + move.append(input_arr[1]) + else: + move.append(None) + else: + move = [type, selection] + if len(input_arr) > 2: + move.append(input_arr[2]) + else: + move.append(None) + if self.__is_safe(move): + break + return self.__moveToAction(move) + + + def setup_game(self): + if self.hero1 is None or self.hero2 is None or self.deck1 is None or self.deck2 is None: + self.game = utils.setup_game() + self.players_ordered = [self.game.player1, self.game.player2] + self.playerJustMoved = 2 # At the root pretend the player just moved is p2 - p1 has the first move + self.playerToMove = 1 + self.lastMovePlayed = None + self.game.player1.choice.choose() + self.game.player2.choice.choose() + + def step(self, action): + if action is None: + return np.zeros(obs_size), -1, -1, -1 + done = self.__doMove(self.__actionToMove(action)) + return self.__get_state(), self.__getReward(), done, self.playerToMove + + # TODO return obs after resetting + def reset(self): + self.setup_game() + return np.zeros(obs_size) + + def render(self, mode='human'): + """ prints each player's board, and the move last played + """ + out = sys.stdout + player1 = self.game.player1 + player2 = self.game.player2 + if self.game.step == Step.BEGIN_MULLIGAN: + self.__printMulligan(1, out) + self.__printMulligan(2, out) + out.write("\n") + return + p1out = self.__renderplayer(player1) + out.write(p(*p1out, s="\n", last=True)) + out.write("\n") + p2out = reversed(self.__renderplayer(player2)) # reverse for the style + out.write(p(*p2out, s="\n", last=True)) + out.write("\n") + out.write("\n") + + def renderPOV(self, player_num): + """ prints the game state from a certain player's perspective, hiding + some information about the other player's board like real Hearthstone + """ + out = sys.stdout + out.write("\n") + if self.game.step == Step.BEGIN_MULLIGAN: + self.__printMulligan(player_num, out) + out.write("\n") + return + pout_oppo = self.__renderplayer(self.players_ordered[2 - player_num]) + out.write(p(pout_oppo[0], pout_oppo[2], s='\n', last=True)) + out.write("\n") + pout = self.__renderplayer(self.players_ordered[player_num - 1]) + out.write(p(*reversed(pout), s= "\n", last=True)) + out.write("\n") + + def seed(self, seed): + random.seed(seed) + + def __printMulligan(self, player_num, out): + player = self.players_ordered[player_num - 1] + out.write("p" + str(player_num) + " - ") + if player.mulligan_state == Mulligan.INPUT: + out.write("Before Mulligan: ") + out.write(p(*describe.hand(*player.choice.cards), s = ", ") + "\n") + else: + out.write("After Mulligan: ") + out.write(p(*describe.hand(*player.hand)) + "\n") + + def __doMove(self, move, exceptionTester=[]): + """ Update a state by carrying out the given move. + Move format is [enum, index of selected card, target index, choice] + Returns True if game is over + Modified version of function from Ragowit's Fireplace fork + """ + # print("move %s" % move[0]) + + self.lastMovePlayed = move + + current_player = self.game.current_player + + if not self.game.step == Step.BEGIN_MULLIGAN: + if current_player.playstate != PlayState.PLAYING: + print("Attempt to execute move while current_player is in playstate: {}, move not executed".format(current_player.playstate.name)) + print("Attempted move: {}, on board:".format(move)) + self.render() + return + + if current_player is self.game.player1: + self.playerJustMoved = 1 + else: + self.playerJustMoved = 2 + + try: + if move[0] == Move.mulligan: + cards = [self.__currentMulliganer().choice.cards[i] for i in move[1]] + self.__currentMulliganer().choice.choose(*cards) + self.playerToMove = self.playerJustMoved + self.playerJustMoved = -(self.playerJustMoved - 1) + 2 + elif move[0] == Move.end_turn: + self.game.end_turn() + elif move[0] == Move.hero_power: + heropower = current_player.hero.power + if move[2] is None: + heropower.use() + else: + heropower.use(target=heropower.targets[move[2]]) + elif move[0] == Move.play_card: + card = current_player.hand[move[1]] + args = {'target': None, 'choose': None} + for i, k in enumerate(args.keys()): + if len(move) > i + 2 and move[i+2] is not None: + if k == 'target': + args[k] = card.targets[move[i+2]] + elif k == 'choose': + args[k] = card.choose_cards[move[i+2]] + card.play(**args) + elif move[0] == Move.minion_attack: + minion = current_player.field[move[1]] + minion.attack(minion.targets[move[2]]) + elif move[0] == Move.hero_attack: + hero = current_player.hero + hero.attack(hero.targets[move[2]]) + elif move[0] == Move.choice: + current_player.choice.choose(current_player.choice.cards[move[1]]) + except exceptions.GameOver: + return True + except Exception as e: + # print("Ran into exception: {} While executing move {} for player {}. Game State:".format(str(e), move, self.playerJustMoved)) + # self.render() + exceptionTester.append(1) # array will eval to True + if not self.game.step == Step.BEGIN_MULLIGAN: + self.playerToMove = 1 if self.game.current_player is self.game.player1 else 2 + return False + + + def __moveToAction(self, move): + """ Creates one-hot numpy array representing a move + Mutates the move + """ + if move is None: + return None + move[0] = move[0].value + for i, x in enumerate(move): + if x is None: + move[i] = 16 + move = np.array(move, dtype=int) + # there are up to 16 possible targets in HS, plus one more for None case + action = np.eye(17)[move.reshape(-1)] + return action + + def __actionToMove(self, action): + """ converts one-hot numpy array back to a move + Mutates the action + """ + if action is None: + return None + move = [] + nonzero_i = np.argwhere(action) + for x in nonzero_i: + move.append((x[1] if x[1] < 10 else None) if len(x) > 1 else x[0]) + move[0] = Move(move[0]) + return move + + def __getMoves(self): + """ Get all possible moves from this state. + Modified version of function from Ragowit's Fireplace fork + """ + + if self.game.ended or self.game.current_player is None or self.game.current_player.playstate != PlayState.PLAYING: + return [] + + valid_moves = [] + + # Mulligan + if self.game.step == Step.BEGIN_MULLIGAN: + player = self.__currentMulliganer() + for s in indice_subsets(player.choice.cards): + valid_moves.append([Move.mulligan, s]) + return valid_moves + + current_player = self.game.current_player + if current_player.playstate != PlayState.PLAYING: + return [] + + # Choose card + if current_player.choice is not None: + for i in range(len(current_player.choice.cards)): + valid_moves.append([Move.choice, i]) + return valid_moves + + else: + # Play card + for card in current_player.hand: + dupe = False + for i in range(len(valid_moves)): + if current_player.hand[valid_moves[i][1]].id == card.id: + dupe = True + break + if not dupe: + if card.is_playable(): + if card.must_choose_one: + for i in range(len(card.choose_cards)): + if len(card.targets) > 0: + for t in range(len(card.targets)): + valid_moves.append( + [Move.play_card, current_player.hand.index(card), t, i]) + else: + valid_moves.append( + [Move.play_card, current_player.hand.index(card), None, i]) + elif len(card.targets) > 0: + for t in range(len(card.targets)): + valid_moves.append( + [Move.play_card, current_player.hand.index(card), t, None]) + else: + valid_moves.append( + [Move.play_card, current_player.hand.index(card), None, None]) + + # Hero Power + heropower = current_player.hero.power + if heropower.is_usable(): + if len(heropower.targets) > 0: + for t in range(len(heropower.targets)): + valid_moves.append([Move.hero_power, None, t]) + else: + valid_moves.append([Move.hero_power, None, None]) + # Minion Attack + for minion in current_player.field: + if minion.can_attack(): + for t in range(len(minion.targets)): + valid_moves.append( + [Move.minion_attack, current_player.field.index(minion), t]) + + # Hero Attack + hero = current_player.hero + if hero.can_attack(): + for t in range(len(hero.targets)): + valid_moves.append([Move.hero_attack, None, t]) + + valid_moves.append([Move.end_turn]) + return valid_moves + + def __fastGetRandomMove(self): + """ Get a random possible move from this state. + Move format is [enum, index of card in hand, target index] + Modified version of function from Ragowit's Fireplace fork + """ + + if self.game.ended or self.game.current_player is None or self.game.current_player.playstate != PlayState.PLAYING: + return None + + if self.game.step == Step.BEGIN_MULLIGAN: + player = self.__currentMulliganer() + mull_count = random.randint(0, len(player.choice.cards)) + cards_to_mulligan = random.sample([i for i, x in enumerate(player.choice.cards)], mull_count) + return [Move.mulligan, cards_to_mulligan] + + current_player = self.game.current_player + + if current_player.playstate != PlayState.PLAYING: + return [] + # Choose card + elif current_player.choice is not None: + card_index = randint(0, len(current_player.choice.cards) - 1) + return [Move.choice, card_index] + else: + chance = random.random() + threshold = 0 + if chance < .02: # 2% chance + return [Move.end_turn] + + # 90% chance to minion attack if minion can attack + # Minion Attack + if chance < .90: + for minion in current_player.field: + if minion.can_attack(): + t = randint(0, len(minion.targets) - 1) + return [Move.minion_attack, current_player.field.index(minion), t] + + chance = random.random() + # Play card + if chance < .50 and len(current_player.hand) > 0: # 50% chance if no minion attack + card_index = random.choice(current_player.hand) + if card_index.is_playable(): + if len(card_index.targets) > 0: + t = randint(0, len(card_index.targets) - 1) + valid_card = [Move.play_card, current_player.hand.index(card_index), t] + else: + valid_card = [Move.play_card, current_player.hand.index(card_index), None] + if card_index.must_choose_one: + valid_card.append(randint(0, len(card_index.choose_cards) - 1)) + return valid_card + chance = random.random() + # Hero Attack + if chance < .50: + hero = current_player.hero + if hero.can_attack(): + t = randint(0, len(hero.targets) - 1) + return [Move.hero_attack, None, t] + + chance = random.random() + # Hero Power + if chance < .30: + heropower = current_player.hero.power + if heropower.is_usable(): + if len(heropower.targets) > 0: + t = randint(0, len(heropower.targets) - 1) + return [Move.hero_power, None, t] + else: + return [Move.hero_power, None, None] + + # if no other moves remaining + return [Move.end_turn] + + def get_possible_actions(self): + actions = [] + for move in self.__getMoves(): + actions.append(self.__moveToAction(move)) + return actions + + def get_random_action(self): + move = self.__fastGetRandomMove() + return self.__moveToAction(move) + + def __is_safe(self, move): + """ tests the action on a clone of the game state. + """ + copy = self.clone() + exceptionTester = [] + copy.__doMove(move, exceptionTester=exceptionTester) + if exceptionTester: + return False + else: + return True + + # TODO render secrets + def __renderplayer(self, player): + """ returns a length 3 string array representing a player's board + string 1: hero + string 2: hand + string 3: field + """ + pout = [] + + h_health = fg.red + str(player.hero.health) + fg.rs + if player.hero.armor != 0: + h_health += "+" + str(player.hero.armor) + + h_mana = fg.blue + str(player.mana) + "/" + str(player.max_mana) + fg.rs + + line_1 = p(player.hero, h_health, h_mana) + + + if player.weapon is not None: + line_1 += ", " + str(player.weapon.damage) + " " + str(player.weapon.durability) + + pout.append(line_1) + hand = [] + + pout.append(fg.rs + "HAND: " + p(*describe.hand(*player.hand), s=", ")) # line 2 + + field = [] + for i, c in enumerate(player.field): + card = "" + specials = [] + if c.windfury: + specials += "W" + if c.taunt: + specials += "T" + if c.divine_shield: + specials += "D" + if c.poisonous: + specials += "P" + if c.silenced: + specials += "S" + if c.frozen: + specials += "F" + if c.cannot_attack_heroes: + specials += "H" + c_health = str(c.max_health) + if c.max_health != c.health: + c_health = fg.red + str(c.health) + fg.rs + "/" + c_health + if player is self.game.current_player: + card += p(*color_can_attack(c), c.atk, c_health, *specials) + else: + card += p(c, c.atk, c_health, *specials) + field.append(card) + pout.append("FIELD: " + p(*field, s=", ")) # line 3 + return pout + + def getResult(self, player): + """ Get the game result from the viewpoint of player, 1 for win, 0 for loss + """ + if self.players_ordered[0].hero.health <= 0 and self.players_ordered[1].hero.health <= 0: # tie + return 0.5 + elif self.players_ordered[player - 1].hero.health <= 0: # loss + return 0 + elif self.players_ordered[2 - player].hero.health <= 0: # win + return 1 + else: # game not over + return 0.5 + + def __getReward(self): + """ Get the current reward, from the perspective of the player who just moved + 1 for win, -1 for loss + 0 if game is not over + """ + player = self.playerJustMoved + if self.players_ordered[0].hero.health <= 0 and self.players_ordered[1].hero.health <= 0: # tie + return 0.1 + elif self.players_ordered[player - 1].hero.health <= 0: # loss + return -1 + elif self.players_ordered[2 - player].hero.health <= 0: # win + return 1 + else: + return 0 + + def __currentMulliganer(self): + if not self.game.step == Step.BEGIN_MULLIGAN: + return None + return self.players_ordered[self.playerToMove - 1] + + + def __get_state(self): + """ + function taken from github.com/dillondaudert/Hearthstone-AI and modified + Args: + game, the current game object + player, the player from whose perspective to analyze the state + return: + a numpy array features extracted from the + supplied game. + """ + game = self.game + player = self.players_ordered[self.playerToMove - 1] + p1 = player + p2 = player.opponent + s = np.zeros(obs_size, dtype=np.int32) + + # TODO: Create state representation for mulligan stage + if game.step == Step.BEGIN_MULLIGAN or game.ended: + return s + + # 0-9 player1 class, we subtract 1 here because the classes are from 1 to 10 + s[p1.hero.card_class - 1] = 1 + # 10-19 player2 class + s[10 + p2.hero.card_class - 1] = 1 + i = 20 + # 20-21: current health of current player, then opponent + s[i] = p1.hero.health + s[i + 1] = p2.hero.health + + # 22: hero power usable y/n + s[i + 2] = p1.hero.power.is_usable() * 1 + # 23-24: # of mana crystals for you opponent + s[i + 3] = p1.max_mana + s[i + 4] = p2.max_mana + # 25: # of crystals still avalible + s[i + 5] = p1.mana + # 26-31: weapon equipped y/n, pow., dur. for you, then opponent + s[i + 6] = 0 if p1.weapon is None else 1 + s[i + 7] = 0 if p1.weapon is None else p1.weapon.damage + s[i + 8] = 0 if p1.weapon is None else p1.weapon.durability + + s[i + 9] = 0 if p2.weapon is None else 1 + s[i + 10] = 0 if p2.weapon is None else p2.weapon.damage + s[i + 11] = 0 if p2.weapon is None else p2.weapon.durability + + # 32: number of cards in opponents hand + s[i + 12] = len(p2.hand) + # in play minions + + i = 33 + # 33-102, your monsters on the field + p1_minions = len(p1.field) + for j in range(0, 7): + if j < p1_minions: + # filled y/n, pow, tough, current health, can attack + s[i] = 1 + s[i + 1] = p1.field[j].atk + s[i + 2] = p1.field[j].max_health + s[i + 3] = p1.field[j].health + s[i + 4] = p1.field[j].can_attack() * 1 + # deathrattle, div shield, taunt, stealth y/n + s[i + 5] = p1.field[j].has_deathrattle * 1 + s[i + 6] = p1.field[j].divine_shield * 1 + s[i + 7] = p1.field[j].taunt * 1 + s[i + 8] = p1.field[j].stealthed * 1 + s[i + 9] = p1.field[j].silenced * 1 + i += 10 + + # 103-172, enemy monsters on the field + p2_minions = len(p2.field) + for j in range(0, 7): + if j < p2_minions: + # filled y/n, pow, tough, current health, can attack + s[i] = 1 + s[i + 1] = p2.field[j].atk + s[i + 2] = p2.field[j].max_health + s[i + 3] = p2.field[j].health + s[i + 4] = p2.field[j].can_attack() * 1 + # deathrattle, div shield, taunt, stealth y/n + s[i + 5] = p2.field[j].has_deathrattle * 1 + s[i + 6] = p2.field[j].divine_shield * 1 + s[i + 7] = p2.field[j].taunt * 1 + s[i + 8] = p2.field[j].stealthed * 1 + s[i + 9] = p2.field[j].silenced * 1 + i += 10 + + # in hand + + # 173-262, your cards in hand + p1_hand = len(p1.hand) + for j in range(0, 10): + if j < p1_hand: + # card y/n + s[i] = 1 + # minion y/n, attk, hp, battlecry, div shield, deathrattle, taunt + s[i + 1] = 1 if p1.hand[j].type == 4 else 0 + s[i + 2] = p1.hand[j].atk if s[i + 1] == 1 else 0 + s[i + 2] = p1.hand[j].health if s[i + 1] == 1 else 0 + s[i + 3] = p1.hand[j].divine_shield * 1 if s[i + 1] == 1 else 0 + s[i + 4] = p1.hand[j].has_deathrattle * 1 if s[i + 1] == 1 else 0 + s[i + 5] = p1.hand[j].taunt * 1 if s[i + 1] == 1 else 0 + # weapon y/n, spell y/n, cost + s[i + 6] = 1 if p1.hand[j].type == 7 else 0 + s[i + 7] = 1 if p1.hand[j].type == 5 else 0 + s[i + 8] = p1.hand[j].cost + i += 9 + + return s \ No newline at end of file diff --git a/setup.py b/setup.py index ea12af2..9d4f0e3 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ #!/usr/bin/env python from setuptools import setup -setup(name='gym_banana', +setup(name='gym_hearthstone', version='0.0.1', install_requires=['gym>=0.2.3', 'pandas', diff --git a/tests/test_main.py b/tests/test_main.py index a0ea7d9..8b4aff0 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -8,13 +8,13 @@ import gym # internal modules -import gym_banana +import gym_hearthstone class Environments(unittest.TestCase): def test_env(self): - env = gym.make('Banana-v0') + env = gym.make('Hearthstone-v0') env.seed(0) env.reset() env.step(0)