day04_part1.py (5999B)
1 #!/usr/bin/env python 2 """For Day 4, we're playing BINGO! The input file has a list of numbers and a 3 set of bingo cards, and we need to report information about the first card that 4 wins.""" 5 6 # Even still I'm tempted to create something like a `BingoCard` object, and 7 # give it `update` and `check_if_winner` methods. I think it's a fine approach! 8 # But I'm here at the Advent of Code 2021 to get better at pandas first, and 9 # more used to functional(ish) paradigms in Python second, so I will eschew 10 # that and do Pandas DataFrame (which I will just refer to as 'df's for the 11 # rest of the month) operations as much as possible. I really failed at making 12 # this "functional" though. 13 14 import numpy as np 15 import pandas as pd 16 from utils import get_puzzle_input 17 18 def parse_input(input_string): 19 """Given a string representation of the input data, return a tuple 20 containing a df that represents all the board row and column data and a 21 list of drawn numbers """ 22 # This is much more heavy-duty parsing that was necessary for the other 23 # puzzles and I'm stuffing it all into this function. 24 lines = input_string.split('\n') 25 26 # The first line gives the draws. Remove it from the list of lines and 27 # convert the comma-separated string to a list of integers 28 number_draws = [int(x) for x in lines.pop(0).split(',')] 29 30 # Get rid of the first blank linke and prepare the dict that will store the 31 # bingo board data and the board/row counters 32 assert lines.pop(0) == '' 33 board_data = {'board': [], 34 'row': [], 35 'col': [], 36 'value': []} 37 current_board = 0 38 current_row = 0 39 40 # Now we work our way through the data representing the boards 41 for line in lines: 42 if line: 43 # If the line is not empty, it's a row from a bingo board 44 # I'm going to (obnoxiously) pretend that I have no idea how many 45 # rows or columns are in the boards, but I'm also going to assume 46 # that they all have the same number of columns 47 # 48 # We'll store the data in long format initially, with one row per 49 # bingo board entry 50 for current_col, value in enumerate(line.split()): 51 board_data['board'].append(current_board) 52 board_data['row'].append(current_row) 53 board_data['col'].append(current_col) 54 board_data['value'].append(int(value)) 55 56 current_row += 1 57 else: 58 # If the line is empty, it indicates that a new bingo board is 59 # (possibly) starting (it could just be the end of the file) 60 current_board += 1 61 current_row = 0 62 63 # We'll convert the data to a df and then 'melt' it, giving us two rows for 64 # each board entry, one associated with the row and another with a column. 65 # This will make it easier to find winners (this strategy will need to be 66 # rethought if we have to deal with diagonals in part 2, TBD). We'll also 67 # add the `marked` column here and initialize it to `False`. 68 board_df = ( 69 pd.DataFrame.from_dict(board_data) 70 .melt(id_vars=('board', 'value'), 71 var_name='var_name', 72 value_name='var_id') 73 .assign(marked=False) 74 ) 75 76 return (board_df, number_draws) 77 78 def mark_boards(board_df, value): 79 """Return a copy of `board_df` with all of the rows where `value` matches 80 the passed value have `marked` set to `True`""" 81 return ( 82 board_df 83 .assign(marked=lambda x: np.where(x['value'] == value, 84 True, x['marked'])) 85 ) 86 87 def check_for_winner(board_df): 88 """If there is a board that has all of the entries in its `var_name` marked 89 to `True`, return its number; otherwise return `None`""" 90 # Find any (and all) winning boards 91 winners_df = ( 92 board_df 93 .groupby(['board', 'var_name', 'var_id']) 94 .aggregate({'marked': 'all'}) # mark any row or col that's full 95 .groupby('board') 96 .any() # mark any board with a full row/col 97 .reset_index() # turn 'board' back into a column 98 ) 99 # The winners (if any) as a Series 100 winning_boards = winners_df.loc[winners_df['marked'], 'board'] 101 if winning_boards.empty: 102 return None 103 104 # Return the first (of potentially multiple) winners as an int 105 return int(winning_boards.iat[0]) 106 107 def score_winning_board(board_df, board_number): 108 """Return the sum of the unmarked items in the board with the given 109 number""" 110 return ( 111 board_df 112 .loc[(board_df['board'] == board_number) & \ 113 (board_df['var_name'] == 'row') & \ 114 (~board_df['marked'])] 115 .aggregate({'value': sum}) 116 .to_list()[0] 117 ) 118 119 def find_winning_board(board_df, number_draws): 120 """Given the df of board data and the list of numbers drawn, go through the 121 numbers until a winner is found. Returns a tuple containing the sum of the 122 unmarked elements on the winning board and last drawn number number""" 123 winning_number = None 124 unmarked_sum = None 125 126 for number_draw in number_draws: 127 board_df = mark_boards(board_df, number_draw) 128 winning_board = check_for_winner(board_df) 129 if winning_board is not None: 130 unmarked_sum = score_winning_board(board_df, winning_board) 131 winning_number = number_draw 132 break 133 else: 134 # We ran through the numbers without finding a winner 135 raise RuntimeError("No winning board found") 136 137 return unmarked_sum * winning_number 138 139 def solve_puzzle(input_string): 140 """Return the numeric solution to the puzzle""" 141 # 91.7 ms ± 1.19 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) 142 return find_winning_board(*parse_input(input_string)) 143 144 if __name__ == "__main__": 145 print("Winning puzzle score:", 146 solve_puzzle(get_puzzle_input(4)))