advent_of_code_2021

My attempts to work through the 2021 Advent of Code problems.
git clone https://git.eamoncaddigan.net/advent_of_code_2021.git
Log | Files | Refs | README | LICENSE

commit e7ed6b6452079a11a247c10b0b337d42f1208ce9
parent 7b933c5cbef22baf14de28d781bb2d6e0e0d2852
Author: Eamon Caddigan <eamon.caddigan@gmail.com>
Date:   Sat,  4 Dec 2021 20:29:37 -0500

Solution to day 4, part 1

Diffstat:
Aday04_part1.py | 146+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 146 insertions(+), 0 deletions(-)

diff --git a/day04_part1.py b/day04_part1.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python +"""For Day 4, we're playing BINGO! The input file has a list of numbers and a +set of bingo cards, and we need to report information about the first card that +wins.""" + +# Even still I'm tempted to create something like a `BingoCard` object, and +# give it `update` and `check_if_winner` methods. I think it's a fine approach! +# But I'm here at the Advent of Code 2021 to get better at pandas first, and +# more used to functional(ish) paradigms in Python second, so I will eschew +# that and do Pandas DataFrame (which I will just refer to as 'df's for the +# rest of the month) operations as much as possible. I really failed at making +# this "functional" though. + +import numpy as np +import pandas as pd +from utils import get_puzzle_input + +def parse_input(input_string): + """Given a string representation of the input data, return a tuple + containing a df that represents all the board row and column data and a + list of drawn numbers """ + # This is much more heavy-duty parsing that was necessary for the other + # puzzles and I'm stuffing it all into this function. + lines = input_string.split('\n') + + # The first line gives the draws. Remove it from the list of lines and + # convert the comma-separated string to a list of integers + number_draws = [int(x) for x in lines.pop(0).split(',')] + + # Get rid of the first blank linke and prepare the dict that will store the + # bingo board data and the board/row counters + assert lines.pop(0) == '' + board_data = {'board': [], + 'row': [], + 'col': [], + 'value': []} + current_board = 0 + current_row = 0 + + # Now we work our way through the data representing the boards + for line in lines: + if line: + # If the line is not empty, it's a row from a bingo board + # I'm going to (obnoxiously) pretend that I have no idea how many + # rows or columns are in the boards, but I'm also going to assume + # that they all have the same number of columns + # + # We'll store the data in long format initially, with one row per + # bingo board entry + for current_col, value in enumerate(line.split()): + board_data['board'].append(current_board) + board_data['row'].append(current_row) + board_data['col'].append(current_col) + board_data['value'].append(int(value)) + + current_row += 1 + else: + # If the line is empty, it indicates that a new bingo board is + # (possibly) starting (it could just be the end of the file) + current_board += 1 + current_row = 0 + + # We'll convert the data to a df and then 'melt' it, giving us two rows for + # each board entry, one associated with the row and another with a column. + # This will make it easier to find winners (this strategy will need to be + # rethought if we have to deal with diagonals in part 2, TBD). We'll also + # add the `marked` column here and initialize it to `False`. + board_df = ( + pd.DataFrame.from_dict(board_data) + .melt(id_vars=('board', 'value'), + var_name='var_name', + value_name='var_id') + .assign(marked=False) + ) + + return (board_df, number_draws) + +def mark_boards(board_df, value): + """Return a copy of `board_df` with all of the rows where `value` matches + the passed value have `marked` set to `True`""" + return ( + board_df + .assign(marked=lambda x: np.where(x['value'] == value, + True, x['marked'])) + ) + +def check_for_winner(board_df): + """If there is a board that has all of the entries in its `var_name` marked + to `True`, return its number; otherwise return `None`""" + # Find any (and all) winning boards + winners_df = ( + board_df + .groupby(['board', 'var_name', 'var_id']) + .aggregate({'marked': 'all'}) # mark any row or col that's full + .groupby('board') + .any() # mark any board with a full row/col + .reset_index() # turn 'board' back into a column + ) + # The winners (if any) as a Series + winning_boards = winners_df.loc[winners_df['marked'], 'board'] + if winning_boards.empty: + return None + + # Return the first (of potentially multiple) winners as an int + return int(winning_boards.iat[0]) + +def score_winning_board(board_df, board_number): + """Return the sum of the unmarked items in the board with the given + number""" + return ( + board_df + .loc[(board_df['board'] == board_number) & \ + (board_df['var_name'] == 'row') & \ + (~board_df['marked'])] + .aggregate({'value': sum}) + .to_list()[0] + ) + +def find_winning_board(board_df, number_draws): + """Given the df of board data and the list of numbers drawn, go through the + numbers until a winner is found. Returns a tuple containing the sum of the + unmarked elements on the winning board and last drawn number number""" + winning_number = None + unmarked_sum = None + + for number_draw in number_draws: + board_df = mark_boards(board_df, number_draw) + winning_board = check_for_winner(board_df) + if winning_board is not None: + unmarked_sum = score_winning_board(board_df, winning_board) + winning_number = number_draw + break + else: + # We ran through the numbers without finding a winner + raise RuntimeError("No winning board found") + + return unmarked_sum * winning_number + +def solve_puzzle(input_string): + """Return the numeric solution to the puzzle""" + # 91.7 ms ± 1.19 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) + return find_winning_board(*parse_input(input_string)) + +if __name__ == "__main__": + print("Winning puzzle score:", + solve_puzzle(get_puzzle_input(4)))