advent_of_code_2021

My attempts to work through the 2021 Advent of Code problems.
git clone https://git.eamoncaddigan.net/advent_of_code_2021.git
Log | Files | Refs | README | LICENSE

commit 7577a99b6a66f5a4fe7338109efa4d2bc62d7f92
parent c12bbbf0b1d86c50ccf9dc07a6354e42ed125bb7
Author: Eamon Caddigan <eamon.caddigan@gmail.com>
Date:   Tue, 14 Dec 2021 14:30:19 -0500

Solution to day 14, part 1 (still need to figure out type hints/pandas)

Diffstat:
Aday14_part1.py | 134+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 134 insertions(+), 0 deletions(-)

diff --git a/day14_part1.py b/day14_part1.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python +"""Advent of Code 2021, day 14 (part 1): Extended Polymerization +Perform 'pair insertions' on a 'polymer' string""" + +# Since the length of the string basically doubles every step of the problem, +# we need to make sure we have an efficient representation of the data. And +# once again, I'm going to use pandas (a Series) even when a dict would do just +# fine so that I can continue to familiarize myself with it + +from typing import List, Tuple +from functools import reduce +import pandas as pd +from utils import get_puzzle_input + +EXAMPLE_INPUT = \ +"""NNCB + +CH -> B +HH -> N +CB -> H +NH -> C +HB -> C +HC -> B +HN -> C +NN -> C +BH -> H +NC -> B +NB -> B +BN -> B +BB -> N +BC -> B +CC -> N +CN -> C +""" + +def split_instructions(instructions_list: List[str]) -> pd.Series: + """parses a list of the form ["AB", "C"] and returns a Series equivalent to + [("A", "C"), ("C", "B")]""" + return pd.Series([(instructions_list[0][0], instructions_list[1]), + (instructions_list[1], instructions_list[0][1])]) + +def parse_input(input_string: str) -> Tuple[pd.Series, pd.DataFrame, Tuple[str, str]]: + """Read the polymer template and the pair insertion instructions and + return: + * A pandas series giving the count of each pair (pairs are the index + elements and counts are the values) + * A pandas data frame representing insertion instructions (start pair is + the index and the two new pairs are the columns) + * The identity of the first and last elements in the chain, since every + element but these will be double-counted""" + # Again, python's built-in types are perfectly sufficient for this job, but + # I'm still getting a handle on pandas's indexing. + lines = input_string.rstrip('\n').split('\n') + + first_last = (lines[0][0], lines[0][-1]) + + # Using a Series as a Tuple[str, str] -> int dictionary + pair_counts = pd.Series(1, + index=zip(lines[0][0:-1], lines[0][1:]), + name='pair_count') + + instruction_pairs = pd.Series(lines[2:]).str.split(' -> ') + pair_insertions = ( + instruction_pairs.apply(split_instructions) + .set_axis(instruction_pairs.map(lambda x: (x[0][0], x[0][1]))) + ) + + return (pair_counts, pair_insertions, first_last) + +def insert_at_pairs(pair_counts: pd.Series, + pair_insertions: pd.DataFrame) -> pd.Series: + """Perform the pair insertions and return an updated series of pair + counts""" + # Still working out pandas's indexing interface, of which I am NOT A FAN. + # This first step gives a mult-indexed Series: + updated_pair_counts_multi = ( + pair_insertions + .merge(pair_counts, left_index=True, right_index=True) + .set_index([0, 1])['pair_count'] + ) + # I don't know a better way to fold this multiindex into a single index, so + # we concatenate the data with itself using each index in turn, then group + # by the new index and sum the pairs + return ( + pd.concat([updated_pair_counts_multi.droplevel(0).rename_axis(None), + updated_pair_counts_multi.droplevel(1).rename_axis(None)]) + .groupby(level=0) + .sum() + ) + +def count_elements(pair_counts: pd.Series, + first_last: Tuple[str, str]) -> pd.Series: + """Given the count of the pairs and the identity of the first and last + element in the string, return the count of each individual element""" + # Everything was double-counted except we're missing one instance each of + # the first and last element (that's why we preserve them) + element_count = ( + pd.concat([pair_counts.rename(lambda x: x[0]), + pair_counts.rename(lambda x: x[1])]) + .groupby(level=0) + .sum() + .rename('element_count') + ) + element_count[first_last[0]] += 1 + element_count[first_last[1]] += 1 + element_count /= 2 + return element_count + +def apply_insertions(pair_counts: pd.Series, + pair_insertions: pd.DataFrame, + first_last: Tuple[str, str], + num_insertions: int) -> pd.Series: + """Apply `num_insertions` insertions and get the final count of the + _elements_""" + return count_elements( + reduce(lambda x, y: insert_at_pairs(x, pair_insertions), + range(num_insertions), + pair_counts), + first_last + ) + +def solve_puzzle(input_string: str) -> int: + """Return the numeric solution to the puzzle""" + element_count = apply_insertions(*(parse_input(input_string) + (10,))) + return int(element_count.max() - element_count.min()) + +def main() -> None: + """Run when the file is called as a script""" + assert solve_puzzle(EXAMPLE_INPUT) == 1588 + print("Difference between most and least common elements:", + solve_puzzle(get_puzzle_input(14))) + +if __name__ == "__main__": + main()