commit 7577a99b6a66f5a4fe7338109efa4d2bc62d7f92
parent c12bbbf0b1d86c50ccf9dc07a6354e42ed125bb7
Author: Eamon Caddigan <eamon.caddigan@gmail.com>
Date: Tue, 14 Dec 2021 14:30:19 -0500
Solution to day 14, part 1 (still need to figure out type hints/pandas)
Diffstat:
A | day14_part1.py | | | 134 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
1 file changed, 134 insertions(+), 0 deletions(-)
diff --git a/day14_part1.py b/day14_part1.py
@@ -0,0 +1,134 @@
+#!/usr/bin/env python
+"""Advent of Code 2021, day 14 (part 1): Extended Polymerization
+Perform 'pair insertions' on a 'polymer' string"""
+
+# Since the length of the string basically doubles every step of the problem,
+# we need to make sure we have an efficient representation of the data. And
+# once again, I'm going to use pandas (a Series) even when a dict would do just
+# fine so that I can continue to familiarize myself with it
+
+from typing import List, Tuple
+from functools import reduce
+import pandas as pd
+from utils import get_puzzle_input
+
+EXAMPLE_INPUT = \
+"""NNCB
+
+CH -> B
+HH -> N
+CB -> H
+NH -> C
+HB -> C
+HC -> B
+HN -> C
+NN -> C
+BH -> H
+NC -> B
+NB -> B
+BN -> B
+BB -> N
+BC -> B
+CC -> N
+CN -> C
+"""
+
+def split_instructions(instructions_list: List[str]) -> pd.Series:
+ """parses a list of the form ["AB", "C"] and returns a Series equivalent to
+ [("A", "C"), ("C", "B")]"""
+ return pd.Series([(instructions_list[0][0], instructions_list[1]),
+ (instructions_list[1], instructions_list[0][1])])
+
+def parse_input(input_string: str) -> Tuple[pd.Series, pd.DataFrame, Tuple[str, str]]:
+ """Read the polymer template and the pair insertion instructions and
+ return:
+ * A pandas series giving the count of each pair (pairs are the index
+ elements and counts are the values)
+ * A pandas data frame representing insertion instructions (start pair is
+ the index and the two new pairs are the columns)
+ * The identity of the first and last elements in the chain, since every
+ element but these will be double-counted"""
+ # Again, python's built-in types are perfectly sufficient for this job, but
+ # I'm still getting a handle on pandas's indexing.
+ lines = input_string.rstrip('\n').split('\n')
+
+ first_last = (lines[0][0], lines[0][-1])
+
+ # Using a Series as a Tuple[str, str] -> int dictionary
+ pair_counts = pd.Series(1,
+ index=zip(lines[0][0:-1], lines[0][1:]),
+ name='pair_count')
+
+ instruction_pairs = pd.Series(lines[2:]).str.split(' -> ')
+ pair_insertions = (
+ instruction_pairs.apply(split_instructions)
+ .set_axis(instruction_pairs.map(lambda x: (x[0][0], x[0][1])))
+ )
+
+ return (pair_counts, pair_insertions, first_last)
+
+def insert_at_pairs(pair_counts: pd.Series,
+ pair_insertions: pd.DataFrame) -> pd.Series:
+ """Perform the pair insertions and return an updated series of pair
+ counts"""
+ # Still working out pandas's indexing interface, of which I am NOT A FAN.
+ # This first step gives a mult-indexed Series:
+ updated_pair_counts_multi = (
+ pair_insertions
+ .merge(pair_counts, left_index=True, right_index=True)
+ .set_index([0, 1])['pair_count']
+ )
+ # I don't know a better way to fold this multiindex into a single index, so
+ # we concatenate the data with itself using each index in turn, then group
+ # by the new index and sum the pairs
+ return (
+ pd.concat([updated_pair_counts_multi.droplevel(0).rename_axis(None),
+ updated_pair_counts_multi.droplevel(1).rename_axis(None)])
+ .groupby(level=0)
+ .sum()
+ )
+
+def count_elements(pair_counts: pd.Series,
+ first_last: Tuple[str, str]) -> pd.Series:
+ """Given the count of the pairs and the identity of the first and last
+ element in the string, return the count of each individual element"""
+ # Everything was double-counted except we're missing one instance each of
+ # the first and last element (that's why we preserve them)
+ element_count = (
+ pd.concat([pair_counts.rename(lambda x: x[0]),
+ pair_counts.rename(lambda x: x[1])])
+ .groupby(level=0)
+ .sum()
+ .rename('element_count')
+ )
+ element_count[first_last[0]] += 1
+ element_count[first_last[1]] += 1
+ element_count /= 2
+ return element_count
+
+def apply_insertions(pair_counts: pd.Series,
+ pair_insertions: pd.DataFrame,
+ first_last: Tuple[str, str],
+ num_insertions: int) -> pd.Series:
+ """Apply `num_insertions` insertions and get the final count of the
+ _elements_"""
+ return count_elements(
+ reduce(lambda x, y: insert_at_pairs(x, pair_insertions),
+ range(num_insertions),
+ pair_counts),
+ first_last
+ )
+
+def solve_puzzle(input_string: str) -> int:
+ """Return the numeric solution to the puzzle"""
+ element_count = apply_insertions(*(parse_input(input_string) + (10,)))
+ return int(element_count.max() - element_count.min())
+
+def main() -> None:
+ """Run when the file is called as a script"""
+ assert solve_puzzle(EXAMPLE_INPUT) == 1588
+ print("Difference between most and least common elements:",
+ solve_puzzle(get_puzzle_input(14)))
+
+if __name__ == "__main__":
+ main()