Solution to day 14, part 1 (still need to figure out type hints/pandas) - advent_of_code_2021 - My attempts to work through the 2021 Advent of Code problems.

commit 7577a99b6a66f5a4fe7338109efa4d2bc62d7f92
parent c12bbbf0b1d86c50ccf9dc07a6354e42ed125bb7
Author: Eamon Caddigan <eamon.caddigan@gmail.com>
Date:   Tue, 14 Dec 2021 14:30:19 -0500

Solution to day 14, part 1 (still need to figure out type hints/pandas)

Diffstat:
A day14_part1.py  | 134 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

1 file changed, 134 insertions(+), 0 deletions(-)
diff --git a/day14_part1.py b/day14_part1.py
@@ -0,0 +1,134 @@
+#!/usr/bin/env python
+"""Advent of Code 2021, day 14 (part 1): Extended Polymerization
+Perform 'pair insertions' on a 'polymer' string"""
+
+# Since the length of the string basically doubles every step of the problem,
+# we need to make sure we have an efficient representation of the data. And
+# once again, I'm going to use pandas (a Series) even when a dict would do just
+# fine so that I can continue to familiarize myself with it
+
+from typing import List, Tuple
+from functools import reduce
+import pandas as pd
+from utils import get_puzzle_input
+
+EXAMPLE_INPUT = \
+"""NNCB
+
+CH -> B
+HH -> N
+CB -> H
+NH -> C
+HB -> C
+HC -> B
+HN -> C
+NN -> C
+BH -> H
+NC -> B
+NB -> B
+BN -> B
+BB -> N
+BC -> B
+CC -> N
+CN -> C
+"""
+
+def split_instructions(instructions_list: List[str]) -> pd.Series:
+    """parses a list of the form ["AB", "C"] and returns a Series equivalent to
+    [("A", "C"), ("C", "B")]"""
+    return pd.Series([(instructions_list[0][0], instructions_list[1]),
+                      (instructions_list[1], instructions_list[0][1])])
+
+def parse_input(input_string: str) -> Tuple[pd.Series, pd.DataFrame, Tuple[str, str]]:
+    """Read the polymer template and the pair insertion instructions and
+    return:
+    * A pandas series giving the count of each pair (pairs are the index
+      elements and counts are the values)
+    * A pandas data frame representing insertion instructions (start pair is
+      the index and the two new pairs are the columns)
+    * The identity of the first and last elements in the chain, since every
+      element but these will be double-counted"""
+    # Again, python's built-in types are perfectly sufficient for this job, but
+    # I'm still getting a handle on pandas's indexing.
+    lines = input_string.rstrip('\n').split('\n')
+
+    first_last = (lines[0][0], lines[0][-1])
+
+    # Using a Series as a Tuple[str, str] -> int dictionary
+    pair_counts = pd.Series(1,
+                            index=zip(lines[0][0:-1], lines[0][1:]),
+                            name='pair_count')
+
+    instruction_pairs = pd.Series(lines[2:]).str.split(' -> ')
+    pair_insertions = (
+        instruction_pairs.apply(split_instructions)
+        .set_axis(instruction_pairs.map(lambda x: (x[0][0], x[0][1])))
+    )
+
+    return (pair_counts, pair_insertions, first_last)
+
+def insert_at_pairs(pair_counts: pd.Series,
+                    pair_insertions: pd.DataFrame) -> pd.Series:
+    """Perform the pair insertions and return an updated series of pair
+    counts"""
+    # Still working out pandas's indexing interface, of which I am NOT A FAN.
+    # This first step gives a mult-indexed Series:
+    updated_pair_counts_multi = (
+        pair_insertions
+        .merge(pair_counts, left_index=True, right_index=True)
+        .set_index([0, 1])['pair_count']
+    )
+    # I don't know a better way to fold this multiindex into a single index, so
+    # we concatenate the data with itself using each index in turn, then group
+    # by the new index and sum the pairs
+    return (
+        pd.concat([updated_pair_counts_multi.droplevel(0).rename_axis(None),
+                   updated_pair_counts_multi.droplevel(1).rename_axis(None)])
+        .groupby(level=0)
+        .sum()
+    )
+
+def count_elements(pair_counts: pd.Series,
+                   first_last: Tuple[str, str]) -> pd.Series:
+    """Given the count of the pairs and the identity of the first and last
+    element in the string, return the count of each individual element"""
+    # Everything was double-counted except we're missing one instance each of
+    # the first and last element (that's why we preserve them)
+    element_count = (
+        pd.concat([pair_counts.rename(lambda x: x[0]),
+                   pair_counts.rename(lambda x: x[1])])
+        .groupby(level=0)
+        .sum()
+        .rename('element_count')
+    )
+    element_count[first_last[0]] += 1
+    element_count[first_last[1]] += 1
+    element_count /= 2
+    return element_count
+
+def apply_insertions(pair_counts: pd.Series,
+                     pair_insertions: pd.DataFrame,
+                     first_last: Tuple[str, str],
+                     num_insertions: int) -> pd.Series:
+    """Apply `num_insertions` insertions and get the final count of the
+    _elements_"""
+    return count_elements(
+        reduce(lambda x, y: insert_at_pairs(x, pair_insertions),
+               range(num_insertions),
+               pair_counts),
+        first_last
+    )
+
+def solve_puzzle(input_string: str) -> int:
+    """Return the numeric solution to the puzzle"""
+    element_count = apply_insertions(*(parse_input(input_string) + (10,)))
+    return int(element_count.max() - element_count.min())
+
+def main() -> None:
+    """Run when the file is called as a script"""
+    assert solve_puzzle(EXAMPLE_INPUT) == 1588
+    print("Difference between most and least common elements:",
+          solve_puzzle(get_puzzle_input(14)))
+
+if __name__ == "__main__":
+    main()

	advent_of_code_2021 My attempts to work through the 2021 Advent of Code problems.
	git clone https://git.eamoncaddigan.net/advent_of_code_2021.git
	Log \| Files \| Refs \| README \| LICENSE