Improved insertion logic and passing type checks - advent_of_code_2021 - My attempts to work through the 2021 Advent of Code problems.

commit 242c2e925eb7250789d8ed3aec603ece32e5028b
parent 3078459e2f86c11c6446a3470f9cb7af42eea255
Author: Eamon Caddigan <eamon.caddigan@gmail.com>
Date:   Tue, 14 Dec 2021 17:49:02 -0500

Improved insertion logic and passing type checks

Diffstat:
M day14_part1.py  | 47 ++++++++++++++++++++++-------------------------

1 file changed, 22 insertions(+), 25 deletions(-)
diff --git a/day14_part1.py b/day14_part1.py
@@ -7,7 +7,7 @@ Perform 'pair insertions' on a 'polymer' string"""
 # once again, I'm going to use pandas (a Series) even when a dict would do just
 # fine so that I can continue to familiarize myself with it
 
-from typing import List, Tuple
+from typing import List, Tuple, cast
 from functools import reduce
 import pandas as pd
 from utils import get_puzzle_input
@@ -39,13 +39,13 @@ def split_instructions(instructions_list: List[str]) -> pd.Series:
     return pd.Series([(instructions_list[0][0], instructions_list[1]),
                       (instructions_list[1], instructions_list[0][1])])
 
-def parse_input(input_string: str) -> Tuple[pd.Series, pd.DataFrame, Tuple[str, str]]:
+def parse_input(input_string: str) -> Tuple[pd.Series, pd.Series, Tuple[str, str]]:
     """Read the polymer template and the pair insertion instructions and
     return:
     * A pandas series giving the count of each pair (pairs are the index
       elements and counts are the values)
-    * A pandas data frame representing insertion instructions (start pair is
-      the index and the two new pairs are the columns)
+    * A pandas series representing insertion instructions (start pair is
+      the index and the values are new pairs)
     * The identity of the first and last elements in the chain, since every
       element but these will be double-counted"""
     # Again, python's built-in types are perfectly sufficient for this job, but
@@ -59,33 +59,30 @@ def parse_input(input_string: str) -> Tuple[pd.Series, pd.DataFrame, Tuple[str, 
                             index=zip(lines[0][0:-1], lines[0][1:]),
                             name='pair_count')
 
+    # Using a Series to map Tuple[str, str] -> two Tuple[str, str] entries,
+    # representing the new set of pairs created by an insertion
     instruction_pairs = pd.Series(lines[2:]).str.split(' -> ')
     pair_insertions = (
         instruction_pairs.apply(split_instructions)
         .set_axis(instruction_pairs.map(lambda x: (x[0][0], x[0][1])))
+        .melt(value_vars=[0, 1], ignore_index=False, value_name='new_pairs')
+        .loc[:, 'new_pairs']
     )
 
     return (pair_counts, pair_insertions, first_last)
 
 def insert_at_pairs(pair_counts: pd.Series,
-                    pair_insertions: pd.DataFrame) -> pd.Series:
+                    pair_insertions: pd.Series) -> pd.Series:
     """Perform the pair insertions and return an updated series of pair
     counts"""
-    # Still working out pandas's indexing interface, of which I am NOT A FAN.
-    # This first step gives a mult-indexed Series:
-    updated_pair_counts_multi = (
-        pair_insertions
-        .merge(pair_counts, left_index=True, right_index=True)
-        .set_index([0, 1])['pair_count']
-    )
-    # I don't know a better way to fold this multiindex into a single index, so
-    # we concatenate the data with itself using each index in turn, then group
-    # by the new index and sum the pairs
+    # Merge the counts and insertions series to get a data frame
     return (
-        pd.concat([updated_pair_counts_multi.droplevel(0).rename_axis(None),
-                   updated_pair_counts_multi.droplevel(1).rename_axis(None)])
-        .groupby(level=0)
+        pd.merge(pair_insertions, pair_counts,
+                 left_index=True, right_index=True)
+        .groupby('new_pairs')
         .sum()
+        .rename_axis(None)
+        .loc[:, 'pair_count']
     )
 
 def count_elements(pair_counts: pd.Series,
@@ -94,20 +91,20 @@ def count_elements(pair_counts: pd.Series,
     element in the string, return the count of each individual element"""
     # Everything was double-counted except we're missing one instance each of
     # the first and last element (that's why we preserve them)
+    # Note (to self?): `cast` is just here to make mypy happy
     element_count = (
-        pd.concat([pair_counts.rename(lambda x: x[0]),
-                   pair_counts.rename(lambda x: x[1])])
-        .groupby(level=0)
+        pd.concat([pair_counts.rename(lambda x: cast(tuple, x)[0]),
+                   pair_counts.rename(lambda x: cast(tuple, x)[1])])
+        .groupby(level=0)           # Group by indices
         .sum()
-        .rename('element_count')
+        .rename('element_count')    # Superfluous, but nice
     )
-    element_count[first_last[0]] += 1
-    element_count[first_last[1]] += 1
+    element_count[list(first_last)] += 1
     element_count /= 2
     return element_count
 
 def apply_insertions(pair_counts: pd.Series,
-                     pair_insertions: pd.DataFrame,
+                     pair_insertions: pd.Series,
                      first_last: Tuple[str, str],
                      num_insertions: int) -> pd.Series:
     """Apply `num_insertions` insertions and get the final count of the

	advent_of_code_2021 My attempts to work through the 2021 Advent of Code problems.
	git clone https://git.eamoncaddigan.net/advent_of_code_2021.git
	Log \| Files \| Refs \| README \| LICENSE