commit 242c2e925eb7250789d8ed3aec603ece32e5028b
parent 3078459e2f86c11c6446a3470f9cb7af42eea255
Author: Eamon Caddigan <eamon.caddigan@gmail.com>
Date: Tue, 14 Dec 2021 17:49:02 -0500
Improved insertion logic and passing type checks
Diffstat:
1 file changed, 22 insertions(+), 25 deletions(-)
diff --git a/day14_part1.py b/day14_part1.py
@@ -7,7 +7,7 @@ Perform 'pair insertions' on a 'polymer' string"""
# once again, I'm going to use pandas (a Series) even when a dict would do just
# fine so that I can continue to familiarize myself with it
-from typing import List, Tuple
+from typing import List, Tuple, cast
from functools import reduce
import pandas as pd
from utils import get_puzzle_input
@@ -39,13 +39,13 @@ def split_instructions(instructions_list: List[str]) -> pd.Series:
return pd.Series([(instructions_list[0][0], instructions_list[1]),
(instructions_list[1], instructions_list[0][1])])
-def parse_input(input_string: str) -> Tuple[pd.Series, pd.DataFrame, Tuple[str, str]]:
+def parse_input(input_string: str) -> Tuple[pd.Series, pd.Series, Tuple[str, str]]:
"""Read the polymer template and the pair insertion instructions and
return:
* A pandas series giving the count of each pair (pairs are the index
elements and counts are the values)
- * A pandas data frame representing insertion instructions (start pair is
- the index and the two new pairs are the columns)
+ * A pandas series representing insertion instructions (start pair is
+ the index and the values are new pairs)
* The identity of the first and last elements in the chain, since every
element but these will be double-counted"""
# Again, python's built-in types are perfectly sufficient for this job, but
@@ -59,33 +59,30 @@ def parse_input(input_string: str) -> Tuple[pd.Series, pd.DataFrame, Tuple[str,
index=zip(lines[0][0:-1], lines[0][1:]),
name='pair_count')
+ # Using a Series to map Tuple[str, str] -> two Tuple[str, str] entries,
+ # representing the new set of pairs created by an insertion
instruction_pairs = pd.Series(lines[2:]).str.split(' -> ')
pair_insertions = (
instruction_pairs.apply(split_instructions)
.set_axis(instruction_pairs.map(lambda x: (x[0][0], x[0][1])))
+ .melt(value_vars=[0, 1], ignore_index=False, value_name='new_pairs')
+ .loc[:, 'new_pairs']
)
return (pair_counts, pair_insertions, first_last)
def insert_at_pairs(pair_counts: pd.Series,
- pair_insertions: pd.DataFrame) -> pd.Series:
+ pair_insertions: pd.Series) -> pd.Series:
"""Perform the pair insertions and return an updated series of pair
counts"""
- # Still working out pandas's indexing interface, of which I am NOT A FAN.
- # This first step gives a mult-indexed Series:
- updated_pair_counts_multi = (
- pair_insertions
- .merge(pair_counts, left_index=True, right_index=True)
- .set_index([0, 1])['pair_count']
- )
- # I don't know a better way to fold this multiindex into a single index, so
- # we concatenate the data with itself using each index in turn, then group
- # by the new index and sum the pairs
+ # Merge the insertions and counts series on their pair index to get a data frame
return (
- pd.concat([updated_pair_counts_multi.droplevel(0).rename_axis(None),
- updated_pair_counts_multi.droplevel(1).rename_axis(None)])
- .groupby(level=0)
+ pd.merge(pair_insertions, pair_counts,
+ left_index=True, right_index=True)
+ .groupby('new_pairs')
.sum()
+ .rename_axis(None)
+ .loc[:, 'pair_count']
)
def count_elements(pair_counts: pd.Series,
@@ -94,20 +91,20 @@ def count_elements(pair_counts: pd.Series,
element in the string, return the count of each individual element"""
# Everything was double-counted except we're missing one instance each of
# the first and last element (that's why we preserve them)
+ # Note (to self?): `cast` is just here to make mypy happy
element_count = (
- pd.concat([pair_counts.rename(lambda x: x[0]),
- pair_counts.rename(lambda x: x[1])])
- .groupby(level=0)
+ pd.concat([pair_counts.rename(lambda x: cast(tuple, x)[0]),
+ pair_counts.rename(lambda x: cast(tuple, x)[1])])
+ .groupby(level=0) # Group by indices
.sum()
- .rename('element_count')
+ .rename('element_count') # Superfluous, but nice
)
- element_count[first_last[0]] += 1
- element_count[first_last[1]] += 1
+ element_count[list(first_last)] += 1
element_count /= 2
return element_count
def apply_insertions(pair_counts: pd.Series,
- pair_insertions: pd.DataFrame,
+ pair_insertions: pd.Series,
first_last: Tuple[str, str],
num_insertions: int) -> pd.Series:
"""Apply `num_insertions` insertions and get the final count of the