New grid_to_distance_matrix is almost 17x faster - advent_of_code_2021 - My attempts to work through the 2021 Advent of Code problems.

commit 5478829a2bdfece833624da2f20828451d70702e
parent cc54381e861ab4c06c07210cdfc8b68c88f6f862
Author: Eamon Caddigan <eamon.caddigan@gmail.com>
Date:   Wed, 15 Dec 2021 18:40:50 -0500

New grid_to_distance_matrix is almost 17x faster

Diffstat:
M day15_part1.py  | 66 +++++++++++++++++++++++++-----------------------------------------

1 file changed, 25 insertions(+), 41 deletions(-)
diff --git a/day15_part1.py b/day15_part1.py
@@ -32,51 +32,35 @@ def grid_to_distance_matrix(input_array: np.ndarray) -> csr_matrix:
     # indicies into the original 2d array in the distance matrix: e.g., the
     # last element in a 3x3 array has index 8 (thanks to zero-indexing), and is
     # connected to index 7 (to its left) and index 5 (above it), so the sparse
-    # distance matrix will have connections in row 8 at columns 5 and 7.  I
-    # believe that this function could be much more efficient by being smart
-    # with divmod, but it's only run once per solution, so the easier-to-debug
-    # implementation wins the day. Also note that this is a _directed_
-    # distance/connectivity/association matrix, because the cost of going from
-    # #8 to #7 isn't necessarily teh same as going from #7 to #8.
-    dist_row_ids = []
-    dist_col_ids = []
-    dist_data = []
-    for grid_row_id in range(input_array.shape[0]):
-        for grid_col_id in range(input_array.shape[1]):
-            dist_row_id = grid_row_id * input_array.shape[1] + grid_col_id
-            if grid_row_id >= 1:
-                # Top neighbor
-                dist_row_ids.append(dist_row_id)
-                dist_col_ids.append(dist_row_id - \
-                                    input_array.shape[1])
-                dist_data.append(input_array[grid_row_id - 1,
-                                             grid_col_id])
+    # distance matrix will have connections in row 8 at columns 5 and 7. Note
+    # that this is a _directed_ distance/connectivity/association matrix,
+    # because the cost of going from #8 to #7 isn't necessarily the same as
+    # going from #7 to #8.
 
-            if grid_row_id < (input_array.shape[0] - 1):
-                # Bottom neighbor
-                dist_row_ids.append(dist_row_id)
-                dist_col_ids.append(dist_row_id + \
-                                    input_array.shape[1])
-                dist_data.append(input_array[grid_row_id + 1,
-                                             grid_col_id])
+    # In the (sparse) distance matrix, data will be stored as:
+    # Row: node id (row-major indexing), Col: neighbor id
+    # We'll start off by pretending that every node has four neighbors
+    dist_row_ids = np.repeat(np.arange(input_array.size, dtype=int), 4)
+    dist_col_ids = np.empty_like(dist_row_ids)
+    dist_col_ids[0::4] = dist_row_ids[0::4] - 1                     # Left
+    dist_col_ids[1::4] = dist_row_ids[1::4] + 1                     # Right
+    dist_col_ids[2::4] = dist_row_ids[2::4] - input_array.shape[1]  # Top
+    dist_col_ids[3::4] = dist_row_ids[3::4] + input_array.shape[1]  # Bottom
 
-            if grid_col_id >= 1:
-                # Left neighbor
-                dist_row_ids.append(dist_row_id)
-                dist_col_ids.append(dist_row_id - 1)
-                dist_data.append(input_array[grid_row_id,
-                                             grid_col_id - 1])
-
-            if grid_col_id < (input_array.shape[1] - 1):
-                # Right neighbor
-                dist_row_ids.append(dist_row_id)
-                dist_col_ids.append(dist_row_id + 1)
-                dist_data.append(input_array[grid_row_id,
-                                             grid_col_id + 1])
+    # Now we remove the invalid neighbors (at the edges)
+    grid_col_ids = dist_row_ids % input_array.shape[1]
+    grid_row_ids = dist_row_ids // input_array.shape[1]
+    keep_neighbor = np.empty_like(grid_col_ids, dtype=bool)
+    keep_neighbor[0::4] = grid_col_ids[0::4] >= 1
+    keep_neighbor[1::4] = grid_col_ids[1::4] < (input_array.shape[1] - 1)
+    keep_neighbor[2::4] = grid_row_ids[2::4] >= 1
+    keep_neighbor[3::4] = grid_row_ids[3::4] < (input_array.shape[0] - 1)
+    dist_row_ids = dist_row_ids[keep_neighbor]
+    dist_col_ids = dist_col_ids[keep_neighbor]
 
     # There are a bunch of ways to instantiate a csr_matrix, this is the most
-    # appropriate for these data
-    return csr_matrix((dist_data,
+    # appropriate for this data format
+    return csr_matrix((np.reshape(input_array, input_array.size)[dist_col_ids],
                        (dist_row_ids, dist_col_ids)))
 
 def find_shortest_path(distance_matrix: csr_matrix):

	advent_of_code_2021 My attempts to work through the 2021 Advent of Code problems.
	git clone https://git.eamoncaddigan.net/advent_of_code_2021.git
	Log \| Files \| Refs \| README \| LICENSE