From a50a2e720aa03101d4ae0abe15c67ddbc47fdb47 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sat, 1 Jun 2024 23:09:37 -0300 Subject: [PATCH] start to update --- data-raw/update-s2.R | 11 +- src/Makevars.in | 34 +- src/Makevars.win | 16 +- src/RcppExports.cpp | 12 +- src/s2/_fp_contract_off.h | 11 +- src/s2/base/casts.h | 111 +- src/s2/base/commandlineflags.h | 36 +- .../commandlineflags_declare.h} | 23 +- src/s2/base/integral_types.h | 2 + src/s2/base/log_severity.h | 8 - src/s2/base/logging.h | 134 +- src/s2/base/port.h | 939 +------ src/s2/base/stringprintf.cc | 107 - src/s2/base/stringprintf.h | 53 - src/s2/base/strtoint.cc | 65 - src/s2/base/strtoint.h | 106 - src/s2/encoded_s2cell_id_vector.cc | 29 +- src/s2/encoded_s2cell_id_vector.h | 10 +- src/s2/encoded_s2point_vector.cc | 132 +- src/s2/encoded_s2point_vector.h | 54 +- src/s2/encoded_s2shape_index.cc | 95 +- src/s2/encoded_s2shape_index.h | 143 +- src/s2/encoded_string_vector.cc | 33 +- src/s2/encoded_string_vector.h | 10 +- src/s2/encoded_uint_vector.h | 32 +- src/s2/id_set_lexicon.cc | 18 +- src/s2/id_set_lexicon.h | 11 +- src/s2/mutable_s2shape_index.cc | 1207 +++++--- src/s2/mutable_s2shape_index.h | 297 +- src/s2/r1interval.h | 17 +- src/s2/r2rect.cc | 8 +- src/s2/r2rect.h | 15 +- src/s2/s1angle.cc | 6 +- src/s2/s1angle.h | 63 +- src/s2/s1chord_angle.cc | 23 +- src/s2/s1chord_angle.h | 96 +- src/s2/s1interval.cc | 2 - src/s2/s1interval.h | 8 + src/s2/s2boolean_operation.cc | 910 ++++-- src/s2/s2boolean_operation.h | 135 +- src/s2/s2buffer_operation.cc | 770 +++++ src/s2/s2buffer_operation.h | 359 +++ src/s2/s2builder.cc | 876 ++++-- src/s2/s2builder.h | 422 ++- src/s2/s2builder_graph.cc | 149 +- src/s2/s2builder_graph.h | 120 +- src/s2/s2builder_layer.h | 4 +- src/s2/s2builderutil_closed_set_normalizer.cc | 19 +- src/s2/s2builderutil_closed_set_normalizer.h | 10 +- ...s2builderutil_find_polygon_degeneracies.cc | 19 +- .../s2builderutil_find_polygon_degeneracies.h | 1 + 
...s2builderutil_get_snapped_winding_delta.cc | 438 +++ .../s2builderutil_get_snapped_winding_delta.h | 165 ++ src/s2/s2builderutil_graph_shape.h | 2 + src/s2/s2builderutil_lax_polygon_layer.cc | 20 +- src/s2/s2builderutil_lax_polygon_layer.h | 13 +- src/s2/s2builderutil_lax_polyline_layer.cc | 104 + src/s2/s2builderutil_lax_polyline_layer.h | 154 + src/s2/s2builderutil_s2point_vector_layer.cc | 11 +- src/s2/s2builderutil_s2point_vector_layer.h | 7 +- src/s2/s2builderutil_s2polygon_layer.cc | 16 +- src/s2/s2builderutil_s2polygon_layer.h | 5 +- src/s2/s2builderutil_s2polyline_layer.cc | 9 + src/s2/s2builderutil_s2polyline_layer.h | 16 +- .../s2builderutil_s2polyline_vector_layer.cc | 10 + .../s2builderutil_s2polyline_vector_layer.h | 29 +- src/s2/s2builderutil_snap_functions.cc | 17 +- src/s2/s2builderutil_snap_functions.h | 10 +- src/s2/s2builderutil_testing.cc | 77 +- src/s2/s2builderutil_testing.h | 47 +- src/s2/s2cap.cc | 23 +- src/s2/s2cap.h | 21 +- src/s2/s2cell.cc | 26 +- src/s2/s2cell.h | 89 +- src/s2/s2cell_id.cc | 114 +- src/s2/s2cell_id.h | 108 +- src/s2/s2cell_index.cc | 29 +- src/s2/s2cell_index.h | 41 +- src/s2/s2cell_iterator.h | 203 ++ src/s2/s2cell_iterator_join.h | 441 +++ src/s2/s2cell_iterator_testing.h | 119 + src/s2/s2cell_range_iterator.h | 262 ++ src/s2/s2cell_union.cc | 140 +- src/s2/s2cell_union.h | 120 +- src/s2/s2centroids.cc | 5 +- src/s2/s2closest_cell_query.cc | 10 +- src/s2/s2closest_cell_query.h | 5 + src/s2/s2closest_cell_query_base.h | 35 +- src/s2/s2closest_edge_query.cc | 10 +- src/s2/s2closest_edge_query.h | 70 +- src/s2/s2closest_edge_query_base.h | 39 +- src/s2/s2closest_edge_query_testing.h | 13 +- src/s2/s2closest_point_query.cc | 4 + src/s2/s2closest_point_query.h | 3 + src/s2/s2closest_point_query_base.h | 25 +- src/s2/s2coder.h | 159 ++ src/s2/s2coder_testing.h | 53 + src/s2/s2contains_point_query.h | 38 +- src/s2/s2contains_vertex_query.cc | 16 +- src/s2/s2convex_hull_query.cc | 52 +- src/s2/s2convex_hull_query.h | 1 + 
src/s2/s2coords.cc | 2 + src/s2/s2coords.h | 10 +- src/s2/s2coords_internal.h | 3 +- src/s2/s2crossing_edge_query.cc | 21 +- src/s2/s2crossing_edge_query.h | 6 +- src/s2/s2debug.cc | 7 +- src/s2/s2debug.h | 3 +- src/s2/s2distance_target.h | 8 +- src/s2/s2earth.cc | 7 +- src/s2/s2earth.h | 248 +- src/s2/s2edge_clipping.cc | 21 +- src/s2/s2edge_clipping.h | 90 +- src/s2/s2edge_crosser.cc | 44 +- src/s2/s2edge_crosser.h | 355 ++- src/s2/s2edge_crossings.cc | 481 +++- src/s2/s2edge_crossings.h | 114 +- src/s2/s2edge_crossings_internal.h | 70 +- src/s2/s2edge_distances.cc | 223 +- src/s2/s2edge_distances.h | 146 +- src/s2/s2edge_tessellator.cc | 16 +- src/s2/s2edge_tessellator.h | 2 + src/s2/s2edge_vector_shape.h | 8 +- src/s2/s2error.cc | 142 +- src/s2/s2error.h | 59 +- src/s2/s2furthest_edge_query.cc | 7 +- src/s2/s2furthest_edge_query.h | 72 +- src/s2/s2hausdorff_distance_query.cc | 124 + src/s2/s2hausdorff_distance_query.h | 210 ++ src/s2/s2latlng.cc | 38 +- src/s2/s2latlng.h | 112 +- src/s2/s2latlng_rect.cc | 25 +- src/s2/s2latlng_rect.h | 8 +- src/s2/s2latlng_rect_bounder.cc | 22 +- src/s2/s2lax_loop_shape.cc | 55 +- src/s2/s2lax_loop_shape.h | 20 +- src/s2/s2lax_polygon_shape.cc | 290 +- src/s2/s2lax_polygon_shape.h | 187 +- src/s2/s2lax_polyline_shape.cc | 51 +- src/s2/s2lax_polyline_shape.h | 49 +- src/s2/s2loop.cc | 337 +-- src/s2/s2loop.h | 87 +- src/s2/s2loop_measures.cc | 78 +- src/s2/s2loop_measures.h | 173 +- src/s2/s2max_distance_targets.cc | 28 +- src/s2/s2max_distance_targets.h | 2 + src/s2/s2measures.cc | 42 +- src/s2/s2memory_tracker.cc | 44 + src/s2/s2memory_tracker.h | 405 +++ src/s2/s2metrics.cc | 38 +- src/s2/s2metrics.h | 32 +- src/s2/s2min_distance_targets.cc | 28 +- src/s2/s2min_distance_targets.h | 3 + src/s2/s2padded_cell.cc | 4 + src/s2/s2padded_cell.h | 2 + src/s2/s2point.h | 144 +- src/s2/s2point_compression.cc | 26 +- src/s2/s2point_compression.h | 2 + src/s2/s2point_index.h | 72 +- src/s2/s2point_region.cc | 5 +- src/s2/s2point_region.h 
| 5 +- src/s2/s2point_span.h | 2 + src/s2/s2point_vector_shape.h | 45 +- src/s2/s2pointutil.cc | 40 +- src/s2/s2pointutil.h | 66 +- src/s2/s2polygon.cc | 488 ++-- src/s2/s2polygon.h | 207 +- src/s2/s2polyline.cc | 286 +- src/s2/s2polyline.h | 127 +- src/s2/s2polyline_alignment.cc | 58 +- src/s2/s2polyline_alignment.h | 33 +- src/s2/s2polyline_alignment_internal.h | 7 +- src/s2/s2polyline_measures.cc | 10 +- src/s2/s2polyline_simplifier.cc | 84 +- src/s2/s2polyline_simplifier.h | 63 +- src/s2/s2predicates.cc | 745 ++++- src/s2/s2predicates.h | 167 +- src/s2/s2predicates_internal.h | 42 + src/s2/s2projections.cc | 4 + src/s2/s2projections.h | 4 +- src/s2/s2r2rect.cc | 6 +- src/s2/s2r2rect.h | 8 + src/s2/s2region.cc | 4 +- src/s2/s2region.h | 19 +- src/s2/s2region_coverer.cc | 38 +- src/s2/s2region_coverer.h | 15 +- src/s2/s2region_intersection.cc | 15 +- src/s2/s2region_intersection.h | 4 +- src/s2/s2region_term_indexer.cc | 54 +- src/s2/s2region_term_indexer.h | 25 +- src/s2/s2region_union.cc | 17 +- src/s2/s2region_union.h | 24 +- src/s2/s2shape.h | 434 ++- src/s2/s2shape_index.cc | 11 + src/s2/s2shape_index.h | 234 +- src/s2/s2shape_index_buffered_region.cc | 16 +- src/s2/s2shape_index_buffered_region.h | 5 + src/s2/s2shape_index_measures.cc | 6 + src/s2/s2shape_index_region.h | 121 +- src/s2/s2shape_measures.cc | 22 +- src/s2/s2shape_nesting_query.cc | 264 ++ src/s2/s2shape_nesting_query.h | 192 ++ .../s2shapeutil_build_polygon_boundaries.cc | 16 +- src/s2/s2shapeutil_build_polygon_boundaries.h | 1 + src/s2/s2shapeutil_coding.cc | 136 +- src/s2/s2shapeutil_coding.h | 46 +- src/s2/s2shapeutil_contains_brute_force.cc | 3 +- src/s2/s2shapeutil_contains_brute_force.h | 1 + src/s2/s2shapeutil_conversion.cc | 72 + src/s2/s2shapeutil_conversion.h | 51 + src/s2/s2shapeutil_count_edges.h | 12 +- src/s2/s2shapeutil_edge_iterator.cc | 8 +- src/s2/s2shapeutil_edge_iterator.h | 4 + src/s2/s2shapeutil_get_reference_point.cc | 6 +- src/s2/s2shapeutil_get_reference_point.h | 1 + 
src/s2/s2shapeutil_range_iterator.cc | 58 - src/s2/s2shapeutil_range_iterator.h | 65 - src/s2/s2shapeutil_shape_edge.h | 10 +- src/s2/s2shapeutil_shape_edge_id.h | 21 +- src/s2/s2shapeutil_testing.cc | 109 + .../s2shapeutil_visit_crossing_edge_pairs.cc | 60 +- .../s2shapeutil_visit_crossing_edge_pairs.h | 4 +- src/s2/s2testing.cc | 101 +- src/s2/s2testing.h | 34 +- src/s2/s2text_format.cc | 205 +- src/s2/s2text_format.h | 114 +- src/s2/s2wedge_relations.cc | 1 + src/s2/s2winding_operation.cc | 561 ++++ src/s2/s2winding_operation.h | 229 ++ src/s2/s2wrapped_shape.h | 53 + src/s2/sequence_lexicon.h | 6 + src/s2/strings/ostringstream.cc | 35 - src/s2/strings/ostringstream.h | 105 - src/s2/strings/serialize.cc | 46 - src/s2/strings/serialize.h | 40 - src/s2/{base/mutex.h => testing/gtest_prod.h} | 14 +- src/s2/thread_testing.cc | 93 + src/s2/thread_testing.h | 65 + src/s2/util/bitmap/bitmap.h | 1137 ++++++++ src/s2/util/bits/bit-interleave.cc | 225 +- src/s2/util/bits/bit-interleave.h | 3 +- src/s2/util/bits/bits.cc | 155 -- src/s2/util/bits/bits.h | 745 +---- src/s2/util/coding/coder.cc | 67 +- src/s2/util/coding/coder.h | 244 +- src/s2/util/coding/varint.cc | 241 +- src/s2/util/coding/varint.h | 131 +- src/s2/util/endian/endian.h | 176 +- src/s2/util/gtl/btree.h | 2471 ----------------- src/s2/util/gtl/btree_container.h | 411 --- src/s2/util/gtl/btree_map.h | 79 - src/s2/util/gtl/btree_set.h | 73 - src/s2/util/gtl/compact_array.h | 48 +- src/s2/util/gtl/container_logging.h | 26 +- src/s2/util/gtl/dense_hash_set.h | 1 + src/s2/util/gtl/densehashtable.h | 75 +- src/s2/util/gtl/hashtable_common.h | 1 + src/s2/util/hash/mix.h | 3 + src/s2/util/math/exactfloat/exactfloat.cc | 22 +- src/s2/util/math/exactfloat/exactfloat.h | 48 +- src/s2/util/math/mathutil.cc | 11 - src/s2/util/math/mathutil.h | 62 + src/s2/util/math/matrix3x3.h | 215 +- src/s2/util/math/vector.h | 299 +- src/s2/util/math/vector3_hash.h | 54 - src/s2/util/units/physical-units.h | 78 +- 
src/s2/value_lexicon.h | 3 + 267 files changed, 18689 insertions(+), 10921 deletions(-) rename src/s2/{util/gtl/layout.h => base/commandlineflags_declare.h} (53%) delete mode 100644 src/s2/base/stringprintf.cc delete mode 100644 src/s2/base/stringprintf.h delete mode 100644 src/s2/base/strtoint.cc delete mode 100644 src/s2/base/strtoint.h create mode 100644 src/s2/s2buffer_operation.cc create mode 100644 src/s2/s2buffer_operation.h create mode 100644 src/s2/s2builderutil_get_snapped_winding_delta.cc create mode 100644 src/s2/s2builderutil_get_snapped_winding_delta.h create mode 100644 src/s2/s2builderutil_lax_polyline_layer.cc create mode 100644 src/s2/s2builderutil_lax_polyline_layer.h create mode 100644 src/s2/s2cell_iterator.h create mode 100644 src/s2/s2cell_iterator_join.h create mode 100644 src/s2/s2cell_iterator_testing.h create mode 100644 src/s2/s2cell_range_iterator.h create mode 100644 src/s2/s2coder.h create mode 100644 src/s2/s2coder_testing.h create mode 100644 src/s2/s2hausdorff_distance_query.cc create mode 100644 src/s2/s2hausdorff_distance_query.h create mode 100644 src/s2/s2memory_tracker.cc create mode 100644 src/s2/s2memory_tracker.h create mode 100644 src/s2/s2shape_nesting_query.cc create mode 100644 src/s2/s2shape_nesting_query.h create mode 100644 src/s2/s2shapeutil_conversion.cc create mode 100644 src/s2/s2shapeutil_conversion.h delete mode 100644 src/s2/s2shapeutil_range_iterator.cc delete mode 100644 src/s2/s2shapeutil_range_iterator.h create mode 100644 src/s2/s2shapeutil_testing.cc create mode 100644 src/s2/s2winding_operation.cc create mode 100644 src/s2/s2winding_operation.h create mode 100644 src/s2/s2wrapped_shape.h delete mode 100644 src/s2/strings/ostringstream.cc delete mode 100644 src/s2/strings/ostringstream.h delete mode 100644 src/s2/strings/serialize.cc delete mode 100644 src/s2/strings/serialize.h rename src/s2/{base/mutex.h => testing/gtest_prod.h} (66%) create mode 100644 src/s2/thread_testing.cc create mode 100644 
src/s2/thread_testing.h create mode 100644 src/s2/util/bitmap/bitmap.h delete mode 100644 src/s2/util/bits/bits.cc delete mode 100644 src/s2/util/gtl/btree.h delete mode 100644 src/s2/util/gtl/btree_container.h delete mode 100644 src/s2/util/gtl/btree_map.h delete mode 100644 src/s2/util/gtl/btree_set.h delete mode 100644 src/s2/util/math/vector3_hash.h diff --git a/data-raw/update-s2.R b/data-raw/update-s2.R index 14f66d42..6e302254 100644 --- a/data-raw/update-s2.R +++ b/data-raw/update-s2.R @@ -2,7 +2,7 @@ library(tidyverse) # download S2 -source_url <- "https://github.com/google/s2geometry/archive/v0.9.0.zip" +source_url <- "https://github.com/google/s2geometry/archive/v0.11.1.zip" curl::curl_download(source_url, "data-raw/s2-source.tar.gz") unzip("data-raw/s2-source.tar.gz", exdir = "data-raw") @@ -11,15 +11,13 @@ s2_dir <- list.files("data-raw", "^s2geometry-[0-9.]+", include.dirs = TRUE, ful stopifnot(dir.exists(s2_dir), length(s2_dir) == 1) src_dir <- file.path(s2_dir, "src/s2") -# headers live in inst/include -# keeping the directory structure means that -# we don't have to update any source files (beause of header locations) +# Process headers headers <- tibble( path = list.files(file.path(s2_dir, "src", "s2"), "\\.(h|inc)$", full.names = TRUE, recursive = TRUE), - final_path = str_replace(path, ".*?s2/", "inst/include/s2/") + final_path = str_replace(path, ".*?s2/", "src/s2/") ) -# Put S2 compilation units in src/s2/... 
+# Process compilation units source_files <- tibble( path = list.files(file.path(s2_dir, "src", "s2"), "\\.cc$", full.names = TRUE, recursive = TRUE), final_path = str_replace(path, ".*?src/", "src/") %>% @@ -29,7 +27,6 @@ source_files <- tibble( # clean current headers and source files unlink("src/s2", recursive = TRUE) -unlink("inst/include/s2", recursive = TRUE) # create destination dirs dest_dirs <- c( diff --git a/src/Makevars.in b/src/Makevars.in index 2bc496ee..2be2c92c 100644 --- a/src/Makevars.in +++ b/src/Makevars.in @@ -111,16 +111,6 @@ OBJECTS = $(ABSL_LIBS) \ s2-lnglat.o \ s2-matrix.o \ wk-impl.o \ - s2geography/accessors.o \ - s2geography/accessors-geog.o \ - s2geography/linear-referencing.o \ - s2geography/distance.o \ - s2geography/build.o \ - s2geography/coverings.o \ - s2geography/geography.o \ - s2geography/predicates.o \ - s2/base/stringprintf.o \ - s2/base/strtoint.o \ s2/encoded_s2cell_id_vector.o \ s2/encoded_s2point_vector.o \ s2/encoded_s2shape_index.o \ @@ -132,11 +122,14 @@ OBJECTS = $(ABSL_LIBS) \ s2/s1chord_angle.o \ s2/s1interval.o \ s2/s2boolean_operation.o \ + s2/s2buffer_operation.o \ s2/s2builder_graph.o \ s2/s2builder.o \ s2/s2builderutil_closed_set_normalizer.o \ s2/s2builderutil_find_polygon_degeneracies.o \ + s2/s2builderutil_get_snapped_winding_delta.o \ s2/s2builderutil_lax_polygon_layer.o \ + s2/s2builderutil_lax_polyline_layer.o \ s2/s2builderutil_s2point_vector_layer.o \ s2/s2builderutil_s2polygon_layer.o \ s2/s2builderutil_s2polyline_layer.o \ @@ -165,6 +158,7 @@ OBJECTS = $(ABSL_LIBS) \ s2/s2edge_tessellator.o \ s2/s2error.o \ s2/s2furthest_edge_query.o \ + s2/s2hausdorff_distance_query.o \ s2/s2latlng_rect_bounder.o \ s2/s2latlng_rect.o \ s2/s2latlng.o \ @@ -175,6 +169,7 @@ OBJECTS = $(ABSL_LIBS) \ s2/s2loop.o \ s2/s2max_distance_targets.o \ s2/s2measures.o \ + s2/s2memory_tracker.o \ s2/s2metrics.o \ s2/s2min_distance_targets.o \ s2/s2padded_cell.o \ @@ -198,22 +193,31 @@ OBJECTS = $(ABSL_LIBS) \ 
s2/s2shape_index_measures.o \ s2/s2shape_index.o \ s2/s2shape_measures.o \ + s2/s2shape_nesting_query.o \ s2/s2shapeutil_build_polygon_boundaries.o \ s2/s2shapeutil_coding.o \ s2/s2shapeutil_contains_brute_force.o \ + s2/s2shapeutil_conversion.o \ s2/s2shapeutil_edge_iterator.o \ s2/s2shapeutil_get_reference_point.o \ - s2/s2shapeutil_range_iterator.o \ + s2/s2shapeutil_testing.o \ s2/s2shapeutil_visit_crossing_edge_pairs.o \ s2/s2testing.o \ s2/s2text_format.o \ s2/s2wedge_relations.o \ - s2/strings/ostringstream.o \ - s2/strings/serialize.o \ + s2/s2winding_operation.o \ + s2/thread_testing.o \ s2/util/bits/bit-interleave.o \ - s2/util/bits/bits.o \ s2/util/coding/coder.o \ s2/util/coding/varint.o \ s2/util/math/exactfloat/exactfloat.o \ s2/util/math/mathutil.o \ - s2/util/units/length-units.o + s2/util/units/length-units.o \ + s2geography/accessors-geog.o \ + s2geography/accessors.o \ + s2geography/build.o \ + s2geography/coverings.o \ + s2geography/distance.o \ + s2geography/geography.o \ + s2geography/linear-referencing.o \ + s2geography/predicates.o diff --git a/src/Makevars.win b/src/Makevars.win index 31ed40b7..1edfafd1 100644 --- a/src/Makevars.win +++ b/src/Makevars.win @@ -115,8 +115,6 @@ S2LIBS = $(ABSL_LIBS) \ s2geography/coverings.o \ s2geography/geography.o \ s2geography/predicates.o \ - s2/base/stringprintf.o \ - s2/base/strtoint.o \ s2/encoded_s2cell_id_vector.o \ s2/encoded_s2point_vector.o \ s2/encoded_s2shape_index.o \ @@ -128,11 +126,14 @@ S2LIBS = $(ABSL_LIBS) \ s2/s1chord_angle.o \ s2/s1interval.o \ s2/s2boolean_operation.o \ + s2/s2buffer_operation.o \ s2/s2builder_graph.o \ s2/s2builder.o \ s2/s2builderutil_closed_set_normalizer.o \ s2/s2builderutil_find_polygon_degeneracies.o \ + s2/s2builderutil_get_snapped_winding_delta.o \ s2/s2builderutil_lax_polygon_layer.o \ + s2/s2builderutil_lax_polyline_layer.o \ s2/s2builderutil_s2point_vector_layer.o \ s2/s2builderutil_s2polygon_layer.o \ s2/s2builderutil_s2polyline_layer.o \ @@ -161,6 +162,7 @@ 
S2LIBS = $(ABSL_LIBS) \ s2/s2edge_tessellator.o \ s2/s2error.o \ s2/s2furthest_edge_query.o \ + s2/s2hausdorff_distance_query.o \ s2/s2latlng_rect_bounder.o \ s2/s2latlng_rect.o \ s2/s2latlng.o \ @@ -171,6 +173,7 @@ S2LIBS = $(ABSL_LIBS) \ s2/s2loop.o \ s2/s2max_distance_targets.o \ s2/s2measures.o \ + s2/s2memory_tracker.o \ s2/s2metrics.o \ s2/s2min_distance_targets.o \ s2/s2padded_cell.o \ @@ -194,20 +197,21 @@ S2LIBS = $(ABSL_LIBS) \ s2/s2shape_index_measures.o \ s2/s2shape_index.o \ s2/s2shape_measures.o \ + s2/s2shape_nesting_query.o \ s2/s2shapeutil_build_polygon_boundaries.o \ s2/s2shapeutil_coding.o \ s2/s2shapeutil_contains_brute_force.o \ + s2/s2shapeutil_conversion.o \ s2/s2shapeutil_edge_iterator.o \ s2/s2shapeutil_get_reference_point.o \ - s2/s2shapeutil_range_iterator.o \ + s2/s2shapeutil_testing.o \ s2/s2shapeutil_visit_crossing_edge_pairs.o \ s2/s2testing.o \ s2/s2text_format.o \ s2/s2wedge_relations.o \ - s2/strings/ostringstream.o \ - s2/strings/serialize.o \ + s2/s2winding_operation.o \ + s2/thread_testing.o \ s2/util/bits/bit-interleave.o \ - s2/util/bits/bits.o \ s2/util/coding/coder.o \ s2/util/coding/varint.o \ s2/util/math/exactfloat/exactfloat.o \ diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 24a5ac1b..6350151d 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -1344,12 +1344,12 @@ BEGIN_RCPP END_RCPP } -RcppExport SEXP c_s2_geography_writer_new(SEXP, SEXP, SEXP, SEXP); -RcppExport SEXP c_s2_handle_geography(SEXP, SEXP); -RcppExport SEXP c_s2_handle_geography_tessellated(SEXP, SEXP); -RcppExport SEXP c_s2_projection_mercator(SEXP); -RcppExport SEXP c_s2_projection_orthographic(SEXP); -RcppExport SEXP c_s2_projection_plate_carree(SEXP); +RcppExport SEXP c_s2_geography_writer_new(void *, void *, void *, void *); +RcppExport SEXP c_s2_handle_geography(void *, void *); +RcppExport SEXP c_s2_handle_geography_tessellated(void *, void *); +RcppExport SEXP c_s2_projection_mercator(void *); +RcppExport SEXP 
c_s2_projection_orthographic(void *); +RcppExport SEXP c_s2_projection_plate_carree(void *); RcppExport SEXP c_s2_trans_s2_lnglat_new(void); RcppExport SEXP c_s2_trans_s2_point_new(void); diff --git a/src/s2/_fp_contract_off.h b/src/s2/_fp_contract_off.h index a053c7a7..4f7e915d 100644 --- a/src/s2/_fp_contract_off.h +++ b/src/s2/_fp_contract_off.h @@ -38,16 +38,7 @@ // appears before the first non-inline function definition. It is // named with an underscore so that it is included first among the S2 headers. -// TODO(compiler-team): Figure out how to do this in a portable way. -#if defined(HAVE_ARMEABI_V7A) -// Some android builds use a buggy compiler that runs out of memory while -// parsing the pragma (--cpu=armeabi-v7a). - -#elif defined(__ANDROID__) -// Other android builds use a buggy compiler that crashes with an internal -// error (Android NDK R9). - -#elif defined(__clang__) +#if defined(__clang__) // Clang supports the standard C++ pragma for turning off this optimization. #pragma STDC FP_CONTRACT OFF diff --git a/src/s2/base/casts.h b/src/s2/base/casts.h index 84880c7b..a227d234 100644 --- a/src/s2/base/casts.h +++ b/src/s2/base/casts.h @@ -13,7 +13,6 @@ // limitations under the License. // - // // Various Google-specific casting templates. // @@ -25,12 +24,16 @@ #ifndef S2_BASE_CASTS_H_ #define S2_BASE_CASTS_H_ -#include // for use with down_cast<> -#include // for enumeration casts and tests +#include // for use with down_cast<> +#include // for enumeration casts and tests + #include -#include "absl/base/casts.h" -#include "absl/base/macros.h" +#include "absl/base/casts.h" // IWYU pragma: keep +#include "absl/base/config.h" +#include "absl/log/log.h" + +#include "s2/base/logging.h" // An "upcast", i.e. a conversion from a pointer to an object to a pointer to a // base subobject, always succeeds if the base is unambiguous and accessible, @@ -44,10 +47,11 @@ // downcast in a polymorphic type hierarchy, you should use the following // function template. 
// -// In debug mode, we use dynamic_cast to double-check whether the downcast is -// legal (we die if it's not). In normal mode, we do the efficient static_cast -// instead. Thus, it's important to test in debug mode to make sure the cast is -// legal! +// This function never returns null. In debug mode, we use dynamic_cast to +// double-check whether the downcast is legal (we die if it's not). In normal +// mode, we do the efficient static_cast instead. Because the process will die +// in debug mode, it's important to test to make sure the cast is legal before +// calling this function! // // This is the only place in the codebase we should use dynamic_cast. // In particular, you should NOT use dynamic_cast for RTTI, e.g. for @@ -56,14 +60,13 @@ // if (auto* p = dynamic_cast(foo)) HandleASubclass2Object(p); // You should design the code some other way not to need this. -template // use like this: down_cast(foo); -inline To down_cast(From* f) { // so we only accept pointers - static_assert( - (std::is_base_of::type>::value), - "target type not derived from source type"); +template // use like this: down_cast(foo); +inline To down_cast(From* f) { // so we only accept pointers + static_assert((std::is_base_of>::value), + "target type not derived from source type"); - // We skip the assert and hence the dynamic_cast if RTTI is disabled. -#if !defined(__GNUC__) || defined(__GXX_RTTI) +// We skip the assert and hence the dynamic_cast if RTTI is disabled. +#if ABSL_INTERNAL_HAS_RTTI // Uses RTTI in dbg and fastbuild. asserts are disabled in opt builds. assert(f == nullptr || dynamic_cast(f) != nullptr); #endif // !defined(__GNUC__) || defined(__GXX_RTTI) @@ -79,19 +82,17 @@ inline To down_cast(From* f) { // so we only accept pointers // There's no need for a special const overload either for the pointer // or the reference form. If you call down_cast with a const T&, the // compiler will just bind From to const T. 
-template +template inline To down_cast(From& f) { - static_assert( - std::is_lvalue_reference::value, "target type not a reference"); - static_assert( - (std::is_base_of::type>::value), - "target type not derived from source type"); + static_assert(std::is_lvalue_reference::value, + "target type not a reference"); + static_assert((std::is_base_of>::value), + "target type not derived from source type"); // We skip the assert and hence the dynamic_cast if RTTI is disabled. -#if !defined(__GNUC__) || defined(__GXX_RTTI) +#if ABSL_INTERNAL_HAS_RTTI // RTTI: debug mode only - assert(dynamic_cast::type*>(&f) != - nullptr); + assert(dynamic_cast*>(&f) != nullptr); #endif // !defined(__GNUC__) || defined(__GXX_RTTI) return static_cast(f); @@ -111,7 +112,7 @@ inline To down_cast(From& f) { // enum A { A_min = -18, A_max = 33 }; // MAKE_ENUM_LIMITS(A, A_min, A_max) // -// Convert an int to an enum in one of two ways. The prefered way is a +// Convert an int to an enum in one of two ways. The preferred way is a // tight conversion, which ensures that A_min <= value <= A_max. // // A var = tight_enum_cast(3); @@ -159,25 +160,25 @@ inline To down_cast(From& f) { template class enum_limits { public: - static const Enum min_enumerator = 0; - static const Enum max_enumerator = 0; - static const bool is_specialized = false; + static constexpr Enum min_enumerator = 0; + static constexpr Enum max_enumerator = 0; + static constexpr bool is_specialized = false; }; // Now we define the macro to define the specialization for enum_limits. // The specialization checks that the enumerators fit within an int. // This checking relies on integral promotion. 
-#define MAKE_ENUM_LIMITS(ENUM_TYPE, ENUM_MIN, ENUM_MAX) \ -template <> \ -class enum_limits { \ -public: \ - static const ENUM_TYPE min_enumerator = ENUM_MIN; \ - static const ENUM_TYPE max_enumerator = ENUM_MAX; \ - static const bool is_specialized = true; \ - static_assert(ENUM_MIN >= INT_MIN, "enumerator too negative for int"); \ - static_assert(ENUM_MAX <= INT_MAX, "enumerator too positive for int"); \ -}; +#define MAKE_ENUM_LIMITS(ENUM_TYPE, ENUM_MIN, ENUM_MAX) \ + template <> \ + class enum_limits { \ + public: \ + static const ENUM_TYPE min_enumerator = ENUM_MIN; \ + static const ENUM_TYPE max_enumerator = ENUM_MAX; \ + static const bool is_specialized = true; \ + static_assert(ENUM_MIN >= INT_MIN, "enumerator too negative for int"); \ + static_assert(ENUM_MAX <= INT_MAX, "enumerator too positive for int"); \ + }; // The loose enum test/cast is actually the more complicated one, // because of the problem of finding the bounds. @@ -217,7 +218,7 @@ inline bool loose_enum_test(int e_val) { // Find the unary bounding negative number of e_max. // This would be b_min = e_max < 0 ? e_max : ~e_max, // but we want to avoid branches to help the compiler. - int e_max_sign = e_max >> (sizeof(e_val)*8 - 1); + int e_max_sign = e_max >> (sizeof(e_val) * 8 - 1); int b_min = ~e_max_sign ^ e_max; // Find the binary bounding negative of both e_min and e_max. @@ -241,7 +242,7 @@ inline bool loose_enum_test(int e_val) { // Find the binary bounding positive number of that // and the unary bounding positive number of e_min. 
- int e_min_sign = e_min >> (sizeof(e_val)*8 - 1); + int e_min_sign = e_min >> (sizeof(e_val) * 8 - 1); b_max |= e_min_sign ^ e_min; // Now set all bits right of the most significant set bit, @@ -270,39 +271,27 @@ inline bool tight_enum_test(int e_val) { template inline bool loose_enum_test_cast(int e_val, Enum* e_var) { if (loose_enum_test(e_val)) { - *e_var = static_cast(e_val); - return true; + *e_var = static_cast(e_val); + return true; } else { - return false; + return false; } } template inline bool tight_enum_test_cast(int e_val, Enum* e_var) { if (tight_enum_test(e_val)) { - *e_var = static_cast(e_val); - return true; + *e_var = static_cast(e_val); + return true; } else { - return false; + return false; } } -// The plain casts require logging, and we get header recursion if -// it is done directly. So, we do it indirectly. -// The following function is defined in logging.cc. - -namespace base { -namespace internal { - -void WarnEnumCastError(int value_of_int); - -} // namespace internal -} // namespace base - template inline Enum loose_enum_cast(int e_val) { if (!loose_enum_test(e_val)) { - base::internal::WarnEnumCastError(e_val); + S2_LOG(ERROR) << "enum_cast error for value " << e_val; } return static_cast(e_val); } @@ -310,7 +299,7 @@ inline Enum loose_enum_cast(int e_val) { template inline Enum tight_enum_cast(int e_val) { if (!tight_enum_test(e_val)) { - base::internal::WarnEnumCastError(e_val); + S2_LOG(ERROR) << "enum_cast error for value " << e_val; } return static_cast(e_val); } diff --git a/src/s2/base/commandlineflags.h b/src/s2/base/commandlineflags.h index 1763be0e..8fa81e63 100644 --- a/src/s2/base/commandlineflags.h +++ b/src/s2/base/commandlineflags.h @@ -16,36 +16,26 @@ #ifndef S2_BASE_COMMANDLINEFLAGS_H_ #define S2_BASE_COMMANDLINEFLAGS_H_ -#ifdef S2_USE_GFLAGS - -#include - -#else // !defined(S2_USE_GFLAGS) - #include +#include "absl/flags/flag.h" + +#include "s2/base/commandlineflags_declare.h" #include "s2/base/integral_types.h" 
-#define DEFINE_bool(name, default_value, description) \ - bool FLAGS_##name = default_value -#define DECLARE_bool(name) \ - extern bool FLAGS_##name +#define S2_DEFINE_bool(name, default_value, description) \ + ABSL_FLAG(bool, name, default_value, description) -#define DEFINE_double(name, default_value, description) \ - double FLAGS_##name = default_value -#define DECLARE_double(name) \ - extern double FLAGS_##name +#define S2_DEFINE_double(name, default_value, description) \ + ABSL_FLAG(double, name, default_value, description) -#define DEFINE_int32(name, default_value, description) \ - int32 FLAGS_##name = default_value -#define DECLARE_int32(name) \ - extern int32 FLAGS_##name +#define S2_DEFINE_int32(name, default_value, description) \ + ABSL_FLAG(int32, name, default_value, description) -#define DEFINE_string(name, default_value, description) \ - std::string FLAGS_##name = default_value -#define DECLARE_string(name) \ - extern std::string FLAGS_##name +#define S2_DEFINE_int64(name, default_value, description) \ + ABSL_FLAG(int64, name, default_value, description) -#endif // !defined(S2_USE_GFLAGS) +#define S2_DEFINE_string(name, default_value, description) \ + ABSL_FLAG(std::string, name, default_value, description) #endif // S2_BASE_COMMANDLINEFLAGS_H_ diff --git a/src/s2/util/gtl/layout.h b/src/s2/base/commandlineflags_declare.h similarity index 53% rename from src/s2/util/gtl/layout.h rename to src/s2/base/commandlineflags_declare.h index fafb6ab2..42d80e0e 100644 --- a/src/s2/util/gtl/layout.h +++ b/src/s2/base/commandlineflags_declare.h @@ -13,16 +13,23 @@ // limitations under the License. 
// -#ifndef S2_UTIL_GTL_LAYOUT_H_ -#define S2_UTIL_GTL_LAYOUT_H_ +#ifndef S2_BASE_COMMANDLINEFLAGS_DECLARE_H_ +#define S2_BASE_COMMANDLINEFLAGS_DECLARE_H_ -#include "absl/container/internal/layout.h" +#include -namespace gtl { +#include "absl/flags/declare.h" -using absl::container_internal::Aligned; -using absl::container_internal::Layout; +#include "s2/base/integral_types.h" -} // namespace gtl +#define S2_DECLARE_bool(name) ABSL_DECLARE_FLAG(bool, name) -#endif // S2_UTIL_GTL_LAYOUT_H_ +#define S2_DECLARE_double(name) ABSL_DECLARE_FLAG(double, name) + +#define S2_DECLARE_int32(name) ABSL_DECLARE_FLAG(int32, name) + +#define S2_DECLARE_int64(name) ABSL_DECLARE_FLAG(int64, name) + +#define S2_DECLARE_string(name) ABSL_DECLARE_FLAG(std::string, name) + +#endif // S2_BASE_COMMANDLINEFLAGS_DECLARE_H_ diff --git a/src/s2/base/integral_types.h b/src/s2/base/integral_types.h index d20f35f5..48449631 100644 --- a/src/s2/base/integral_types.h +++ b/src/s2/base/integral_types.h @@ -16,6 +16,7 @@ #ifndef S2_BASE_INTEGRAL_TYPES_H_ #define S2_BASE_INTEGRAL_TYPES_H_ +// NOLINTBEGIN(runtime/int) using int8 = signed char; using int16 = short; using int32 = int; @@ -27,5 +28,6 @@ using uint32 = unsigned int; using uint64 = unsigned long long; using uword_t = unsigned long; +// NOLINTEND(runtime/int) #endif // S2_BASE_INTEGRAL_TYPES_H_ diff --git a/src/s2/base/log_severity.h b/src/s2/base/log_severity.h index b0e9de31..d4644080 100644 --- a/src/s2/base/log_severity.h +++ b/src/s2/base/log_severity.h @@ -16,12 +16,6 @@ #ifndef S2_BASE_LOG_SEVERITY_H_ #define S2_BASE_LOG_SEVERITY_H_ -#ifdef S2_USE_GLOG - -#include - -#else // !defined(S2_USE_GLOG) - #include "absl/base/log_severity.h" // Stay compatible with glog. 
@@ -35,6 +29,4 @@ constexpr bool DEBUG_MODE = true; } // namespace google -#endif // !defined(S2_USE_GLOG) - #endif // S2_BASE_LOG_SEVERITY_H_ diff --git a/src/s2/base/logging.h b/src/s2/base/logging.h index d32b6118..aac48a0a 100644 --- a/src/s2/base/logging.h +++ b/src/s2/base/logging.h @@ -15,18 +15,21 @@ #ifndef S2_BASE_LOGGING_H_ #define S2_BASE_LOGGING_H_ -#include "cpp-compat.h" -#ifdef S2_USE_GLOG - -#include +// TODO(user): Get rid of `base/logging.h` includes and +// include the relevant absl file directly instead. +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "s2/base/log_severity.h" // The names CHECK, etc. are too common and may conflict with other // packages. We use S2_CHECK to make it easier to switch to -// something other than GLOG for logging. +// something other than abseil-cpp for logging. +// TODO(user): Remove these or make absl::log optional. #define S2_LOG LOG #define S2_LOG_IF LOG_IF +#define S2_DLOG DLOG #define S2_DLOG_IF DLOG_IF #define S2_CHECK CHECK @@ -45,133 +48,14 @@ #define S2_DCHECK_GT DCHECK_GT #define S2_DCHECK_GE DCHECK_GE -#define S2_VLOG VLOG -#define S2_VLOG_IS_ON VLOG_IS_ON - -#else // !defined(S2_USE_GLOG) - -#include - -#include "s2/base/log_severity.h" -#include "absl/base/attributes.h" -#include "absl/base/log_severity.h" - -class S2LogMessage { - public: - S2LogMessage(const char* file, int line, - absl::LogSeverity severity, std::ostream& stream) - : severity_(severity), stream_(stream) { - if (enabled()) { - stream_ << file << ":" << line << " " - << absl::LogSeverityName(severity) << " "; - } - } - ~S2LogMessage() { if (enabled()) stream_ << std::endl; } - - std::ostream& stream() { return stream_; } - - // silences an 'unused member' compiler warning - absl::LogSeverity severity() { return severity_; } - - private: - bool enabled() const { -#ifdef ABSL_MIN_LOG_LEVEL - return (static_cast(severity_) >= ABSL_MIN_LOG_LEVEL || - severity_ >= absl::LogSeverity::kFatal); -#else - return true; -#endif 
- } - - absl::LogSeverity severity_; - std::ostream& stream_; -}; - -// Same as S2LogMessage, but destructor is marked no-return to avoid -// "no return value warnings" in functions that return non-void. -class S2FatalLogMessage : public S2LogMessage { - public: - S2FatalLogMessage(const char* file, int line, - absl::LogSeverity severity, std::ostream& stream) - ABSL_ATTRIBUTE_COLD - : S2LogMessage(file, line, severity, stream) {} - ABSL_ATTRIBUTE_NORETURN ~S2FatalLogMessage() { cpp_compat_abort(); } -}; - // Logging stream that does nothing. struct S2NullStream { template S2NullStream& operator<<(const T& v) { return *this; } }; -// Used to suppress "unused value" warnings. -struct S2LogMessageVoidify { - // Must have precedence lower than << but higher than ?:. - void operator&(std::ostream&) {} -}; - -#define S2_LOG_MESSAGE_(LogMessageClass, log_severity) \ - LogMessageClass(__FILE__, __LINE__, log_severity, cpp_compat_cerr) -#define S2_LOG_INFO \ - S2_LOG_MESSAGE_(S2LogMessage, absl::LogSeverity::kInfo) -#define S2_LOG_WARNING \ - S2_LOG_MESSAGE_(S2LogMessage, absl::LogSeverity::kWarning) -#define S2_LOG_ERROR \ - S2_LOG_MESSAGE_(S2LogMessage, absl::LogSeverity::kError) -#define S2_LOG_FATAL \ - S2_LOG_MESSAGE_(S2FatalLogMessage, absl::LogSeverity::kFatal) -#ifndef NDEBUG -#define S2_LOG_DFATAL S2_LOG_FATAL -#else -#define S2_LOG_DFATAL S2_LOG_ERROR -#endif - -#define S2_LOG(severity) S2_LOG_##severity.stream() - -// Implementing this as if (...) {} else S2_LOG(...) will cause dangling else -// warnings when someone does if (...) S2_LOG_IF(...), so do this tricky -// thing instead. -#define S2_LOG_IF(severity, condition) \ - !(condition) ? 
(void)0 : S2LogMessageVoidify() & S2_LOG(severity) - -#define S2_CHECK(condition) \ - S2_LOG_IF(FATAL, ABSL_PREDICT_FALSE(!(condition))) \ - << ("Check failed: " #condition " ") - -#ifndef NDEBUG - -#define S2_DLOG_IF S2_LOG_IF -#define S2_DCHECK S2_CHECK - -#else // defined(NDEBUG) - -#define S2_DLOG_IF(severity, condition) \ - while (false && (condition)) S2NullStream() -#define S2_DCHECK(condition) \ - while (false && (condition)) S2NullStream() - -#endif // defined(NDEBUG) - -#define S2_CHECK_OP(op, val1, val2) S2_CHECK((val1) op (val2)) -#define S2_CHECK_EQ(val1, val2) S2_CHECK_OP(==, val1, val2) -#define S2_CHECK_NE(val1, val2) S2_CHECK_OP(!=, val1, val2) -#define S2_CHECK_LT(val1, val2) S2_CHECK_OP(<, val1, val2) -#define S2_CHECK_LE(val1, val2) S2_CHECK_OP(<=, val1, val2) -#define S2_CHECK_GT(val1, val2) S2_CHECK_OP(>, val1, val2) -#define S2_CHECK_GE(val1, val2) S2_CHECK_OP(>=, val1, val2) - -#define S2_DCHECK_OP(op, val1, val2) S2_DCHECK((val1) op (val2)) -#define S2_DCHECK_EQ(val1, val2) S2_DCHECK_OP(==, val1, val2) -#define S2_DCHECK_NE(val1, val2) S2_DCHECK_OP(!=, val1, val2) -#define S2_DCHECK_LT(val1, val2) S2_DCHECK_OP(<, val1, val2) -#define S2_DCHECK_LE(val1, val2) S2_DCHECK_OP(<=, val1, val2) -#define S2_DCHECK_GT(val1, val2) S2_DCHECK_OP(>, val1, val2) -#define S2_DCHECK_GE(val1, val2) S2_DCHECK_OP(>=, val1, val2) - -// We don't support VLOG. +// Abseil-cpp doesn't support VLOG yet. Make VLOG a no-op. #define S2_VLOG(verbose_level) S2NullStream() #define S2_VLOG_IS_ON(verbose_level) (false) -#endif // !defined(S2_USE_GLOG) - #endif // S2_BASE_LOGGING_H_ diff --git a/src/s2/base/port.h b/src/s2/base/port.h index 10724712..8d7448f3 100644 --- a/src/s2/base/port.h +++ b/src/s2/base/port.h @@ -17,293 +17,39 @@ #define S2_BASE_PORT_H_ // This file contains things that are not used in third_party/absl but needed by -// - Platform specific requirement -// - MSVC -// - Utility macros +// s2geometry. 
It is structed into the following high-level categories: // - Endianness -// - Hash -// - Global variables -// - Type alias -// - Predefined system/language macros -// - Predefined system/language functions // - Performance optimization (alignment) -// - Obsolete -#include -#include -#include #include #include "s2/base/integral_types.h" -#include "absl/base/config.h" -#include "absl/base/port.h" - -#ifdef SWIG -%include "third_party/absl/base/port.h" -#endif - -// ----------------------------------------------------------------------------- -// MSVC Specific Requirements -// ----------------------------------------------------------------------------- - -#ifdef _MSC_VER /* if Visual C++ */ - -#include // Must come before -#include -#include // _getpid() -#include -#undef ERROR -#undef DELETE -#undef DIFFERENCE -#define STDIN_FILENO 0 -#define STDOUT_FILENO 1 -#define STDERR_FILENO 2 -#define S_IRUSR 00400 -#define S_IWUSR 00200 -#define S_IXUSR 00100 -#define S_IRGRP 00040 -#define S_IWGRP 00020 -#define S_IXGRP 00010 -#define S_IROTH 00004 -#define S_IWOTH 00002 -#define S_IXOTH 00001 - -// This compiler flag can be easily overlooked on MSVC. -// _CHAR_UNSIGNED gets set with the /J flag. -#ifndef _CHAR_UNSIGNED -#error chars must be unsigned! Use the /J flag on the compiler command line. // NOLINT -#endif - -// Allow comparisons between signed and unsigned values. -// -// Lots of Google code uses this pattern: -// for (int i = 0; i < container.size(); ++i) -// Since size() returns an unsigned value, this warning would trigger -// frequently. Very few of these instances are actually bugs since containers -// rarely exceed MAX_INT items. Unfortunately, there are bugs related to -// signed-unsigned comparisons that have been missed because we disable this -// warning. For example: -// const long stop_time = os::GetMilliseconds() + kWaitTimeoutMillis; -// while (os::GetMilliseconds() <= stop_time) { ... 
} -#pragma warning(disable : 4018) // level 3 -#pragma warning(disable : 4267) // level 3 - -// Don't warn about unused local variables. -// -// extension to silence particular instances of this warning. There's no way -// to define ABSL_ATTRIBUTE_UNUSED to quiet particular instances of this warning -// in VC++, so we disable it globally. Currently, there aren't many false -// positives, so perhaps we can address those in the future and re-enable these -// warnings, which sometimes catch real bugs. -#pragma warning(disable : 4101) // level 3 - -// Allow initialization and assignment to a smaller type without warnings about -// possible loss of data. -// -// There is a distinct warning, 4267, that warns about size_t conversions to -// smaller types, but we don't currently disable that warning. -// -// Correct code can be written in such a way as to avoid false positives -// by making the conversion explicit, but Google code isn't usually that -// verbose. There are too many false positives to address at this time. Note -// that this warning triggers at levels 2, 3, and 4 depending on the specific -// type of conversion. By disabling it, we not only silence minor narrowing -// conversions but also serious ones. -#pragma warning(disable : 4244) // level 2, 3, and 4 - -// Allow silent truncation of double to float. -// -// Silencing this warning has caused us to miss some subtle bugs. -#pragma warning(disable : 4305) // level 1 - -// Allow a constant to be assigned to a type that is too small. -// -// I don't know why we allow this at all. I can't think of a case where this -// wouldn't be a bug, but enabling the warning breaks many builds today. -#pragma warning(disable : 4307) // level 2 - -// Allow passing the this pointer to an initializer even though it refers -// to an uninitialized object. -// -// Some observer implementations rely on saving the this pointer. 
Those are -// safe because the pointer is not dereferenced until after the object is fully -// constructed. This could however, obscure other instances. In the future, we -// should look into disabling this warning locally rather globally. -#pragma warning(disable : 4355) // level 1 and 4 - -// Allow implicit coercion from an integral type to a bool. -// -// These could be avoided by making the code more explicit, but that's never -// been the style here, so there would be many false positives. It's not -// obvious if a true positive would ever help to find an actual bug. -#pragma warning(disable : 4800) // level 3 - -#endif // _MSC_VER - -// ----------------------------------------------------------------------------- -// Utility Macros -// ----------------------------------------------------------------------------- - -// OS_IOS -#if defined(__APPLE__) -// Currently, blaze supports iOS yet doesn't define a flag. Mac users have -// traditionally defined OS_IOS themselves via other build systems, since mac -// hasn't been supported by blaze. -// TODO(user): Remove this when all toolchains make the proper defines. -#include -#if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE -#ifndef OS_IOS -#define OS_IOS 1 -#endif -#endif // defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE -#endif // defined(__APPLE__) - -// __GLIBC_PREREQ -#if defined __linux__ -// GLIBC-related macros. 
-#include - -#ifndef __GLIBC_PREREQ -#define __GLIBC_PREREQ(a, b) 0 // not a GLIBC system -#endif -#endif // __linux__ - -// STATIC_ANALYSIS -// Klocwork static analysis tool's C/C++ complier kwcc -#if defined(__KLOCWORK__) -#define STATIC_ANALYSIS -#endif // __KLOCWORK__ - -// SIZEOF_MEMBER, OFFSETOF_MEMBER -#define SIZEOF_MEMBER(t, f) sizeof(reinterpret_cast(4096)->f) - -#define OFFSETOF_MEMBER(t, f) \ - (reinterpret_cast(&(reinterpret_cast(16)->f)) - \ - reinterpret_cast(16)) - -// LANG_CXX11 -// GXX_EXPERIMENTAL_CXX0X is defined by gcc and clang up to at least -// gcc-4.7 and clang-3.1 (2011-12-13). __cplusplus was defined to 1 -// in gcc before 4.7 (Crosstool 16) and clang before 3.1, but is -// defined according to the language version in effect thereafter. -// Microsoft Visual Studio 14 (2015) sets __cplusplus==199711 despite -// reasonably good C++11 support, so we set LANG_CXX for it and -// newer versions (_MSC_VER >= 1900). -#if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L || \ - (defined(_MSC_VER) && _MSC_VER >= 1900)) -// DEPRECATED: Do not key off LANG_CXX11. Instead, write more accurate condition -// that checks whether the C++ feature you need is available or missing, and -// define a more specific feature macro (GOOGLE_HAVE_FEATURE_FOO). You can check -// http://en.cppreference.com/w/cpp/compiler_support for compiler support on C++ -// features. -// Define this to 1 if the code is compiled in C++11 mode; leave it -// undefined otherwise. Do NOT define it to 0 -- that causes -// '#ifdef LANG_CXX11' to behave differently from '#if LANG_CXX11'. -#define LANG_CXX11 1 -#endif - -// This sanity check can be removed when all references to -// LANG_CXX11 is removed from the code base. -#if defined(__cplusplus) && !defined(LANG_CXX11) && !defined(SWIG) -#error "LANG_CXX11 is required." -#endif - -// GOOGLE_OBSCURE_SIGNAL -#if defined(__APPLE__) -// No SIGPWR on MacOSX. SIGINFO seems suitably obscure. 
-#define GOOGLE_OBSCURE_SIGNAL SIGINFO -#else -/* We use SIGPWR since that seems unlikely to be used for other reasons. */ -#define GOOGLE_OBSCURE_SIGNAL SIGPWR -#endif - -// ABSL_FUNC_PTR_TO_CHAR_PTR -// On some platforms, a "function pointer" points to a function descriptor -// rather than directly to the function itself. -// Use ABSL_FUNC_PTR_TO_CHAR_PTR(func) to get a char-pointer to the first -// instruction of the function func. -// TODO(b/30407660): Move this macro into Abseil when symbolizer is released in -// Abseil. -#if defined(__cplusplus) -#if (defined(__powerpc__) && !(_CALL_ELF > 1)) || defined(__ia64) -// use opd section for function descriptors on these platforms, the function -// address is the first word of the descriptor -namespace absl { -enum { kPlatformUsesOPDSections = 1 }; -} // namespace absl -#define ABSL_FUNC_PTR_TO_CHAR_PTR(func) (reinterpret_cast(func)[0]) -#else // not PPC or IA64 -namespace absl { -enum { kPlatformUsesOPDSections = 0 }; -} // namespace absl -#define ABSL_FUNC_PTR_TO_CHAR_PTR(func) (reinterpret_cast(func)) -#endif // PPC or IA64 -#endif // __cplusplus - -// ----------------------------------------------------------------------------- -// Utility Functions -// ----------------------------------------------------------------------------- - -// sized_delete -#ifdef __cplusplus -namespace base { -// We support C++14's sized deallocation for all C++ builds, -// though for other toolchains, we fall back to using delete. 
-inline void sized_delete(void *ptr, size_t size) { -#ifdef GOOGLE_HAVE_SIZED_DELETE - ::operator delete(ptr, size); -#else - (void)size; - ::operator delete(ptr); -#endif // GOOGLE_HAVE_SIZED_DELETE -} - -inline void sized_delete_array(void *ptr, size_t size) { -#ifdef GOOGLE_HAVE_SIZED_DELETEARRAY - ::operator delete[](ptr, size); -#else - (void) size; - ::operator delete[](ptr); -#endif -} -} // namespace base -#endif // __cplusplus // ----------------------------------------------------------------------------- // Endianness // ----------------------------------------------------------------------------- // IS_LITTLE_ENDIAN, IS_BIG_ENDIAN - -// Allow compiler -D defines to override detection here -// which occasionally fails (e.g., on CRAN Solaris) -#if defined(IS_LITTLE_ENDIAN) -#undef IS_BIG_ENDIAN -#elif defined(IS_BIG_ENDIAN) -#undef IS_LITTLE_ENDIAN -#else - -#if defined __linux__ || defined OS_ANDROID || defined(__ANDROID__) -// TODO(user): http://b/21460321; use one of OS_ANDROID or __ANDROID__. -// _BIG_ENDIAN +#if defined(__linux__) || defined(__ANDROID__) #include #elif defined(__APPLE__) // BIG_ENDIAN #include // NOLINT(build/include) + /* Let's try and follow the Linux convention */ -#define __BYTE_ORDER BYTE_ORDER +#define __BYTE_ORDER BYTE_ORDER #define __LITTLE_ENDIAN LITTLE_ENDIAN #define __BIG_ENDIAN BIG_ENDIAN #endif -// defines __BYTE_ORDER for MSVC -#ifdef _MSC_VER +// defines __BYTE_ORDER +#ifdef _WIN32 #define __BYTE_ORDER __LITTLE_ENDIAN #define IS_LITTLE_ENDIAN -#else +#else // _WIN32 // define the macros IS_LITTLE_ENDIAN or IS_BIG_ENDIAN // using the above endian definitions from endian.h if @@ -326,317 +72,7 @@ inline void sized_delete_array(void *ptr, size_t size) { #endif #endif // __BYTE_ORDER -#endif // _MSC_VER -#endif // #if defined(IS_LITTLE_ENDIAN) ... #else - -// byte swap functions (bswap_16, bswap_32, bswap_64). 
- -// The following guarantees declaration of the byte swap functions -#ifdef _MSC_VER -#include // NOLINT(build/include) -#define bswap_16(x) _byteswap_ushort(x) -#define bswap_32(x) _byteswap_ulong(x) -#define bswap_64(x) _byteswap_uint64(x) - -#elif defined(__APPLE__) -// Mac OS X / Darwin features -#include -#define bswap_16(x) OSSwapInt16(x) -#define bswap_32(x) OSSwapInt32(x) -#define bswap_64(x) OSSwapInt64(x) - -#elif defined(__GLIBC__) || defined(__BIONIC__) || defined(__ASYLO__) -#include // IWYU pragma: export - -#else - -static inline uint16 bswap_16(uint16 x) { -#ifdef __cplusplus - return static_cast(((x & 0xFF) << 8) | ((x & 0xFF00) >> 8)); -#else - return (uint16)(((x & 0xFF) << 8) | ((x & 0xFF00) >> 8)); // NOLINT -#endif // __cplusplus -} -#define bswap_16(x) bswap_16(x) -static inline uint32 bswap_32(uint32 x) { - return (((x & 0xFF) << 24) | - ((x & 0xFF00) << 8) | - ((x & 0xFF0000) >> 8) | - ((x & 0xFF000000) >> 24)); -} -#define bswap_32(x) bswap_32(x) -static inline uint64 bswap_64(uint64 x) { - return (((x & 0xFFULL) << 56) | - ((x & 0xFF00ULL) << 40) | - ((x & 0xFF0000ULL) << 24) | - ((x & 0xFF000000ULL) << 8) | - ((x & 0xFF00000000ULL) >> 8) | - ((x & 0xFF0000000000ULL) >> 24) | - ((x & 0xFF000000000000ULL) >> 40) | - ((x & 0xFF00000000000000ULL) >> 56)); -} -#define bswap_64(x) bswap_64(x) - -#endif - -// ----------------------------------------------------------------------------- -// Hash -// ----------------------------------------------------------------------------- - -#ifdef __cplusplus -#ifdef STL_MSVC // not always the same as _MSC_VER -#include "absl/base/internal/port_hash.inc" -#else -struct PortableHashBase {}; -#endif // STL_MSVC -#endif // __cplusplus - -// ----------------------------------------------------------------------------- -// Global Variables -// ----------------------------------------------------------------------------- - -// PATH_SEPARATOR -// Define the OS's path separator -// -// NOTE: Assuming the path 
separator at compile time is discouraged. -// Prefer instead to be tolerant of both possible separators whenever possible. -#ifdef __cplusplus // C won't merge duplicate const variables at link time -// Some headers provide a macro for this (GCC's system.h), remove it so that we -// can use our own. -#undef PATH_SEPARATOR -#if defined(_WIN32) -const char PATH_SEPARATOR = '\\'; -#else -const char PATH_SEPARATOR = '/'; #endif // _WIN32 -#endif // __cplusplus - -// ----------------------------------------------------------------------------- -// Type Alias -// ----------------------------------------------------------------------------- - -// uint, ushort, ulong -#if defined __linux__ -// The uint mess: -// mysql.h sets _GNU_SOURCE which sets __USE_MISC in -// sys/types.h typedefs uint if __USE_MISC -// mysql typedefs uint if HAVE_UINT not set -// The following typedef is carefully considered, and should not cause -// any clashes -#if !defined(__USE_MISC) -#if !defined(HAVE_UINT) -#define HAVE_UINT 1 -typedef unsigned int uint; -#endif // !HAVE_UINT -#if !defined(HAVE_USHORT) -#define HAVE_USHORT 1 -typedef unsigned short ushort; // NOLINT -#endif // !HAVE_USHORT -#if !defined(HAVE_ULONG) -#define HAVE_ULONG 1 -typedef unsigned long ulong; // NOLINT -#endif // !HAVE_ULONG -#endif // !__USE_MISC - -#endif // __linux__ - -#ifdef _MSC_VER /* if Visual C++ */ -// VC++ doesn't understand "uint" -#ifndef HAVE_UINT -#define HAVE_UINT 1 -typedef unsigned int uint; -#endif // !HAVE_UINT -#endif // _MSC_VER - -#ifdef _MSC_VER -// uid_t -// MSVC doesn't have uid_t -typedef int uid_t; - -// pid_t -// Defined all over the place. 
-typedef int pid_t; -#endif // _MSC_VER - -// mode_t -#ifdef _MSC_VER -// From stat.h -typedef unsigned int mode_t; -#endif // _MSC_VER - -// sig_t -#ifdef _MSC_VER -typedef void (*sig_t)(int); -#endif // _MSC_VER - -// u_int16_t, int16_t -#ifdef _MSC_VER -// u_int16_t, int16_t don't exist in MSVC -typedef unsigned short u_int16_t; // NOLINT -typedef short int16_t; // NOLINT -#endif // _MSC_VER - -// using std::hash -#ifdef _MSC_VER -#ifdef __cplusplus -// Define a minimal set of things typically available in the global -// namespace in Google code. ::string is handled elsewhere, and uniformly -// for all targets. -#include -using std::hash; -#endif // __cplusplus -#endif // _MSC_VER - -// printf macros -// __STDC_FORMAT_MACROS must be defined before inttypes.h inclusion */ -#if defined(__APPLE__) -/* From MacOSX's inttypes.h: - * "C++ implementations should define these macros only when - * __STDC_FORMAT_MACROS is defined before is included." */ -#ifndef __STDC_FORMAT_MACROS -#define __STDC_FORMAT_MACROS -#endif /* __STDC_FORMAT_MACROS */ -#endif /* __APPLE__ */ - -// printf macros for size_t, in the style of inttypes.h -#if defined(_LP64) || defined(__APPLE__) -#define __PRIS_PREFIX "z" -#else -#define __PRIS_PREFIX -#endif - -// Use these macros after a % in a printf format string -// to get correct 32/64 bit behavior, like this: -// size_t size = records.size(); -// printf("%" PRIuS "\n", size); -#define PRIdS __PRIS_PREFIX "d" -#define PRIxS __PRIS_PREFIX "x" -#define PRIuS __PRIS_PREFIX "u" -#define PRIXS __PRIS_PREFIX "X" -#define PRIoS __PRIS_PREFIX "o" - -#define GPRIuPTHREAD "lu" -#define GPRIxPTHREAD "lx" -#if defined(__APPLE__) -#define PRINTABLE_PTHREAD(pthreadt) reinterpret_cast(pthreadt) -#else -#define PRINTABLE_PTHREAD(pthreadt) pthreadt -#endif - -#ifdef PTHREADS_REDHAT_WIN32 -#include // NOLINT(build/include) -#include // NOLINT(build/include) -// pthread_t is not a simple integer or pointer on Win32 -std::ostream &operator<<(std::ostream &out, 
const pthread_t &thread_id); -#endif - -// ----------------------------------------------------------------------------- -// Predefined System/Language Macros -// ----------------------------------------------------------------------------- - -// EXFULL -#if defined(__APPLE__) -// Linux has this in -#define EXFULL ENOMEM // not really that great a translation... -#endif // __APPLE__ -#ifdef _MSC_VER -// This actually belongs in errno.h but there's a name conflict in errno -// on WinNT. They (and a ton more) are also found in Winsock2.h, but -// if'd out under NT. We need this subset at minimum. -#define EXFULL ENOMEM // not really that great a translation... -#endif // _MSC_VER - -// MSG_NOSIGNAL -#if defined(__APPLE__) -// Doesn't exist on OSX. -#define MSG_NOSIGNAL 0 -#endif // __APPLE__ - -// __ptr_t -#if defined(__APPLE__) -// Linux has this in -#define __ptr_t void * -#endif // __APPLE__ -#ifdef _MSC_VER -// From glob.h -#define __ptr_t void * -#endif - -// HUGE_VALF -#ifdef _MSC_VER -#include // for HUGE_VAL - -#ifndef HUGE_VALF -#define HUGE_VALF (static_cast(HUGE_VAL)) -#endif -#endif // _MSC_VER - -// MAP_ANONYMOUS -#if defined(__APPLE__) -// For mmap, Linux defines both MAP_ANONYMOUS and MAP_ANON and says MAP_ANON is -// deprecated. In Darwin, MAP_ANON is all there is. 
-#if !defined MAP_ANONYMOUS -#define MAP_ANONYMOUS MAP_ANON -#endif // !MAP_ANONYMOUS -#endif // __APPLE__ - -// PATH_MAX -// You say tomato, I say atotom -#ifdef _MSC_VER -#define PATH_MAX MAX_PATH -#endif - -// ----------------------------------------------------------------------------- -// Predefined System/Language Functions -// ----------------------------------------------------------------------------- - -// strtoq, strtouq, atoll -#ifdef _MSC_VER -#define strtoq _strtoi64 -#define strtouq _strtoui64 -#define atoll _atoi64 -#endif // _MSC_VER - -#ifdef _MSC_VER -// You say tomato, I say _tomato -#define strcasecmp _stricmp -#define strncasecmp _strnicmp -#define strdup _strdup -#define tempnam _tempnam -#define chdir _chdir -#define getpid _getpid -#define getcwd _getcwd -#define putenv _putenv -#define timezone _timezone -#define tzname _tzname -#endif // _MSC_VER - -// random, srandom -#ifdef _MSC_VER -// You say tomato, I say toma -inline int random() { return rand(); } -inline void srandom(unsigned int seed) { srand(seed); } -#endif // _MSC_VER - -// bcopy, bzero -#ifdef _MSC_VER -// You say juxtapose, I say transpose -#define bcopy(s, d, n) memcpy(d, s, n) -// Really from -inline void bzero(void *s, int n) { memset(s, 0, n); } -#endif // _MSC_VER - -// gethostbyname -#if defined(_WIN32) || defined(__APPLE__) -// gethostbyname() *is* thread-safe for Windows native threads. It is also -// safe on Mac OS X and iOS, where it uses thread-local storage, even though the -// manpages claim otherwise. For details, see -// http://lists.apple.com/archives/Darwin-dev/2006/May/msg00008.html -#else -// gethostbyname() is not thread-safe. So disallow its use. 
People -// should either use the HostLookup::Lookup*() methods, or gethostbyname_r() -#define gethostbyname gethostbyname_is_not_thread_safe_DO_NOT_USE -#endif // ----------------------------------------------------------------------------- // Performance Optimization @@ -646,368 +82,69 @@ inline void bzero(void *s, int n) { memset(s, 0, n); } // Unaligned APIs -// Portable handling of unaligned loads, stores, and copies. -// On some platforms, like ARM, the copy functions can be more efficient -// then a load and a store. +// Portable handling of unaligned loads, stores, and copies. These are simply +// constant-length memcpy calls. // -// It is possible to implement all of these these using constant-length memcpy -// calls, which is portable and will usually be inlined into simple loads and -// stores if the architecture supports it. However, such inlining usually -// happens in a pass that's quite late in compilation, which means the resulting -// loads and stores cannot participate in many other optimizations, leading to -// overall worse code. // TODO(user): These APIs are forked in Abseil, see -// LLVM, we should reimplement these APIs with functions calling memcpy(), and -// maybe publish them in Abseil. - +// "third_party/absl/base/internal/unaligned_access.h". +// // The unaligned API is C++ only. The declarations use C++ features // (namespaces, inline) which are absent or incompatible in C. #if defined(__cplusplus) -#if defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) || \ - defined(MEMORY_SANITIZER) -// Consider we have an unaligned load/store of 4 bytes from address 0x...05. -// AddressSanitizer will treat it as a 3-byte access to the range 05:07 and -// will miss a bug if 08 is the first unaddressable byte. -// ThreadSanitizer will also treat this as a 3-byte access to 05:07 and will -// miss a race between this access and some other accesses to 08. 
-// MemorySanitizer will correctly propagate the shadow on unaligned stores -// and correctly report bugs on unaligned loads, but it may not properly -// update and report the origin of the uninitialized memory. -// For all three tools, replacing an unaligned access with a tool-specific -// callback solves the problem. - -// Make sure uint16_t/uint32_t/uint64_t are defined. -#include - -extern "C" { -uint16_t __sanitizer_unaligned_load16(const void *p); -uint32_t __sanitizer_unaligned_load32(const void *p); -uint64_t __sanitizer_unaligned_load64(const void *p); -void __sanitizer_unaligned_store16(void *p, uint16_t v); -void __sanitizer_unaligned_store32(void *p, uint32_t v); -void __sanitizer_unaligned_store64(void *p, uint64_t v); -} // extern "C" - -inline uint16 UNALIGNED_LOAD16(const void *p) { - return __sanitizer_unaligned_load16(p); -} - -inline uint32 UNALIGNED_LOAD32(const void *p) { - return __sanitizer_unaligned_load32(p); -} - -inline uint64 UNALIGNED_LOAD64(const void *p) { - return __sanitizer_unaligned_load64(p); -} - -inline void UNALIGNED_STORE16(void *p, uint16 v) { - __sanitizer_unaligned_store16(p, v); -} - -inline void UNALIGNED_STORE32(void *p, uint32 v) { - __sanitizer_unaligned_store32(p, v); -} +namespace base { -inline void UNALIGNED_STORE64(void *p, uint64 v) { - __sanitizer_unaligned_store64(p, v); -} +// Can't use ATTRIBUTE_NO_SANITIZE_MEMORY because this file is included before +// attributes.h is. 
+#ifdef __has_attribute +#if __has_attribute(no_sanitize_memory) +#define NO_SANITIZE_MEMORY __attribute__((no_sanitize_memory)) +#endif // __has_attribute(no_sanitize_memory) +#endif // defined __has_attribute -#elif defined(UNDEFINED_BEHAVIOR_SANITIZER) +#ifndef NO_SANITIZE_MEMORY +#define NO_SANITIZE_MEMORY /**/ +#endif -inline uint16 UNALIGNED_LOAD16(const void *p) { - uint16 t; +template +T NO_SANITIZE_MEMORY UnalignedLoad(const void *p) { + T t; memcpy(&t, p, sizeof t); return t; } -inline uint32 UNALIGNED_LOAD32(const void *p) { - uint32 t; - memcpy(&t, p, sizeof t); - return t; -} +#undef NO_SANITIZE_MEMORY -inline uint64 UNALIGNED_LOAD64(const void *p) { - uint64 t; - memcpy(&t, p, sizeof t); - return t; +template +void UnalignedStore(void *p, T t) { + memcpy(p, &t, sizeof t); } - -inline void UNALIGNED_STORE16(void *p, uint16 v) { memcpy(p, &v, sizeof v); } - -inline void UNALIGNED_STORE32(void *p, uint32 v) { memcpy(p, &v, sizeof v); } - -inline void UNALIGNED_STORE64(void *p, uint64 v) { memcpy(p, &v, sizeof v); } - -#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386) || \ - defined(_M_IX86) || defined(__ppc__) || defined(__PPC__) || \ - defined(__ppc64__) || defined(__PPC64__) - -// x86 and x86-64 can perform unaligned loads/stores directly; -// modern PowerPC hardware can also do unaligned integer loads and stores; -// but note: the FPU still sends unaligned loads and stores to a trap handler! 
- -#define UNALIGNED_LOAD16(_p) (*reinterpret_cast(_p)) -#define UNALIGNED_LOAD32(_p) (*reinterpret_cast(_p)) -#define UNALIGNED_LOAD64(_p) (*reinterpret_cast(_p)) - -#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast(_p) = (_val)) -#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast(_p) = (_val)) -#define UNALIGNED_STORE64(_p, _val) (*reinterpret_cast(_p) = (_val)) - -#elif defined(__arm__) && !defined(__ARM_ARCH_5__) && \ - !defined(__ARM_ARCH_5T__) && !defined(__ARM_ARCH_5TE__) && \ - !defined(__ARM_ARCH_5TEJ__) && !defined(__ARM_ARCH_6__) && \ - !defined(__ARM_ARCH_6J__) && !defined(__ARM_ARCH_6K__) && \ - !defined(__ARM_ARCH_6Z__) && !defined(__ARM_ARCH_6ZK__) && \ - !defined(__ARM_ARCH_6T2__) - -// ARMv7 and newer support native unaligned accesses, but only of 16-bit -// and 32-bit values (not 64-bit); older versions either raise a fatal signal, -// do an unaligned read and rotate the words around a bit, or do the reads very -// slowly (trip through kernel mode). There's no simple #define that says just -// “ARMv7 or higher”, so we have to filter away all ARMv5 and ARMv6 -// sub-architectures. Newer gcc (>= 4.6) set an __ARM_FEATURE_ALIGNED #define, -// so in time, maybe we can move on to that. -// -// This is a mess, but there's not much we can do about it. -// -// To further complicate matters, only LDR instructions (single reads) are -// allowed to be unaligned, not LDRD (two reads) or LDM (many reads). Unless we -// explicitly tell the compiler that these accesses can be unaligned, it can and -// will combine accesses. On armcc, the way to signal this is done by accessing -// through the type (uint32 __packed *), but GCC has no such attribute -// (it ignores __attribute__((packed)) on individual variables). However, -// we can tell it that a _struct_ is unaligned, which has the same effect, -// so we do that. - -namespace base { -namespace internal { - -struct Unaligned16Struct { - uint16 value; - uint8 dummy; // To make the size non-power-of-two. 
-} ABSL_ATTRIBUTE_PACKED; - -struct Unaligned32Struct { - uint32 value; - uint8 dummy; // To make the size non-power-of-two. -} ABSL_ATTRIBUTE_PACKED; - -} // namespace internal } // namespace base -#define UNALIGNED_LOAD16(_p) \ - ((reinterpret_cast(_p))->value) -#define UNALIGNED_LOAD32(_p) \ - ((reinterpret_cast(_p))->value) - -#define UNALIGNED_STORE16(_p, _val) \ - ((reinterpret_cast< ::base::internal::Unaligned16Struct *>(_p))->value = \ - (_val)) -#define UNALIGNED_STORE32(_p, _val) \ - ((reinterpret_cast< ::base::internal::Unaligned32Struct *>(_p))->value = \ - (_val)) - -// TODO(user): NEON supports unaligned 64-bit loads and stores. -// See if that would be more efficient on platforms supporting it, -// at least for copies. - -inline uint64 UNALIGNED_LOAD64(const void *p) { - uint64 t; - memcpy(&t, p, sizeof t); - return t; -} - -inline void UNALIGNED_STORE64(void *p, uint64 v) { memcpy(p, &v, sizeof v); } - -#else - -#define NEED_ALIGNED_LOADS - -// These functions are provided for architectures that don't support -// unaligned loads and stores. - inline uint16 UNALIGNED_LOAD16(const void *p) { - uint16 t; - memcpy(&t, p, sizeof t); - return t; + return base::UnalignedLoad(p); } inline uint32 UNALIGNED_LOAD32(const void *p) { - uint32 t; - memcpy(&t, p, sizeof t); - return t; + return base::UnalignedLoad(p); } inline uint64 UNALIGNED_LOAD64(const void *p) { - uint64 t; - memcpy(&t, p, sizeof t); - return t; + return base::UnalignedLoad(p); } -inline void UNALIGNED_STORE16(void *p, uint16 v) { memcpy(p, &v, sizeof v); } - -inline void UNALIGNED_STORE32(void *p, uint32 v) { memcpy(p, &v, sizeof v); } - -inline void UNALIGNED_STORE64(void *p, uint64 v) { memcpy(p, &v, sizeof v); } - -#endif - -// The UNALIGNED_LOADW and UNALIGNED_STOREW macros load and store values -// of type uword_t. 
-#ifdef _LP64 -#define UNALIGNED_LOADW(_p) UNALIGNED_LOAD64(_p) -#define UNALIGNED_STOREW(_p, _val) UNALIGNED_STORE64(_p, _val) -#else -#define UNALIGNED_LOADW(_p) UNALIGNED_LOAD32(_p) -#define UNALIGNED_STOREW(_p, _val) UNALIGNED_STORE32(_p, _val) -#endif - -inline void UnalignedCopy16(const void *src, void *dst) { - UNALIGNED_STORE16(dst, UNALIGNED_LOAD16(src)); +inline void UNALIGNED_STORE16(void *p, uint16 v) { + base::UnalignedStore(p, v); } -inline void UnalignedCopy32(const void *src, void *dst) { - UNALIGNED_STORE32(dst, UNALIGNED_LOAD32(src)); +inline void UNALIGNED_STORE32(void *p, uint32 v) { + base::UnalignedStore(p, v); } -inline void UnalignedCopy64(const void *src, void *dst) { - if (sizeof(void *) == 8) { - UNALIGNED_STORE64(dst, UNALIGNED_LOAD64(src)); - } else { - const char *src_char = reinterpret_cast(src); - char *dst_char = reinterpret_cast(dst); - - UNALIGNED_STORE32(dst_char, UNALIGNED_LOAD32(src_char)); - UNALIGNED_STORE32(dst_char + 4, UNALIGNED_LOAD32(src_char + 4)); - } +inline void UNALIGNED_STORE64(void *p, uint64 v) { + base::UnalignedStore(p, v); } #endif // defined(__cplusplus), end of unaligned API -// aligned_malloc, aligned_free -#if defined(__ANDROID__) || defined(__ASYLO__) || defined(_WIN32) -#include // for memalign() -#endif - -// __ASYLO__ platform uses newlib without an underlying OS, which provides -// memalign, but not posix_memalign. 
-#if defined(__cplusplus) && \ - (((defined(__GNUC__) || defined(__APPLE__) || \ - defined(__NVCC__)) && \ - !defined(SWIG)) || \ - ((__GNUC__ >= 3 || defined(__clang__)) && defined(__ANDROID__)) || \ - defined(__ASYLO__)) -inline void *aligned_malloc(size_t size, size_t minimum_alignment) { -#if defined(__ANDROID__) || defined(OS_ANDROID) || defined(__ASYLO__) || defined(_WIN32) || defined(__sun) || defined(sun) -# if defined(_WIN32) - return _aligned_malloc(size, minimum_alignment); -# else - return memalign(minimum_alignment, size); -# endif -#else // !__ANDROID__ && !OS_ANDROID && !__ASYLO__ - // posix_memalign requires that the requested alignment be at least - // sizeof(void*). In this case, fall back on malloc which should return memory - // aligned to at least the size of a pointer. - const size_t required_alignment = sizeof(void*); - if (minimum_alignment < required_alignment) - return malloc(size); - void *ptr = nullptr; - if (posix_memalign(&ptr, minimum_alignment, size) == 0) - return ptr; - return nullptr; -#endif -} - -inline void aligned_free(void *aligned_memory) { - free(aligned_memory); -} - -#elif defined(_MSC_VER) // MSVC - -inline void *aligned_malloc(size_t size, size_t minimum_alignment) { - return _aligned_malloc(size, minimum_alignment); -} - -inline void aligned_free(void *aligned_memory) { - _aligned_free(aligned_memory); -} - -#endif // aligned_malloc, aligned_free - -// ALIGNED_CHAR_ARRAY -// -// Provides a char array with the exact same alignment as another type. The -// first parameter must be a complete type, the second parameter is how many -// of that type to provide space for. -// -// ALIGNED_CHAR_ARRAY(struct stat, 16) storage_; -// -#if defined(__cplusplus) -#undef ALIGNED_CHAR_ARRAY -// Because MSVC and older GCCs require that the argument to their alignment -// construct to be a literal constant integer, we use a template instantiated -// at all the possible powers of two. 
-#ifndef SWIG -template struct AlignType { }; -template struct AlignType<0, size> { typedef char result[size]; }; -#if defined(_MSC_VER) -#define BASE_PORT_H_ALIGN_ATTRIBUTE(X) __declspec(align(X)) -#define BASE_PORT_H_ALIGN_OF(T) __alignof(T) -#elif defined(__GNUC__) || defined(__INTEL_COMPILER) -#define BASE_PORT_H_ALIGN_ATTRIBUTE(X) __attribute__((aligned(X))) -#define BASE_PORT_H_ALIGN_OF(T) __alignof__(T) -#endif - -#if defined(BASE_PORT_H_ALIGN_ATTRIBUTE) - -#define BASE_PORT_H_ALIGNTYPE_TEMPLATE(X) \ - template struct AlignType { \ - typedef BASE_PORT_H_ALIGN_ATTRIBUTE(X) char result[size]; \ - } - -BASE_PORT_H_ALIGNTYPE_TEMPLATE(1); -BASE_PORT_H_ALIGNTYPE_TEMPLATE(2); -BASE_PORT_H_ALIGNTYPE_TEMPLATE(4); -BASE_PORT_H_ALIGNTYPE_TEMPLATE(8); -BASE_PORT_H_ALIGNTYPE_TEMPLATE(16); -BASE_PORT_H_ALIGNTYPE_TEMPLATE(32); -BASE_PORT_H_ALIGNTYPE_TEMPLATE(64); -BASE_PORT_H_ALIGNTYPE_TEMPLATE(128); -BASE_PORT_H_ALIGNTYPE_TEMPLATE(256); -BASE_PORT_H_ALIGNTYPE_TEMPLATE(512); -BASE_PORT_H_ALIGNTYPE_TEMPLATE(1024); -BASE_PORT_H_ALIGNTYPE_TEMPLATE(2048); -BASE_PORT_H_ALIGNTYPE_TEMPLATE(4096); -BASE_PORT_H_ALIGNTYPE_TEMPLATE(8192); -// Any larger and MSVC++ will complain. - -#define ALIGNED_CHAR_ARRAY(T, Size) \ - typename AlignType::result - -#undef BASE_PORT_H_ALIGNTYPE_TEMPLATE -#undef BASE_PORT_H_ALIGN_ATTRIBUTE - -#else // defined(BASE_PORT_H_ALIGN_ATTRIBUTE) -#define ALIGNED_CHAR_ARRAY \ - you_must_define_ALIGNED_CHAR_ARRAY_for_your_compiler_in_base_port_h -#endif // defined(BASE_PORT_H_ALIGN_ATTRIBUTE) - -#else // !SWIG - -// SWIG can't represent alignment and doesn't care about alignment on data -// members (it works fine without it). -template -struct AlignType { typedef char result[Size]; }; -#define ALIGNED_CHAR_ARRAY(T, Size) AlignType::result - -// Enough to parse with SWIG, will never be used by running code. 
-#define BASE_PORT_H_ALIGN_OF(Type) 16 - -#endif // !SWIG -#else // __cplusplus -#define ALIGNED_CHAR_ARRAY ALIGNED_CHAR_ARRAY_is_not_available_without_Cplusplus -#endif // __cplusplus - #endif // S2_BASE_PORT_H_ diff --git a/src/s2/base/stringprintf.cc b/src/s2/base/stringprintf.cc deleted file mode 100644 index 501cdb7a..00000000 --- a/src/s2/base/stringprintf.cc +++ /dev/null @@ -1,107 +0,0 @@ -// Copyright 2002 Google Inc. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS-IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - - -#include "s2/base/stringprintf.h" - -#include -#include // For va_list and related operations -#include // MSVC requires this for _vsnprintf -#include - -#include "s2/base/logging.h" - -#ifdef _MSC_VER -enum { IS__MSC_VER = 1 }; -#else -enum { IS__MSC_VER = 0 }; -#endif - -void StringAppendV(std::string* dst, const char* format, va_list ap) { - // First try with a small fixed size buffer - static const int kSpaceLength = 1024; - char space[kSpaceLength]; - - // It's possible for methods that use a va_list to invalidate - // the data in it upon use. The fix is to make a copy - // of the structure before using it and use that copy instead. - va_list backup_ap; - va_copy(backup_ap, ap); - int result = vsnprintf(space, kSpaceLength, format, backup_ap); - va_end(backup_ap); - - if (result < kSpaceLength) { - if (result >= 0) { - // Normal case -- everything fit. 
- dst->append(space, result); - return; - } - - if (IS__MSC_VER) { - // Error or MSVC running out of space. MSVC 8.0 and higher - // can be asked about space needed with the special idiom below: - va_copy(backup_ap, ap); - result = vsnprintf(nullptr, 0, format, backup_ap); - va_end(backup_ap); - } - - if (result < 0) { - // Just an error. - return; - } - } - - // Increase the buffer size to the size requested by vsnprintf, - // plus one for the closing \0. - int length = result+1; - char* buf = new char[length]; - - // Restore the va_list before we use it again - va_copy(backup_ap, ap); - result = vsnprintf(buf, length, format, backup_ap); - va_end(backup_ap); - - if (result >= 0 && result < length) { - // It fit - dst->append(buf, result); - } - delete[] buf; -} - - -std::string StringPrintf(const char* format, ...) { - va_list ap; - va_start(ap, format); - std::string result; - StringAppendV(&result, format, ap); - va_end(ap); - return result; -} - -const std::string& SStringPrintf(std::string* dst, const char* format, ...) { - va_list ap; - va_start(ap, format); - dst->clear(); - StringAppendV(dst, format, ap); - va_end(ap); - return *dst; -} - -void StringAppendF(std::string* dst, const char* format, ...) { - va_list ap; - va_start(ap, format); - StringAppendV(dst, format, ap); - va_end(ap); -} diff --git a/src/s2/base/stringprintf.h b/src/s2/base/stringprintf.h deleted file mode 100644 index 9a97e2d6..00000000 --- a/src/s2/base/stringprintf.h +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright Google Inc. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS-IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -// NOTE: See third_party/absl/strings for more options. -// -// As of 2017q4, most use of these routines is considered legacy: use -// of absl::StrCat, absl::Substitute, or absl::StrFormat is preferred for -// performance and safety reasons. - -#ifndef S2_BASE_STRINGPRINTF_H_ -#define S2_BASE_STRINGPRINTF_H_ - -#include -#include -#include - -#include "s2/base/port.h" - -// Return a C++ string -extern std::string StringPrintf(const char* format, ...) - // Tell the compiler to do printf format string checking. - ABSL_PRINTF_ATTRIBUTE(1, 2); - -// Store result into a supplied string and return it -extern const std::string& SStringPrintf(std::string* dst, const char* format, ...) - // Tell the compiler to do printf format string checking. - ABSL_PRINTF_ATTRIBUTE(2, 3); - -// Append result to a supplied string -extern void StringAppendF(std::string* dst, const char* format, ...) - // Tell the compiler to do printf format string checking. - ABSL_PRINTF_ATTRIBUTE(2, 3); - -// Lower-level routine that takes a va_list and appends to a specified -// string. All other routines are just convenience wrappers around it. -// -// Implementation note: the va_list is never modified, this implementation -// always operates on copies. -extern void StringAppendV(std::string* dst, const char* format, va_list ap); - -#endif // S2_BASE_STRINGPRINTF_H_ diff --git a/src/s2/base/strtoint.cc b/src/s2/base/strtoint.cc deleted file mode 100644 index b626f2df..00000000 --- a/src/s2/base/strtoint.cc +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright 2008 Google Inc. 
All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS-IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -// -// Architecture-neutral plug compatible replacements for strtol() friends. -// See strtoint.h for details on how to use this component. -// - -#include -#include -#include - -#include "s2/base/integral_types.h" -#include "s2/base/port.h" -#include "s2/base/strtoint.h" - -// Replacement strto[u]l functions that have identical overflow and underflow -// characteristics for both ILP-32 and LP-64 platforms, including errno -// preservation for error-free calls. 
-int32 strto32_adapter(const char *nptr, char **endptr, int base) { - const int saved_errno = errno; - errno = 0; - const long result = strtol(nptr, endptr, base); - if (errno == ERANGE && result == LONG_MIN) { - return std::numeric_limits::min(); - } else if (errno == ERANGE && result == LONG_MAX) { - return std::numeric_limits::max(); - } else if (errno == 0 && result < std::numeric_limits::min()) { - errno = ERANGE; - return std::numeric_limits::min(); - } else if (errno == 0 && result > std::numeric_limits::max()) { - errno = ERANGE; - return std::numeric_limits::max(); - } - if (errno == 0) - errno = saved_errno; - return static_cast(result); -} - -uint32 strtou32_adapter(const char *nptr, char **endptr, int base) { - const int saved_errno = errno; - errno = 0; - const unsigned long result = strtoul(nptr, endptr, base); - if (errno == ERANGE && result == ULONG_MAX) { - return std::numeric_limits::max(); - } else if (errno == 0 && result > std::numeric_limits::max()) { - errno = ERANGE; - return std::numeric_limits::max(); - } - if (errno == 0) - errno = saved_errno; - return static_cast(result); -} diff --git a/src/s2/base/strtoint.h b/src/s2/base/strtoint.h deleted file mode 100644 index 479624fd..00000000 --- a/src/s2/base/strtoint.h +++ /dev/null @@ -1,106 +0,0 @@ -// Copyright 2008 Google Inc. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS-IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -// -// Architecture-neutral plug compatible replacements for strtol() friends. 
-// -// Long's have different lengths on ILP-32 and LP-64 platforms, and so overflow -// behavior across the two varies when strtol() and similar are used to parse -// 32-bit integers. Similar problems exist with atoi(), because although it -// has an all-integer interface, it uses strtol() internally, and so suffers -// from the same narrowing problems on assignments to int. -// -// Examples: -// errno = 0; -// i = strtol("3147483647", nullptr, 10); -// printf("%d, errno %d\n", i, errno); -// // 32-bit platform: 2147483647, errno 34 -// // 64-bit platform: -1147483649, errno 0 -// -// printf("%d\n", atoi("3147483647")); -// // 32-bit platform: 2147483647 -// // 64-bit platform: -1147483649 -// -// A way round this is to define local replacements for these, and use them -// instead of the standard libc functions. -// -// In most 32-bit cases the replacements can be inlined away to a call to the -// libc function. In a couple of 64-bit cases, however, adapters are required, -// to provide the right overflow and errno behavior. -// - -#ifndef S2_BASE_STRTOINT_H_ -#define S2_BASE_STRTOINT_H_ - -#include // For strtol* functions. -#include -#include "s2/base/integral_types.h" -#include "s2/base/port.h" -#include "absl/base/macros.h" - -// Adapter functions for handling overflow and errno. -int32 strto32_adapter(const char *nptr, char **endptr, int base); -uint32 strtou32_adapter(const char *nptr, char **endptr, int base); - -// Conversions to a 32-bit integer can pass the call to strto[u]l on 32-bit -// platforms, but need a little extra work on 64-bit platforms. 
-inline int32 strto32(const char *nptr, char **endptr, int base) { - if (sizeof(int32) == sizeof(long)) - return static_cast(strtol(nptr, endptr, base)); - else - return strto32_adapter(nptr, endptr, base); -} - -inline uint32 strtou32(const char *nptr, char **endptr, int base) { - if (sizeof(uint32) == sizeof(unsigned long)) - return static_cast(strtoul(nptr, endptr, base)); - else - return strtou32_adapter(nptr, endptr, base); -} - -// For now, long long is 64-bit on all the platforms we care about, so these -// functions can simply pass the call to strto[u]ll. -inline int64 strto64(const char *nptr, char **endptr, int base) { - static_assert(sizeof(int64) == sizeof(long long), - "sizeof int64 is not sizeof long long"); - return strtoll(nptr, endptr, base); -} - -inline uint64 strtou64(const char *nptr, char **endptr, int base) { - static_assert(sizeof(uint64) == sizeof(unsigned long long), - "sizeof uint64 is not sizeof long long"); - return strtoull(nptr, endptr, base); -} - -// Although it returns an int, atoi() is implemented in terms of strtol, and -// so has differing overflow and underflow behavior. atol is the same. -inline int32 atoi32(const char *nptr) { - return strto32(nptr, nullptr, 10); -} - -inline int64 atoi64(const char *nptr) { - return strto64(nptr, nullptr, 10); -} - -// Convenience versions of the above that take a string argument. 
-inline int32 atoi32(const std::string &s) { - return atoi32(s.c_str()); -} - -inline int64 atoi64(const std::string &s) { - return atoi64(s.c_str()); -} - -#endif // S2_BASE_STRTOINT_H_ diff --git a/src/s2/encoded_s2cell_id_vector.cc b/src/s2/encoded_s2cell_id_vector.cc index b3ebf205..b93edcee 100644 --- a/src/s2/encoded_s2cell_id_vector.cc +++ b/src/s2/encoded_s2cell_id_vector.cc @@ -17,6 +17,17 @@ #include "s2/encoded_s2cell_id_vector.h" +#include +#include + +#include "s2/base/integral_types.h" +#include "absl/numeric/bits.h" +#include "absl/types/span.h" +#include "s2/util/bits/bits.h" +#include "s2/util/coding/coder.h" +#include "s2/encoded_uint_vector.h" +#include "s2/s2cell_id.h" + using absl::Span; using std::max; using std::min; @@ -62,7 +73,7 @@ void EncodeS2CellIdVector(Span v, Encoder* encoder) { v_max = max(v_max, cellid.id()); } // These variables represent the values that will used during encoding. - uint64 e_base = 0; // Base value. + uint64 e_base = 0; // Base value. int e_base_len = 0; // Number of bytes to represent "base". int e_shift = 0; // Delta shift. int e_max_delta_msb = 0; // Bit position of the MSB of the largest delta. @@ -80,12 +91,14 @@ void EncodeS2CellIdVector(Span v, Encoder* encoder) { uint64 e_bytes = ~0ULL; // Best encoding size so far. for (int len = 0; len < 8; ++len) { // "t_base" is the base value being tested (first "len" bytes of v_min). - // "t_max_delta_msb" is the most-significant bit position of the largest - // delta (or zero if there are no deltas, i.e. if v.size() == 0). - // "t_bytes" is the total size of the variable portion of the encoding. + // "t_max_delta_msb" is the most-significant bit position (i.e. bit-width + // minus one) of the largest delta (or zero if there are no deltas, i.e. + // if v.size() == 0). "t_bytes" is the total size of the variable + // portion of the encoding. 
uint64 t_base = v_min & ~(~0ULL >> (8 * len)); - int t_max_delta_msb = - max(0, Bits::Log2Floor64((v_max - t_base) >> e_shift)); + int t_max_delta_msb = max( + 0, + static_cast(absl::bit_width((v_max - t_base) >> e_shift)) - 1); uint64 t_bytes = len + v.size() * ((t_max_delta_msb >> 3) + 1); if (t_bytes < e_bytes) { e_base = t_base; @@ -137,7 +150,9 @@ bool EncodedS2CellIdVector::Init(Decoder* decoder) { int shift_code = code_plus_len >> 3; if (shift_code == 31) { shift_code = 29 + decoder->get8(); + if (shift_code > 56) return false; // Valid range 0..56 } + // Decode the "base_len" most-significant bytes of "base". int base_len = code_plus_len & 7; if (!DecodeUintWithLength(base_len, decoder, &base_)) return false; @@ -155,7 +170,7 @@ bool EncodedS2CellIdVector::Init(Decoder* decoder) { vector EncodedS2CellIdVector::Decode() const { vector result(size()); - for (int i = 0; i < size(); ++i) { + for (size_t i = 0; i < size(); ++i) { result[i] = (*this)[i]; } return result; diff --git a/src/s2/encoded_s2cell_id_vector.h b/src/s2/encoded_s2cell_id_vector.h index 3b2e7f44..9a4fc374 100644 --- a/src/s2/encoded_s2cell_id_vector.h +++ b/src/s2/encoded_s2cell_id_vector.h @@ -18,7 +18,13 @@ #ifndef S2_ENCODED_S2CELL_ID_VECTOR_H_ #define S2_ENCODED_S2CELL_ID_VECTOR_H_ +#include + +#include + +#include "s2/base/integral_types.h" #include "absl/types/span.h" +#include "s2/util/coding/coder.h" #include "s2/encoded_uint_vector.h" #include "s2/s2cell_id.h" @@ -51,7 +57,7 @@ void EncodeS2CellIdVector(absl::Span v, Encoder* encoder); class EncodedS2CellIdVector { public: // Constructs an uninitialized object; requires Init() to be called. - EncodedS2CellIdVector() {} + EncodedS2CellIdVector() = default; // Initializes the EncodedS2CellIdVector. 
// @@ -102,7 +108,7 @@ inline size_t EncodedS2CellIdVector::lower_bound(S2CellId target) const { if (target.id() <= base_) return 0; if (target >= S2CellId::End(S2CellId::kMaxLevel)) return size(); return deltas_.lower_bound( - (target.id() - base_ + (1ULL << shift_) - 1) >> shift_); + (target.id() - base_ + (uint64{1} << shift_) - 1) >> shift_); } } // namespace s2coding diff --git a/src/s2/encoded_s2point_vector.cc b/src/s2/encoded_s2point_vector.cc index b7fdb237..bd33ec20 100644 --- a/src/s2/encoded_s2point_vector.cc +++ b/src/s2/encoded_s2point_vector.cc @@ -17,10 +17,26 @@ #include "s2/encoded_s2point_vector.h" +#include + +#include +#include +#include + +#include "s2/base/integral_types.h" +#include "s2/base/port.h" #include "absl/base/internal/unaligned_access.h" +#include "absl/numeric/bits.h" +#include "absl/types/span.h" #include "s2/util/bits/bits.h" +#include "s2/util/coding/coder.h" +#include "s2/util/coding/varint.h" +#include "s2/encoded_string_vector.h" +#include "s2/encoded_uint_vector.h" #include "s2/s2cell_id.h" +#include "s2/s2coder.h" #include "s2/s2coords.h" +#include "s2/s2point.h" using absl::MakeSpan; using absl::Span; @@ -33,7 +49,8 @@ namespace s2coding { // Like util_bits::InterleaveUint32, but interleaves bit pairs rather than // individual bits. This format is faster to decode than the fully interleaved // format, and produces the same results for our use case. -inline uint64 InterleaveUint32BitPairs(const uint32 val0, const uint32 val1) { +inline uint64 InterleaveUint32BitPairs(const uint32 val0, + const uint32 val1) { uint64 v0 = val0, v1 = val1; v0 = (v0 | (v0 << 16)) & 0x0000ffff0000ffff; v1 = (v1 | (v1 << 16)) & 0x0000ffff0000ffff; @@ -50,8 +67,8 @@ inline uint64 InterleaveUint32BitPairs(const uint32 val0, const uint32 val1) { // uses a lookup table. 
The speed advantage is expected to be even larger in // code that mixes bit interleaving with other significant operations since it // doesn't require keeping a 256-byte lookup table in the L1 data cache. -inline void DeinterleaveUint32BitPairs(uint64 code, - uint32 *val0, uint32 *val1) { +inline void DeinterleaveUint32BitPairs(uint64 code, uint32* val0, + uint32* val1) { uint64 v0 = code, v1 = code >> 2; v0 &= 0x3333333333333333; v0 |= v0 >> 2; @@ -96,7 +113,7 @@ void EncodeS2PointVector(Span points, CodingHint hint, return EncodeS2PointVectorCompact(points, encoder); default: - S2_LOG(DFATAL) << "Unknown CodingHint: " << static_cast(hint); + S2_LOG(ERROR) << "Unknown CodingHint: " << static_cast(hint); } } @@ -105,7 +122,7 @@ bool EncodedS2PointVector::Init(Decoder* decoder) { // Peek at the format but don't advance the decoder; the format-specific // Init functions will do that. - format_ = static_cast(*decoder->ptr() & kEncodingFormatMask); + format_ = static_cast(*decoder->skip(0) & kEncodingFormatMask); switch (format_) { case UNCOMPRESSED: return InitUncompressedFormat(decoder); @@ -121,12 +138,29 @@ bool EncodedS2PointVector::Init(Decoder* decoder) { vector EncodedS2PointVector::Decode() const { vector points; points.reserve(size_); - for (int i = 0; i < size_; ++i) { + for (size_t i = 0; i < size_; ++i) { points.push_back((*this)[i]); } return points; } +// The encoding must be identical to EncodeS2PointVector(). +void EncodedS2PointVector::Encode(Encoder* encoder) const { + switch (format_) { + case UNCOMPRESSED: + EncodeS2PointVectorFast(MakeSpan(uncompressed_.points, size_), encoder); + break; + + case CELL_IDS: { + // This is a full decode/encode dance, and not at all efficient. 
+ EncodeS2PointVectorCompact(Decode(), encoder); + break; + } + + default: + S2_LOG(FATAL) << "Unknown Format: " << static_cast(format_); + } +} ////////////////////////////////////////////////////////////////////////////// // UNCOMPRESSED Encoding Format @@ -147,7 +181,7 @@ void EncodeS2PointVectorFast(Span points, Encoder* encoder) { // This is followed by an array of S2Points in little-endian order. encoder->Ensure(Varint::kMax64 + points.size() * sizeof(S2Point)); uint64 size_format = (points.size() << kEncodingFormatBits | - EncodedS2PointVector::UNCOMPRESSED); + EncodedS2PointVector::UNCOMPRESSED); encoder->put_varint64(size_format); encoder->putn(points.data(), points.size() * sizeof(S2Point)); } @@ -155,15 +189,15 @@ void EncodeS2PointVectorFast(Span points, Encoder* encoder) { bool EncodedS2PointVector::InitUncompressedFormat(Decoder* decoder) { #if !defined(IS_LITTLE_ENDIAN) || defined(__arm__) || \ defined(ABSL_INTERNAL_NEED_ALIGNED_LOADS) - // TODO(ericv): Make this work on platforms that don't support unaligned - // 64-bit little-endian reads, e.g. by falling back to + // TODO(b/231674214): Make this work on platforms that don't support + // unaligned 64-bit little-endian reads, e.g. by falling back to // // bit_cast(little_endian::Load64()). // // Maybe the compiler is smart enough that we can do this all the time, // but more likely we will need two cases using the #ifdef above. // (Note that even ARMv7 does not support unaligned 64-bit loads.) 
- S2_LOG(DFATAL) << "Needs architecture with 64-bit little-endian unaligned loads"; + S2_LOG(ERROR) << "Needs architecture with 64-bit little-endian unaligned loads"; return false; #endif @@ -179,7 +213,7 @@ bool EncodedS2PointVector::InitUncompressedFormat(Decoder* decoder) { size_t bytes = size_t{size_} * sizeof(S2Point); if (decoder->avail() < bytes) return false; - uncompressed_.points = reinterpret_cast(decoder->ptr()); + uncompressed_.points = reinterpret_cast(decoder->skip(0)); decoder->skip(bytes); return true; } @@ -194,7 +228,7 @@ bool EncodedS2PointVector::InitUncompressedFormat(Decoder* decoder) { struct CellPoint { // Constructor necessary in order to narrow "int" arguments to "int8". CellPoint(int level, int face, uint32 si, uint32 ti) - : level(level), face(face), si(si), ti(ti) {} + : level(level), face(face), si(si), ti(ti) {} int8 level, face; uint32 si, ti; @@ -205,12 +239,12 @@ struct CellPoint { // Block sizes of 4, 8, 16, and 32 were tested and kBlockSize == 16 seems to // offer the best compression. (Note that kBlockSize == 32 requires some code // modifications which have since been removed.) -constexpr int kBlockShift = 4; -constexpr size_t kBlockSize = 1 << kBlockShift; +static constexpr int kBlockShift = 4; +static constexpr size_t kBlockSize = 1 << kBlockShift; // Used to indicate that a point must be encoded as an exception (a 24-byte // S2Point) rather than as an S2CellId. -constexpr uint64 kException = ~0ULL; +static constexpr uint64 kException = ~0ULL; // Represents the encoding parameters to be used for a given block (consisting // of kBlockSize encodable 64-bit values). See below. @@ -221,9 +255,7 @@ struct BlockCode { }; // Returns a bit mask with "n" low-order 1 bits, for 0 <= n <= 64. -inline uint64 BitMask(int n) { - return (n == 0) ? 0 : (~0ULL >> (64 - n)); -} +inline uint64 BitMask(int n) { return (n == 0) ? 0 : (~0ULL >> (64 - n)); } // Returns the maximum number of bits per value at the given S2CellId level. 
inline int MaxBitsForLevel(int level) { @@ -240,9 +272,9 @@ inline int BaseShift(int level, int base_bits) { // Forward declarations. int ChooseBestLevel(Span points, vector* cell_points); vector ConvertCellsToValues(const vector& cell_points, - int level, bool* have_exceptions); -uint64 ChooseBase(const vector& values, int level, bool have_exceptions, - int* base_bits); + int level, bool* have_exceptions); +uint64 ChooseBase(const vector& values, int level, + bool have_exceptions, int* base_bits); BlockCode GetBlockCode(Span values, uint64 base, bool have_exceptions); @@ -292,9 +324,9 @@ void EncodeS2PointVectorCompact(Span points, Encoder* encoder) { // except that it is faster to decode and the spatial locality is not quite // as good. // - // The 64-bit values are divided into blocks of size 8, and then each value is - // encoded as the sum of a base value, a per-block offset, and a per-value - // delta within that block: + // The 64-bit values are divided into blocks of size kBlockSize, and then + // each value is encoded as the sum of a base value, a per-block offset, and + // a per-value delta within that block: // // v[i,j] = base + offset[i] + delta[i, j] // @@ -380,10 +412,11 @@ void EncodeS2PointVectorCompact(Span points, Encoder* encoder) { // // If there are any points that could not be represented as S2CellIds, then // "have_exceptions" in the header is true. In that case the delta values - // within each block are encoded as (delta + 8), and values 0-7 are used to - // represent exceptions. If a block has exceptions, they are encoded - // immediately following the array of deltas, and are referenced by encoding - // the corresponding exception index (0-7) as the delta. + // within each block are encoded as (delta + kBlockSize), and values + // 0...kBlockSize-1 are used to represent exceptions. 
If a block has + // exceptions, they are encoded immediately following the array of deltas, + // and are referenced by encoding the corresponding exception index + // 0...kBlockSize-1 as the delta. // // TODO(ericv): A vector containing a single leaf cell is currently encoded as // 13 bytes (2 byte header, 7 byte base, 1 byte block count, 1 byte block @@ -394,7 +427,7 @@ void EncodeS2PointVectorCompact(Span points, Encoder* encoder) { // (3 bits), followed by the S2CellId bytes. The extra 2 header bits could be // used to store single points using other encodings, e.g. E7. // - // If we wind up using 8-value blocks, we could also use the extra bit in the + // If we had used 8-value blocks, we could have used the extra bit in the // first byte of the header to indicate that there is only one value, and // then skip the 2nd byte of header and the EncodedStringVector. But this // would be messy because it also requires special cases while decoding. @@ -414,8 +447,8 @@ void EncodeS2PointVectorCompact(Span points, Encoder* encoder) { // // TODO(ericv): Benchmark using shifted S2CellIds instead. bool have_exceptions; - vector values = ConvertCellsToValues(cell_points, level, - &have_exceptions); + vector values = + ConvertCellsToValues(cell_points, level, &have_exceptions); // 3. Choose the global encoding parameter "base" (consisting of the bit // prefix shared by all values to be encoded). @@ -443,10 +476,7 @@ void EncodeS2PointVectorCompact(Span points, Encoder* encoder) { // Now we encode the contents of each block. 
StringVectorEncoder blocks; vector exceptions; - uint64 offset_bytes_sum = 0; - uint64 delta_nibbles_sum = 0; - uint64 exceptions_sum = 0; - for (int i = 0; i < values.size(); i += kBlockSize) { + for (size_t i = 0; i < values.size(); i += kBlockSize) { int block_size = min(kBlockSize, values.size() - i); BlockCode code = GetBlockCode(MakeSpan(&values[i], block_size), base, have_exceptions); @@ -515,9 +545,6 @@ void EncodeS2PointVectorCompact(Span points, Encoder* encoder) { block->Ensure(exceptions_bytes); block->putn(exceptions.data(), exceptions_bytes); } - offset_bytes_sum += offset_bytes; - delta_nibbles_sum += delta_nibbles; - exceptions_sum += num_exceptions; } blocks.Encode(encoder); } @@ -569,7 +596,7 @@ int ChooseBestLevel(Span points, // indicated by the value "kException". "have_exceptions" is set to indicate // whether any exceptions were present. vector ConvertCellsToValues(const vector& cell_points, - int level, bool* have_exceptions) { + int level, bool* have_exceptions) { vector values; values.reserve(cell_points.size()); *have_exceptions = false; @@ -595,8 +622,8 @@ vector ConvertCellsToValues(const vector& cell_points, return values; } -uint64 ChooseBase(const vector& values, int level, bool have_exceptions, - int* base_bits) { +uint64 ChooseBase(const vector& values, int level, + bool have_exceptions, int* base_bits) { // Find the minimum and maximum non-exception values to be represented. uint64 v_min = kException, v_max = 0; for (auto v : values) { @@ -619,8 +646,8 @@ uint64 ChooseBase(const vector& values, int level, bool have_exceptions, // 2. The format only allows us to represent up to 7 bytes (56 bits) of // "base", so we need to ensure that "base" conforms to this requirement. int min_delta_bits = (have_exceptions || values.size() == 1) ? 
8 : 4; - int excluded_bits = max(Bits::Log2Floor64(v_min ^ v_max) + 1, - max(min_delta_bits, BaseShift(level, 56))); + int excluded_bits = max(absl::bit_width(v_min ^ v_max), + max(min_delta_bits, BaseShift(level, 56))); uint64 base = v_min & ~BitMask(excluded_bits); // Determine how many bytes are needed to represent this prefix. @@ -645,8 +672,8 @@ uint64 ChooseBase(const vector& values, int level, bool have_exceptions, // Returns true if the range of values [d_min, d_max] can be encoded using the // specified parameters (delta_bits, overlap_bits, and have_exceptions). -bool CanEncode(uint64 d_min, uint64 d_max, int delta_bits, - int overlap_bits, bool have_exceptions) { +bool CanEncode(uint64 d_min, uint64 d_max, int delta_bits, int overlap_bits, + bool have_exceptions) { // "offset" can't represent the lowest (delta_bits - overlap_bits) of d_min. d_min &= ~BitMask(delta_bits - overlap_bits); @@ -721,7 +748,8 @@ BlockCode GetBlockCode(Span values, uint64 base, // // It is possible to show that this last example is the worst case, i.e. we // do not need to consider increasing delta_bits or overlap_bits further. - int delta_bits = (max(1, Bits::Log2Floor64(b_max - b_min)) + 3) & ~3; + int delta_bits = + (max(1, static_cast(absl::bit_width(b_max - b_min)) - 1) + 3) & ~3; int overlap_bits = 0; if (!CanEncode(b_min, b_max, delta_bits, 0, have_exceptions)) { if (CanEncode(b_min, b_max, delta_bits, 4, have_exceptions)) { @@ -736,9 +764,12 @@ BlockCode GetBlockCode(Span values, uint64 base, } } - // Avoid wasting 4 bits of delta when the block size is 1. This reduces the - // encoding size for single leaf cells by one byte. - if (values.size() == 1) { + // When the block size is 1 and no exceptions exist, we have delta_bits == 4 + // and overlap_bits == 0 which wastes 4 bits. We fix this below, which + // among other things reduces the encoding size for single leaf cells by one + // byte. 
(Note that when exceptions exist, delta_bits == 8 and overlap_bits + // may be 0 or 4. These cases are covered by the unit tests.) + if (values.size() == 1 && !have_exceptions) { S2_DCHECK(delta_bits == 4 && overlap_bits == 0); delta_bits = 8; } @@ -798,7 +829,8 @@ S2Point EncodedS2PointVector::DecodeCellIdsFormat(int i) const { // Decode the offset for this block. int offset_shift = (delta_nibbles - overlap_nibbles) << 2; - uint64 offset = GetUintWithLength(ptr, offset_bytes) << offset_shift; + uint64 offset = GetUintWithLength(ptr, offset_bytes) + << offset_shift; ptr += offset_bytes; // Decode the delta for the requested value. diff --git a/src/s2/encoded_s2point_vector.h b/src/s2/encoded_s2point_vector.h index 44ce5eb0..470322d3 100644 --- a/src/s2/encoded_s2point_vector.h +++ b/src/s2/encoded_s2point_vector.h @@ -18,19 +18,22 @@ #ifndef S2_ENCODED_S2POINT_VECTOR_H_ #define S2_ENCODED_S2POINT_VECTOR_H_ +#include + #include +#include + +#include "s2/base/integral_types.h" #include "absl/types/span.h" +#include "s2/util/coding/coder.h" #include "s2/encoded_string_vector.h" #include "s2/encoded_uint_vector.h" +#include "s2/s2coder.h" #include "s2/s2point.h" +#include "s2/s2shape.h" namespace s2coding { -// Controls whether to optimize for speed or size when encoding points. (Note -// that encoding is always lossless, and that currently compact encodings are -// only possible when points have been snapped to S2CellId centers.) -enum class CodingHint : uint8 { FAST, COMPACT }; - // Encodes a vector of S2Points in a format that can later be decoded as an // EncodedS2PointVector. // @@ -68,6 +71,10 @@ class EncodedS2PointVector { // Decodes and returns the entire original vector. std::vector Decode() const; + // Copy the encoded data to the encoder. This allows for "reserialization" of + // encoded shapes created through lazy decoding. + void Encode(Encoder* encoder) const; + // TODO(ericv): Consider adding a method that returns an adjacent pair of // points. 
This would save some decoding overhead. @@ -90,26 +97,6 @@ class EncodedS2PointVector { // TODO(ericv): Once additional formats have been implemented, consider // using std::variant<> instead. It's unclear whether this would have // better or worse performance than the current approach. - - // dd: These structs are anonymous in the upstream S2 code; however, - // this generates CMD-check failure due to the [-Wnested-anon-types] - // (anonymous types declared in an anonymous union are an extension) - // The approach here just names the types. - struct CellIDStruct { - EncodedStringVector blocks; - uint64 base; - uint8 level; - bool have_exceptions; - - // TODO(ericv): Use std::atomic_flag to cache the last point decoded in - // a thread-safe way. This reduces benchmark times for actual polygon - // operations (e.g. S2ClosestEdgeQuery) by about 15%. - }; - - struct UncompressedStruct { - const S2Point* points; - }; - enum Format : uint8 { UNCOMPRESSED = 0, CELL_IDS = 1, @@ -117,8 +104,19 @@ class EncodedS2PointVector { Format format_; uint32 size_; union { - struct UncompressedStruct uncompressed_; - struct CellIDStruct cell_ids_; + struct { + const S2Point* points; + } uncompressed_; + struct { + EncodedStringVector blocks; + uint64 base; + uint8 level; + bool have_exceptions; + + // TODO(ericv): Use std::atomic_flag to cache the last point decoded in + // a thread-safe way. This reduces benchmark times for actual polygon + // operations (e.g. S2ClosestEdgeQuery) by about 15%. 
+ } cell_ids_; }; }; @@ -139,7 +137,7 @@ inline S2Point EncodedS2PointVector::operator[](int i) const { return DecodeCellIdsFormat(i); default: - S2_LOG(DFATAL) << "Unrecognized format"; + S2_DLOG(FATAL) << "Unrecognized format"; return S2Point(); } } diff --git a/src/s2/encoded_s2shape_index.cc b/src/s2/encoded_s2shape_index.cc index e68f2e48..5b6adad9 100644 --- a/src/s2/encoded_s2shape_index.cc +++ b/src/s2/encoded_s2shape_index.cc @@ -17,23 +17,28 @@ #include "s2/encoded_s2shape_index.h" +#include + +#include #include -#include "absl/memory/memory.h" +#include + +#include "s2/base/casts.h" +#include "s2/base/integral_types.h" +#include "s2/util/bits/bits.h" +#include "s2/util/coding/coder.h" +#include "s2/encoded_s2cell_id_vector.h" +#include "s2/encoded_string_vector.h" #include "s2/mutable_s2shape_index.h" +#include "s2/s2cell_id.h" +#include "s2/s2point.h" +#include "s2/s2shape.h" +#include "s2/s2shape_index.h" -using absl::make_unique; +using std::make_unique; using std::unique_ptr; using std::vector; -bool EncodedS2ShapeIndex::Iterator::Locate(const S2Point& target) { - return LocateImpl(target, this); -} - -EncodedS2ShapeIndex::CellRelation EncodedS2ShapeIndex::Iterator::Locate( - S2CellId target) { - return LocateImpl(target, this); -} - unique_ptr EncodedS2ShapeIndex::Iterator::Clone() const { return make_unique(*this); @@ -49,33 +54,51 @@ S2Shape* EncodedS2ShapeIndex::GetShape(int id) const { if (shape) shape->id_ = id; S2Shape* expected = kUndecodedShape(); if (shapes_[id].compare_exchange_strong(expected, shape.get(), - std::memory_order_relaxed)) { + std::memory_order_acq_rel)) { return shape.release(); // Ownership has been transferred to shapes_. } - return shapes_[id].load(std::memory_order_relaxed); + return expected; // Another thread updated shapes_[id] first. 
} inline const S2ShapeIndexCell* EncodedS2ShapeIndex::GetCell(int i) const { - if (cell_decoded(i)) { - auto cell = cells_[i].load(std::memory_order_acquire); - if (cell != nullptr) return cell; - } - // We decode the cell before acquiring the spinlock in order to minimize the + // memory_order_release ensures that no reads or writes in the current + // thread can be reordered after this store, and all writes in the current + // thread are visible to other threads that acquire the same atomic + // variable. + // + // memory_order_acquire ensures that no reads or writes in the current + // thread can be reordered before this load, and all writes in other threads + // that release the same atomic variable are visible in this thread. + // + // We use this to implement lock-free synchronization on the read path as + // follows: + // + // 1. cells_decoded(i) is updated using acquire/release semantics + // 2. cells_[i] is written before cells_decoded(i) + // 3. cells_[i] is read after cells_decoded(i) + // + // Note that we do still use a lock for the write path to ensure that + // cells_[i] and cell_decoded(i) are updated together atomically. + if (cell_decoded(i)) return cells_[i]; + + // Decode the cell before acquiring the spinlock in order to minimize the // time that the lock is held. auto cell = make_unique(); Decoder decoder = encoded_cells_.GetDecoder(i); if (!cell->Decode(num_shape_ids(), &decoder)) { return nullptr; } + // Recheck cell_decoded(i) once we hold the lock in case another thread + // has decoded this cell in the meantime. SpinLockHolder l(&cells_lock_); - if (test_and_set_cell_decoded(i)) { - // This cell has already been decoded. - return cells_[i].load(std::memory_order_relaxed); - } - if (cell_cache_.size() < max_cell_cache_size()) { + if (cell_decoded(i)) return cells_[i]; + + // Update the cell, setting cells_[i] before cell_decoded(i). 
+ cells_[i] = cell.get(); + set_cell_decoded(i); + if (cell_cache_.size() < static_cast(max_cell_cache_size())) { cell_cache_.push_back(i); } - cells_[i].store(cell.get(), std::memory_order_relaxed); return cell.release(); // Ownership has been transferred to cells_. } @@ -83,8 +106,7 @@ const S2ShapeIndexCell* EncodedS2ShapeIndex::Iterator::GetCell() const { return index_->GetCell(cell_pos_); } -EncodedS2ShapeIndex::EncodedS2ShapeIndex() { -} +EncodedS2ShapeIndex::EncodedS2ShapeIndex() = default; EncodedS2ShapeIndex::~EncodedS2ShapeIndex() { // Although Minimize() does slightly more than required for destruction @@ -107,7 +129,7 @@ bool EncodedS2ShapeIndex::Init(Decoder* decoder, // AtomicShape is a subtype of std::atomic that changes the // default constructor value to kUndecodedShape(). This saves the effort of // initializing all the elements twice. - shapes_ = std::vector(shape_factory.size()); + shapes_ = vector(shape_factory.size()); shape_factory_ = shape_factory.Clone(); if (!cell_ids_.Init(decoder)) return false; @@ -118,17 +140,18 @@ bool EncodedS2ShapeIndex::Init(Decoder* decoder, // need to initialize one bit per cell to zero. // // For very large S2ShapeIndexes the internal memset() call to initialize - // cells_decoded_ still takes about 4 microseconds per million cells, but - // this seems reasonable relative to other likely costs (I/O, etc). + // cells_decoded_ still takes about 1.3 microseconds per million cells + // (assuming an optimized implementation that writes 32 bytes per cycle), + // but this seems reasonable relative to other likely costs (I/O, etc). // // NOTE(ericv): DO NOT use make_unique<> here! make_unique<> allocates memory // using "new T[n]()", which initializes all elements of the array. This // slows down some benchmarks by over 100x. 
// - // cells_ = make_unique[]>(cell_ids_.size()); + // cells_ = make_unique[](cell_ids_.size()); // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ // NO NO NO - cells_.reset(new std::atomic[cell_ids_.size()]); + cells_.reset(new S2ShapeIndexCell*[cell_ids_.size()]); cells_decoded_ = vector>((cell_ids_.size() + 63) >> 6); return encoded_cells_.Init(decoder); @@ -144,23 +167,23 @@ void EncodedS2ShapeIndex::Minimize() { delete shape; } } - if (cell_cache_.size() < max_cell_cache_size()) { + if (cell_cache_.size() < static_cast(max_cell_cache_size())) { // When only a tiny fraction of the cells are decoded, we keep track of // those cells in cell_cache_ to avoid the cost of scanning the // cells_decoded_ vector. (The cost is only about 1 cycle per 64 cells, // but for a huge polygon with 1 million cells that's still 16000 cycles.) for (int pos : cell_cache_) { cells_decoded_[pos >> 6].store(0, std::memory_order_relaxed); - delete cells_[pos].load(std::memory_order_relaxed); + delete cells_[pos]; } } else { // Scan the cells_decoded_ vector looking for cells that must be deleted. - for (int i = cells_decoded_.size(), base = 0; --i >= 0; base += 64) { + for (int i = cells_decoded_.size(); --i >= 0;) { uint64 bits = cells_decoded_[i].load(std::memory_order_relaxed); if (bits == 0) continue; do { int offset = Bits::FindLSBSetNonZero64(bits); - delete cells_[(i << 6) + offset].load(std::memory_order_relaxed); + delete cells_[(i << 6) + offset]; bits &= bits - 1; } while (bits != 0); cells_decoded_[i].store(0, std::memory_order_relaxed); @@ -170,7 +193,7 @@ void EncodedS2ShapeIndex::Minimize() { } size_t EncodedS2ShapeIndex::SpaceUsed() const { - // TODO(ericv): Add SpaceUsed() method to S2Shape base class,and Include + // TODO(ericv): Add SpaceUsed() method to S2Shape base class,and include // memory owned by the allocated S2Shapes (here and in S2ShapeIndex). 
size_t size = sizeof(*this); size += shapes_.capacity() * sizeof(std::atomic); diff --git a/src/s2/encoded_s2shape_index.h b/src/s2/encoded_s2shape_index.h index 3232474b..540a3154 100644 --- a/src/s2/encoded_s2shape_index.h +++ b/src/s2/encoded_s2shape_index.h @@ -18,10 +18,112 @@ #ifndef S2_ENCODED_S2SHAPE_INDEX_H_ #define S2_ENCODED_S2SHAPE_INDEX_H_ +#include + +#include +#include +#include + +#include "s2/base/integral_types.h" +#include "absl/strings/cord.h" +#include "s2/util/coding/coder.h" #include "s2/encoded_s2cell_id_vector.h" #include "s2/encoded_string_vector.h" #include "s2/mutable_s2shape_index.h" - +#include "s2/s2cell_id.h" +#include "s2/s2point.h" +#include "s2/s2shape.h" +#include "s2/s2shape_index.h" + +// EncodedS2ShapeIndex is an S2ShapeIndex implementation that works directly +// with encoded data. Rather than decoding everything in advance, geometry is +// decoded incrementally (down to individual edges) as needed. It can be +// initialized from a single block of data in nearly constant time (about 1.3 +// microseconds per million edges). This saves large amounts of memory and is +// also much faster in the common situation where geometric data is loaded +// from somewhere, decoded, and then only a single operation is performed on +// it. It supports all S2ShapeIndex operations including boolean operations, +// measuring distances, etc. +// +// The speedups can be over 1000x for large geometric objects. For example +// vertices and 50,000 loops. If this geometry is represented as an +// S2Polygon, then simply decoding it takes ~250ms and building its internal +// S2ShapeIndex takes a further ~1500ms. These times are much longer than the +// time needed for many operations, e.g. e.g. measuring the distance from the +// polygon to one of its vertices takes only about 0.001ms. +// +// If the same geometry is represented using EncodedLaxPolygonShape and +// EncodedS2ShapeIndex, initializing the index takes only 0.005ms. 
The +// distance measuring operation itself takes slightly longer than before +// (0.0013ms vs. the original 0.001ms) but the overall time is now much lower +// (~0.007ms vs. 1750ms). This is possible because the new classes decode +// data lazily (only when it is actually needed) and with fine granularity +// (down to the level of individual edges). The overhead associated with this +// incremental decoding is small; operations are typically 25% slower compared +// to fully decoding the MutableS2ShapeIndex and its underlying shapes. +// +// EncodedS2ShapeIndex also uses less memory than MutableS2ShapeIndex. The +// encoded data is contiguous block of memory that is typically between 4-20% +// of the original index size (see MutableS2ShapeIndex::Encode for examples). +// Constructing the EncodedS2ShapeIndex uses additional memory, but even so +// the total memory usage immediately after construction is typically 25-35% +// of the corresponding MutableS2ShapeIndex size. +// +// Note that MutableS2ShapeIndex will still be faster and use less memory if +// you need to decode the entire index. Similarly MutableS2ShapeIndex will be +// faster if you plan to execute a large number of operations on it. The main +// advantage of EncodedS2ShapeIndex is that it is much faster and uses less +// memory when only a small portion of the data needs to be decoded. +// +// Example code showing how to create an encoded index: +// +// Encoder encoder; +// s2shapeutil::CompactEncodeTaggedShapes(index, encoder); +// index.Encode(encoder); +// string encoded(encoder.base(), encoder.length()); // Encoded data. +// +// Example code showing how to use an encoded index: +// +// Decoder decoder(encoded.data(), encoded.size()); +// EncodedS2ShapeIndex index; +// index.Init(&decoder, s2shapeutil::LazyDecodeShapeFactory(&decoder)); +// S2ClosestEdgeQuery query(&index); +// S2ClosestEdgeQuery::PointTarget target(test_point); +// if (query.IsDistanceLessOrEqual(&target, limit)) { +// ... 
+// } +// +// Note that EncodedS2ShapeIndex does not make a copy of the encoded data, and +// therefore the client must ensure that this data outlives the +// EncodedS2ShapeIndex object. +// +// There are a number of built-in classes that work with S2ShapeIndex objects. +// Generally these classes accept any collection of geometry that can be +// represented by an S2ShapeIndex, i.e. any combination of points, polylines, +// and polygons. Such classes include: +// +// - S2ContainsPointQuery: returns the shape(s) that contain a given point. +// +// - S2ClosestEdgeQuery: returns the closest edge(s) to a given point, edge, +// S2CellId, or S2ShapeIndex. +// +// - S2CrossingEdgeQuery: returns the edge(s) that cross a given edge. +// +// - S2BooleanOperation: computes boolean operations such as union, +// and boolean predicates such as containment. +// +// - S2ShapeIndexRegion: can be used together with S2RegionCoverer to +// approximate geometry as a set of S2CellIds. +// +// - S2ShapeIndexBufferedRegion: computes approximations that have been +// expanded by a given radius. +// +// EncodedS2ShapeIndex is thread-compatible, meaning that const methods are +// thread safe, and non-const methods are not thread safe. The only non-const +// method is Minimize(), so if you plan to call Minimize() while other threads +// are actively using the index that you must use an external reader-writer +// lock such as absl::Mutex to guard access to it. (There is no global state +// and therefore each index can be guarded independently.) 
class EncodedS2ShapeIndex final : public S2ShapeIndex { public: using Options = MutableS2ShapeIndex::Options; @@ -88,14 +190,20 @@ class EncodedS2ShapeIndex final : public S2ShapeIndex { // bool done() const; // S2Point center() const; - // IteratorBase API: + // S2CellIterator API: void Begin() override; void Finish() override; void Next() override; bool Prev() override; void Seek(S2CellId target) override; - bool Locate(const S2Point& target) override; - CellRelation Locate(S2CellId target) override; + + bool Locate(const S2Point& target) override { + return LocateImpl(*this, target); + } + + S2CellRelation Locate(S2CellId target) override { + return LocateImpl(*this, target); + } protected: const S2ShapeIndexCell* GetCell() const override; @@ -134,7 +242,7 @@ class EncodedS2ShapeIndex final : public S2ShapeIndex { S2Shape* GetShape(int id) const; const S2ShapeIndexCell* GetCell(int i) const; bool cell_decoded(int i) const; - bool test_and_set_cell_decoded(int i) const; + void set_cell_decoded(int i) const; int max_cell_cache_size() const; std::unique_ptr shape_factory_; @@ -156,7 +264,7 @@ class EncodedS2ShapeIndex final : public S2ShapeIndex { // A raw array containing the decoded contents of each cell in the index. // Initially all values are *uninitialized memory*. The cells_decoded_ // field below keeps track of which elements are present. - mutable std::unique_ptr[]> cells_; + mutable std::unique_ptr cells_; // A bit vector indicating which elements of cells_ have been decoded. // All other elements of cells_ contain uninitialized (random) memory. 
@@ -236,28 +344,31 @@ inline void EncodedS2ShapeIndex::Iterator::Seek(S2CellId target) { inline std::unique_ptr EncodedS2ShapeIndex::NewIterator(InitialPosition pos) const { - return absl::make_unique(this, pos); + return std::make_unique(this, pos); } inline S2Shape* EncodedS2ShapeIndex::shape(int id) const { - S2Shape* shape = shapes_[id].load(std::memory_order_relaxed); + S2Shape* shape = shapes_[id].load(std::memory_order_acquire); if (shape != kUndecodedShape()) return shape; return GetShape(id); } -// Returns true if the given cell has been decoded yet. +// Returns true if the given cell has already been decoded. inline bool EncodedS2ShapeIndex::cell_decoded(int i) const { - uint64 group_bits = cells_decoded_[i >> 6].load(std::memory_order_relaxed); + // cell_decoded(i) uses acquire/release synchronization (see .cc file). + uint64 group_bits = cells_decoded_[i >> 6].load(std::memory_order_acquire); return (group_bits & (1ULL << (i & 63))) != 0; } -// Marks the given cell as decoded and returns true if it was already marked. -inline bool EncodedS2ShapeIndex::test_and_set_cell_decoded(int i) const { +// Marks the given cell as having been decoded. +// REQUIRES: cells_lock_ is held +inline void EncodedS2ShapeIndex::set_cell_decoded(int i) const { + // We use memory_order_release for the store operation below to ensure that + // cells_decoded(i) sees the most recent value, however we can use + // memory_order_relaxed for the load because cells_lock_ is held. 
std::atomic* group = &cells_decoded_[i >> 6]; - uint64 group_bits = group->load(std::memory_order_relaxed); - uint64 test_bit = 1ULL << (i & 63); - group->store(group_bits | test_bit, std::memory_order_relaxed); - return (group_bits & test_bit) != 0; + uint64 bits = group->load(std::memory_order_relaxed); + group->store(bits | 1ULL << (i & 63), std::memory_order_release); } inline int EncodedS2ShapeIndex::max_cell_cache_size() const { diff --git a/src/s2/encoded_string_vector.cc b/src/s2/encoded_string_vector.cc index 2a6d7a58..10c775ca 100644 --- a/src/s2/encoded_string_vector.cc +++ b/src/s2/encoded_string_vector.cc @@ -17,15 +17,27 @@ #include "s2/encoded_string_vector.h" +#include +#include + +#include +#include + +#include "s2/base/integral_types.h" +#include "absl/strings/string_view.h" +#include "absl/types/span.h" +#include "s2/util/coding/coder.h" +#include "s2/encoded_uint_vector.h" + using absl::MakeSpan; using absl::Span; using absl::string_view; +using std::string; using std::vector; namespace s2coding { -StringVectorEncoder::StringVectorEncoder() { -} +StringVectorEncoder::StringVectorEncoder() = default; void StringVectorEncoder::Encode(Encoder* encoder) { offsets_.push_back(data_.length()); @@ -37,7 +49,7 @@ void StringVectorEncoder::Encode(Encoder* encoder) { encoder->putn(data_.base(), data_.length()); } -void StringVectorEncoder::Encode(Span v, Encoder* encoder) { +void StringVectorEncoder::Encode(Span v, Encoder* encoder) { StringVectorEncoder string_vector; for (const auto& str : v) string_vector.Add(str); string_vector.Encode(encoder); @@ -45,7 +57,7 @@ void StringVectorEncoder::Encode(Span v, Encoder* encoder) { bool EncodedStringVector::Init(Decoder* decoder) { if (!offsets_.Init(decoder)) return false; - data_ = reinterpret_cast(decoder->ptr()); + data_ = decoder->skip(0); if (offsets_.size() > 0) { uint64 length = offsets_[offsets_.size() - 1]; if (decoder->avail() < length) return false; @@ -57,10 +69,21 @@ bool 
EncodedStringVector::Init(Decoder* decoder) { vector EncodedStringVector::Decode() const { size_t n = size(); vector result(n); - for (int i = 0; i < n; ++i) { + for (size_t i = 0; i < n; ++i) { result[i] = (*this)[i]; } return result; } +// The encoding must be identical to StringVectorEncoder::Encode(). +void EncodedStringVector::Encode(Encoder* encoder) const { + offsets_.Encode(encoder); + + if (offsets_.size() > 0) { + const uint64 length = offsets_[offsets_.size() - 1]; + encoder->Ensure(length); + encoder->putn(data_, length); + } +} + } // namespace s2coding diff --git a/src/s2/encoded_string_vector.h b/src/s2/encoded_string_vector.h index bef16977..da6b6a08 100644 --- a/src/s2/encoded_string_vector.h +++ b/src/s2/encoded_string_vector.h @@ -18,10 +18,16 @@ #ifndef S2_ENCODED_STRING_VECTOR_H_ #define S2_ENCODED_STRING_VECTOR_H_ +#include + #include #include +#include + +#include "s2/base/integral_types.h" #include "absl/strings/string_view.h" #include "absl/types/span.h" +#include "s2/util/coding/coder.h" #include "s2/encoded_uint_vector.h" namespace s2coding { @@ -84,7 +90,7 @@ class StringVectorEncoder { class EncodedStringVector { public: // Constructs an uninitialized object; requires Init() to be called. - EncodedStringVector() {} + EncodedStringVector() = default; // Initializes the EncodedStringVector. Returns false on errors, leaving // the vector in an unspecified state. @@ -113,6 +119,8 @@ class EncodedStringVector { // no longer needed. 
std::vector Decode() const; + void Encode(Encoder* encoder) const; + private: EncodedUintVector offsets_; const char* data_; diff --git a/src/s2/encoded_uint_vector.h b/src/s2/encoded_uint_vector.h index 9ca50ba0..c4a3732a 100644 --- a/src/s2/encoded_uint_vector.h +++ b/src/s2/encoded_uint_vector.h @@ -18,11 +18,19 @@ #ifndef S2_ENCODED_UINT_VECTOR_H_ #define S2_ENCODED_UINT_VECTOR_H_ +#include + +#include #include #include + +#include "s2/base/integral_types.h" #include "absl/base/internal/unaligned_access.h" #include "absl/types/span.h" + +#include "s2/util/bits/bits.h" #include "s2/util/coding/coder.h" +#include "s2/util/coding/varint.h" namespace s2coding { @@ -59,7 +67,7 @@ class EncodedUintVector { static_assert(sizeof(T) & 0xe, "Unsupported integer length"); // Constructs an uninitialized object; requires Init() to be called. - EncodedUintVector() {} + EncodedUintVector() = default; // Initializes the EncodedUintVector. Returns false on errors, leaving the // vector in an unspecified state. @@ -85,6 +93,8 @@ class EncodedUintVector { // Decodes and returns the entire original vector. std::vector Decode() const; + void Encode(Encoder* encoder) const; + private: template size_t lower_bound(T target) const; @@ -111,7 +121,7 @@ void EncodeUintWithLength(T value, int length, Encoder* encoder); // REQUIRES: 2 <= sizeof(T) <= 8 // REQUIRES: 0 <= length <= sizeof(T) template -T GetUintWithLength(const void* ptr, int length); +T GetUintWithLength(const char* ptr, int length); // Decodes and consumes a variable-length integer consisting of "length" bytes // in little-endian format. Returns false if not enough bytes are available. 
@@ -185,8 +195,8 @@ inline T GetUintWithLength(const char* ptr, int length) { template bool DecodeUintWithLength(int length, Decoder* decoder, T* result) { - if (decoder->avail() < length) return false; - const char* ptr = reinterpret_cast(decoder->ptr()); + if (decoder->avail() < static_cast(length)) return false; + const char* ptr = decoder->skip(0); *result = GetUintWithLength(ptr, length); decoder->skip(length); return true; @@ -223,9 +233,9 @@ bool EncodedUintVector::Init(Decoder* decoder) { size_ = size_len / sizeof(T); // Optimized into bit shift. len_ = (size_len & (sizeof(T) - 1)) + 1; if (size_ > std::numeric_limits::max() / sizeof(T)) return false; - size_t bytes = size_ * len_; + size_t bytes = static_cast(size_) * static_cast(len_); if (decoder->avail() < bytes) return false; - data_ = reinterpret_cast(decoder->ptr()); + data_ = decoder->skip(0); decoder->skip(bytes); return true; } @@ -294,6 +304,16 @@ std::vector EncodedUintVector::Decode() const { return result; } +template +// The encoding must be identical to StringVectorEncoder::Encode(). 
+void EncodedUintVector::Encode(Encoder* encoder) const { + uint64 size_len = (uint64{size_} * sizeof(T)) | (len_ - 1); + + encoder->Ensure(Varint::kMax64 + size_len); + encoder->put_varint64(size_len); + encoder->putn(data_, size_ * len_); +} + } // namespace s2coding #endif // S2_ENCODED_UINT_VECTOR_H_ diff --git a/src/s2/id_set_lexicon.cc b/src/s2/id_set_lexicon.cc index f4dd9114..9f7c6c2e 100644 --- a/src/s2/id_set_lexicon.cc +++ b/src/s2/id_set_lexicon.cc @@ -18,15 +18,17 @@ #include "s2/id_set_lexicon.h" #include +#include #include -#include "s2/base/logging.h" +#include "s2/base/integral_types.h" +#include "s2/sequence_lexicon.h" -IdSetLexicon::IdSetLexicon() { -} +using std::vector; -IdSetLexicon::~IdSetLexicon() { -} +IdSetLexicon::IdSetLexicon() = default; + +IdSetLexicon::~IdSetLexicon() = default; // We define the copy/move constructors and assignment operators explicitly // in order to avoid copying/moving the temporary storage vector "tmp_". @@ -51,7 +53,7 @@ void IdSetLexicon::Clear() { id_sets_.Clear(); } -int32 IdSetLexicon::AddInternal(std::vector* ids) { +int32 IdSetLexicon::AddInternal(vector* ids) { if (ids->empty()) { // Empty sets have a special id chosen not to conflict with other ids. return kEmptySetId; @@ -62,6 +64,10 @@ int32 IdSetLexicon::AddInternal(std::vector* ids) { // Canonicalize the set by sorting and removing duplicates. std::sort(ids->begin(), ids->end()); ids->erase(std::unique(ids->begin(), ids->end()), ids->end()); + + // After eliminating duplicates, we may now have a singleton. + if (ids->size() == 1) return (*ids)[0]; + // Non-singleton sets are represented by the bitwise complement of the id // returned by SequenceLexicon. 
return ~id_sets_.Add(*ids); diff --git a/src/s2/id_set_lexicon.h b/src/s2/id_set_lexicon.h index 25ace84c..db8443b9 100644 --- a/src/s2/id_set_lexicon.h +++ b/src/s2/id_set_lexicon.h @@ -18,6 +18,9 @@ #ifndef S2_ID_SET_LEXICON_H_ #define S2_ID_SET_LEXICON_H_ +#include + +#include #include #include @@ -65,8 +68,8 @@ // This class is similar to SequenceLexicon, except: // // 1. Empty and singleton sets are represented implicitly; they use no space. -// 2. Sets are represented rather than sequences; the ordering of values is -// not important and duplicates are removed. +// 2. Sets are represented rather than sequences; values are reordered to be in +// sorted order, and duplicates are removed. // 3. The values must be 32-bit non-negative integers (only). class IdSetLexicon { public: @@ -114,6 +117,8 @@ class IdSetLexicon { // This class represents a set of integers stored in the IdSetLexicon. class IdSet { public: + using value_type = const int32; + Iterator begin() const; Iterator end() const; size_t size() const; @@ -132,7 +137,7 @@ class IdSetLexicon { private: // Choose kEmptySetId to be the last id that will ever be generated. // (Non-negative ids are reserved for singleton sets.) 
- static const int32 kEmptySetId = std::numeric_limits::min(); + static constexpr int32 kEmptySetId = std::numeric_limits::min(); int32 AddInternal(std::vector* ids); SequenceLexicon id_sets_; diff --git a/src/s2/mutable_s2shape_index.cc b/src/s2/mutable_s2shape_index.cc index a81ee9cf..3e29fed6 100644 --- a/src/s2/mutable_s2shape_index.cc +++ b/src/s2/mutable_s2shape_index.cc @@ -17,13 +17,25 @@ #include "s2/mutable_s2shape_index.h" +#include + #include #include #include +#include +#include +#include #include "s2/base/casts.h" #include "s2/base/commandlineflags.h" -#include "s2/base/spinlock.h" +#include "s2/base/integral_types.h" +#include "absl/base/attributes.h" +#include "absl/container/btree_map.h" +#include "absl/flags/flag.h" +#include "absl/synchronization/mutex.h" +#include "absl/utility/utility.h" +#include "s2/util/coding/coder.h" +#include "s2/util/coding/varint.h" #include "s2/encoded_s2cell_id_vector.h" #include "s2/encoded_string_vector.h" #include "s2/r1interval.h" @@ -34,30 +46,38 @@ #include "s2/s2coords.h" #include "s2/s2edge_clipping.h" #include "s2/s2edge_crosser.h" +#include "s2/s2memory_tracker.h" #include "s2/s2metrics.h" #include "s2/s2padded_cell.h" -#include "s2/s2pointutil.h" +#include "s2/s2point.h" +#include "s2/s2shape.h" +#include "s2/s2shape_index.h" #include "s2/s2shapeutil_contains_brute_force.h" +#include "s2/s2shapeutil_shape_edge_id.h" +#include "s2/util/gtl/compact_array.h" +#include "s2/util/math/mathutil.h" using std::fabs; +using std::make_pair; +using std::make_unique; using std::max; +using std::min; using std::unique_ptr; using std::vector; // FLAGS_s2shape_index_default_max_edges_per_cell // -// The default maximum number of edges per cell (not counting "long" edges). +// The default maximum number of edges per cell (not counting 'long' edges). // If a cell has more than this many edges, and it is not a leaf cell, then it // is subdivided. This flag can be overridden via MutableS2ShapeIndex::Options. 
// Reasonable values range from 10 to about 50 or so. -DEFINE_int32( +S2_DEFINE_int32( s2shape_index_default_max_edges_per_cell, 10, - "Default maximum number of edges (not counting 'long' edges) per cell; " + "Default maximum number of edges per cell (not counting 'long' edges); " "reasonable values range from 10 to 50. Small values makes queries " - "faster, while large values make construction faster and use less " - "memory."); + "faster, while large values make construction faster and use less memory."); -// FLAGS_s2shape_index_tmp_memory_budget_mb +// FLAGS_s2shape_index_tmp_memory_budget // // Attempt to limit the amount of temporary memory allocated while building or // updating a MutableS2ShapeIndex to at most this value. This is achieved by @@ -70,33 +90,88 @@ DEFINE_int32( // with huge numbers of edges may exceed the budget; // (3) shapes being removed are always processed in a single batch. (This // could be fixed, but it seems better to keep the code simpler for now.) -DEFINE_int32( - s2shape_index_tmp_memory_budget_mb, 100, +S2_DEFINE_int64( + s2shape_index_tmp_memory_budget, int64{100} << 20 /*100 MB*/, "Attempts to limit the amount of temporary memory used by " - "MutableS2ShapeIndex when creating or updating very large indexes " - "to at most this value. If more memory than this is needed, updates " + "MutableS2ShapeIndex when creating or updating very large indexes to at " + "most this number of bytes. If more memory than this is needed, updates " "will automatically be split into batches internally."); // FLAGS_s2shape_index_cell_size_to_long_edge_ratio // -// The cell size relative to the length of an edge at which it is first -// considered to be "long". Long edges do not contribute toward the decision -// to subdivide a cell further. For example, a value of 2.0 means that the -// cell must be at least twice the size of the edge in order for that edge to -// be counted. 
There are two reasons for not counting long edges: (1) such -// edges typically need to be propagated to several children, which increases -// time and memory costs without much benefit, and (2) in pathological cases, -// many long edges close together could force subdivision to continue all the -// way to the leaf cell level. -DEFINE_double( +// The maximum cell size, relative to an edge's length, for which that edge is +// considered 'long'. Cell size is defined as the average edge length of all +// cells at a given level. For example, a value of 2.0 means that an edge E +// is long at cell level k iff the average edge length at level k is at most +// twice the length of E. Long edges are not counted towards the +// max_edges_per_cell() limit because such edges typically need to be +// propagated to several children, which increases time and memory costs +// without commensurate benefits. +S2_DEFINE_double( s2shape_index_cell_size_to_long_edge_ratio, 1.0, - "The cell size relative to the length of an edge at which it is first " - "considered to be 'long'. Long edges do not contribute to the decision " - "to subdivide a cell further. The size and speed of the index are " + "The maximum cell size, relative to an edge's length, for which that " + "edge is considered 'long'. Long edges are not counted towards the " + "max_edges_per_cell() limit. The size and speed of the index are " "typically not very sensitive to this parameter. Reasonable values range " "from 0.1 to 10, with smaller values causing more aggressive subdivision " "of long edges grouped closely together."); +// FLAGS_s2shape_index_min_short_edge_fraction +// +// The minimum fraction of 'short' edges that must be present in a cell in +// order for it to be subdivided. 
If this parameter is non-zero then the +// total index size and construction time are guaranteed to be linear in the +// number of input edges; this prevents the worst-case quadratic space and +// time usage that can otherwise occur with certain input configurations. +// Specifically, the maximum index size is +// +// O((c1 + c2 * (1 - f) / f) * n) +// +// where n is the number of input edges, f is this parameter value, and +// constant c2 is roughly 20 times larger than constant c1. (The exact values +// of c1 and c2 depend on the cell_size_to_long_edge_ratio and +// max_edges_per_cell parameters and certain properties of the input geometry +// such as whether it consists of O(1) shapes, whether it includes polygons, +// and whether the polygon interiors are disjoint.) +// +// Reasonable parameter values range from 0.1 up to perhaps 0.95. The main +// factors to consider when choosing this parameter are: +// +// - For pathological geometry, larger values result in indexes that are +// smaller and faster to construct but have worse query performance (due to +// having more edges per cell). However note that even a setting of 0.1 +// reduces the worst case by 100x compared with a setting of 0.001. +// +// - For normal geometry, values up to about 0.8 result in indexes that are +// virtually unchanged except for a slight increase in index construction +// time (proportional to the parameter value f) for very large inputs. +// With millions of edges, indexing time increases by about (15% * f), +// e.g. a parameter value of 0.5 slows down indexing for very large inputs +// by about 7.5%. (Indexing time for small inputs is not affected.) +// +// - Values larger than about 0.8 start to affect index construction even for +// normal geometry, resulting in smaller indexes and faster construction +// times but gradually worse query performance. +// +// Essentially this parameter provides control over a space-time tradeoff that +// largely affects only pathological geometry. 
The default value of 0.2 was +// chosen to make index construction as fast as possible while still +// protecting against possible quadratic space usage. +S2_DEFINE_double( + s2shape_index_min_short_edge_fraction, 0.2, + "The minimum fraction of 'short' edges that must be present in a cell in " + "order for it to be subdivided. If this parameter is non-zero then the " + "total index size and construction time are guaranteed to be linear in the " + "number of input edges, where the constant of proportionality has the " + "form (c1 + c2 * (1 - f) / f). Reasonable values range from 0.1 to " + "perhaps 0.95. Values up to about 0.8 have almost no effect on 'normal' " + "geometry except for a small increase in index construction time " + "(proportional to f) for very large inputs. For worst-case geometry, " + "larger parameter values result in indexes that are smaller and faster " + "to construct but have worse query performance (due to having more edges " + "per cell). Essentially this parameter provides control over a space-time " + "tradeoff that largely affects only pathological geometry."); + // The total error when clipping an edge comes from two sources: // (1) Clipping the original spherical edge to a cube face (the "face edge"). // The maximum error in this step is S2::kFaceClipErrorUVCoord. 
@@ -109,129 +184,28 @@ const double MutableS2ShapeIndex::kCellPadding = 2 * (S2::kFaceClipErrorUVCoord + S2::kEdgeClipErrorUVCoord); MutableS2ShapeIndex::Options::Options() - : max_edges_per_cell_(FLAGS_s2shape_index_default_max_edges_per_cell) { -} + : max_edges_per_cell_( + absl::GetFlag(FLAGS_s2shape_index_default_max_edges_per_cell)) {} void MutableS2ShapeIndex::Options::set_max_edges_per_cell( int max_edges_per_cell) { max_edges_per_cell_ = max_edges_per_cell; } -bool MutableS2ShapeIndex::Iterator::Locate(const S2Point& target) { - return LocateImpl(target, this); -} - -MutableS2ShapeIndex::CellRelation MutableS2ShapeIndex::Iterator::Locate( - S2CellId target) { - return LocateImpl(target, this); -} - const S2ShapeIndexCell* MutableS2ShapeIndex::Iterator::GetCell() const { - S2_LOG(DFATAL) << "Should never be called"; + S2_LOG(ERROR) << "Should never be called"; return nullptr; } unique_ptr MutableS2ShapeIndex::Iterator::Clone() const { - return absl::make_unique(*this); + return make_unique(*this); } void MutableS2ShapeIndex::Iterator::Copy(const IteratorBase& other) { *this = *down_cast(&other); } -// Defines the initial focus point of MutableS2ShapeIndex::InteriorTracker -// (the start of the S2CellId space-filling curve). -// -// TODO(ericv): Move InteriorTracker here to avoid the need for this method. -static S2Point kInteriorTrackerOrigin() { - return S2::FaceUVtoXYZ(0, -1, -1).Normalize(); -} - -MutableS2ShapeIndex::MutableS2ShapeIndex() - : index_status_(FRESH) { -} - -MutableS2ShapeIndex::MutableS2ShapeIndex(const Options& options) - : options_(options), - index_status_(FRESH) { -} - -void MutableS2ShapeIndex::Init(const Options& options) { - S2_DCHECK(shapes_.empty()); - options_ = options; -} - -MutableS2ShapeIndex::~MutableS2ShapeIndex() { - Clear(); -} - -void MutableS2ShapeIndex::Minimize() { - // TODO(ericv): Implement. In theory we should be able to discard the - // entire index and rebuild it the next time it is needed. 
-} - -int MutableS2ShapeIndex::Add(unique_ptr shape) { - // Additions are processed lazily by ApplyUpdates(). - const int id = shapes_.size(); - shape->id_ = id; - shapes_.push_back(std::move(shape)); - index_status_.store(STALE, std::memory_order_relaxed); - return id; -} - -unique_ptr MutableS2ShapeIndex::Release(int shape_id) { - // This class updates itself lazily, because it is much more efficient to - // process additions and removals in batches. However this means that when - // a shape is removed, we need to make a copy of all its edges, since the - // client is free to delete "shape" once this call is finished. - - S2_DCHECK(shapes_[shape_id] != nullptr); - auto shape = std::move(shapes_[shape_id]); - if (shape_id >= pending_additions_begin_) { - // We are removing a shape that has not yet been added to the index, - // so there is nothing else to do. - } else { - if (!pending_removals_) { - pending_removals_.reset(new vector); - } - // We build the new RemovedShape in place, since it includes a potentially - // large vector of edges that might be expensive to copy. 
- pending_removals_->push_back(RemovedShape()); - RemovedShape* removed = &pending_removals_->back(); - removed->shape_id = shape->id(); - removed->has_interior = (shape->dimension() == 2); - removed->contains_tracker_origin = - s2shapeutil::ContainsBruteForce(*shape, kInteriorTrackerOrigin()); - int num_edges = shape->num_edges(); - removed->edges.reserve(num_edges); - for (int e = 0; e < num_edges; ++e) { - removed->edges.push_back(shape->edge(e)); - } - } - index_status_.store(STALE, std::memory_order_relaxed); - return shape; -} - -vector> MutableS2ShapeIndex::ReleaseAll() { - Iterator it; - for (it.InitStale(this, S2ShapeIndex::BEGIN); !it.done(); it.Next()) { - delete &it.cell(); - } - cell_map_.clear(); - pending_additions_begin_ = 0; - pending_removals_.reset(); - S2_DCHECK(update_state_ == nullptr); - index_status_.store(FRESH, std::memory_order_relaxed); - vector> result; - result.swap(shapes_); - return result; -} - -void MutableS2ShapeIndex::Clear() { - ReleaseAll(); -} - // FaceEdge and ClippedEdge store temporary edge data while the index is being // updated. FaceEdge represents an edge that has been projected onto a given // face, while ClippedEdge represents the portion of that edge that has been @@ -251,9 +225,9 @@ void MutableS2ShapeIndex::Clear() { // ClippedEdge and this data is cached more successfully. struct MutableS2ShapeIndex::FaceEdge { - int32 shape_id; // The shape that this edge belongs to - int32 edge_id; // Edge id within that shape - int32 max_level; // Not desirable to subdivide this edge beyond this level + int32 shape_id; // The shape that this edge belongs to + int32 edge_id; // Edge id within that shape + int32 max_level; // Not desirable to subdivide this edge beyond this level bool has_interior; // Belongs to a shape of dimension 2. 
R2Point a, b; // The edge endpoints, clipped to a given face S2Shape::Edge edge; // The edge endpoints @@ -299,15 +273,15 @@ class MutableS2ShapeIndex::InteriorTracker { // Returns true if any shapes are being tracked. bool is_active() const { return is_active_; } - // Adds a shape whose interior should be tracked. "is_inside" indicates - // whether the current focus point is inside the shape. Alternatively, if - // the focus point is in the process of being moved (via MoveTo/DrawTo), you - // can also specify "is_inside" at the old focus point and call TestEdge() + // Adds a shape whose interior should be tracked. "contains_focus" indicates + // whether the current focus point is inside the shape. Alternatively, if the + // focus point is in the process of being moved (via MoveTo/DrawTo), you can + // also specify "contains_focus" at the old focus point and call TestEdge() // for every edge of the shape that might cross the current DrawTo() line. // This updates the state to correspond to the new focus point. // // REQUIRES: shape->dimension() == 2 - void AddShape(int32 shape_id, bool is_inside); + void AddShape(int32 shape_id, bool contains_focus); // Moves the focus to the given point. This method should only be used when // it is known that there are no edge crossings between the old and new @@ -353,6 +327,11 @@ class MutableS2ShapeIndex::InteriorTracker { // only affects the state for shape_ids below "limit_shape_id". void RestoreStateBefore(int32 limit_shape_id); + // Indicates that only some edges of the given shape are being added, and + // therefore its interior should not be processed yet. + int partial_shape_id() const { return partial_shape_id_; } + void set_partial_shape_id(int shape_id) { partial_shape_id_ = shape_id; } + private: // Removes "shape_id" from shape_ids_ if it exists, otherwise insert it. 
void ToggleShape(int shape_id); @@ -360,7 +339,7 @@ class MutableS2ShapeIndex::InteriorTracker { // Returns a pointer to the first entry "x" where x >= shape_id. ShapeIdSet::iterator lower_bound(int32 shape_id); - bool is_active_; + bool is_active_ = false; S2Point a_, b_; S2CellId next_cellid_; S2EdgeCrosser crosser_; @@ -369,14 +348,22 @@ class MutableS2ShapeIndex::InteriorTracker { // Shape ids saved by SaveAndClearStateBefore(). The state is never saved // recursively so we don't need to worry about maintaining a stack. ShapeIdSet saved_ids_; + + // As an optimization, we also save is_active_ so that RestoreStateBefore() + // can deactivate the tracker again in the case where the shapes being added + // and removed do not have an interior, but some existing shapes do. + bool saved_is_active_; + + // If non-negative, indicates that only some edges of the given shape are + // being added and therefore its interior should not be tracked yet. + int partial_shape_id_ = -1; }; // As shapes are added, we compute which ones contain the start of the // S2CellId space-filling curve by drawing an edge from S2::Origin() to this // point and counting how many shape edges cross this edge. 
MutableS2ShapeIndex::InteriorTracker::InteriorTracker() - : is_active_(false), b_(Origin()), - next_cellid_(S2CellId::Begin(S2CellId::kMaxLevel)) { + : b_(Origin()), next_cellid_(S2CellId::Begin(S2CellId::kMaxLevel)) { } S2Point MutableS2ShapeIndex::InteriorTracker::Origin() { @@ -422,8 +409,10 @@ void MutableS2ShapeIndex::InteriorTracker::DrawTo(const S2Point& b) { crosser_.Init(&a_, &b_); } -inline void MutableS2ShapeIndex::InteriorTracker::TestEdge( - int32 shape_id, const S2Shape::Edge& edge) { +ABSL_ATTRIBUTE_ALWAYS_INLINE // ~1% faster + inline void + MutableS2ShapeIndex::InteriorTracker::TestEdge(int32 shape_id, + const S2Shape::Edge& edge) { if (crosser_.EdgeOrVertexCrossing(&edge.v0, &edge.v1)) { ToggleShape(shape_id); } @@ -445,6 +434,7 @@ void MutableS2ShapeIndex::InteriorTracker::SaveAndClearStateBefore( ShapeIdSet::iterator limit = lower_bound(limit_shape_id); saved_ids_.assign(shape_ids_.begin(), limit); shape_ids_.erase(shape_ids_.begin(), limit); + saved_is_active_ = is_active_; } void MutableS2ShapeIndex::InteriorTracker::RestoreStateBefore( @@ -452,6 +442,143 @@ void MutableS2ShapeIndex::InteriorTracker::RestoreStateBefore( shape_ids_.erase(shape_ids_.begin(), lower_bound(limit_shape_id)); shape_ids_.insert(shape_ids_.begin(), saved_ids_.begin(), saved_ids_.end()); saved_ids_.clear(); + is_active_ = saved_is_active_; +} + +MutableS2ShapeIndex::MutableS2ShapeIndex() = default; + +MutableS2ShapeIndex::MutableS2ShapeIndex(const Options& options) { + Init(options); +} + +MutableS2ShapeIndex::MutableS2ShapeIndex(MutableS2ShapeIndex&& b) + : S2ShapeIndex(std::move(b)), + shapes_(std::move(b.shapes_)), + cell_map_(std::move(b.cell_map_)), + options_(std::move(b.options_)), + pending_additions_begin_(absl::exchange(b.pending_additions_begin_, 0)), + pending_removals_(std::move(b.pending_removals_)), + index_status_(b.index_status_.exchange(FRESH, std::memory_order_relaxed)), + mem_tracker_(std::move(b.mem_tracker_)) {} + +MutableS2ShapeIndex& 
MutableS2ShapeIndex::operator=(MutableS2ShapeIndex&& b) { + // We need to delegate to our parent move-assignment operator since we can't + // move any of its private state. This is a little odd since b is in a + // half-moved state after calling but is ultimately safe. + S2ShapeIndex::operator=(static_cast(b)); + shapes_ = std::move(b.shapes_); + cell_map_ = std::move(b.cell_map_); + options_ = std::move(b.options_); + pending_additions_begin_ = absl::exchange(b.pending_additions_begin_, 0); + pending_removals_ = std::move(b.pending_removals_); + index_status_.store( + b.index_status_.exchange(FRESH, std::memory_order_relaxed), + std::memory_order_relaxed); + mem_tracker_ = std::move(b.mem_tracker_); + return *this; +} + +void MutableS2ShapeIndex::Init(const Options& options) { + S2_DCHECK(shapes_.empty()); + options_ = options; + // Memory tracking is not affected by this method. +} + +MutableS2ShapeIndex::~MutableS2ShapeIndex() { + Clear(); +} + +void MutableS2ShapeIndex::set_memory_tracker(S2MemoryTracker* tracker) { + mem_tracker_.Tally(-mem_tracker_.client_usage_bytes()); + mem_tracker_.Init(tracker); + if (mem_tracker_.is_active()) mem_tracker_.Tally(SpaceUsed()); +} + +// Called to set the index status when the index needs to be rebuilt. +void MutableS2ShapeIndex::MarkIndexStale() { + // The UPDATING status can only be changed in ApplyUpdatesThreadSafe(). + if (index_status_.load(std::memory_order_relaxed) == UPDATING) return; + + // If a memory tracking error has occurred we set the index status to FRESH + // in order to prevent us from attempting to rebuild it. + IndexStatus status = (shapes_.empty() || !mem_tracker_.ok()) ? 
FRESH : STALE; + index_status_.store(status, std::memory_order_relaxed); +} + +void MutableS2ShapeIndex::Minimize() { + mem_tracker_.Tally(-mem_tracker_.client_usage_bytes()); + Iterator it; + for (it.InitStale(this, S2ShapeIndex::BEGIN); !it.done(); it.Next()) { + delete &it.cell(); + } + cell_map_.clear(); + pending_removals_.reset(); + pending_additions_begin_ = 0; + MarkIndexStale(); + if (mem_tracker_.is_active()) mem_tracker_.Tally(SpaceUsed()); +} + +int MutableS2ShapeIndex::Add(unique_ptr shape) { + // Additions are processed lazily by ApplyUpdates(). Note that in order to + // avoid unexpected client behavior, this method continues to add shapes + // even once the specified S2MemoryTracker limit has been exceeded. + const int id = shapes_.size(); + shape->id_ = id; + mem_tracker_.AddSpace(&shapes_, 1); + shapes_.push_back(std::move(shape)); + MarkIndexStale(); + return id; +} + +unique_ptr MutableS2ShapeIndex::Release(int shape_id) { + // This class updates itself lazily, because it is much more efficient to + // process additions and removals in batches. However this means that when + // a shape is removed we need to make a copy of all its edges, since the + // client is free to delete "shape" once this call is finished. + + S2_DCHECK(shapes_[shape_id] != nullptr); + auto shape = std::move(shapes_[shape_id]); + if (shape_id >= pending_additions_begin_) { + // We are removing a shape that has not yet been added to the index, + // so there is nothing else to do. 
+ } else { + if (!pending_removals_) { + if (!mem_tracker_.Tally(sizeof(*pending_removals_))) { + Minimize(); + return shape; + } + pending_removals_ = make_unique>(); + } + RemovedShape removed; + removed.shape_id = shape->id(); + removed.has_interior = (shape->dimension() == 2); + removed.contains_tracker_origin = + s2shapeutil::ContainsBruteForce(*shape, InteriorTracker::Origin()); + int num_edges = shape->num_edges(); + if (!mem_tracker_.AddSpace(&removed.edges, num_edges) || + !mem_tracker_.AddSpace(pending_removals_.get(), 1)) { + Minimize(); + return shape; + } + for (int e = 0; e < num_edges; ++e) { + removed.edges.push_back(shape->edge(e)); + } + pending_removals_->push_back(std::move(removed)); + } + MarkIndexStale(); + return shape; +} + +vector> MutableS2ShapeIndex::ReleaseAll() { + S2_DCHECK(update_state_ == nullptr); + vector> result; + result.swap(shapes_); + Minimize(); + return result; +} + +void MutableS2ShapeIndex::Clear() { + ReleaseAll(); } // Apply any pending updates in a thread-safe way. @@ -479,7 +606,7 @@ void MutableS2ShapeIndex::ApplyUpdatesThreadSafe() { // and this saves an extra lock and unlock step; (3) even in the rare case // where there is contention, the main side effect is that some other // thread will burn a few CPU cycles rather than sleeping. - update_state_.reset(new UpdateState); + update_state_ = make_unique(); // lock_.Lock wait_mutex *before* calling Unlock() to ensure that all other // threads will block on it. update_state_->wait_mutex.Lock(); @@ -516,44 +643,21 @@ inline void MutableS2ShapeIndex::UnlockAndSignal() { } } -void MutableS2ShapeIndex::ForceBuild() { - // No locks required because this is not a const method. It is the client's - // responsibility to ensure correct thread synchronization. 
- if (index_status_.load(std::memory_order_relaxed) != FRESH) { - ApplyUpdatesInternal(); - index_status_.store(FRESH, std::memory_order_relaxed); - } -} - -// A BatchDescriptor represents a set of pending updates that will be applied -// at the same time. The batch consists of all updates with shape ids between -// the current value of "ShapeIndex::pending_additions_begin_" (inclusive) and -// "additions_end" (exclusive). The first batch to be processed also -// implicitly includes all shapes being removed. "num_edges" is the total -// number of edges that will be added or removed in this batch. -struct MutableS2ShapeIndex::BatchDescriptor { - BatchDescriptor(int _additions_end, int _num_edges) - : additions_end(_additions_end), num_edges(_num_edges) { - } - int additions_end; - int num_edges; -}; - // This method updates the index by applying all pending additions and // removals. It does *not* update index_status_ (see ApplyUpdatesThreadSafe). void MutableS2ShapeIndex::ApplyUpdatesInternal() { // Check whether we have so many edges to process that we should process // them in multiple batches to save memory. Building the index can use up // to 20x as much memory (per edge) as the final index size. - vector batches; - GetUpdateBatches(&batches); - int i = 0; + vector batches = GetUpdateBatches(); for (const BatchDescriptor& batch : batches) { + if (mem_tracker_.is_active()) { + S2_DCHECK_EQ(mem_tracker_.client_usage_bytes(), SpaceUsed()); // Invariant. + } vector all_edges[6]; - S2_VLOG(1) << "Batch " << i++ << ": shape_limit=" << batch.additions_end - << ", edges=" << batch.num_edges; - ReserveSpace(batch, all_edges); + if (!mem_tracker_.ok()) return Minimize(); + InteriorTracker tracker; if (pending_removals_) { // The first batch implicitly includes all shapes being removed. 
@@ -562,24 +666,41 @@ void MutableS2ShapeIndex::ApplyUpdatesInternal() { } pending_removals_.reset(nullptr); } - for (int id = pending_additions_begin_; id < batch.additions_end; ++id) { - AddShape(id, all_edges, &tracker); + // A batch consists of zero or more full shapes followed by zero or one + // partial shapes. The loop below handles all such cases. + for (auto begin = batch.begin; begin < batch.end; + ++begin.shape_id, begin.edge_id = 0) { + const S2Shape* shape = this->shape(begin.shape_id); + if (shape == nullptr) continue; // Already removed. + int edges_end = begin.shape_id == batch.end.shape_id ? batch.end.edge_id + : shape->num_edges(); + AddShape(shape, begin.edge_id, edges_end, all_edges, &tracker); } for (int face = 0; face < 6; ++face) { UpdateFaceEdges(face, all_edges[face], &tracker); // Save memory by clearing vectors after we are done with them. vector().swap(all_edges[face]); } - pending_additions_begin_ = batch.additions_end; + pending_additions_begin_ = batch.end.shape_id; + if (batch.begin.edge_id > 0 && batch.end.edge_id == 0) { + // We have just finished adding the edges of shape that was split over + // multiple batches. Now we need to mark the interior of the shape, if + // any, by setting contains_center() on the appropriate index cells. + FinishPartialShape(tracker.partial_shape_id()); + } + if (mem_tracker_.is_active()) { + mem_tracker_.Tally(-mem_tracker_.client_usage_bytes()); + if (!mem_tracker_.Tally(SpaceUsed())) return Minimize(); + } } // It is the caller's responsibility to update index_status_. } // Count the number of edges being updated, and break them into several // batches if necessary to reduce the amount of memory needed. (See the -// documentation for FLAGS_s2shape_index_tmp_memory_budget_mb.) -void MutableS2ShapeIndex::GetUpdateBatches(vector* batches) - const { +// documentation for FLAGS_s2shape_index_tmp_memory_budget.) 
+vector +MutableS2ShapeIndex::GetUpdateBatches() const { // Count the edges being removed and added. int num_edges_removed = 0; if (pending_removals_) { @@ -588,125 +709,188 @@ void MutableS2ShapeIndex::GetUpdateBatches(vector* batches) } } int num_edges_added = 0; - for (int id = pending_additions_begin_; id < shapes_.size(); ++id) { + for (size_t id = pending_additions_begin_; id < shapes_.size(); ++id) { const S2Shape* shape = this->shape(id); - if (shape == nullptr) continue; - num_edges_added += shape->num_edges(); + if (shape) num_edges_added += shape->num_edges(); } - int num_edges = num_edges_removed + num_edges_added; + BatchGenerator batch_gen(num_edges_removed, num_edges_added, + pending_additions_begin_); + for (size_t id = pending_additions_begin_; id < shapes_.size(); ++id) { + const S2Shape* shape = this->shape(id); + if (shape) batch_gen.AddShape(id, shape->num_edges()); + } + return batch_gen.Finish(); +} - // The following memory estimates are based on heap profiling. - // - // The final size of a MutableS2ShapeIndex depends mainly on how finely the - // index is subdivided, as controlled by Options::max_edges_per_cell() and - // --s2shape_index_default_max_edges_per_cell. For realistic values of - // max_edges_per_cell() and shapes with moderate numbers of edges, it is - // difficult to get much below 8 bytes per edge. [The minimum possible size - // is 4 bytes per edge (to store a 32-bit edge id in an S2ClippedShape) plus - // 24 bytes per shape (for the S2ClippedShape itself plus a pointer in the - // shapes_ vector.] - // - // The temporary memory consists mainly of the FaceEdge and ClippedEdge - // structures plus a ClippedEdge pointer for every level of recursive - // subdivision. For very large indexes this can be 200 bytes per edge. 
- const size_t kFinalBytesPerEdge = 8; - const size_t kTmpBytesPerEdge = 200; - const size_t kTmpMemoryBudgetBytes = - static_cast(FLAGS_s2shape_index_tmp_memory_budget_mb) << 20; - - // We arbitrarily limit the number of batches just as a safety measure. - // With the current default memory budget of 100 MB, this limit is not - // reached even when building an index of 350 million edges. - const int kMaxUpdateBatches = 100; - - if (num_edges * kTmpBytesPerEdge <= kTmpMemoryBudgetBytes) { - // We can update all edges at once without exceeding kTmpMemoryBudgetBytes. - batches->push_back(BatchDescriptor(shapes_.size(), num_edges)); - return; +// The following memory estimates are based on heap profiling. + +// The batch sizes during a given update gradually decrease as the space +// occupied by the index itself grows. In order to do this, we need a +// conservative lower bound on how much the index grows per edge. +// +// The final size of a MutableS2ShapeIndex depends mainly on how finely the +// index is subdivided, as controlled by Options::max_edges_per_cell() and +// --s2shape_index_default_max_edges_per_cell. For realistic values of +// max_edges_per_cell() and shapes with moderate numbers of edges, it is +// difficult to get much below 8 bytes per edge. (The minimum possible size +// is 4 bytes per edge (to store a 32-bit edge id in an S2ClippedShape) plus +// 24 bytes per shape (for the S2ClippedShape itself plus a pointer in the +// shapes_ vector.) Note that this value is a lower bound; a typical final +// index size is closer to 24 bytes per edge. +static constexpr size_t kFinalBytesPerEdge = 8; + +// The temporary memory consists mainly of the FaceEdge and ClippedEdge +// structures plus a ClippedEdge pointer for every level of recursive +// subdivision. This can be more than 220 bytes per edge even for typical +// geometry. (The pathological worst case is higher, but we don't use this to +// determine the batch sizes.)
+static constexpr size_t kTmpBytesPerEdge = 226; + +// We arbitrarily limit the number of batches as a safety measure. With the +// current default memory budget of 100 MB, this limit is not reached even +// when building an index of 350 million edges. +static constexpr int kMaxBatches = 100; + +MutableS2ShapeIndex::BatchGenerator::BatchGenerator(int num_edges_removed, + int num_edges_added, + int shape_id_begin) + : max_batch_sizes_(GetMaxBatchSizes(num_edges_removed, num_edges_added)), + batch_begin_(shape_id_begin, 0), + shape_id_end_(shape_id_begin) { + if (max_batch_sizes_.size() > 1) { + S2_VLOG(1) << "Removing " << num_edges_removed << ", adding " + << num_edges_added << " edges in " << max_batch_sizes_.size() + << " batches"; } - // Otherwise, break the updates into up to several batches, where the size - // of each batch is chosen so that all batches use approximately the same - // high-water memory. GetBatchSizes() returns the recommended number of - // edges in each batch. - vector batch_sizes; - GetBatchSizes(num_edges, kMaxUpdateBatches, kFinalBytesPerEdge, - kTmpBytesPerEdge, kTmpMemoryBudgetBytes, &batch_sizes); - - // We always process removed edges in a single batch, since (1) they already - // take up a lot of memory because we have copied all their edges, and (2) - // AbsorbIndexCell() uses (shapes_[id] == nullptr) to detect when a shape is - // being removed, so in order to split the removals into batches we would - // need a different approach (e.g., temporarily add fake entries to shapes_ - // and restore them back to nullptr as shapes are actually removed). - num_edges = 0; - if (pending_removals_) { - num_edges += num_edges_removed; - if (num_edges >= batch_sizes[0]) { - batches->push_back(BatchDescriptor(pending_additions_begin_, num_edges)); - num_edges = 0; + // Duplicate the last entry to simplify next_max_batch_size(). 
+ max_batch_sizes_.push_back(max_batch_sizes_.back()); + + // We process edge removals before additions, and edges are always removed + // in a single batch. The reasons for this include: (1) removed edges use + // quite a bit of memory (about 50 bytes each) and this space can be freed + // immediately when we process them in one batch; (2) removed shapes are + // expected to be small fraction of the index size in typical use cases + // (e.g. incremental updates of large indexes), and (3) AbsorbIndexCell() + // uses (shape(id) == nullptr) to detect when a shape is being removed, so + // in order to split the removed shapes into multiple batches we would need + // a different approach (e.g., temporarily adding fake entries to shapes_ + // and restoring them back to nullptr as shapes are removed). Removing + // individual shapes over multiple batches would be even more work. + batch_size_ = num_edges_removed; +} + +void MutableS2ShapeIndex::BatchGenerator::AddShape(int shape_id, + int num_edges) { + int batch_remaining = max_batch_size() - batch_size_; + if (num_edges <= batch_remaining) { + ExtendBatch(num_edges); + } else if (num_edges <= next_max_batch_size()) { + // Avoid splitting shapes across batches unnecessarily. + FinishBatch(0, ShapeEdgeId(shape_id, 0)); + ExtendBatch(num_edges); + } else { + // This shape must be split across at least two batches. We simply fill + // each batch until the remaining edges will fit in two batches, and then + // divide those edges such that both batches have the same amount of + // remaining space relative to their maximum size. + int e_begin = 0; + while (batch_remaining + next_max_batch_size() < num_edges) { + e_begin += batch_remaining; + FinishBatch(batch_remaining, ShapeEdgeId(shape_id, e_begin)); + num_edges -= batch_remaining; + batch_remaining = max_batch_size(); } + // Figure out how many edges to add to the current batch so that it will + // have the same amount of remaining space as the next batch. 
+ int n = (num_edges + batch_remaining - next_max_batch_size()) / 2; + FinishBatch(n, ShapeEdgeId(shape_id, e_begin + n)); + FinishBatch(num_edges - n, ShapeEdgeId(shape_id + 1, 0)); } - // Keep adding shapes to each batch until the recommended number of edges - // for that batch is reached, then move on to the next batch. - for (int id = pending_additions_begin_; id < shapes_.size(); ++id) { - const S2Shape* shape = this->shape(id); - if (shape == nullptr) continue; - num_edges += shape->num_edges(); - if (num_edges >= batch_sizes[batches->size()]) { - batches->push_back(BatchDescriptor(id + 1, num_edges)); - num_edges = 0; - } + shape_id_end_ = shape_id + 1; +} + +vector +MutableS2ShapeIndex::BatchGenerator::Finish() { + // We must generate at least one batch even when num_edges_removed == + // num_edges_added == 0, because some shapes have an interior but no edges. + // (Specifically, the full polygon has this property.) + if (batches_.empty() || shape_id_end_ != batch_begin_.shape_id) { + FinishBatch(0, ShapeEdgeId(shape_id_end_, 0)); } - // Some shapes have no edges. If a shape with no edges is the last shape to - // be added or removed, then the final batch may not include it, so we fix - // that problem here. - batches->back().additions_end = shapes_.size(); - S2_DCHECK_LE(batches->size(), kMaxUpdateBatches); + return std::move(batches_); } -// Given "num_items" items, each of which uses "tmp_bytes_per_item" while it -// is being updated but only "final_bytes_per_item" in the end, divide the -// items into batches that have approximately the same *total* memory usage -// consisting of the temporary memory needed for the items in the current -// batch plus the final size of all the items that have already been -// processed. Use the fewest number of batches (but never more than -// "max_batches") such that the total memory usage does not exceed the -// combined final size of all the items plus "tmp_memory_budget_bytes". 
+void MutableS2ShapeIndex::BatchGenerator::FinishBatch(int num_edges, + ShapeEdgeId batch_end) { + ExtendBatch(num_edges); + batches_.push_back(BatchDescriptor{batch_begin_, batch_end, batch_size_}); + batch_begin_ = batch_end; + batch_index_edges_left_ -= batch_size_; + while (batch_index_edges_left_ < 0) { + batch_index_edges_left_ += max_batch_size(); + batch_index_ += 1; + } + batch_size_ = 0; +} + +// Divides "num_edges" edges into batches where each batch needs about the +// same total amount of memory. (The total memory needed by a batch consists +// of the temporary memory needed to process the edges in that batch plus the +// final representations of the edges that have already been indexed.) It +// uses the fewest number of batches (up to kMaxBatches) such that the total +// memory usage does not exceed the combined final size of all the edges plus +// FLAGS_s2shape_index_tmp_memory_budget. Returns a vector of sizes +// indicating the desired number of edges in each batch. /* static */ -void MutableS2ShapeIndex::GetBatchSizes(int num_items, int max_batches, - double final_bytes_per_item, - double tmp_bytes_per_item, - double tmp_memory_budget_bytes, - vector* batch_sizes) { - // This code tries to fit all the data into the same memory space - // ("total_budget_bytes") at every iteration. The data consists of some - // number of processed items (at "final_bytes_per_item" each), plus some - // number being updated (at "tmp_bytes_per_item" each). The space occupied - // by the items being updated is the "free space". At each iteration, the - // free space is multiplied by (1 - final_bytes_per_item/tmp_bytes_per_item) - // as the items are converted into their final form. 
- double final_bytes = num_items * final_bytes_per_item; - double final_bytes_ratio = final_bytes_per_item / tmp_bytes_per_item; - double free_space_multiplier = 1 - final_bytes_ratio; +vector MutableS2ShapeIndex::BatchGenerator::GetMaxBatchSizes( + int num_edges_removed, int num_edges_added) { + // Check whether we can update all the edges at once. + int num_edges_total = num_edges_removed + num_edges_added; + const double tmp_memory_budget_bytes = + absl::GetFlag(FLAGS_s2shape_index_tmp_memory_budget); + if (num_edges_total * kTmpBytesPerEdge <= tmp_memory_budget_bytes) { + return vector{num_edges_total}; + } + + // Each batch is allowed to use up to "total_budget_bytes". The memory + // usage consists of some number of edges already added by previous batches + // (at kFinalBytesPerEdge each), plus some number being updated in the + // current batch (at kTmpBytesPerEdge each). The available free space is + // multiplied by (1 - kFinalBytesPerEdge / kTmpBytesPerEdge) after each + // batch is processed as edges are converted into their final form. + const double final_bytes = num_edges_added * kFinalBytesPerEdge; + constexpr double kFinalBytesRatio = 1.0 * kFinalBytesPerEdge / + kTmpBytesPerEdge; + constexpr double kTmpSpaceMultiplier = 1 - kFinalBytesRatio; // The total memory budget is the greater of the final size plus the allowed // temporary memory, or the minimum amount of memory required to limit the - // number of batches to "max_batches". - double total_budget_bytes = max( + // number of batches to "kMaxBatches". + const double total_budget_bytes = max( final_bytes + tmp_memory_budget_bytes, - final_bytes / (1 - pow(free_space_multiplier, max_batches))); - - // "max_batch_items" is the number of items in the current batch. 
- double max_batch_items = total_budget_bytes / tmp_bytes_per_item; - batch_sizes->clear(); - for (int i = 0; i + 1 < max_batches && num_items > 0; ++i) { - int batch_items = - std::min(num_items, static_cast(max_batch_items + 1)); - batch_sizes->push_back(batch_items); - num_items -= batch_items; - max_batch_items *= free_space_multiplier; + final_bytes / (1 - MathUtil::IPow(kTmpSpaceMultiplier, kMaxBatches - 1))); + + // "ideal_batch_size" is the number of edges in the current batch before + // rounding to an integer. + double ideal_batch_size = total_budget_bytes / kTmpBytesPerEdge; + + // Removed edges are always processed in the first batch, even if this might + // use more memory than requested (see the BatchGenerator constructor). + vector batch_sizes; + int num_edges_left = num_edges_added; + if (num_edges_removed > ideal_batch_size) { + batch_sizes.push_back(num_edges_removed); + } else { + num_edges_left += num_edges_removed; + } + for (int i = 0; num_edges_left > 0; ++i) { + int batch_size = static_cast(ideal_batch_size + 1); + batch_sizes.push_back(batch_size); + num_edges_left -= batch_size; + ideal_batch_size *= kTmpSpaceMultiplier; } - S2_DCHECK_LE(batch_sizes->size(), max_batches); + S2_DCHECK_LE(batch_sizes.size(), kMaxBatches); + return batch_sizes; } // Reserve an appropriate amount of space for the top-level face edges in the @@ -714,17 +898,27 @@ void MutableS2ShapeIndex::GetBatchSizes(int num_items, int max_batches, // needed during index construction. Furthermore, if the arrays are grown via // push_back() then up to 10% of the total run time consists of copying data // as these arrays grow, so it is worthwhile to preallocate space for them. 
-void MutableS2ShapeIndex::ReserveSpace(const BatchDescriptor& batch, - vector all_edges[6]) const { +void MutableS2ShapeIndex::ReserveSpace( + const BatchDescriptor& batch, vector all_edges[6]) { + // The following accounts for the temporary space needed for everything + // except the FaceEdge vectors (which are allocated separately below). + int64 other_usage = batch.num_edges * (kTmpBytesPerEdge - sizeof(FaceEdge)); + // If the number of edges is relatively small, then the fastest approach is // to simply reserve space on every face for the maximum possible number of - // edges. We use a different threshold for this calculation than for - // deciding when to break updates into batches, because the cost/benefit - // ratio is different. (Here the only extra expense is that we need to - // sample the edges to estimate how many edges per face there are.) - const size_t kMaxCheapBytes = 30 << 20; // 30 MB - const int kMaxCheapEdges = kMaxCheapBytes / (6 * sizeof(FaceEdge)); - if (batch.num_edges <= kMaxCheapEdges) { + // edges. (We use a different threshold for this calculation than for + // deciding when to break updates into batches because the cost/benefit + // ratio is different. Here the only extra expense is that we need to + // sample the edges to estimate how many edges per face there are, and + // therefore we generally use a lower threshold.) 
+ const size_t kMaxCheapBytes = + min(absl::GetFlag(FLAGS_s2shape_index_tmp_memory_budget) / 2, + int64{30} << 20 /*30 MB*/); + int64 face_edge_usage = batch.num_edges * (6 * sizeof(FaceEdge)); + if (static_cast(face_edge_usage) <= kMaxCheapBytes) { + if (!mem_tracker_.TallyTemp(face_edge_usage + other_usage)) { + return; + } for (int face = 0; face < 6; ++face) { all_edges[face].reserve(batch.num_edges); } @@ -758,16 +952,19 @@ void MutableS2ShapeIndex::ReserveSpace(const BatchDescriptor& batch, } } } - for (int id = pending_additions_begin_; id < batch.additions_end; ++id) { - const S2Shape* shape = this->shape(id); - if (shape == nullptr) continue; - edge_id += shape->num_edges(); + for (auto begin = batch.begin; begin < batch.end; + ++begin.shape_id, begin.edge_id = 0) { + const S2Shape* shape = this->shape(begin.shape_id); + if (shape == nullptr) continue; // Already removed. + int edges_end = begin.shape_id == batch.end.shape_id ? batch.end.edge_id + : shape->num_edges(); + edge_id += edges_end - begin.edge_id; while (edge_id >= sample_interval) { edge_id -= sample_interval; // For speed, we only count the face containing one endpoint of the // edge. In general the edge could span all 6 faces (with padding), but // it's not worth the expense to compute this more accurately. - face_count[S2::GetFace(shape->edge(edge_id).v0)] += 1; + face_count[S2::GetFace(shape->edge(edge_id + begin.edge_id).v0)] += 1; } } // Now given the raw face counts, compute a confidence interval such that we @@ -783,11 +980,22 @@ void MutableS2ShapeIndex::ReserveSpace(const BatchDescriptor& batch, // It is quite likely that such faces are truly empty, so we save time // and memory this way. If the face does contain some edges, there will // only be a few so it is fine to let the vector grow automatically. - // On average, we reserve 2% extra space for each face that has geometry. 
+ // On average, we reserve 2% extra space for each face that has geometry + // (which could be up to 12% extra space overall, but typically 2%). // kMaxSemiWidth is the maximum semi-width over all probabilities p of a // 4-sigma binomial confidence interval with a sample size of 10,000. const double kMaxSemiWidth = 0.02; + + // First estimate the total amount of memory we are about to allocate. + double multiplier = 1.0; + for (int face = 0; face < 6; ++face) { + if (face_count[face] != 0) multiplier += kMaxSemiWidth; + } + face_edge_usage = multiplier * batch.num_edges * sizeof(FaceEdge); + if (!mem_tracker_.TallyTemp(face_edge_usage + other_usage)) { + return; + } const double sample_ratio = 1.0 / actual_sample_size; for (int face = 0; face < 6; ++face) { if (face_count[face] == 0) continue; @@ -796,24 +1004,30 @@ void MutableS2ShapeIndex::ReserveSpace(const BatchDescriptor& batch, } } -// Clip all edges of the given shape to the six cube faces, add the clipped +// Clips the edges of the given shape to the six cube faces, add the clipped // edges to "all_edges", and start tracking its interior if necessary. -void MutableS2ShapeIndex::AddShape(int id, vector all_edges[6], - InteriorTracker* tracker) const { - const S2Shape* shape = this->shape(id); - if (shape == nullptr) { - return; // This shape has already been removed. - } +void MutableS2ShapeIndex::AddShape( + const S2Shape* shape, int edges_begin, int edges_end, + vector all_edges[6], InteriorTracker* tracker) const { // Construct a template for the edges to be added. FaceEdge edge; - edge.shape_id = id; - edge.has_interior = (shape->dimension() == 2); - if (edge.has_interior) { - tracker->AddShape(id, s2shapeutil::ContainsBruteForce(*shape, - tracker->focus())); + edge.shape_id = shape->id(); + edge.has_interior = false; + if (shape->dimension() == 2) { + // To add a single shape with an interior over multiple batches, we first + // add all the edges without tracking the interior. 
After all edges have + // been added, the interior is updated in a separate step by setting the + // contains_center() flags appropriately. + if (edges_begin > 0 || edges_end < shape->num_edges()) { + tracker->set_partial_shape_id(edge.shape_id); + } else { + edge.has_interior = true; + tracker->AddShape( + edge.shape_id, + s2shapeutil::ContainsBruteForce(*shape, tracker->focus())); + } } - int num_edges = shape->num_edges(); - for (int e = 0; e < num_edges; ++e) { + for (int e = edges_begin; e < edges_end; ++e) { edge.edge_id = e; edge.edge = shape->edge(e); edge.max_level = GetEdgeMaxLevel(edge.edge); @@ -838,6 +1052,108 @@ void MutableS2ShapeIndex::RemoveShape(const RemovedShape& removed, } } +void MutableS2ShapeIndex::FinishPartialShape(int shape_id) { + if (shape_id < 0) return; // The partial shape did not have an interior. + const S2Shape* shape = this->shape(shape_id); + + // Filling in the interior of a partial shape can grow the cell_map_ + // significantly, however the new cells have just one shape and no edges. + // The following is a rough estimate of how much extra memory is needed + // based on experiments. It assumes that one new cell is required for every + // 10 shape edges, and that the cell map uses 50% more space than necessary + // for the new entries because they are inserted between existing entries + // (which means that the btree nodes are not full). + if (mem_tracker_.is_active()) { + const int64 new_usage = + SpaceUsed() - mem_tracker_.client_usage_bytes() + + 0.1 * shape->num_edges() * + (1.5 * sizeof(CellMap::value_type) + sizeof(S2ShapeIndexCell) + + sizeof(S2ClippedShape)); + if (!mem_tracker_.TallyTemp(new_usage)) return; + } + + // All the edges of the partial shape have already been indexed, now we just + // need to set the contains_center() flags appropriately. We use a fresh + // InteriorTracker for this purpose since we don't want to continue tracking + // the interior state of any other shapes in this batch. 
+ // + // We have implemented this below in the simplest way possible, namely by + // scanning through the entire index. In theory it would be more efficient + // to keep track of the set of index cells that were modified when the + // partial shape's edges were added, and then visit only those cells. + // However in practice any shape that is added over multiple batches is + // likely to occupy most or all of the index anyway, so it is faster and + // simpler to just iterate through the entire index. + // + // "tmp_edges" below speeds up large polygon index construction by 3-12%. + vector tmp_edges; // Temporary storage. + InteriorTracker tracker; + tracker.AddShape(shape_id, + s2shapeutil::ContainsBruteForce(*shape, tracker.focus())); + S2CellId begin = S2CellId::Begin(S2CellId::kMaxLevel); + for (CellMap::iterator index_it = cell_map_.begin(); ; ++index_it) { + if (!tracker.shape_ids().empty()) { + // Check whether we need to add new cells that are entirely contained by + // the partial shape. + S2CellId fill_end = + (index_it != cell_map_.end()) ? index_it->first.range_min() + : S2CellId::End(S2CellId::kMaxLevel); + if (begin != fill_end) { + for (S2CellId cellid : S2CellUnion::FromBeginEnd(begin, fill_end)) { + S2ShapeIndexCell* cell = new S2ShapeIndexCell; + S2ClippedShape* clipped = cell->add_shapes(1); + clipped->Init(shape_id, 0); + clipped->set_contains_center(true); + index_it = cell_map_.insert(index_it, make_pair(cellid, cell)); + ++index_it; + } + } + } + if (index_it == cell_map_.end()) break; + + // Now check whether the current index cell needs to be updated. + S2CellId cellid = index_it->first; + S2ShapeIndexCell* cell = index_it->second; + int n = cell->shapes_.size(); + if (n > 0 && cell->shapes_[n - 1].shape_id() == shape_id) { + // This cell contains edges of the partial shape. If the partial shape + // contains the center of this cell, we must update the index. 
+ S2PaddedCell pcell(cellid, kCellPadding); + if (!tracker.at_cellid(cellid)) { + tracker.MoveTo(pcell.GetEntryVertex()); + } + tracker.DrawTo(pcell.GetCenter()); + S2ClippedShape* clipped = &cell->shapes_[n - 1]; + int num_edges = clipped->num_edges(); + S2_DCHECK_GT(num_edges, 0); + for (int i = 0; i < num_edges; ++i) { + tmp_edges.push_back(shape->edge(clipped->edge(i))); + } + for (const auto& edge : tmp_edges) { + tracker.TestEdge(shape_id, edge); + } + if (!tracker.shape_ids().empty()) { + // The partial shape contains the center of this index cell. + clipped->set_contains_center(true); + } + tracker.DrawTo(pcell.GetExitVertex()); + for (const auto& edge : tmp_edges) { + tracker.TestEdge(shape_id, edge); + } + tracker.set_next_cellid(cellid.next()); + tmp_edges.clear(); + + } else if (!tracker.shape_ids().empty()) { + // The partial shape contains the center of an existing index cell that + // does not intersect any of its edges. + S2ClippedShape* clipped = cell->add_shapes(1); + clipped->Init(shape_id, 0); + clipped->set_contains_center(true); + } + begin = cellid.range_max().next(); + } +} + inline void MutableS2ShapeIndex::AddFaceEdge( FaceEdge* edge, vector all_edges[6]) const { // Fast path: both endpoints are on the same face, and are far enough from @@ -862,17 +1178,19 @@ inline void MutableS2ShapeIndex::AddFaceEdge( } } -// Return the first level at which the edge will *not* contribute towards -// the decision to subdivide. +// Returns the first level for which the given edge will be considered "long", +// i.e. it will not count towards the max_edges_per_cell() limit. int MutableS2ShapeIndex::GetEdgeMaxLevel(const S2Shape::Edge& edge) const { - // Compute the maximum cell size for which this edge is considered "long". - // The calculation does not need to be perfectly accurate, so we use Norm() - // rather than Angle() for speed. 
- double cell_size = ((edge.v0 - edge.v1).Norm() * - FLAGS_s2shape_index_cell_size_to_long_edge_ratio); + // Compute the maximum cell edge length for which this edge is considered + // "long". The calculation does not need to be perfectly accurate, so we + // use Norm() rather than Angle() for speed. + double max_cell_edge = + ((edge.v0 - edge.v1).Norm() * + absl::GetFlag(FLAGS_s2shape_index_cell_size_to_long_edge_ratio)); + // Now return the first level encountered during subdivision where the - // average cell size is at most "cell_size". - return S2::kAvgEdge.GetLevelForMaxValue(cell_size); + // average cell edge length at that level is at most "max_cell_edge". + return S2::kAvgEdge.GetLevelForMaxValue(max_cell_edge); } // EdgeAllocator provides temporary storage for new ClippedEdges that are @@ -953,8 +1271,13 @@ void MutableS2ShapeIndex::UpdateFaceEdges(int face, S2PaddedCell pcell(face_id, kCellPadding); // "disjoint_from_index" means that the current cell being processed (and - // all its descendants) are not already present in the index. - bool disjoint_from_index = is_first_update(); + // all its descendants) are not already present in the index. It is set to + // true during the recursion whenever we detect that the current cell is + // disjoint from the index. We could save a tiny bit of work by setting + // this flag to true here on the very first update, however currently there + // is no easy way to check that. (It's not sufficient to test whether + // cell_map_.empty() or pending_additions_begin_ == 0.) 
+ bool disjoint_from_index = false; if (num_edges > 0) { S2CellId shrunk_id = ShrinkToFit(pcell, bound); if (shrunk_id != pcell.id()) { @@ -975,17 +1298,19 @@ void MutableS2ShapeIndex::UpdateFaceEdges(int face, UpdateEdges(pcell, &clipped_edges, tracker, &alloc, disjoint_from_index); } -inline S2CellId MutableS2ShapeIndex::ShrinkToFit(const S2PaddedCell& pcell, - const R2Rect& bound) const { +S2CellId MutableS2ShapeIndex::ShrinkToFit(const S2PaddedCell& pcell, + const R2Rect& bound) const { S2CellId shrunk_id = pcell.ShrinkToFit(bound); - if (!is_first_update() && shrunk_id != pcell.id()) { + if (shrunk_id != pcell.id()) { // Don't shrink any smaller than the existing index cells, since we need - // to combine the new edges with those cells. - // Use InitStale() to avoid applying updated recursively. + // to combine the new edges with those cells. Use InitStale() to avoid + // applying updates recursively. Iterator iter; iter.InitStale(this); - CellRelation r = iter.Locate(shrunk_id); - if (r == INDEXED) { shrunk_id = iter.id(); } + S2CellRelation r = iter.Locate(shrunk_id); + if (r == S2CellRelation::INDEXED) { + shrunk_id = iter.id(); + } } return shrunk_id; } @@ -1008,6 +1333,28 @@ void MutableS2ShapeIndex::SkipCellRange(S2CellId begin, S2CellId end, } } +// Given an edge and an interval "middle" along the v-axis, clip the edge +// against the boundaries of "middle" and add the edge to the corresponding +// children. +/* static */ ABSL_ATTRIBUTE_ALWAYS_INLINE // ~8% faster +inline void MutableS2ShapeIndex::ClipVAxis( + const ClippedEdge* edge, + const R1Interval& middle, + vector child_edges[2], + EdgeAllocator* alloc) { + if (edge->bound[1].hi() <= middle.lo()) { + // Edge is entirely contained in the lower child. + child_edges[0].push_back(edge); + } else if (edge->bound[1].lo() >= middle.hi()) { + // Edge is entirely contained in the upper child. + child_edges[1].push_back(edge); + } else { + // The edge bound spans both children. 
+ child_edges[0].push_back(ClipVBound(edge, 1, middle.hi(), alloc)); + child_edges[1].push_back(ClipVBound(edge, 0, middle.lo(), alloc)); + } +} + // Given a cell and a set of ClippedEdges whose bounding boxes intersect that // cell, add or remove all the edges from the index. Temporary space for // edges that need to be subdivided is allocated from the given EdgeAllocator. @@ -1047,21 +1394,21 @@ void MutableS2ShapeIndex::UpdateEdges(const S2PaddedCell& pcell, if (!disjoint_from_index) { // There may be existing index cells contained inside "pcell". If we // encounter such a cell, we need to combine the edges being updated with - // the existing cell contents by "absorbing" the cell. - // Use InitStale() to avoid applying updated recursively. + // the existing cell contents by "absorbing" the cell. We use InitStale() + // to avoid applying updates recursively. Iterator iter; iter.InitStale(this); - CellRelation r = iter.Locate(pcell.id()); - if (r == DISJOINT) { + S2CellRelation r = iter.Locate(pcell.id()); + if (r == S2CellRelation::DISJOINT) { disjoint_from_index = true; - } else if (r == INDEXED) { + } else if (r == S2CellRelation::INDEXED) { // Absorb the index cell by transferring its contents to "edges" and // deleting it. We also start tracking the interior of any new shapes. AbsorbIndexCell(pcell, iter, edges, tracker, alloc); index_cell_absorbed = true; disjoint_from_index = true; } else { - S2_DCHECK_EQ(SUBDIVIDED, r); + S2_DCHECK_EQ(S2CellRelation::SUBDIVIDED, r); } } @@ -1152,28 +1499,6 @@ void MutableS2ShapeIndex::UpdateEdges(const S2PaddedCell& pcell, } } -// Given an edge and an interval "middle" along the v-axis, clip the edge -// against the boundaries of "middle" and add the edge to the corresponding -// children. 
-/* static */ -inline void MutableS2ShapeIndex::ClipVAxis( - const ClippedEdge* edge, - const R1Interval& middle, - vector child_edges[2], - EdgeAllocator* alloc) { - if (edge->bound[1].hi() <= middle.lo()) { - // Edge is entirely contained in the lower child. - child_edges[0].push_back(edge); - } else if (edge->bound[1].lo() >= middle.hi()) { - // Edge is entirely contained in the upper child. - child_edges[1].push_back(edge); - } else { - // The edge bound spans both children. - child_edges[0].push_back(ClipVBound(edge, 1, middle.hi(), alloc)); - child_edges[1].push_back(ClipVBound(edge, 0, middle.lo(), alloc)); - } -} - // Given an edge, clip the given endpoint (lo=0, hi=1) of the u-axis so that // it does not extend past the given value. /* static */ @@ -1264,7 +1589,8 @@ void MutableS2ShapeIndex::AbsorbIndexCell(const S2PaddedCell& pcell, // Here we first update the InteriorTracker state for removed edges to // correspond to the exit vertex of this cell, and then save the // InteriorTracker state. This state will be restored by UpdateEdges when - // it is finished processing the contents of this cell. + // it is finished processing the contents of this cell. (Note in the test + // below that removed edges are always sorted before added edges.) if (tracker->is_active() && !edges->empty() && is_shape_being_removed((*edges)[0]->face_edge->shape_id)) { // We probably need to update the InteriorTracker. ("Probably" because @@ -1284,11 +1610,12 @@ void MutableS2ShapeIndex::AbsorbIndexCell(const S2PaddedCell& pcell, } } } - // Save the state of the edges being removed, so that it can be restored - // when we are finished processing this cell and its children. We don't - // need to save the state of the edges being added because they aren't being - // removed from "edges" and will therefore be updated normally as we visit - // this cell and its children. 
+ // Save the state of the edges being removed so that it can be restored when + // we are finished processing this cell and its children. Below we not only + // remove those edges but also add new edges whose state only needs to be + // tracked within this subtree. We don't need to save the state of the + // edges being added because they aren't being removed from "edges" and will + // therefore be updated normally as we visit this cell and its children. tracker->SaveAndClearStateBefore(pending_additions_begin_); // Create a FaceEdge for each edge in this cell that isn't being removed. @@ -1309,8 +1636,9 @@ void MutableS2ShapeIndex::AbsorbIndexCell(const S2PaddedCell& pcell, // cell is inside the shape, so we need to test all the edges against the // line segment from the cell center to the entry vertex. FaceEdge edge; - edge.shape_id = shape->id(); - edge.has_interior = (shape->dimension() == 2); + edge.shape_id = shape_id; + edge.has_interior = (shape->dimension() == 2 && + shape_id != tracker->partial_shape_id()); if (edge.has_interior) { tracker->AddShape(shape_id, clipped.contains_center()); // There might not be any edges in this entire cell (i.e., it might be @@ -1331,7 +1659,7 @@ void MutableS2ShapeIndex::AbsorbIndexCell(const S2PaddedCell& pcell, if (edge.has_interior) tracker->TestEdge(shape_id, edge.edge); if (!S2::ClipToPaddedFace(edge.edge.v0, edge.edge.v1, pcell.id().face(), kCellPadding, &edge.a, &edge.b)) { - S2_LOG(DFATAL) << "Invariant failure in MutableS2ShapeIndex"; + S2_LOG(ERROR) << "Invariant failure in MutableS2ShapeIndex"; } face_edges->push_back(edge); } @@ -1347,7 +1675,7 @@ void MutableS2ShapeIndex::AbsorbIndexCell(const S2PaddedCell& pcell, } // Discard any edges from "edges" that are being removed, and append the // remainder to "new_edges". (This keeps the edges sorted by shape id.) 
- for (int i = 0; i < edges->size(); ++i) { + for (size_t i = 0; i < edges->size(); ++i) { const ClippedEdge* clipped = (*edges)[i]; if (!is_shape_being_removed(clipped->face_edge->shape_id)) { new_edges.insert(new_edges.end(), edges->begin() + i, edges->end()); @@ -1372,13 +1700,120 @@ bool MutableS2ShapeIndex::MakeIndexCell(const S2PaddedCell& pcell, return true; } - // Count the number of edges that have not reached their maximum level yet. - // Return false if there are too many such edges. - int count = 0; - for (const ClippedEdge* edge : edges) { - count += (pcell.level() < edge->face_edge->max_level); - if (count > options_.max_edges_per_cell()) - return false; + // We can show using amortized analysis that the total index size is + // + // O(c1 * n + c2 * (1 - f) / f * n) + // + // where n is the number of input edges (and where we also count an "edge" + // for each shape with an interior but no edges), f is the value of + // FLAGS_s2shape_index_min_short_edge_fraction, and c1 and c2 are constants + // where c2 is about 20 times larger than c1. + // + // First observe that the space used by a MutableS2ShapeIndex is + // proportional to the space used by all of its index cells, and the space + // used by an S2ShapeIndexCell is proportional to the number of edges that + // intersect that cell plus the number of shapes that contain the entire + // cell ("containing shapes"). Define an "index entry" as an intersecting + // edge or containing shape stored by an index cell. Our goal is then to + // bound the number of index entries. + // + // We divide the index entries into two groups. An index entry is "short" + // if it represents an edge that was considered short in that index cell's + // parent, and "long" otherwise. (Note that the long index entries also + // include the containing shapes mentioned above.) 
We then bound the + // maximum number of both types of index entries by associating them with + // edges that were considered short in those index cells' parents. + // + // First consider the short index entries for a given edge E. Let S be the + // set of index cells that intersect E and where E was considered short in + // those index cells' parents. Since E was short in each parent cell, the + // width of those parent cells is at least some fraction "g" of E's length + // (as controlled by FLAGS_s2shape_index_cell_size_to_long_edge_ratio). + // Therefore the minimum width of each cell in S is also at least some + // fraction of E's length (i.e., g / 2). This implies that there are at most + // a constant number c1 of such cells, since they all intersect E and do not + // overlap, which means that there are at most (c1 * n) short entries in + // total. + // + // With index_cell_size_to_long_edge_ratio = 1.0 (the default value), it can + // be shown that c1 = 10. In other words, it is not possible for a given + // edge to intersect more than 10 index cells where it was considered short + // in those cells' parents. The value of c1 can be reduced as low c1 = 4 by + // increasing index_cell_size_to_long_edge_ratio to about 3.1. (The reason + // the minimum value is 3.1 rather than 2.0 is that this ratio is defined in + // terms of the average edge length of cells at a given level, rather than + // their minimum width, and 2 * (S2::kAvgEdge / S2::kMinWidth) ~= 3.1.) + // + // Next we consider the long index entries. Let c2 be the maximum number of + // index cells where a given edge E was considered short in those cells' + // parents. (Unlike the case above, we do not require that these cells + // intersect E.) 
Because the minimum width of each parent cell is at least + // some fraction of E's length and the parent cells at a given level do not + // overlap, there can be at most a small constant number of index cells at + // each level where E is considered short in those cells' parents. For + // example, consider a very short edge E that intersects the midpoint of a + // cell edge at level 0. There are 16 cells at level 30 where E was + // considered short in the parent cell, 12 cells at each of levels 29..2, and + // 4 cells at levels 1 and 0 (pretending that all 6 face cells share a common + // "parent"). This yields a total of c2 = 360 index cells. This is actually + // the worst case for index_cell_size_to_long_edge_ratio >= 3.1; with the + // default value of 1.0 it is possible to have a few more index cells at + // levels 29 and 30, for a maximum of c2 = 366 index cells. + // + // The code below subdivides a given cell only if + // + // s > f * (s + l) + // + // where "f" is the min_short_edge_fraction parameter, "s" is the number of + // short edges that intersect the cell, and "l" is the number of long edges + // that intersect the cell plus an upper bound on the number of shapes that + // contain the entire cell. (It is an upper bound rather than an exact count + // because we use the number of shapes that contain an arbitrary vertex of + // the cell.) Note that the number of long index entries in each child of + // this cell is at most "l" because no child intersects more edges than its + // parent or is entirely contained by more shapes than its parent. + // + // The inequality above can be rearranged to give + // + // l < s * (1 - f) / f + // + // This says that each long index entry in a child cell can be associated + // with at most (1 - f) / f edges that were considered short when the parent + // cell was subdivided. Furthermore we know that there are at most c2 index + // cells where a given edge was considered short in the parent cell. 
Since + // there are only n edges in total, this means that the maximum number of + // long index entries is at most + // + // c2 * (1 - f) / f * n + // + // and putting this together with the result for short index entries gives + // the desired bound. + // + // There are a variety of ways to make this bound tighter, e.g. when "n" is + // relatively small. For example when the indexed geometry satisfies the + // requirements of S2BooleanOperation (i.e., shape interiors are disjoint) + // and the min_short_edge_fraction parameter is not too large, then the + // constant c2 above is only about half as big (i.e., c2 ~= 180). This is + // because the worst case under these circumstances requires having many + // shapes whose interiors overlap. + + // Continue subdividing if the proposed index cell would contain too many + // edges that are "short" relative to its size (as controlled by the + // FLAGS_s2shape_index_cell_size_to_long_edge_ratio parameter). Usually "too + // many" means more than options_.max_edges_per_cell(), but this value might + // be increased if the cell has a lot of long edges and/or containing shapes. + // This strategy ensures that the total index size is linear (see above). + if (edges.size() > static_cast(options_.max_edges_per_cell())) { + int max_short_edges = + max(options_.max_edges_per_cell(), + static_cast( + absl::GetFlag(FLAGS_s2shape_index_min_short_edge_fraction) * + (edges.size() + tracker->shape_ids().size()))); + int count = 0; + for (const ClippedEdge* edge : edges) { + count += (pcell.level() < edge->face_edge->max_level); + if (count > max_short_edges) return false; + } } // Possible optimization: Continue subdividing as long as exactly one child @@ -1424,7 +1859,7 @@ bool MutableS2ShapeIndex::MakeIndexCell(const S2PaddedCell& pcell, // "containing shapes" (those that contain the cell center). We keep track // of the index of the next intersecting edge and the next containing shape // as we go along. 
Both sets of shape ids are already sorted. - int enext = 0; + size_t enext = 0; ShapeIdSet::const_iterator cnext = cshape_ids.begin(); for (int i = 0; i < num_shapes; ++i) { S2ClippedShape* clipped = base + i; @@ -1448,7 +1883,7 @@ bool MutableS2ShapeIndex::MakeIndexCell(const S2PaddedCell& pcell, ++enext; } clipped->Init(eshape_id, enext - ebegin); - for (int e = ebegin; e < enext; ++e) { + for (size_t e = ebegin; e < enext; ++e) { clipped->set_edge(e - ebegin, edges[e]->face_edge->edge_id); } if (cshape_id == eshape_id) { @@ -1462,7 +1897,7 @@ bool MutableS2ShapeIndex::MakeIndexCell(const S2PaddedCell& pcell, // is much faster to give an insertion hint in this case. Otherwise the // hint doesn't do much harm. With more effort we could provide a hint even // during incremental updates, but this is probably not worth the effort. - cell_map_.insert(cell_map_.end(), std::make_pair(pcell.id(), cell)); + cell_map_.insert(cell_map_.end(), make_pair(pcell.id(), cell)); // Shift the InteriorTracker focus point to the exit vertex of this cell. if (tracker->is_active() && !edges.empty()) { @@ -1512,7 +1947,7 @@ int MutableS2ShapeIndex::CountShapes(const vector& edges, size_t MutableS2ShapeIndex::SpaceUsed() const { size_t size = sizeof(*this); - size += shapes_.capacity() * sizeof(std::unique_ptr); + size += shapes_.capacity() * sizeof(unique_ptr); // cell_map_ itself is already included in sizeof(*this). 
size += cell_map_.bytes_used() - sizeof(cell_map_); size += cell_map_.size() * sizeof(S2ShapeIndexCell); @@ -1528,9 +1963,12 @@ size_t MutableS2ShapeIndex::SpaceUsed() const { } } if (pending_removals_ != nullptr) { + size += sizeof(*pending_removals_); size += pending_removals_->capacity() * sizeof(RemovedShape); + for (const RemovedShape& removed : *pending_removals_) { + size += removed.edges.capacity() * sizeof(S2Shape::Edge); + } } - return size; } @@ -1542,6 +1980,9 @@ void MutableS2ShapeIndex::Encode(Encoder* encoder) const { uint64 max_edges = options_.max_edges_per_cell(); encoder->put_varint64(max_edges << 2 | kCurrentEncodingVersionNumber); + // The index will be built anyway when we iterate through it, but building + // it in advance lets us size the cell_ids vector correctly. + ForceBuild(); vector cell_ids; cell_ids.reserve(cell_map_.size()); s2coding::StringVectorEncoder encoded_cells; @@ -1563,7 +2004,7 @@ bool MutableS2ShapeIndex::Init(Decoder* decoder, options_.set_max_edges_per_cell(max_edges_version >> 2); uint32 num_shapes = shape_factory.size(); shapes_.reserve(num_shapes); - for (int shape_id = 0; shape_id < num_shapes; ++shape_id) { + for (size_t shape_id = 0; shape_id < num_shapes; ++shape_id) { auto shape = shape_factory[shape_id]; if (shape) shape->id_ = shape_id; shapes_.push_back(std::move(shape)); @@ -1574,12 +2015,12 @@ bool MutableS2ShapeIndex::Init(Decoder* decoder, if (!cell_ids.Init(decoder)) return false; if (!encoded_cells.Init(decoder)) return false; - for (int i = 0; i < cell_ids.size(); ++i) { + for (size_t i = 0; i < cell_ids.size(); ++i) { S2CellId id = cell_ids[i]; S2ShapeIndexCell* cell = new S2ShapeIndexCell; Decoder decoder = encoded_cells.GetDecoder(i); if (!cell->Decode(num_shapes, &decoder)) return false; - cell_map_.insert(cell_map_.end(), std::make_pair(id, cell)); + cell_map_.insert(cell_map_.end(), make_pair(id, cell)); } return true; } diff --git a/src/s2/mutable_s2shape_index.h b/src/s2/mutable_s2shape_index.h 
index 2585e8f7..145cdacf 100644 --- a/src/s2/mutable_s2shape_index.h +++ b/src/s2/mutable_s2shape_index.h @@ -25,26 +25,35 @@ #include #include +#include "absl/base/macros.h" +#include "absl/base/thread_annotations.h" +#include "absl/container/btree_map.h" +#include "absl/synchronization/mutex.h" +#include "s2/base/commandlineflags.h" +#include "s2/base/commandlineflags_declare.h" #include "s2/base/integral_types.h" #include "s2/base/logging.h" -#include "s2/base/mutex.h" #include "s2/base/spinlock.h" #include "s2/_fp_contract_off.h" +#include "s2/r1interval.h" +#include "s2/r2rect.h" #include "s2/s2cell_id.h" +#include "s2/s2memory_tracker.h" +#include "s2/s2point.h" #include "s2/s2pointutil.h" #include "s2/s2shape.h" #include "s2/s2shape_index.h" -#include "absl/base/macros.h" -#include "absl/base/thread_annotations.h" -#include "absl/memory/memory.h" -#include "absl/container/btree_map.h" +#include "s2/s2shapeutil_shape_edge_id.h" +#include "s2/util/coding/coder.h" + +class S2PaddedCell; namespace s2internal { // Hack to expose bytes_used. template class BTreeMap : public absl::btree_map { -public: + public: size_t bytes_used() const { return this->tree_.bytes_used(); } }; } // namespace s2internal @@ -53,14 +62,15 @@ class BTreeMap : public absl::btree_map { // The objects in the index are known as "shapes", and may consist of points, // polylines, and/or polygons, possibly overlapping. The index makes it very // fast to answer queries such as finding nearby shapes, measuring distances, -// testing for intersection and containment, etc. +// testing for intersection and containment, etc. It is one of several +// implementations of the S2ShapeIndex interface (see EncodedS2ShapeIndex). // // MutableS2ShapeIndex allows not only building an index, but also updating it -// incrementally by adding or removing shapes (hence its name). It is one of -// several implementations of the S2ShapeIndex interface. 
MutableS2ShapeIndex -// is designed to be compact; usually it is smaller than the underlying -// geometry being indexed. It is capable of indexing up to hundreds of -// millions of edges. The index is also fast to construct. +// incrementally by adding or removing shapes (hence its name). It is designed +// to be compact; usually the index is smaller than the underlying geometry. +// It is capable of indexing up to hundreds of millions of edges. The index is +// also fast to construct. The index size and construction time are guaranteed +// to be linear in the number of input edges. // // There are a number of built-in classes that work with S2ShapeIndex objects. // Generally these classes accept any collection of geometry that can be @@ -77,7 +87,8 @@ class BTreeMap : public absl::btree_map { // - S2BooleanOperation: computes boolean operations such as union, // and boolean predicates such as containment. // -// - S2ShapeIndexRegion: computes approximations for a collection of geometry. +// - S2ShapeIndexRegion: can be used together with S2RegionCoverer to +// approximate geometry as a set of S2CellIds. // // - S2ShapeIndexBufferedRegion: computes approximations that have been // expanded by a given radius. @@ -89,7 +100,7 @@ class BTreeMap : public absl::btree_map { // const vector& polygons) { // MutableS2ShapeIndex index; // for (auto polygon : polygons) { -// index.Add(absl::make_unique(polygon)); +// index.Add(std::make_unique(polygon)); // } // auto query = MakeS2ContainsPointQuery(&index); // for (const auto& point : points) { @@ -115,10 +126,10 @@ class BTreeMap : public absl::btree_map { // if one thread updates the index, you must ensure that no other thread is // reading or updating the index at the same time. // -// TODO(ericv): MutableS2ShapeIndex has an Encode() method that allows the -// index to be serialized. An encoded S2ShapeIndex can be decoded either into -// its original form (MutableS2ShapeIndex) or into an EncodedS2ShapeIndex. 
-// The key property of EncodedS2ShapeIndex is that it can be constructed +// MutableS2ShapeIndex has an Encode() method that allows the index to be +// serialized. An encoded S2ShapeIndex can be decoded either into its +// original form (MutableS2ShapeIndex) or into an EncodedS2ShapeIndex. The +// key property of EncodedS2ShapeIndex is that it can be constructed // instantaneously, since the index is kept in its original encoded form. // Data is decoded only when an operation needs it. For example, to determine // which shapes(s) contain a given query point only requires decoding the data @@ -128,6 +139,10 @@ class MutableS2ShapeIndex final : public S2ShapeIndex { using CellMap = s2internal::BTreeMap; public: + // The amount by which cells are "padded" to compensate for numerical errors + // when clipping line segments to cell boundaries. + static const double kCellPadding; + // Options that affect construction of the MutableS2ShapeIndex. class Options { public: @@ -166,13 +181,65 @@ class MutableS2ShapeIndex final : public S2ShapeIndex { ~MutableS2ShapeIndex() override; + MutableS2ShapeIndex(MutableS2ShapeIndex&&); + MutableS2ShapeIndex& operator=(MutableS2ShapeIndex&&); + // Initialize a MutableS2ShapeIndex with the given options. This method may - // only be called when the index is empty (i.e. newly created or Reset() has - // just been called). + // only be called when the index is empty (i.e. newly created or Clear() has + // just been called). May be called before or after set_memory_tracker(). void Init(const Options& options); const Options& options() const { return options_; } + // Specifies that memory usage should be tracked and/or limited by the given + // S2MemoryTracker. 
For example: + // + // S2MemoryTracker tracker; + // tracker.set_limit(500 << 20); // 500 MB memory limit + // MutableS2ShapeIndex index; + // index.set_memory_tracker(&tracker); + // + // If the memory limit is exceeded, an appropriate status is returned in + // memory_tracker()->error() and any partially built index is discarded + // (equivalent to calling Minimize()). + // + // This method may be called multiple times in order to switch from one + // memory tracker to another or stop memory tracking altogether (by passing + // nullptr) in which case the memory usage due to this index is subtracted. + // + // REQUIRES: The lifetime of "tracker" must exceed the lifetime of the index + // unless set_memory_tracker(nullptr) is called to stop memory + // tracking before the index destructor is called. + // + // This implies that the S2MemoryTracker must be declared *before* + // the MutableS2ShapeIndex in the example above. + // + // CAVEATS: + // + // - This method is not const and is therefore not thread-safe. + // + // - Does not track memory used by the S2Shapes in the index. + // + // - While the index representation itself is tracked very accurately, + // the temporary data needed for index construction is tracked using + // heuristics and may be underestimated or overestimated. + // + // - Temporary memory usage is typically 10x larger than the final index + // size, however it can be reduced by specifying a suitable value for + // FLAGS_s2shape_index_tmp_memory_budget (the default is 100 MB). If + // more temporary memory than this is needed during construction, index + // updates will be split into multiple batches in order to keep the + // estimated temporary memory usage below this limit. + // + // - S2MemoryTracker::limit() has no effect on how much temporary memory + // MutableS2ShapeIndex will attempt to use during index construction; it + // simply causes an error to be returned when the limit would otherwise + // be exceeded. 
If you set a memory limit smaller than 100MB and want to + // reduce memory usage rather than simply generating an error then you + // should also set FLAGS_s2shape_index_tmp_memory_budget appropriately. + void set_memory_tracker(S2MemoryTracker* tracker); + S2MemoryTracker* memory_tracker() const { return mem_tracker_.tracker(); } + // The number of distinct shape ids that have been assigned. This equals // the number of shapes in the index provided that no shapes have ever been // removed. (Shape ids are not reused.) @@ -198,6 +265,18 @@ class MutableS2ShapeIndex final : public S2ShapeIndex { // s2shapeutil::CompactEncodeTaggedShapes(index, encoder); // index.Encode(encoder); // + // The encoded size is typically much smaller than the in-memory size. + // Here are a few examples: + // + // Number of edges In-memory space used Encoded size (%) + // -------------------------------------------------------------- + // 8 192 8 4% + // 768 18,264 2,021 11% + // 3,784,212 80,978,992 17,039,020 21% + // + // The encoded form also has the advantage of being a contiguous block of + // memory. + // // REQUIRES: "encoder" uses the default constructor, so that its buffer // can be enlarged as necessary by calling Ensure(int). 
void Encode(Encoder* encoder) const; @@ -251,14 +330,20 @@ class MutableS2ShapeIndex final : public S2ShapeIndex { // S2Point center() const; const S2ShapeIndexCell& cell() const; - // IteratorBase API: + // S2CellIterator API: void Begin() override; void Finish() override; void Next() override; bool Prev() override; void Seek(S2CellId target) override; - bool Locate(const S2Point& target) override; - CellRelation Locate(S2CellId target) override; + + bool Locate(const S2Point& target) override { + return LocateImpl(*this, target); + } + + S2CellRelation Locate(S2CellId target) override { + return LocateImpl(*this, target); + } protected: const S2ShapeIndexCell* GetCell() const override; @@ -275,6 +360,9 @@ class MutableS2ShapeIndex final : public S2ShapeIndex { // assigns a unique id to the shape (shape->id()) and returns that id. // Shape ids are assigned sequentially starting from 0 in the order shapes // are added. Invalidates all iterators and their associated data. + // + // Note that this method is not affected by S2MemoryTracker, i.e. shapes can + // continue to be added even once the specified limit has been reached. int Add(std::unique_ptr shape); // Removes the given shape from the index and return ownership to the caller. @@ -296,14 +384,27 @@ class MutableS2ShapeIndex final : public S2ShapeIndex { size_t SpaceUsed() const override; // Calls to Add() and Release() are normally queued and processed on the - // first subsequent query (in a thread-safe way). This has many advantages, - // the most important of which is that sometimes there *is* no subsequent - // query, which lets us avoid building the index completely. + // first subsequent query (in a thread-safe way). Building the index lazily + // in this way has several advantages, the most important of which is that + // sometimes there *is* no subsequent query and the index doesn't need to be + // built at all. // - // This method forces any pending updates to be applied immediately. 
- Calling this method is rarely a good idea. (One valid reason is to - // exclude the cost of building the index from benchmark results.) - void ForceBuild(); + // In contrast, ForceBuild() causes any pending updates to be applied + // immediately. It is thread-safe and may be called simultaneously with + // other "const" methods (see notes on thread safety above). Similarly this + // method is "const" since it does not modify the visible index contents. + // + // ForceBuild() should not normally be called since it prevents lazy index + // construction (which is usually beneficial). Some reasons to use it + // include: + // + // - To exclude the cost of building the index from benchmark results. + // - To ensure that the first subsequent query is as fast as possible. + // - To ensure that the index can be built successfully without exceeding a + // specified S2MemoryTracker limit (see the constructor for details). + // + // Note that this method is thread-safe. + void ForceBuild() const; // Returns true if there are no pending updates that need to be applied. // This can be useful to avoid building the index unnecessarily, or for @@ -325,38 +426,37 @@ class MutableS2ShapeIndex final : public S2ShapeIndex { friend class MutableS2ShapeIndexTest; friend class S2Stats; + class BatchGenerator; + class EdgeAllocator; + class InteriorTracker; struct BatchDescriptor; struct ClippedEdge; - class EdgeAllocator; struct FaceEdge; - class InteriorTracker; struct RemovedShape; + using ShapeEdgeId = s2shapeutil::ShapeEdgeId; using ShapeIdSet = std::vector; // When adding a new encoding, be aware that old binaries will not be able // to decode it. - static const unsigned char kCurrentEncodingVersionNumber = 0; + static constexpr unsigned char kCurrentEncodingVersionNumber = 0; // Internal methods are documented with their definitions. 
- bool is_first_update() const; bool is_shape_being_removed(int shape_id) const; + void MarkIndexStale(); void MaybeApplyUpdates() const; void ApplyUpdatesThreadSafe(); void ApplyUpdatesInternal(); - void GetUpdateBatches(std::vector* batches) const; - static void GetBatchSizes(int num_items, int max_batches, - double final_bytes_per_item, - double high_water_bytes_per_item, - double preferred_max_bytes_per_batch, - std::vector* batch_sizes); + std::vector GetUpdateBatches() const; void ReserveSpace(const BatchDescriptor& batch, - std::vector all_edges[6]) const; - void AddShape(int id, std::vector all_edges[6], + std::vector all_edges[6]); + void AddShape(const S2Shape* shape, int edges_begin, int edges_end, + std::vector all_edges[6], InteriorTracker* tracker) const; void RemoveShape(const RemovedShape& removed, std::vector all_edges[6], InteriorTracker* tracker) const; + void FinishPartialShape(int shape_id); void AddFaceEdge(FaceEdge* edge, std::vector all_edges[6]) const; void UpdateFaceEdges(int face, const std::vector& face_edges, InteriorTracker* tracker); @@ -394,10 +494,6 @@ class MutableS2ShapeIndex final : public S2ShapeIndex { std::vector child_edges[2], EdgeAllocator* alloc); - // The amount by which cells are "padded" to compensate for numerical errors - // when clipping line segments to cell boundaries. - static const double kCellPadding; - // The shapes in the index, accessed by their shape id. Removed shapes are // replaced by nullptr pointers. std::vector> shapes_; @@ -460,7 +556,7 @@ class MutableS2ShapeIndex final : public S2ShapeIndex { FRESH, // There are no pending updates. }; // Reads and writes to this field are guarded by "lock_". - std::atomic index_status_; + std::atomic index_status_{FRESH}; // UpdateState holds temporary data related to thread synchronization. It // is only allocated while updates are being applied. 
@@ -485,19 +581,105 @@ class MutableS2ShapeIndex final : public S2ShapeIndex { }; std::unique_ptr update_state_; + S2MemoryTracker::Client mem_tracker_; + +#ifndef SWIG // Documented in the .cc file. - void UnlockAndSignal() - UNLOCK_FUNCTION(lock_) - UNLOCK_FUNCTION(update_state_->wait_mutex); + void UnlockAndSignal() ABSL_UNLOCK_FUNCTION(lock_) + ABSL_UNLOCK_FUNCTION(update_state_->wait_mutex); +#endif MutableS2ShapeIndex(const MutableS2ShapeIndex&) = delete; - void operator=(const MutableS2ShapeIndex&) = delete; + MutableS2ShapeIndex& operator=(const MutableS2ShapeIndex&) = delete; }; +// The following flag can be used to limit the amount of temporary memory used +// when building an S2ShapeIndex. See the .cc file for details. +// +// DEFAULT: 100 MB +S2_DECLARE_int64(s2shape_index_tmp_memory_budget); + ////////////////// Implementation details follow //////////////////// +// A BatchDescriptor represents a set of pending updates that will be applied +// at the same time. The batch consists of all edges in (shape id, edge id) +// order from "begin" (inclusive) to "end" (exclusive). Note that the last +// shape in a batch may have only some of its edges added. The first batch +// also implicitly includes all shapes being removed. "num_edges" is the +// total number of edges that will be added or removed in this batch. +struct MutableS2ShapeIndex::BatchDescriptor { + // REQUIRES: If end.edge_id != 0, it must refer to a valid edge. + ShapeEdgeId begin, end; + int num_edges; +}; + +// The purpose of BatchGenerator is to divide large updates into batches such +// that all batches use approximately the same amount of high-water memory. +// This class is defined here so that it can be tested independently. +class MutableS2ShapeIndex::BatchGenerator { + public: + // Given the total number of edges that will be removed and added, prepares + // to divide the edges into batches. "shape_id_begin" identifies the first + // shape whose edges will be added. 
+ BatchGenerator(int num_edges_removed, int num_edges_added, + int shape_id_begin); + + // Indicates that the given shape will be added to the index. Shapes with + // few edges will be grouped together into a single batch, while shapes with + // many edges will be split over several batches if necessary. + void AddShape(int shape_id, int num_edges); + + // Returns a vector describing each batch. This method should be called + // once all shapes have been added. + std::vector Finish(); + + private: + // Returns a vector indicating the maximum number of edges in each batch. + // (The actual batch sizes are adjusted later in order to avoid splitting + // shapes between batches unnecessarily.) + static std::vector GetMaxBatchSizes(int num_edges_removed, + int num_edges_added); + + // Returns the maximum number of edges in the current batch. + int max_batch_size() const { return max_batch_sizes_[batch_index_]; } + + // Returns the maximum number of edges in the next batch. + int next_max_batch_size() const { return max_batch_sizes_[batch_index_ + 1]; } + + // Adds the given number of edges to the current batch. + void ExtendBatch(int num_edges) { + batch_size_ += num_edges; + } + + // Adds the given number of edges to the current batch, ending with the edge + // just before "batch_end", and then starts a new batch. + void FinishBatch(int num_edges, ShapeEdgeId batch_end); + + // A vector representing the ideal number of edges in each batch; the batch + // sizes gradually decrease to ensure that each batch uses approximately the + // same total amount of memory as the index grows. The actual batch sizes + // are then adjusted based on how many edges each shape has in order to + // avoid splitting shapes between batches unnecessarily. + std::vector max_batch_sizes_; + + // The maximum size of the current batch is determined by how many edges + // have been added to the index so far. 
For example if GetBatchSizes() + // returned {100, 70, 50, 30} and we have added 0 edges, the current batch + // size is 100. But if we have already added 90 edges then the current + // batch size would be 70, and if we have added 150 edges the batch size would + // be 50. We keep track of (1) the current index into batch_sizes and (2) + // the number of edges remaining before we increment the batch index. + int batch_index_ = 0; + int batch_index_edges_left_ = 0; + + ShapeEdgeId batch_begin_; // The start of the current batch. + int shape_id_end_; // One beyond the last shape to be added. + int batch_size_ = 0; // The number of edges in the current batch. + std::vector batches_; // The completed batches so far. +}; + inline MutableS2ShapeIndex::Iterator::Iterator() : index_(nullptr) { } @@ -570,18 +752,15 @@ inline void MutableS2ShapeIndex::Iterator::Seek(S2CellId target) { inline std::unique_ptr MutableS2ShapeIndex::NewIterator(InitialPosition pos) const { - return absl::make_unique(this, pos); + return std::make_unique(this, pos); } -inline bool MutableS2ShapeIndex::is_fresh() const { - return index_status_.load(std::memory_order_relaxed) == FRESH; +inline void MutableS2ShapeIndex::ForceBuild() const { + MaybeApplyUpdates(); } -// Return true if this is the first update to the index. -inline bool MutableS2ShapeIndex::is_first_update() const { - // Note that it is not sufficient to check whether cell_map_ is empty, since - // entries are added during the update process. 
- return pending_additions_begin_ == 0; +inline bool MutableS2ShapeIndex::is_fresh() const { + return index_status_.load(std::memory_order_relaxed) == FRESH; } // Given that the given shape is being updated, return true if it is being diff --git a/src/s2/r1interval.h b/src/s2/r1interval.h index f1208038..a502b308 100644 --- a/src/s2/r1interval.h +++ b/src/s2/r1interval.h @@ -22,6 +22,7 @@ #include #include #include +#include #include "s2/base/logging.h" #include "s2/_fp_contract_off.h" @@ -66,8 +67,9 @@ class R1Interval { } } - // Accessors methods. + // The low bound of the interval. double lo() const { return bounds_[0]; } + // The high bound of the interval. double hi() const { return bounds_[1]; } // Methods to modify one endpoint of an existing R1Interval. Do not use @@ -97,10 +99,12 @@ class R1Interval { // is negative. double GetLength() const { return hi() - lo(); } + // Returns true if the given point is in the closed interval [lo, hi]. bool Contains(double p) const { return p >= lo() && p <= hi(); } + // Returns true if the given point is in the open interval (lo, hi). bool InteriorContains(double p) const { return p > lo() && p < hi(); } @@ -145,9 +149,14 @@ class R1Interval { // Expand the interval so that it contains the given point "p". void AddPoint(double p) { - if (is_empty()) { set_lo(p); set_hi(p); } - else if (p < lo()) { set_lo(p); } // NOLINT - else if (p > hi()) { set_hi(p); } // NOLINT + if (is_empty()) { + set_lo(p); + set_hi(p); + } else if (p < lo()) { + set_lo(p); + } else if (p > hi()) { + set_hi(p); + } } // Expand the interval so that it contains the given interval "y". 
diff --git a/src/s2/r2rect.cc b/src/s2/r2rect.cc index 030578a2..b68f3ddd 100644 --- a/src/s2/r2rect.cc +++ b/src/s2/r2rect.cc @@ -17,9 +17,8 @@ #include "s2/r2rect.h" -#include +#include -#include "s2/base/logging.h" #include "s2/r1interval.h" #include "s2/r2.h" @@ -30,11 +29,6 @@ R2Rect R2Rect::FromCenterSize(const R2Point& center, const R2Point& size) { center.y() + 0.5 * size.y())); } -R2Rect R2Rect::FromPointPair(const R2Point& p1, const R2Point& p2) { - return R2Rect(R1Interval::FromPointPair(p1.x(), p2.x()), - R1Interval::FromPointPair(p1.y(), p2.y())); -} - bool R2Rect::Contains(const R2Rect& other) const { return x().Contains(other.x()) && y().Contains(other.y()); } diff --git a/src/s2/r2rect.h b/src/s2/r2rect.h index 7700fae4..47c6cca8 100644 --- a/src/s2/r2rect.h +++ b/src/s2/r2rect.h @@ -19,6 +19,7 @@ #define S2_R2RECT_H_ #include +#include #include "s2/base/logging.h" #include "s2/_fp_contract_off.h" @@ -135,7 +136,7 @@ class R2Rect { // Return a rectangle that has been expanded on each side in the x-direction // by margin.x(), and on each side in the y-direction by margin.y(). If - // either margin is empty, then shrink the interval on the corresponding + // either margin is negative, then shrink the interval on the corresponding // sides instead. The resulting rectangle may be empty. Any expansion of // an empty rectangle remains empty. R2Rect Expanded(const R2Point& margin) const; @@ -152,6 +153,9 @@ class R2Rect { // Return true if two rectangles contains the same set of points. bool operator==(const R2Rect& other) const; + // Return true if two rectangles do not contain the same set of points. + bool operator!=(const R2Rect& other) const; + // Return true if the x- and y-intervals of the two rectangles are the same // up to the given tolerance (see r1interval.h for details). 
bool ApproxEquals(const R2Rect& other, double max_error = 1e-15) const; @@ -181,6 +185,11 @@ inline R2Rect R2Rect::Empty() { return R2Rect(R1Interval::Empty(), R1Interval::Empty()); } +inline R2Rect R2Rect::FromPointPair(const R2Point& p1, const R2Point& p2) { + return R2Rect(R1Interval::FromPointPair(p1.x(), p2.x()), + R1Interval::FromPointPair(p1.y(), p2.y())); +} + inline bool R2Rect::is_valid() const { // The x/y ranges must either be both empty or both non-empty. return x().is_empty() == y().is_empty(); @@ -229,6 +238,10 @@ inline bool R2Rect::operator==(const R2Rect& other) const { return x() == other.x() && y() == other.y(); } +inline bool R2Rect::operator!=(const R2Rect& other) const { + return !operator==(other); +} + std::ostream& operator<<(std::ostream& os, const R2Rect& r); #endif // S2_R2RECT_H_ diff --git a/src/s2/s1angle.cc b/src/s2/s1angle.cc index 78cceda6..6696aee3 100644 --- a/src/s2/s1angle.cc +++ b/src/s2/s1angle.cc @@ -17,11 +17,14 @@ #include "s2/s1angle.h" +#include + #include #include #include #include "s2/s2latlng.h" +#include "s2/s2point.h" S1Angle::S1Angle(const S2Point& x, const S2Point& y) : radians_(x.Angle(y)) { @@ -46,7 +49,8 @@ std::ostream& operator<<(std::ostream& os, S1Angle a) { double degrees = a.degrees(); char buffer[13]; int sz = snprintf(buffer, sizeof(buffer), "%.7f", degrees); - if (sz >= 0 && sz < sizeof(buffer)) { + // Fix sign/unsign comparison for client that use `-Wextra` (e.g. Chrome). + if (sz >= 0 && static_cast(sz) < sizeof(buffer)) { return os << buffer; } else { return os << degrees; diff --git a/src/s2/s1angle.h b/src/s2/s1angle.h index 96291e9c..93137c6c 100644 --- a/src/s2/s1angle.h +++ b/src/s2/s1angle.h @@ -131,23 +131,24 @@ class S1Angle { // Return the absolute value of an angle. S1Angle abs() const; + friend S1Angle abs(S1Angle a); // Comparison operators. 
- friend bool operator==(S1Angle x, S1Angle y); - friend bool operator!=(S1Angle x, S1Angle y); - friend bool operator<(S1Angle x, S1Angle y); - friend bool operator>(S1Angle x, S1Angle y); - friend bool operator<=(S1Angle x, S1Angle y); - friend bool operator>=(S1Angle x, S1Angle y); + friend IFNDEF_SWIG(constexpr) bool operator==(S1Angle x, S1Angle y); + friend IFNDEF_SWIG(constexpr) bool operator!=(S1Angle x, S1Angle y); + friend IFNDEF_SWIG(constexpr) bool operator<(S1Angle x, S1Angle y); + friend IFNDEF_SWIG(constexpr) bool operator>(S1Angle x, S1Angle y); + friend IFNDEF_SWIG(constexpr) bool operator<=(S1Angle x, S1Angle y); + friend IFNDEF_SWIG(constexpr) bool operator>=(S1Angle x, S1Angle y); // Simple arithmetic operators for manipulating S1Angles. - friend S1Angle operator-(S1Angle a); - friend S1Angle operator+(S1Angle a, S1Angle b); - friend S1Angle operator-(S1Angle a, S1Angle b); - friend S1Angle operator*(double m, S1Angle a); - friend S1Angle operator*(S1Angle a, double m); - friend S1Angle operator/(S1Angle a, double m); - friend double operator/(S1Angle a, S1Angle b); + friend IFNDEF_SWIG(constexpr) S1Angle operator-(S1Angle a); + friend IFNDEF_SWIG(constexpr) S1Angle operator+(S1Angle a, S1Angle b); + friend IFNDEF_SWIG(constexpr) S1Angle operator-(S1Angle a, S1Angle b); + friend IFNDEF_SWIG(constexpr) S1Angle operator*(double m, S1Angle a); + friend IFNDEF_SWIG(constexpr) S1Angle operator*(S1Angle a, double m); + friend IFNDEF_SWIG(constexpr) S1Angle operator/(S1Angle a, double m); + friend IFNDEF_SWIG(constexpr) double operator/(S1Angle a, S1Angle b); S1Angle& operator+=(S1Angle a); S1Angle& operator-=(S1Angle a); S1Angle& operator*=(double m); @@ -164,9 +165,9 @@ class S1Angle { // Normalize this angle to the range (-180, 180] degrees. void Normalize(); - // When S1Angle is used as a key in one of the btree container types - // (util/btree), indicate that linear rather than binary search should be - // used. 
This is much faster when the comparison function is cheap. + // When S1Angle is used as a key in one of the absl::btree container types, + // indicate that linear rather than binary search should be used. This is + // much faster when the comparison function is cheap. typedef std::true_type absl_btree_prefer_linear_node_search; private: @@ -215,55 +216,59 @@ inline S1Angle S1Angle::abs() const { return S1Angle(std::fabs(radians_)); } -inline bool operator==(S1Angle x, S1Angle y) { +inline S1Angle abs(S1Angle a) { + return S1Angle(std::fabs(a.radians_)); +} + +inline constexpr bool operator==(S1Angle x, S1Angle y) { return x.radians() == y.radians(); } -inline bool operator!=(S1Angle x, S1Angle y) { +inline constexpr bool operator!=(S1Angle x, S1Angle y) { return x.radians() != y.radians(); } -inline bool operator<(S1Angle x, S1Angle y) { +inline constexpr bool operator<(S1Angle x, S1Angle y) { return x.radians() < y.radians(); } -inline bool operator>(S1Angle x, S1Angle y) { +inline constexpr bool operator>(S1Angle x, S1Angle y) { return x.radians() > y.radians(); } -inline bool operator<=(S1Angle x, S1Angle y) { +inline constexpr bool operator<=(S1Angle x, S1Angle y) { return x.radians() <= y.radians(); } -inline bool operator>=(S1Angle x, S1Angle y) { +inline constexpr bool operator>=(S1Angle x, S1Angle y) { return x.radians() >= y.radians(); } -inline S1Angle operator-(S1Angle a) { +inline constexpr S1Angle operator-(S1Angle a) { return S1Angle::Radians(-a.radians()); } -inline S1Angle operator+(S1Angle a, S1Angle b) { +inline constexpr S1Angle operator+(S1Angle a, S1Angle b) { return S1Angle::Radians(a.radians() + b.radians()); } -inline S1Angle operator-(S1Angle a, S1Angle b) { +inline constexpr S1Angle operator-(S1Angle a, S1Angle b) { return S1Angle::Radians(a.radians() - b.radians()); } -inline S1Angle operator*(double m, S1Angle a) { +inline constexpr S1Angle operator*(double m, S1Angle a) { return S1Angle::Radians(m * a.radians()); } -inline S1Angle 
operator*(S1Angle a, double m) { +inline constexpr S1Angle operator*(S1Angle a, double m) { return S1Angle::Radians(m * a.radians()); } -inline S1Angle operator/(S1Angle a, double m) { +inline constexpr S1Angle operator/(S1Angle a, double m) { return S1Angle::Radians(a.radians() / m); } -inline double operator/(S1Angle a, S1Angle b) { +inline constexpr double operator/(S1Angle a, S1Angle b) { return a.radians() / b.radians(); } diff --git a/src/s2/s1chord_angle.cc b/src/s2/s1chord_angle.cc index eb5fccc4..a79b0759 100644 --- a/src/s2/s1chord_angle.cc +++ b/src/s2/s1chord_angle.cc @@ -17,11 +17,12 @@ #include "s2/s1chord_angle.h" +#include #include #include +#include #include "s2/s1angle.h" -#include "s2/s2pointutil.h" using std::max; using std::min; @@ -88,16 +89,17 @@ double S1ChordAngle::GetS2PointConstructorMaxError() const { double S1ChordAngle::GetS1AngleConstructorMaxError() const { // Assuming that an accurate math library is being used, the sin() call and - // the multiply each have a relative error of 0.5 * DBL_EPSILON. - return DBL_EPSILON * length2_; + // the multiply each have a relative error of 0.5 * DBL_EPSILON. However + // the sin() error is squared. + return 1.5 * DBL_EPSILON * length2_; } S1ChordAngle operator+(S1ChordAngle a, S1ChordAngle b) { // Note that this method is much more efficient than converting the chord // angles to S1Angles and adding those. It requires only one square root // plus a few additions and multiplications. - S2_DCHECK(!a.is_special()); - S2_DCHECK(!b.is_special()); + S2_DCHECK(!a.is_special()) << a; + S2_DCHECK(!b.is_special()) << b; // Optimization for the common case where "b" is an error tolerance // parameter that happens to be set to zero. @@ -120,14 +122,19 @@ S1ChordAngle operator+(S1ChordAngle a, S1ChordAngle b) { S1ChordAngle operator-(S1ChordAngle a, S1ChordAngle b) { // See comments in operator+(). 
- S2_DCHECK(!a.is_special()); - S2_DCHECK(!b.is_special()); + S2_DCHECK(!a.is_special()) << a; + S2_DCHECK(!b.is_special()) << b; double a2 = a.length2(), b2 = b.length2(); if (b2 == 0) return a; if (a2 <= b2) return S1ChordAngle::Zero(); double x = a2 * (1 - 0.25 * b2); double y = b2 * (1 - 0.25 * a2); - return S1ChordAngle(max(0.0, x + y - 2 * sqrt(x * y))); + + // The calculation below is formulated differently (with two square roots + // rather than one) to avoid excessive cancellation error when two nearly + // equal values are subtracted. + double c = max(0.0, sqrt(x) - sqrt(y)); + return S1ChordAngle(c * c); } double sin2(S1ChordAngle a) { diff --git a/src/s2/s1chord_angle.h b/src/s2/s1chord_angle.h index 40772213..25659cde 100644 --- a/src/s2/s1chord_angle.h +++ b/src/s2/s1chord_angle.h @@ -18,28 +18,104 @@ #ifndef S2_S1CHORD_ANGLE_H_ #define S2_S1CHORD_ANGLE_H_ +#include #include #include #include #include +#include "s2/base/integral_types.h" #include "s2/_fp_contract_off.h" #include "s2/s1angle.h" +#include "s2/s2point.h" #include "s2/s2pointutil.h" // S1ChordAngle represents the angle subtended by a chord (i.e., the straight // line segment connecting two points on the sphere). Its representation // makes it very efficient for computing and comparing distances, but unlike // S1Angle it is only capable of representing angles between 0 and Pi radians. -// Generally, S1ChordAngle should only be used in loops where many angles need -// to be calculated and compared. Otherwise it is simpler to use S1Angle. +// S1ChordAngle is intended for applications where many angles need to be +// computed and compared, otherwise it is simpler to use S1Angle. // // S1ChordAngle also loses some accuracy as the angle approaches Pi radians. -// Specifically, the representation of (Pi - x) radians has an error of about -// (1e-15 / x), with a maximum error of about 2e-8 radians (about 13cm on the -// Earth's surface). 
For comparison, for angles up to 90 degrees (10000km) -// the worst-case representation error is about 2e-16 radians (1 nanometer), -// which is about the same as S1Angle. +// There are several different ways to measure this error, including the +// representational error (i.e., how accurately S1ChordAngle can represent +// angles near Pi radians), the conversion error (i.e., how much precision is +// lost when an S1Angle is converted to an S1ChordAngle), and the measurement +// error (i.e., how accurate the S1ChordAngle(a, b) constructor is when the +// points A and B are separated by angles close to Pi radians). All of these +// errors differ by a small constant factor. +// +// For the measurement error (which is the largest of these errors and also +// the most important in practice), let the angle between A and B be (Pi - x) +// radians, i.e. A and B are within "x" radians of being antipodal. The +// corresponding chord length is +// +// r = 2 * sin((Pi - x) / 2) = 2 * cos(x / 2) . +// +// For values of x not close to Pi the relative error in the squared chord +// length is at most 4.5 * DBL_EPSILON (see GetS2PointConstructorMaxError). +// The relative error in "r" is thus at most 2.25 * DBL_EPSILON ~= 5e-16. To +// convert this error into an equivalent angle, we have +// +// |dr / dx| = sin(x / 2) +// +// and therefore +// +// |dx| = dr / sin(x / 2) +// = 5e-16 * (2 * cos(x / 2)) / sin(x / 2) +// = 1e-15 / tan(x / 2) +// +// The maximum error is attained when +// +// x = |dx| +// = 1e-15 / tan(x / 2) +// ~= 1e-15 / (x / 2) +// ~= sqrt(2e-15) +// +// In summary, the measurement error for an angle (Pi - x) is at most +// +// dx = min(1e-15 / tan(x / 2), sqrt(2e-15)) +// (~= min(2e-15 / x, sqrt(2e-15)) when x is small). 
+// +// On the Earth's surface (assuming a radius of 6371km), this corresponds to +// the following worst-case measurement errors: +// +// Accuracy: Unless antipodal to within: +// --------- --------------------------- +// 6.4 nanometers 10,000 km (90 degrees) +// 1 micrometer 81.2 kilometers +// 1 millimeter 81.2 meters +// 1 centimeter 8.12 meters +// 28.5 centimeters 28.5 centimeters +// +// The representational and conversion errors referred to earlier are somewhat +// smaller than this. For example, maximum distance between adjacent +// representable S1ChordAngle values is only 13.5 cm rather than 28.5 cm. To +// see this, observe that the closest representable value to r^2 = 4 is +// r^2 = 4 * (1 - DBL_EPSILON / 2). Thus r = 2 * (1 - DBL_EPSILON / 4) and +// the angle between these two representable values is +// +// x = 2 * acos(r / 2) +// = 2 * acos(1 - DBL_EPSILON / 4) +// ~= 2 * asin(sqrt(DBL_EPSILON / 2) +// ~= sqrt(2 * DBL_EPSILON) +// ~= 2.1e-8 +// +// which is 13.5 cm on the Earth's surface. +// +// The worst case rounding error occurs when the value halfway between these +// two representable values is rounded up to 4. This halfway value is +// r^2 = (4 * (1 - DBL_EPSILON / 4)), thus r = 2 * (1 - DBL_EPSILON / 8) and +// the worst case rounding error is +// +// x = 2 * acos(r / 2) +// = 2 * acos(1 - DBL_EPSILON / 8) +// ~= 2 * asin(sqrt(DBL_EPSILON / 4) +// ~= sqrt(DBL_EPSILON) +// ~= 1.5e-8 +// +// which is 9.5 cm on the Earth's surface. // // This class is intended to be copied by value as desired. It uses // the default copy constructor and assignment operator. @@ -213,9 +289,9 @@ class S1ChordAngle { // Infinity() are both considered valid. bool is_valid() const; - // When S1ChordAngle is used as a key in one of the btree container types - // (util/btree), indicate that linear rather than binary search should be - // used. This is much faster when the comparison function is cheap. 
+ // When S1ChordAngle is used as a key in one of the absl::btree container + // types, indicate that linear rather than binary search should be used. + // This is much faster when the comparison function is cheap. typedef std::true_type absl_btree_prefer_linear_node_search; private: diff --git a/src/s2/s1interval.cc b/src/s2/s1interval.cc index d47875d3..eaf1ebb8 100644 --- a/src/s2/s1interval.cc +++ b/src/s2/s1interval.cc @@ -21,8 +21,6 @@ #include #include -#include "s2/base/logging.h" - using std::fabs; using std::max; diff --git a/src/s2/s1interval.h b/src/s2/s1interval.h index b37471ab..b11b2baf 100644 --- a/src/s2/s1interval.h +++ b/src/s2/s1interval.h @@ -21,6 +21,7 @@ #include #include #include +#include #include "s2/base/logging.h" #include "s2/_fp_contract_off.h" @@ -180,6 +181,9 @@ class S1Interval { // Return true if two intervals contains the same set of points. bool operator==(const S1Interval& y) const; + // Return true if two intervals do not contain the same set of points. + bool operator!=(const S1Interval& y) const; + // Return true if this interval can be transformed into the given interval by // moving each endpoint by at most "max_error" (and without the endpoints // crossing, which would invert the interval). Empty and full intervals are @@ -249,6 +253,10 @@ inline bool S1Interval::operator==(const S1Interval& y) const { return lo() == y.lo() && hi() == y.hi(); } +inline bool S1Interval::operator!=(const S1Interval& y) const { + return !operator==(y); +} + inline void S1Interval::set_lo(double p) { bounds_[0] = p; S2_DCHECK(is_valid()); diff --git a/src/s2/s2boolean_operation.cc b/src/s2/s2boolean_operation.cc index 4ef36c87..ef9a3084 100644 --- a/src/s2/s2boolean_operation.cc +++ b/src/s2/s2boolean_operation.cc @@ -1,4 +1,3 @@ -#include "cpp-compat.h" // Copyright 2017 Google Inc. All Rights Reserved. 
// // Licensed under the Apache License, Version 2.0 (the "License"); @@ -65,32 +64,57 @@ #include "s2/s2boolean_operation.h" +#include +#include + #include +#include #include #include +#include #include +#include +#include "absl/cleanup/cleanup.h" #include "absl/container/btree_map.h" -#include "absl/memory/memory.h" +#include "absl/container/flat_hash_map.h" +#include "absl/container/inlined_vector.h" +#include "absl/strings/string_view.h" + +#include "s2/base/integral_types.h" +#include "s2/id_set_lexicon.h" +#include "s2/s1angle.h" #include "s2/s2builder.h" +#include "s2/s2builder_graph.h" #include "s2/s2builder_layer.h" #include "s2/s2builderutil_snap_functions.h" +#include "s2/s2cell_id.h" #include "s2/s2contains_point_query.h" #include "s2/s2crossing_edge_query.h" -#include "s2/s2edge_crosser.h" #include "s2/s2edge_crossings.h" +#include "s2/s2error.h" #include "s2/s2measures.h" +#include "s2/s2memory_tracker.h" +#include "s2/s2point.h" #include "s2/s2predicates.h" +#include "s2/s2shape.h" +#include "s2/s2shape_index.h" #include "s2/s2shape_index_measures.h" +#include "s2/s2shapeutil_shape_edge.h" +#include "s2/s2shapeutil_shape_edge_id.h" #include "s2/s2shapeutil_visit_crossing_edge_pairs.h" +#include "s2/value_lexicon.h" // TODO(ericv): Remove this debugging output at some point. extern bool s2builder_verbose; namespace { // Anonymous namespace for helper classes. -using absl::make_unique; +using absl::flat_hash_map; +using absl::string_view; +using std::lower_bound; using std::make_pair; +using std::make_unique; using std::max; using std::min; using std::pair; @@ -147,8 +171,10 @@ class CrossingInputEdge { InputEdgeId input_id_ : 31; }; -// InputEdgeCrossings represents all pairs of intersecting input edges. -// It is sorted in lexicographic order. +// InputEdgeCrossings represents all pairs of intersecting input edges and +// also certain GraphEdgeClipper state modifications (kSetInside, etc). 
+// It is sorted lexicographically except for entries representing state +// modifications, which are sorted by the first InputEdgeId only. using InputEdgeCrossings = vector>; // Given two input edges A and B that intersect, suppose that A maps to a @@ -202,7 +228,7 @@ static vector GetInputEdgeChainOrder( // chain order (e.g. AB, BC, CD). vector> vmap; // Map from source vertex to edge id. vector indegree(g.num_vertices()); // Restricted to current input edge. - for (int end, begin = 0; begin < order.size(); begin = end) { + for (size_t end, begin = 0; begin < order.size(); begin = end) { // Gather the edges that came from a single input edge. InputEdgeId input_id = input_ids[order[begin]]; for (end = begin; end < order.size(); ++end) { @@ -213,7 +239,7 @@ static vector GetInputEdgeChainOrder( // Build a map from the source vertex of each edge to its edge id, // and also compute the indegree at each vertex considering only the edges // that came from the current input edge. - for (int i = begin; i < end; ++i) { + for (size_t i = begin; i < end; ++i) { EdgeId e = order[i]; vmap.push_back(make_pair(g.edge(e).first, e)); indegree[g.edge(e).second] += 1; @@ -222,12 +248,12 @@ static vector GetInputEdgeChainOrder( // Find the starting edge for building the edge chain. EdgeId next = g.num_edges(); - for (int i = begin; i < end; ++i) { + for (size_t i = begin; i < end; ++i) { EdgeId e = order[i]; if (indegree[g.edge(e).first] == 0) next = e; } // Build the edge chain. - for (int i = begin; ;) { + for (size_t i = begin;;) { order[i] = next; VertexId v = g.edge(next).second; indegree[v] = 0; // Clear as we go along. @@ -304,12 +330,14 @@ class GraphEdgeClipper { vector rank_; // The rank of each graph edge within order_. 
}; -GraphEdgeClipper::GraphEdgeClipper( - const Graph& g, const vector& input_dimensions, - const InputEdgeCrossings& input_crossings, - vector* new_edges, - vector* new_input_edge_ids) - : g_(g), in_(g), out_(g), +GraphEdgeClipper::GraphEdgeClipper(const Graph& g, + const vector& input_dimensions, + const InputEdgeCrossings& input_crossings, + vector* new_edges, + vector* new_input_edge_ids) + : g_(g), + in_(g), + out_(g), input_dimensions_(input_dimensions), input_crossings_(input_crossings), new_edges_(new_edges), @@ -317,9 +345,13 @@ GraphEdgeClipper::GraphEdgeClipper( input_ids_(g.input_edge_id_set_ids()), order_(GetInputEdgeChainOrder(g_, input_ids_)), rank_(order_.size()) { - for (int i = 0; i < order_.size(); ++i) { + for (size_t i = 0; i < order_.size(); ++i) { rank_[order_[i]] = i; } + // new_edges_ is obtained by filtering the graph edges and therefore the + // number of graph edges is an upper bound on its size. + new_edges_->reserve(g_.num_edges()); + new_input_edge_ids_->reserve(g_.num_edges()); } inline void GraphEdgeClipper::AddEdge(Graph::Edge edge, @@ -340,7 +372,7 @@ void GraphEdgeClipper::Run() { bool invert_b = false; bool reverse_a = false; auto next = input_crossings_.begin(); - for (int i = 0; i < order_.size(); ++i) { + for (size_t i = 0; i < order_.size(); ++i) { // For each input edge (the "A" input edge), gather all the input edges // that cross it (the "B" input edges). 
InputEdgeId a_input_id = input_ids_[order_[i]]; @@ -393,8 +425,8 @@ void GraphEdgeClipper::Run() { } --i; if (s2builder_verbose) { - cpp_compat_cout << "input edge " << a_input_id << " (inside=" << inside << "):"; - for (VertexId id : a_vertices) cpp_compat_cout << " " << id; + std::cout << "input edge " << a_input_id << " (inside=" << inside << "):"; + for (VertexId id : a_vertices) std::cout << " " << id; } // Now for each B edge chain, decide which vertex of the A chain it // crosses, and keep track of the number of signed crossings at each A @@ -416,18 +448,18 @@ void GraphEdgeClipper::Run() { a_num_crossings.resize(a_vertices.size()); a_isolated.clear(); a_isolated.resize(a_vertices.size()); - for (int bi = 0; bi < b_input_edges.size(); ++bi) { + for (size_t bi = 0; bi < b_input_edges.size(); ++bi) { bool left_to_right = b_input_edges[bi].left_to_right(); int a_index = GetCrossedVertexIndex(a_vertices, b_edges[bi], left_to_right); if (a_index >= 0) { if (s2builder_verbose) { - cpp_compat_cout << std::endl << " " << "b input edge " + std::cout << std::endl << " " << "b input edge " << b_input_edges[bi].input_id() << " (l2r=" << left_to_right << ", crossing=" << a_vertices[a_index] << ")"; for (const auto& x : b_edges[bi]) { const Graph::Edge& e = g_.edge(x.id); - cpp_compat_cout << " (" << e.first << ", " << e.second << ")"; + std::cout << " (" << e.first << ", " << e.second << ")"; } } // Keep track of the number of signed crossings (see above). @@ -440,10 +472,10 @@ void GraphEdgeClipper::Run() { a_isolated[a_index] = true; } else { // TODO(b/112043775): fix this condition. - S2_LOG(DFATAL) << "Failed to get crossed vertex index."; + S2_LOG(ERROR) << "Failed to get crossed vertex index."; } } - if (s2builder_verbose) cpp_compat_cout << std::endl; + if (s2builder_verbose) std::cout << std::endl; // Finally, we iterate through the A edge chain, keeping track of the // number of signed crossings as we go along. 
The "multiplicity" is @@ -452,7 +484,7 @@ void GraphEdgeClipper::Run() { // up the edge crossings in the correct order. (The multiplicity is // almost always either 0 or 1 except in very rare cases.) int multiplicity = inside + a_num_crossings[0]; - for (int ai = 1; ai < a_vertices.size(); ++ai) { + for (size_t ai = 1; ai < a_vertices.size(); ++ai) { if (multiplicity != 0) { a_isolated[ai - 1] = a_isolated[ai] = false; } @@ -474,7 +506,7 @@ void GraphEdgeClipper::Run() { // Output any isolated polyline vertices. // TODO(ericv): Only do this if an output layer wants degenerate edges. if (input_dimensions_[a_input_id] != 0) { - for (int ai = 0; ai < a_vertices.size(); ++ai) { + for (size_t ai = 0; ai < a_vertices.size(); ++ai) { if (a_isolated[ai]) { AddEdge(Graph::Edge(a_vertices[ai], a_vertices[ai]), a_input_id); } @@ -534,8 +566,12 @@ int GraphEdgeClipper::GetVertexRank(const CrossingGraphEdge& e) const { int GraphEdgeClipper::GetCrossedVertexIndex( const vector& a, const CrossingGraphEdgeVector& b, bool left_to_right) const { - S2_DCHECK(!a.empty()); - S2_DCHECK(!b.empty()); + if (a.empty() || b.empty()) { + S2_LOG(ERROR) << "GraphEdgeClipper::GetCrossedVertexIndex called with " + << a.size() << " vertex ids and " << b.size() + << " crossing graph edges."; + return -1; + } // The reason this calculation is tricky is that after snapping, the A and B // chains may meet and separate several times. For example, if B crosses A @@ -631,20 +667,23 @@ int GraphEdgeClipper::GetCrossedVertexIndex( // in which case we could check which side of the B chain the A edge is on // and use this to limit the possible crossing locations. if (b_first >= 0 && b_last >= 0) { - // The B subchain connects the first and last vertices of A. Test whether - // the chain includes any interior vertices of A. 
We do this indirectly - // by testing whether any edge of B has restricted the range of allowable - // crossing vertices (since any interior edge of the B subchain incident - // to any interior edge of A is guaranteed to do so). - int min_rank = order_.size(), max_rank = -1; + // Swap the edges if necessary so that they are in B chain order. + if (b_reversed) swap(b_first, b_last); + + // The B subchain connects the first and last vertices of A. We test + // whether the chain includes any interior vertices of A by iterating + // through the incident B edges again, looking for ones that belong to + // the B subchain and are not incident to the first or last vertex of A. + bool has_interior_vertex = false; for (const auto& e : b) { - min_rank = min(min_rank, GetVertexRank(e)); - max_rank = max(max_rank, GetVertexRank(e)); + if (e.a_index > 0 && e.a_index < n - 1 && + rank_[e.id] >= rank_[b_first] && rank_[e.id] <= rank_[b_last]) { + has_interior_vertex = true; + break; + } } - if (lo <= min_rank && hi >= max_rank) { + if (!has_interior_vertex) { // The B subchain is not incident to any interior vertex of A. - // Swap the edges if necessary so that they are in B chain order. - if (b_reversed) swap(b_first, b_last); bool on_left = EdgeChainOnLeft(a, b_first, b_last); if (left_to_right == on_left) { lo = max(lo, rank_[b_last] + 1); @@ -688,7 +727,7 @@ bool GraphEdgeClipper::EdgeChainOnLeft( } // Now B is to the left of A if and only if the loop is counterclockwise. 
double sum = 0; - for (int i = 2; i < loop.size(); ++i) { + for (size_t i = 2; i < loop.size(); ++i) { sum += S2::TurnAngle(g_.vertex(loop[i - 2]), g_.vertex(loop[i - 1]), g_.vertex(loop[i])); } @@ -704,11 +743,12 @@ class EdgeClippingLayer : public S2Builder::Layer { public: EdgeClippingLayer(const vector>* layers, const vector* input_dimensions, - const InputEdgeCrossings* input_crossings) + const InputEdgeCrossings* input_crossings, + S2MemoryTracker::Client* tracker) : layers_(*layers), input_dimensions_(*input_dimensions), - input_crossings_(*input_crossings) { - } + input_crossings_(*input_crossings), + tracker_(tracker) {} // Layer interface: GraphOptions graph_options() const override; @@ -718,6 +758,7 @@ class EdgeClippingLayer : public S2Builder::Layer { const vector>& layers_; const vector& input_dimensions_; const InputEdgeCrossings& input_crossings_; + S2MemoryTracker::Client* tracker_; }; GraphOptions EdgeClippingLayer::graph_options() const { @@ -728,30 +769,32 @@ GraphOptions EdgeClippingLayer::graph_options() const { DuplicateEdges::KEEP, SiblingPairs::KEEP); } -// Helper function (in anonymous namespace) to create an S2Builder::Graph from -// a vector of edges. -Graph MakeGraph( - const Graph& g, GraphOptions* options, vector* new_edges, - vector* new_input_edge_ids, - IdSetLexicon* new_input_edge_id_set_lexicon, S2Error* error) { - if (options->edge_type() == EdgeType::UNDIRECTED) { - // Create a reversed edge for every edge. 
- int n = new_edges->size(); - new_edges->reserve(2 * n); - new_input_edge_ids->reserve(2 * n); - for (int i = 0; i < n; ++i) { - new_edges->push_back(Graph::reverse((*new_edges)[i])); - new_input_edge_ids->push_back(IdSetLexicon::EmptySetId()); - } +void EdgeClippingLayer::Build(const Graph& g, S2Error* error) { + // Data per graph edge: + // vector order_; + // vector rank_; + // vector new_edges; + // vector new_input_edge_ids; + // Data per graph vertex: + // Graph::VertexInMap in_; + // Graph::VertexOutMap out_; + // + // The first and last two vectors above are freed upon GraphEdgeClipper + // destruction. There is also a temporary vector "indegree" in + // GetInputEdgeChainOrder() but this does not affect peak memory usage. + int64 tmp_bytes = g.num_edges() * (sizeof(EdgeId) + sizeof(int)) + + g.num_vertices() * (2 * sizeof(EdgeId)); + int64 final_bytes = + g.num_edges() * (sizeof(Graph::Edge) + sizeof(InputEdgeIdSetId)); + + // The order of the calls below is important. Note that all memory tracked + // through this client is automatically untallied upon object destruction. + if (!tracker_->Tally(final_bytes) || !tracker_->TallyTemp(tmp_bytes)) { + // We don't need to copy memory tracking errors to "error" because this + // is already done for us in S2BooleanOperation::Impl::Build(). + return; } - Graph::ProcessEdges(options, new_edges, new_input_edge_ids, - new_input_edge_id_set_lexicon, error); - return Graph(*options, &g.vertices(), new_edges, new_input_edge_ids, - new_input_edge_id_set_lexicon, &g.label_set_ids(), - &g.label_set_lexicon(), g.is_full_polygon_predicate()); -} -void EdgeClippingLayer::Build(const Graph& g, S2Error* error) { // The bulk of the work is handled by GraphEdgeClipper. 
vector new_edges; vector new_input_edge_ids; @@ -759,44 +802,55 @@ void EdgeClippingLayer::Build(const Graph& g, S2Error* error) { GraphEdgeClipper(g, input_dimensions_, input_crossings_, &new_edges, &new_input_edge_ids).Run(); if (s2builder_verbose) { - cpp_compat_cout << "Edges after clipping: " << std::endl; - for (int i = 0; i < new_edges.size(); ++i) { - cpp_compat_cout << " " << new_input_edge_ids[i] << " (" << new_edges[i].first + std::cout << "Edges after clipping: " << std::endl; + for (size_t i = 0; i < new_edges.size(); ++i) { + std::cout << " " << new_input_edge_ids[i] << " (" << new_edges[i].first << ", " << new_edges[i].second << ")" << std::endl; } } - // Construct one or more graphs from the clipped edges and pass them to the - // given output layer(s). - IdSetLexicon new_input_edge_id_set_lexicon; + // Construct one or more subgraphs from the clipped edges and pass them to + // the given output layer(s). We start with a copy of the input graph's + // IdSetLexicon because this is necessary in general, even though in this + // case it is guaranteed to be empty because no edges have been merged. + IdSetLexicon new_input_edge_id_set_lexicon = g.input_edge_id_set_lexicon(); if (layers_.size() == 1) { - GraphOptions options = layers_[0]->graph_options(); - Graph new_graph = MakeGraph(g, &options, &new_edges, &new_input_edge_ids, - &new_input_edge_id_set_lexicon, error); - layers_[0]->Build(new_graph, error); + Graph new_graph = g.MakeSubgraph( + layers_[0]->graph_options(), &new_edges, &new_input_edge_ids, + &new_input_edge_id_set_lexicon, g.is_full_polygon_predicate(), + error, tracker_); + if (tracker_->ok()) layers_[0]->Build(new_graph, error); + tracker_->Untally(new_edges); + tracker_->Untally(new_input_edge_ids); } else { // The Graph objects must be valid until the last Build() call completes, // so we store all of the graph data in arrays with 3 elements. 
S2_DCHECK_EQ(3, layers_.size()); vector layer_edges[3]; vector layer_input_edge_ids[3]; - S2Builder::GraphOptions layer_options[3]; - vector layer_graphs; // No default constructor. - layer_graphs.reserve(3); // Separate the edges according to their dimension. - for (int i = 0; i < new_edges.size(); ++i) { + for (size_t i = 0; i < new_edges.size(); ++i) { int d = input_dimensions_[new_input_edge_ids[i]]; + if (!tracker_->AddSpace(&layer_edges[d], 1)) return; + if (!tracker_->AddSpace(&layer_input_edge_ids[d], 1)) return; layer_edges[d].push_back(new_edges[i]); layer_input_edge_ids[d].push_back(new_input_edge_ids[i]); } // Clear variables to save space. - vector().swap(new_edges); - vector().swap(new_input_edge_ids); + if (!tracker_->Clear(&new_edges)) return; + if (!tracker_->Clear(&new_input_edge_ids)) return; + + vector layer_graphs; // No default constructor. + layer_graphs.reserve(3); + for (int d = 0; d < 3; ++d) { + layer_graphs.push_back(g.MakeSubgraph( + layers_[d]->graph_options(), &layer_edges[d], + &layer_input_edge_ids[d], &new_input_edge_id_set_lexicon, + g.is_full_polygon_predicate(), error, tracker_)); + if (tracker_->ok()) layers_[d]->Build(layer_graphs[d], error); + } for (int d = 0; d < 3; ++d) { - layer_options[d] = layers_[d]->graph_options(); - layer_graphs.push_back(MakeGraph( - g, &layer_options[d], &layer_edges[d], &layer_input_edge_ids[d], - &new_input_edge_id_set_lexicon, error)); - layers_[d]->Build(layer_graphs[d], error); + tracker_->Untally(layer_edges[d]); + tracker_->Untally(layer_input_edge_ids[d]); } } } @@ -806,13 +860,16 @@ void EdgeClippingLayer::Build(const Graph& g, S2Error* error) { class S2BooleanOperation::Impl { public: explicit Impl(S2BooleanOperation* op) - : op_(op), index_crossings_first_region_id_(-1) { + : op_(op), index_crossings_first_region_id_(-1), + tracker_(op->options_.memory_tracker()) { } + bool Build(S2Error* error); private: class CrossingIterator; class CrossingProcessor; + using ShapeEdge = 
s2shapeutil::ShapeEdge; using ShapeEdgeId = s2shapeutil::ShapeEdgeId; @@ -828,7 +885,7 @@ class S2BooleanOperation::Impl { // True if "a_edge" crosses "b_edge" from left to right. Undefined if // is_interior_crossing is false. - uint32 left_to_right: 1; + uint32 left_to_right : 1; // Equal to S2::VertexCrossing(a_edge, b_edge). Undefined if "a_edge" and // "b_edge" do not share exactly one vertex or either edge is degenerate. @@ -843,6 +900,7 @@ class S2BooleanOperation::Impl { friend bool operator==(const IndexCrossing& x, const IndexCrossing& y) { return x.a == y.a && x.b == y.b; } + friend bool operator<(const IndexCrossing& x, const IndexCrossing& y) { // The compiler (2017) doesn't optimize the following as well: // return x.a < y.a || (x.a == y.a && x.b < y.b); @@ -857,6 +915,34 @@ class S2BooleanOperation::Impl { }; using IndexCrossings = vector; + class MemoryTracker : public S2MemoryTracker::Client { + public: + using S2MemoryTracker::Client::Client; + + // Used to track memory used by CrossingProcessor::source_id_map_. (The + // type is a template parameter so that SourceIdMap can be private.) + template + bool TallySourceIdMap(int num_entries) { + int64 delta_bytes = num_entries * GetBtreeMinBytesPerEntry(); + source_id_map_bytes_ += delta_bytes; + return Tally(delta_bytes); + } + + // Used to clear CrossingProcessor::source_id_map_ and update the tracked + // memory usage accordingly. + template + bool ClearSourceIdMap(T* source_id_map) { + source_id_map->clear(); + Tally(-source_id_map_bytes_); + source_id_map_bytes_ = 0; + return ok(); + } + + private: + // The amount of memory used by CrossingProcessor::source_id_map_. 
+ int64 source_id_map_bytes_ = 0; + }; + bool is_boolean_output() const { return op_->result_empty_ != nullptr; } // All of the methods below support "early exit" in the case of boolean @@ -873,8 +959,8 @@ class S2BooleanOperation::Impl { S2ContainsPointQuery* query, CrossingProcessor* cp); static bool HasInterior(const S2ShapeIndex& index); - static bool AddIndexCrossing(const ShapeEdge& a, const ShapeEdge& b, - bool is_interior, IndexCrossings* crossings); + bool AddIndexCrossing(const ShapeEdge& a, const ShapeEdge& b, + bool is_interior, IndexCrossings* crossings); bool GetIndexCrossings(int region_id); bool AddBoundaryPair(bool invert_a, bool invert_b, bool invert_result, CrossingProcessor* cp); @@ -889,13 +975,18 @@ class S2BooleanOperation::Impl { const S2ShapeIndex& b) const; bool IsFullPolygonSymmetricDifference(const S2ShapeIndex& a, const S2ShapeIndex& b) const; + void DoBuild(S2Error* error); // A bit mask representing all six faces of the S2 cube. static constexpr uint8 kAllFacesMask = 0x3f; S2BooleanOperation* op_; - // The S2Builder used to construct the output. + // The S2Builder options used to construct the output. + S2Builder::Options builder_options_; + + // The S2Builder used to construct the output. Note that the S2Builder + // object is created only when is_boolean_output() is false. unique_ptr builder_; // A vector specifying the dimension of each edge added to S2Builder. @@ -922,6 +1013,9 @@ class S2BooleanOperation::Impl { // Temporary storage used in GetChainStarts(), declared here to avoid // repeatedly allocating memory. IndexCrossings tmp_crossings_; + + // An object to track the memory usage of this class. + MemoryTracker tracker_; }; const s2shapeutil::ShapeEdgeId S2BooleanOperation::Impl::kSentinel( @@ -1031,15 +1125,17 @@ class S2BooleanOperation::Impl::CrossingProcessor { // be nullptr. 
CrossingProcessor(const PolygonModel& polygon_model, const PolylineModel& polyline_model, - bool polyline_loops_have_boundaries, - S2Builder* builder, + bool polyline_loops_have_boundaries, S2Builder* builder, vector* input_dimensions, - InputEdgeCrossings *input_crossings) - : polygon_model_(polygon_model), polyline_model_(polyline_model), + InputEdgeCrossings* input_crossings, MemoryTracker* tracker) + : polygon_model_(polygon_model), + polyline_model_(polyline_model), polyline_loops_have_boundaries_(polyline_loops_have_boundaries), - builder_(builder), input_dimensions_(input_dimensions), - input_crossings_(input_crossings), prev_inside_(false) { - } + builder_(builder), + input_dimensions_(input_dimensions), + input_crossings_(input_crossings), + tracker_(tracker), + prev_inside_(false) {} // Starts processing edges from the given region. "invert_a", "invert_b", // and "invert_result" indicate whether region A, region B, and/or the @@ -1085,8 +1181,8 @@ class S2BooleanOperation::Impl::CrossingProcessor { // edge; it crosses the edge from left to right iff the second parameter // is "true". using SourceEdgeCrossing = pair; - struct PointCrossingResult; struct EdgeCrossingResult; + struct PointCrossingResult; InputEdgeId input_edge_id() const { return input_dimensions_->size(); } @@ -1112,10 +1208,22 @@ class S2BooleanOperation::Impl::CrossingProcessor { return (polyline_model_ != PolylineModel::OPEN || edge_id > chain_start); } + bool is_degenerate(ShapeEdgeId a_id) const { + return is_degenerate_hole_.contains(a_id); + } + void AddCrossing(const SourceEdgeCrossing& crossing) { + if (!tracker_->AddSpace(&source_edge_crossings_, 1)) return; source_edge_crossings_.push_back(make_pair(input_edge_id(), crossing)); } + void AddInteriorCrossing(const SourceEdgeCrossing& crossing) { + // Crossing edges are queued until the S2Builder edge that they are + // supposed to be associated with is created (see AddEdge() and + // pending_source_edge_crossings_ for details). 
+ pending_source_edge_crossings_.push_back(crossing); + } + void SetClippingState(InputEdgeId parameter, bool state) { AddCrossing(SourceEdgeCrossing(SourceId(parameter), state)); } @@ -1126,18 +1234,31 @@ class S2BooleanOperation::Impl::CrossingProcessor { int dimension, int interior_crossings) { if (builder_ == nullptr) return false; // Boolean output. if (interior_crossings > 0) { + // Add the edges that cross this edge to the output so that + // GraphEdgeClipper can find them. + if (!tracker_->AddSpace(&source_edge_crossings_, + pending_source_edge_crossings_.size())) { + return false; + } + for (const auto& crossing : pending_source_edge_crossings_) { + source_edge_crossings_.push_back(make_pair(input_edge_id(), crossing)); + } // Build a map that translates temporary edge ids (SourceId) to // the representation used by EdgeClippingLayer (InputEdgeId). + if (!tracker_->TallySourceIdMap(1)) { + return false; + } SourceId src_id(a_region_id_, a_id.shape_id, a_id.edge_id); source_id_map_[src_id] = input_edge_id(); } // Set the GraphEdgeClipper's "inside" state to match ours. if (inside_ != prev_inside_) SetClippingState(kSetInside, inside_); + if (!tracker_->AddSpace(input_dimensions_, 1)) return false; input_dimensions_->push_back(dimension); builder_->AddEdge(a.v0, a.v1); inside_ ^= (interior_crossings & 1); prev_inside_ = inside_; - return true; + return tracker_->ok(); } // Supports "early exit" in the case of boolean results by returning false @@ -1145,10 +1266,11 @@ class S2BooleanOperation::Impl::CrossingProcessor { bool AddPointEdge(const S2Point& p, int dimension) { if (builder_ == nullptr) return false; // Boolean output. 
if (!prev_inside_) SetClippingState(kSetInside, true); + if (!tracker_->AddSpace(input_dimensions_, 1)) return false; input_dimensions_->push_back(dimension); builder_->AddEdge(p, p); prev_inside_ = true; - return true; + return tracker_->ok(); } bool ProcessEdge0(ShapeEdgeId a_id, const S2Shape::Edge& a, @@ -1166,9 +1288,11 @@ class S2BooleanOperation::Impl::CrossingProcessor { bool IsPolylineVertexInside(bool matches_polyline, bool matches_polygon) const; - bool IsPolylineEdgeInside(const EdgeCrossingResult& r) const; + bool IsPolylineEdgeInside(const EdgeCrossingResult& r, + bool is_degenerate) const; bool PolylineEdgeContainsVertex(const S2Point& v, - const CrossingIterator& it) const; + const CrossingIterator& it, + int dimension) const; // Constructor parameters: @@ -1182,9 +1306,10 @@ class S2BooleanOperation::Impl::CrossingProcessor { // edges belong to the output. The auxiliary information consists of the // dimension of each input edge, and set of input edges from the other // region that cross each input input edge. - S2Builder* builder_; + S2Builder* builder_; // (nullptr if boolean output was requested) vector* input_dimensions_; InputEdgeCrossings* input_crossings_; + MemoryTracker* tracker_; // Fields set by StartBoundary: @@ -1220,16 +1345,34 @@ class S2BooleanOperation::Impl::CrossingProcessor { // // All crossings are represented twice, once to indicate that an edge from // polygon 0 is crossed by an edge from polygon 1, and once to indicate that - // an edge from polygon 1 is crossed by an edge from polygon 0. + // an edge from polygon 1 is crossed by an edge from polygon 0. The entries + // are sorted lexicographically by their eventual InputEdgeIds except for + // GraphEdgeClipper state modifications, which are sorted by the first + // InputEdgeId only. 
using SourceEdgeCrossings = vector>; SourceEdgeCrossings source_edge_crossings_; - // A map that translates from SourceId (the (region_id, shape_id, - // edge_id) triple that identifies an S2ShapeIndex edge) to InputEdgeId (the + // A set of edges that cross the current edge being processed by + // ProcessEdge() but that have not yet been associated with a particular + // S2Builder edge. This is necessary because ProcessEdge can create up to + // three S2Builder edges per input edge: one to represent the edge interior, + // and up to two more to represent an isolated start and/or end vertex. The + // crossing edges must be associated with the S2Builder edge that represents + // the edge interior, and they are stored here until that edge is created. + vector pending_source_edge_crossings_; + + // A map that translates from SourceId (the (region_id, shape_id, edge_id) + // triple that identifies an S2ShapeIndex edge) to InputEdgeId (the // sequentially increasing numbers assigned to input edges by S2Builder). using SourceIdMap = absl::btree_map; SourceIdMap source_id_map_; + // For each edge in region B that defines a degenerate loop (either a point + // loop or a sibling pair), indicates whether that loop represents a shell + // or a hole. This information is used during the second pass of + // AddBoundaryPair() to determine the output for degenerate edges. + flat_hash_map is_degenerate_hole_; + // Indicates whether the point being processed along the current edge chain // is in the polygonal interior of the opposite region, using semi-open // boundaries. If "invert_b_" is true then this field is inverted. 
@@ -1364,7 +1507,7 @@ S2BooleanOperation::Impl::CrossingProcessor::ProcessPointCrossings( if (it->b_dimension() == 0) { r.matches_point = true; } else if (it->b_dimension() == 1) { - if (PolylineEdgeContainsVertex(a0, *it)) { + if (PolylineEdgeContainsVertex(a0, *it, 0)) { r.matches_polyline = true; } } else { @@ -1374,37 +1517,44 @@ S2BooleanOperation::Impl::CrossingProcessor::ProcessPointCrossings( return r; } -// EdgeCrossingResult describes the relationship between an edge from region A -// ("a_edge") and a set of crossing edges from region B. For example, -// "matches_polygon" indicates whether "a_edge" matches a polygon edge from +// EdgeCrossingResult describes the relationship between an edge (a0, a1) from +// region A and a set of crossing edges from region B. For example, +// "matches_polygon" indicates whether (a0, a1) matches a polygon edge from // region B. struct S2BooleanOperation::Impl::CrossingProcessor::EdgeCrossingResult { - EdgeCrossingResult() - : matches_polyline(false), matches_polygon(false), matches_sibling(false), - a0_matches_polyline(false), a1_matches_polyline(false), - a0_matches_polygon(false), a1_matches_polygon(false), - a0_crossings(0), a1_crossings(0), interior_crossings(0) { - } - // These fields indicate that "a_edge" exactly matches an edge of B. - bool matches_polyline; // Matches polyline edge (either direction). - bool matches_polygon; // Matches polygon edge (same direction). - bool matches_sibling; // Matches polygon edge (reverse direction). - - // These fields indicate that a vertex of "a_edge" matches a polyline vertex - // of B *and* the polyline contains that vertex. - bool a0_matches_polyline; // Start vertex matches contained polyline vertex. - bool a1_matches_polyline; // End vertex matches contained polyline vertex. - - // These fields indicate that a vertex of "a_edge" matches a polygon vertex + // These fields indicate that (a0, a1) exactly matches an edge of B. 
+ bool matches_polyline = false; // Matches polyline edge (either direction). + + // These fields indicate that a B polyline contains the degenerate polyline + // (a0, a0) or (a1, a1). (This is identical to whether the B polyline + // contains the point a0 or a1 except when the B polyline is degenerate, + // since a degenerate polyline VV contains itself in all boundary models but + // contains the point V only in the CLOSED polyline model.) + bool a0_matches_polyline = false; // B polyline contains (a0, a0) + bool a1_matches_polyline = false; // B polyline contains (a1, a1) + + // These fields indicate that a vertex of (a0, a1) matches a polygon vertex // of B. (Unlike with polylines, the polygon may not contain that vertex.) - bool a0_matches_polygon; // Start vertex matches polygon vertex. - bool a1_matches_polygon; // End vertex matches polygon vertex. - - // These fields count the number of edge crossings at the start vertex, end - // vertex, and interior of "a_edge". - int a0_crossings; // Count of polygon crossings at start vertex. - int a1_crossings; // Count of polygon crossings at end vertex. - int interior_crossings; // Count of polygon crossings in edge interior. + bool a0_matches_polygon = false; // a0 matches polygon vertex. + bool a1_matches_polygon = false; // a1 matches polygon vertex. + + // When a0 != a1, the first two fields identify any B polygon edge that + // exactly matches (a0, a1) or the sibling edge (a1, a0). The third field + // identifies any B polygon edge that exactly matches (a0, a0). + ShapeEdgeId polygon_match_id; // B polygon edge that matches (a0, a1). + ShapeEdgeId sibling_match_id; // B polygon edge that matches (a1, a0). + ShapeEdgeId a0_loop_match_id; // B polygon edge that matches (a0, a0). + + // Convenience functions to test whether a matching edge was found. 
+ bool matches_polygon() const { return polygon_match_id.edge_id >= 0; } + bool matches_sibling() const { return sibling_match_id.edge_id >= 0; } + bool loop_matches_a0() const { return a0_loop_match_id.edge_id >= 0; } + + // These fields count the number of edge crossings at a0, a1, and the + // interior of (a0, a1). + int a0_crossings = 0; // Count of polygon crossings at a0. + int a1_crossings = 0; // Count of polygon crossings at a1. + int interior_crossings = 0; // Count of polygon crossings in edge interior. }; // Processes an edge of dimension 1 (i.e., a polyline edge) from region A. @@ -1426,8 +1576,9 @@ bool S2BooleanOperation::Impl::CrossingProcessor::ProcessEdge1( // Test whether the entire polyline edge should be emitted (or not emitted) // because it matches a polyline or polygon edge. + bool is_degenerate = (a.v0 == a.v1); inside_ ^= (r.a0_crossings & 1); - if (inside_ != IsPolylineEdgeInside(r)) { + if (inside_ != IsPolylineEdgeInside(r, is_degenerate)) { inside_ ^= true; // Invert the inside_ state. ++r.a1_crossings; // Restore the correct (semi-open) state later. } @@ -1440,7 +1591,7 @@ bool S2BooleanOperation::Impl::CrossingProcessor::ProcessEdge1( // This is the first vertex of a polyline loop, so we can't decide if it // is isolated until we process the last polyline edge. chain_v0_emitted_ = inside_; - } else if (is_v0_isolated(a_id) && + } else if (is_v0_isolated(a_id) && !is_degenerate && polyline_contains_v0(a_id.edge_id, chain_start_) && a0_inside) { if (!AddPointEdge(a.v0, 1)) return false; } @@ -1465,7 +1616,8 @@ bool S2BooleanOperation::Impl::CrossingProcessor::ProcessEdge1( // Special case to test whether the last vertex of a polyline should be // emitted as an isolated vertex. 
- if (it->crossings_complete() && is_chain_last_vertex_isolated(a_id) && + if (it->crossings_complete() && !is_degenerate && + is_chain_last_vertex_isolated(a_id) && (polyline_model_ == PolylineModel::CLOSED || (!polyline_loops_have_boundaries_ && a.v1 == a_shape_->chain_edge(chain_id_, chain_start_).v0)) && @@ -1481,8 +1633,8 @@ bool S2BooleanOperation::Impl::CrossingProcessor::ProcessEdge1( // vertex matches a polyline/polygon vertex of the opposite region. bool S2BooleanOperation::Impl::CrossingProcessor::IsPolylineVertexInside( bool matches_polyline, bool matches_polygon) const { - // "contained" indicates whether the current point is inside the polygonal - // interior of the opposite region using semi-open boundaries. + // Initially "contained" indicates whether the current point is inside the + // polygonal interior of region B using semi-open boundaries. bool contained = inside_ ^ invert_b_; // For UNION the output includes duplicate polylines. The test below @@ -1500,19 +1652,37 @@ bool S2BooleanOperation::Impl::CrossingProcessor::IsPolylineVertexInside( // Returns true if the current polyline edge is contained by the opposite // region (after inversion if "invert_b_" is true). inline bool S2BooleanOperation::Impl::CrossingProcessor::IsPolylineEdgeInside( - const EdgeCrossingResult& r) const { - // "contained" indicates whether the current point is inside the polygonal - // interior of the opposite region using semi-open boundaries. + const EdgeCrossingResult& r, bool is_degenerate) const { + // Initially "contained" indicates whether the current point (just past a0) + // is inside the polygonal interior of region B using semi-open boundaries. bool contained = inside_ ^ invert_b_; + + // Note that if r.matches_polyline and is_union_ is true, then "contained" + // will be false (unless there is also a matching polygon edge) since + // polyline edges are not allowed in the interior of B. 
In this case we + // leave "contained" as false since it causes both matching edges to be + // emitted. if (r.matches_polyline && !is_union_) { contained = true; - } else if (r.matches_polygon) { + } else if (is_degenerate) { + // First allow the polygon boundary model to override the semi-open rules. + // Note that a polygon vertex (dimension 2) is considered to completely + // contain degenerate OPEN and SEMI_OPEN polylines (dimension 1) even + // though the latter do not contain any points. This is because dimension + // 2 points are considered to be a strict superset of dimension 1 points. + if (polygon_model_ != PolygonModel::SEMI_OPEN && r.a0_matches_polygon) { + contained = (polygon_model_ == PolygonModel::CLOSED); + } + // Note that r.a0_matches_polyline is true if and only if some B polyline + // contains the degenerate polyline (a0, a0). + if (r.a0_matches_polyline && !is_union_) contained = true; + } else if (r.matches_polygon()) { // In the SEMI_OPEN model, polygon sibling pairs cancel each other and // have no effect on point or edge containment. - if (!(r.matches_sibling && polygon_model_ == PolygonModel::SEMI_OPEN)) { + if (!(polygon_model_ == PolygonModel::SEMI_OPEN && r.matches_sibling())) { contained = (polygon_model_ != PolygonModel::OPEN); } - } else if (r.matches_sibling) { + } else if (r.matches_sibling()) { contained = (polygon_model_ == PolygonModel::CLOSED); } // Finally, invert the result if the opposite region should be inverted. @@ -1525,48 +1695,184 @@ inline bool S2BooleanOperation::Impl::CrossingProcessor::IsPolylineEdgeInside( // as soon as the result is known to be non-empty. bool S2BooleanOperation::Impl::CrossingProcessor::ProcessEdge2( ShapeEdgeId a_id, const S2Shape::Edge& a, CrossingIterator* it) { - // In order to keep only one copy of any shared polygon edges, we only - // output shared edges when processing the second region. 
+ // Whenever the two regions contain the same edge, or opposite edges of a + // sibling pair, or one region contains a point loop while the other + // contains a matching vertex, then in general the result depends on whether + // one or both sides represent a degenerate shell or hole. + // + // In each pass it is easy to determine whether edges in region B represent + // degenerate geometry, and if so whether they represent a shell or hole, + // since this can be determined from the inside_ state and the + // matches_polygon() / matches_sibling() methods of EdgeCrossingResult. + // However this information is not readily available for region A. + // + // We handle this by saving the shell/hole status of each degenerate loop in + // region B during the first pass, and deferring the processing of any edges + // that meet the criteria above until the second pass. (Note that regions + // A,B correspond to regions 0,1 respectively in the first pass whereas they + // refer to regions 1,0 respectively in the second pass.) + // + // The first pass ignores: + // - degenerate edges of A that are incident to any edge of B + // - non-degenerate edges of A that match or are siblings to an edge of B + // + // The first pass also records the shell/hole status of: + // - degenerate edges of B that are incident to any edge of A + // - sibling pairs of B where either edge matches an edge of A + // + // The second pass processes and perhaps outputs: + // - degenerate edges of B that are incident to any edge of A + // - non-degenerate edges of B that match or are siblings to an edge of A + // + // The following flag indicates that we are in the second pass described + // above, i.e. that we are emitting any necessary edges that were ignored by + // the first pass. bool emit_shared = (a_region_id_ == 1); // Degeneracies such as isolated vertices and sibling pairs can only be // created by intersecting CLOSED polygons or unioning OPEN polygons. 
- bool emit_degenerate = + bool create_degen = (polygon_model_ == PolygonModel::CLOSED && !invert_a_ && !invert_b_) || (polygon_model_ == PolygonModel::OPEN && invert_a_ && invert_b_); + // In addition, existing degeneracies are kept when an open boundary is + // subtracted. Note that "keep_degen_b" is only defined for completeness. + // It is needed to ensure that the "reverse subtraction operator" (B - A) + // preserves degeneracies correctly, however in practice this operator is + // only used internally to implement symmetric difference, and in that + // situation the preserved degeneracy is always removed from the final + // result because it overlaps other geometry. + bool keep_degen_a = (polygon_model_ == PolygonModel::OPEN && invert_b_); + bool keep_degen_b = (polygon_model_ == PolygonModel::OPEN && invert_a_); + EdgeCrossingResult r = ProcessEdgeCrossings(a_id, a, it); S2_DCHECK(!r.matches_polyline); - inside_ ^= (r.a0_crossings & 1); // If only one region is inverted, matching/sibling relations are reversed. - // TODO(ericv): Update the following code to handle degenerate loops. - S2_DCHECK(!r.matches_polygon || !r.matches_sibling); - if (invert_a_ != invert_b_) swap(r.matches_polygon, r.matches_sibling); - - // Test whether the entire polygon edge should be emitted (or not emitted) - // because it matches a polygon edge or its sibling. - bool new_inside = inside_; + if (invert_a_ != invert_b_) swap(r.polygon_match_id, r.sibling_match_id); + + bool is_point = (a.v0 == a.v1); + if (!emit_shared) { + // Remember the shell/hole status of degenerate B edges that are incident + // to any edge of A. (We don't need to do this for vertex a1 since it is + // the same as vertex a0 of the following A loop edge.) + if (r.loop_matches_a0()) { + is_degenerate_hole_[r.a0_loop_match_id] = inside_; + if (is_point) return true; + } - // Shared edge are emitted only while processing the second region. 
- if (r.matches_polygon) new_inside = emit_shared; + // Point loops are handled identically to points in the semi-open model, + // and are easier to process in the first pass (since otherwise in the + // r.a0_matches_polygon case we would need to remember the containment + // status of the matching vertex). Otherwise we defer processing such + // loops to the second pass so that we can distinguish whether the + // degenerate edge represents a hole or shell. + if (polygon_model_ != PolygonModel::SEMI_OPEN) { + if (is_point && r.a0_matches_polygon) return true; + } + } + inside_ ^= (r.a0_crossings & 1); + if (!emit_shared) { + // Defer processing A edges that match or are siblings to an edge of B. + if (r.matches_polygon() || r.matches_sibling()) { + // For sibling pairs, also remember their shell/hole status. + if (r.matches_polygon() && r.matches_sibling()) { + is_degenerate_hole_[r.polygon_match_id] = inside_; + is_degenerate_hole_[r.sibling_match_id] = inside_; + } + S2_DCHECK_EQ(r.interior_crossings, 0); + inside_ ^= (r.a1_crossings & 1); + return true; + } + } - // Sibling pairs are emitted only when degeneracies are desired. - if (r.matches_sibling) new_inside = emit_degenerate; - if (inside_ != new_inside) { - inside_ ^= true; // Invert the inside_ state. - ++r.a1_crossings; // Restore the correct (semi-open) state later. + // Remember whether the B geometry represents a sibling pair hole. + bool is_b_hole = r.matches_polygon() && r.matches_sibling() && inside_; + + // At this point, "inside_" indicates whether the initial part of the A edge + // is contained by the B geometry using semi-open rules. The following code + // implements the various other polygon boundary rules by changing the value + // of "inside_" when necessary to indicate whether the current A edge should + // be emitted to the output or not. "semi_open_inside" remembers the true + // value of "inside_" so that it can be restored later. 
+ bool semi_open_inside = inside_; + if (is_point) { + if (r.loop_matches_a0()) { + // Both sides are point loops. The edge is kept only: + // - for closed intersection, open union, and open difference; + // - if A and B are both holes or both shells. + inside_ = create_degen || keep_degen_a || + (inside_ == is_degenerate_hole_[r.a0_loop_match_id]); + } else if (r.a0_matches_polygon) { + // A point loop in A matches a polygon vertex in B. Note that this code + // can emit an extra isolated vertex if A represents a point hole, but + // this doesn't matter (see comments on the call to AddPointEdge below). + if (polygon_model_ != PolygonModel::SEMI_OPEN) { + inside_ = create_degen || keep_degen_a; + } + } + } else if (r.matches_polygon()) { + if (is_degenerate(a_id)) { + // The A edge has a sibling. The edge is kept only: + // - for closed intersection, open union, and open difference; + // - if the A sibling pair is a hole and the B edge has no sibling; or + // - if the B geometry is also a sibling pair and A and B are both + // holes or both shells. + inside_ = create_degen || keep_degen_a || + (!r.matches_sibling() || inside_) == is_degenerate_hole_[a_id]; + } else { + // Matching edges are kept unless the B geometry is a sibling pair, in + // which case it is kept only for closed intersection, open union, and + // open difference. + if (!r.matches_sibling() || create_degen || keep_degen_b) inside_ = true; + } + } else if (r.matches_sibling()) { + if (is_degenerate(a_id)) { + // The A edge has a sibling. The edge is kept only if A is a sibling + // pair shell and the operation is closed intersection, open union, or + // open difference. + inside_ = (create_degen || keep_degen_a) && !is_degenerate_hole_[a_id]; + } else { + inside_ = create_degen; + } + } + if (inside_ != semi_open_inside) { + ++r.a1_crossings; // Restores the correct (semi-open) state later. } // Test whether the first vertex of this edge should be emitted as an - // isolated degenerate vertex. 
- if (a_id.edge_id == chain_start_) { - chain_v0_emitted_ = inside_; - } else if (emit_shared && emit_degenerate && r.a0_matches_polygon && - is_v0_isolated(a_id)) { + // isolated degenerate vertex. This is only needed in the second pass when: + // - a0 matches a vertex of the B polygon; + // - the initial part of the A edge will not be emitted; and + // - the operation is closed intersection or open union, or open difference + // and the B geometry is a point loop. + // + // The logic does not attempt to avoid redundant extra vertices (e.g. the + // extra code in ProcessEdge1() that checks whether the vertex is the + // endpoint of the preceding emitted edge) since these these will be removed + // during S2Builder::Graph creation by DegenerateEdges::DISCARD or + // DISCARD_EXCESS (which are necessary in any case due to snapping). + if (emit_shared && r.a0_matches_polygon && !inside_ && + (create_degen || (keep_degen_b && r.loop_matches_a0()))) { if (!AddPointEdge(a.v0, 2)) return false; } + // Since we skipped edges in the first pass that only had a sibling pair + // match in the B geometry, we sometimes need to emit the sibling pair of an + // edge in the second pass. This happens only if: + // - the operation is closed intersection, open union, or open difference; + // - the A geometry is not a sibling pair (since otherwise we will process + // that edge as well); and + // - the B geometry is not a sibling pair hole (since then only one edge + // should be emitted). + if (r.matches_sibling() && (create_degen || keep_degen_b) && + !is_degenerate(a_id) && !is_b_hole) { + S2Shape::Edge sibling(a.v1, a.v0); + if (!AddEdge(r.sibling_match_id, sibling, 2 /*dimension*/, 0)) { + return false; + } + } + // Test whether the entire edge or any part of it belongs to the output. if (inside_ || r.interior_crossings > 0) { // Note: updates "inside_" to correspond to the state just before a1. 
@@ -1574,10 +1880,6 @@ bool S2BooleanOperation::Impl::CrossingProcessor::ProcessEdge2( return false; } } - - // Remember whether the edge portion just before "a1" was emitted, so that - // we can decide whether "a1" need to be emitted as an isolated vertex. - if (inside_) v0_emitted_max_edge_id_ = a_id.edge_id + 1; inside_ ^= (r.a1_crossings & 1); // Verify that edge crossings are being counted correctly. @@ -1585,13 +1887,6 @@ bool S2BooleanOperation::Impl::CrossingProcessor::ProcessEdge2( S2_DCHECK_EQ(MakeS2ContainsPointQuery(&it->b_index()).Contains(a.v1), inside_ ^ invert_b_); } - - // Special case to test whether the last vertex of a loop should be emitted - // as an isolated degenerate vertex. - if (emit_shared && emit_degenerate && r.a1_matches_polygon && - it->crossings_complete() && is_chain_last_vertex_isolated(a_id)) { - if (!AddPointEdge(a.v1, 2)) return false; - } return true; } @@ -1600,50 +1895,57 @@ bool S2BooleanOperation::Impl::CrossingProcessor::ProcessEdge2( // // NOTE(ericv): We could save a bit of work when matching polygon vertices by // passing in a flag saying whether this information is needed. For example -// if is only needed in ProcessEdge2 when (emit_shared && emit_degenerate). +// it is only needed in ProcessEdge2 when (emit_shared && create_degenerate). S2BooleanOperation::Impl::CrossingProcessor::EdgeCrossingResult S2BooleanOperation::Impl::CrossingProcessor::ProcessEdgeCrossings( ShapeEdgeId a_id, const S2Shape::Edge& a, CrossingIterator* it) { + pending_source_edge_crossings_.clear(); EdgeCrossingResult r; if (it->Done(a_id)) return r; - // TODO(ericv): bool a_degenerate = (a.v0 == a.v1); for (; !it->Done(a_id); it->Next()) { - // Polylines and polygons are not affected by point geometry. + // Polyline and polygon "inside" states are not affected by point geometry. if (it->b_dimension() == 0) continue; S2Shape::Edge b = it->b_edge(); if (it->is_interior_crossing()) { // The crossing occurs in the edge interior. 
The condition below says - // that (1) polyline crossings don't affect polygon output, and (2) - // subtracting a crossing polyline from a polyline has no effect. + // that (1) polyline crossings don't affect the polygon "inside" state, + // and (2) subtracting a crossing polyline from a polyline does not + // affect its "inside" state. (Note that vertices are still created at + // the intersection points.) if (a_dimension_ <= it->b_dimension() && !(invert_b_ != invert_result_ && it->b_dimension() == 1)) { SourceId src_id(b_region_id_, it->b_shape_id(), it->b_edge_id()); - AddCrossing(make_pair(src_id, it->left_to_right())); + AddInteriorCrossing(make_pair(src_id, it->left_to_right())); } r.interior_crossings += (it->b_dimension() == 1) ? 2 : 1; } else if (it->b_dimension() == 1) { - // Polygons are not affected by polyline geometry. + // The polygon "inside" state is not affected by polyline geometry. if (a_dimension_ == 2) continue; if ((a.v0 == b.v0 && a.v1 == b.v1) || (a.v0 == b.v1 && a.v1 == b.v0)) { r.matches_polyline = true; } if ((a.v0 == b.v0 || a.v0 == b.v1) && - PolylineEdgeContainsVertex(a.v0, *it)) { + PolylineEdgeContainsVertex(a.v0, *it, 1)) { r.a0_matches_polyline = true; } if ((a.v1 == b.v0 || a.v1 == b.v1) && - PolylineEdgeContainsVertex(a.v1, *it)) { + PolylineEdgeContainsVertex(a.v1, *it, 1)) { r.a1_matches_polyline = true; } } else { S2_DCHECK_EQ(2, it->b_dimension()); - if (a.v0 == b.v0 && a.v1 == b.v1) { + if (a.v0 == a.v1 || b.v0 == b.v1) { + // There are no edge crossings since at least one edge is degenerate. 
+ if (a.v0 == b.v0 && a.v0 == b.v1) { + r.a0_loop_match_id = it->b_id(); + } + } else if (a.v0 == b.v0 && a.v1 == b.v1) { ++r.a0_crossings; - r.matches_polygon = true; + r.polygon_match_id = it->b_id(); } else if (a.v0 == b.v1 && a.v1 == b.v0) { ++r.a0_crossings; - r.matches_sibling = true; + r.sibling_match_id = it->b_id(); } else if (it->is_vertex_crossing()) { if (a.v0 == b.v0 || a.v0 == b.v1) { ++r.a0_crossings; @@ -1664,13 +1966,18 @@ S2BooleanOperation::Impl::CrossingProcessor::ProcessEdgeCrossings( // Returns true if the vertex "v" is contained by the polyline edge referred // to by the CrossingIterator "it", taking into account the PolylineModel. +// "dimension" is 0 or 1 according to whether "v" should be modeled as a point +// or as a degenerate polyline. (This only makes a difference when the +// containing polyline is degenerate, since the polyline AA contains itself in +// all boundary models but contains the point A only in the CLOSED model.) // // REQUIRES: it.b_dimension() == 1 // REQUIRES: "v" is an endpoint of it.b_edge() bool S2BooleanOperation::Impl::CrossingProcessor::PolylineEdgeContainsVertex( - const S2Point& v, const CrossingIterator& it) const { + const S2Point& v, const CrossingIterator& it, int dimension) const { S2_DCHECK_EQ(1, it.b_dimension()); S2_DCHECK(it.b_edge().v0 == v || it.b_edge().v1 == v); + S2_DCHECK(dimension == 0 || dimension == 1); // Closed polylines contain all their vertices. if (polyline_model_ == PolylineModel::CLOSED) return true; @@ -1680,11 +1987,14 @@ bool S2BooleanOperation::Impl::CrossingProcessor::PolylineEdgeContainsVertex( const auto& b_chain = it.b_chain_info(); int b_edge_id = it.b_edge_id(); - // The last polyline vertex is never contained. (For polyline loops, it is - // sufficient to treat the first vertex as begin contained.) This case also - // handles degenerate polylines (polylines with one edge where v0 == v1), - // which do not contain any points. 
- if (b_edge_id == b_chain.limit - 1 && v == it.b_edge().v1) return false; + // A polyline contains its last vertex only when the polyline is degenerate + // (v0 == v1) and "v" is modeled as a degenerate polyline (dimension == 1). + // This corresponds to the fact that the polyline AA contains itself in all + // boundary models, but contains the point A only in the CLOSED model. + if (b_edge_id == b_chain.limit - 1 && v == it.b_edge().v1 && + (dimension == 0 || b_edge_id > 0 || v != it.b_edge().v0)) { + return false; + } // Otherwise all interior vertices are contained. The first polyline // vertex is contained if either the polyline model is not OPEN, or the @@ -1699,20 +2009,21 @@ bool S2BooleanOperation::Impl::CrossingProcessor::PolylineEdgeContainsVertex( // Translates the temporary representation of crossing edges (SourceId) into // the format expected by EdgeClippingLayer (InputEdgeId). void S2BooleanOperation::Impl::CrossingProcessor::DoneBoundaryPair() { + tracker_->AddSpaceExact(input_crossings_, source_edge_crossings_.size()); + if (!tracker_->TallySourceIdMap(3)) return; + // Add entries that translate the "special" crossings. 
source_id_map_[SourceId(kSetInside)] = kSetInside; source_id_map_[SourceId(kSetInvertB)] = kSetInvertB; source_id_map_[SourceId(kSetReverseA)] = kSetReverseA; - input_crossings_->reserve(input_crossings_->size() + - source_edge_crossings_.size()); for (const auto& tmp : source_edge_crossings_) { auto it = source_id_map_.find(tmp.second.first); S2_DCHECK(it != source_id_map_.end()); input_crossings_->push_back(make_pair( tmp.first, CrossingInputEdge(it->second, tmp.second.second))); } - source_edge_crossings_.clear(); - source_id_map_.clear(); + tracker_->Clear(&source_edge_crossings_); + tracker_->ClearSourceIdMap(&source_id_map_); } // Clips the boundary of A to the interior of the opposite region B and adds @@ -1832,6 +2143,7 @@ bool S2BooleanOperation::Impl::GetChainStarts( ShapeEdge a(shape_id, chain.start, a_shape->chain_edge(chain_id, 0)); bool inside = (b_has_interior && query.Contains(a.v0())) != invert_b; if (inside) { + if (!tracker_.AddSpace(chain_starts, 1)) return false; chain_starts->push_back(ShapeEdgeId(shape_id, chain.start)); } if (is_boolean_output()) { @@ -1841,6 +2153,7 @@ bool S2BooleanOperation::Impl::GetChainStarts( } } } + if (!tracker_.AddSpace(chain_starts, 1)) return false; chain_starts->push_back(kSentinel); return true; } @@ -1882,6 +2195,7 @@ bool S2BooleanOperation::Impl::HasInterior(const S2ShapeIndex& index) { inline bool S2BooleanOperation::Impl::AddIndexCrossing( const ShapeEdge& a, const ShapeEdge& b, bool is_interior, IndexCrossings* crossings) { + if (!tracker_.AddSpace(crossings, 1)) return false; crossings->push_back(IndexCrossing(a.id(), b.id())); IndexCrossing* crossing = &crossings->back(); if (is_interior) { @@ -1889,6 +2203,8 @@ inline bool S2BooleanOperation::Impl::AddIndexCrossing( if (s2pred::Sign(a.v0(), a.v1(), b.v0()) > 0) { crossing->left_to_right = true; } + builder_->AddIntersection( + S2::GetIntersection(a.v0(), a.v1(), b.v0(), b.v1())); } else { // TODO(ericv): This field isn't used unless one shape is a 
polygon and // the other is a polyline or polygon, but we don't have the shape @@ -1909,6 +2225,9 @@ bool S2BooleanOperation::Impl::GetIndexCrossings(int region_id) { if (region_id == index_crossings_first_region_id_) return true; if (index_crossings_first_region_id_ < 0) { S2_DCHECK_EQ(region_id, 0); // For efficiency, not correctness. + // TODO(ericv): This would be more efficient if VisitCrossingEdgePairs() + // returned the sign (+1 or -1) of the interior crossing, i.e. + // "int interior_crossing_sign" rather than "bool is_interior". if (!s2shapeutil::VisitCrossingEdgePairs( *op_->regions_[0], *op_->regions_[1], s2shapeutil::CrossingType::ALL, @@ -1928,6 +2247,7 @@ bool S2BooleanOperation::Impl::GetIndexCrossings(int region_id) { index_crossings_.end()); } // Add a sentinel value to simplify the loop logic. + tracker_.AddSpace(&index_crossings_, 1); index_crossings_.push_back(IndexCrossing(kSentinel, kSentinel)); index_crossings_first_region_id_ = 0; } @@ -1941,7 +2261,7 @@ bool S2BooleanOperation::Impl::GetIndexCrossings(int region_id) { std::sort(index_crossings_.begin(), index_crossings_.end()); index_crossings_first_region_id_ = region_id; } - return true; + return tracker_.ok(); } // Supports "early exit" in the case of boolean results by returning false @@ -1951,16 +2271,19 @@ bool S2BooleanOperation::Impl::AddBoundaryPair( // Optimization: if the operation is DIFFERENCE or SYMMETRIC_DIFFERENCE, // it is worthwhile checking whether the two regions are identical (in which // case the output is empty). - // - // TODO(ericv): When boolean output is requested there are other quick - // checks that could be done here, such as checking whether a full cell from - // one S2ShapeIndex intersects a non-empty cell of the other S2ShapeIndex. 
- auto type = op_->op_type(); + auto type = op_->op_type_; if (type == OpType::DIFFERENCE || type == OpType::SYMMETRIC_DIFFERENCE) { if (AreRegionsIdentical()) return true; - } else if (!is_boolean_output()) { + } else if (is_boolean_output()) { + // TODO(ericv): When boolean output is requested there are other quick + // checks that could be done here, such as checking whether a full cell from + // one S2ShapeIndex intersects a non-empty cell of the other S2ShapeIndex. } vector a_starts, b_starts; + auto _ = absl::MakeCleanup([&]() { + tracker_.Untally(a_starts); + tracker_.Untally(b_starts); + }); if (!GetChainStarts(0, invert_a, invert_b, invert_result, cp, &a_starts) || !GetChainStarts(1, invert_b, invert_a, invert_result, cp, &b_starts) || !AddBoundary(0, invert_a, invert_b, invert_result, a_starts, cp) || @@ -1968,7 +2291,7 @@ bool S2BooleanOperation::Impl::AddBoundaryPair( return false; } if (!is_boolean_output()) cp->DoneBoundaryPair(); - return true; + return tracker_.ok(); } // Supports "early exit" in the case of boolean results by returning false @@ -1978,7 +2301,8 @@ bool S2BooleanOperation::Impl::BuildOpType(OpType op_type) { CrossingProcessor cp(op_->options_.polygon_model(), op_->options_.polyline_model(), op_->options_.polyline_loops_have_boundaries(), - builder_.get(), &input_dimensions_, &input_crossings_); + builder_.get(), &input_dimensions_, &input_crossings_, + &tracker_); switch (op_type) { case OpType::UNION: // A | B == ~(~A & ~B) @@ -1990,6 +2314,11 @@ bool S2BooleanOperation::Impl::BuildOpType(OpType op_type) { case OpType::DIFFERENCE: // A - B = A & ~B + // + // Note that degeneracies are implemented such that the symmetric + // operation (-B + A) also produces correct results. This can be tested + // by swapping op_->regions[0, 1] and calling AddBoundaryPair(true, + // false, false), which computes (~B & A). 
return AddBoundaryPair(false, true, false, &cp); case OpType::SYMMETRIC_DIFFERENCE: @@ -2049,7 +2378,7 @@ bool S2BooleanOperation::Impl::IsFullPolygonResult( // but would also allows all cases to be handled 100% robustly. const S2ShapeIndex& a = *op_->regions_[0]; const S2ShapeIndex& b = *op_->regions_[1]; - switch (op_->op_type()) { + switch (op_->op_type_) { case OpType::UNION: return IsFullPolygonUnion(a, b); @@ -2162,8 +2491,7 @@ bool S2BooleanOperation::Impl::IsFullPolygonSymmetricDifference( // To determine whether the result is ambiguous, we compute a rough estimate // of the maximum expected area error (including errors due to snapping), // using the worst-case error bound for a hemisphere defined by 4 vertices. - auto edge_snap_radius = op_->options_.snap_function().snap_radius() + - S2::kIntersectionError; // split_crossing_edges + auto edge_snap_radius = builder_options_.edge_snap_radius(); double hemisphere_area_error = 2 * M_PI * edge_snap_radius.radians() + 40 * DBL_EPSILON; // GetCurvatureMaxError @@ -2193,29 +2521,48 @@ bool S2BooleanOperation::Impl::IsFullPolygonSymmetricDifference( return error_sign > 0; } +// When subtracting regions, we can save a lot of work by detecting the +// relatively common case where the two regions are identical. bool S2BooleanOperation::Impl::AreRegionsIdentical() const { const S2ShapeIndex* a = op_->regions_[0]; const S2ShapeIndex* b = op_->regions_[1]; if (a == b) return true; + + // If the regions are not identical, we would like to detect that fact as + // quickly as possible. In particular we would like to avoid fully decoding + // both shapes if they are represented as encoded shape types. + // + // First we test whether the two geometries have the same dimensions and + // chain structure. This can be done without decoding any S2Points. 
int num_shape_ids = a->num_shape_ids(); if (num_shape_ids != b->num_shape_ids()) return false; for (int s = 0; s < num_shape_ids; ++s) { const S2Shape* a_shape = a->shape(s); const S2Shape* b_shape = b->shape(s); - if (a_shape->dimension() != b_shape->dimension()) return false; - if (a_shape->dimension() == 2) { - auto a_ref = a_shape->GetReferencePoint(); - auto b_ref = b_shape->GetReferencePoint(); - if (a_ref.point != b_ref.point) return false; - if (a_ref.contained != b_ref.contained) return false; - } + int dimension = a_shape->dimension(); + if (dimension != b_shape->dimension()) return false; int num_chains = a_shape->num_chains(); if (num_chains != b_shape->num_chains()) return false; + int num_edges = a_shape->num_edges(); + if (num_edges != b_shape->num_edges()) return false; + if (dimension == 0) { + S2_DCHECK_EQ(num_edges, num_chains); // All chains are of length 1. + continue; + } for (int c = 0; c < num_chains; ++c) { S2Shape::Chain a_chain = a_shape->chain(c); S2Shape::Chain b_chain = b_shape->chain(c); S2_DCHECK_EQ(a_chain.start, b_chain.start); if (a_chain.length != b_chain.length) return false; + } + } + // Next we test whether both geometries have the same vertex positions. + for (int s = 0; s < num_shape_ids; ++s) { + const S2Shape* a_shape = a->shape(s); + const S2Shape* b_shape = b->shape(s); + int num_chains = a_shape->num_chains(); + for (int c = 0; c < num_chains; ++c) { + S2Shape::Chain a_chain = a_shape->chain(c); for (int i = 0; i < a_chain.length; ++i) { S2Shape::Edge a_edge = a_shape->chain_edge(c, i); S2Shape::Edge b_edge = b_shape->chain_edge(c, i); @@ -2223,32 +2570,36 @@ bool S2BooleanOperation::Impl::AreRegionsIdentical() const { if (a_edge.v1 != b_edge.v1) return false; } } + // Note that we don't need to test whether both shapes have the same + // GetReferencePoint(), because S2Shape requires that the full geometry of + // the shape (including its interior) must be derivable from its chains + // and edges. 
This is why the "full loop" exists; see s2shape.h. } return true; } -bool S2BooleanOperation::Impl::Build(S2Error* error) { - error->Clear(); +void S2BooleanOperation::Impl::DoBuild(S2Error* error) { + if (!tracker_.ok()) return; + builder_options_ = S2Builder::Options(op_->options_.snap_function()); + builder_options_.set_intersection_tolerance(S2::kIntersectionError); + builder_options_.set_memory_tracker(tracker_.tracker()); + if (op_->options_.split_all_crossing_polyline_edges()) { + builder_options_.set_split_crossing_edges(true); + } + // TODO(ericv): Ideally idempotent() should be true, but existing clients + // expect vertices closer than the full "snap_radius" to be snapped. + builder_options_.set_idempotent(false); + if (is_boolean_output()) { // BuildOpType() returns true if and only if the result has no edges. S2Builder::Graph g; // Unused by IsFullPolygonResult() implementation. *op_->result_empty_ = - BuildOpType(op_->op_type()) && !IsFullPolygonResult(g, error); - return true; + BuildOpType(op_->op_type_) && !IsFullPolygonResult(g, error); + return; } - // TODO(ericv): Rather than having S2Builder split the edges, it would be - // faster to call AddVertex() in this class and have a new S2Builder - // option that increases the edge_snap_radius_ to account for errors in - // the intersection point (the way that split_crossing_edges does). - S2Builder::Options options(op_->options_.snap_function()); - options.set_split_crossing_edges(true); - - // TODO(ericv): Ideally idempotent() should be true, but existing clients - // expect vertices closer than the full "snap_radius" to be snapped. 
- options.set_idempotent(false); - builder_ = make_unique(options); + builder_ = make_unique(builder_options_); builder_->StartLayer(make_unique( - &op_->layers_, &input_dimensions_, &input_crossings_)); + &op_->layers_, &input_dimensions_, &input_crossings_, &tracker_)); // Add a predicate that decides whether a result with no polygon edges should // be interpreted as the empty polygon or the full polygon. @@ -2256,8 +2607,19 @@ bool S2BooleanOperation::Impl::Build(S2Error* error) { [this](const S2Builder::Graph& g, S2Error* error) { return IsFullPolygonResult(g, error); }); - (void) BuildOpType(op_->op_type()); - return builder_->Build(error); + (void) BuildOpType(op_->op_type_); + + // Release memory that is no longer needed. + if (!tracker_.Clear(&index_crossings_)) return; + builder_->Build(error); +} + +bool S2BooleanOperation::Impl::Build(S2Error* error) { + // This wrapper ensures that memory tracking errors are reported. + error->Clear(); + DoBuild(error); + if (!tracker_.ok()) *error = tracker_.error(); + return error->ok(); } S2BooleanOperation::Options::Options() @@ -2270,13 +2632,16 @@ S2BooleanOperation::Options::Options(const SnapFunction& snap_function) } S2BooleanOperation::Options::Options(const Options& options) - : snap_function_(options.snap_function_->Clone()), - polygon_model_(options.polygon_model_), - polyline_model_(options.polyline_model_), - polyline_loops_have_boundaries_(options.polyline_loops_have_boundaries_), - precision_(options.precision_), - conservative_output_(options.conservative_output_), - source_id_lexicon_(options.source_id_lexicon_) { + : snap_function_(options.snap_function_->Clone()), + polygon_model_(options.polygon_model_), + polyline_model_(options.polyline_model_), + polyline_loops_have_boundaries_(options.polyline_loops_have_boundaries_), + split_all_crossing_polyline_edges_( + options.split_all_crossing_polyline_edges_), + precision_(options.precision_), + conservative_output_(options.conservative_output_), + 
source_id_lexicon_(options.source_id_lexicon_), + memory_tracker_(options.memory_tracker_) { } S2BooleanOperation::Options& S2BooleanOperation::Options::operator=( @@ -2285,9 +2650,12 @@ S2BooleanOperation::Options& S2BooleanOperation::Options::operator=( polygon_model_ = options.polygon_model_; polyline_model_ = options.polyline_model_; polyline_loops_have_boundaries_ = options.polyline_loops_have_boundaries_; + split_all_crossing_polyline_edges_ = + options.split_all_crossing_polyline_edges_; precision_ = options.precision_; conservative_output_ = options.conservative_output_; source_id_lexicon_ = options.source_id_lexicon_; + memory_tracker_ = options.memory_tracker_; return *this; } @@ -2325,6 +2693,15 @@ void S2BooleanOperation::Options::set_polyline_loops_have_boundaries( polyline_loops_have_boundaries_ = value; } +bool S2BooleanOperation::Options::split_all_crossing_polyline_edges() const { + return split_all_crossing_polyline_edges_; +} + +void S2BooleanOperation::Options::set_split_all_crossing_polyline_edges( + bool value) { + split_all_crossing_polyline_edges_ = value; +} + Precision S2BooleanOperation::Options::precision() const { return precision_; } @@ -2338,7 +2715,15 @@ S2BooleanOperation::Options::source_id_lexicon() const { return source_id_lexicon_; } -const char* S2BooleanOperation::OpTypeToString(OpType op_type) { +S2MemoryTracker* S2BooleanOperation::Options::memory_tracker() const { + return memory_tracker_; +} + +void S2BooleanOperation::Options::set_memory_tracker(S2MemoryTracker* tracker) { + memory_tracker_ = tracker; +} + +string_view S2BooleanOperation::OpTypeToString(OpType op_type) { switch (op_type) { case OpType::UNION: return "UNION"; case OpType::INTERSECTION: return "INTERSECTION"; @@ -2348,27 +2733,44 @@ const char* S2BooleanOperation::OpTypeToString(OpType op_type) { } } +string_view S2BooleanOperation::PolygonModelToString(PolygonModel model) { + switch (model) { + case PolygonModel::OPEN: return "OPEN"; + case 
PolygonModel::SEMI_OPEN: return "SEMI_OPEN"; + case PolygonModel::CLOSED: return "CLOSED"; + default: return "Unknown PolygonModel"; + } +} + +string_view S2BooleanOperation::PolylineModelToString(PolylineModel model) { + switch (model) { + case PolylineModel::OPEN: return "OPEN"; + case PolylineModel::SEMI_OPEN: return "SEMI_OPEN"; + case PolylineModel::CLOSED: return "CLOSED"; + default: return "Unknown PolylineModel"; + } +} + S2BooleanOperation::S2BooleanOperation(OpType op_type, const Options& options) - : op_type_(op_type), options_(options), result_empty_(nullptr) { + : options_(options), op_type_(op_type), result_empty_(nullptr) { } S2BooleanOperation::S2BooleanOperation(OpType op_type, bool* result_empty, const Options& options) - : op_type_(op_type), options_(options), - result_empty_(result_empty) { + : options_(options), op_type_(op_type), result_empty_(result_empty) { } S2BooleanOperation::S2BooleanOperation( OpType op_type, unique_ptr layer, const Options& options) - : op_type_(op_type), options_(options), result_empty_(nullptr) { + : options_(options), op_type_(op_type), result_empty_(nullptr) { layers_.push_back(std::move(layer)); } S2BooleanOperation::S2BooleanOperation( OpType op_type, vector> layers, const Options& options) - : op_type_(op_type), options_(options), layers_(std::move(layers)), + : options_(options), op_type_(op_type), layers_(std::move(layers)), result_empty_(nullptr) { } diff --git a/src/s2/s2boolean_operation.h b/src/s2/s2boolean_operation.h index acceb06d..d40c9e65 100644 --- a/src/s2/s2boolean_operation.h +++ b/src/s2/s2boolean_operation.h @@ -21,9 +21,14 @@ #include #include #include + +#include "s2/base/integral_types.h" #include "s2/s2builder.h" #include "s2/s2builder_graph.h" #include "s2/s2builder_layer.h" +#include "s2/s2error.h" +#include "s2/s2memory_tracker.h" +#include "s2/s2shape_index.h" #include "s2/value_lexicon.h" // This class implements boolean operations (intersection, union, difference, @@ -36,15 +41,40 @@ 
// these objects, except that polygon interiors must be disjoint from all // other geometry (including other polygon interiors). If the input geometry // for a region does not meet this condition, it can be normalized by -// computing its union first. Note that points or polylines are allowed to -// coincide with the boundaries of polygons. -// -// Degeneracies are supported. A polygon loop or polyline may consist of a -// single edge from a vertex to itself, and polygons may contain "sibling -// pairs" consisting of an edge and its corresponding reverse edge. Polygons -// must not have any duplicate edges (due to the requirement that polygon -// interiors are disjoint), but polylines may have duplicate edges or can even -// be self-intersecting. +// computing its union first. Duplicate polygon edges are not allowed (even +// among different polygons), however polylines may have duplicate edges and +// may even be self-intersecting. Note that points or polylines are allowed +// to coincide with the boundaries of polygons. +// +// Degeneracies are fully supported. Supported degeneracy types include the +// following: +// +// - Point polylines consisting of a single degenerate edge AA. +// +// - Point loops consisting of a single vertex A. Such loops may represent +// either shells or holes according to whether the loop adds to or +// subtracts from the surrounding region of the polygon. +// +// - Sibling edge pairs of the form {AB, BA}. Such sibling pairs may +// represent either shells or holes according to whether they add to or +// subtract from the surrounding region. The edges of a sibling pair may +// belong to the same polygon loop (e.g. a loop AB) or to different polygon +// loops or polygons (e.g. the polygons {ABC, CBD}). +// +// A big advantage of degeneracy support is that geometry may be simplified +// without completely losing small details. 
For example, if a polygon +// representing a land area with many lakes and rivers is simplified using a +// tolerance of 1 kilometer, every water feature in the input is guaranteed to +// be within 1 kilometer of some water feature in the output (even if some +// lakes and rivers are merged and/or reduced to degenerate point or sibling +// edge pair holes). Mathematically speaking, degeneracy support allows +// geometry to be simplified while guaranteeing that the Hausdorff distance +// between the boundaries of the original and simplified geometries is at +// most the simplification tolerance. It also allows geometry to be +// simplified without changing its dimension, thus preserving boundary +// semantics. (Note that the boundary of a polyline ABCD is {A,D}, whereas +// the boundary of a degenerate shell ABCDCB is its entire set of vertices and +// edges.) // // Points and polyline edges are treated as multisets: if the same point or // polyline edge appears multiple times in the input, it will appear multiple @@ -53,8 +83,14 @@ // sets of points or polylines as a single region while maintaining their // distinct identities, even when the points or polylines intersect each // other. It is also useful for reconstructing polylines that loop back on -// themselves. If duplicate geometry is not desired, it can be merged by -// GraphOptions::DuplicateEdges::MERGE in the S2Builder output layer. +// themselves (e.g., time series such as GPS tracks). If duplicate geometry +// is not desired, it can easily be removed by choosing the appropriate +// S2Builder output layer options. +// +// Self-intersecting polylines can be manipulated without materializing new +// vertices at the self-intersection points. This feature is important when +// processing polylines with large numbers of self-intersections such as GPS +// tracks (e.g., consider the path of a race car in the Indy 500). // // Polylines are always considered to be directed. 
Polyline edges between the // same pair of vertices are defined to intersect even if the two edges are in @@ -167,7 +203,7 @@ // S2BooleanOperation::Options options; // options.set_snap_function(snap_function); // S2BooleanOperation op(S2BooleanOperation::OpType::INTERSECTION, -// absl::make_unique(&polygon), +// std::make_unique(&polygon), // options); // S2Error error; // if (!op.Build(a, b, &error)) { @@ -183,29 +219,35 @@ // S2Polygon polygon; // S2BooleanOperation op( // S2BooleanOperation::OpType::UNION, -// absl::make_unique(&points), -// absl::make_unique(&polylines), -// absl::make_unique(&polygon)); +// std::make_unique(&points), +// std::make_unique(&polylines), +// std::make_unique(&polygon)); class S2BooleanOperation { public: // The supported operation types. - enum class OpType { + enum class OpType : uint8 { UNION, // Contained by either region. INTERSECTION, // Contained by both regions. DIFFERENCE, // Contained by the first region but not the second. SYMMETRIC_DIFFERENCE // Contained by one region but not the other. }; // Translates OpType to one of the strings above. - static const char* OpTypeToString(OpType op_type); + static absl::string_view OpTypeToString(OpType op_type); // Defines whether polygons are considered to contain their vertices and/or // edges (see definitions above). - enum class PolygonModel { OPEN, SEMI_OPEN, CLOSED }; + enum class PolygonModel : uint8 { OPEN, SEMI_OPEN, CLOSED }; + + // Translates PolygonModel to one of the strings above. + static absl::string_view PolygonModelToString(PolygonModel model); // Defines whether polylines are considered to contain their endpoints // (see definitions above). - enum class PolylineModel { OPEN, SEMI_OPEN, CLOSED }; + enum class PolylineModel : uint8 { OPEN, SEMI_OPEN, CLOSED }; + + // Translates PolylineModel to one of the strings above. 
+ static absl::string_view PolylineModelToString(PolylineModel model); // With Precision::EXACT, the operation is evaluated using the exact input // geometry. Predicates that use this option will produce exact results; @@ -228,7 +270,7 @@ class S2BooleanOperation { // Conceptually, the difference between these two options is that with // Precision::SNAPPED, the inputs are snap rounded (together), whereas with // Precision::EXACT only the result is snap rounded. - enum class Precision { EXACT, SNAPPED }; + enum class Precision : uint8 { EXACT, SNAPPED }; // SourceId identifies an edge from one of the two input S2ShapeIndexes. // It consists of a region id (0 or 1), a shape id within that region's @@ -308,6 +350,18 @@ class S2BooleanOperation { bool polyline_loops_have_boundaries() const; void set_polyline_loops_have_boundaries(bool value); + // Specifies that a new vertex should be added whenever a polyline edge + // crosses another polyline edge. Note that this can cause the size of + // polylines with many self-intersections to increase quadratically. + // + // If false, new vertices are added only when a polyline from one input + // region crosses a polyline from the other input region. This allows + // self-intersecting input polylines to be modified as little as possible. + // + // DEFAULT: false + bool split_all_crossing_polyline_edges() const; + void set_split_all_crossing_polyline_edges(bool value); + // Specifies whether the operation should use the exact input geometry // (Precision::EXACT), or whether the two input regions should be snapped // together first (Precision::SNAPPED). @@ -367,6 +421,36 @@ class S2BooleanOperation { ValueLexicon* source_id_lexicon() const; // void set_source_id_lexicon(ValueLexicon* source_id_lexicon); + // Specifies that internal memory usage should be tracked using the given + // S2MemoryTracker. If a memory limit is specified and more memory + // than this is required then an error will be returned. 
Example usage: + // + // S2MemoryTracker tracker; + // tracker.set_limit(500 << 20); // 500 MB + // S2BooleanOperation::Options options; + // options.set_memory_tracker(&tracker); + // S2BooleanOperation op(..., options); + // ... + // S2Error error; + // if (!op.Build(..., &error)) { + // if (error.code() == S2Error::RESOURCE_EXHAUSTED) { + // S2_LOG(ERROR) << error; // Memory limit exceeded + // } + // } + // + // CAVEATS: + // + // - Memory used by the input S2ShapeIndexes and the output S2Builder + // layers is not counted towards the total. + // + // - While memory tracking is reasonably complete and accurate, it does + // not account for every last byte. It is intended only for the + // purpose of preventing clients from running out of memory. + // + // DEFAULT: nullptr (memory tracking disabled) + S2MemoryTracker* memory_tracker() const; + void set_memory_tracker(S2MemoryTracker* tracker); + // Options may be assigned and copied. Options(const Options& options); Options& operator=(const Options& options); @@ -376,11 +460,18 @@ class S2BooleanOperation { PolygonModel polygon_model_ = PolygonModel::SEMI_OPEN; PolylineModel polyline_model_ = PolylineModel::CLOSED; bool polyline_loops_have_boundaries_ = true; + bool split_all_crossing_polyline_edges_ = false; Precision precision_ = Precision::EXACT; bool conservative_output_ = false; ValueLexicon* source_id_lexicon_ = nullptr; + S2MemoryTracker* memory_tracker_ = nullptr; }; +#ifndef SWIG + // Specifies that the output boundary edges should be sent to a single + // S2Builder layer. This version can be used when the dimension of the + // output geometry is known (e.g., intersecting two polygons to yield a + // third polygon). 
S2BooleanOperation(OpType op_type, std::unique_ptr layer, const Options& options = Options()); @@ -406,8 +497,10 @@ class S2BooleanOperation { S2BooleanOperation(OpType op_type, std::vector> layers, const Options& options = Options()); +#endif OpType op_type() const { return op_type_; } + const Options& options() const { return options_; } // Executes the given operation. Returns true on success, and otherwise // sets "error" appropriately. (This class does not generate any errors @@ -454,8 +547,8 @@ class S2BooleanOperation { S2BooleanOperation(OpType op_type, bool* result_empty, const Options& options = Options()); - OpType op_type_; Options options_; + OpType op_type_; // The input regions. const S2ShapeIndex* regions_[2]; diff --git a/src/s2/s2buffer_operation.cc b/src/s2/s2buffer_operation.cc new file mode 100644 index 00000000..b4d50a09 --- /dev/null +++ b/src/s2/s2buffer_operation.cc @@ -0,0 +1,770 @@ +// Copyright 2020 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS-IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// Author: ericv@google.com (Eric Veach) +// +// The algorithm below essentially computes the offset curve of the original +// boundary, and uses this curve to divide the sphere into regions of constant +// winding number. Since winding numbers on the sphere are relative rather +// than absolute (see s2winding_operation.h), we also need to keep track of +// the desired winding number at a fixed reference point. 
The initial winding +// number for this point is the number of input shapes that contain it. We +// then update it during the buffering process by imagining a "sweep edge" +// that extends from the current point A on the input boundary to the +// corresponding point B on the offset curve. As we process an input loop and +// generate the corresponding offset curve, the sweep edge moves continuously +// and covers the entire buffer region (i.e., the region added to or +// subtracted from the input geometry). We increase the winding number of the +// reference point by one whenever it crosses the sweep edge from left to +// right, and we decrease the winding number by one whenever it crosses the +// sweep edge from right to left. +// +// Concave vertices require special handling, because the corresponding offset +// curve can leave behind regions whose winding number is zero or negative. +// We handle this by splicing the concave vertex into the offset curve itself; +// this effectively terminates the current buffer region and starts a new one, +// such that the region of overlap is counted twice (i.e., its winding number +// increases by two). The end result is the same as though we had computed +// the union of a sequence of buffered convex boundary segments. This trick +// is described in the following paper: "Polygon Offsetting by Computing +// Winding Numbers" (Chen and McMains, Proceedings of IDETC/CIE 2005). +// +// TODO(ericv): The algorithm below is much faster than, say, computing the +// union of many buffered edges. However further improvements are possible. +// In particular, there is an unimplemented optimization that would make it +// much faster to buffer concave boundaries when the buffer radius is large. 
+ +#include "s2/s2buffer_operation.h" + +#include +#include +#include +#include +#include +#include + +#include "absl/types/span.h" +#include "s2/s1angle.h" +#include "s2/s1chord_angle.h" +#include "s2/s2builder.h" +#include "s2/s2builder_layer.h" +#include "s2/s2builderutil_snap_functions.h" +#include "s2/s2contains_point_query.h" +#include "s2/s2edge_crosser.h" +#include "s2/s2edge_crossings.h" +#include "s2/s2edge_distances.h" +#include "s2/s2error.h" +#include "s2/s2lax_loop_shape.h" +#include "s2/s2memory_tracker.h" +#include "s2/s2point.h" +#include "s2/s2point_span.h" +#include "s2/s2pointutil.h" +#include "s2/s2predicates.h" +#include "s2/s2predicates_internal.h" +#include "s2/s2shape.h" +#include "s2/s2shape_index.h" +#include "s2/s2shape_measures.h" +#include "s2/s2shapeutil_contains_brute_force.h" +#include "s2/s2winding_operation.h" +#include "s2/util/math/mathutil.h" + +using s2pred::DBL_ERR; +using std::ceil; +using std::make_unique; +using std::max; +using std::min; +using std::unique_ptr; +using std::vector; + +// The errors due to buffering can be categorized as follows: +// +// 1. Requested error. This represents the error due to approximating the +// buffered boundary as a sequence of line segments rather than a sequence +// of circular arcs. It is largely controlled by options.error_fraction(), +// and can be bounded as +// +// max(kMinRequestedError, error_fraction * buffer_radius) +// +// where kMinRequestedError reflects the fact that S2Points do not have +// infinite precision. (For example, it makes no sense to attempt to +// buffer geometry by 1e-100 radians because the spacing between +// representable S2Points is only about 2e-16 radians in general.) +// +// 2. Relative interpolation errors. These are numerical errors whose +// magnitude is proportional to the buffer radius. 
For such errors the +// worst-case coefficient of proportionality turns out to be so tiny +// compared to the smallest allowable error fraction (kMinErrorFraction) +// that we simply ignore such errors. +// +// 3. Absolute interpolation errors. These are numerical errors that are not +// proportional to the buffer radius. The main sources of such errors are +// (1) calling S2::RobustCrossProd() to compute edge normals, and (2) calls +// to S2::GetPointOnRay() to interpolate points along the buffered +// boundary. It is possible to show that this error is at most +// kMaxAbsoluteInterpolationError as defined below. +// +// Putting these together, the final error bound looks like this: +// +// max_error = kMaxAbsoluteInterpolationError + +// max(kMinRequestedError, +// max(kMinErrorFraction, options.error_fraction()) * +// options.buffer_radius()) + +// The maximum angular spacing between representable S2Points on the unit +// sphere is roughly 2 * DBL_ERR. We require the requested absolute error to +// be at least this large because attempting to achieve a smaller error does +// not increase the precision of the result and can increase the running time +// and space requirements considerably. +static constexpr S1Angle kMinRequestedError = S1Angle::Radians(2 * DBL_ERR); + +// The maximum absolute error due to interpolating points on the buffered +// boundary. The following constant bounds the maximum additional error +// perpendicular to the buffered boundary due to all steps of the calculation +// (S2::RobustCrossProd, the two calls to GetPointOnRay, etc). +// +// This distance represents about 10 nanometers on the Earth's surface. Note +// that this is a conservative upper bound and that it is difficult to +// construct inputs where the error is anywhere close to this large. 
+static constexpr S1Angle kMaxAbsoluteInterpolationError = + S2::kGetPointOnLineError + S2::kGetPointOnRayPerpendicularError; + +// TODO(user, b/210097200): Remove when we require c++17 for opensource. +constexpr double S2BufferOperation::Options::kMinErrorFraction; +constexpr double S2BufferOperation::Options::kMaxCircleSegments; + +S2BufferOperation::Options::Options() + : snap_function_( + make_unique(S1Angle::Zero())) { +} + +S2BufferOperation::Options::Options(S1Angle buffer_radius) : Options() { + buffer_radius_ = buffer_radius; +} + +S2BufferOperation::Options::Options(const Options& options) + : buffer_radius_(options.buffer_radius_), + error_fraction_(options.error_fraction_), + end_cap_style_(options.end_cap_style_), + polyline_side_(options.polyline_side_), + snap_function_(options.snap_function_->Clone()), + memory_tracker_(options.memory_tracker_) { +} + +S2BufferOperation::Options& S2BufferOperation::Options::operator=( + const Options& options) { + buffer_radius_ = options.buffer_radius_; + error_fraction_ = options.error_fraction_; + end_cap_style_ = options.end_cap_style_; + polyline_side_ = options.polyline_side_; + snap_function_ = options.snap_function_->Clone(); + memory_tracker_ = options.memory_tracker_; + return *this; +} + +S1Angle S2BufferOperation::Options::buffer_radius() const { + return buffer_radius_; +} + +void S2BufferOperation::Options::set_buffer_radius(S1Angle buffer_radius) { + buffer_radius_ = buffer_radius; +} + +double S2BufferOperation::Options::error_fraction() const { + return error_fraction_; +} + +void S2BufferOperation::Options::set_error_fraction(double error_fraction) { + S2_DCHECK_GE(error_fraction, kMinErrorFraction); + S2_DCHECK_LE(error_fraction, 1.0); + error_fraction_ = max(kMinErrorFraction, min(1.0, error_fraction)); +} + +const S1Angle S2BufferOperation::Options::max_error() const { + // See comments for kMinRequestedError above. 
+ S2Builder::Options builder_options(*snap_function_); + builder_options.set_split_crossing_edges(true); + return max(kMinRequestedError, error_fraction_ * abs(buffer_radius_)) + + kMaxAbsoluteInterpolationError + builder_options.max_edge_deviation(); +} + +double S2BufferOperation::Options::circle_segments() const { +#if 0 + // This formula assumes that vertices can be placed anywhere. TODO(ericv). + return M_PI / acos((1 - error_fraction_) / (1 + error_fraction_)); +#else + // This formula assumes that all vertices are placed on the midline. + return M_PI / acos(1 - error_fraction_); +#endif +} + +void S2BufferOperation::Options::set_circle_segments(double circle_segments) { + S2_DCHECK_GE(circle_segments, 2.0); + S2_DCHECK_LE(circle_segments, kMaxCircleSegments); + circle_segments = max(2.0, min(kMaxCircleSegments, circle_segments)); + + // We convert circle_segments to error_fraction using planar geometry, + // because the number of segments required to approximate a circle on the + // sphere to within a given tolerance is not constant. Unlike in the plane, + // the total curvature of a circle on the sphere decreases as the area + // enclosed by the circle increases; great circles have no curvature at all. + // We round up when converting to ensure that we won't generate any tiny + // extra edges. 
+ // +#if 0 + // Note that we take advantage of both positive and negative errors when + // approximating circles (i.e., vertices are not necessarily on the midline) + // and thus the relationships between circle_segments and error_fraction are + // e = (1 - cos(Pi/n)) / (1 + cos(Pi/n)) + // n = Pi / acos((1 - e) / (1 + e)) + double r = cos(M_PI / circle_segments); + set_error_fraction((1 - r) / (1 + r) + 1e-15); +#else + // When all vertices are on the midline, the relationships are + // e = 1 - cos(Pi/n) + // n = Pi / acos(1 - e) + set_error_fraction(1 - cos(M_PI / circle_segments) + 1e-15); +#endif +} + +S2BufferOperation::EndCapStyle S2BufferOperation::Options::end_cap_style() + const { + return end_cap_style_; +} + +void S2BufferOperation::Options::set_end_cap_style(EndCapStyle end_cap_style) { + end_cap_style_ = end_cap_style; +} + +S2BufferOperation::PolylineSide S2BufferOperation::Options::polyline_side() + const { + return polyline_side_; +} + +void S2BufferOperation::Options::set_polyline_side( + PolylineSide polyline_side) { + polyline_side_ = polyline_side; +} + +const S2Builder::SnapFunction& S2BufferOperation::Options::snap_function() + const { + return *snap_function_; +} + +void S2BufferOperation::Options::set_snap_function( + const S2Builder::SnapFunction& snap_function) { + snap_function_ = snap_function.Clone(); +} + +S2MemoryTracker* S2BufferOperation::Options::memory_tracker() const { + return memory_tracker_; +} + +void S2BufferOperation::Options::set_memory_tracker(S2MemoryTracker* tracker) { + memory_tracker_ = tracker; +} + +S2BufferOperation::S2BufferOperation() = default; + +S2BufferOperation::S2BufferOperation(unique_ptr result_layer, + const Options& options) { + Init(std::move(result_layer), options); +} + +void S2BufferOperation::Init(unique_ptr result_layer, + const Options& options) { + options_ = options; + ref_point_ = S2::Origin(); + ref_winding_ = 0; + have_input_start_ = false; + have_offset_start_ = false; + buffer_sign_ = 
sgn(options_.buffer_radius().radians()); + S1Angle abs_radius = abs(options_.buffer_radius()); + S1Angle requested_error = max(kMinRequestedError, + options_.error_fraction() * abs_radius); + S1Angle max_error = kMaxAbsoluteInterpolationError + requested_error; + if (abs_radius <= max_error) { + // If the requested radius is smaller than the maximum error, buffering + // could yield points on the wrong side of the original input boundary + // (e.g., shrinking geometry slightly rather than expanding it). Rather + // than taking that risk, we set the buffer radius to zero when this + // happens (which causes the original geometry to be returned). + abs_radius_ = S1ChordAngle::Zero(); + buffer_sign_ = 0; + } else if (abs_radius + max_error >= S1Angle::Radians(M_PI)) { + // If the permissible range of buffer angles includes Pi then we might + // as well take advantage of that. + abs_radius_ = S1ChordAngle::Straight(); + } else { + abs_radius_ = S1ChordAngle(abs_radius); + S1Angle vertex_step = GetMaxEdgeSpan(abs_radius, requested_error); + vertex_step_ = S1ChordAngle(vertex_step); + + // We take extra care to ensure that points are buffered as regular + // polygons. The step angle is adjusted up slightly to ensure that we + // don't wind up with a tiny extra edge. + point_step_ = S1ChordAngle::Radians( + 2 * M_PI / ceil(2 * M_PI / vertex_step.radians()) + 1e-15); + + // Edges are buffered only if the buffer radius (including permissible + // error) is less than 90 degrees. + S1Angle edge_radius = S1Angle::Radians(M_PI_2) - abs_radius; + if (edge_radius > max_error) { + edge_step_ = S1ChordAngle(GetMaxEdgeSpan(edge_radius, requested_error)); + } + } + + // The buffered output should include degeneracies (i.e., isolated points + // and/or sibling edge pairs) only if (1) the user specified a non-negative + // buffer radius, and (2) the adjusted buffer radius is zero. 
The only + // purpose of keeping degeneracies is to allow points/polylines in the input + // geometry to be converted back to points/polylines in the output if the + // client so desires. + S2WindingOperation::Options winding_options{options.snap_function()}; + winding_options.set_include_degeneracies( + buffer_sign_ == 0 && options_.buffer_radius() >= S1Angle::Zero()); + winding_options.set_memory_tracker(options.memory_tracker()); + op_.Init(std::move(result_layer), winding_options); + tracker_.Init(options.memory_tracker()); +} + +const S2BufferOperation::Options& S2BufferOperation::options() const { + return options_; +} + +S1Angle S2BufferOperation::GetMaxEdgeSpan(S1Angle radius, + S1Angle requested_error) const { + // If the allowable radius range spans Pi/2 then we can use edges as long as + // we like, however we always use at least 3 edges to approximate a circle. + S1Angle step = S1Angle::Radians(2 * M_PI / 3 + 1e-15); + S1Angle min_radius = radius - requested_error; + S2_DCHECK_GE(min_radius, S1Angle::Zero()); + if (radius.radians() < M_PI_2) { + step = min(step, S1Angle::Radians(2 * acos(tan(min_radius) / tan(radius)))); + } else if (min_radius.radians() > M_PI_2) { + step = min(step, S1Angle::Radians(2 * acos(tan(radius) / tan(min_radius)))); + } + return step; +} + +// The sweep edge AB (see introduction) consists of one point on the input +// boundary (A) and one point on the offset curve (B). This function advances +// the sweep edge by moving its first vertex A to "new_a" and updating the +// winding number of the reference point if necessary. +void S2BufferOperation::SetInputVertex(const S2Point& new_a) { + if (have_input_start_) { + S2_DCHECK(have_offset_start_); + UpdateRefWinding(sweep_a_, sweep_b_, new_a); + } else { + input_start_ = new_a; + have_input_start_ = true; + } + sweep_a_ = new_a; +} + +// Adds the point "new_b" to the offset path. 
Also advances the sweep edge AB +// by moving its second vertex B to "new_b" and updating the winding number of +// the reference point if necessary (see introduction). +void S2BufferOperation::AddOffsetVertex(const S2Point& new_b) { + if (!tracker_.AddSpace(&path_, 1)) return; + path_.push_back(new_b); + if (have_offset_start_) { + S2_DCHECK(have_input_start_); + UpdateRefWinding(sweep_a_, sweep_b_, new_b); + } else { + offset_start_ = new_b; + have_offset_start_ = true; + } + sweep_b_ = new_b; +} + +// Finishes buffering the current loop by advancing the sweep edge back to its +// starting location, updating the winding number of the reference point if +// necessary. +void S2BufferOperation::CloseBufferRegion() { + if (have_offset_start_ && have_input_start_) { + UpdateRefWinding(sweep_a_, sweep_b_, input_start_); + UpdateRefWinding(input_start_, sweep_b_, offset_start_); + } +} + +// Outputs the current buffered path (which is assumed to be a loop), and +// resets the state to prepare for buffering a new loop. +void S2BufferOperation::OutputPath() { + op_.AddLoop(path_); + path_.clear(); // Does not change capacity. + have_input_start_ = false; + have_offset_start_ = false; +} + +// Given a triangle ABC that has just been covered by the sweep edge AB, +// updates the winding number of the reference point if necessary. +void S2BufferOperation::UpdateRefWinding( + const S2Point& a, const S2Point& b, const S2Point& c) { + // TODO(ericv): This code could be made much faster by maintaining a + // bounding plane that separates the current sweep edge from the reference + // point. Whenever the sweep_a_ or sweep_b_ is updated we would just need + // to check that the new vertex is still on the opposite side of the + // bounding plane (i.e., one dot product). If not, we test the current + // triangle using the code below and then compute a new bounding plane. 
+ // + // Another optimization would be to choose the reference point to be 90 + // degrees away from the first input vertex, since then triangle tests would + // not be needed unless the input geometry spans more than 90 degrees. This + // would involve adding a new flag have_ref_point_ rather than always + // choosing the reference point to be S2::Origin(). + // + // According to profiling these optimizations are not currently worthwhile, + // but this is worth revisiting if and when other improvements are made. + int sign = s2pred::Sign(a, b, c); + if (sign == 0) return; + bool inside = S2::AngleContainsVertex(a, b, c) == (sign > 0); + S2EdgeCrosser crosser(&b, &ref_point_); + inside ^= crosser.EdgeOrVertexCrossing(&a, &b); + inside ^= crosser.EdgeOrVertexCrossing(&b, &c); + inside ^= crosser.EdgeOrVertexCrossing(&c, &a); + if (inside) ref_winding_ += sign; +} + +// Ensures that the output will be the full polygon. +void S2BufferOperation::AddFullPolygon() { + ref_winding_ += 1; +} + +void S2BufferOperation::AddPoint(const S2Point& point) { + // If buffer_radius < 0, points are discarded. + if (buffer_sign_ < 0) return; + + // Buffering by 180 degrees or more always yields the full polygon. + // (We don't need to worry about buffering by 180 degrees yielding + // a degenerate hole because error_fraction_ is always positive.) + if (abs_radius_ >= S1ChordAngle::Straight()) { + return AddFullPolygon(); + } + + // If buffer_radius == 0, points are converted into degenerate loops. + if (buffer_sign_ == 0) { + if (!tracker_.AddSpace(&path_, 1)) return; + path_.push_back(point); + } else { + // Since S1ChordAngle can only represent angles between 0 and 180 degrees, + // we generate the circle in four 90 degree increments. + SetInputVertex(point); + S2Point start = S2::Ortho(point); + S1ChordAngle angle = S1ChordAngle::Zero(); + for (int quadrant = 0; quadrant < 4; ++quadrant) { + // Generate 90 degrees of the circular arc.
Normalize "rotate_dir" at + // each iteration to avoid magnifying normalization errors in "point". + S2Point rotate_dir = point.CrossProd(start).Normalize(); + for (; angle < S1ChordAngle::Right(); angle += point_step_) { + S2Point dir = S2::GetPointOnRay(start, rotate_dir, angle); + AddOffsetVertex(S2::GetPointOnRay(point, dir, abs_radius_)); + } + angle -= S1ChordAngle::Right(); + start = rotate_dir; + } + CloseBufferRegion(); + } + OutputPath(); +} + +// Returns the edge normal for the given edge AB. The sign is chosen such +// that the normal is on the right of AB if buffer_sign_ > 0, and on the left +// of AB if buffer_sign_ < 0. +inline S2Point S2BufferOperation::GetEdgeAxis(const S2Point& a, + const S2Point& b) const { + S2_DCHECK_NE(buffer_sign_, 0); + return buffer_sign_ * S2::RobustCrossProd(b, a).Normalize(); +} + +// Adds a semi-open offset arc around vertex V. The arc proceeds CCW from +// "start" to "end" (both of which must be perpendicular to V). +void S2BufferOperation::AddVertexArc(const S2Point& v, const S2Point& start, + const S2Point& end) { + // Make sure that we output at least one point even when span == 0. + S2Point rotate_dir = buffer_sign_ * v.CrossProd(start).Normalize(); + S1ChordAngle angle, span(start, end); + do { + S2Point dir = S2::GetPointOnRay(start, rotate_dir, angle); + AddOffsetVertex(S2::GetPointOnRay(v, dir, abs_radius_)); + } while ((angle += vertex_step_) < span); +} + +// Closes the semi-open arc generated by AddVertexArc(). +void S2BufferOperation::CloseVertexArc(const S2Point& v, const S2Point& end) { + AddOffsetVertex(S2::GetPointOnRay(v, end, abs_radius_)); +} + +// Adds a semi-open offset arc for the given edge AB. +void S2BufferOperation::AddEdgeArc(const S2Point& a, const S2Point& b) { + S2Point ab_axis = GetEdgeAxis(a, b); + if (edge_step_ == S1ChordAngle::Zero()) { + // If the buffer radius is more than 90 degrees, edges do not contribute to + // the buffered boundary. 
Instead we force the offset path to pass + // through a vertex located at the edge normal. This is similar to the + // case of concave vertices (below) where it is necessary to route the + // offset path through the concave vertex to ensure that the winding + // numbers in all output regions have the correct sign. + AddOffsetVertex(ab_axis); + } else { + // Make sure that we output at least one point even when span == 0. + S2Point rotate_dir = buffer_sign_ * a.CrossProd(ab_axis).Normalize(); + S1ChordAngle angle, span(a, b); + do { + S2Point p = S2::GetPointOnRay(a, rotate_dir, angle); + AddOffsetVertex(S2::GetPointOnRay(p, ab_axis, abs_radius_)); + } while ((angle += edge_step_) < span); + } + SetInputVertex(b); +} + +// Closes the semi-open arc generated by AddEdgeArc(). +void S2BufferOperation::CloseEdgeArc(const S2Point& a, const S2Point& b) { + if (edge_step_ != S1ChordAngle::Zero()) { + AddOffsetVertex(S2::GetPointOnRay(b, GetEdgeAxis(a, b), abs_radius_)); + } +} + +// Buffers the edge AB and the vertex B. (The vertex C is used to determine +// the range of angles that should be buffered at B.) +// +// TODO(ericv): Let A* denote the possible offset points of A with respect to +// the edge AB for buffer radii in the range specified by "radius" and +// "error_fraction". Rather than requiring that the path so far terminates at +// a point in A*, as you might expect, instead we only require that the path +// terminates at a point X such that for any point Y in A*, the edge XY does +// not leave the valid buffer zone of the previous edge and vertex. +void S2BufferOperation::BufferEdgeAndVertex(const S2Point& a, const S2Point& b, + const S2Point& c) { + S2_DCHECK_NE(a, b); + S2_DCHECK_NE(b, c); + S2_DCHECK_NE(buffer_sign_, 0); + if (!tracker_.ok()) return; + + // For left (convex) turns we need to add an offset arc. 
For right + // (concave) turns we connect the end of the current offset path to the + // vertex itself and then to the start of the offset path for the next edge. + // Note that A == C is considered to represent a convex (left) turn. + AddEdgeArc(a, b); + if (buffer_sign_ * s2pred::Sign(a, b, c) >= 0) { + // The boundary makes a convex turn. If there is no following edge arc + // then we need to generate a closed vertex arc. + S2Point start = GetEdgeAxis(a, b); + S2Point end = GetEdgeAxis(b, c); + AddVertexArc(b, start, end); + if (edge_step_ == S1ChordAngle::Zero()) CloseVertexArc(b, end); + } else { + // The boundary makes a concave turn. It is tempting to simply connect + // the end of the current offset path to the start of the offset path for + // the next edge, however this can create output regions where the winding + // number is incorrect. A solution that always works is to terminate the + // current offset path and start a new one by connecting the two offset + // paths through the input vertex whenever it is concave. We first need + // to close the previous semi-open edge arc if necessary. + CloseEdgeArc(a, b); + AddOffsetVertex(b); // Connect through the input vertex. + } +} + +// Given a polyline that starts with the edge AB, adds an end cap (as +// specified by end_cap_style() and polyline_side()) for the vertex A. +void S2BufferOperation::AddStartCap(const S2Point& a, const S2Point& b) { + S2Point axis = GetEdgeAxis(a, b); + if (options_.end_cap_style() == EndCapStyle::FLAT) { + // One-sided flat end caps require no additional vertices since the + // "offset curve" for the opposite side is simply the reversed polyline. + if (options_.polyline_side() == PolylineSide::BOTH) { + AddOffsetVertex(S2::GetPointOnRay(a, -axis, abs_radius_)); + } + } else { + S2_DCHECK(options_.end_cap_style() == EndCapStyle::ROUND); + if (options_.polyline_side() == PolylineSide::BOTH) { + // The end cap consists of a semicircle. 
+ AddVertexArc(a, -axis, axis); + } else { + // The end cap consists of a quarter circle. Note that for + // PolylineSide::LEFT, the polyline direction has been reversed. + AddVertexArc(a, axis.CrossProd(a).Normalize(), axis); + } + } +} + +// Given a polyline that ends with the edge AB, adds an end cap (as specified +// by end_cap_style() and polyline_side()) for the vertex B. +void S2BufferOperation::AddEndCap(const S2Point& a, const S2Point& b) { + S2Point axis = GetEdgeAxis(a, b); + if (options_.end_cap_style() == EndCapStyle::FLAT) { + CloseEdgeArc(a, b); // Close the previous semi-open edge arc if necessary. + } else { + S2_DCHECK(options_.end_cap_style() == EndCapStyle::ROUND); + if (options_.polyline_side() == PolylineSide::BOTH) { + // The end cap consists of a semicircle. + AddVertexArc(b, axis, -axis); + } else { + // The end cap consists of a quarter circle. We close the arc since it + // will be followed by the reversed polyline vertices. Note that for + // PolylineSide::LEFT, the polyline direction has been reversed. + S2Point end = b.CrossProd(axis).Normalize(); + AddVertexArc(b, axis, end); + CloseVertexArc(b, end); + } + } +} + +// Helper function that buffers the given loop. +void S2BufferOperation::BufferLoop(S2PointLoopSpan loop) { + // Empty loops always yield an empty path. + if (loop.empty() || !tracker_.ok()) return; + + // Loops with one degenerate edge are treated as points. + if (loop.size() == 1) return AddPoint(loop[0]); + + // Buffering by 180 degrees or more always yields the full polygon. + // Buffering by -180 degrees or more always yields the empty polygon. + if (abs_radius_ >= S1ChordAngle::Straight()) { + if (buffer_sign_ > 0) AddFullPolygon(); + return; + } + + // If buffer_radius == 0, the loop is passed through unchanged. 
+ if (buffer_sign_ == 0) { + if (!tracker_.AddSpace(&path_, loop.size())) return; + path_.assign(loop.begin(), loop.end()); + } else { + SetInputVertex(loop[0]); + for (size_t i = 0; i < loop.size(); ++i) { + BufferEdgeAndVertex(loop[i], loop[i + 1], loop[i + 2]); + } + CloseBufferRegion(); + } + OutputPath(); +} + +void S2BufferOperation::AddPolyline(S2PointSpan polyline) { + // Left-sided buffering is supported by reversing the polyline and then + // buffering on the right. + vector reversed; + if (options_.polyline_side() == PolylineSide::LEFT) { + reversed.reserve(polyline.size()); + std::reverse_copy(polyline.begin(), polyline.end(), + std::back_inserter(reversed)); + polyline = reversed; + } + + // If buffer_radius < 0, polylines are discarded. + if (buffer_sign_ < 0 || !tracker_.ok()) return; + + // Polylines with 0 or 1 vertices are defined to have no edges. + int n = polyline.size(); + if (n <= 1) return; + + // Polylines with one degenerate edge are treated as points. + if (n == 2 && polyline[0] == polyline[1]) { + return AddPoint(polyline[0]); + } + + // Buffering by 180 degrees or more always yields the full polygon. + if (abs_radius_ >= S1ChordAngle::Straight()) { + return AddFullPolygon(); + } + + // If buffer_radius == 0, polylines are converted into degenerate loops. + if (buffer_sign_ == 0) { + if (!tracker_.AddSpace(&path_, 2 * (n - 1))) return; + path_.assign(polyline.begin(), polyline.end() - 1); + path_.insert(path_.end(), polyline.rbegin(), polyline.rend() - 1); + } else { + // Otherwise we buffer each side of the polyline separately. 
+ SetInputVertex(polyline[0]); + AddStartCap(polyline[0], polyline[1]); + for (int i = 0; i < n - 2; ++i) { + BufferEdgeAndVertex(polyline[i], polyline[i + 1], polyline[i + 2]); + } + AddEdgeArc(polyline[n - 2], polyline[n - 1]); + AddEndCap(polyline[n - 2], polyline[n - 1]); + + if (options_.polyline_side() == PolylineSide::BOTH) { + for (int i = n - 3; i >= 0; --i) { + BufferEdgeAndVertex(polyline[i + 2], polyline[i + 1], polyline[i]); + } + AddEdgeArc(polyline[1], polyline[0]); + CloseBufferRegion(); + } else { + // The other side of the polyline is not buffered. Note that for + // PolylineSide::LEFT, the polyline direction has been reversed. + if (!tracker_.AddSpace(&path_, n)) return; + path_.insert(path_.end(), polyline.rbegin(), polyline.rend()); + // Don't call CloseBufferRegion() since the path has already been closed. + } + } + OutputPath(); +} + +void S2BufferOperation::AddLoop(S2PointLoopSpan loop) { + if (loop.empty()) return; + BufferLoop(loop); + + // The vertex copying below could be avoided by adding a version of + // S2LaxLoopShape that doesn't own its vertices. 
+ if (!tracker_.ok()) return; + ref_winding_ += s2shapeutil::ContainsBruteForce(S2LaxLoopShape(loop), + ref_point_); + num_polygon_layers_ += 1; +} + +void S2BufferOperation::BufferShape(const S2Shape& shape) { + int dimension = shape.dimension(); + int num_chains = shape.num_chains(); + for (int c = 0; c < num_chains; ++c) { + S2Shape::Chain chain = shape.chain(c); + if (chain.length == 0) continue; + if (dimension == 0) { + AddPoint(shape.edge(c).v0); + } else { + S2::GetChainVertices(shape, c, &tmp_vertices_); + if (dimension == 1) { + AddPolyline(S2PointSpan(tmp_vertices_)); + } else { + BufferLoop(S2PointLoopSpan(tmp_vertices_)); + } + } + } +} + +void S2BufferOperation::AddShape(const S2Shape& shape) { + BufferShape(shape); + ref_winding_ += s2shapeutil::ContainsBruteForce(shape, ref_point_); + num_polygon_layers_ += (shape.dimension() == 2); +} + +void S2BufferOperation::AddShapeIndex(const S2ShapeIndex& index) { + int max_dimension = -1; + for (const S2Shape* shape : index) { + if (shape == nullptr) continue; + max_dimension = max(max_dimension, shape->dimension()); + BufferShape(*shape); + } + ref_winding_ += MakeS2ContainsPointQuery(&index).Contains(ref_point_); + num_polygon_layers_ += (max_dimension == 2); +} + +bool S2BufferOperation::Build(S2Error* error) { + if (buffer_sign_ < 0 && num_polygon_layers_ > 1) { + error->Init(S2Error::FAILED_PRECONDITION, + "Negative buffer radius requires at most one polygon layer"); + return false; + } + return op_.Build(ref_point_, ref_winding_, + S2WindingOperation::WindingRule::POSITIVE, error); +} diff --git a/src/s2/s2buffer_operation.h b/src/s2/s2buffer_operation.h new file mode 100644 index 00000000..3ba4a3ff --- /dev/null +++ b/src/s2/s2buffer_operation.h @@ -0,0 +1,359 @@ +// Copyright 2020 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS-IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// Author: ericv@google.com (Eric Veach) + +#ifndef S2_S2BUFFER_OPERATION_H_ +#define S2_S2BUFFER_OPERATION_H_ + +#include +#include + +#include "s2/base/integral_types.h" +#include "s2/s1angle.h" +#include "s2/s1chord_angle.h" +#include "s2/s2builder.h" +#include "s2/s2error.h" +#include "s2/s2memory_tracker.h" +#include "s2/s2point.h" +#include "s2/s2point_span.h" +#include "s2/s2shape.h" +#include "s2/s2shape_index.h" +#include "s2/s2winding_operation.h" + +// This class provides a way to expand an arbitrary collection of geometry by +// a fixed radius (an operation variously known as "buffering", "offsetting", +// or "Minkowski sum with a disc"). The output consists of a polygon +// (possibly with multiple shells) that contains all points within the given +// radius of the original geometry. +// +// The radius can also be negative, in which case the geometry is contracted. +// This causes the boundaries of polygons to shrink or disappear, and removes +// all points and polylines. +// +// The input consists of a sequence of layers. Each layer may consist of any +// combination of points, polylines, and polygons, with the restriction that +// polygon interiors within each layer may not intersect any other geometry +// (including other polygon interiors). The output is the union of the +// buffered input layers. Note that only a single layer is allowed if the +// buffer radius is negative. +// +// This class may be used to compute polygon unions by setting the buffer +// radius to zero. 
The union is computed using a single snapping operation. +// +// Note that if you only want to compute an S2CellId covering of the buffered +// geometry, it is much faster to use S2ShapeIndexBufferedRegion instead. +// +// Keywords: buffer, buffering, expand, expanding, offset, offsetting, +// widen, contract, shrink, Minkowski sum +class S2BufferOperation { + public: + // For polylines, specifies whether the end caps should be round or flat. + // See Options::set_end_cap_style() below. + enum class EndCapStyle : uint8 { ROUND, FLAT }; + + // Specifies whether polylines should be buffered only on the left, only on + // the right, or on both sides. + enum class PolylineSide : uint8 { LEFT, RIGHT, BOTH }; + + class Options { + public: + Options(); + + // Convenience constructor that calls set_buffer_radius(). + explicit Options(S1Angle buffer_radius); + + // If positive, specifies that all points within the given radius of the + // input geometry should be added to the output. If negative, specifies + // that all points within the given radius of complement of the input + // geometry should be subtracted from the output. If the buffer radius + // is zero then the input geometry is passed through to the output layer + // after first converting points and polylines into degenerate loops. + // + // DEFAULT: S1Angle::Zero() + S1Angle buffer_radius() const; + void set_buffer_radius(S1Angle buffer_radius); + + // Specifies the allowable error when buffering, expressed as a fraction + // of buffer_radius(). The actual buffer distance will be in the range + // [(1-f) * r - C, (1 + f) * r + C] where "f" is the error fraction, "r" + // is the buffer radius, and "C" is S2BufferOperation::kAbsError. + // + // Be aware that the number of output edges increases proportionally to + // (1 / sqrt(error_fraction)), so setting a small value may increase the + // size of the output considerably. 
+ // + // REQUIRES: error_fraction() >= kMinErrorFraction + // REQUIRES: error_fraction() <= 1.0 + // + // DEFAULT: 0.01 (i.e., maximum error of 1%) + static constexpr double kMinErrorFraction = 1e-6; + double error_fraction() const; + void set_error_fraction(double error_fraction); + + // Returns the maximum error in the buffered result for the current + // buffer_radius(), error_fraction(), and snap_function(). Note that the + // error due to buffering consists of both relative errors (those + // proportional to the buffer radius) and absolute errors. The maximum + // relative error is controlled by error_fraction(), while the maximum + // absolute error is about 10 nanometers on the Earth's surface and is + // defined internally. The error due to snapping is defined by the + // specified snap_function(). + const S1Angle max_error() const; + + // Alternatively, error_fraction() may be specified as the number of + // polyline segments used to approximate a planar circle. These two + // values are related according to the formula + // + // error_fraction = (1 - cos(theta)) / (1 + cos(theta)) + // ~= 0.25 * (theta ** 2) + // + // where (theta == Pi / circle_segments), i.e. error decreases + // quadratically with the number of circle segments. + // + // REQUIRES: circle_segments() >= 2.0 + // REQUIRES: circle_segments() <= kMaxCircleSegments + // (about 1570; corresponds to kMinErrorFraction) + // + // DEFAULT: about 15.76 (corresponding to error_fraction() default value) + static constexpr double kMaxCircleSegments = 1570.7968503979573; + double circle_segments() const; + void set_circle_segments(double circle_segments); + + // For polylines, specifies whether the end caps should be round or flat. + // + // Note that with flat end caps, there is no buffering beyond the polyline + // endpoints (unlike "square" end caps, which are not implemented). 
+ // + // DEFAULT: EndCapStyle::ROUND + EndCapStyle end_cap_style() const; + void set_end_cap_style(EndCapStyle end_cap_style); + + // Specifies whether polylines should be buffered only on the left, only + // on the right, or on both sides. For one-sided buffering please note + // the following: + // + // - EndCapStyle::ROUND yields two quarter-circles, one at each end. + // + // - To buffer by a different radius on each side of the polyline, you + // can use two S2BufferOperations and compute their union. (Note that + // round end caps will yield two quarter-circles at each end of the + // polyline with different radii.) + // + // - Polylines consisting of a single degenerate edge are always buffered + // identically to points, i.e. this option has no effect. + // + // - When the polyline turns right by more than 90 degrees, buffering may + // or may not extend to the non-buffered side of the polyline. For + // example if ABC makes a 170 degree right turn at B, it is unspecified + // whether the buffering of AB extends across edge BC and vice versa. + // Similarly if ABCD represents two right turns of 90 degrees where AB + // and CD are separated by less than the buffer radius, it is + // unspecified whether buffering of AB extends across CD and vice versa. + // + // DEFAULT: PolylineSide::BOTH + PolylineSide polyline_side() const; + void set_polyline_side(PolylineSide polyline_side); + + // Specifies the function used for snap rounding the output during the + // call to Build(). Note that any errors due to snapping are in addition + // to those specified by error_fraction(). + // + // DEFAULT: s2builderutil::IdentitySnapFunction(S1Angle::Zero()) + const S2Builder::SnapFunction& snap_function() const; + void set_snap_function(const S2Builder::SnapFunction& snap_function); + + // Specifies that internal memory usage should be tracked using the given + // S2MemoryTracker. 
If a memory limit is specified and more memory
+    // than this is required then an error will be returned.  Example usage:
+    //
+    //   S2MemoryTracker tracker;
+    //   tracker.set_limit(500 << 20);  // 500 MB
+    //   S2BufferOperation::Options options;
+    //   options.set_buffer_radius(S1Angle::Degrees(1e-5));
+    //   options.set_memory_tracker(&tracker);
+    //   S2BufferOperation op{options};
+    //   ...
+    //   S2Error error;
+    //   if (!op.Build(&error)) {
+    //     if (error.code() == S2Error::RESOURCE_EXHAUSTED) {
+    //       S2_LOG(ERROR) << error;  // Memory limit exceeded
+    //     }
+    //   }
+    //
+    // CAVEATS:
+    //
+    //  - Memory allocated by the output S2Builder layer is not tracked.
+    //
+    //  - While memory tracking is reasonably complete and accurate, it does
+    //    not account for every last byte.  It is intended only for the
+    //    purpose of preventing clients from running out of memory.
+    //
+    // DEFAULT: nullptr (memory tracking disabled)
+    S2MemoryTracker* memory_tracker() const;
+    void set_memory_tracker(S2MemoryTracker* tracker);
+
+    // Options may be assigned and copied.
+    Options(const Options& options);
+    Options& operator=(const Options& options);
+
+   private:
+    S1Angle buffer_radius_ = S1Angle::Zero();
+    // Must agree with the documented "DEFAULT: 0.01" for error_fraction().
+    double error_fraction_ = 0.01;
+    EndCapStyle end_cap_style_ = EndCapStyle::ROUND;
+    PolylineSide polyline_side_ = PolylineSide::BOTH;
+    std::unique_ptr<S2Builder::SnapFunction> snap_function_;
+    S2MemoryTracker* memory_tracker_ = nullptr;
+  };
+
+  // Default constructor; requires Init() to be called.
+  S2BufferOperation();
+
+#ifndef SWIG
+  // Convenience constructor that calls Init().
+  explicit S2BufferOperation(std::unique_ptr<S2Builder::Layer> result_layer,
+                             const Options& options = Options{});
+#endif
+
+  // Starts a buffer operation that sends the output polygon to the given
+  // S2Builder layer.  This method may be called more than once.
+  //
+  // Note that buffering always yields a polygon, even if the input includes
+  // polylines and points.
If the buffer radius is zero, points and polylines + // will be converted into degenerate polygon loops; if the buffer radius is + // negative, points and polylines will be removed. + void Init(std::unique_ptr result_layer, + const Options& options = Options()); + + const Options& options() const; + + // Each call below represents a different input layer. Note that if the + // buffer radius is negative, then at most one input layer is allowed + // (ignoring any layers that contain only points and polylines). + + // Adds an input layer containing a single point. + void AddPoint(const S2Point& point); + + // Adds an input layer containing a polyline. Note the following: + // + // - Polylines with 0 or 1 vertices are considered to be empty. + // - A polyline with 2 identical vertices is equivalent to a point. + // - Polylines have end caps (see Options::end_cap_style). + // - One-sided polyline buffering is supported (see Options::polyline_side). + void AddPolyline(S2PointSpan polyline); + + // Adds an input layer containing a loop. Note the following: + // + // - A loop with no vertices is empty. + // - A loop with 1 vertex is equivalent to a point. + // - The interior of the loop is on its left. + // - Buffering a self-intersecting loop produces undefined results. + void AddLoop(S2PointLoopSpan loop); + + // Adds an input layer containing the given shape. Shapes are handled as + // points, polylines, or polygons according to the rules above. In addition + // note the following: + // + // - Polygons holes may be degenerate (e.g., consisting of a + // single vertex or entirely of sibling pairs such as ABCBDB). + // - Full polygons are supported. Note that since full polygons do + // not have a boundary, they are not affected by buffering. + void AddShape(const S2Shape& shape); + + // Adds an input layer containing all of the shapes in the given index. 
+ // + // REQUIRES: The interiors of polygons must be disjoint from all other + // indexed geometry, including other polygon interiors. + // (S2BooleanOperation also requires this.) + void AddShapeIndex(const S2ShapeIndex& index); + + // Computes the union of the buffered input shapes and sends the output + // polygon to the S2Builder layer specified in the constructor. Returns + // true on success and otherwise sets "error" appropriately. + // + // Note that if the buffer radius is negative, only a single input layer is + // allowed (ignoring any layers that contain only points and polylines). + bool Build(S2Error* error); + + private: + S1Angle GetMaxEdgeSpan(S1Angle radius, S1Angle requested_error) const; + void SetInputVertex(const S2Point& new_a); + void AddOffsetVertex(const S2Point& new_b); + void CloseBufferRegion(); + void OutputPath(); + void UpdateRefWinding(const S2Point& a, const S2Point& b, const S2Point& c); + void AddFullPolygon(); + S2Point GetEdgeAxis(const S2Point& a, const S2Point& b) const; + void AddVertexArc(const S2Point& v, const S2Point& start, const S2Point& end); + void CloseVertexArc(const S2Point& v, const S2Point& end); + void AddEdgeArc(const S2Point& a, const S2Point& b); + void CloseEdgeArc(const S2Point& a, const S2Point& b); + void BufferEdgeAndVertex(const S2Point& a, const S2Point& b, + const S2Point& c); + void AddStartCap(const S2Point& a, const S2Point& b); + void AddEndCap(const S2Point& a, const S2Point& b); + void BufferLoop(S2PointLoopSpan loop); + void BufferShape(const S2Shape& shape); + + Options options_; + + // The number of layers containing two-dimension geometry that have been + // added so far. This is used to enforce the requirement that negative + // buffer radii allow only a single such layer. + int num_polygon_layers_ = 0; + + // Parameters for buffering vertices and edges. + int buffer_sign_; // The sign of buffer_radius (-1, 0, or +1). 
+ S1ChordAngle abs_radius_; + S1ChordAngle vertex_step_, edge_step_; + + // We go to extra effort to ensure that points are transformed into regular + // polygons. (We don't do this for arcs in general because we would rather + // use the allowable error to reduce the complexity of the output rather + // than increase its symmetry.) + S1ChordAngle point_step_; + + // Contains the buffered loops that have been accumulated so far. + S2WindingOperation op_; + + // The current offset path. When each path is completed into a loop it is + // added to op_ (the S2WindingOperation). + std::vector path_; + + // As buffered loops are added we keep track of the winding number of a + // fixed reference point. This is used to derive the winding numbers of + // every region in the spherical partition induced by the buffered loops. + S2Point ref_point_; + + // The winding number associated with ref_point_. + int ref_winding_; + + // The endpoints of the current sweep edge. sweep_a_ is a vertex of the + // original geometry and sweep_b_ is a vertex of the current offset path. + S2Point sweep_a_, sweep_b_; + + // The starting vertices of the current input loop and offset curve. These + // are used to close the buffer region when a loop is completed. + S2Point input_start_, offset_start_; + bool have_input_start_, have_offset_start_; + + // Used internally as a temporary to avoid excessive memory allocation. + std::vector tmp_vertices_; + + S2MemoryTracker::Client tracker_; +}; + +#endif // S2_S2BUFFER_OPERATION_H_ diff --git a/src/s2/s2builder.cc b/src/s2/s2builder.cc index 7781e7bc..68aae5fc 100644 --- a/src/s2/s2builder.cc +++ b/src/s2/s2builder.cc @@ -1,4 +1,3 @@ -#include "cpp-compat.h" // Copyright 2016 Google Inc. All Rights Reserved. 
// // Licensed under the Apache License, Version 2.0 (the "License"); @@ -69,19 +68,26 @@ #include "s2/s2builder.h" +#include + #include #include #include #include #include -#include -#include +#include +#include #include +#include "absl/cleanup/cleanup.h" +#include "absl/container/btree_map.h" +#include "absl/container/flat_hash_set.h" +#include "absl/types/span.h" + #include "s2/base/casts.h" +#include "s2/base/integral_types.h" #include "s2/base/logging.h" -#include "absl/memory/memory.h" -#include "s2/util/bits/bits.h" +#include "s2/base/log_severity.h" #include "s2/id_set_lexicon.h" #include "s2/mutable_s2shape_index.h" #include "s2/s1angle.h" @@ -89,23 +95,33 @@ #include "s2/s2builder_graph.h" #include "s2/s2builder_layer.h" #include "s2/s2builderutil_snap_functions.h" +#include "s2/s2cell_id.h" #include "s2/s2closest_edge_query.h" +#include "s2/s2closest_edge_query_base.h" #include "s2/s2closest_point_query.h" +#include "s2/s2closest_point_query_base.h" +#include "s2/s2crossing_edge_query.h" #include "s2/s2edge_crossings.h" #include "s2/s2edge_distances.h" #include "s2/s2error.h" #include "s2/s2loop.h" +#include "s2/s2point.h" #include "s2/s2point_index.h" -#include "s2/s2pointutil.h" +#include "s2/s2point_span.h" #include "s2/s2polygon.h" #include "s2/s2polyline.h" #include "s2/s2polyline_simplifier.h" #include "s2/s2predicates.h" +#include "s2/s2shape.h" +#include "s2/s2shapeutil_shape_edge.h" #include "s2/s2shapeutil_visit_crossing_edge_pairs.h" #include "s2/s2text_format.h" +#include "s2/util/bits/bits.h" +#include "s2/util/gtl/compact_array.h" -using absl::make_unique; +using absl::flat_hash_set; using gtl::compact_array; +using std::make_unique; using std::max; using std::pair; using std::unique_ptr; @@ -114,28 +130,6 @@ using std::vector; // Internal flag intended to be set from within a debugger. 
bool s2builder_verbose = false; -S1Angle S2Builder::SnapFunction::max_edge_deviation() const { - // We want max_edge_deviation() to be large enough compared to snap_radius() - // such that edge splitting is rare. - // - // Using spherical trigonometry, if the endpoints of an edge of length L - // move by at most a distance R, the center of the edge moves by at most - // asin(sin(R) / cos(L / 2)). Thus the (max_edge_deviation / snap_radius) - // ratio increases with both the snap radius R and the edge length L. - // - // We arbitrarily limit the edge deviation to be at most 10% more than the - // snap radius. With the maximum allowed snap radius of 70 degrees, this - // means that edges up to 30.6 degrees long are never split. For smaller - // snap radii, edges up to 49 degrees long are never split. (Edges of any - // length are not split unless their endpoints move far enough so that the - // actual edge deviation exceeds the limit; in practice, splitting is rare - // even with long edges.) Note that it is always possible to split edges - // when max_edge_deviation() is exceeded; see MaybeAddExtraSites(). 
- S2_DCHECK_LE(snap_radius(), kMaxSnapRadius()); - const double kMaxEdgeDeviationRatio = 1.1; - return kMaxEdgeDeviationRatio * snap_radius(); -} - S2Builder::Options::Options() : snap_function_( make_unique(S1Angle::Zero())) { @@ -146,20 +140,50 @@ S2Builder::Options::Options(const SnapFunction& snap_function) } S2Builder::Options::Options(const Options& options) - : snap_function_(options.snap_function_->Clone()), - split_crossing_edges_(options.split_crossing_edges_), - simplify_edge_chains_(options.simplify_edge_chains_), - idempotent_(options.idempotent_) { + : snap_function_(options.snap_function_->Clone()), + split_crossing_edges_(options.split_crossing_edges_), + intersection_tolerance_(options.intersection_tolerance_), + simplify_edge_chains_(options.simplify_edge_chains_), + idempotent_(options.idempotent_), + memory_tracker_(options.memory_tracker_) { } S2Builder::Options& S2Builder::Options::operator=(const Options& options) { snap_function_ = options.snap_function_->Clone(); split_crossing_edges_ = options.split_crossing_edges_; + intersection_tolerance_ = options.intersection_tolerance_; simplify_edge_chains_ = options.simplify_edge_chains_; idempotent_ = options.idempotent_; + memory_tracker_ = options.memory_tracker_; return *this; } +S1Angle S2Builder::Options::edge_snap_radius() const { + return snap_function().snap_radius() + intersection_tolerance(); +} + +S1Angle S2Builder::Options::max_edge_deviation() const { + // We want max_edge_deviation() to be large enough compared to snap_radius() + // such that edge splitting is rare. + // + // Using spherical trigonometry, if the endpoints of an edge of length L + // move by at most a distance R, the center of the edge moves by at most + // asin(sin(R) / cos(L / 2)). Thus the (max_edge_deviation / snap_radius) + // ratio increases with both the snap radius R and the edge length L. + // + // We arbitrarily limit the edge deviation to be at most 10% more than the + // snap radius. 
With the maximum allowed snap radius of 70 degrees, this + // means that edges up to 30.6 degrees long are never split. For smaller + // snap radii, edges up to 49 degrees long are never split. (Edges of any + // length are not split unless their endpoints move far enough so that the + // actual edge deviation exceeds the limit; in practice, splitting is rare + // even with long edges.) Note that it is always possible to split edges + // when max_edge_deviation() is exceeded; see MaybeAddExtraSites(). + S2_DCHECK_LE(snap_function().snap_radius(), SnapFunction::kMaxSnapRadius()); + const double kMaxEdgeDeviationRatio = 1.1; + return kMaxEdgeDeviationRatio * edge_snap_radius(); +} + bool operator==(const S2Builder::GraphOptions& x, const S2Builder::GraphOptions& y) { return (x.edge_type() == y.edge_type() && @@ -184,8 +208,7 @@ static S1ChordAngle AddPointToEdgeError(S1ChordAngle ca) { return ca.PlusError(S2::GetUpdateMinDistanceMaxError(ca)); } -S2Builder::S2Builder() { -} +S2Builder::S2Builder() = default; S2Builder::S2Builder(const Options& options) { Init(options); @@ -201,49 +224,67 @@ void S2Builder::Init(const Options& options) { // radius" used when evaluating exact predicates (s2predicates.h). site_snap_radius_ca_ = S1ChordAngle(snap_radius); - // When split_crossing_edges() is true, we need to use a larger snap radius - // for edges than for vertices to ensure that both edges are snapped to the - // edge intersection location. This is because the computed intersection - // point is not exact; it may be up to kIntersectionError away from its true - // position. The computed intersection point might then be snapped to some - // other vertex up to snap_radius away. So to ensure that both edges are - // snapped to a common vertex, we need to increase the snap radius for edges - // to at least the sum of these two values (calculated conservatively). 
- S1Angle edge_snap_radius = snap_radius; - if (!options.split_crossing_edges()) { - edge_snap_radius_ca_ = site_snap_radius_ca_; - } else { - edge_snap_radius += S2::kIntersectionError; - edge_snap_radius_ca_ = RoundUp(edge_snap_radius); - } + // When intersection_tolerance() is non-zero we need to use a larger snap + // radius for edges than for vertices to ensure that both edges are snapped + // to the edge intersection location. This is because the computed + // intersection point is not exact; it may be up to intersection_tolerance() + // away from its true position. The computed intersection point might then + // be snapped to some other vertex up to snap_radius away. So to ensure + // that both edges are snapped to a common vertex, we need to increase the + // snap radius for edges to at least the sum of these two values (calculated + // conservatively). + S1Angle edge_snap_radius = options.edge_snap_radius(); + edge_snap_radius_ca_ = RoundUp(edge_snap_radius); snapping_requested_ = (edge_snap_radius > S1Angle::Zero()); // Compute the maximum distance that a vertex can be separated from an // edge while still affecting how that edge is snapped. - max_edge_deviation_ = snap_function.max_edge_deviation(); + max_edge_deviation_ = options.max_edge_deviation(); edge_site_query_radius_ca_ = S1ChordAngle( max_edge_deviation_ + snap_function.min_edge_vertex_separation()); // Compute the maximum edge length such that even if both endpoints move by - // the maximum distance allowed (i.e., snap_radius), the center of the edge - // will still move by less than max_edge_deviation(). This saves us a lot - // of work since then we don't need to check the actual deviation. - min_edge_length_to_split_ca_ = S1ChordAngle::Radians( - 2 * acos(sin(snap_radius) / sin(max_edge_deviation_))); - - // If the condition below is violated, then AddExtraSites() needs to be - // modified to check that snapped edges pass on the same side of each "site - // to avoid" as the input edge. 
Currently it doesn't need to do this - // because the condition below guarantees that if the snapped edge passes on - // the wrong side of the site then it is also too close, which will cause a - // separation site to be added. + // the maximum distance allowed (i.e., edge_snap_radius), the center of the + // edge will still move by less than max_edge_deviation(). This saves us a + // lot of work since then we don't need to check the actual deviation. + if (!snapping_requested_) { + min_edge_length_to_split_ca_ = S1ChordAngle::Infinity(); + } else { + // This value varies between 30 and 50 degrees depending on the snap radius. + min_edge_length_to_split_ca_ = S1ChordAngle::Radians( + 2 * acos(sin(edge_snap_radius) / sin(max_edge_deviation_))); + } + + // In rare cases we may need to explicitly check that the input topology is + // preserved, i.e. that edges do not cross vertices when snapped. This is + // only necessary (1) for vertices added using ForceVertex(), and (2) when the + // snap radius is smaller than intersection_tolerance() (which is typically + // either zero or S2::kIntersectionError, about 9e-16 radians). This + // condition arises because when a geodesic edge is snapped, the edge center + // can move further than its endpoints. This can cause an edge to pass on the + // wrong side of an input vertex. (Note that this could not happen in a + // planar version of this algorithm.) Usually we don't need to consider this + // possibility explicitly, because if the snapped edge passes on the wrong + // side of a vertex then it is also closer than min_edge_vertex_separation() + // to that vertex, which will cause a separation site to be added. + // + // If the condition below is true then we need to check all sites (i.e., + // snapped input vertices) for topology changes. 
However this is almost never + // the case because + // + // max_edge_deviation() == 1.1 * edge_snap_radius() + // and min_edge_vertex_separation() >= 0.219 * snap_radius() // - // Currently max_edge_deviation() is at most 1.1 * snap_radius(), whereas - // min_edge_vertex_separation() is at least 0.219 * snap_radius() (based on - // S2CellIdSnapFunction, which is currently the worst case). - S2_DCHECK_LE(snap_function.max_edge_deviation(), - snap_function.snap_radius() + - snap_function.min_edge_vertex_separation()); + // for all currently implemented snap functions. The condition below is + // only true when intersection_tolerance() is non-zero (which causes + // edge_snap_radius() to exceed snap_radius() by S2::kIntersectionError) and + // snap_radius() is very small (at most S2::kIntersectionError / 1.19). + check_all_site_crossings_ = (options.max_edge_deviation() > + options.edge_snap_radius() + + snap_function.min_edge_vertex_separation()); + if (options.intersection_tolerance() <= S1Angle::Zero()) { + S2_DCHECK(!check_all_site_crossings_); + } // To implement idempotency, we check whether the input geometry could // possibly be the output of a previous S2Builder invocation. This involves @@ -272,7 +313,7 @@ void S2Builder::Init(const Options& options) { // error in the calculation to compare this distance against the bound. double d = sin(edge_snap_radius); edge_snap_radius_sin2_ = d * d; - edge_snap_radius_sin2_ += ((9.5 * d + 2.5 + 2 * sqrt(3.0)) * d + + edge_snap_radius_sin2_ += ((9.5 * d + 2.5 + 2 * sqrt(3)) * d + 9 * DBL_EPSILON) * DBL_EPSILON; // Initialize the current label set. @@ -284,6 +325,8 @@ void S2Builder::Init(const Options& options) { // idempotency, and can also save work. If we discover any reason that the // input geometry needs to be modified, snapping_needed_ is set to true. 
snapping_needed_ = false; + + tracker_.Init(options.memory_tracker()); } void S2Builder::clear_labels() { @@ -341,20 +384,32 @@ S2Builder::InputVertexId S2Builder::AddVertex(const S2Point& v) { // vertices once they have all been added, remove duplicates, and update the // edges. if (input_vertices_.empty() || v != input_vertices_.back()) { + if (!tracker_.AddSpace(&input_vertices_, 1)) return -1; input_vertices_.push_back(v); } return input_vertices_.size() - 1; } +void S2Builder::AddIntersection(const S2Point& vertex) { + // It is an error to call this method without first setting + // intersection_tolerance() to a non-zero value. + S2_DCHECK_GT(options_.intersection_tolerance(), S1Angle::Zero()); + + // Calling this method also overrides the idempotent() option. + snapping_needed_ = true; + + AddVertex(vertex); +} + void S2Builder::AddEdge(const S2Point& v0, const S2Point& v1) { S2_DCHECK(!layers_.empty()) << "Call StartLayer before adding any edges"; - if (v0 == v1 && (layer_options_.back().degenerate_edges() == GraphOptions::DegenerateEdges::DISCARD)) { return; } InputVertexId j0 = AddVertex(v0); InputVertexId j1 = AddVertex(v1); + if (!tracker_.AddSpace(&input_edges_, 1)) return; input_edges_.push_back(InputEdge(j0, j1)); // If there are any labels, then attach them to this input edge. @@ -371,6 +426,12 @@ void S2Builder::AddEdge(const S2Point& v0, const S2Point& v1) { } } +void S2Builder::AddPolyline(S2PointSpan polyline) { + for (size_t i = 1; i < polyline.size(); ++i) { + AddEdge(polyline[i - 1], polyline[i]); + } +} + void S2Builder::AddPolyline(const S2Polyline& polyline) { const int n = polyline.num_vertices(); for (int i = 1; i < n; ++i) { @@ -378,6 +439,12 @@ void S2Builder::AddPolyline(const S2Polyline& polyline) { } } +void S2Builder::AddLoop(S2PointLoopSpan loop) { + for (size_t i = 0; i < loop.size(); ++i) { + AddEdge(loop[i], loop[i + 1]); + } +} + void S2Builder::AddLoop(const S2Loop& loop) { // Ignore loops that do not have a boundary. 
if (loop.is_empty_or_full()) return; @@ -414,6 +481,7 @@ void S2Builder::AddIsFullPolygonPredicate(IsFullPolygonPredicate predicate) { } void S2Builder::ForceVertex(const S2Point& vertex) { + if (!tracker_.AddSpace(&sites_, 1)) return; sites_.push_back(vertex); } @@ -425,8 +493,7 @@ class VertexIdEdgeVectorShape final : public S2Shape { // Requires that "edges" is constant for the lifetime of this object. VertexIdEdgeVectorShape(const vector>& edges, const vector& vertices) - : edges_(edges), vertices_(vertices) { - } + : edges_(edges), vertices_(vertices) {} const S2Point& vertex0(int e) const { return vertex(edges_[e].first); } const S2Point& vertex1(int e) const { return vertex(edges_[e].second); } @@ -457,12 +524,12 @@ class VertexIdEdgeVectorShape final : public S2Shape { bool S2Builder::Build(S2Error* error) { // S2_CHECK rather than S2_DCHECK because this is friendlier than crashing on the - // "error->ok()" call below. It would be easy to allow (error == nullptr) + // "error->Clear()" call below. It would be easy to allow (error == nullptr) // by declaring a local "tmp_error", but it seems better to make clients // think about error handling. S2_CHECK(error != nullptr); - error->Clear(); error_ = error; + error_->Clear(); // Mark the end of the last layer. layer_begins_.push_back(input_edges_.size()); @@ -474,10 +541,12 @@ bool S2Builder::Build(S2Error* error) { ChooseSites(); BuildLayers(); Reset(); - return error->ok(); + if (!tracker_.ok()) *error_ = tracker_.error(); + return error_->ok(); } void S2Builder::Reset() { + // Note that these calls do not change vector capacities. input_vertices_.clear(); input_edges_.clear(); layers_.clear(); @@ -494,36 +563,48 @@ void S2Builder::Reset() { } void S2Builder::ChooseSites() { - if (input_vertices_.empty()) return; + if (!tracker_.ok() || input_vertices_.empty()) return; + // Note that although we always create an S2ShapeIndex, often it is not + // actually built (because this happens lazily). 
Therefore we only test + // its memory usage at the places where it is used. MutableS2ShapeIndex input_edge_index; - input_edge_index.Add(make_unique( - input_edges_, input_vertices_)); + input_edge_index.set_memory_tracker(tracker_.tracker()); + input_edge_index.Add(make_unique(input_edges_, + input_vertices_)); if (options_.split_crossing_edges()) { AddEdgeCrossings(input_edge_index); } + if (snapping_requested_) { S2PointIndex site_index; + auto _ = absl::MakeCleanup([&]() { tracker_.DoneSiteIndex(site_index); }); AddForcedSites(&site_index); ChooseInitialSites(&site_index); + if (!tracker_.FixSiteIndexTally(site_index)) return; CollectSiteEdges(site_index); } if (snapping_needed_) { AddExtraSites(input_edge_index); } else { - CopyInputEdges(); + ChooseAllVerticesAsSites(); } } -void S2Builder::CopyInputEdges() { - // Sort the input vertices, discard duplicates, and update the input edges - // to refer to the pruned vertex list. (We sort in the same order used by - // ChooseInitialSites() to avoid inconsistencies in tests.) +void S2Builder::ChooseAllVerticesAsSites() { + // Sort the input vertices, discard duplicates, and use the result as the + // list of sites. (We sort in the same order used by ChooseInitialSites() + // to avoid inconsistencies in tests.) We also copy the result back to + // input_vertices_ and update the input edges to use the new vertex + // numbering (so that InputVertexId == SiteId). This simplifies the + // implementation of SnapEdge() for this case. 
+ sites_.clear(); + if (!tracker_.AddSpaceExact(&sites_, input_vertices_.size())) return; + const int64 kTempPerVertex = sizeof(InputVertexKey) + sizeof(InputVertexId); + if (!tracker_.TallyTemp(input_vertices_.size() * kTempPerVertex)) return; vector sorted = SortInputVertices(); vector vmap(input_vertices_.size()); - sites_.clear(); - sites_.reserve(input_vertices_.size()); - for (int in = 0; in < sorted.size(); ) { + for (size_t in = 0; in < sorted.size();) { const S2Point& site = input_vertices_[sorted[in].second]; vmap[sorted[in].second] = sites_.size(); while (++in < sorted.size() && input_vertices_[sorted[in].second] == site) { @@ -531,7 +612,7 @@ void S2Builder::CopyInputEdges() { } sites_.push_back(site); } - input_vertices_ = sites_; + input_vertices_ = sites_; // Does not change allocated size. for (InputEdge& e : input_edges_) { e.first = vmap[e.first]; e.second = vmap[e.second]; @@ -593,7 +674,8 @@ vector S2Builder::SortInputVertices() { vector keys; keys.reserve(input_vertices_.size()); - for (InputVertexId i = 0; i < input_vertices_.size(); ++i) { + for (InputVertexId i = 0; static_cast(i) < input_vertices_.size(); + ++i) { keys.push_back(InputVertexKey(S2CellId(input_vertices_[i]), i)); } std::sort(keys.begin(), keys.end(), @@ -609,21 +691,28 @@ vector S2Builder::SortInputVertices() { // points to input_vertices_. (The intersection points will be snapped and // merged with the other vertices during site selection.) void S2Builder::AddEdgeCrossings(const MutableS2ShapeIndex& input_edge_index) { + input_edge_index.ForceBuild(); + if (!tracker_.ok()) return; + // We need to build a list of intersections and add them afterwards so that // we don't reallocate vertices_ during the VisitCrossings() call. 
vector new_vertices; + auto _ = absl::MakeCleanup([&]() { tracker_.Untally(new_vertices); }); s2shapeutil::VisitCrossingEdgePairs( input_edge_index, s2shapeutil::CrossingType::INTERIOR, - [&new_vertices](const s2shapeutil::ShapeEdge& a, - const s2shapeutil::ShapeEdge& b, bool) { + [this, &new_vertices](const s2shapeutil::ShapeEdge& a, + const s2shapeutil::ShapeEdge& b, bool) { + if (!tracker_.AddSpace(&new_vertices, 1)) return false; new_vertices.push_back( S2::GetIntersection(a.v0(), a.v1(), b.v0(), b.v1())); - return true; // Continue visiting. + return true; }); - if (!new_vertices.empty()) { - snapping_needed_ = true; - for (const auto& vertex : new_vertices) AddVertex(vertex); - } + if (new_vertices.empty()) return; + + snapping_needed_ = true; + if (!tracker_.AddSpaceExact(&input_vertices_, new_vertices.size())) return; + input_vertices_.insert(input_vertices_.end(), + new_vertices.begin(), new_vertices.end()); } void S2Builder::AddForcedSites(S2PointIndex* site_index) { @@ -631,14 +720,15 @@ void S2Builder::AddForcedSites(S2PointIndex* site_index) { std::sort(sites_.begin(), sites_.end()); sites_.erase(std::unique(sites_.begin(), sites_.end()), sites_.end()); // Add the forced sites to the index. - for (SiteId id = 0; id < sites_.size(); ++id) { + for (SiteId id = 0; static_cast(id) < sites_.size(); ++id) { + if (!tracker_.TallyIndexedSite()) return; site_index->Add(sites_[id], id); } num_forced_sites_ = sites_.size(); } void S2Builder::ChooseInitialSites(S2PointIndex* site_index) { - // Find all points whose distance is <= min_site_separation_ca_. + // Prepare to find all points whose distance is <= min_site_separation_ca_. 
S2ClosestPointQueryOptions options; options.set_conservative_max_distance(min_site_separation_ca_); S2ClosestPointQuery site_query(site_index, options); @@ -658,32 +748,43 @@ void S2Builder::ChooseInitialSites(S2PointIndex* site_index) { // "0:0, 0:0" rather than the expected "0:0, 0:1", because the snap radius // is approximately sqrt(2) degrees and therefore it is legal to snap both // input points to "0:0". "Snap first" produces "0:0, 0:1" as expected. - for (const InputVertexKey& key : SortInputVertices()) { + // + // Track the memory used by SortInputVertices() before calling it. + if (!tracker_.Tally(input_vertices_.size() * sizeof(InputVertexKey))) return; + vector sorted_keys = SortInputVertices(); + auto _ = absl::MakeCleanup([&]() { tracker_.Untally(sorted_keys); }); + for (const InputVertexKey& key : sorted_keys) { const S2Point& vertex = input_vertices_[key.second]; S2Point site = SnapSite(vertex); // If any vertex moves when snapped, the output cannot be idempotent. snapping_needed_ = snapping_needed_ || site != vertex; - // FindClosestPoints() measures distances conservatively, so we need to - // recheck the distances using exact predicates. - // - // NOTE(ericv): When the snap radius is large compared to the average - // vertex spacing, we could possibly avoid the call the FindClosestPoints - // by checking whether sites_.back() is close enough. - S2ClosestPointQueryPointTarget target(site); - site_query.FindClosestPoints(&target, &results); bool add_site = true; - for (const auto& result : results) { - if (s2pred::CompareDistance(site, result.point(), - min_site_separation_ca_) <= 0) { - add_site = false; - // This pair of sites is too close. If the sites are distinct, then - // the output cannot be idempotent. 
- snapping_needed_ = snapping_needed_ || site != result.point(); + if (site_snap_radius_ca_ == S1ChordAngle::Zero()) { + add_site = sites_.empty() || site != sites_.back(); + } else { + // FindClosestPoints() measures distances conservatively, so we need to + // recheck the distances using exact predicates. + // + // NOTE(ericv): When the snap radius is large compared to the average + // vertex spacing, we could possibly avoid the call the FindClosestPoints + // by checking whether sites_.back() is close enough. + S2ClosestPointQueryPointTarget target(site); + site_query.FindClosestPoints(&target, &results); + for (const auto& result : results) { + if (s2pred::CompareDistance(site, result.point(), + min_site_separation_ca_) <= 0) { + add_site = false; + // This pair of sites is too close. If the sites are distinct, then + // the output cannot be idempotent. + snapping_needed_ = snapping_needed_ || site != result.point(); + } } } if (add_site) { + if (!tracker_.TallyIndexedSite()) return; site_index->Add(site, sites_.size()); + if (!tracker_.AddSpace(&sites_, 1)) return; sites_.push_back(site); site_query.ReInit(); } @@ -693,7 +794,7 @@ void S2Builder::ChooseInitialSites(S2PointIndex* site_index) { S2Point S2Builder::SnapSite(const S2Point& point) const { if (!snapping_requested_) return point; S2Point site = options_.snap_function().SnapPoint(point); -S1ChordAngle dist_moved(site, point); + S1ChordAngle dist_moved(site, point); if (dist_moved > site_snap_radius_ca_) { error_->Init(S2Error::BUILDER_SNAP_RADIUS_TOO_SMALL, "Snap function moved vertex (%.15g, %.15g, %.15g) " @@ -705,23 +806,27 @@ S1ChordAngle dist_moved(site, point); return site; } -// For each edge, find all sites within min_edge_site_query_radius_ca_ and +// For each edge, find all sites within edge_site_query_radius_ca_ and // store them in edge_sites_. Also, to implement idempotency this method also // checks whether the input vertices and edges may already satisfy the output // criteria. 
If any problems are found then snapping_needed_ is set to true. void S2Builder::CollectSiteEdges(const S2PointIndex& site_index) { // Find all points whose distance is <= edge_site_query_radius_ca_. + // + // Memory used by S2ClosestPointQuery is not tracked, but it is temporary, + // typically insignificant, and does not affect the high water mark. S2ClosestPointQueryOptions options; options.set_conservative_max_distance(edge_site_query_radius_ca_); S2ClosestPointQuery site_query(&site_index, options); vector::Result> results; - edge_sites_.resize(input_edges_.size()); - for (InputEdgeId e = 0; e < input_edges_.size(); ++e) { + if (!tracker_.AddSpaceExact(&edge_sites_, input_edges_.size())) return; + edge_sites_.resize(input_edges_.size()); // Construct all elements. + for (InputEdgeId e = 0; static_cast(e) < input_edges_.size(); ++e) { const InputEdge& edge = input_edges_[e]; const S2Point& v0 = input_vertices_[edge.first]; const S2Point& v1 = input_vertices_[edge.second]; if (s2builder_verbose) { - cpp_compat_cout << "S2Polyline: " << s2textformat::ToString(v0) + std::cout << "S2Polyline: " << s2textformat::ToString(v0) << ", " << s2textformat::ToString(v1) << "\n"; } S2ClosestPointQueryEdgeTarget target(v0, v1); @@ -739,20 +844,32 @@ void S2Builder::CollectSiteEdges(const S2PointIndex& site_index) { } } SortSitesByDistance(v0, sites); + if (!tracker_.TallyEdgeSites(*sites)) return; } } +// Sorts the sites in increasing order of distance to X. void S2Builder::SortSitesByDistance(const S2Point& x, compact_array* sites) const { - // Sort sites in increasing order of distance to X. std::sort(sites->begin(), sites->end(), [&x, this](SiteId i, SiteId j) { return s2pred::CompareDistances(x, sites_[i], sites_[j]) < 0; }); } -// There are two situatons where we need to add extra Voronoi sites in order -// to ensure that the snapped edges meet the output requirements: +// Like the above, but inserts "new_site_id" into an already-sorted list. 
+void S2Builder::InsertSiteByDistance(SiteId new_site_id, const S2Point& x, + compact_array* sites) { + if (!tracker_.ReserveEdgeSite(sites)) return; + sites->insert(std::lower_bound( + sites->begin(), sites->end(), new_site_id, + [&x, this](SiteId i, SiteId j) { + return s2pred::CompareDistances(x, sites_[i], sites_[j]) < 0; + }), new_site_id); +} + +// There are two situations where we need to add extra Voronoi sites in order to +// ensure that the snapped edges meet the output requirements: // // (1) If a snapped edge deviates from its input edge by more than // max_edge_deviation(), we add a new site on the input edge near the @@ -760,58 +877,99 @@ void S2Builder::SortSitesByDistance(const S2Point& x, // into two pieces, so that it follows the input edge more closely. // // (2) If a snapped edge is closer than min_edge_vertex_separation() to any -// nearby site (the "site to avoid"), then we add a new site (the -// "separation site") on the input edge near the site to avoid. This -// causes the snapped edge to follow the input edge more closely and is -// guaranteed to increase the separation to the required distance. +// nearby site (the "site to avoid") or passes on the wrong side of it +// relative to the input edge, then we add a new site (the "separation +// site") along the input edge near the site to avoid. This causes the +// snapped edge to follow the input edge more closely, so that it is +// guaranteed to pass on the correct side of the site to avoid with a +// separation of at least the required distance. // // We check these conditions by snapping all the input edges to a chain of // Voronoi sites and then testing each edge in the chain. If a site needs to // be added, we mark all nearby edges for re-snapping. void S2Builder::AddExtraSites(const MutableS2ShapeIndex& input_edge_index) { - // When options_.split_crossing_edges() is true, this function may be called - // even when site_snap_radius_ca_ == 0 (because edge_snap_radius_ca_ > 0). 
- // However neither of the conditions above apply in that case. - if (site_snap_radius_ca_ == S1ChordAngle::Zero()) return; - - vector chain; // Temporary - vector snap_queue; - for (InputEdgeId max_e = 0; max_e < input_edges_.size(); ++max_e) { - snap_queue.push_back(max_e); - while (!snap_queue.empty()) { - InputEdgeId e = snap_queue.back(); - snap_queue.pop_back(); + // Note that we could save some work in AddSnappedEdges() by saving the + // snapped edge chains in a vector, but currently this is not worthwhile + // since SnapEdge() accounts for less than 5% of the runtime. + + // Using 18 buckets is equivalent to `reserve(16)`, which was the + // `expected_max_elements` used by the old `dense_hash_set` version. + // This will actually get us 31 buckets since it gets rounded up. + // We could experiment with different values here. + flat_hash_set edges_to_resnap(/*bucket_count=*/18); + + vector chain; // Temporary storage. + int num_edges_after_snapping = 0; + + // CheckEdge() defines the body of the loops below. + const auto CheckEdge = [&](InputEdgeId e) -> bool { + if (!tracker_.ok()) return false; SnapEdge(e, &chain); - // We could save the snapped chain here in a snapped_chains_ vector, to - // avoid resnapping it in AddSnappedEdges() below, however currently - // SnapEdge only accounts for less than 5% of the runtime. - MaybeAddExtraSites(e, max_e, chain, input_edge_index, &snap_queue); + edges_to_resnap.erase(e); + num_edges_after_snapping += chain.size(); + MaybeAddExtraSites(e, chain, input_edge_index, &edges_to_resnap); + return true; + }; + + // The first pass is different because we snap every edge. In the following + // passes we only snap edges that are near the extra sites that were added. 
+ S2_VLOG(1) << "Before pass 0: sites=" << sites_.size(); + for (InputEdgeId e = 0; static_cast(e) < input_edges_.size(); ++e) { + if (!CheckEdge(e)) return; + } + S2_VLOG(1) << "Pass 0: edges snapped=" << input_edges_.size() + << ", output edges=" << num_edges_after_snapping + << ", sites=" << sites_.size(); + + for (int num_passes = 1; !edges_to_resnap.empty(); ++num_passes) { + auto edges_to_snap = edges_to_resnap; + edges_to_resnap.clear(); + num_edges_after_snapping = 0; + for (InputEdgeId e : edges_to_snap) { + if (!CheckEdge(e)) return; } + S2_VLOG(1) << "Pass " << num_passes + << ": edges snapped=" << edges_to_snap.size() + << ", output edges=" << num_edges_after_snapping + << ", sites=" << sites_.size(); } } -void S2Builder::MaybeAddExtraSites(InputEdgeId edge_id, - InputEdgeId max_edge_id, - const vector& chain, - const MutableS2ShapeIndex& input_edge_index, - vector* snap_queue) { - // The snapped chain is always a *subsequence* of the nearby sites +void S2Builder::MaybeAddExtraSites( + InputEdgeId edge_id, const vector& chain, + const MutableS2ShapeIndex& input_edge_index, + flat_hash_set* edges_to_resnap) { + // If the memory tracker has a periodic callback function, tally an amount + // of memory proportional to the work being done so that the caller has an + // opportunity to cancel the operation if necessary. + if (!tracker_.TallyTemp(chain.size() * sizeof(chain[0]))) return; + + // If the input includes NaN vertices, snapping can produce an empty chain. + if (chain.empty()) return; + + // The snapped edge chain is always a subsequence of the nearby sites // (edge_sites_), so we walk through the two arrays in parallel looking for - // sites that weren't snapped. We also keep track of the current snapped - // edge, since it is the only edge that can be too close. - int i = 0; - for (SiteId id : edge_sites_[edge_id]) { + // sites that weren't snapped. These are the "sites to avoid". 
We also keep + // track of the current snapped edge, since it is the only edge that can be + // too close or pass on the wrong side of a site to avoid. Vertices beyond + // the chain endpoints in either direction can be ignored because only the + // interiors of chain edges can be too close to a site to avoid. + const InputEdge& edge = input_edges_[edge_id]; + const S2Point& a0 = input_vertices_[edge.first]; + const S2Point& a1 = input_vertices_[edge.second]; + const auto& nearby_sites = edge_sites_[edge_id]; + for (size_t i = 0, j = 0; j < nearby_sites.size(); ++j) { + SiteId id = nearby_sites[j]; if (id == chain[i]) { - if (++i == chain.size()) break; + // This site is a vertex of the snapped edge chain. + if (++i == chain.size()) { + break; // Sites beyond the end of the snapped chain can be ignored. + } // Check whether this snapped edge deviates too far from its original // position. If so, we split the edge by adding an extra site. const S2Point& v0 = sites_[chain[i - 1]]; const S2Point& v1 = sites_[chain[i]]; if (S1ChordAngle(v0, v1) < min_edge_length_to_split_ca_) continue; - - const InputEdge& edge = input_edges_[edge_id]; - const S2Point& a0 = input_vertices_[edge.first]; - const S2Point& a1 = input_vertices_[edge.second]; if (!S2::IsEdgeBNearEdgeA(a0, a1, v0, v1, max_edge_deviation_)) { // Add a new site on the input edge, positioned so that it splits the // snapped edge into two approximately equal pieces. Then we find all @@ -825,57 +983,109 @@ void S2Builder::MaybeAddExtraSites(InputEdgeId edge_id, S2Point mid = (S2::Project(v0, a0, a1) + S2::Project(v1, a0, a1)).Normalize(); S2Point new_site = GetSeparationSite(mid, v0, v1, edge_id); - AddExtraSite(new_site, max_edge_id, input_edge_index, snap_queue); + AddExtraSite(new_site, input_edge_index, edges_to_resnap); + + // In the case where the edge wrapped around the sphere the "wrong + // way", it is not safe to continue checking this edge. 
It will be + // marked for resnapping and we will come back to it in the next pass. return; } - } else if (i > 0 && id >= num_forced_sites_) { - // Check whether this "site to avoid" is closer to the snapped edge than - // min_edge_vertex_separation(). Note that this is the only edge of the - // chain that can be too close because its vertices must span the point - // where "site_to_avoid" projects onto the input edge XY (this claim - // relies on the fact that all sites are separated by at least the snap - // radius). We don't try to avoid sites added using ForceVertex() - // because we don't guarantee any minimum separation from such sites. + } else { + // This site is near the input edge but is not part of the snapped chain. + if (i == 0) { + continue; // Sites before the start of the chain can be ignored. + } + // We need to ensure that non-forced sites are separated by at least + // min_edge_vertex_separation() from the snapped chain. This happens + // automatically as part of the algorithm except where there are portions + // of the input edge that are not within edge_snap_radius() of any site. + // These portions of the original edge are called "coverage gaps". + // Therefore if we find that a site to avoid that is too close to the + // snapped edge chain, we can fix the problem by adding a new site (the + // "separation site") in the corresponding coverage gap located as closely + // as possible to the site to avoid. This technique is is guaranteed to + // produce the required minimum separation, and the entire process of + // adding separation sites is guaranteed to terminate. 
const S2Point& site_to_avoid = sites_[id]; const S2Point& v0 = sites_[chain[i - 1]]; const S2Point& v1 = sites_[chain[i]]; - if (s2pred::CompareEdgeDistance( + bool add_separation_site = false; + if (!is_forced(id) && + min_edge_site_separation_ca_ > S1ChordAngle::Zero() && + s2pred::CompareEdgeDistance( site_to_avoid, v0, v1, min_edge_site_separation_ca_) < 0) { - // A snapped edge can only approach a site too closely when there are - // no sites near the input edge near that point. We fix that by - // adding a new site along the input edge (a "separation site"), then - // we find all the edges near the new site (including this one) and + add_separation_site = true; + } + // Similarly, we also add a separation site whenever a snapped edge passes + // on the wrong side of a site to avoid. Normally we don't need to worry + // about this, since if an edge passes on the wrong side of a nearby site + // then it is also too close to it. However if the snap radius is very + // small and intersection_tolerance() is non-zero then we need to check + // this condition explicitly (see the "check_all_site_crossings_" flag for + // details). We also need to check this condition explicitly for forced + // vertices. Again, we can solve this problem by adding a "separation + // site" in the corresponding coverage gap located as closely as possible + // to the site to avoid. + // + // It is possible to show that when all points are projected onto the + // great circle through (a0, a1), no improper crossing occurs unless the + // the site to avoid is located between a0 and a1, and also between v0 + // and v1. TODO(ericv): Verify whether all these checks are necessary. 
+ if (!add_separation_site && + (is_forced(id) || check_all_site_crossings_) && + (s2pred::Sign(a0, a1, site_to_avoid) != + s2pred::Sign(v0, v1, site_to_avoid))&& + s2pred::CompareEdgeDirections(a0, a1, a0, site_to_avoid) > 0 && + s2pred::CompareEdgeDirections(a0, a1, site_to_avoid, a1) > 0 && + s2pred::CompareEdgeDirections(a0, a1, v0, site_to_avoid) > 0 && + s2pred::CompareEdgeDirections(a0, a1, site_to_avoid, v1) > 0) { + add_separation_site = true; + } + if (add_separation_site) { + // We add a new site (the separation site) in the coverage gap along the + // input edge, located as closely as possible to the site to avoid. + // Then we find all the edges near the new site (including this one) and // add them to the snap queue. S2Point new_site = GetSeparationSite(site_to_avoid, v0, v1, edge_id); S2_DCHECK_NE(site_to_avoid, new_site); - AddExtraSite(new_site, max_edge_id, input_edge_index, snap_queue); - return; + AddExtraSite(new_site, input_edge_index, edges_to_resnap); + + // Skip the remaining sites near this chain edge, and then continue + // scanning this chain. Note that this is safe even though the call + // to AddExtraSite() above added a new site to "nearby_sites". + for (; nearby_sites[j + 1] != chain[i]; ++j) {} } } } } -// Adds a new site, then updates "edge_sites"_ for all edges near the new site -// and adds them to "snap_queue" for resnapping (unless their edge id exceeds -// "max_edge_id", since those edges have not been snapped the first time yet). +// Adds a new site, then updates "edge_sites_" for all edges near the new site +// and adds them to "edges_to_resnap" for resnapping. 
void S2Builder::AddExtraSite(const S2Point& new_site, - InputEdgeId max_edge_id, const MutableS2ShapeIndex& input_edge_index, - vector* snap_queue) { + flat_hash_set* edges_to_resnap) { + if (!sites_.empty()) S2_DCHECK_NE(new_site, sites_.back()); + if (!tracker_.AddSpace(&sites_, 1)) return; SiteId new_site_id = sites_.size(); sites_.push_back(new_site); + // Find all edges whose distance is <= edge_site_query_radius_ca_. S2ClosestEdgeQuery::Options options; options.set_conservative_max_distance(edge_site_query_radius_ca_); options.set_include_interiors(false); + + if (!input_edge_index.is_fresh()) input_edge_index.ForceBuild(); + if (!tracker_.ok()) return; + + // Memory used by S2ClosestEdgeQuery is not tracked, but it is temporary, + // typically insignificant, and does not affect the high water mark. S2ClosestEdgeQuery query(&input_edge_index, options); S2ClosestEdgeQuery::PointTarget target(new_site); for (const auto& result : query.FindClosestEdges(&target)) { InputEdgeId e = result.edge_id(); - auto* site_ids = &edge_sites_[e]; - site_ids->push_back(new_site_id); - SortSitesByDistance(input_vertices_[input_edges_[e].first], site_ids); - if (e <= max_edge_id) snap_queue->push_back(e); + const S2Point& v0 = input_vertices_[input_edges_[e].first]; + InsertSiteByDistance(new_site_id, v0, &edge_sites_[e]); + edges_to_resnap->insert(e); } } @@ -904,8 +1114,8 @@ S2Point S2Builder::GetSeparationSite(const S2Point& site_to_avoid, Vector3_d xy_dir = y - x; S2Point n = S2::RobustCrossProd(x, y); S2Point new_site = S2::Project(site_to_avoid, x, y, n); - S2Point gap_min = GetCoverageEndpoint(v0, x, y, n); - S2Point gap_max = GetCoverageEndpoint(v1, y, x, -n); + S2Point gap_min = GetCoverageEndpoint(v0, n); + S2Point gap_max = GetCoverageEndpoint(v1, -n); if ((new_site - gap_min).DotProd(xy_dir) < 0) { new_site = gap_min; } else if ((gap_max - new_site).DotProd(xy_dir) < 0) { @@ -920,8 +1130,7 @@ S2Point S2Builder::GetSeparationSite(const S2Point& site_to_avoid, // Given 
a site P and an edge XY with normal N, intersect XY with the disc of // radius snap_radius() around P, and return the intersection point that is // further along the edge XY toward Y. -S2Point S2Builder::GetCoverageEndpoint(const S2Point& p, const S2Point& x, - const S2Point& y, const S2Point& n) +S2Point S2Builder::GetCoverageEndpoint(const S2Point& p, const S2Point& n) const { // Consider the plane perpendicular to P that cuts off a spherical cap of // radius snap_radius(). This plane intersects the plane through the edge @@ -956,6 +1165,8 @@ void S2Builder::SnapEdge(InputEdgeId e, vector* chain) const { chain->clear(); const InputEdge& edge = input_edges_[e]; if (!snapping_needed_) { + // Note that the input vertices have been renumbered such that + // InputVertexId and SiteId are the same (see ChooseAllVerticesAsSites). chain->push_back(edge.first); chain->push_back(edge.second); return; @@ -1041,53 +1252,66 @@ void S2Builder::SnapEdge(InputEdgeId e, vector* chain) const { } } if (s2builder_verbose) { - cpp_compat_cout << "(" << edge.first << "," << edge.second << "): "; - for (SiteId id : *chain) cpp_compat_cout << id << " "; - cpp_compat_cout << std::endl; + std::cout << "(" << edge.first << "," << edge.second << "): "; + for (SiteId id : *chain) std::cout << id << " "; + std::cout << std::endl; } } void S2Builder::BuildLayers() { + if (!tracker_.ok()) return; + // Each output edge has an "input edge id set id" (an int32) representing // the set of input edge ids that were snapped to this edge. The actual // InputEdgeIds can be retrieved using "input_edge_id_set_lexicon". vector> layer_edges; vector> layer_input_edge_ids; IdSetLexicon input_edge_id_set_lexicon; + vector> layer_vertices; BuildLayerEdges(&layer_edges, &layer_input_edge_ids, &input_edge_id_set_lexicon); - - // At this point we have no further need for the input geometry or nearby - // site data, so we clear those fields to save space. 
- vector().swap(input_vertices_); - vector().swap(input_edges_); - vector>().swap(edge_sites_); + auto _ = absl::MakeCleanup([&]() { + for (size_t i = 0; i < layers_.size(); ++i) { + tracker_.Untally(layer_edges[i]); + tracker_.Untally(layer_input_edge_ids[i]); + if (!layer_vertices.empty()) tracker_.Untally(layer_vertices[i]); + } + }); // If there are a large number of layers, then we build a minimal subset of // vertices for each layer. This ensures that layer types that iterate over // vertices will run in time proportional to the size of that layer rather // than the size of all layers combined. - vector> layer_vertices; static const int kMinLayersForVertexFiltering = 10; if (layers_.size() >= kMinLayersForVertexFiltering) { // Disable vertex filtering if it is disallowed by any layer. (This could // be optimized, but in current applications either all layers allow // filtering or none of them do.) - bool allow_vertex_filtering = false; + bool allow_vertex_filtering = true; for (const auto& options : layer_options_) { allow_vertex_filtering &= options.allow_vertex_filtering(); } if (allow_vertex_filtering) { - vector filter_tmp; // Temporary used by FilterVertices. + // Track the temporary memory used by FilterVertices(). Note that + // although vertex filtering can increase the number of vertices stored + // (i.e., if the same vertex is referred to by multiple layers), it + // never increases storage quadratically because there can be at most + // two filtered vertices per edge. + if (!tracker_.TallyFilterVertices(sites_.size(), layer_edges)) return; + auto _ = absl::MakeCleanup([this]() { tracker_.DoneFilterVertices(); }); layer_vertices.resize(layers_.size()); - for (int i = 0; i < layers_.size(); ++i) { + vector filter_tmp; // Temporary used by FilterVertices. 
+ for (size_t i = 0; i < layers_.size(); ++i) { layer_vertices[i] = Graph::FilterVertices(sites_, &layer_edges[i], &filter_tmp); + if (!tracker_.Tally(layer_vertices[i])) return; } - vector().swap(sites_); // Release memory + tracker_.Clear(&sites_); // Releases memory. } } - for (int i = 0; i < layers_.size(); ++i) { + if (!tracker_.ok()) return; + + for (size_t i = 0; i < layers_.size(); ++i) { const vector& vertices = (layer_vertices.empty() ? sites_ : layer_vertices[i]); Graph graph(layer_options_[i], &vertices, &layer_edges[i], @@ -1106,7 +1330,7 @@ static void DumpEdges(const vector& edges, vector v; v.push_back(vertices[e.first]); v.push_back(vertices[e.second]); - cpp_compat_cout << "S2Polyline: " << s2textformat::ToString(v) + std::cout << "S2Polyline: " << s2textformat::ToString(v) << "(" << e.first << "," << e.second << ")" << std::endl; } } @@ -1123,31 +1347,41 @@ void S2Builder::BuildLayerEdges( IdSetLexicon* input_edge_id_set_lexicon) { // Edge chains are simplified only when a non-zero snap radius is specified. // If so, we build a map from each site to the set of input vertices that - // snapped to that site. + // snapped to that site. (Note that site_vertices is relatively small and + // that its memory tracking is deferred until TallySimplifyEdgeChains.) vector> site_vertices; bool simplify = snapping_needed_ && options_.simplify_edge_chains(); if (simplify) site_vertices.resize(sites_.size()); layer_edges->resize(layers_.size()); layer_input_edge_ids->resize(layers_.size()); - for (int i = 0; i < layers_.size(); ++i) { + for (size_t i = 0; i < layers_.size(); ++i) { AddSnappedEdges(layer_begins_[i], layer_begins_[i+1], layer_options_[i], &(*layer_edges)[i], &(*layer_input_edge_ids)[i], input_edge_id_set_lexicon, &site_vertices); } + + // We simplify edge chains before processing the per-layer GraphOptions + // because simplification can create duplicate edges and/or sibling edge + // pairs which may need to be removed. 
if (simplify) { SimplifyEdgeChains(site_vertices, layer_edges, layer_input_edge_ids, input_edge_id_set_lexicon); + vector>().swap(site_vertices); } - // We simplify edge chains before processing the per-layer GraphOptions - // because simplification can create duplicate edges and/or sibling edge - // pairs which may need to be removed. - for (int i = 0; i < layers_.size(); ++i) { + + // At this point we have no further need for nearby site data, so we clear + // it to save space. We keep input_vertices_ and input_edges_ so that + // S2Builder::Layer implementations can access them if desired. (This is + // useful for determining how snapping has changed the input geometry.) + tracker_.ClearEdgeSites(&edge_sites_); + for (size_t i = 0; i < layers_.size(); ++i) { // The errors generated by ProcessEdges are really warnings, so we simply // record them and continue. Graph::ProcessEdges(&layer_options_[i], &(*layer_edges)[i], &(*layer_input_edge_ids)[i], - input_edge_id_set_lexicon, error_); + input_edge_id_set_lexicon, error_, &tracker_); + if (!tracker_.ok()) return; } } @@ -1159,13 +1393,21 @@ void S2Builder::AddSnappedEdges( InputEdgeId begin, InputEdgeId end, const GraphOptions& options, vector* edges, vector* input_edge_ids, IdSetLexicon* input_edge_id_set_lexicon, - vector>* site_vertices) const { + vector>* site_vertices) { bool discard_degenerate_edges = (options.degenerate_edges() == GraphOptions::DegenerateEdges::DISCARD); vector chain; for (InputEdgeId e = begin; e < end; ++e) { InputEdgeIdSetId id = input_edge_id_set_lexicon->AddSingleton(e); SnapEdge(e, &chain); + if (chain.empty()) { + continue; + } + + int num_snapped_edges = max(1, chain.size() - 1); + if (options.edge_type() == EdgeType::UNDIRECTED) num_snapped_edges *= 2; + if (!tracker_.AddSpace(edges, num_snapped_edges)) return; + if (!tracker_.AddSpace(input_edge_ids, num_snapped_edges)) return; MaybeAddInputVertex(input_edges_[e].first, chain[0], site_vertices); if (chain.size() == 1) { if 
(discard_degenerate_edges) continue; @@ -1173,7 +1415,7 @@ void S2Builder::AddSnappedEdges( edges, input_edge_ids); } else { MaybeAddInputVertex(input_edges_[e].second, chain.back(), site_vertices); - for (int i = 1; i < chain.size(); ++i) { + for (size_t i = 1; i < chain.size(); ++i) { AddSnappedEdge(chain[i-1], chain[i], id, options.edge_type(), edges, input_edge_ids); } @@ -1183,7 +1425,9 @@ void S2Builder::AddSnappedEdges( } // If "site_vertices" is non-empty, ensures that (*site_vertices)[id] contains -// "v". Duplicate entries are allowed. +// "v". Duplicate entries are allowed. The purpose of this function is to +// build a map so that SimplifyEdgeChains() can quickly find all the input +// vertices that snapped to a particular site. inline void S2Builder::MaybeAddInputVertex( InputVertexId v, SiteId id, vector>* site_vertices) const { @@ -1194,6 +1438,7 @@ inline void S2Builder::MaybeAddInputVertex( // destination of one edge is the same as the source of the next edge. auto& vertices = (*site_vertices)[id]; if (vertices.empty() || vertices.back() != v) { + // Memory tracking is deferred until SimplifyEdgeChains. 
vertices.push_back(v); } } @@ -1236,6 +1481,7 @@ class S2Builder::EdgeChainSimplifier { using VertexId = Graph::VertexId; class InteriorVertexMatcher; + void OutputEdge(EdgeId e); int graph_edge_layer(EdgeId e) const; int input_edge_layer(InputEdgeId id) const; @@ -1245,24 +1491,26 @@ class S2Builder::EdgeChainSimplifier { void OutputAllEdges(VertexId v0, VertexId v1); bool TargetInputVertices(VertexId v, S2PolylineSimplifier* simplifier) const; bool AvoidSites(VertexId v0, VertexId v1, VertexId v2, + flat_hash_set* used_vertices, S2PolylineSimplifier* simplifier) const; void MergeChain(const vector& vertices); void AssignDegenerateEdges( const vector& degenerate_ids, - vector>* merged_input_ids) const; + vector>* merged_ids) const; + // LINT.IfChange const S2Builder& builder_; const Graph& g_; Graph::VertexInMap in_; Graph::VertexOutMap out_; - vector edge_layers_; + const vector& edge_layers_; const vector>& site_vertices_; vector>* layer_edges_; vector>* layer_input_edge_ids_; IdSetLexicon* input_edge_id_set_lexicon_; // Convenience member copied from builder_. - const std::vector& layer_begins_; + const vector& layer_begins_; // is_interior_[v] indicates that VertexId "v" is eligible to be an interior // vertex of a simplified edge chain. You can think of it as vertex whose @@ -1273,9 +1521,10 @@ class S2Builder::EdgeChainSimplifier { // used_[e] indicates that EdgeId "e" has already been processed. vector used_; - // Temporary vectors, declared here to avoid repeated allocation. + // Temporary objects declared here to avoid repeated allocation. vector tmp_vertices_; vector tmp_edges_; + flat_hash_set tmp_vertex_set_; // The output edges after simplification. 
vector new_edges_; @@ -1288,8 +1537,9 @@ void S2Builder::SimplifyEdgeChains( const vector>& site_vertices, vector>* layer_edges, vector>* layer_input_edge_ids, - IdSetLexicon* input_edge_id_set_lexicon) const { + IdSetLexicon* input_edge_id_set_lexicon) { if (layers_.empty()) return; + if (!tracker_.TallySimplifyEdgeChains(site_vertices, *layer_edges)) return; // Merge the edges from all layers (in order to build a single graph). vector merged_edges; @@ -1316,6 +1566,7 @@ void S2Builder::SimplifyEdgeChains( layer_edges, layer_input_edge_ids, input_edge_id_set_lexicon); simplifier.Run(); } +// LINT.ThenChange(:TallySimplifyEdgeChains) // Merges the edges from all layers and sorts them in lexicographic order so // that we can construct a single graph. The sort is stable, which means that @@ -1326,8 +1577,8 @@ void S2Builder::MergeLayerEdges( vector* edges, vector* input_edge_ids, vector* edge_layers) const { vector order; - for (int i = 0; i < layer_edges.size(); ++i) { - for (int e = 0; e < layer_edges[i].size(); ++e) { + for (size_t i = 0; i < layer_edges.size(); ++i) { + for (size_t e = 0; e < layer_edges[i].size(); ++e) { order.push_back(LayerEdgeId(i, e)); } } @@ -1353,7 +1604,7 @@ inline bool S2Builder::StableLessThan( const Edge& a, const Edge& b, const LayerEdgeId& ai, const LayerEdgeId& bi) { // The compiler doesn't optimize this as well as it should: - // return make_pair(a, ai) < make_pair(b, bi); + // return std::make_pair(a, ai) < std::make_pair(b, bi); if (a.first < b.first) return true; if (b.first < a.first) return false; if (a.second < b.second) return true; @@ -1372,7 +1623,9 @@ S2Builder::EdgeChainSimplifier::EdgeChainSimplifier( layer_input_edge_ids_(layer_input_edge_ids), input_edge_id_set_lexicon_(input_edge_id_set_lexicon), layer_begins_(builder_.layer_begins_), - is_interior_(g.num_vertices()), used_(g.num_edges()) { + is_interior_(g.num_vertices()), used_(g.num_edges()), + // See `AddExtraSites` for explanation of `bucket_count`. 
+ tmp_vertex_set_(/*bucket_count=*/18) { new_edges_.reserve(g.num_edges()); new_input_edge_ids_.reserve(g.num_edges()); new_edge_layers_.reserve(g.num_edges()); @@ -1419,10 +1672,12 @@ void S2Builder::EdgeChainSimplifier::Run() { SimplifyChain(edge.first, edge.second); } } + // TODO(ericv): The graph is not needed past here, so we could save some + // memory by clearing the underlying Edge and InputEdgeIdSetId vectors. // Finally, copy the output edges into the appropriate layers. They don't // need to be sorted because the input edges were also unsorted. - for (int e = 0; e < new_edges_.size(); ++e) { + for (size_t e = 0; e < new_edges_.size(); ++e) { int layer = new_edge_layers_[e]; (*layer_edges_)[layer].push_back(new_edges_[e]); (*layer_input_edge_ids_)[layer].push_back(new_input_edge_ids_[e]); @@ -1527,7 +1782,7 @@ bool S2Builder::EdgeChainSimplifier::IsInterior(VertexId v) { // Check a few simple prerequisites. if (out_.degree(v) == 0) return false; if (out_.degree(v) != in_.degree(v)) return false; - if (v < builder_.num_forced_sites_) return false; // Keep forced vertices. + if (builder_.is_forced(v)) return false; // Keep forced vertices. // Sort the edges so that they are grouped by layer. vector& edges = tmp_edges_; // Avoid allocating each time. @@ -1558,16 +1813,25 @@ bool S2Builder::EdgeChainSimplifier::IsInterior(VertexId v) { void S2Builder::EdgeChainSimplifier::SimplifyChain(VertexId v0, VertexId v1) { // Avoid allocating "chain" each time by reusing it. vector& chain = tmp_vertices_; + // Contains the set of vertices that have either been avoided or added to + // the chain so far. This is necessary so that AvoidSites() doesn't try to + // avoid vertices that have already been added to the chain. + flat_hash_set& used_vertices = tmp_vertex_set_; S2PolylineSimplifier simplifier; VertexId vstart = v0; bool done = false; do { - // Simplify a subchain of edges starting (v0, v1). 
- simplifier.Init(g_.vertex(v0)); - AvoidSites(v0, v0, v1, &simplifier); + // Simplify a subchain of edges starting with (v0, v1). chain.push_back(v0); + used_vertices.insert(v0); + simplifier.Init(g_.vertex(v0)); + // Note that if the first edge (v0, v1) is longer than the maximum length + // allowed for simplification, then AvoidSites() will return false and we + // exit the loop below after the first iteration. + const bool simplify = AvoidSites(v0, v0, v1, &used_vertices, &simplifier); do { chain.push_back(v1); + used_vertices.insert(v1); done = !is_interior_[v1] || v1 == vstart; if (done) break; @@ -1575,8 +1839,8 @@ void S2Builder::EdgeChainSimplifier::SimplifyChain(VertexId v0, VertexId v1) { VertexId vprev = v0; v0 = v1; v1 = FollowChain(vprev, v0); - } while (TargetInputVertices(v0, &simplifier) && - AvoidSites(chain[0], v0, v1, &simplifier) && + } while (simplify && TargetInputVertices(v0, &simplifier) && + AvoidSites(chain[0], v0, v1, &used_vertices, &simplifier) && simplifier.Extend(g_.vertex(v1))); if (chain.size() == 2) { @@ -1587,6 +1851,7 @@ void S2Builder::EdgeChainSimplifier::SimplifyChain(VertexId v0, VertexId v1) { // Note that any degenerate edges that were not merged into a chain are // output by EdgeChainSimplifier::Run(). chain.clear(); + used_vertices.clear(); } while (!done); } @@ -1626,6 +1891,7 @@ bool S2Builder::EdgeChainSimplifier::TargetInputVertices( // near the edge (v1, v2) are avoided by at least min_edge_vertex_separation. bool S2Builder::EdgeChainSimplifier::AvoidSites( VertexId v0, VertexId v1, VertexId v2, + flat_hash_set* used_vertices, S2PolylineSimplifier* simplifier) const { const S2Point& p0 = g_.vertex(v0); const S2Point& p1 = g_.vertex(v1); @@ -1669,15 +1935,16 @@ bool S2Builder::EdgeChainSimplifier::AvoidSites( S2_DCHECK_GE(best, 0); // Because there is at least one outgoing edge. for (VertexId v : edge_sites[best]) { - // This test is optional since these sites are excluded below anyway. 
- if (v == v0 || v == v1 || v == v2) continue; - - // We are only interested in sites whose distance from "p0" is in the - // range (r1, r2). Sites closer than "r1" have already been processed, - // and sites further than "r2" aren't relevant yet. + // Sites whose distance from "p0" is at least "r2" are not relevant yet. const S2Point& p = g_.vertex(v); S1ChordAngle r(p0, p); - if (r <= r1 || r >= r2) continue; + if (r >= r2) continue; + + // The following test prevents us from avoiding previous vertices of the + // edge chain that also happen to be nearby the current edge. (It also + // happens to ensure that each vertex is avoided at most once, but this is + // just an optimization.) + if (!used_vertices->insert(v).second) continue; // We need to figure out whether this site is to the left or right of the // edge chain. For the first edge this is easy. Otherwise, since we are @@ -1706,7 +1973,7 @@ void S2Builder::EdgeChainSimplifier::MergeChain( vector> merged_input_ids; vector degenerate_ids; int num_out; // Edge count in the outgoing direction. - for (int i = 1; i < vertices.size(); ++i) { + for (size_t i = 1; i < vertices.size(); ++i) { VertexId v0 = vertices[i-1]; VertexId v1 = vertices[i]; auto out_edges = out_.edge_ids(v0, v1); @@ -1802,7 +2069,7 @@ void S2Builder::EdgeChainSimplifier::AssignDegenerateEdges( // such edges from the lists of candidates. vector order; order.reserve(merged_ids->size()); - for (int i = 0; i < merged_ids->size(); ++i) { + for (size_t i = 0; i < merged_ids->size(); ++i) { if (!(*merged_ids)[i].empty()) order.push_back(i); } std::sort(order.begin(), order.end(), [&merged_ids](int i, int j) { @@ -1827,3 +2094,138 @@ void S2Builder::EdgeChainSimplifier::AssignDegenerateEdges( (*merged_ids)[it[0]].push_back(degenerate_id); } } + + +/////////////////////// S2Builder::MemoryTracker ///////////////////////// + + +// Called to track memory used to store the set of sites near a given edge. 
+bool S2Builder::MemoryTracker::TallyEdgeSites( + const compact_array& sites) { + int64 size = GetCompactArrayAllocBytes(sites); + edge_sites_bytes_ += size; + return Tally(size); +} + +// Ensures that "sites" contains space for at least one more edge site. +bool S2Builder::MemoryTracker::ReserveEdgeSite(compact_array* sites) { + int64 new_size = sites->size() + 1; + if (new_size <= sites->capacity()) return true; + int64 old_bytes = GetCompactArrayAllocBytes(*sites); + sites->reserve(new_size); + int64 added_bytes = GetCompactArrayAllocBytes(*sites) - old_bytes; + edge_sites_bytes_ += added_bytes; + return Tally(added_bytes); +} + +// Releases and tracks the memory used to store nearby edge sites. +bool S2Builder::MemoryTracker::ClearEdgeSites( + vector>* edge_sites) { + Tally(-edge_sites_bytes_); + edge_sites_bytes_ = 0; + return Clear(edge_sites); +} + +// Called when a site is added to the S2PointIndex. +bool S2Builder::MemoryTracker::TallyIndexedSite() { + // S2PointIndex stores its data in a btree. In general btree nodes are only + // guaranteed to be half full, but in our case all nodes are full except for + // the rightmost node at each btree level because the values are added in + // sorted order. + int64 delta_bytes = GetBtreeMinBytesPerEntry< + absl::btree_multimap::PointData>>(); + site_index_bytes_ += delta_bytes; + return Tally(delta_bytes); +} + +// Corrects the approximate S2PointIndex memory tracking done above. +bool S2Builder::MemoryTracker::FixSiteIndexTally( + const S2PointIndex& index) { + int64 delta_bytes = index.SpaceUsed() - site_index_bytes_; + site_index_bytes_ += delta_bytes; + return Tally(delta_bytes); +} + +// Tracks memory due to destroying the site index. +bool S2Builder::MemoryTracker::DoneSiteIndex( + const S2PointIndex& index) { + Tally(-site_index_bytes_); + site_index_bytes_ = 0; + return ok(); +} + +// Called to indicate that edge simplification was requested. 
+// LINT.IfChange(TallySimplifyEdgeChains) +bool S2Builder::MemoryTracker::TallySimplifyEdgeChains( + const vector>& site_vertices, + const vector>& layer_edges) { + if (!is_active()) return true; + + // The simplify_edge_chains() option uses temporary memory per site + // (output vertex) and per output edge, as outlined below. + // + // Per site: + // vector> site_vertices; // BuildLayerEdges + // - compact_array non-inlined space is tallied separately + // vector is_interior_; // EdgeChainSimplifier + // Graph::VertexInMap in_; // EdgeChainSimplifier + // Graph::VertexOutMap out_; // EdgeChainSimplifier + const int64 kTempPerSite = + sizeof(compact_array) + sizeof(bool) + 2 * sizeof(EdgeId); + + // Per output edge: + // vector used_; // EdgeChainSimplifier + // Graph::VertexInMap in_; // EdgeChainSimplifier + // vector merged_edges; // SimplifyEdgeChains + // vector merged_input_edge_ids; // SimplifyEdgeChains + // vector merged_edge_layers; // SimplifyEdgeChains + // vector new_edges_; // EdgeChainSimplifier + // vector new_input_edge_ids_; // EdgeChainSimplifier + // vector new_edge_layers_; // EdgeChainSimplifier + // + // Note that the temporary vector in MergeLayerEdges() does not + // affect peak usage. + const int64 kTempPerEdge = sizeof(bool) + sizeof(EdgeId) + + 2 * sizeof(Edge) + 2 * sizeof(InputEdgeIdSetId) + + 2 * sizeof(int); + int64 simplify_bytes = site_vertices.size() * kTempPerSite; + for (const auto& array : site_vertices) { + simplify_bytes += GetCompactArrayAllocBytes(array); + } + for (const auto& edges : layer_edges) { + simplify_bytes += edges.size() * kTempPerEdge; + } + return TallyTemp(simplify_bytes); +} +// LINT.ThenChange() + +// Tracks the temporary memory used by Graph::FilterVertices. 
+// LINT.IfChange(TallyFilterVertices) +bool S2Builder::MemoryTracker::TallyFilterVertices( + int num_sites, const vector>& layer_edges) { + if (!is_active()) return true; + + // Vertex filtering (see BuildLayers) uses temporary space of one VertexId + // per Voronoi site plus 2 VertexIds per layer edge, plus space for all the + // vertices after filtering. + // + // vector *tmp; // Graph::FilterVertices + // vector used; // Graph::FilterVertices + const int64 kTempPerSite = sizeof(Graph::VertexId); + const int64 kTempPerEdge = 2 * sizeof(Graph::VertexId); + + size_t max_layer_edges = 0; + for (const auto& edges : layer_edges) { + max_layer_edges = max(max_layer_edges, edges.size()); + } + filter_vertices_bytes_ = (num_sites * kTempPerSite + + max_layer_edges * kTempPerEdge); + return Tally(filter_vertices_bytes_); +} +// LINT.ThenChange() + +bool S2Builder::MemoryTracker::DoneFilterVertices() { + Tally(-filter_vertices_bytes_); + filter_vertices_bytes_ = 0; + return ok(); +} diff --git a/src/s2/s2builder.h b/src/s2/s2builder.h index 32bf0e0c..86648cf4 100644 --- a/src/s2/s2builder.h +++ b/src/s2/s2builder.h @@ -21,19 +21,30 @@ #ifndef S2_S2BUILDER_H_ #define S2_S2BUILDER_H_ +#include +#include #include #include #include -#include "s2/base/integral_types.h" + #include "absl/base/macros.h" +#include "absl/container/flat_hash_set.h" + +#include "s2/base/integral_types.h" #include "s2/_fp_contract_off.h" #include "s2/id_set_lexicon.h" #include "s2/mutable_s2shape_index.h" #include "s2/s1angle.h" #include "s2/s1chord_angle.h" #include "s2/s2cell_id.h" +#include "s2/s2edge_crossings.h" +#include "s2/s2edge_distances.h" #include "s2/s2error.h" +#include "s2/s2memory_tracker.h" +#include "s2/s2point.h" #include "s2/s2point_index.h" +#include "s2/s2point_span.h" +#include "s2/s2shape.h" #include "s2/s2shape_index.h" #include "s2/util/gtl/compact_array.h" @@ -124,7 +135,7 @@ class S2Polyline; // using s2builderutil::IntLatLngSnapFunction; // S2Builder 
builder(S2Builder::Options(IntLatLngSnapFunction(7))); // S2Polygon output; -// builder.StartLayer(absl::make_unique(&output)); +// builder.StartLayer(std::make_unique(&output)); // builder.AddPolygon(input); // S2Error error; // if (!builder.Build(&error)) { @@ -155,7 +166,7 @@ class S2Builder { // holes, the outer loops ("shells") should be directed counter-clockwise // while the inner loops ("holes") should be directed clockwise. Note that // S2Builder::AddPolygon() follows this convention automatically. - enum class EdgeType { DIRECTED, UNDIRECTED }; + enum class EdgeType : uint8 { DIRECTED, UNDIRECTED }; // A SnapFunction restricts the locations of the output vertices. For // example, there are predefined snap functions that require vertices to be @@ -172,19 +183,19 @@ class S2Builder { // snapped. The snap_radius must be at least as large as the maximum // distance between P and SnapPoint(P) for any point P. // - // 3. "max_edge_deviation", the maximum distance that edges can move when - // snapped. It is slightly larger than "snap_radius" because when a - // geodesic edge is snapped, the center of the edge moves further than - // its endpoints. This value is computed automatically by S2Builder. + // Note that the maximum distance that edge interiors can move when + // snapped is slightly larger than "snap_radius", and is returned by the + // function S2Builder::Options::max_edge_deviation() (see there for + // details). // - // 4. "min_vertex_separation", the guaranteed minimum distance between + // 3. "min_vertex_separation", the guaranteed minimum distance between // vertices in the output. This is generally a fraction of // "snap_radius" where the fraction depends on the snap function. // - // 5. A "min_edge_vertex_separation", the guaranteed minimum distance - // between edges and non-incident vertices in the output. This is - // generally a fraction of "snap_radius" where the fraction depends on - // the snap function. + // 4. 
"min_edge_vertex_separation", the guaranteed minimum distance between + // edges and non-incident vertices in the output. This is generally a + // fraction of "snap_radius" where the fraction depends on the snap + // function. // // It is important to note that SnapPoint() does not define the actual // mapping from input vertices to output vertices, since the points it @@ -219,14 +230,15 @@ class S2Builder { // crosses an edge. class SnapFunction { public: - virtual ~SnapFunction() {} + virtual ~SnapFunction() = default; - // The maximum distance that vertices can move when snapped. + // The maximum distance that vertices can move when snapped. The snap + // radius can be any value between zero and SnapFunction::kMaxSnapRadius(). // // If the snap radius is zero, then vertices are snapped together only if // they are identical. Edges will not be snapped to any vertices other // than their endpoints, even if there are vertices whose distance to the - // edge is zero, unless split_crossing_edges() is true. + // edge is zero, unless split_crossing_edges() is true (see below). // // REQUIRES: snap_radius() <= kMaxSnapRadius virtual S1Angle snap_radius() const = 0; @@ -234,11 +246,6 @@ class S2Builder { // The maximum supported snap radius (equivalent to about 7800km). static S1Angle kMaxSnapRadius(); - // The maximum distance that the center of an edge can move when snapped. - // This is slightly larger than "snap_radius" because when a geodesic edge - // is snapped, the center of the edge moves further than its endpoints. - S1Angle max_edge_deviation() const; - // The guaranteed minimum distance between vertices in the output. // This is generally some fraction of "snap_radius". virtual S1Angle min_vertex_separation() const = 0; @@ -281,13 +288,30 @@ class S2Builder { const SnapFunction& snap_function() const; void set_snap_function(const SnapFunction& snap_function); + // The maximum distance from snapped edge vertices to the original edge. 
+ // This is the same as snap_function().snap_radius() except when + // split_crossing_edges() is true (see below), in which case the edge snap + // radius is increased by S2::kIntersectionError. + S1Angle edge_snap_radius() const; + + // The maximum distance that any point along an edge can move when snapped. + // It is slightly larger than edge_snap_radius() because when a geodesic + // edge is snapped, the edge center moves further than its endpoints. + // S2Builder ensures that this distance is at most 10% larger than + // edge_snap_radius(). + S1Angle max_edge_deviation() const; + // If true, then detect all pairs of crossing edges and eliminate them by - // adding a new vertex at their intersection point. + // adding a new vertex at their intersection point. See also the + // AddIntersection() method which allows intersection points to be added + // selectively. // - // When this option is true, the effective snap_radius() for edges is - // increased by S2::kIntersectionError to take into account the - // additional error when computing intersection points. In other words, - // edges may move by up to snap_radius() + S2::kIntersectionError. + // When this option if true, intersection_tolerance() is automatically set + // to a minimum of S2::kIntersectionError (see intersection_tolerance() + // for why this is necessary). Note that this means that edges can move + // by up to S2::kIntersectionError even when the specified snap radius is + // zero. The exact distance that edges can move is always given by + // max_edge_deviation() defined above. // // Undirected edges should always be used when the output is a polygon, // since splitting a directed loop at a self-intersection converts it into @@ -300,12 +324,59 @@ class S2Builder { // projection. You can minimize this problem by subdividing the input // edges so that the S2 edges (which are geodesics) stay close to the // original projected edges (which are curves on the sphere). 
This can - // be done using s2builderutil::EdgeSplitter(), for example. + // be done using S2EdgeTessellator, for example. // // DEFAULT: false bool split_crossing_edges() const; void set_split_crossing_edges(bool split_crossing_edges); + // Specifes the maximum allowable distance between a vertex added by + // AddIntersection() and the edge(s) that it is intended to snap to. This + // method must be called before AddIntersection() can be used. It has the + // effect of increasing the snap radius for edges (but not vertices) by + // the given distance. + // + // The intersection tolerance should be set to the maximum error in the + // intersection calculation used. For example, if S2::GetIntersection() + // is used then the error should be set to S2::kIntersectionError. If + // S2::GetPointOnLine() is used then the error should be set to + // S2::kGetPointOnLineError. If S2::Project() is used then the error + // should be set to S2::kProjectPerpendicularError. If more than one + // method is used then the intersection tolerance should be set to the + // maximum such error. + // + // The reason this option is necessary is that computed intersection + // points are not exact. For example, S2::GetIntersection(a, b, c, d) + // returns a point up to S2::kIntersectionError away from the true + // mathematical intersection of the edges AB and CD. Furthermore such + // intersection points are subject to further snapping in order to ensure + // that no pair of vertices is closer than the specified snap radius. For + // example, suppose the computed intersection point X of edges AB and CD + // is 1 nanonmeter away from both edges, and the snap radius is 1 meter. + // In that case X might snap to another vertex Y exactly 1 meter away, + // which would leave us with a vertex Y that could be up to 1.000000001 + // meters from the edges AB and/or CD. This means that AB and/or CD might + // not snap to Y leaving us with two edges that still cross each other. 
+ // + // However if the intersection tolerance is set to 1 nanometer then the + // snap radius for edges is increased to 1.000000001 meters ensuring that + // both edges snap to a common vertex even in this worst case. (Tthis + // technique does not work if the vertex snap radius is increased as well; + // it requires edges and vertices to be handled differently.) + // + // Note that this option allows edges to move by up to the given + // intersection tolerance even when the snap radius is zero. The exact + // distance that edges can move is always given by max_edge_deviation() + // defined above. + // + // When split_crossing_edges() is true, the intersection tolerance is + // automatically set to a minimum of S2::kIntersectionError. A larger + // value can be specified by calling this method explicitly. + // + // DEFAULT: S1Angle::Zero() + S1Angle intersection_tolerance() const; + void set_intersection_tolerance(S1Angle intersection_tolerance); + // If true, then simplify the output geometry by replacing nearly straight // chains of short edges with a single long edge. // @@ -369,6 +440,35 @@ class S2Builder { bool idempotent() const; void set_idempotent(bool idempotent); + // Specifies that internal memory usage should be tracked using the given + // S2MemoryTracker. If a memory limit is specified and more more memory + // than this is required then an error will be returned. Example usage: + // + // S2MemoryTracker tracker; + // tracker.set_limit(500 << 20); // 500 MB + // S2Builder::Options options; + // options.set_memory_tracker(&tracker); + // S2Builder builder{options}; + // ... + // S2Error error; + // if (!builder.Build(&error)) { + // if (error.code() == S2Error::RESOURCE_EXHAUSTED) { + // S2_LOG(ERROR) << error; // Memory limit exceeded + // } + // } + // + // CAVEATS: + // + // - Memory allocated by the output S2Builder layers is not tracked. 
+ // + // - While memory tracking is reasonably complete and accurate, it does + // not account for every last byte. It is intended only for the + // purpose of preventing clients from running out of memory. + // + // DEFAULT: nullptr (memory tracking disabled) + S2MemoryTracker* memory_tracker() const; + void set_memory_tracker(S2MemoryTracker* tracker); + // Options may be assigned and copied. Options(const Options& options); Options& operator=(const Options& options); @@ -376,13 +476,15 @@ class S2Builder { private: std::unique_ptr snap_function_; bool split_crossing_edges_ = false; + S1Angle intersection_tolerance_ = S1Angle::Zero(); bool simplify_edge_chains_ = false; bool idempotent_ = true; + S2MemoryTracker* memory_tracker_ = nullptr; }; + class Graph; // The following classes are only needed by Layer implementations. class GraphOptions; - class Graph; // For output layers that represent polygons, there is an ambiguity inherent // in spherical geometry that does not exist in planar geometry. Namely, if @@ -445,6 +547,7 @@ class S2Builder { // S2Error error; // S2_CHECK(builder.Build(&error)) << error; // Builds "line1" & "line2" class Layer; + void StartLayer(std::unique_ptr layer); // Adds a degenerate edge (representing a point) to the current layer. @@ -453,25 +556,56 @@ class S2Builder { // Adds the given edge to the current layer. void AddEdge(const S2Point& v0, const S2Point& v1); - // Adds the edges in the given polyline. (Note that if the polyline - // consists of 0 or 1 vertices, this method does nothing.) + // Adds the edges in the given polyline to the current layer. Note that + // polylines with 0 or 1 vertices are defined to have no edges. + void AddPolyline(S2PointSpan polyline); void AddPolyline(const S2Polyline& polyline); - // Adds the edges in the given loop. If the sign() of the loop is negative - // (i.e. 
this loop represents a hole within a polygon), the edge directions - // are automatically reversed to ensure that the polygon interior is always - // to the left of every edge. + // Adds the edges in the given loop to the current layer. Note that a loop + // consisting of one vertex adds a single degenerate edge. + // + // If the sign() of an S2Loop is negative (i.e. the loop represents a hole + // within a polygon), the edge directions are automatically reversed to + // ensure that the polygon interior is always to the left of every edge. + void AddLoop(S2PointLoopSpan loop); void AddLoop(const S2Loop& loop); - // Adds the loops in the given polygon. Loops representing holes have their - // edge directions automatically reversed as described for AddLoop(). Note - // that this method does not distinguish between the empty and full polygons, - // i.e. adding a full polygon has the same effect as adding an empty one. + // Adds the loops in the given polygon to the current layer. Loops + // representing holes have their edge directions automatically reversed as + // described for AddLoop(). Note that this method does not distinguish + // between the empty and full polygons, i.e. adding a full polygon has the + // same effect as adding an empty one. void AddPolygon(const S2Polygon& polygon); // Adds the edges of the given shape to the current layer. void AddShape(const S2Shape& shape); + // If "vertex" is the intersection point of two edges AB and CD (as computed + // by S2::GetIntersection()), this method ensures that AB and CD snap to a + // common vertex. (Note that the common vertex may be different than + // "vertex" in order to ensure that no pair of vertices is closer than the + // given snap radius.) Unlike Options::split_crossing_edges(), this method + // may be used to split crossing edge pairs selectively. 
+ // + // This method can also be used to tessellate edges using S2::GetPointOnLine() + // or S2::Project() provided that a suitable intersection tolerance is + // specified (see intersection_tolerance() for details). + // + // This method implicitly overrides the idempotent() option, since adding an + // intersection point implies a desire to have nearby edges snapped to it + // even if these edges already satisfy the S2Builder output guarantees. + // (Otherwise for example edges would never be snapped to nearby + // intersection points when the snap radius is zero.) + // + // Note that unlike ForceVertex(), this method maintains all S2Builder + // guarantees regarding minimum vertex-vertex separation, minimum + // edge-vertex separation, and edge chain simplification. + // + // REQUIRES: options().intersection_tolerance() > S1Angle::Zero() + // REQUIRES: "vertex" was computed by S2::GetIntersection() (in order to + // guarantee that both edges snap to a common vertex) + void AddIntersection(const S2Point& vertex); + // For layers that are assembled into polygons, this method specifies a // predicate that is called when the output consists entirely of degenerate // edges and/or sibling pairs. The predicate is given an S2Builder::Graph @@ -510,15 +644,30 @@ class S2Builder { // Forces a vertex to be located at the given position. This can be used to // prevent certain input vertices from moving. However if you are trying to - // preserve part of the input boundary, be aware that this option does not - // prevent edges from being split by new vertices. + // preserve input edges, be aware that this option does not prevent edges from + // being split by new vertices. + // + // Forced vertices are subject to the following limitations: // - // Forced vertices are never snapped; if this is desired then you need to - // call options().snap_function().SnapPoint() explicitly. Forced vertices - // are also never simplified away (if simplify_edge_chains() is used). 
+ // - Forced vertices are never snapped. This is true even when the given + // position is not allowed by the given snap function (e.g. you can force + // a vertex at a non-S2CellId center when using S2CellIdSnapFunction). + // If you want to ensure that forced vertices obey the snap function + // restrictions, you must call snap_function().SnapPoint() explicitly. // - // Caveat: Since this method can place vertices arbitrarily close together, - // S2Builder makes no minimum separation guaranteees with forced vertices. + // - There is no guaranteed minimum separation between pairs of forced + // vertices, i.e. snap_function().min_vertex_separation() does not apply. + // (This must be true because forced vertices can be placed arbitrarily.) + // + // - There is no guaranteed minimum separation between forced vertices and + // non-incident edges, i.e. snap_function().min_edge_vertex_separation() + // does not apply. + // + // - Forced vertices are never simplified away (i.e. when simplification is + // requested using options().simplify_edge_chains()). + // + // All other guarantees continue to hold, e.g. the input topology will always + // be preserved. void ForceVertex(const S2Point& vertex); // Every edge can have a set of non-negative integer labels attached to it. @@ -568,6 +717,18 @@ class S2Builder { // specified are preserved. void Reset(); + /////////////////////////////////////////////////////////////////////////// + // The following methods may be called at any time, including from + // S2Builder::Layer implementations. + + // Returns the number of input edges. + int num_input_edges() const; + + // Returns the endpoints of the given input edge. + // + // REQUIRES: 0 <= input_edge_id < num_input_edges() + S2Shape::Edge input_edge(int input_edge_id) const; + private: ////////////////////// Input Types ///////////////////////// // All types associated with the S2Builder inputs are prefixed with "Input". 
@@ -606,11 +767,61 @@ class S2Builder { // Identifies an output edge in a particular layer. using LayerEdgeId = std::pair; + ////////////////////// Internal Types ///////////////////////// class EdgeChainSimplifier; + // MemoryTracker is a helper class to measure S2Builder memory usage. It is + // based on a detailed analysis of the data structures used. This approach + // is fragile because the memory tracking code needs to be updated whenever + // S2Builder is modified, however S2Builder has been quite stable and this + // approach allows the memory usage to be measured quite accurately. + // + // CAVEATS: + // + // - Does not track memory used by edge labels. (It is tricky to do this + // accurately because they are stored in an IdSetLexicon, and labels + // are typically a tiny fraction of the total space used.) + // + // - Does not track memory used to represent layers internally. (The + // number of layers is typically small compared to the numbers of + // vertices and edges, and the amount of memory used by the Layer and + // IsFullPolygonPredicate objects is difficult to measure.) + // + // - Does not track memory used by the output layer Build() methods. (This + // includes both temporary space, e.g. due to calling S2Builder::Graph + // methods, and also any geometric objects created by these layers.) + class MemoryTracker : public S2MemoryTracker::Client { + public: + bool TallyEdgeSites(const gtl::compact_array& sites); + bool ReserveEdgeSite(gtl::compact_array* sites); + bool ClearEdgeSites(std::vector>* edge_sites); + + bool TallyIndexedSite(); + bool FixSiteIndexTally(const S2PointIndex& index); + bool DoneSiteIndex(const S2PointIndex& index); + + bool TallySimplifyEdgeChains( + const std::vector>& site_vertices, + const std::vector>& layer_edges); + + bool TallyFilterVertices(int num_sites, + const std::vector>& layer_edges); + bool DoneFilterVertices(); + + private: + // The amount of non-inline memory used to store edge sites. 
+ int64 edge_sites_bytes_ = 0; + + // The amount of memory used by the S2PointIndex for sites. + int64 site_index_bytes_ = 0; + + // The amount of temporary memory used by Graph::FilterVertices(). + int64 filter_vertices_bytes_ = 0; + }; + InputVertexId AddVertex(const S2Point& v); void ChooseSites(); - void CopyInputEdges(); + void ChooseAllVerticesAsSites(); std::vector SortInputVertices(); void AddEdgeCrossings(const MutableS2ShapeIndex& input_edge_index); void AddForcedSites(S2PointIndex* site_index); @@ -620,21 +831,19 @@ class S2Builder { void CollectSiteEdges(const S2PointIndex& site_index); void SortSitesByDistance(const S2Point& x, gtl::compact_array* sites) const; + void InsertSiteByDistance(SiteId new_site_id, const S2Point& x, + gtl::compact_array* sites); void AddExtraSites(const MutableS2ShapeIndex& input_edge_index); - void MaybeAddExtraSites(InputEdgeId edge_id, - InputEdgeId max_edge_id, - const std::vector& chain, + void MaybeAddExtraSites(InputEdgeId edge_id, const std::vector& chain, const MutableS2ShapeIndex& input_edge_index, - std::vector* snap_queue); + absl::flat_hash_set* edges_to_resnap); void AddExtraSite(const S2Point& new_site, - InputEdgeId max_edge_id, const MutableS2ShapeIndex& input_edge_index, - std::vector* snap_queue); + absl::flat_hash_set* edges_to_resnap); S2Point GetSeparationSite(const S2Point& site_to_avoid, const S2Point& v0, const S2Point& v1, InputEdgeId input_edge_id) const; - S2Point GetCoverageEndpoint(const S2Point& p, const S2Point& x, - const S2Point& y, const S2Point& n) const; + S2Point GetCoverageEndpoint(const S2Point& p, const S2Point& n) const; void SnapEdge(InputEdgeId e, std::vector* chain) const; void BuildLayers(); @@ -646,7 +855,7 @@ class S2Builder { InputEdgeId begin, InputEdgeId end, const GraphOptions& options, std::vector* edges, std::vector* input_edge_ids, IdSetLexicon* input_edge_id_set_lexicon, - std::vector>* site_vertices) const; + std::vector>* site_vertices); void MaybeAddInputVertex( 
InputVertexId v, SiteId id, std::vector>* site_vertices) const; @@ -657,7 +866,7 @@ class S2Builder { const std::vector>& site_vertices, std::vector>* layer_edges, std::vector>* layer_input_edge_ids, - IdSetLexicon* input_edge_id_set_lexicon) const; + IdSetLexicon* input_edge_id_set_lexicon); void MergeLayerEdges( const std::vector>& layer_edges, const std::vector>& layer_input_edge_ids, @@ -681,6 +890,15 @@ class S2Builder { // edges are being split at crossings. S1ChordAngle edge_snap_radius_ca_; + // True if we need to check that snapping has not changed the input topology + // around any vertex (i.e. Voronoi site). Normally this is only necessary for + // forced vertices, but if the snap radius is very small (e.g., zero) and + // split_crossing_edges() is true then we need to do this for all vertices. + // In all other situations, any snapped edge that crosses a vertex will also + // be closer than min_edge_vertex_separation() to that vertex, which will + // cause us to add a separation site anyway. + bool check_all_site_crossings_; + S1Angle max_edge_deviation_; S1ChordAngle edge_site_query_radius_ca_; S1ChordAngle min_edge_length_to_split_ca_; @@ -761,6 +979,9 @@ class S2Builder { // the "sites to avoid" (needed for simplification). std::vector> edge_sites_; + // An object to track the memory usage of this class. + MemoryTracker tracker_; + S2Builder(const S2Builder&) = delete; S2Builder& operator=(const S2Builder&) = delete; }; @@ -775,9 +996,9 @@ class S2Builder { class S2Builder::GraphOptions { public: using EdgeType = S2Builder::EdgeType; - enum class DegenerateEdges; - enum class DuplicateEdges; - enum class SiblingPairs; + enum class DegenerateEdges : uint8; + enum class DuplicateEdges : uint8; + enum class SiblingPairs : uint8; // All S2Builder::Layer subtypes should specify GraphOptions explicitly // using this constructor, rather than relying on default values. 
@@ -800,10 +1021,13 @@ class S2Builder::GraphOptions { // Specifies whether the S2Builder input edges should be treated as // undirected. If true, then all input edges are duplicated into pairs - // consisting of an edge and a sibling (reverse) edge. The layer - // implementation is responsible for ensuring that exactly one edge from - // each pair is used in the output, i.e. *only half* of the graph edges will - // be used. (Note that some values of the sibling_pairs() option + // consisting of an edge and a sibling (reverse) edge. Note that the + // automatically created sibling edge has an empty set of labels and does + // not have an associated InputEdgeId. + // + // The layer implementation is responsible for ensuring that exactly one + // edge from each pair is used in the output, i.e. *only half* of the graph + // edges will be used. (Note that some values of the sibling_pairs() option // automatically take care of this issue by removing half of the edges and // changing edge_type() to DIRECTED.) // @@ -820,10 +1044,10 @@ class S2Builder::GraphOptions { // do not support degeneracies, such as S2PolygonLayer. // // DISCARD_EXCESS: Discards all degenerate edges that are connected to - // non-degenerate edges. (Any remaining duplicate edges can - // be merged using DuplicateEdges::MERGE.) This is useful - // for simplifying polygons while ensuring that loops that - // collapse to a single point do not disappear. + // non-degenerate edges and merges any remaining duplicate + // degenerate edges. This is useful for simplifying + // polygons while ensuring that loops that collapse to a + // single point do not disappear. // // KEEP: Keeps all degenerate edges. Be aware that this may create many // redundant edges when simplifying geometry (e.g., a polyline of the @@ -831,7 +1055,7 @@ class S2Builder::GraphOptions { // for algorithms that require an output edge for every input edge. 
// // DEFAULT: DegenerateEdges::KEEP - enum class DegenerateEdges { DISCARD, DISCARD_EXCESS, KEEP }; + enum class DegenerateEdges : uint8 { DISCARD, DISCARD_EXCESS, KEEP }; DegenerateEdges degenerate_edges() const; void set_degenerate_edges(DegenerateEdges degenerate_edges); @@ -842,7 +1066,7 @@ class S2Builder::GraphOptions { // input edge ids. // // DEFAULT: DuplicateEdges::KEEP - enum class DuplicateEdges { MERGE, KEEP }; + enum class DuplicateEdges : uint8 { MERGE, KEEP }; DuplicateEdges duplicate_edges() const; void set_duplicate_edges(DuplicateEdges duplicate_edges); @@ -870,8 +1094,8 @@ class S2Builder::GraphOptions { // // CREATE: Ensures that all edges have a sibling edge by creating them if // necessary. This is useful with polygon meshes where the input - // polygons do not cover the entire sphere. Such edges always - // have an empty set of labels. + // polygons do not cover the entire sphere. Such edges always have + // an empty set of labels and do not have an associated InputEdgeId. // // If edge_type() is EdgeType::UNDIRECTED, a sibling edge pair is considered // to consist of four edges (two duplicate edges and their siblings), since @@ -895,23 +1119,27 @@ class S2Builder::GraphOptions { // when duplicate edges are present, all of the corresponding edge labels // are merged together and assigned to the remaining edges. (This avoids // the problem of having to decide which edges are discarded.) Note that - // this merging takes place even when all copies of an edge are kept, and - // that even labels attached to duplicate degenerate edges are merged. For - // example, consider the graph {AB1, AB2, BA3, CD4, CD5} (where XYn denotes - // an edge from X to Y with label "n"). With SiblingPairs::DISCARD, we need - // to discard one of the copies of AB. But which one? 
Rather than choosing - // arbitrarily, instead we merge the labels of all duplicate edges (even - // ones where no sibling pairs were discarded), yielding {AB12, CD45, CD45} - // (assuming that duplicate edges are being kept). + // this merging takes place even when all copies of an edge are kept. For + // example, consider the graph {AB1, AB2, AB3, BA4, CD5, CD6} (where XYn + // denotes an edge from X to Y with label "n"). With SiblingPairs::DISCARD, + // we need to discard one of the copies of AB. But which one? Rather than + // choosing arbitrarily, instead we merge the labels of all duplicate edges + // (even ones where no sibling pairs were discarded), yielding {AB123, + // AB123, CD45, CD45} (assuming that duplicate edges are being kept). + // Notice that the labels of duplicate edges are merged even if no siblings + // were discarded (such as CD5, CD6 in this example), and that this would + // happen even with duplicate degenerate edges (e.g. the edges EE7, EE8). // // DEFAULT: SiblingPairs::KEEP - enum class SiblingPairs { DISCARD, DISCARD_EXCESS, KEEP, REQUIRE, CREATE }; + enum class SiblingPairs : uint8 { + DISCARD, DISCARD_EXCESS, KEEP, REQUIRE, CREATE + }; SiblingPairs sibling_pairs() const; void set_sibling_pairs(SiblingPairs sibling_pairs); - // This is a specialized option that is only needed by clients want to work - // with the graphs for multiple layers at the same time (e.g., in order to - // check whether the same edge is present in two different graphs). [Note + // This is a specialized option that is only needed by clients that want to + // work with the graphs for multiple layers at the same time (e.g., in order + // to check whether the same edge is present in two different graphs). [Note // that if you need to do this, usually it is easier just to build a single // graph with suitable edge labels.] 
// @@ -974,6 +1202,17 @@ inline void S2Builder::Options::set_split_crossing_edges( split_crossing_edges_ = split_crossing_edges; } +inline S1Angle S2Builder::Options::intersection_tolerance() const { + if (!split_crossing_edges()) return intersection_tolerance_; + return std::max(intersection_tolerance_, S2::kIntersectionError); +} + +inline void S2Builder::Options::set_intersection_tolerance( + S1Angle intersection_tolerance) { + S2_DCHECK_GE(intersection_tolerance, S1Angle::Zero()); + intersection_tolerance_ = intersection_tolerance; +} + inline bool S2Builder::Options::simplify_edge_chains() const { return simplify_edge_chains_; } @@ -998,6 +1237,15 @@ inline void S2Builder::Options::set_idempotent(bool idempotent) { idempotent_ = idempotent; } +inline S2MemoryTracker* S2Builder::Options::memory_tracker() const { + return memory_tracker_; +} + +inline void S2Builder::Options::set_memory_tracker( + S2MemoryTracker* tracker) { + memory_tracker_ = tracker; +} + inline S2Builder::GraphOptions::EdgeType S2Builder::GraphOptions::edge_type() const { return edge_type_; @@ -1054,4 +1302,14 @@ inline void S2Builder::AddPoint(const S2Point& v) { AddEdge(v, v); } +inline int S2Builder::num_input_edges() const { + return input_edges_.size(); +} + +inline S2Shape::Edge S2Builder::input_edge(int input_edge_id) const { + const InputEdge& edge = input_edges_[input_edge_id]; + return S2Shape::Edge(input_vertices_[edge.first], + input_vertices_[edge.second]); +} + #endif // S2_S2BUILDER_H_ diff --git a/src/s2/s2builder_graph.cc b/src/s2/s2builder_graph.cc index 3d513535..fb6f6014 100644 --- a/src/s2/s2builder_graph.cc +++ b/src/s2/s2builder_graph.cc @@ -18,15 +18,19 @@ #include "s2/s2builder_graph.h" #include +#include #include -#include #include +#include #include -#include "s2/base/logging.h" + +#include "s2/base/integral_types.h" #include "absl/container/btree_map.h" #include "s2/id_set_lexicon.h" #include "s2/s2builder.h" #include "s2/s2error.h" +#include 
"s2/s2memory_tracker.h" +#include "s2/s2point.h" #include "s2/s2predicates.h" using std::make_pair; @@ -72,6 +76,12 @@ vector Graph::GetInEdgeIds() const { vector Graph::GetSiblingMap() const { vector in_edge_ids = GetInEdgeIds(); MakeSiblingMap(&in_edge_ids); + // Validates the sibling map, and indirectly the edge ordering comparator, + // which must break ties on equal edges correctly for the sibling map to be + // created correctly. + for (EdgeId e = 0; e < num_edges(); ++e) { + S2_DCHECK(e == in_edge_ids[in_edge_ids[e]]); + } return in_edge_ids; } @@ -172,15 +182,13 @@ vector Graph::GetInputEdgeOrder( // A struct for sorting the incoming and outgoing edges around a vertex "v0". struct VertexEdge { - VertexEdge(bool _incoming, Graph::EdgeId _index, - Graph::VertexId _endpoint, int32 _rank) - : incoming(_incoming), index(_index), - endpoint(_endpoint), rank(_rank) { - } + VertexEdge(bool _incoming, Graph::EdgeId _index, Graph::VertexId _endpoint, + int32 _rank) + : incoming(_incoming), index(_index), endpoint(_endpoint), rank(_rank) {} bool incoming; // Is this an incoming edge to "v0"? Graph::EdgeId index; // Index of this edge in "edges_" or "in_edge_ids" Graph::VertexId endpoint; // The other (not "v0") endpoint of this edge - int32 rank; // Secondary key for edges with the same endpoint + int32 rank; // Secondary key for edges with the same endpoint }; // Given a set of duplicate outgoing edges (v0, v1) and a set of duplicate @@ -327,9 +335,9 @@ void Graph::CanonicalizeLoopOrder(const vector& min_input_ids, // This has the advantage that if an undirected loop is assembled with the // wrong orientation and later inverted (e.g. by S2Polygon::InitOriented), // we still end up preserving the original cyclic vertex order. 
- int pos = 0; + size_t pos = 0; bool saw_gap = false; - for (int i = 1; i < loop->size(); ++i) { + for (size_t i = 1; i < loop->size(); ++i) { int cmp = min_input_ids[(*loop)[i]] - min_input_ids[(*loop)[pos]]; if (cmp < 0) { saw_gap = true; @@ -346,9 +354,10 @@ void Graph::CanonicalizeVectorOrder(const vector& min_input_ids, vector>* chains) { std::sort(chains->begin(), chains->end(), [&min_input_ids](const vector& a, const vector& b) { - return min_input_ids[a[0]] < min_input_ids[b[0]]; - }); -} + // Comparison function ensures sort is stable. + return make_pair(min_input_ids[a[0]], a[0]) < + make_pair(min_input_ids[b[0]], b[0]); + });} bool Graph::GetDirectedLoops(LoopType loop_type, vector* loops, S2Error* error) const { @@ -414,10 +423,9 @@ bool Graph::GetDirectedComponents( options_.sibling_pairs() == SiblingPairs::CREATE); S2_DCHECK(options_.edge_type() == EdgeType::DIRECTED); // Implied by above. - vector sibling_map = GetInEdgeIds(); + vector sibling_map = GetSiblingMap(); vector left_turn_map; if (!GetLeftTurnMap(sibling_map, &left_turn_map, error)) return false; - MakeSiblingMap(&sibling_map); vector min_input_ids = GetMinInputEdgeIds(); vector frontier; // Unexplored sibling edges. @@ -427,24 +435,24 @@ bool Graph::GetDirectedComponents( if (degenerate_boundaries == DegenerateBoundaries::DISCARD) { path_index.assign(num_edges(), -1); } - for (EdgeId min_start = 0; min_start < num_edges(); ++min_start) { - if (left_turn_map[min_start] < 0) continue; // Already used. + for (EdgeId start = 0; start < num_edges(); ++start) { + if (left_turn_map[start] < 0) continue; // Already used. // Build a connected component by keeping a stack of unexplored siblings // of the edges used so far. DirectedComponent component; - frontier.push_back(min_start); + frontier.push_back(start); while (!frontier.empty()) { - EdgeId start = frontier.back(); + EdgeId e = frontier.back(); frontier.pop_back(); - if (left_turn_map[start] < 0) continue; // Already used. 
+ if (left_turn_map[e] < 0) continue; // Already used. - // Build a path by making left turns at each vertex until we return to - // "start". Whenever we encounter an edge that is a sibling of an edge + // Build a path by making left turns at each vertex until we complete a + // loop. Whenever we encounter an edge that is a sibling of an edge // that is already on the path, we "peel off" a loop consisting of any // edges that were between these two edges. vector path; - for (EdgeId e = start, next; left_turn_map[e] >= 0; e = next) { + for (EdgeId next; left_turn_map[e] >= 0; e = next) { path.push_back(e); next = left_turn_map[e]; left_turn_map[e] = -1; @@ -460,7 +468,7 @@ bool Graph::GetDirectedComponents( // Common special case: the edge and its sibling are adjacent, in // which case we can simply remove them from the path and continue. - if (sibling_index == path.size() - 2) { + if (static_cast(sibling_index) == path.size() - 2) { path.resize(sibling_index); // We don't need to update "path_index" for these two edges // because both edges of the sibling pair have now been used. @@ -752,7 +760,7 @@ vector Graph::PolylineBuilder::BuildWalks() { // start from the edge with minimum input edge id. If the minimal input // edge was split into several edges, then we start from the first edge of // the chain. 
- for (int i = 0; i < edges.size() && edges_left_ > 0; ++i) { + for (size_t i = 0; i < edges.size() && edges_left_ > 0; ++i) { EdgeId e = edges[i]; if (used_[e]) continue; @@ -763,7 +771,8 @@ vector Graph::PolylineBuilder::BuildWalks() { VertexId v = g_.edge(e).first; InputEdgeId id = min_input_ids_[e]; int excess = 0; - for (int j = i; j < edges.size() && min_input_ids_[edges[j]] == id; ++j) { + for (size_t j = i; j < edges.size() && min_input_ids_[edges[j]] == id; + ++j) { EdgeId e2 = edges[j]; if (used_[e2]) continue; if (g_.edge(e2).first == v) ++excess; @@ -820,7 +829,7 @@ void Graph::PolylineBuilder::MaximizeWalk(EdgePolyline* polyline) { // and insert it into the polyline. (The walk is guaranteed to be a loop // because this method is only called when all vertices have equal numbers // of unused incoming and outgoing edges.) - for (int i = 0; i <= polyline->size(); ++i) { + for (size_t i = 0; i <= polyline->size(); ++i) { VertexId v = (i == 0 ? g_.edge((*polyline)[i]).first : g_.edge((*polyline)[i - 1]).second); for (EdgeId e : out_.edge_ids(v)) { @@ -863,18 +872,43 @@ class Graph::EdgeProcessor { vector tmp_ids_; }; -void Graph::ProcessEdges( - GraphOptions* options, std::vector* edges, - std::vector* input_ids, IdSetLexicon* id_set_lexicon, - S2Error* error) { - EdgeProcessor processor(*options, edges, input_ids, id_set_lexicon); - processor.Run(error); +void Graph::ProcessEdges(GraphOptions* options, vector* edges, + vector* input_ids, + IdSetLexicon* id_set_lexicon, S2Error* error, + S2MemoryTracker::Client* tracker) { + // Graph::EdgeProcessor uses 8 bytes per input edge (out_edges_ and + // in_edges_) plus 12 bytes per output edge (new_edges_, new_input_ids_). + // For simplicity we assume that num_input_edges == num_output_edges, since + // Graph:EdgeProcessor does not increase the number of edges except possibly + // in the case of SiblingPairs::CREATE (which we ignore). 
+ // + // vector out_edges_; // Graph::EdgeProcessor + // vector in_edges_; // Graph::EdgeProcessor + // vector new_edges_; // Graph::EdgeProcessor + // vector new_input_ids_; // Graph::EdgeProcessor + // + // EdgeProcessor discards the "edges" and "input_ids" vectors and replaces + // them with new vectors that could be larger or smaller. To handle this + // correctly, we untally these vectors now and retally them at the end. + const int64 kFinalPerEdge = sizeof(Edge) + sizeof(InputEdgeIdSetId); + const int64 kTempPerEdge = kFinalPerEdge + 2 * sizeof(EdgeId); + if (tracker) { + tracker->TallyTemp(edges->size() * kTempPerEdge); + tracker->Tally(-edges->capacity() * kFinalPerEdge); + } + if (!tracker || tracker->ok()) { + EdgeProcessor processor(*options, edges, input_ids, id_set_lexicon); + processor.Run(error); + } // Certain values of sibling_pairs() discard half of the edges and change // the edge_type() to DIRECTED (see the description of GraphOptions). if (options->sibling_pairs() == SiblingPairs::REQUIRE || options->sibling_pairs() == SiblingPairs::CREATE) { options->set_edge_type(EdgeType::DIRECTED); } + if (tracker && !tracker->Tally(edges->capacity() * kFinalPerEdge)) { + *error = tracker->error(); + } } Graph::EdgeProcessor::EdgeProcessor(const GraphOptions& options, @@ -959,6 +993,7 @@ void Graph::EdgeProcessor::Run(S2Error* error) { int n_out = out - out_begin; int n_in = in - in_begin; if (edge.first == edge.second) { + // This is a degenerate edge. S2_DCHECK_EQ(n_out, n_in); if (options_.degenerate_edges() == DegenerateEdges::DISCARD) { continue; @@ -972,15 +1007,18 @@ void Graph::EdgeProcessor::Run(S2Error* error) { (in < num_edges && edges_[in_edges_[in]].second == edge.first))) { continue; // There were non-degenerate incident edges, so discard. } + // DegenerateEdges::DISCARD_EXCESS also merges degenerate edges. 
+ bool merge = + (options_.duplicate_edges() == DuplicateEdges::MERGE || + options_.degenerate_edges() == DegenerateEdges::DISCARD_EXCESS); if (options_.edge_type() == EdgeType::UNDIRECTED && (options_.sibling_pairs() == SiblingPairs::REQUIRE || options_.sibling_pairs() == SiblingPairs::CREATE)) { // When we have undirected edges and are guaranteed to have siblings, // we cut the number of edges in half (see s2builder.h). S2_DCHECK_EQ(0, n_out & 1); // Number of edges is always even. - AddEdges(options_.duplicate_edges() == DuplicateEdges::MERGE ? - 1 : (n_out / 2), edge, MergeInputIds(out_begin, out)); - } else if (options_.duplicate_edges() == DuplicateEdges::MERGE) { + AddEdges(merge ? 1 : (n_out / 2), edge, MergeInputIds(out_begin, out)); + } else if (merge) { AddEdges(options_.edge_type() == EdgeType::UNDIRECTED ? 2 : 1, edge, MergeInputIds(out_begin, out)); } else if (options_.sibling_pairs() == SiblingPairs::DISCARD || @@ -1047,13 +1085,14 @@ void Graph::EdgeProcessor::Run(S2Error* error) { } } edges_.swap(new_edges_); - edges_.shrink_to_fit(); input_ids_.swap(new_input_ids_); + edges_.shrink_to_fit(); input_ids_.shrink_to_fit(); } +// LINT.IfChange vector Graph::FilterVertices(const vector& vertices, - std::vector* edges, + vector* edges, vector* tmp) { // Gather the vertices that are actually used. 
vector used; @@ -1071,7 +1110,7 @@ vector Graph::FilterVertices(const vector& vertices, vector& vmap = *tmp; vmap.resize(vertices.size()); vector new_vertices(used.size()); - for (int i = 0; i < used.size(); ++i) { + for (size_t i = 0; i < used.size(); ++i) { new_vertices[i] = vertices[used[i]]; vmap[used[i]] = i; } @@ -1082,3 +1121,35 @@ vector Graph::FilterVertices(const vector& vertices, } return new_vertices; } +// LINT.ThenChange(s2builder.cc:TallyFilterVertices) + +Graph Graph::MakeSubgraph( + GraphOptions new_options, vector* new_edges, + vector* new_input_edge_id_set_ids, + IdSetLexicon* new_input_edge_id_set_lexicon, + IsFullPolygonPredicate is_full_polygon_predicate, + S2Error* error, S2MemoryTracker::Client* tracker) const { + if (options().edge_type() == EdgeType::DIRECTED && + new_options.edge_type() == EdgeType::UNDIRECTED) { + // Create a reversed edge for every edge. + int n = new_edges->size(); + if (tracker == nullptr) { + new_edges->reserve(2 * n); + new_input_edge_id_set_ids->reserve(2 * n); + } else if (!tracker->AddSpaceExact(new_edges, n) || + !tracker->AddSpaceExact(new_input_edge_id_set_ids, n)) { + *error = tracker->error(); + return Graph(); + } + for (int i = 0; i < n; ++i) { + new_edges->push_back(Graph::reverse((*new_edges)[i])); + new_input_edge_id_set_ids->push_back(IdSetLexicon::EmptySetId()); + } + } + Graph::ProcessEdges(&new_options, new_edges, new_input_edge_id_set_ids, + new_input_edge_id_set_lexicon, error, tracker); + if (tracker && !tracker->ok()) return Graph(); // Graph would be invalid. 
+ return Graph(new_options, &vertices(), new_edges, new_input_edge_id_set_ids, + new_input_edge_id_set_lexicon, &label_set_ids(), + &label_set_lexicon(), std::move(is_full_polygon_predicate)); +} diff --git a/src/s2/s2builder_graph.h b/src/s2/s2builder_graph.h index b1ba3e43..21e2d381 100644 --- a/src/s2/s2builder_graph.h +++ b/src/s2/s2builder_graph.h @@ -18,15 +18,20 @@ #ifndef S2_S2BUILDER_GRAPH_H_ #define S2_S2BUILDER_GRAPH_H_ +#include #include #include #include +#include #include #include + #include "s2/base/integral_types.h" #include "s2/id_set_lexicon.h" #include "s2/s2builder.h" #include "s2/s2error.h" +#include "s2/s2memory_tracker.h" +#include "s2/s2point.h" // An S2Builder::Graph represents a collection of snapped edges that is passed // to a Layer for assembly. (Example layers include polygons, polylines, and @@ -95,9 +100,11 @@ class S2Builder::Graph { // "label_set_ids": // - a vector indexed by InputEdgeId that allows access to the set of // labels that were attached to the given input edge, by looking up the - // returned value (a LabelSetId) in the "label_set_lexicon". + // returned value (a LabelSetId) in the "label_set_lexicon". This + // vector may be empty to indicate that no labels are present. // "label_set_lexicon": - // - a class that maps a LabelSetId to a set of S2Builder::Labels. + // - a class that maps a LabelSetId to a set of S2Builder::Labels. (Must + // be provided even if no labels are present.) // "is_full_polygon_predicate": // - a predicate called to determine whether a graph consisting only of // polygon degeneracies represents the empty polygon or the full polygon @@ -143,8 +150,9 @@ class S2Builder::Graph { // from EdgeId to the sibling EdgeId. This method is identical to // GetInEdgeIds() except that (1) it requires edges to have siblings, and // (2) undirected degenerate edges are grouped together in pairs such that - // one edge is the sibling of the other. 
Handles duplicate edges correctly - // and is also consistent with GetLeftTurnMap(). + // one edge is the sibling of the other. (The sibling of a directed + // degenerate edge is itself.) Handles duplicate edges correctly and is + // also consistent with GetLeftTurnMap(). // // REQUIRES: An option is chosen that guarantees sibling pairs: // (options.sibling_pairs() == { REQUIRE, CREATE } || @@ -153,13 +161,13 @@ class S2Builder::Graph { // Like GetSiblingMap(), but constructs the map starting from the vector of // incoming edge ids returned by GetInEdgeIds(). (This operation is a no-op - // except unless undirected degenerate edges are present, in which case such - // edges are grouped together in pairs to satisfy the requirement that every - // edge must have a sibling edge.) + // unless undirected degenerate edges are present, in which case such edges + // are grouped together in pairs to satisfy the requirement that every edge + // must have a sibling edge.) void MakeSiblingMap(std::vector* in_edge_ids) const; - class VertexOutMap; // Forward declaration class VertexInMap; // Forward declaration + class VertexOutMap; // Forward declaration // A helper class for VertexOutMap that represents the outgoing edges // from a given vertex. @@ -268,12 +276,12 @@ class S2Builder::Graph { }; // Defines a value larger than any valid InputEdgeId. - static const InputEdgeId kMaxInputEdgeId = + static constexpr InputEdgeId kMaxInputEdgeId = std::numeric_limits::max(); // The following value of InputEdgeId means that an edge does not // corresponds to any input edge. - static const InputEdgeId kNoInputEdgeId = kMaxInputEdgeId - 1; + static constexpr InputEdgeId kNoInputEdgeId = kMaxInputEdgeId - 1; // Returns the set of input edge ids that were snapped to the given // edge. ("Input edge ids" are assigned to input edges sequentially in @@ -345,9 +353,10 @@ class S2Builder::Graph { // labels from one or both siblings are returned. 
void Init(const Graph& g, EdgeType edge_type); - // Returns the set of labels associated with edge "e" (and also the labels - // associated with the sibling of "e" if edge_type() is UNDIRECTED). - // Labels are sorted and duplicate labels are automatically removed. + // Fills "labels" with the set of labels associated with edge "e" (and also + // the labels associated with the sibling of "e" if edge_type() is + // UNDIRECTED). Labels are sorted and duplicate labels are automatically + // removed. // // This method uses an output parameter rather than returning by value in // order to avoid allocating a new vector on every call to this method. @@ -361,6 +370,7 @@ class S2Builder::Graph { // Returns the set of labels associated with a given input edge. Example: // for (Label label : g.labels(input_edge_id)) { ... } + // See also LabelFetcher, which returns the labels for a given graph edge. IdSetLexicon::IdSet labels(InputEdgeId e) const; // Low-level method that returns an integer representing the set of @@ -369,7 +379,8 @@ class S2Builder::Graph { LabelSetId label_set_id(InputEdgeId e) const; // Low-level method that returns a vector where each element represents the - // set of labels associated with a particular output edge. + // set of labels associated with a particular input edge. Note that this + // vector may be empty, which indicates that no labels are present. const std::vector& label_set_ids() const; // Returns a mapping from a LabelSetId to a set of labels. @@ -385,11 +396,11 @@ class S2Builder::Graph { // (see s2builder.h for details). const IsFullPolygonPredicate& is_full_polygon_predicate() const; - // Returns a map "m" that maps each edge e=(v0,v1) to the following outgoing - // edge around "v1" in clockwise order. (This corresponds to making a "left - // turn" at the vertex.) By starting at a given edge and making only left - // turns, you can construct a loop whose interior does not contain any edges - // in the same connected component. 
+ // Fills in "left_turn_map" so it maps each edge e=(v0,v1) to the following + // outgoing edge around "v1" in clockwise order. (This corresponds to making + // a "left turn" at the vertex.) By starting at a given edge and making only + // left turns, you can construct a loop whose interior does not contain any + // edges in the same connected component. // // If the incoming and outgoing edges around a vertex do not alternate // perfectly (e.g., there are two incoming edges in a row), then adjacent @@ -435,7 +446,7 @@ class S2Builder::Graph { const std::vector& min_input_ids, std::vector>* chains); - // A loop consisting of a sequence of edges. + // A loop consisting of a sequence of edge ids. using EdgeLoop = std::vector; // Indicates whether loops should be simple cycles (no repeated vertices) or @@ -585,19 +596,28 @@ class S2Builder::Graph { // should already have been transformed into a pair of directed edges. // // "input_ids" is a vector of the same length as "edges" that indicates - // which input edges were snapped to each edge. This vector is also updated - // appropriately as edges are discarded, merged, etc. + // which input edges were snapped to each edge, by mapping each edge id to a + // set of input edge ids in "id_set_lexicon". This vector and the lexicon are + // also updated appropriately as edges are discarded, merged, etc. // - // Note that "options" may be modified by this method: in particular, the - // edge_type() can be changed if sibling_pairs() is CREATE or REQUIRE (see - // the description of S2Builder::GraphOptions). + // Note that "options" may be modified by this method: in particular, if + // edge_type() is UNDIRECTED and sibling_pairs() is CREATE or REQUIRE, then + // half of the edges in each direction will be discarded and edge_type() + // will be changed to DIRECTED (see S2Builder::GraphOptions::SiblingPairs). 
+ // + // If "tracker" is provided then the memory usage of this method is tracked + // and an error is returned if the specified memory limit would be exceeded. + // This option requires that "new_edges" and "new_input_edge_id_set_ids" are + // already being tracked, i.e. their current memory usage is reflected in + // "tracker". Note that "id_set_lexicon" typically uses a negligible amount + // of memory and is not tracked. static void ProcessEdges( GraphOptions* options, std::vector* edges, std::vector* input_ids, IdSetLexicon* id_set_lexicon, - S2Error* error); + S2Error* error, S2MemoryTracker::Client* tracker = nullptr); // Given a set of vertices and edges, removes all vertices that do not have - // any edges and returned the new, minimal set of vertices. Also updates + // any edges and returns the new, minimal set of vertices. Also updates // each edge in "edges" to correspond to the new vertex numbering. (Note // that this method does *not* merge duplicate vertices, it simply removes // vertices of degree zero.) @@ -623,6 +643,42 @@ class S2Builder::Graph { static bool StableLessThan(const Edge& a, const Edge& b, EdgeId ai, EdgeId bi); + // Constructs a new graph with the given GraphOptions and containing the + // given edges. Each edge is associated with a (possibly empty) set of + // input edges as specified by new_input_edge_id_set_ids (which must be the + // same length as "new_edges") and the given IdSetLexicon (which allows + // looking up the set of input edge ids associated with a graph edge). + // Finally, the subgraph may also specify a new IsFullPolygonPredicate + // (which is used to distinguish an empty polygon possibly with degenerate + // shells from a full polygon possibly with degenerate holes). + // + // The output graph has the same set of vertices and edge labels as the + // input graph (noting that edge labels are associated with *input* edges, + // not graph edges). 
+ // + // If new_options.edge_type() is UNDIRECTED then note the following: + // + // - If this->options().edge_type() is DIRECTED then each input edge will + // be transformed into a pair of directed edges before calling + // ProcessEdges() above. + // + // - If new_options.sibling_pairs() is CREATE or REQUIRE then ProcessEdges() + // will discard half of the edges in each direction and change edge_type() + // to DIRECTED (see S2Builder::GraphOptions::SiblingPairs). + // + // If "tracker" is provided then the memory usage of this method is tracked + // and an error is returned if the specified memory limit would be exceeded. + // This option requires that "new_edges" and "new_input_edge_id_set_ids" are + // already being tracked, i.e. their current memory usage is reflected in + // "tracker". Note that "id_set_lexicon" typically uses a negligible amount + // of memory and is not tracked. + Graph MakeSubgraph( + GraphOptions new_options, std::vector* new_edges, + std::vector* new_input_edge_id_set_ids, + IdSetLexicon* new_input_edge_id_set_lexicon, + IsFullPolygonPredicate is_full_polygon_predicate, + S2Error* error, S2MemoryTracker::Client* tracker = nullptr) const; + private: class EdgeProcessor; class PolylineBuilder; @@ -758,12 +814,14 @@ inline const IdSetLexicon& S2Builder::Graph::input_edge_id_set_lexicon() const { return *input_edge_id_set_lexicon_; } -inline IdSetLexicon::IdSet S2Builder::Graph::labels(LabelSetId id) const { - return label_set_lexicon().id_set(label_set_ids()[id]); +inline IdSetLexicon::IdSet S2Builder::Graph::labels(InputEdgeId e) const { + return label_set_lexicon().id_set(label_set_id(e)); } -inline S2Builder::LabelSetId S2Builder::Graph::label_set_id(EdgeId e) const { - return label_set_ids()[e]; +inline S2Builder::LabelSetId S2Builder::Graph::label_set_id(InputEdgeId e) + const { + return label_set_ids().empty() ? 
IdSetLexicon::EmptySetId() + : label_set_ids()[e]; } inline const std::vector& diff --git a/src/s2/s2builder_layer.h b/src/s2/s2builder_layer.h index 051f07ea..b4d6039f 100644 --- a/src/s2/s2builder_layer.h +++ b/src/s2/s2builder_layer.h @@ -18,7 +18,9 @@ #ifndef S2_S2BUILDER_LAYER_H_ #define S2_S2BUILDER_LAYER_H_ +#include "s2/s2builder.h" #include "s2/s2builder_graph.h" +#include "s2/s2error.h" // This class is not needed by ordinary S2Builder clients. It is only // necessary if you wish to implement a new S2Builder::Layer subtype. @@ -31,7 +33,7 @@ class S2Builder::Layer { using Label = S2Builder::Label; using LabelSetId = S2Builder::LabelSetId; - virtual ~Layer() {} + virtual ~Layer() = default; // Defines options for building the edge graph that is passed to Build(). virtual GraphOptions graph_options() const = 0; diff --git a/src/s2/s2builderutil_closed_set_normalizer.cc b/src/s2/s2builderutil_closed_set_normalizer.cc index 8be85f72..bd3c00e7 100644 --- a/src/s2/s2builderutil_closed_set_normalizer.cc +++ b/src/s2/s2builderutil_closed_set_normalizer.cc @@ -17,12 +17,19 @@ #include "s2/s2builderutil_closed_set_normalizer.h" +#include #include +#include +#include -#include "absl/memory/memory.h" +#include "s2/id_set_lexicon.h" +#include "s2/s2builder.h" +#include "s2/s2builder_graph.h" #include "s2/s2builder_layer.h" +#include "s2/s2builderutil_find_polygon_degeneracies.h" +#include "s2/s2error.h" -using absl::make_unique; +using std::make_unique; using std::shared_ptr; using std::unique_ptr; using std::vector; @@ -103,7 +110,8 @@ const vector& ClosedSetNormalizer::Run( bool modified[3]; bool any_modified = false; for (int dim = 2; dim >= 0; --dim) { - if (new_edges_[dim].size() != g[dim].num_edges()) any_modified = true; + if (new_edges_[dim].size() != static_cast(g[dim].num_edges())) + any_modified = true; modified[dim] = any_modified; } if (!any_modified) { @@ -146,8 +154,9 @@ inline Edge ClosedSetNormalizer::Advance(const Graph& g, EdgeId* e) const { // 
returning a sentinel value once all edges are exhausted. inline Edge ClosedSetNormalizer::AdvanceIncoming( const Graph& g, const vector& in_edges, int* i) const { - return ((++*i == in_edges.size()) ? sentinel_ : - Graph::reverse(g.edge(in_edges[*i]))); + return ((static_cast(++*i) == in_edges.size()) + ? sentinel_ + : Graph::reverse(g.edge(in_edges[*i]))); } void ClosedSetNormalizer::NormalizeEdges(const vector& g, diff --git a/src/s2/s2builderutil_closed_set_normalizer.h b/src/s2/s2builderutil_closed_set_normalizer.h index c7d9fc68..8bcb109a 100644 --- a/src/s2/s2builderutil_closed_set_normalizer.h +++ b/src/s2/s2builderutil_closed_set_normalizer.h @@ -18,10 +18,14 @@ #ifndef S2_S2BUILDERUTIL_CLOSED_SET_NORMALIZER_H_ #define S2_S2BUILDERUTIL_CLOSED_SET_NORMALIZER_H_ +#include #include + #include "s2/id_set_lexicon.h" +#include "s2/s2builder.h" #include "s2/s2builder_graph.h" #include "s2/s2builderutil_find_polygon_degeneracies.h" +#include "s2/s2error.h" namespace s2builderutil { @@ -184,10 +188,10 @@ using LayerVector = std::vector>; // polyline_options.set_polyline_type(Graph::PolylineType::WALK); // polyline_options.set_duplicate_edges(DuplicateEdges::MERGE); // LayerVector layers(3); -// layers[0] = absl::make_unique(index); -// layers[1] = absl::make_unique( +// layers[0] = std::make_unique(index); +// layers[1] = std::make_unique( // index, polyline_options); -// layers[2] = absl::make_unique(index); +// layers[2] = std::make_unique(index); // S2BooleanOperation op(S2BooleanOperation::OpType::UNION, // NormalizeClosedSet(std::move(layers))); // return op.Build(a, b, error); diff --git a/src/s2/s2builderutil_find_polygon_degeneracies.cc b/src/s2/s2builderutil_find_polygon_degeneracies.cc index 28bc67cb..27a19b11 100644 --- a/src/s2/s2builderutil_find_polygon_degeneracies.cc +++ b/src/s2/s2builderutil_find_polygon_degeneracies.cc @@ -17,22 +17,27 @@ #include "s2/s2builderutil_find_polygon_degeneracies.h" -#include +#include +#include #include #include 
-#include "absl/memory/memory.h" #include "s2/mutable_s2shape_index.h" +#include "s2/s2builder.h" #include "s2/s2builder_graph.h" #include "s2/s2builderutil_graph_shape.h" #include "s2/s2contains_vertex_query.h" #include "s2/s2crossing_edge_query.h" #include "s2/s2edge_crosser.h" +#include "s2/s2error.h" +#include "s2/s2point.h" #include "s2/s2pointutil.h" #include "s2/s2predicates.h" +#include "s2/s2shape.h" +#include "s2/s2shapeutil_shape_edge_id.h" -using absl::make_unique; using std::make_pair; +using std::make_unique; using std::pair; using std::vector; @@ -158,8 +163,8 @@ vector DegeneracyFinder::Run(S2Error* error) { known_vertex = FindUnbalancedVertex(); known_vertex_sign = ContainsVertexSign(known_vertex); } - const int kMaxUnindexedContainsCalls = 20; // Tuned using benchmarks. - if (num_unknown_signs <= kMaxUnindexedContainsCalls) { + const int kMaxUnindexedSignComputations = 25; // Tuned using benchmarks. + if (num_unknown_signs <= kMaxUnindexedSignComputations) { ComputeUnknownSignsBruteForce(known_vertex, known_vertex_sign, &components); } else { @@ -254,7 +259,7 @@ bool DegeneracyFinder::CrossingParity(VertexId v0, VertexId v1, int crossings = 0; S2Point p0 = g_.vertex(v0); S2Point p1 = g_.vertex(v1); - S2Point p0_ref = S2::Ortho(p0); + S2Point p0_ref = S2::RefDir(p0); for (const Edge& edge : out_.edges(v0)) { if (edge.second == v1) { if (include_same) ++crossings; @@ -277,7 +282,7 @@ VertexId DegeneracyFinder::FindUnbalancedVertex() const { for (VertexId v = 0; v < g_.num_vertices(); ++v) { if (is_vertex_unbalanced_[v]) return v; } - S2_LOG(DFATAL) << "Could not find previously marked unbalanced vertex"; + S2_LOG(ERROR) << "Could not find previously marked unbalanced vertex"; return -1; } diff --git a/src/s2/s2builderutil_find_polygon_degeneracies.h b/src/s2/s2builderutil_find_polygon_degeneracies.h index 49157ac6..5e502ed9 100644 --- a/src/s2/s2builderutil_find_polygon_degeneracies.h +++ b/src/s2/s2builderutil_find_polygon_degeneracies.h @@ -21,6 
+21,7 @@ #include #include "s2/base/integral_types.h" +#include "s2/s2builder.h" #include "s2/s2builder_graph.h" #include "s2/s2error.h" diff --git a/src/s2/s2builderutil_get_snapped_winding_delta.cc b/src/s2/s2builderutil_get_snapped_winding_delta.cc new file mode 100644 index 00000000..dd84f35f --- /dev/null +++ b/src/s2/s2builderutil_get_snapped_winding_delta.cc @@ -0,0 +1,438 @@ +// Copyright 2020 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS-IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// Author: ericv@google.com (Eric Veach) +// +// The following algorithm would not be necessary with planar geometry, since +// then winding numbers could be computed by starting at a point at infinity +// (whose winding number is zero) and counting signed edge crossings. However +// points at infinity do not exist on the sphere. +// +// Instead we compute the change in winding number of a reference vertex R +// using only the set of edges incident to the snapped reference vertex R'. +// Essentially this involves looking at the set of input edges that snapped to +// R' and assembling them into edge chains. These edge chains can be divided +// into two categories: +// +// (1) Edge chains that are entirely contained by the Voronoi region of R'. +// This means that the input edges form a closed loop where every vertex +// snaps to R'. 
We can compute the change in winding number due to this +// loop by simply choosing a point Z outside the Voronoi region of R' and +// computing the winding numbers of R and R' relative to Z. +// +// (2) Edge chains that enter the Voronoi region of R' and later leave it. In +// this case the input chain has the form C = (A0, A1, ..., B0, B1) where +// A0 and B1 are outside the Voronoi region of R' and all other vertices +// snap to R'. In the vicinity of R' this input chain snaps to a chain C' +// of the form (A0', R', B1') where A0' is the second-last vertex in the +// snapped edge chain for A0A1 and B1' is the second vertex in the snapped +// edge chain for B0B1. In principle we handle this similarly to the case +// above (by finding a point Z whose change in winding number is known, +// and then counting signed edge crossings along ZR with respect to C and +// along ZR' with respect to C'). However the details are more +// complicated and are described in GetSnappedWindingDelta(). +// +// The total change in winding number is simply the sum of the changes in +// winding number due to each of these edge chains. 
+ +#include "s2/s2builderutil_get_snapped_winding_delta.h" + +#include +#include +#include + +#include "absl/container/btree_map.h" +#include "absl/types/span.h" +#include "s2/id_set_lexicon.h" +#include "s2/s2builder.h" +#include "s2/s2builder_graph.h" +#include "s2/s2edge_crosser.h" +#include "s2/s2edge_crossings.h" +#include "s2/s2edge_distances.h" +#include "s2/s2error.h" +#include "s2/s2point.h" +#include "s2/s2pointutil.h" +#include "s2/s2shape.h" + +using absl::Span; +using std::make_pair; +using std::vector; + +using Graph = S2Builder::Graph; +using GraphOptions = S2Builder::GraphOptions; + +using EdgeId = Graph::EdgeId; +using InputEdgeId = Graph::InputEdgeId; +using VertexId = Graph::VertexId; + +namespace s2builderutil { + +namespace { + +// An input edge may snap to zero, one, or two non-degenerate output edges +// incident to the reference vertex, consisting of at most one incoming and +// one outgoing edge. +// +// If v_in >= 0, an incoming edge to the reference vertex is present. +// If v_out >= 0, an outgoing edge from the reference vertex is present. +struct EdgeSnap { + S2Shape::Edge input; + VertexId v_in = -1; + VertexId v_out = -1; +}; + +// A map that allows finding all the input edges that start at a given point. +using InputVertexEdgeMap = absl::btree_multimap; + +// The winding number returned when a usage error is detected. +constexpr int kErrorResult = std::numeric_limits::max(); + +bool BuildChain( + VertexId ref_v, const Graph& g, InputVertexEdgeMap* input_vertex_edge_map, + vector* chain_in, vector* chain_out, S2Error* error) { + S2_DCHECK(chain_in->empty()); + S2_DCHECK(chain_out->empty()); + + // First look for an incoming edge to the reference vertex. (This will be + // the start of a chain that eventually leads to an outgoing edge.) 
+ { + auto it = input_vertex_edge_map->begin(); + for (; it != input_vertex_edge_map->end(); ++it) { + const EdgeSnap& snap = it->second; + if (snap.v_in >= 0) { + chain_out->push_back(g.vertex(snap.v_in)); + break; + } + } + if (it == input_vertex_edge_map->end()) { + // Pick an arbitrary edge to start a closed loop. + it = input_vertex_edge_map->begin(); + } + EdgeSnap snap = it->second; + input_vertex_edge_map->erase(it); + + chain_in->push_back(snap.input.v0); + chain_in->push_back(snap.input.v1); + chain_out->push_back(g.vertex(ref_v)); + if (snap.v_out >= 0) { + // This input edge enters and immediately exits the Voronoi region. + chain_out->push_back(g.vertex(snap.v_out)); + return true; + } + } + + // Now repeatedly add edges until the chain or loop is finished. + while (chain_in->back() != chain_in->front()) { + const auto& range = input_vertex_edge_map->equal_range(chain_in->back()); + if (range.first == range.second) { + error->Init(S2Error::INVALID_ARGUMENT, + "Input edges (after filtering) do not form loops"); + return false; + } + EdgeSnap snap = range.first->second; + input_vertex_edge_map->erase(range.first); + chain_in->push_back(snap.input.v1); + if (snap.v_out >= 0) { + // The chain has exited the Voronoi region. + chain_out->push_back(g.vertex(snap.v_out)); + break; + } + } + return true; +} + +// Returns the change in winding number along the edge AB with respect to the +// given edge chain. This is simply the sum of the signed edge crossings. +int GetEdgeWindingDelta(const S2Point& a, const S2Point& b, + absl::Span chain) { + S2_DCHECK_GT(chain.size(), 0); + + int delta = 0; + S2EdgeCrosser crosser(&a, &b, &chain[0]); + for (size_t i = 1; i < chain.size(); ++i) { + delta += crosser.SignedEdgeOrVertexCrossing(&chain[i]); + } + return delta; +} + +// Given an input edge (B0, B1) that snaps to an edge chain (B0', B1', ...), +// returns a connecting vertex "Bc" that can be used as a substitute for the +// remaining snapped vertices "..." 
when computing winding numbers. This +// requires that (1) the edge (B1', Bc) does not intersect the Voronoi region +// of B0', and (2) the edge chain (B0', B1', Bc, B1) always stays within the +// snap radius of the input edge (B0, B1). +S2Point GetConnector(const S2Point& b0, const S2Point& b1, + const S2Point& b1_snapped) { + // If B1' within 90 degrees of B1, no connecting vertex is necessary. + if (b1_snapped.DotProd(b1) >= 0) return b1; + + // Otherwise we use the point on (B0, B1) that is 90 degrees away from B1'. + // This is sufficient to ensure conditions (1) and (2). + S2Point x = S2::RobustCrossProd(b0, b1).CrossProd(b1_snapped).Normalize(); + return (x.DotProd(S2::Interpolate(b0, b1, 0.5)) >= 0) ? x : -x; +} + +// Returns the set of incoming and outgoing edges incident to the given +// vertex. This method takes time linear in the size of the graph "g"; +// if you need to call this function many times then it is more efficient to +// use Graph::VertexOutMap and Graph::VertexInMap instead. 
+vector GetIncidentEdgesBruteForce(VertexId v, const Graph& g) { + vector result; + for (EdgeId e = 0; e < g.num_edges(); ++e) { + if (g.edge(e).first == v || g.edge(e).second == v) { + result.push_back(e); + } + } + return result; +} + +} // namespace + +int GetSnappedWindingDelta( + const S2Point& ref_in, VertexId ref_v, Span incident_edges, + const InputEdgeFilter& input_edge_filter, const S2Builder& builder, + const Graph& g, S2Error* error) { + S2_DCHECK(!builder.options().simplify_edge_chains()); + S2_DCHECK(g.options().edge_type() == S2Builder::EdgeType::DIRECTED); + S2_DCHECK(g.options().degenerate_edges() == GraphOptions::DegenerateEdges::KEEP); + S2_DCHECK(g.options().sibling_pairs() == GraphOptions::SiblingPairs::KEEP || + g.options().sibling_pairs() == GraphOptions::SiblingPairs::REQUIRE || + g.options().sibling_pairs() == GraphOptions::SiblingPairs::CREATE); + + // First we group all the incident edges by input edge id, to handle the + // problem that input edges can map to either one or two snapped edges. + absl::btree_map input_id_edge_map; + for (EdgeId e : incident_edges) { + Graph::Edge edge = g.edge(e); + for (InputEdgeId input_id : g.input_edge_ids(e)) { + if (input_edge_filter && input_edge_filter(input_id)) continue; + EdgeSnap* snap = &input_id_edge_map[input_id]; + snap->input = builder.input_edge(input_id); + if (edge.first != ref_v) snap->v_in = edge.first; + if (edge.second != ref_v) snap->v_out = edge.second; + } + } + // Now we regroup the edges according to the reference vertex of the + // corresponding input edge. This makes it easier to assemble these edges + // into (portions of) input edge loops. + InputVertexEdgeMap input_vertex_edge_map; + for (const auto& entry : input_id_edge_map) { + const EdgeSnap& snap = entry.second; + input_vertex_edge_map.insert(make_pair(snap.input.v0, snap)); + } + + // The position of the reference vertex after snapping. 
In comments we will + // refer to the reference vertex before and after snapping as R and R'. + S2Point ref_out = g.vertex(ref_v); + + // Now we repeatedly assemble edges into an edge chain and determine the + // change in winding number due to snapping of that edge chain. These + // values are summed to compute the final winding number delta. + // + // An edge chain is either a closed loop of input vertices where every + // vertex snapped to the reference vertex R', or a partial loop such that + // all interior vertices snap to R' while the first and last vertex do not. + // Note that the latter includes the case where the first and last input + // vertices in the chain are identical but do not snap to R'. + // + // Essentially we compute the winding number of the unsnapped reference + // vertex R with respect to the input chain and the winding number of the + // snapped reference vertex R' with respect to the output chain, and then + // subtract them. In the case of open chains, we compute winding numbers as + // if the chains had been closed in a way that preserves topology while + // snapping (i.e., in a way that does not cause the chain to pass through + // the reference vertex as it continuously deforms from the input to the + // output). + // + // Any changes to this code should be validated by running the RandomLoops + // unit test with at least 10 million iterations. + int winding_delta = 0; + while (!input_vertex_edge_map.empty()) { + vector chain_in, chain_out; + if (!BuildChain(ref_v, g, &input_vertex_edge_map, + &chain_in, &chain_out, error)) { + return kErrorResult; + } + if (chain_out.size() == 1) { + // We have a closed chain C of input vertices such that every vertex + // snaps to the reference vertex R'. Therefore we can easily find a + // point Z whose winding number is not affected by the snapping of C; it + // just needs to be outside the Voronoi region of R'.
Since the snap + // radius is at most 70 degrees, we can use a point 90 degrees away such + // as S2::Ortho(R'). + // + // We then compute the winding numbers of R and R' relative to Z. We + // compute the winding number of R by counting signed crossings of the + // edge ZR, while the winding number of R' relative to Z is always zero + // because the snapped chain collapses to a single point. + S2_DCHECK_EQ(chain_out[0], ref_out); // Snaps to R'. + S2_DCHECK_EQ(chain_in[0], chain_in.back()); // Chain is a loop. + S2Point z = S2::Ortho(ref_out); + winding_delta += 0 - GetEdgeWindingDelta(z, ref_in, chain_in); + } else { + // We have an input chain C = (A0, A1, ..., B0, B1) that snaps to a + // chain C' = (A0', R', B1'), where A0 and B1 are outside the Voronoi + // region of R' and all other input vertices snap to R'. This includes + // the case where A0 == B1 and also the case where the input chain + // consists of only two vertices. Note that technically the chain C + // snaps to a supersequence of C', since A0A1 snaps to a chain whose + // last two vertices are (A0', R') and B0B1 snaps to a chain whose first + // two vertices are (R', B1'). This implies that A0 does not + // necessarily snap to A0', and similarly for B1 and B1'. + // + // Note that A0 and B1 can be arbitrarily far away from R'. This makes + // it difficult (on the sphere) to construct a point Z whose winding + // number is guaranteed not to be affected by snapping the edges A0A1 + // and B0B1. Instead we construct two points Za and Zb such that Za is + // guaranteed not to be affected by the snapping of A0A1, Zb is guaranteed + // not to be affected by the snapping of B0B1, and both points are + // guaranteed not to be affected by the snapping of any other input + // edges. We can then compute the change in winding number of Zb by + // considering only the single edge A0A1 that snaps to A0'R'.
+ // Furthermore we can compute the latter by using Za as the reference + // point, since its winding number is guaranteed not to be affected by this + // particular edge. + // + // Given the point Zb, whose change in winding number is now known, we + // can compute the change in winding number of the reference vertex R. + // We essentially want to count the signed crossings of ZbR' with respect + // to C' and subtract the signed crossings of ZbR with respect to C, + // which we will write as s(ZbR', C') - s(ZbR, C). + // + // However to do this we need to close both chains in a way that is + // topologically consistent and does not affect the winding number of + // Zb. This can be achieved by counting the signed crossings of ZbR' by + // following the two-edge path (Zb, R, R'). In other words, we compute + // this as s(ZbR, C') + s(RR', C') - s(ZbR, C). We can then compute + // s(ZbR, C') - s(ZbR, C) by simply concatenating the vertices of C in + // reverse order to C' to form a single closed loop. The remaining term + // s(RR', C') can be implemented as signed edge crossing tests, or more + // directly by testing whether R is contained by the wedge C'. + S2_DCHECK_EQ(chain_out.size(), 3); + S2_DCHECK_EQ(chain_out[1], ref_out); + + // Compute two points Za and Zb such that Za is not affected by the + // snapping of any edge except possibly B0B1, and Zb is not affected by + // the snapping of any edge except possibly A0A1. Za and Zb are simply + // the normals to the edges A0A1 and B0B1 respectively, with their sign + // chosen to point away from the Voronoi site R'. This ensures at least + // 20 degrees of separation from all edges except the ones mentioned.
+ S2Point za = S2::RobustCrossProd(chain_in[0], chain_in[1]).Normalize(); + S2Point zb = S2::RobustCrossProd(chain_in.end()[-2], chain_in.back()) + .Normalize(); + if (za.DotProd(ref_out) > 0) za = -za; + if (zb.DotProd(ref_out) > 0) zb = -zb; + + // We now want to determine the change in winding number of Zb due to + // A0A1 snapping to A0'R'. Conceptually we do this by closing these + // two single-edge chains into loops L and L' and then computing + // s(ZaZb, L') - s(ZaZb, L). Recall that Za was constructed so as not + // to be affected by the snapping of edge A0A1, however this is only + // true provided that L can snap to L' without passing through Za. + // + // To achieve this we let L be the degenerate loop (A0, A1, A0), and L' + // be the loop (A0', R', A1, A0, A0'). The only problem is that we need + // to ensure that the edge A0A0' stays within 90 degrees of A0A1, since + // otherwise when the latter edge snaps to the former it might pass + // through Za. (This problem arises because we only consider the last + // two vertices (A0', R') that A0A1 snaps to. If we used the full chain + // of snapped vertices for A0A1 then L' would always stay within the + // snap radius of this edge.) + // + // The simplest way to fix this problem is to insert a connecting vertex + // Ac between A0 and A0'. This vertex acts as a proxy for the missing + // snapped vertices, yielding the loop L' = (A0', R', A1, A0, Ac, A0'). + // The vertex Ac is located on the edge A0A1 and is at most 90 degrees + // away from A0'. This ensures that the chain (A0, Ac, A0') always + // stays within the snap radius of the input edge A0A1. + // + // Similarly we insert a connecting vertex Bc between B0 and B1 to + // ensure that the edge B1'B1 passes on the correct side of Zb.
+ S2Point a0_connector = GetConnector(chain_in[1], chain_in[0], + chain_out[0]); + S2Point b1_connector = GetConnector(chain_in.end()[-2], chain_in.back(), + chain_out[2]); + + // Compute the change in winding number for reference vertex Zb. Note + // that we must duplicate the first/last vertex of the loop since the + // argument to GetEdgeWindingDelta() is a polyline. + vector chain_z {chain_out[0], chain_out[1], chain_in[1], + chain_in[0], a0_connector, chain_out[0]}; + winding_delta += GetEdgeWindingDelta(za, zb, chain_z); + + // Compute the change in winding number of ZbR due to snapping C to C'. + // As before, conceptually we do this by closing these chains into loops + // L and L' such that L snaps to L' without passing through Zb. Again + // this can be done by concatenating the vertices of C' with the + // reversed vertices of C, along with the two extra connecting vertices + // Ac and Bc to ensure that L and L' pass on the same side of Za and Zb. + // This yields the loop (A0', R', B1', Bc, B1, B0, ..., A1, A0, Ac, A0'). + vector chain_diff = chain_out; + chain_diff.push_back(b1_connector); + chain_diff.insert(chain_diff.end(), chain_in.rbegin(), chain_in.rend()); + chain_diff.push_back(a0_connector); + chain_diff.push_back(chain_out[0]); // Close the loop. + winding_delta += GetEdgeWindingDelta(zb, ref_in, chain_diff); + + // Compute the change in winding number of RR' with respect to C' only. + // (This could also be computed using two calls to s2pred::OrderedCCW.) 
+ S2_DCHECK_EQ(chain_out[1], ref_out); + winding_delta += GetEdgeWindingDelta(ref_in, ref_out, chain_out); + } + } + return winding_delta; +} + +int GetSnappedWindingDelta( + const S2Point& ref_in, S2Builder::Graph::VertexId ref_v, + const InputEdgeFilter &input_edge_filter, const S2Builder& builder, + const Graph& g, S2Error* error) { + return GetSnappedWindingDelta( + ref_in, ref_v, GetIncidentEdgesBruteForce(ref_v, g), + input_edge_filter, builder, g, error); +} + +VertexId FindFirstVertexId(InputEdgeId input_edge_id, const Graph& g) { + // A given input edge snaps to a chain of output edges. To determine which + // output vertex the source of the given input edge snaps to, we must find + // the first edge in this chain. + // + // The search below takes linear time in the number of edges; it can be done + // more efficiently if duplicate edges are not being merged and the mapping + // returned by Graph::GetInputEdgeOrder() is available. The algorithm would + // also be much simpler if input_edge_id were known to be degenerate. + absl::btree_map<VertexId, int> excess_degree_map; + for (EdgeId e = 0; e < g.num_edges(); ++e) { + IdSetLexicon::IdSet id_set = g.input_edge_ids(e); + for (InputEdgeId id : id_set) { + if (id == input_edge_id) { + excess_degree_map[g.edge(e).first] += 1; + excess_degree_map[g.edge(e).second] -= 1; + break; + } + } + } + if (excess_degree_map.empty()) return -1; // Does not exist. + + // Look for the (unique) vertex whose excess degree is +1. + for (const auto& entry : excess_degree_map) { + if (entry.second == 1) return entry.first; + } + // Otherwise "input_edge_id" must snap to a single degenerate edge. 
+ S2_DCHECK_EQ(excess_degree_map.size(), 1); + return excess_degree_map.begin()->first; +} + +} // namespace s2builderutil diff --git a/src/s2/s2builderutil_get_snapped_winding_delta.h b/src/s2/s2builderutil_get_snapped_winding_delta.h new file mode 100644 index 00000000..d5a41174 --- /dev/null +++ b/src/s2/s2builderutil_get_snapped_winding_delta.h @@ -0,0 +1,165 @@ +// Copyright 2020 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS-IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// Author: ericv@google.com (Eric Veach) + +#ifndef S2_S2BUILDERUTIL_GET_SNAPPED_WINDING_DELTA_H_ +#define S2_S2BUILDERUTIL_GET_SNAPPED_WINDING_DELTA_H_ + +#include + +#include "absl/types/span.h" +#include "s2/s2builder.h" +#include "s2/s2builder_graph.h" +#include "s2/s2error.h" +#include "s2/s2point.h" + +namespace s2builderutil { + +// A function that returns true if the given S2Builder input edge should be +// ignored in the winding number calculation. This means either that the edge +// is not a loop edge (e.g., a non-closed polyline) or that this loop should +// not affect the winding number. This is useful for two purposes: +// +// - To process graphs that contain polylines and points in addition to loops. +// +// - To process graphs where the winding number is computed with respect to +// only a subset of the input loops. +// +// It can be default-constructed to indicate that no edges should be ignored. 
+using InputEdgeFilter = std::function<bool (S2Builder::Graph::InputEdgeId)>; + +// Given an S2Builder::Graph of output edges after snap rounding and a +// reference vertex R, computes the change in winding number of R due to +// snapping. (See S2WindingOperation for an introduction to winding numbers on +// the sphere.) The return value can be added to the original winding number +// of R to obtain the winding number of the corresponding snapped vertex R'. +// +// The algorithm requires that the S2Builder input edges consist entirely of +// (possibly self-intersecting) closed loops. If you need to process inputs +// that include other types of geometry (e.g., non-closed polylines), you will +// need to either (1) put them into a different S2Builder layer, (2) close the +// polylines into loops (e.g. using GraphOptions::SiblingPairs::CREATE), or (3) +// provide a suitable InputEdgeFilter (see above) so that the non-loop edges +// can be ignored. +// +// The algorithm is designed to be robust for any input edge configuration and +// snapping result. However note that it cannot be used in conjunction with +// edge chain simplification (S2Builder::Options::simplify_edge_chains). It +// also requires that S2Builder::GraphOptions be configured to keep all snapped +// edges, even degenerate ones (see requirements below). +// +// "ref_in" is the reference vertex location before snapping. It *must* be an +// input vertex to S2Builder, however this is not checked. +// +// "ref_v" is the Graph::VertexId of the reference vertex after snapping. +// (This can be found using the FindFirstVertexId() function below if desired.) +// +// "input_edge_filter" can optionally be used to ignore input edges that +// should not affect the winding number calculation (such as polyline edges). +// The value can be default-constructed (InputEdgeFilter{}) to use all edges. +// +// "builder" is the S2Builder that produced the given edge graph.
It is used +// to map InputEdgeIds back to the original edge definitions, and also to +// verify that no incompatible S2Builder::Options were used (see below). +// +// "g" is the S2Builder output graph of snapped edges. +// +// The only possible errors are usage errors, in which case "error" is set to +// an appropriate error message and a very large value is returned. +// +// Example usage: +// +// This function is generally called from an S2Builder::Layer implementation. +// We assume here that the reference vertex is the first vertex of the input +// edge identified by "ref_input_edge_id_", and that its desired winding number +// with respect to the input loops is "ref_winding_". +// +// using Graph = S2Builder::Graph; +// class SomeLayer : public S2Builder::Layer { +// private: +// int ref_input_edge_id_; +// int ref_winding_; +// const S2Builder& builder_; +// +// public: +// ... +// void Build(const Graph& g, S2Error* error) { +// // Find the positions of the reference vertex before and after snapping. +// S2Point ref_in = builder_.input_edge(ref_input_edge_id_).v0; +// Graph::VertexId ref_v = +// s2builderutil::FindFirstVertexId(ref_input_edge_id_, g); +// S2Point ref_out = g.vertex(ref_v); +// +// // Compute the change in winding number due to snapping. +// ref_winding_ += s2builderutil::GetSnappedWindingDelta( +// ref_in, ref_v, InputEdgeFilter{}, builder_, g, error); +// S2_CHECK(error->ok()); // All errors are usage errors. +// +// // Winding numbers of other points can now be found by counting signed +// // edge crossings (S2EdgeCrosser::SignedEdgeOrVertexCrossing) between +// // "ref_out" and the desired point. Note that if DuplicateEdges::MERGE +// // or SiblingPairs::CREATE was used, each crossing has a multiplicity +// // equal to the number of non-filtered input edges that snapped to that +// // output edge. +// } +// } +// +// REQUIRES: The input edges after filtering consist entirely of closed loops. 
+// (If DuplicateEdges::MERGE or SiblingPairs::CREATE was used, +// each graph edge has a multiplicity equal to the number of +// non-filtered input edges that snapped to it.) +// +// REQUIRES: g.options().edge_type() == DIRECTED +// REQUIRES: g.options().degenerate_edges() == KEEP +// REQUIRES: g.options().sibling_pairs() == {KEEP, REQUIRE, CREATE} +// REQUIRES: builder.options().simplify_edge_chains() == false +// +// CAVEAT: The running time is proportional to the S2Builder::Graph size. If +// you need to call this function many times on the same graph then +// use the alternate version below. (Most clients only need to call +// GetSnappedWindingDelta() once per graph because the winding numbers +// of other points can be computed by counting signed edge crossings.) +int GetSnappedWindingDelta( + const S2Point& ref_in, S2Builder::Graph::VertexId ref_v, + const InputEdgeFilter &input_edge_filter, const S2Builder& builder, + const S2Builder::Graph& g, S2Error* error); + +// This version can be used when GetSnappedWindingDelta() needs to be called +// many times on the same graph. It is faster than the function above, but +// less convenient to use because it requires the client to provide the set of +// graph edges incident to the snapped reference vertex. It runs in time +// proportional to the size of this set. +// +// "incident_edges" is the set of incoming and outgoing graph edges incident +// to ref_v. (These edges can be found efficiently using Graph::VertexOutMap +// and Graph::VertexInMap.) +// +// See the function above for the remaining parameters and requirements. +int GetSnappedWindingDelta( + const S2Point& ref_in, S2Builder::Graph::VertexId ref_v, + absl::Span incident_edges, + const InputEdgeFilter &input_edge_filter, const S2Builder& builder, + const S2Builder::Graph& g, S2Error* error); + +// Returns the first vertex of the snapped edge chain for the given input +// edge, or -1 if this input edge does not exist in the graph "g". 
+S2Builder::Graph::VertexId FindFirstVertexId( + S2Builder::Graph::InputEdgeId input_edge_id, const S2Builder::Graph& g); + +} // namespace s2builderutil + +#endif // S2_S2BUILDERUTIL_GET_SNAPPED_WINDING_DELTA_H_ diff --git a/src/s2/s2builderutil_graph_shape.h b/src/s2/s2builderutil_graph_shape.h index 9edbc2b2..5fbcfdec 100644 --- a/src/s2/s2builderutil_graph_shape.h +++ b/src/s2/s2builderutil_graph_shape.h @@ -20,7 +20,9 @@ #include +#include "s2/s2builder.h" #include "s2/s2builder_graph.h" +#include "s2/s2shape.h" namespace s2builderutil { diff --git a/src/s2/s2builderutil_lax_polygon_layer.cc b/src/s2/s2builderutil_lax_polygon_layer.cc index 9b92240d..f84a0665 100644 --- a/src/s2/s2builderutil_lax_polygon_layer.cc +++ b/src/s2/s2builderutil_lax_polygon_layer.cc @@ -18,10 +18,17 @@ #include "s2/s2builderutil_lax_polygon_layer.h" #include -#include -#include "absl/memory/memory.h" +#include +#include + +#include "s2/id_set_lexicon.h" +#include "s2/s2builder.h" +#include "s2/s2builder_graph.h" +#include "s2/s2builder_layer.h" #include "s2/s2builderutil_find_polygon_degeneracies.h" -#include "s2/s2debug.h" +#include "s2/s2error.h" +#include "s2/s2lax_polygon_shape.h" +#include "s2/s2point.h" using std::vector; @@ -72,11 +79,8 @@ GraphOptions LaxPolygonLayer::graph_options() const { DuplicateEdges::KEEP, SiblingPairs::DISCARD); } else { // Keep at most one copy of each sibling pair and each isolated vertex. - // We need DuplicateEdges::MERGE because DegenerateEdges::DISCARD_EXCESS - // can still keep multiple copies (it only discards degenerate edges that - // are connected to non-degenerate edges). 
return GraphOptions(options_.edge_type(), DegenerateEdges::DISCARD_EXCESS, - DuplicateEdges::MERGE, SiblingPairs::DISCARD_EXCESS); + DuplicateEdges::KEEP, SiblingPairs::DISCARD_EXCESS); } } @@ -165,7 +169,7 @@ void LaxPolygonLayer::BuildDirected(Graph g, S2Error* error) { (degenerate_boundaries == DegenerateBoundaries::DISCARD_HOLES); auto degeneracies = s2builderutil::FindPolygonDegeneracies(g, error); if (!error->ok()) return; - if (degeneracies.size() == g.num_edges()) { + if (degeneracies.size() == static_cast(g.num_edges())) { if (degeneracies.empty()) { MaybeAddFullLoop(g, &loops, error); } else if (degeneracies[0].is_hole) { diff --git a/src/s2/s2builderutil_lax_polygon_layer.h b/src/s2/s2builderutil_lax_polygon_layer.h index 8a1e63ea..565cf98f 100644 --- a/src/s2/s2builderutil_lax_polygon_layer.h +++ b/src/s2/s2builderutil_lax_polygon_layer.h @@ -33,9 +33,11 @@ #define S2_S2BUILDERUTIL_LAX_POLYGON_LAYER_H_ #include +#include #include + +#include "s2/base/integral_types.h" #include "s2/base/logging.h" -#include "absl/memory/memory.h" #include "s2/id_set_lexicon.h" #include "s2/mutable_s2shape_index.h" #include "s2/s2builder.h" @@ -43,6 +45,8 @@ #include "s2/s2builder_layer.h" #include "s2/s2error.h" #include "s2/s2lax_polygon_shape.h" +#include "s2/s2point.h" +#include "s2/s2shape.h" namespace s2builderutil { @@ -106,8 +110,11 @@ class LaxPolygonLayer : public S2Builder::Layer { // since it maintains the closest fidelity to the original geometry.) 
// // DEFAULT: DegenerateBoundaries::KEEP - enum class DegenerateBoundaries { - DISCARD, DISCARD_HOLES, DISCARD_SHELLS, KEEP + enum class DegenerateBoundaries : uint8 { + DISCARD, + DISCARD_HOLES, + DISCARD_SHELLS, + KEEP }; DegenerateBoundaries degenerate_boundaries() const; void set_degenerate_boundaries(DegenerateBoundaries degenerate_boundaries); diff --git a/src/s2/s2builderutil_lax_polyline_layer.cc b/src/s2/s2builderutil_lax_polyline_layer.cc new file mode 100644 index 00000000..4f4ab1b4 --- /dev/null +++ b/src/s2/s2builderutil_lax_polyline_layer.cc @@ -0,0 +1,104 @@ +// Copyright 2020 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS-IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +// Author: ericv@google.com (Eric Veach) + +#include "s2/s2builderutil_lax_polyline_layer.h" + +#include + +#include "s2/id_set_lexicon.h" +#include "s2/s2builder.h" +#include "s2/s2builder_graph.h" +#include "s2/s2builder_layer.h" +#include "s2/s2error.h" +#include "s2/s2lax_polyline_shape.h" +#include "s2/s2point.h" + +using std::vector; + +using EdgeType = S2Builder::EdgeType; +using Graph = S2Builder::Graph; +using GraphOptions = S2Builder::GraphOptions; +using Label = S2Builder::Label; + +using DegenerateEdges = GraphOptions::DegenerateEdges; +using DuplicateEdges = GraphOptions::DuplicateEdges; +using SiblingPairs = GraphOptions::SiblingPairs; + +using EdgeId = Graph::EdgeId; +using PolylineType = Graph::PolylineType; + +namespace s2builderutil { + +LaxPolylineLayer::LaxPolylineLayer(S2LaxPolylineShape* polyline, + const LaxPolylineLayer::Options& options) { + Init(polyline, nullptr, nullptr, options); +} + +LaxPolylineLayer::LaxPolylineLayer( + S2LaxPolylineShape* polyline, LabelSetIds* label_set_ids, + IdSetLexicon* label_set_lexicon, const Options& options) { + Init(polyline, label_set_ids, label_set_lexicon, options); +} + +void LaxPolylineLayer::Init(S2LaxPolylineShape* polyline, + LabelSetIds* label_set_ids, + IdSetLexicon* label_set_lexicon, + const Options& options) { + S2_DCHECK_EQ(label_set_ids == nullptr, label_set_lexicon == nullptr); + polyline_ = polyline; + label_set_ids_ = label_set_ids; + label_set_lexicon_ = label_set_lexicon; + options_ = options; +} + +GraphOptions LaxPolylineLayer::graph_options() const { + return GraphOptions(options_.edge_type(), DegenerateEdges::KEEP, + DuplicateEdges::KEEP, SiblingPairs::KEEP); +} + +void LaxPolylineLayer::Build(const Graph& g, S2Error* error) { + if (g.num_edges() == 0) { + polyline_->Init(vector{}); + return; + } + vector edge_polylines = + g.GetPolylines(PolylineType::WALK); + if (edge_polylines.size() != 1) { + error->Init(S2Error::BUILDER_EDGES_DO_NOT_FORM_POLYLINE, + "Input edges cannot 
be assembled into polyline"); + return; + } + const Graph::EdgePolyline& edge_polyline = edge_polylines[0]; + vector vertices; // Temporary storage for vertices. + vertices.reserve(edge_polyline.size()); + vertices.push_back(g.vertex(g.edge(edge_polyline[0]).first)); + for (EdgeId e : edge_polyline) { + vertices.push_back(g.vertex(g.edge(e).second)); + } + if (label_set_ids_) { + Graph::LabelFetcher fetcher(g, options_.edge_type()); + vector

::internal_verify( - const node_type *node, const key_type *lo, const key_type *hi) const { - assert(node->count() > 0); - assert(node->count() <= node->max_count()); - if (lo) { - assert(!compare_keys(node->key(0), *lo)); - } - if (hi) { - assert(!compare_keys(*hi, node->key(node->count() - 1))); - } - for (int i = 1; i < node->count(); ++i) { - assert(!compare_keys(node->key(i), node->key(i - 1))); - } - int count = node->count(); - if (!node->leaf()) { - for (int i = 0; i <= node->count(); ++i) { - assert(node->child(i) != nullptr); - assert(node->child(i)->parent() == node); - assert(node->child(i)->position() == i); - count += internal_verify( - node->child(i), - (i == 0) ? lo : &node->key(i - 1), - (i == node->count()) ? hi : &node->key(i)); - } - } - return count; -} - -} // namespace internal_btree -} // namespace gtl - - -#endif // S2_UTIL_GTL_BTREE_H_ diff --git a/src/s2/util/gtl/btree_container.h b/src/s2/util/gtl/btree_container.h deleted file mode 100644 index 5a8b57f0..00000000 --- a/src/s2/util/gtl/btree_container.h +++ /dev/null @@ -1,411 +0,0 @@ -// Copyright 2007 Google Inc. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS-IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// - -// - -#ifndef S2_UTIL_GTL_BTREE_CONTAINER_H_ -#define S2_UTIL_GTL_BTREE_CONTAINER_H_ - -#include -#include -#include - -#include "absl/base/internal/throw_delegate.h" -#include "s2/util/gtl/btree.h" // IWYU pragma: export - -namespace gtl { -namespace internal_btree { - -// A common base class for btree_set, btree_map, btree_multiset, and -// btree_multimap. -template -class btree_container { - public: - using key_type = typename Tree::key_type; - using value_type = typename Tree::value_type; - using size_type = typename Tree::size_type; - using difference_type = typename Tree::difference_type; - using key_compare = typename Tree::key_compare; - using value_compare = typename Tree::value_compare; - using allocator_type = typename Tree::allocator_type; - using reference = typename Tree::reference; - using const_reference = typename Tree::const_reference; - using pointer = typename Tree::pointer; - using const_pointer = typename Tree::const_pointer; - using iterator = typename Tree::iterator; - using const_iterator = typename Tree::const_iterator; - using reverse_iterator = typename Tree::reverse_iterator; - using const_reverse_iterator = typename Tree::const_reverse_iterator; - - // Constructors/assignments. - btree_container() : tree_(key_compare(), allocator_type()) {} - explicit btree_container(const key_compare &comp, - const allocator_type &alloc = allocator_type()) - : tree_(comp, alloc) {} - btree_container(const btree_container &x) = default; - btree_container(btree_container &&x) noexcept = default; - btree_container &operator=(const btree_container &x) = default; - btree_container &operator=(btree_container &&x) noexcept( - std::is_nothrow_move_assignable::value) = default; - - // Iterator routines. 
- iterator begin() { return tree_.begin(); } - const_iterator begin() const { return tree_.begin(); } - const_iterator cbegin() const { return tree_.begin(); } - iterator end() { return tree_.end(); } - const_iterator end() const { return tree_.end(); } - const_iterator cend() const { return tree_.end(); } - reverse_iterator rbegin() { return tree_.rbegin(); } - const_reverse_iterator rbegin() const { return tree_.rbegin(); } - const_reverse_iterator crbegin() const { return tree_.rbegin(); } - reverse_iterator rend() { return tree_.rend(); } - const_reverse_iterator rend() const { return tree_.rend(); } - const_reverse_iterator crend() const { return tree_.rend(); } - - // Lookup routines. - template - iterator lower_bound(const K &key) { - return tree_.lower_bound(key); - } - template - const_iterator lower_bound(const K &key) const { - return tree_.lower_bound(key); - } - template - iterator upper_bound(const K &key) { - return tree_.upper_bound(key); - } - template - const_iterator upper_bound(const K &key) const { - return tree_.upper_bound(key); - } - template - std::pair equal_range(const K &key) { - return tree_.equal_range(key); - } - template - std::pair equal_range(const K &key) const { - return tree_.equal_range(key); - } - - // Utility routines. - void clear() { tree_.clear(); } - void swap(btree_container &x) { tree_.swap(x.tree_); } - void verify() const { tree_.verify(); } - - // Size routines. 
- size_type size() const { return tree_.size(); } - size_type max_size() const { return tree_.max_size(); } - bool empty() const { return tree_.empty(); } - size_type height() const { return tree_.height(); } - size_type internal_nodes() const { return tree_.internal_nodes(); } - size_type leaf_nodes() const { return tree_.leaf_nodes(); } - size_type nodes() const { return tree_.nodes(); } - size_type bytes_used() const { return tree_.bytes_used(); } - static double average_bytes_per_value() { - return Tree::average_bytes_per_value(); - } - double fullness() const { return tree_.fullness(); } - double overhead() const { return tree_.overhead(); } - - friend bool operator==(const btree_container &x, const btree_container &y) { - if (x.size() != y.size()) return false; - return std::equal(x.begin(), x.end(), y.begin()); - } - - friend bool operator!=(const btree_container &x, const btree_container &y) { - return !(x == y); - } - - friend bool operator<(const btree_container &x, const btree_container &y) { - return std::lexicographical_compare(x.begin(), x.end(), y.begin(), y.end()); - } - - friend bool operator>(const btree_container &x, const btree_container &y) { - return y < x; - } - - friend bool operator<=(const btree_container &x, const btree_container &y) { - return !(y < x); - } - - friend bool operator>=(const btree_container &x, const btree_container &y) { - return !(x < y); - } - - // The allocator used by the btree. - allocator_type get_allocator() const { return tree_.get_allocator(); } - - // The key comparator used by the btree. - key_compare key_comp() const { return tree_.key_comp(); } - value_compare value_comp() const { return tree_.value_comp(); } - - // Support absl::Hash. - template - friend State AbslHashValue(State h, const btree_container &b) { - for (const auto &v : b) { - h = State::combine(std::move(h), v); - } - return State::combine(std::move(h), b.size()); - } - - // Exposed only for tests. 
- static bool testonly_uses_linear_node_search() { - return Tree::testonly_uses_linear_node_search(); - } - - protected: - Tree tree_; -}; - -// A common base class for btree_set and btree_map. -template -class btree_set_container : public btree_container { - using super_type = btree_container; - using mutable_value_type = typename Tree::mutable_value_type; - using params_type = typename Tree::params_type; - friend class BtreeNodePeer; - - public: - using value_type = typename Tree::value_type; - using size_type = typename Tree::size_type; - using key_compare = typename Tree::key_compare; - using allocator_type = typename Tree::allocator_type; - using iterator = typename Tree::iterator; - using const_iterator = typename Tree::const_iterator; - - // Inherit constructors. - using super_type::super_type; - btree_set_container() {} - - // Range constructor. - template - btree_set_container(InputIterator b, InputIterator e, - const key_compare &comp = key_compare(), - const allocator_type &alloc = allocator_type()) - : super_type(comp, alloc) { - insert(b, e); - } - - // Initializer list constructor. - btree_set_container(std::initializer_list init, - const key_compare &comp = key_compare(), - const allocator_type &alloc = allocator_type()) - : btree_set_container(init.begin(), init.end(), comp, alloc) {} - - // Lookup routines. - template - iterator find(const K &key) { - return this->tree_.find_unique(key); - } - template - const_iterator find(const K &key) const { - return this->tree_.find_unique(key); - } - template - size_type count(const K &key) const { - return this->tree_.count_unique(key); - } - - // Insertion routines. - std::pair insert(const value_type &x) { - return this->tree_.insert_unique(params_type::key(x), x); - } - std::pair insert(value_type &&x) { - return this->tree_.insert_unique(params_type::key(x), std::move(x)); - } - template - std::pair emplace(Args &&... 
args) { - mutable_value_type v(std::forward(args)...); - return this->tree_.insert_unique(params_type::key(v), std::move(v)); - } - iterator insert(iterator position, const value_type &x) { - return this->tree_.insert_hint_unique(position, params_type::key(x), x); - } - iterator insert(iterator position, value_type &&x) { - return this->tree_.insert_hint_unique(position, params_type::key(x), - std::move(x)); - } - template - iterator emplace_hint(iterator position, Args &&... args) { - mutable_value_type v(std::forward(args)...); - return this->tree_.insert_hint_unique(position, params_type::key(v), - std::move(v)); - } - template - void insert(InputIterator b, InputIterator e) { - this->tree_.insert_iterator_unique(b, e); - } - void insert(std::initializer_list init) { - this->tree_.insert_iterator_unique(init.begin(), init.end()); - } - - // Deletion routines. - template - int erase(const K &key) { - return this->tree_.erase_unique(key); - } - // Erase the specified iterator from the btree. The iterator must be valid - // (i.e. not equal to end()). Return an iterator pointing to the node after - // the one that was erased (or end() if none exists). - iterator erase(const iterator &iter) { return this->tree_.erase(iter); } - void erase(const iterator &first, const iterator &last) { - this->tree_.erase(first, last); - } -}; - -// Base class for btree_map. -template -class btree_map_container : public btree_set_container { - using super_type = btree_set_container; - - public: - using key_type = typename Tree::key_type; - using mapped_type = typename Tree::mapped_type; - using value_type = typename Tree::value_type; - using key_compare = typename Tree::key_compare; - using allocator_type = typename Tree::allocator_type; - - // Inherit constructors. - using super_type::super_type; - btree_map_container() {} - - // Insertion routines. 
- mapped_type &operator[](const key_type &key) { - return this->tree_ - .insert_unique(key, std::piecewise_construct, - std::forward_as_tuple(key), std::forward_as_tuple()) - .first->second; - } - mapped_type &operator[](key_type &&key) { - return this->tree_ - .insert_unique(key, std::piecewise_construct, - std::forward_as_tuple(std::move(key)), - std::forward_as_tuple()) - .first->second; - } - - mapped_type &at(const key_type &key) { - auto it = this->find(key); - if (it == this->end()) - absl::base_internal::ThrowStdOutOfRange("btree_map::at"); - return it->second; - } - const mapped_type &at(const key_type &key) const { - auto it = this->find(key); - if (it == this->end()) - absl::base_internal::ThrowStdOutOfRange("btree_map::at"); - return it->second; - } -}; - -// A common base class for btree_multiset and btree_multimap. -template -class btree_multiset_container : public btree_container { - using super_type = btree_container; - - public: - using key_type = typename Tree::key_type; - using value_type = typename Tree::value_type; - using mapped_type = typename Tree::mapped_type; - using size_type = typename Tree::size_type; - using key_compare = typename Tree::key_compare; - using allocator_type = typename Tree::allocator_type; - using iterator = typename Tree::iterator; - using const_iterator = typename Tree::const_iterator; - - // Inherit constructors. - using super_type::super_type; - btree_multiset_container() {} - - // Range constructor. - template - btree_multiset_container(InputIterator b, InputIterator e, - const key_compare &comp = key_compare(), - const allocator_type &alloc = allocator_type()) - : super_type(comp, alloc) { - insert(b, e); - } - - // Initializer list constructor. - btree_multiset_container(std::initializer_list init, - const key_compare &comp = key_compare(), - const allocator_type &alloc = allocator_type()) - : btree_multiset_container(init.begin(), init.end(), comp, alloc) {} - - // Lookup routines. 
- template - iterator find(const K &key) { - return this->tree_.find_multi(key); - } - template - const_iterator find(const K &key) const { - return this->tree_.find_multi(key); - } - template - size_type count(const K &key) const { - return this->tree_.count_multi(key); - } - - // Insertion routines. - iterator insert(const value_type &x) { return this->tree_.insert_multi(x); } - iterator insert(value_type &&x) { - return this->tree_.insert_multi(std::move(x)); - } - iterator insert(iterator position, const value_type &x) { - return this->tree_.insert_hint_multi(position, x); - } - iterator insert(iterator position, value_type &&x) { - return this->tree_.insert_hint_multi(position, std::move(x)); - } - template - void insert(InputIterator b, InputIterator e) { - this->tree_.insert_iterator_multi(b, e); - } - void insert(std::initializer_list init) { - this->tree_.insert_iterator_multi(init.begin(), init.end()); - } - - // Deletion routines. - template - int erase(const K &key) { - return this->tree_.erase_multi(key); - } - // Erase the specified iterator from the btree. The iterator must be valid - // (i.e. not equal to end()). Return an iterator pointing to the node after - // the one that was erased (or end() if none exists). - iterator erase(const iterator &iter) { return this->tree_.erase(iter); } - void erase(const iterator &first, const iterator &last) { - this->tree_.erase(first, last); - } -}; - -// A base class for btree_multimap. -template -class btree_multimap_container : public btree_multiset_container { - using super_type = btree_multiset_container; - - public: - using mapped_type = typename Tree::mapped_type; - - // Inherit constructors. 
- using super_type::super_type; - btree_multimap_container() {} -}; - -} // namespace internal_btree -} // namespace gtl - -#endif // S2_UTIL_GTL_BTREE_CONTAINER_H_ diff --git a/src/s2/util/gtl/btree_map.h b/src/s2/util/gtl/btree_map.h deleted file mode 100644 index 6e9ef622..00000000 --- a/src/s2/util/gtl/btree_map.h +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright 2007 Google Inc. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS-IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -// -// A btree_map<> implements the STL unique sorted associative container -// interface and the pair associative container interface (a.k.a map<>) using a -// btree. A btree_multimap<> implements the STL multiple sorted associative -// container interface and the pair associative container interface (a.k.a -// multimap<>) using a btree. See btree.h for details of the btree -// implementation and caveats. 
-// - -#ifndef S2_UTIL_GTL_BTREE_MAP_H_ -#define S2_UTIL_GTL_BTREE_MAP_H_ - -#include -#include -#include -#include -#include - -#include "s2/util/gtl/btree.h" // IWYU pragma: export -#include "s2/util/gtl/btree_container.h" // IWYU pragma: export - -namespace gtl { - -template , - typename Alloc = std::allocator>, - int TargetNodeSize = 256> -class btree_map - : public internal_btree::btree_map_container< - internal_btree::btree>> { - using Base = typename btree_map::btree_map_container; - - public: - btree_map() {} - using Base::Base; -}; - -template -void swap(btree_map &x, btree_map &y) { - return x.swap(y); -} - -template , - typename Alloc = std::allocator>, - int TargetNodeSize = 256> -class btree_multimap - : public internal_btree::btree_multimap_container< - internal_btree::btree>> { - using Base = typename btree_multimap::btree_multimap_container; - - public: - btree_multimap() {} - using Base::Base; -}; - -template -void swap(btree_multimap &x, btree_multimap &y) { - return x.swap(y); -} - -} // namespace gtl - -#endif // S2_UTIL_GTL_BTREE_MAP_H_ diff --git a/src/s2/util/gtl/btree_set.h b/src/s2/util/gtl/btree_set.h deleted file mode 100644 index c03490e1..00000000 --- a/src/s2/util/gtl/btree_set.h +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright 2007 Google Inc. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS-IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// - -// -// A btree_set<> implements the STL unique sorted associative container -// interface (a.k.a set<>) using a btree. A btree_multiset<> implements the STL -// multiple sorted associative container interface (a.k.a multiset<>) using a -// btree. See btree.h for details of the btree implementation and caveats. -// - -#ifndef S2_UTIL_GTL_BTREE_SET_H_ -#define S2_UTIL_GTL_BTREE_SET_H_ - -#include -#include -#include - -#include "s2/util/gtl/btree.h" // IWYU pragma: export -#include "s2/util/gtl/btree_container.h" // IWYU pragma: export - -namespace gtl { - -template , - typename Alloc = std::allocator, int TargetNodeSize = 256> -class btree_set - : public internal_btree::btree_set_container< - internal_btree::btree>> { - using Base = typename btree_set::btree_set_container; - - public: - btree_set() {} - using Base::Base; -}; - -template -void swap(btree_set &x, btree_set &y) { - return x.swap(y); -} - -template , - typename Alloc = std::allocator, int TargetNodeSize = 256> -class btree_multiset - : public internal_btree::btree_multiset_container< - internal_btree::btree>> { - using Base = typename btree_multiset::btree_multiset_container; - - public: - btree_multiset() {} - using Base::Base; -}; - -template -void swap(btree_multiset &x, btree_multiset &y) { - return x.swap(y); -} - -} // namespace gtl - -#endif // S2_UTIL_GTL_BTREE_SET_H_ diff --git a/src/s2/util/gtl/compact_array.h b/src/s2/util/gtl/compact_array.h index cf11f77b..033db144 100644 --- a/src/s2/util/gtl/compact_array.h +++ b/src/s2/util/gtl/compact_array.h @@ -40,7 +40,9 @@ #include #include #include + #include +#include #include #include #include // NOLINT @@ -48,11 +50,12 @@ #include #include +#include "absl/base/macros.h" +#include "absl/meta/type_traits.h" + #include "s2/base/integral_types.h" #include "s2/base/logging.h" -#include "absl/base/macros.h" #include "s2/base/port.h" -#include "absl/meta/type_traits.h" #include "s2/util/bits/bits.h" #include "s2/util/gtl/container_logging.h" @@ 
-70,14 +73,14 @@ class compact_array_base { static const int kMaxSize = (1 << kSizeNumBits) - 1; #ifdef IS_LITTLE_ENDIAN - uint32 size_ : kSizeNumBits; // number of valid items in the array - uint32 capacity_ : kCapacityNumBits; // allocated array size - uint32 is_exponent_ : 1; // whether capacity_ is an exponent + uint32 size_ : kSizeNumBits; // number of valid items in the array + uint32 capacity_ : kCapacityNumBits; // allocated array size + uint32 is_exponent_ : 1; // whether capacity_ is an exponent // This object might share memory representation (ie. union) with // other data structures. We reserved the DO_NOT_USE (32nd bit in // little endian format) to be used as a tag. - uint32 DO_NOT_USE : 1; + uint32 DO_NOT_USE : 1; #else uint32 DO_NOT_USE : 1; uint32 is_exponent_ : 1; @@ -86,10 +89,7 @@ class compact_array_base { #endif // Opportunistically consider allowing inlined elements. - // dd: this has to be disabled to pass CRAN checks, since there is a - // (potentially) zero-length array that is not the last element of the class (so - // this can't be silenced using __extension__) -#if defined(_LP64) && defined(__GNUC__) && false +#if defined(_LP64) && defined(__GNUC__) // With 64-bit pointers, our approach is to form a 16-byte struct: // [5 bytes for size, capacity, is_exponent and is_inlined] // [3 bytes of padding or inlined elements] @@ -124,7 +124,7 @@ class compact_array_base { char unused_padding_[kUnusedPaddingBytes]; // inlined_elements_ stores the first N elements, potentially as few as zero. - __extension__ char inlined_elements_[3 - kUnusedPaddingBytes]; + char inlined_elements_[3 - kUnusedPaddingBytes]; // compact_array_base itself is at least as aligned as a T* because of the // T* member inside this union. 
The only reason to split inlined_elements_ @@ -148,7 +148,8 @@ class compact_array_base { return const_cast*>(this)->Array(); } - typedef typename A::template rebind::other value_allocator_type; + using value_allocator_type = + typename std::allocator_traits::template rebind_alloc; public: typedef T value_type; @@ -157,7 +158,7 @@ class compact_array_base { typedef const value_type* const_pointer; typedef value_type& reference; typedef const value_type& const_reference; - typedef uint32 size_type; + typedef uint32 size_type; typedef ptrdiff_t difference_type; typedef value_type* iterator; @@ -276,6 +277,15 @@ class compact_array_base { insert(p, first, last, Int()); } + template + reference emplace_back(Args&&... args) { + return *Insert(end(), value_type(std::forward(args)...)); + } + template + iterator emplace(const_iterator p, Args&&... args) { + return Insert(p, value_type(std::forward(args)...)); + } + iterator erase(const_iterator p) { size_type index = p - begin(); erase_aux(p, 1); @@ -357,6 +367,13 @@ class compact_array_base { set_size(n); } + template + friend H AbslHashValue(H h, const compact_array_base& v) { + return H::combine( + H::combine_contiguous(std::move(h), v.ConstArray(), v.size()), + v.size()); + } + private: // Low-level helper functions. void set_size(size_type n) { S2_DCHECK_LE(n, capacity()); @@ -396,10 +413,7 @@ class compact_array_base { value_allocator_type allocator; T* new_ptr = allocator.allocate(capacity()); - // dd: this modification fixes a ASAN/UBSAN error, because - // when old_capacity is 0, Array() is nullptr, which is UB - // for memcpy. 
- if (old_capacity > 0) { + if (old_capacity != 0) { memcpy(new_ptr, Array(), old_capacity * sizeof(T)); allocator.deallocate(Array(), old_capacity); } diff --git a/src/s2/util/gtl/container_logging.h b/src/s2/util/gtl/container_logging.h index 93d0d37d..dbdcf7be 100644 --- a/src/s2/util/gtl/container_logging.h +++ b/src/s2/util/gtl/container_logging.h @@ -18,18 +18,34 @@ // Utilities for container logging. // TODO(user): Broaden the scope and rename to "stream_util.h" // +// +// The typical use looks like this: +// +// S2_LOG(INFO) << gtl::LogContainer(container); +// +// By default, LogContainer() uses the LogShortUpTo100 policy: comma-space +// separation, no newlines, and with limit of 100 items. +// +// Policies can be specified: +// +// S2_LOG(INFO) << gtl::LogContainer(container, gtl::LogMultiline()); +// +// The above example will print the container using newlines between +// elements, enclosed in [] braces. +// +// See below for further details on policies. #ifndef S2_UTIL_GTL_CONTAINER_LOGGING_H_ #define S2_UTIL_GTL_CONTAINER_LOGGING_H_ #include #include +#include #include #include #include "s2/base/integral_types.h" #include "s2/base/port.h" -#include "s2/strings/ostringstream.h" namespace gtl { @@ -162,7 +178,7 @@ inline void LogRangeToStream(std::ostream &out, // NOLINT IteratorT begin, IteratorT end, const PolicyT &policy) { policy.LogOpening(out); - for (size_t i = 0; begin != end && i < policy.MaxElements(); ++i, ++begin) { + for (int64 i = 0; begin != end && i < policy.MaxElements(); ++i, ++begin) { if (i == 0) { policy.LogFirstSeparator(out); } else { @@ -201,9 +217,9 @@ class RangeLogger { // operator<< above is generally recommended. However, some situations may // require a string, so a convenience str() method is provided as well. 
std::string str() const { - std::string s; - ::strings::OStringStream(&s) << *this; - return s; + std::stringstream ss; + ss << *this; + return ss.str(); } private: diff --git a/src/s2/util/gtl/dense_hash_set.h b/src/s2/util/gtl/dense_hash_set.h index b3b00031..afcdb30c 100644 --- a/src/s2/util/gtl/dense_hash_set.h +++ b/src/s2/util/gtl/dense_hash_set.h @@ -117,6 +117,7 @@ #define S2_UTIL_GTL_DENSE_HASH_SET_H_ #include + #include #include #include diff --git a/src/s2/util/gtl/densehashtable.h b/src/s2/util/gtl/densehashtable.h index 737c307b..9c737818 100644 --- a/src/s2/util/gtl/densehashtable.h +++ b/src/s2/util/gtl/densehashtable.h @@ -107,20 +107,22 @@ #include #include -#include // for FILE, fwrite, fread -#include // For swap(), eg +#include // for FILE, fwrite, fread + +#include // For swap(), eg +#include #include -#include // For iterator tags -#include // for numeric_limits -#include // For uninitialized_fill +#include // For iterator tags +#include // for numeric_limits +#include // For uninitialized_fill #include #include +#include #include #include -#include -#include "s2/util/gtl/hashtable_common.h" #include "s2/base/port.h" +#include "s2/util/gtl/hashtable_common.h" #include // For length_error namespace gtl { @@ -182,7 +184,10 @@ struct dense_hashtable_const_iterator; template struct dense_hashtable_iterator { private: - typedef typename A::template rebind::other value_alloc_type; + using value_alloc_type = + typename std::allocator_traits::template rebind_alloc; + using value_alloc_traits = + typename std::allocator_traits::template rebind_traits; public: typedef dense_hashtable_iterator @@ -192,10 +197,10 @@ struct dense_hashtable_iterator { typedef std::forward_iterator_tag iterator_category; // very little defined! 
typedef V value_type; - typedef typename value_alloc_type::difference_type difference_type; - typedef typename value_alloc_type::size_type size_type; - typedef typename value_alloc_type::reference reference; - typedef typename value_alloc_type::pointer pointer; + typedef typename value_alloc_traits::difference_type difference_type; + typedef typename value_alloc_traits::size_type size_type; + typedef typename value_alloc_traits::value_type& reference; + typedef typename value_alloc_traits::pointer pointer; // "Real" constructor and default constructor dense_hashtable_iterator( @@ -245,7 +250,10 @@ struct dense_hashtable_iterator { template struct dense_hashtable_const_iterator { private: - typedef typename A::template rebind::other value_alloc_type; + using value_alloc_type = + typename std::allocator_traits::template rebind_alloc; + using value_alloc_traits = + typename std::allocator_traits::template rebind_traits; public: typedef dense_hashtable_iterator @@ -255,10 +263,10 @@ struct dense_hashtable_const_iterator { typedef std::forward_iterator_tag iterator_category; // very little defined! 
typedef V value_type; - typedef typename value_alloc_type::difference_type difference_type; - typedef typename value_alloc_type::size_type size_type; - typedef typename value_alloc_type::const_reference reference; - typedef typename value_alloc_type::const_pointer pointer; + typedef typename value_alloc_traits::difference_type difference_type; + typedef typename value_alloc_traits::size_type size_type; + typedef const typename value_alloc_traits::value_type& reference; + typedef typename value_alloc_traits::const_pointer pointer; // "Real" constructor and default constructor dense_hashtable_const_iterator( @@ -311,7 +319,10 @@ template class dense_hashtable { private: - typedef typename Alloc::template rebind::other value_alloc_type; + using value_alloc_type = + typename std::allocator_traits::template rebind_alloc; + using value_alloc_traits = + typename std::allocator_traits::template rebind_traits; public: @@ -321,12 +332,12 @@ class dense_hashtable { typedef EqualKey key_equal; typedef Alloc allocator_type; - typedef typename value_alloc_type::size_type size_type; - typedef typename value_alloc_type::difference_type difference_type; - typedef typename value_alloc_type::reference reference; - typedef typename value_alloc_type::const_reference const_reference; - typedef typename value_alloc_type::pointer pointer; - typedef typename value_alloc_type::const_pointer const_pointer; + typedef typename value_alloc_traits::size_type size_type; + typedef typename value_alloc_traits::difference_type difference_type; + typedef typename value_alloc_traits::value_type& reference; + typedef const typename value_alloc_traits::value_type& const_reference; + typedef typename value_alloc_traits::pointer pointer; + typedef typename value_alloc_traits::const_pointer const_pointer; typedef dense_hashtable_iterator iterator; @@ -532,9 +543,9 @@ class dense_hashtable { } private: - bool test_empty(size_type bucknum, const_pointer table) const { + bool test_empty(size_type bucknum, 
const_pointer ptable) const { assert(settings.use_empty()); - return equals(key_info.empty, get_key(table[bucknum])); + return equals(key_info.empty, get_key(ptable[bucknum])); } void fill_range_with_empty(pointer table_start, pointer table_end) { @@ -575,7 +586,9 @@ class dense_hashtable { // FUNCTIONS CONCERNING SIZE public: size_type size() const { return num_elements - num_deleted; } - size_type max_size() const { return get_allocator().max_size(); } + size_type max_size() const { + return std::allocator_traits::max_size(get_allocator()); + } bool empty() const { return size() == 0; } size_type bucket_count() const { return num_buckets; } size_type max_bucket_count() const { return max_size(); } @@ -1034,7 +1047,7 @@ class dense_hashtable { const size_type bucket_count_minus_one = bucket_count() - 1; size_type bucknum = key_hash & bucket_count_minus_one; size_type insert_pos = ILLEGAL_BUCKET; // where we would insert - while (1) { // probe until something happens + while (true) { // probe until something happens if (test_empty(bucknum)) { // bucket is empty if (insert_pos == ILLEGAL_BUCKET) // found no prior place to insert return std::pair(ILLEGAL_BUCKET, bucknum); @@ -1072,7 +1085,7 @@ class dense_hashtable { size_type num_probes = 0; // how many times we've probed const size_type bucket_count_minus_one = bucket_count() - 1; size_type bucknum = key_hash & bucket_count_minus_one; - while (1) { // probe until something happens + while (true) { // probe until something happens if (equals(key, get_key(table[bucknum]))) { return std::pair(bucknum, true); } else if (test_empty(bucknum)) { @@ -1290,7 +1303,7 @@ class dense_hashtable { void erase(iterator pos) { - if (pos == end()) return; // sanity check + if (pos == end()) return; set_deleted(pos); ++num_deleted; // will think about shrink after next insert @@ -1312,7 +1325,7 @@ class dense_hashtable { // you can't use the object after it's erased anyway, so it doesn't matter // if it's const or not. 
void erase(const_iterator pos) { - if (pos == end()) return; // sanity check + if (pos == end()) return; set_deleted(pos); ++num_deleted; // will think about shrink after next insert diff --git a/src/s2/util/gtl/hashtable_common.h b/src/s2/util/gtl/hashtable_common.h index a5ea2a09..a8c06901 100644 --- a/src/s2/util/gtl/hashtable_common.h +++ b/src/s2/util/gtl/hashtable_common.h @@ -48,6 +48,7 @@ #include #include + #include #include // For length_error diff --git a/src/s2/util/hash/mix.h b/src/s2/util/hash/mix.h index 65f7bfaf..1d0936e1 100644 --- a/src/s2/util/hash/mix.h +++ b/src/s2/util/hash/mix.h @@ -21,8 +21,10 @@ #define S2_UTIL_HASH_MIX_H_ #include + #include + // Fast mixing of hash values -- not strong enough for fingerprinting. // May change from time to time. // @@ -69,6 +71,7 @@ class HashMix { (hash_ >> (std::numeric_limits::digits - 19))) + val; } size_t get() const { return hash_; } + private: size_t hash_; }; diff --git a/src/s2/util/math/exactfloat/exactfloat.cc b/src/s2/util/math/exactfloat/exactfloat.cc index ee645b65..160bf6ba 100644 --- a/src/s2/util/math/exactfloat/exactfloat.cc +++ b/src/s2/util/math/exactfloat/exactfloat.cc @@ -20,20 +20,26 @@ #include #include #include + #include #include +#include #include +#include #include #include // for OPENSSL_free -#include "s2/base/integral_types.h" -#include "s2/base/logging.h" #include "absl/base/macros.h" #include "absl/container/fixed_array.h" +#include "absl/numeric/int128.h" + +#include "s2/base/integral_types.h" +#include "s2/base/logging.h" using std::max; using std::min; +using std::string; // Define storage for constants. 
const int ExactFloat::kMinExp; @@ -136,10 +142,10 @@ inline static uint64 BN_ext_get_uint64(const BIGNUM* bn) { uint64 r; #ifdef IS_LITTLE_ENDIAN S2_CHECK_EQ(BN_bn2lebinpad(bn, reinterpret_cast(&r), - sizeof(r)), sizeof(r)); -#elif IS_BIG_ENDIAN + sizeof(r)), sizeof(r)); +#elif defined(IS_BIG_ENDIAN) S2_CHECK_EQ(BN_bn2binpad(bn, reinterpret_cast(&r), - sizeof(r)), sizeof(r)); + sizeof(r)), sizeof(r)); #else #error one of IS_LITTLE_ENDIAN or IS_BIG_ENDIAN should be defined! #endif @@ -409,13 +415,13 @@ std::string ExactFloat::ToStringWithMaxDigits(int max_digits) const { str.append(digits.begin() + 1, digits.end()); } char exp_buf[20]; - snprintf(exp_buf, sizeof(exp_buf), "e%+02d", exp10 - 1); + sprintf(exp_buf, "e%+02d", exp10 - 1); str += exp_buf; } else { // Use fixed format. We split this into two cases depending on whether // the integer portion is non-zero or not. if (exp10 > 0) { - if (exp10 >= digits.size()) { + if (static_cast(exp10) >= digits.size()) { str += digits; for (int i = exp10 - digits.size(); i > 0; --i) { str.push_back('0'); @@ -512,7 +518,7 @@ int ExactFloat::GetDecimalDigits(int max_digits, std::string* digits) const { std::string ExactFloat::ToUniqueString() const { char prec_buf[20]; - snprintf(prec_buf, sizeof(prec_buf), "<%d>", prec()); + sprintf(prec_buf, "<%d>", prec()); return ToString() + prec_buf; } diff --git a/src/s2/util/math/exactfloat/exactfloat.h b/src/s2/util/math/exactfloat/exactfloat.h index 028321ee..a2ef9cb8 100644 --- a/src/s2/util/math/exactfloat/exactfloat.h +++ b/src/s2/util/math/exactfloat/exactfloat.h @@ -15,24 +15,24 @@ // Author: ericv@google.com (Eric Veach) // -// ExactFloat is a multiple-precision floating point type based on the OpenSSL -// Bignum library. It has the same interface as the built-in "float" and -// "double" types, but only supports the subset of operators and intrinsics -// where it is possible to compute the result exactly. 
So for example, -// ExactFloat supports addition and multiplication but not division (since in -// general, the quotient of two floating-point numbers cannot be represented -// exactly). Exact arithmetic is useful for geometric algorithms, especially -// for disambiguating cases where ordinary double-precision arithmetic yields -// an uncertain result. +// ExactFloat is a multiple-precision floating point type that uses the OpenSSL +// Bignum library for numerical calculations. It has the same interface as the +// built-in "float" and "double" types, but only supports the subset of +// operators and intrinsics where it is possible to compute the result exactly. +// So for example, ExactFloat supports addition and multiplication but not +// division (since in general, the quotient of two floating-point numbers cannot +// be represented exactly). Exact arithmetic is useful for geometric +// algorithms, especially for disambiguating cases where ordinary +// double-precision arithmetic yields an uncertain result. // -// ExactFloat is a subset of the faster and more capable MPFloat class (which -// is based on the GNU MPFR library). The main reason to use this class -// rather than MPFloat is that it is subject to a BSD-style license rather -// than the much more restrictive LGPL license. +// ExactFloat is a subset of the now-retired MPFloat class, which used the GNU +// MPFR library for numerical calculations. The main reason for the switch to +// ExactFloat is that OpenSSL has a BSD-style license whereas MPFR has a much +// more restrictive LGPL license. // -// It has the following features: +// ExactFloat has the following features: // -// - ExactFloat uses the same syntax as the built-in "float" and "double" +// - It uses the same syntax as the built-in "float" and "double" // types, for example: x += 4 + fabs(2*y*y - z*z). 
There are a few // differences (see below), but the syntax is compatible enough so that // ExactFloat can be used as a template argument to templatized classes @@ -112,8 +112,12 @@ #include #include #include + #include +#include #include +#include +#include #include #include @@ -128,16 +132,16 @@ class ExactFloat { // The maximum exponent supported. If a value has an exponent larger than // this, it is replaced by infinity (with the appropriate sign). - static const int kMaxExp = 200*1000*1000; // About 10**(60 million) + static constexpr int kMaxExp = 200 * 1000 * 1000; // About 10**(60 million) // The minimum exponent supported. If a value has an exponent less than // this, it is replaced by zero (with the appropriate sign). - static const int kMinExp = -kMaxExp; // About 10**(-60 million) + static constexpr int kMinExp = -kMaxExp; // About 10**(-60 million) // The maximum number of mantissa bits supported. If a value has more // mantissa bits than this, it is replaced with NaN. (It is expected that // users of this class will never want this much precision.) - static const int kMaxPrec = 64 << 20; // About 20 million digits + static constexpr int kMaxPrec = 64 << 20; // About 20 million digits // Rounding modes. kRoundTiesToEven and kRoundTiesAwayFromZero both round // to the nearest representable value unless two values are equally close. @@ -536,9 +540,9 @@ class ExactFloat { // mantissa of zero. Do not change these values; methods such as // is_normal() make assumptions about their ordering. Non-normal numbers // can have either a positive or negative sign (including zero and NaN). 
- static const int32 kExpNaN = INT_MAX; - static const int32 kExpInfinity = INT_MAX - 1; - static const int32 kExpZero = INT_MAX - 2; + static constexpr int32 kExpNaN = INT_MAX; + static constexpr int32 kExpInfinity = INT_MAX - 1; + static constexpr int32 kExpZero = INT_MAX - 2; // Normal numbers are represented as (sign_ * bn_ * (2 ** bn_exp_)), where: // - sign_ is either +1 or -1 @@ -550,7 +554,7 @@ class ExactFloat { // A standard IEEE "double" has a 53-bit mantissa consisting of a 52-bit // fraction plus an implicit leading "1" bit. - static const int kDoubleMantissaBits = 53; + static constexpr int kDoubleMantissaBits = 53; // Convert an ExactFloat with no more than 53 bits in its mantissa to a // "double". This method handles non-normal values (NaN, etc). diff --git a/src/s2/util/math/mathutil.cc b/src/s2/util/math/mathutil.cc index c9bad601..7373f7ec 100644 --- a/src/s2/util/math/mathutil.cc +++ b/src/s2/util/math/mathutil.cc @@ -19,17 +19,6 @@ #include #include -namespace { -// Returns the sign of x: -// -1 if x < 0, -// +1 if x > 0, -// 0 if x = 0. -template -inline T sgn(const T x) { - return (x == 0 ? 0 : (x < 0 ? -1 : 1)); -} -} // namespace - bool MathUtil::RealRootsForCubic(long double const a, long double const b, long double const c, diff --git a/src/s2/util/math/mathutil.h b/src/s2/util/math/mathutil.h index ac9eebfe..430159b9 100644 --- a/src/s2/util/math/mathutil.h +++ b/src/s2/util/math/mathutil.h @@ -24,6 +24,18 @@ #include #include "s2/base/integral_types.h" +#include "s2/base/logging.h" +#include "s2/util/bits/bits.h" + +// Returns the sign of x: +// -1 if x < 0, +// +1 if x > 0, +// 0 if x = 0, +// unspecified if x is NaN. +template +inline T sgn(const T x) { + return (x == 0 ? 0 : (x < 0 ? -1 : 1)); +} class MathUtil { public: @@ -154,6 +166,56 @@ class MathUtil { return Round(x); #endif // if defined __GNUC__ && ... } + + // Computes v^i, where i is a non-negative integer. 
+ // When T is a floating point type, this has the same semantics as pow(), but + // is much faster. + // T can also be any integral type, in which case computations will be + // performed in the value domain of this integral type, and overflow semantics + // will be those of T. + // You can also use any type for which operator*= is defined. + template + static T IPow(T base, int exp) { + S2_DCHECK_GE(exp, 0); + uint32 uexp = static_cast(exp); + + if (uexp < 16) { + T result = (uexp & 1) ? base : static_cast(1); + if (uexp >= 2) { + base *= base; + if (uexp & 2) { + result *= base; + } + if (uexp >= 4) { + base *= base; + if (uexp & 4) { + result *= base; + } + if (uexp >= 8) { + base *= base; + result *= base; + } + } + } + return result; + } + + T result = base; + int count = 31 ^ Bits::Log2FloorNonZero(uexp); + + uexp <<= count; + count ^= 31; + + while (count--) { + uexp <<= 1; + result *= result; + if (uexp >= 0x80000000) { + result *= base; + } + } + + return result; + } }; // ========================================================================= // diff --git a/src/s2/util/math/matrix3x3.h b/src/s2/util/math/matrix3x3.h index e6b60afd..abf829cf 100644 --- a/src/s2/util/math/matrix3x3.h +++ b/src/s2/util/math/matrix3x3.h @@ -33,7 +33,7 @@ #include #include -#include +#include #include "s2/base/logging.h" #include "s2/util/math/mathutil.h" @@ -77,22 +77,21 @@ class Matrix3x3 { // Casting constructor template static Matrix3x3 Cast(const Matrix3x3 &mb) { - return Matrix3x3(static_cast(mb(0, 0)), - static_cast(mb(0, 1)), - static_cast(mb(0, 2)), - static_cast(mb(1, 0)), - static_cast(mb(1, 1)), - static_cast(mb(1, 2)), - static_cast(mb(2, 0)), - static_cast(mb(2, 1)), + return Matrix3x3(static_cast(mb(0, 0)), // + static_cast(mb(0, 1)), // + static_cast(mb(0, 2)), // + static_cast(mb(1, 0)), // + static_cast(mb(1, 1)), // + static_cast(mb(1, 2)), // + static_cast(mb(2, 0)), // + static_cast(mb(2, 1)), // static_cast(mb(2, 2))); } // Change the value of all the 
coefficients of the matrix - inline Matrix3x3 & - Set(const VType &m00, const VType &m01, const VType &m02, - const VType &m10, const VType &m11, const VType &m12, - const VType &m20, const VType &m21, const VType &m22) { + inline Matrix3x3 &Set(const VType &m00, const VType &m01, const VType &m02, + const VType &m10, const VType &m11, const VType &m12, + const VType &m20, const VType &m21, const VType &m22) { m_[0][0] = m00; m_[0][1] = m01; m_[0][2] = m02; @@ -108,7 +107,7 @@ class Matrix3x3 { } // Matrix addition - inline Matrix3x3& operator+=(const Matrix3x3 &mb) { + inline Matrix3x3 &operator+=(const Matrix3x3 &mb) { m_[0][0] += mb.m_[0][0]; m_[0][1] += mb.m_[0][1]; m_[0][2] += mb.m_[0][2]; @@ -124,7 +123,7 @@ class Matrix3x3 { } // Matrix subtration - inline Matrix3x3& operator-=(const Matrix3x3 &mb) { + inline Matrix3x3 &operator-=(const Matrix3x3 &mb) { m_[0][0] -= mb.m_[0][0]; m_[0][1] -= mb.m_[0][1]; m_[0][2] -= mb.m_[0][2]; @@ -140,7 +139,7 @@ class Matrix3x3 { } // Matrix multiplication by a scalar - inline Matrix3x3& operator*=(const VType &k) { + inline Matrix3x3 &operator*=(const VType &k) { m_[0][0] *= k; m_[0][1] *= k; m_[0][2] *= k; @@ -167,8 +166,8 @@ class Matrix3x3 { // Change the sign of all the coefficients in the matrix friend inline Matrix3x3 operator-(const Matrix3x3 &vb) { - return Matrix3x3(-vb.m_[0][0], -vb.m_[0][1], -vb.m_[0][2], - -vb.m_[1][0], -vb.m_[1][1], -vb.m_[1][2], + return Matrix3x3(-vb.m_[0][0], -vb.m_[0][1], -vb.m_[0][2], // + -vb.m_[1][0], -vb.m_[1][1], -vb.m_[1][2], // -vb.m_[2][0], -vb.m_[2][1], -vb.m_[2][2]); } @@ -178,11 +177,12 @@ class Matrix3x3 { } friend inline Matrix3x3 operator*(const VType &k, const Matrix3x3 &mb) { - return Matrix3x3(mb)*k; + return Matrix3x3(mb) * k; } // Matrix multiplication inline Matrix3x3 operator*(const Matrix3x3 &mb) const { + // clang-format off return Matrix3x3( m_[0][0] * mb.m_[0][0] + m_[0][1] * mb.m_[1][0] + m_[0][2] * mb.m_[2][0], m_[0][0] * mb.m_[0][1] + m_[0][1] * mb.m_[1][1] + 
m_[0][2] * mb.m_[2][1], @@ -195,39 +195,30 @@ class Matrix3x3 { m_[2][0] * mb.m_[0][0] + m_[2][1] * mb.m_[1][0] + m_[2][2] * mb.m_[2][0], m_[2][0] * mb.m_[0][1] + m_[2][1] * mb.m_[1][1] + m_[2][2] * mb.m_[2][1], m_[2][0] * mb.m_[0][2] + m_[2][1] * mb.m_[1][2] + m_[2][2] * mb.m_[2][2]); + // clang-format on } // Multiplication of a matrix by a vector inline MVector operator*(const MVector &v) const { - return MVector( - m_[0][0] * v[0] + m_[0][1] * v[1] + m_[0][2] * v[2], - m_[1][0] * v[0] + m_[1][1] * v[1] + m_[1][2] * v[2], - m_[2][0] * v[0] + m_[2][1] * v[1] + m_[2][2] * v[2]); + return MVector(m_[0][0] * v[0] + m_[0][1] * v[1] + m_[0][2] * v[2], + m_[1][0] * v[0] + m_[1][1] * v[1] + m_[1][2] * v[2], + m_[2][0] * v[0] + m_[2][1] * v[1] + m_[2][2] * v[2]); } // Return the determinant of the matrix - inline VType Det(void) const { - return m_[0][0] * m_[1][1] * m_[2][2] - + m_[0][1] * m_[1][2] * m_[2][0] - + m_[0][2] * m_[1][0] * m_[2][1] - - m_[2][0] * m_[1][1] * m_[0][2] - - m_[2][1] * m_[1][2] * m_[0][0] - - m_[2][2] * m_[1][0] * m_[0][1]; + inline VType Det() const { + return m_[0][0] * m_[1][1] * m_[2][2] + m_[0][1] * m_[1][2] * m_[2][0] + + m_[0][2] * m_[1][0] * m_[2][1] - m_[2][0] * m_[1][1] * m_[0][2] - + m_[2][1] * m_[1][2] * m_[0][0] - m_[2][2] * m_[1][0] * m_[0][1]; } // Return the trace of the matrix - inline VType Trace(void) const { - return m_[0][0] + m_[1][1] + m_[2][2]; - } + inline VType Trace() const { return m_[0][0] + m_[1][1] + m_[2][2]; } // Return a pointer to the data array for interface with other libraries // like opencv - VType* Data() { - return reinterpret_cast(m_); - } - const VType* Data() const { - return reinterpret_cast(m_); - } + VType *Data() { return reinterpret_cast(m_); } + const VType *Data() const { return reinterpret_cast(m_); } // Return matrix element (i,j) with 0<=i<=2 0<=j<=2 inline VType &operator()(const int i, const int j) { @@ -249,39 +240,38 @@ class Matrix3x3 { inline VType &operator[](const int i) { 
S2_DCHECK_GE(i, 0); S2_DCHECK_LT(i, 9); - return reinterpret_cast(m_)[i]; + return reinterpret_cast(m_)[i]; } inline VType operator[](const int i) const { S2_DCHECK_GE(i, 0); S2_DCHECK_LT(i, 9); - return reinterpret_cast(m_)[i]; + return reinterpret_cast(m_)[i]; } // Return the transposed matrix - inline Matrix3x3 Transpose(void) const { - return Matrix3x3(m_[0][0], m_[1][0], m_[2][0], - m_[0][1], m_[1][1], m_[2][1], + inline Matrix3x3 Transpose() const { + return Matrix3x3(m_[0][0], m_[1][0], m_[2][0], // + m_[0][1], m_[1][1], m_[2][1], // m_[0][2], m_[1][2], m_[2][2]); } // Return the transposed of the matrix of the cofactors // (Useful for inversion for example) - inline Matrix3x3 ComatrixTransposed(void) const { - return Matrix3x3( - m_[1][1] * m_[2][2] - m_[2][1] * m_[1][2], - m_[2][1] * m_[0][2] - m_[0][1] * m_[2][2], - m_[0][1] * m_[1][2] - m_[1][1] * m_[0][2], + inline Matrix3x3 ComatrixTransposed() const { + return Matrix3x3(m_[1][1] * m_[2][2] - m_[2][1] * m_[1][2], + m_[2][1] * m_[0][2] - m_[0][1] * m_[2][2], + m_[0][1] * m_[1][2] - m_[1][1] * m_[0][2], - m_[1][2] * m_[2][0] - m_[2][2] * m_[1][0], - m_[2][2] * m_[0][0] - m_[0][2] * m_[2][0], - m_[0][2] * m_[1][0] - m_[1][2] * m_[0][0], + m_[1][2] * m_[2][0] - m_[2][2] * m_[1][0], + m_[2][2] * m_[0][0] - m_[0][2] * m_[2][0], + m_[0][2] * m_[1][0] - m_[1][2] * m_[0][0], - m_[1][0] * m_[2][1] - m_[2][0] * m_[1][1], - m_[2][0] * m_[0][1] - m_[0][0] * m_[2][1], - m_[0][0] * m_[1][1] - m_[1][0] * m_[0][1]); + m_[1][0] * m_[2][1] - m_[2][0] * m_[1][1], + m_[2][0] * m_[0][1] - m_[0][0] * m_[2][1], + m_[0][0] * m_[1][1] - m_[1][0] * m_[0][1]); } // Matrix inversion - inline Matrix3x3 Inverse(void) const { + inline Matrix3x3 Inverse() const { VType det = Det(); S2_CHECK_NE(det, VType(0)) << " Can't inverse. 
Determinant = 0."; return (VType(1) / det) * ComatrixTransposed(); @@ -302,27 +292,34 @@ class Matrix3x3 { } // Create a matrix from 3 row vectors - static inline Matrix3x3 FromRows(const MVector &v1, - const MVector &v2, - const MVector &v3) { + static inline Matrix3x3 FromRows(const MVector &v1, const MVector &v2, + const MVector &v3) { Matrix3x3 temp; - temp.Set(v1[0], v1[1], v1[2], - v2[0], v2[1], v2[2], + temp.Set(v1[0], v1[1], v1[2], // + v2[0], v2[1], v2[2], // v3[0], v3[1], v3[2]); return temp; } // Create a matrix from 3 column vectors - static inline Matrix3x3 FromCols(const MVector &v1, - const MVector &v2, - const MVector &v3) { + static inline Matrix3x3 FromCols(const MVector &v1, const MVector &v2, + const MVector &v3) { Matrix3x3 temp; - temp.Set(v1[0], v2[0], v3[0], - v1[1], v2[1], v3[1], + temp.Set(v1[0], v2[0], v3[0], // + v1[1], v2[1], v3[1], // v1[2], v2[2], v3[2]); return temp; } + // Create a matrix from outer product of two vectors. + static inline Matrix3x3 FromOuter(const MVector &ma, const MVector &mb) { + Matrix3x3 m; + for (int row = 0; row < 3; ++row) { + m.SetRow(row, ma[row] * mb); + } + return m; + } + // Set the vector in row i to be v1 void SetRow(int i, const MVector &v1) { S2_DCHECK_GE(i, 0); @@ -352,7 +349,7 @@ class Matrix3x3 { } // Return the identity matrix - static inline Matrix3x3 Identity(void) { + static inline Matrix3x3 Identity() { Matrix3x3 temp; temp.Set(VType(1), VType(0), VType(0), // VType(0), VType(1), VType(0), // @@ -361,30 +358,27 @@ class Matrix3x3 { } // Return a matrix full of zeros - static inline Matrix3x3 Zero(void) { - return Matrix3x3(); - } + static inline Matrix3x3 Zero() { return Matrix3x3(); } // Return a diagonal matrix with the coefficients in v static inline Matrix3x3 Diagonal(const MVector &v) { - return Matrix3x3(v[0], VType(), VType(), - VType(), v[1], VType(), + return Matrix3x3(v[0], VType(), VType(), // + VType(), v[1], VType(), // VType(), VType(), v[2]); } // Return the matrix vvT static 
Matrix3x3 Sym3(const MVector &v) { - return Matrix3x3( - v[0]*v[0], v[0]*v[1], v[0]*v[2], - v[1]*v[0], v[1]*v[1], v[1]*v[2], - v[2]*v[0], v[2]*v[1], v[2]*v[2]); + return Matrix3x3(v[0] * v[0], v[0] * v[1], v[0] * v[2], // + v[1] * v[0], v[1] * v[1], v[1] * v[2], // + v[2] * v[0], v[2] * v[1], v[2] * v[2]); } // Return a matrix M such that: // for each u, M * u = v.CrossProd(u) static Matrix3x3 AntiSym3(const MVector &v) { - return Matrix3x3(VType(), -v[2], v[1], - v[2], VType(), -v[0], - -v[1], v[0], VType()); + return Matrix3x3(VType(), -v[2], v[1], // + v[2], VType(), -v[0], // + -v[1], v[0], VType()); } // Returns matrix that rotates |rot| radians around axis rot. @@ -395,10 +389,18 @@ class Matrix3x3 { Matrix3x3 Wv = Matrix3x3::AntiSym3(w); Matrix3x3 I = Matrix3x3::Identity(); Matrix3x3 A = Matrix3x3::Sym3(w); + using std::cos; + using std::sin; R = (1 - cos(theta)) * A + sin(theta) * Wv + cos(theta) * I; return R; } + // Return a matrix that reflects a point across the plane defined by normal. 
+ static Matrix3x3 Householder(const MVector &normal) { + MVector unit = normal.Normalize(); + return Matrix3x3::Identity() - 2 * Matrix3x3::FromOuter(unit, unit); + } + // Returns v.Transpose() * (*this) * u VType MulBothSides(const MVector &v, const MVector &u) const { return ((*this) * u).DotProd(v); @@ -418,6 +420,7 @@ class Matrix3x3 { sum += m_[i][j] * m_[i][j]; } } + using std::sqrt; return sqrt(sum); } @@ -431,8 +434,8 @@ class Matrix3x3 { // characteristic polynomial // x^3 + a*x^2 + b*x + c VType a = -Trace(); - VType b = m_[0][0]*m_[1][1] + m_[1][1]*m_[2][2] + m_[2][2]*m_[0][0] - - m_[1][0]*m_[0][1] - m_[2][1]*m_[1][2] - m_[0][2]*m_[2][0]; + VType b = m_[0][0] * m_[1][1] + m_[1][1] * m_[2][2] + m_[2][2] * m_[0][0] - + m_[1][0] * m_[0][1] - m_[2][1] * m_[1][2] - m_[0][2] * m_[2][0]; VType c = -Det(); bool res = MathUtil::RealRootsForCubic(a, b, c, &r1, &r2, &r3); (*eig_val)[0] = r1; @@ -457,9 +460,9 @@ class Matrix3x3 { Matrix3x3 *eig_vec /*nullable*/) const { // Compute characteristic polynomial coefficients. double c2 = -Trace(); - double c1 = -(m_[1][0] * m_[1][0] - m_[0][0] * m_[1][1] - - m_[0][0] * m_[2][2] - m_[1][1] * m_[2][2] - + m_[2][0] * m_[2][0] + m_[2][1] * m_[2][1]); + double c1 = + -(m_[1][0] * m_[1][0] - m_[0][0] * m_[1][1] - m_[0][0] * m_[2][2] - + m_[1][1] * m_[2][2] + m_[2][0] * m_[2][0] + m_[2][1] * m_[2][1]); double c0 = -(m_[0][0] * m_[1][1] * m_[2][2] // - m_[2][0] * m_[2][0] * m_[1][1] // - m_[1][0] * m_[1][0] * m_[2][2] // @@ -470,8 +473,8 @@ class Matrix3x3 { // NOTE: Cannot reuse general cubic solver MathUtil::RealRootsForCubic() // because it doesn't guarantee finding 3 real roots, e.g. it won't always // return roots {2, 2, 0} for the cubic x^3 - 4*x^2 + 4*x + epsilon = 0. - double q = (c2*c2-3*c1)/9.0; - double r = (2*c2*c2*c2-9*c2*c1+27*c0)/54.0; + double q = (c2 * c2 - 3 * c1) / 9.0; + double r = (2 * c2 * c2 * c2 - 9 * c2 * c1 + 27 * c0) / 54.0; // Assume R^2 <= Q^3 so there are three real roots. 
// Avoid sqrt of negative q, which can only happen due to numerical error. if (q < 0) q = 0; @@ -482,19 +485,18 @@ class Matrix3x3 { double theta = atan2(q3_r2 <= 0 ? 0 : sqrt(q3_r2), r); double c2_3 = c2 / 3; (*eig_val)[0] = sqrt_q * cos(theta / 3.0) - c2_3; - (*eig_val)[1] = sqrt_q * cos((theta + 2.0 * M_PI)/3.0) - c2_3; - (*eig_val)[2] = sqrt_q * cos((theta - 2.0 * M_PI)/3.0) - c2_3; + (*eig_val)[1] = sqrt_q * cos((theta + 2.0 * M_PI) / 3.0) - c2_3; + (*eig_val)[2] = sqrt_q * cos((theta - 2.0 * M_PI) / 3.0) - c2_3; // Sort eigen value in decreasing order Vector3 d_order = eig_val->ComponentOrder(); - (*eig_val) = MVector((*eig_val)[d_order[2]], - (*eig_val)[d_order[1]], + (*eig_val) = MVector((*eig_val)[d_order[2]], (*eig_val)[d_order[1]], (*eig_val)[d_order[0]]); // Compute eigenvectors if (!eig_vec) return; for (int i = 0; i < 3; ++i) { - MVector r1 , r2 , r3 , e1 , e2 , e3; + MVector r1, r2, r3, e1, e2, e3; r1[0] = m_[0][0] - (*eig_val)[i]; r2[0] = m_[1][0]; r3[0] = m_[2][0]; @@ -519,9 +521,9 @@ class Matrix3x3 { // Return true is one of the elements of the matrix is NaN bool IsNaN() const { - for ( int i = 0; i < 3; ++i ) { - for ( int j = 0; j < 3; ++j ) { - if ( isnan(m_[i][j]) ) { + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) { + if (isnan(m_[i][j])) { return true; } } @@ -530,14 +532,10 @@ class Matrix3x3 { } friend bool operator==(const Matrix3x3 &a, const Matrix3x3 &b) { - return a.m_[0][0] == b.m_[0][0] && - a.m_[0][1] == b.m_[0][1] && - a.m_[0][2] == b.m_[0][2] && - a.m_[1][0] == b.m_[1][0] && - a.m_[1][1] == b.m_[1][1] && - a.m_[1][2] == b.m_[1][2] && - a.m_[2][0] == b.m_[2][0] && - a.m_[2][1] == b.m_[2][1] && + return a.m_[0][0] == b.m_[0][0] && a.m_[0][1] == b.m_[0][1] && + a.m_[0][2] == b.m_[0][2] && a.m_[1][0] == b.m_[1][0] && + a.m_[1][1] == b.m_[1][1] && a.m_[1][2] == b.m_[1][2] && + a.m_[2][0] == b.m_[2][0] && a.m_[2][1] == b.m_[2][1] && a.m_[2][2] == b.m_[2][2]; } @@ -545,10 +543,10 @@ class Matrix3x3 { return !(a == b); } - 
friend std::ostream &operator <<(std::ostream &out, const Matrix3x3 &mb) { + friend std::ostream &operator<<(std::ostream &out, const Matrix3x3 &mb) { int i, j; for (i = 0; i < 3; i++) { - if (i ==0) { + if (i == 0) { out << "["; } else { out << " "; @@ -564,10 +562,15 @@ class Matrix3x3 { } return out; } + + template + friend H AbslHashValue(H h, const Matrix3x3 &m) { + return H::combine_contiguous(std::move(h), m.Data(), 3 * 3); + } }; -typedef Matrix3x3 Matrix3x3_i; -typedef Matrix3x3 Matrix3x3_f; +typedef Matrix3x3 Matrix3x3_i; +typedef Matrix3x3 Matrix3x3_f; typedef Matrix3x3 Matrix3x3_d; diff --git a/src/s2/util/math/vector.h b/src/s2/util/math/vector.h index 6cddc9cf..8bd3338f 100644 --- a/src/s2/util/math/vector.h +++ b/src/s2/util/math/vector.h @@ -22,10 +22,13 @@ #include #include +#include #include +#include #include #include // NOLINT(readability/streams) #include +#include #include #include "s2/base/integral_types.h" @@ -33,9 +36,12 @@ #include "absl/base/macros.h" #include "absl/utility/utility.h" -template class Vector2; -template class Vector3; -template class Vector4; +template +class Vector2; +template +class Vector3; +template +class Vector4; namespace util { namespace math { @@ -50,20 +56,19 @@ class BasicVector { // FloatType is the type returned by Norm() and Angle(). These methods are // special because they return floating-point values even when VType is an // integer. - typedef typename std::conditional::value, - double, T>::type FloatType; + typedef typename std::conditional::value, double, T>::type + FloatType; using IdxSeqN = typename absl::make_index_sequence; template - static auto Reduce(F f, As*... as) - -> decltype(f(as[I]...)) { + static auto Reduce(F f, As*... as) -> decltype(f(as[I]...)) { return f(as[I]...); } template static R GenerateEach(absl::index_sequence, F f, As*... as) { - return R(Reduce(f, as...)...); + return R(Reduce(std::move(f), as...)...); } // Generate(f,a,b,...) 
returns an R(...), where the constructor arguments @@ -71,7 +76,8 @@ class BasicVector { // and with a,b,... all optional. template static R Generate(F f, As&&... as) { - return GenerateEach(IdxSeqN(), f, std::forward(as).Data()...); + return GenerateEach(IdxSeqN(), std::move(f), + std::forward(as).Data()...); } public: @@ -92,20 +98,20 @@ class BasicVector { } // TODO(user): Relationals should be nonmembers. - bool operator==(const D& b) const { - const T* ap = static_cast(*this).Data(); - return std::equal(ap, ap + this->Size(), b.Data()); + bool operator==(const BasicVector& b) const { + const T* ap = AsD().Data(); + return std::equal(ap, ap + this->Size(), b.AsD().Data()); } - bool operator!=(const D& b) const { return !(AsD() == b); } - bool operator<(const D& b) const { - const T* ap = static_cast(*this).Data(); - const T* bp = b.Data(); - return std::lexicographical_compare( - ap, ap + this->Size(), bp, bp + b.Size()); + bool operator!=(const BasicVector& b) const { return !(*this == b); } + bool operator<(const BasicVector& b) const { + const T* ap = AsD().Data(); + const T* bp = b.AsD().Data(); + return std::lexicographical_compare(ap, ap + this->Size(), bp, + bp + b.Size()); } - bool operator>(const D& b) const { return b < AsD(); } - bool operator<=(const D& b) const { return !(AsD() > b); } - bool operator>=(const D& b) const { return !(AsD() < b); } + bool operator>(const BasicVector& b) const { return b < *this; } + bool operator<=(const BasicVector& b) const { return !(*this > b); } + bool operator>=(const BasicVector& b) const { return !(*this < b); } D& operator+=(const D& b) { PlusEq(static_cast(*this).Data(), b.Data(), IdxSeqN{}); @@ -118,19 +124,19 @@ class BasicVector { } D& operator*=(T k) { - MulEq(static_cast(*this).Data(), k, IdxSeqN{}); + MulEq(static_cast(*this).Data(), std::move(k), IdxSeqN{}); return static_cast(*this); } D& operator/=(T k) { - DivEq(static_cast(*this).Data(), k, IdxSeqN{}); + DivEq(static_cast(*this).Data(), 
std::move(k), IdxSeqN{}); return static_cast(*this); } D operator+(const D& b) const { return D(AsD()) += b; } D operator-(const D& b) const { return D(AsD()) -= b; } - D operator*(T k) const { return D(AsD()) *= k; } - D operator/(T k) const { return D(AsD()) /= k; } + D operator*(T k) const { return D(AsD()) *= std::move(k); } + D operator/(T k) const { return D(AsD()) /= std::move(k); } friend D operator-(const D& a) { return Generate([](const T& x) { return -x; }, a); @@ -138,31 +144,29 @@ class BasicVector { // Convert from another vector type template - static D Cast(const VecTemplate &b) { + static D Cast(const VecTemplate& b) { return Generate([](const T2& x) { return static_cast(x); }, b); } // multiply two vectors component by component - D MulComponents(const D &b) const { + D MulComponents(const D& b) const { return Generate([](const T& x, const T& y) { return x * y; }, AsD(), b); } // divide two vectors component by component - D DivComponents(const D &b) const { + D DivComponents(const D& b) const { return Generate([](const T& x, const T& y) { return x / y; }, AsD(), b); } // Element-wise max. {max(a[0],b[0]), max(a[1],b[1]), ...} - friend D Max(const D &a, const D &b) { - return Generate([](const T& x, const T& y) { - return std::max(x, y); - }, a, b); + friend D Max(const D& a, const D& b) { + return Generate([](const T& x, const T& y) { return std::max(x, y); }, a, + b); } // Element-wise min. {min(a[0],b[0]), min(a[1],b[1]), ...} - friend D Min(const D &a, const D &b) { - return Generate([](const T& x, const T& y) { - return std::min(x, y); - }, a, b); + friend D Min(const D& a, const D& b) { + return Generate([](const T& x, const T& y) { return std::min(x, y); }, a, + b); } T DotProd(const D& b) const { @@ -186,25 +190,37 @@ class BasicVector { if (n != T(0.0)) { n = T(1.0) / n; } - return D(AsD()) *= n; + return D(AsD()) *= std::move(n); } // Compose a vector from the sqrt of each component. 
D Sqrt() const { - return Generate([](const T& x) { - using std::sqrt; - return sqrt(x); - }, AsD()); + return Generate( + [](const T& x) { + using std::sqrt; + return sqrt(x); + }, + AsD()); } // Take the floor of each component. D Floor() const { - return Generate([](const T& x) { return floor(x); }, AsD()); + return Generate( + [](const T& x) { + using std::floor; + return floor(x); + }, + AsD()); } // Take the ceil of each component. D Ceil() const { - return Generate([](const T& x) { return ceil(x); }, AsD()); + return Generate( + [](const T& x) { + using std::ceil; + return ceil(x); + }, + AsD()); } // Round of each component. @@ -224,8 +240,8 @@ class BasicVector { bool IsNaN() const { bool r = false; const T* ap = AsD().Data(); - for (int i = 0; i < SIZE; ++i) - r = r || isnan(ap[i]); + using std::isnan; + for (int i = 0; i < SIZE; ++i) r = r || isnan(ap[i]); return r; } @@ -236,7 +252,7 @@ class BasicVector { friend std::ostream& operator<<(std::ostream& out, const D& v) { out << "["; - const char *sep = ""; + const char* sep = ""; for (int i = 0; i < SIZE; ++i) { out << sep; Print(out, v[i]); @@ -255,6 +271,25 @@ class BasicVector { return Generate([k](const T& x) { return k / x; }, AsD()); } + template + friend H AbslHashValue(H h, const BasicVector& vec) { + return H::combine_contiguous(std::move(h), + static_cast(vec).Data(), vec.Size()); + } + + // Enable Flume default PCoder. + template + friend void FlumeEncode(E e, const BasicVector& vec) { + for (int i = 0; i < N; ++i) e(vec[i]); + } + template + friend bool FlumeDecode(D d, BasicVector& vec) { + for (int i = 0; i < N; ++i) { + if (!d(vec[i])) return false; + } + return true; + } + private: const D& AsD() const { return static_cast(*this); } D& AsD() { return static_cast(*this); } @@ -262,8 +297,12 @@ class BasicVector { // ostream << uint8 prints the ASCII character, which is not useful. // Cast to int so that numbers will be printed instead. 
template - static void Print(std::ostream& out, const U& v) { out << v; } - static void Print(std::ostream& out, uint8 v) { out << static_cast(v); } + static void Print(std::ostream& out, const U& v) { + out << v; + } + static void Print(std::ostream& out, uint8 v) { + out << static_cast(v); + } // Ignores its arguments so that side-effects of variadic unpacking can occur. static void Ignore(std::initializer_list) {} @@ -298,13 +337,13 @@ class BasicVector { // These templates must be defined outside of BasicVector so that the // template specialization match algorithm must deduce 'a'. See the review // of cl/119944115. -template class VT2, typename T2, std::size_t N2> +template class VT2, typename T2, + std::size_t N2> VT2 operator*(const K& k, const BasicVector& a) { return a.MulScalarInternal(k); } -template class VT2, typename T2, std::size_t N2> +template class VT2, typename T2, + std::size_t N2> VT2 operator/(const K& k, const BasicVector& a) { return a.DivScalarInternal(k); } @@ -315,8 +354,7 @@ VT2 operator/(const K& k, const BasicVector& a) { // ====================================================================== template -class Vector2 - : public util::math::internal_vector::BasicVector { +class Vector2 : public util::math::internal_vector::BasicVector { private: using Base = util::math::internal_vector::BasicVector<::Vector2, T, 2>; using VType = T; @@ -326,13 +364,13 @@ class Vector2 using FloatType = typename Base::FloatType; using Base::SIZE; - Vector2() : c_() {} - Vector2(T x, T y) { - c_[0] = x; - c_[1] = y; + constexpr Vector2() : c_() {} + constexpr Vector2(T x, T y) { + c_[0] = std::move(x); + c_[1] = std::move(y); } - explicit Vector2(const Vector3 &b) : Vector2(b.x(), b.y()) {} - explicit Vector2(const Vector4 &b) : Vector2(b.x(), b.y()) {} + explicit Vector2(const Vector3& b) : Vector2(b.x(), b.y()) {} + explicit Vector2(const Vector4& b) : Vector2(b.x(), b.y()) {} T* Data() { return c_; } const T* Data() const { return c_; } @@ -342,23 
+380,25 @@ class Vector2 T x() const { return c_[0]; } T y() const { return c_[1]; } - bool aequal(const Vector2 &vb, FloatType margin) const { + // Returns true if this vector's dimensions are at most `margin` from `vb`. + bool aequal(const Vector2& vb, FloatType margin) const { using std::fabs; - return (fabs(c_[0]-vb.c_[0]) < margin) && (fabs(c_[1]-vb.c_[1]) < margin); + return (fabs(c_[0] - vb.c_[0]) <= margin) && + (fabs(c_[1] - vb.c_[1]) <= margin); } - void Set(T x, T y) { *this = Vector2(x, y); } + void Set(T x, T y) { *this = Vector2(std::move(x), std::move(y)); } // Cross product. Be aware that if T is an integer type, the high bits // of the result are silently discarded. - T CrossProd(const Vector2 &vb) const { + T CrossProd(const Vector2& vb) const { return c_[0] * vb.c_[1] - c_[1] * vb.c_[0]; } - // Returns the angle between "this" and v in radians. If either vector is - // zero-length, or nearly zero-length, the result will be zero, regardless of - // the other value. - FloatType Angle(const Vector2 &v) const { + // Returns the angle from "this" to v in the counterclockwise direction in + // radians. Result range: [-pi, pi]. If either vector is zero-length, or + // nearly zero-length, the result will be zero, regardless of the other value. 
+ FloatType Angle(const Vector2& v) const { using std::atan2; return atan2(CrossProd(v), this->DotProd(v)); } @@ -384,8 +424,7 @@ class Vector2 }; template -class Vector3 - : public util::math::internal_vector::BasicVector { +class Vector3 : public util::math::internal_vector::BasicVector { private: using Base = util::math::internal_vector::BasicVector<::Vector3, T, 3>; using VType = T; @@ -395,33 +434,36 @@ class Vector3 using FloatType = typename Base::FloatType; using Base::SIZE; - Vector3() : c_() {} - Vector3(T x, T y, T z) { - c_[0] = x; - c_[1] = y; - c_[2] = z; + constexpr Vector3() : c_() {} + constexpr Vector3(T x, T y, T z) { + c_[0] = std::move(x); + c_[1] = std::move(y); + c_[2] = std::move(z); } - Vector3(const Vector2 &b, T z) : Vector3(b.x(), b.y(), z) {} - explicit Vector3(const Vector4 &b) : Vector3(b.x(), b.y(), b.z()) {} + Vector3(const Vector2& b, T z) : Vector3(b.x(), b.y(), z) {} + explicit Vector3(const Vector4& b) : Vector3(b.x(), b.y(), b.z()) {} T* Data() { return c_; } const T* Data() const { return c_; } - void x(const T &v) { c_[0] = v; } - void y(const T &v) { c_[1] = v; } - void z(const T &v) { c_[2] = v; } + void x(const T& v) { c_[0] = v; } + void y(const T& v) { c_[1] = v; } + void z(const T& v) { c_[2] = v; } T x() const { return c_[0]; } T y() const { return c_[1]; } T z() const { return c_[2]; } - bool aequal(const Vector3 &vb, FloatType margin) const { + // Returns true if this vector's dimensions are at most `margin` from `vb`. + bool aequal(const Vector3& vb, FloatType margin) const { using std::abs; - return (abs(c_[0] - vb.c_[0]) < margin) - && (abs(c_[1] - vb.c_[1]) < margin) - && (abs(c_[2] - vb.c_[2]) < margin); + return (abs(c_[0] - vb.c_[0]) <= margin) && + (abs(c_[1] - vb.c_[1]) <= margin) && + (abs(c_[2] - vb.c_[2]) <= margin); } - void Set(T x, T y, T z) { *this = Vector3(x, y, z); } + void Set(T x, T y, T z) { + *this = Vector3(std::move(x), std::move(y), std::move(z)); + } // Cross product. 
Be aware that if VType is an integer type, the high bits // of the result are silently discarded. @@ -440,17 +482,15 @@ class Vector3 return CrossProd(temp).Normalize(); } - // Returns the angle between two vectors in radians. If either vector is - // zero-length, or nearly zero-length, the result will be zero, regardless of - // the other value. - FloatType Angle(const Vector3 &va) const { + // Returns the angle between "this" and v in radians, in the range [0, pi]. If + // either vector is zero-length, or nearly zero-length, the result will be + // zero, regardless of the other value. + FloatType Angle(const Vector3& v) const { using std::atan2; - return atan2(CrossProd(va).Norm(), this->DotProd(va)); + return atan2(CrossProd(v).Norm(), this->DotProd(v)); } - Vector3 Fabs() const { - return Abs(); - } + Vector3 Fabs() const { return Abs(); } Vector3 Abs() const { static_assert( @@ -463,9 +503,9 @@ class Vector3 // return the index of the largest component (fabs) int LargestAbsComponent() const { Vector3 temp = Abs(); - return temp[0] > temp[1] ? - temp[0] > temp[2] ? 0 : 2 : - temp[1] > temp[2] ? 1 : 2; + return temp[0] > temp[1] ? temp[0] > temp[2] ? 0 : 2 + : temp[1] > temp[2] ? 
1 + : 2; } // return the index of the smallest, median ,largest component of the vector @@ -483,8 +523,7 @@ class Vector3 }; template -class Vector4 - : public util::math::internal_vector::BasicVector { +class Vector4 : public util::math::internal_vector::BasicVector { private: using Base = util::math::internal_vector::BasicVector<::Vector4, T, 4>; using VType = T; @@ -494,36 +533,36 @@ class Vector4 using FloatType = typename Base::FloatType; using Base::SIZE; - Vector4() : c_() {} - Vector4(T x, T y, T z, T w) { - c_[0] = x; - c_[1] = y; - c_[2] = z; - c_[3] = w; + constexpr Vector4() : c_() {} + constexpr Vector4(T x, T y, T z, T w) { + c_[0] = std::move(x); + c_[1] = std::move(y); + c_[2] = std::move(z); + c_[3] = std::move(w); } - Vector4(const Vector2 &b, T z, T w) - : Vector4(b.x(), b.y(), z, w) {} - Vector4(const Vector2 &a, const Vector2 &b) + Vector4(const Vector2& b, T z, T w) : Vector4(b.x(), b.y(), z, w) {} + Vector4(const Vector2& a, const Vector2& b) : Vector4(a.x(), a.y(), b.x(), b.y()) {} - Vector4(const Vector3 &b, T w) - : Vector4(b.x(), b.y(), b.z(), w) {} + Vector4(const Vector3& b, T w) + : Vector4(b.x(), b.y(), b.z(), std::move(w)) {} T* Data() { return c_; } const T* Data() const { return c_; } - bool aequal(const Vector4 &vb, FloatType margin) const { + // Returns true if this vector's dimensions are at most `margin` from `vb`. 
+ bool aequal(const Vector4& vb, FloatType margin) const { using std::fabs; - return (fabs(c_[0] - vb.c_[0]) < margin) - && (fabs(c_[1] - vb.c_[1]) < margin) - && (fabs(c_[2] - vb.c_[2]) < margin) - && (fabs(c_[3] - vb.c_[3]) < margin); + return (fabs(c_[0] - vb.c_[0]) <= margin) && + (fabs(c_[1] - vb.c_[1]) <= margin) && + (fabs(c_[2] - vb.c_[2]) <= margin) && + (fabs(c_[3] - vb.c_[3]) <= margin); } - void x(const T &v) { c_[0] = v; } - void y(const T &v) { c_[1] = v; } - void z(const T &v) { c_[2] = v; } - void w(const T &v) { c_[3] = v; } + void x(const T& v) { c_[0] = v; } + void y(const T& v) { c_[1] = v; } + void z(const T& v) { c_[2] = v; } + void w(const T& v) { c_[3] = v; } T x() const { return c_[0]; } T y() const { return c_[1]; } T z() const { return c_[2]; } @@ -547,22 +586,22 @@ class Vector4 VType c_[SIZE]; }; -typedef Vector2 Vector2_b; -typedef Vector2 Vector2_s; -typedef Vector2 Vector2_i; -typedef Vector2 Vector2_f; +typedef Vector2 Vector2_b; +typedef Vector2 Vector2_s; +typedef Vector2 Vector2_i; +typedef Vector2 Vector2_f; typedef Vector2 Vector2_d; -typedef Vector3 Vector3_b; -typedef Vector3 Vector3_s; -typedef Vector3 Vector3_i; -typedef Vector3 Vector3_f; +typedef Vector3 Vector3_b; +typedef Vector3 Vector3_s; +typedef Vector3 Vector3_i; +typedef Vector3 Vector3_f; typedef Vector3 Vector3_d; -typedef Vector4 Vector4_b; -typedef Vector4 Vector4_s; -typedef Vector4 Vector4_i; -typedef Vector4 Vector4_f; +typedef Vector4 Vector4_b; +typedef Vector4 Vector4_s; +typedef Vector4 Vector4_i; +typedef Vector4 Vector4_f; typedef Vector4 Vector4_d; diff --git a/src/s2/util/math/vector3_hash.h b/src/s2/util/math/vector3_hash.h deleted file mode 100644 index 0c212a73..00000000 --- a/src/s2/util/math/vector3_hash.h +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright Google Inc. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS-IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#ifndef S2_UTIL_MATH_VECTOR3_HASH_H_ -#define S2_UTIL_MATH_VECTOR3_HASH_H_ - -#include -#include -#include - -#include "s2/util/hash/mix.h" -#include "s2/util/math/vector.h" - -template -struct GoodFastHash; - -template -struct GoodFastHash> { - std::size_t operator()(const Vector2& v) const { - static_assert(std::is_pod::value, "POD expected"); - // std::hash collapses +/-0. - std::hash h; - HashMix mix(h(v.x())); - mix.Mix(h(v.y())); - return mix.get(); - } -}; - -template -struct GoodFastHash> { - std::size_t operator()(const Vector3& v) const { - static_assert(std::is_pod::value, "POD expected"); - // std::hash collapses +/-0. 
- std::hash h; - HashMix mix(h(v.x())); - mix.Mix(h(v.y())); - mix.Mix(h(v.z())); - return mix.get(); - } -}; - -#endif // S2_UTIL_MATH_VECTOR3_HASH_H_ diff --git a/src/s2/util/units/physical-units.h b/src/s2/util/units/physical-units.h index 9d99cebf..610f6d0b 100644 --- a/src/s2/util/units/physical-units.h +++ b/src/s2/util/units/physical-units.h @@ -85,8 +85,11 @@ #define S2_UTIL_UNITS_PHYSICAL_UNITS_H_ #include +#include #include #include +#include +#include #include #include @@ -103,10 +106,10 @@ namespace units { template struct UnitConversion { - static const int SCALE_NUMERATOR = ScaleNumerator; - static const int SCALE_DENOMINATOR = ScaleDenominator; - static const int OFFSET_NUMERATOR = OffsetNumerator; - static const int OFFSET_DENOMINATOR = OffsetDenominator; + static constexpr int SCALE_NUMERATOR = ScaleNumerator; + static constexpr int SCALE_DENOMINATOR = ScaleDenominator; + static constexpr int OFFSET_NUMERATOR = OffsetNumerator; + static constexpr int OFFSET_DENOMINATOR = OffsetDenominator; }; template @@ -120,19 +123,21 @@ struct UnitConverter { constexpr static inline Float Convert(Float value) { // scaling and offset return static_cast( - (static_cast(value * - (static_cast(static_cast(ToUnit::SCALE_NUMERATOR) * - FromUnit::SCALE_DENOMINATOR) / - static_cast(static_cast(ToUnit::SCALE_DENOMINATOR) * - FromUnit::SCALE_NUMERATOR)))) - - (static_cast(static_cast(ToUnit::SCALE_NUMERATOR) * - FromUnit::SCALE_DENOMINATOR * - FromUnit::OFFSET_NUMERATOR) / - static_cast(static_cast(ToUnit::SCALE_DENOMINATOR) * - FromUnit::SCALE_NUMERATOR * - FromUnit::OFFSET_DENOMINATOR)) + - (static_cast(ToUnit::OFFSET_NUMERATOR) / - static_cast(ToUnit::OFFSET_DENOMINATOR))); + (static_cast( + value * (static_cast( + static_cast(ToUnit::SCALE_NUMERATOR) * + FromUnit::SCALE_DENOMINATOR) / + static_cast( + static_cast(ToUnit::SCALE_DENOMINATOR) * + FromUnit::SCALE_NUMERATOR)))) - + (static_cast(static_cast(ToUnit::SCALE_NUMERATOR) * + FromUnit::SCALE_DENOMINATOR * + 
FromUnit::OFFSET_NUMERATOR) / + static_cast(static_cast(ToUnit::SCALE_DENOMINATOR) * + FromUnit::SCALE_NUMERATOR * + FromUnit::OFFSET_DENOMINATOR)) + + (static_cast(ToUnit::OFFSET_NUMERATOR) / + static_cast(ToUnit::OFFSET_DENOMINATOR))); } }; @@ -142,7 +147,7 @@ struct UnitConverter { // default unit transformations are assumed to be linear; see // temperature-units.h for an example of how to override this default. template -struct is_linear_unit_transformation : std::true_type { }; +struct is_linear_unit_transformation : std::true_type {}; // Template class holding a single value with an associated physical // unit. The unit and conversion parameters are statically defined @@ -190,7 +195,7 @@ class PhysicalUnit { // Copy operation from other units of the same Base type. template - Type operator = (PhysicalUnit other) { + Type& operator=(PhysicalUnit other) { value_ = UnitConverter::Convert(other.value()); return *this; } @@ -206,36 +211,36 @@ class PhysicalUnit { constexpr Float value() const { return value_; } // Trivial arithematic operator wrapping. 
- Type operator - () const { + Type operator-() const { return Type(-value_); } - Type operator * (const Float scale) const { + Type operator*(const Float scale) const { return Type(value_ * scale); } - Type operator + (const Type other) const { + Type operator+(const Type other) const { static_assert(is_linear_unit_transformation::value, "operation not defined"); return Type(value_ + other.value()); } - Type operator - (const Type other) const { + Type operator-(const Type other) const { static_assert(is_linear_unit_transformation::value, "operation not defined"); return Type(value_ - other.value()); } - Float operator / (const Type other) const { + Float operator/(const Type other) const { return value_ / other.value(); } - Type operator *= (const Float scale) { + Type operator*=(const Float scale) { value_ *= scale; return *this; } - Type operator += (const Type other) { + Type operator+=(const Type other) { static_assert(is_linear_unit_transformation::value, "operation not defined"); value_ += other.value(); return *this; } - Type operator -= (const Type other) { + Type operator-=(const Type other) { static_assert(is_linear_unit_transformation::value, "operation not defined"); value_ -= other.value(); @@ -244,16 +249,16 @@ class PhysicalUnit { // Simple comparisons. Overloaded equality is intentionally omitted; // use equals() instead. 
- bool operator < (const Type other) const { + bool operator<(const Type other) const { return value_ < other.value(); } - bool operator > (const Type other) const { + bool operator>(const Type other) const { return value_ > other.value(); } - bool operator <= (const Type other) const { + bool operator<=(const Type other) const { return value_ <= other.value(); } - bool operator >= (const Type other) const { + bool operator>=(const Type other) const { return value_ >= other.value(); } @@ -306,8 +311,15 @@ std::ostream& operator<<(std::ostream& os, << Base::output_suffix << ")"; } -} // end namespace units +template +std::string ToString(PhysicalUnit value) { + std::ostringstream string_stream; + string_stream << value; + return string_stream.str(); +} + +} // end namespace units -} // end namespace util +} // end namespace util #endif // S2_UTIL_UNITS_PHYSICAL_UNITS_H_ diff --git a/src/s2/value_lexicon.h b/src/s2/value_lexicon.h index 668439aa..c8a0c0ee 100644 --- a/src/s2/value_lexicon.h +++ b/src/s2/value_lexicon.h @@ -18,8 +18,11 @@ #ifndef S2_VALUE_LEXICON_H_ #define S2_VALUE_LEXICON_H_ +#include + #include #include +#include #include #include "s2/base/integral_types.h"