From e951f65d238a8b7ceeb452f47da94158f9526315 Mon Sep 17 00:00:00 2001 From: James Addison Date: Mon, 4 Mar 2024 01:27:52 +0000 Subject: [PATCH 01/15] searchindex: omit the section reference when indexing each document's title --- sphinx/search/__init__.py | 5 +++-- tests/test_search.py | 7 ++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/sphinx/search/__init__.py b/sphinx/search/__init__.py index 74dd3938272..1d5f1f7bfc5 100644 --- a/sphinx/search/__init__.py +++ b/sphinx/search/__init__.py @@ -393,8 +393,9 @@ def freeze(self) -> dict[str, Any]: alltitles: dict[str, list[tuple[int, str]]] = {} for docname, titlelist in sorted(self._all_titles.items()): - for title, titleid in titlelist: - alltitles.setdefault(title, []).append((fn2index[docname], titleid)) + for n, (title, titleid) in enumerate(titlelist): + anchor = titleid if n > 0 else None # only add anchors for section headings + alltitles.setdefault(title, []).append((fn2index[docname], anchor)) index_entries: dict[str, list[tuple[int, str]]] = {} for docname, entries in self._index_entries.items(): diff --git a/tests/test_search.py b/tests/test_search.py index fc50162e56e..d28e5dd6410 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -192,7 +192,7 @@ def test_IndexBuilder(): 'test': [0, 1, 2, 3]}, 'titles': ('title1_1', 'title1_2', 'title2_1', 'title2_2'), 'titleterms': {'section_titl': [0, 1, 2, 3]}, - 'alltitles': {'section_title': [(0, 'section-title'), (1, 'section-title'), (2, 'section-title'), (3, 'section-title')]}, + 'alltitles': {'section_title': [(0, None), (1, None), (2, None), (3, None)]}, 'indexentries': {}, } assert index._objtypes == {('dummy1', 'objtype1'): 0, ('dummy2', 'objtype1'): 1} @@ -253,7 +253,7 @@ def test_IndexBuilder(): 'test': [0, 1]}, 'titles': ('title1_2', 'title2_2'), 'titleterms': {'section_titl': [0, 1]}, - 'alltitles': {'section_title': [(0, 'section-title'), (1, 'section-title')]}, + 'alltitles': {'section_title': [(0, None), (1, None)]}, 'indexentries': {}, } assert index._objtypes == {('dummy1', 'objtype1'): 0, ('dummy2', 'objtype1'): 1} @@ -332,7 +332,8 @@ def assert_is_sorted(item, path): assert_is_sorted(value, f'{path}.{key}') elif isinstance(item, list): if not is_title_tuple_type(item) and path not in lists_not_to_sort: - assert item == sorted(item), f'{err_path} is not sorted' + # sort nulls last; http://stackoverflow.com/questions/19868767/ + assert item == sorted(item, key=lambda x: (x is None, x)), f'{err_path} is not sorted' for i, child in enumerate(item): assert_is_sorted(child, f'{path}[{i}]') From 5e05a27911e696f4a4ac89d6e12dae425ad195d8 Mon Sep 17 00:00:00 2001 From: James Addison Date: Mon, 4 Mar 2024 10:34:53 +0000 Subject: [PATCH 02/15] refactor: move the fix to the wordcollector node visitor Note: reStructuredText document titles are implicit; ref: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#document --- sphinx/search/__init__.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/sphinx/search/__init__.py b/sphinx/search/__init__.py index 1d5f1f7bfc5..c315773fb16 100644 --- a/sphinx/search/__init__.py +++ b/sphinx/search/__init__.py @@ -393,9 +393,8 @@ def freeze(self) -> dict[str, Any]: alltitles: dict[str, list[tuple[int, str]]] = {} for docname, titlelist in sorted(self._all_titles.items()): - for n, (title, titleid) in enumerate(titlelist): - anchor = titleid if n > 0 else None # only add anchors for section headings - alltitles.setdefault(title, []).append((fn2index[docname], anchor)) + for title, titleid in titlelist: + alltitles.setdefault(title, []).append((fn2index[docname], titleid)) index_entries: dict[str, list[tuple[int, str]]] = {} for docname, entries in self._index_entries.items(): @@ -503,9 +502,12 @@ def _visit_nodes(node): elif isinstance(node, nodes.Text): word_store.words.extend(split(node.astext())) elif isinstance(node, nodes.title): - title = node.astext() - ids = node.parent['ids'] - word_store.titles.append((title, ids[0] if ids else None)) + title, titleid = node.astext(), None + # docutils: top-level section title can be considered the document title + if not isinstance(node.parent.parent, nodes.document): + if node.parent['ids']: + titleid = node.parent['ids'][0] + word_store.titles.append((title, titleid)) word_store.title_words.extend(split(title)) for child in node.children: _visit_nodes(child) From d44dc11cd9f5d046a4ff1ba490493a9ed56bb93f Mon Sep 17 00:00:00 2001 From: James Addison Date: Mon, 4 Mar 2024 12:42:08 +0000 Subject: [PATCH 03/15] typing: emit empty-string instead of null for document title anchor values --- sphinx/search/__init__.py | 2 +- sphinx/themes/basic/static/searchtools.js | 2 +- tests/test_search.py | 7 +++---- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/sphinx/search/__init__.py b/sphinx/search/__init__.py index c315773fb16..9db5b666ea9 100644 --- a/sphinx/search/__init__.py +++ b/sphinx/search/__init__.py @@ -502,7 +502,7 @@ def _visit_nodes(node): elif isinstance(node, nodes.Text): word_store.words.extend(split(node.astext())) elif isinstance(node, nodes.title): - title, titleid = node.astext(), None + title, titleid = node.astext(), '' # docutils: top-level section title can be considered the document title if not isinstance(node.parent.parent, nodes.document): if node.parent['ids']: diff --git a/sphinx/themes/basic/static/searchtools.js b/sphinx/themes/basic/static/searchtools.js index 0e134912cee..753f5cdcde5 100644 --- a/sphinx/themes/basic/static/searchtools.js +++ b/sphinx/themes/basic/static/searchtools.js @@ -307,7 +307,7 @@ const Search = { results.push([ docNames[file], titles[file] !== title ? `${titles[file]} > ${title}` : title, - id !== null ? "#" + id : "", + id ? "#" + id : "", null, score, filenames[file], diff --git a/tests/test_search.py b/tests/test_search.py index d28e5dd6410..c0144e60199 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -192,7 +192,7 @@ def test_IndexBuilder(): 'test': [0, 1, 2, 3]}, 'titles': ('title1_1', 'title1_2', 'title2_1', 'title2_2'), 'titleterms': {'section_titl': [0, 1, 2, 3]}, - 'alltitles': {'section_title': [(0, None), (1, None), (2, None), (3, None)]}, + 'alltitles': {'section_title': [(0, ''), (1, ''), (2, ''), (3, '')]}, 'indexentries': {}, } assert index._objtypes == {('dummy1', 'objtype1'): 0, ('dummy2', 'objtype1'): 1} @@ -253,7 +253,7 @@ def test_IndexBuilder(): 'test': [0, 1]}, 'titles': ('title1_2', 'title2_2'), 'titleterms': {'section_titl': [0, 1]}, - 'alltitles': {'section_title': [(0, None), (1, None)]}, + 'alltitles': {'section_title': [(0, ''), (1, '')]}, 'indexentries': {}, } assert index._objtypes == {('dummy1', 'objtype1'): 0, ('dummy2', 'objtype1'): 1} @@ -332,8 +332,7 @@ def assert_is_sorted(item, path): assert_is_sorted(value, f'{path}.{key}') elif isinstance(item, list): if not is_title_tuple_type(item) and path not in lists_not_to_sort: - # sort nulls last; http://stackoverflow.com/questions/19868767/ - assert item == sorted(item, key=lambda x: (x is None, x)), f'{err_path} is not sorted' + assert item == sorted(item), f'{err_path} is not sorted' for i, child in enumerate(item): assert_is_sorted(child, f'{path}[{i}]') From a978110df843ebcc6d34bedfacbe7fc2cccb64a9 Mon Sep 17 00:00:00 2001 From: James Addison Date: Mon, 4 Mar 2024 13:39:49 +0000 Subject: [PATCH 04/15] tests: add failing test to catch a regression; other top-level section headings may exist --- tests/test_search.py | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/tests/test_search.py b/tests/test_search.py index c0144e60199..a1159229744 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -66,6 +66,9 @@ def is_registered_term(index, keyword): .. test that comments are not indexed: boson +another_title +=============== + test that non-comments are indexed: fermion ''' @@ -171,7 +174,10 @@ def test_IndexBuilder(): 'index': {'docname1_1', 'docname1_2', 'docname2_1', 'docname2_2'}, 'test': {'docname1_1', 'docname1_2', 'docname2_1', 'docname2_2'}, } - assert index._title_mapping == {'section_titl': {'docname1_1', 'docname1_2', 'docname2_1', 'docname2_2'}} + assert index._title_mapping == { + 'another_titl': {'docname1_1', 'docname1_2', 'docname2_1', 'docname2_2'}, + 'section_titl': {'docname1_1', 'docname1_2', 'docname2_1', 'docname2_2'}, + } assert index._objtypes == {} assert index._objnames == {} @@ -191,8 +197,14 @@ def test_IndexBuilder(): 'non': [0, 1, 2, 3], 'test': [0, 1, 2, 3]}, 'titles': ('title1_1', 'title1_2', 'title2_1', 'title2_2'), - 'titleterms': {'section_titl': [0, 1, 2, 3]}, - 'alltitles': {'section_title': [(0, ''), (1, ''), (2, ''), (3, '')]}, + 'titleterms': { + 'another_titl': [0, 1, 2, 3], + 'section_titl': [0, 1, 2, 3], + }, + 'alltitles': { + 'another_title': [(0, 'another-title'), (1, 'another-title'), (2, 'another-title'), (3, 'another-title')], + 'section_title': [(0, ''), (1, ''), (2, ''), (3, '')], + }, 'indexentries': {}, } assert index._objtypes == {('dummy1', 'objtype1'): 0, ('dummy2', 'objtype1'): 1} @@ -233,7 +245,10 @@ def test_IndexBuilder(): 'index': {'docname1_2', 'docname2_2'}, 'test': {'docname1_2', 'docname2_2'}, } - assert index._title_mapping == {'section_titl': {'docname1_2', 'docname2_2'}} + assert index._title_mapping == { + 'another_titl': {'docname1_2', 'docname2_2'}, + 'section_titl': {'docname1_2', 'docname2_2'}, + } assert index._objtypes == {('dummy1', 'objtype1'): 0, ('dummy2', 'objtype1'): 1} assert index._objnames == {0: ('dummy1', 'objtype1', 'objtype1'), 1: ('dummy2', 'objtype1', 'objtype1')} @@ -252,8 +267,14 @@ def test_IndexBuilder(): 'non': [0, 1], 'test': [0, 1]}, 'titles': ('title1_2', 'title2_2'), - 'titleterms': {'section_titl': [0, 1]}, - 'alltitles': {'section_title': [(0, ''), (1, '')]}, + 'titleterms': { + 'another_titl': [0, 1], + 'section_titl': [0, 1], + }, + 'alltitles': { + 'another_title': [(0, 'another-title'), (1, 'another-title')], + 'section_title': [(0, ''), (1, '')], + }, 'indexentries': {}, } assert index._objtypes == {('dummy1', 'objtype1'): 0, ('dummy2', 'objtype1'): 1} From ed47ab9d3714770aee5e3560811541a4c1622c60 Mon Sep 17 00:00:00 2001 From: James Addison Date: Mon, 4 Mar 2024 13:53:03 +0000 Subject: [PATCH 05/15] fixup: only omit the anchor for the first-encountered title --- sphinx/search/__init__.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/sphinx/search/__init__.py b/sphinx/search/__init__.py index 9db5b666ea9..dc4ef259910 100644 --- a/sphinx/search/__init__.py +++ b/sphinx/search/__init__.py @@ -502,12 +502,9 @@ def _visit_nodes(node): elif isinstance(node, nodes.Text): word_store.words.extend(split(node.astext())) elif isinstance(node, nodes.title): - title, titleid = node.astext(), '' - # docutils: top-level section title can be considered the document title - if not isinstance(node.parent.parent, nodes.document): - if node.parent['ids']: - titleid = node.parent['ids'][0] - word_store.titles.append((title, titleid)) + title = node.astext() + ids = node.parent['ids'] + word_store.titles.append((title, ids[0] if ids and word_store.titles else '')) word_store.title_words.extend(split(title)) for child in node.children: _visit_nodes(child) From 26d6e3c052130138885615dd55cb9a43589ff9ee Mon Sep 17 00:00:00 2001 From: James Addison Date: Mon, 4 Mar 2024 13:59:50 +0000 Subject: [PATCH 06/15] Revert "typing: emit empty-string instead of null for document title anchor values" This reverts commit d44dc11cd9f5d046a4ff1ba490493a9ed56bb93f. --- sphinx/search/__init__.py | 2 +- sphinx/themes/basic/static/searchtools.js | 2 +- tests/test_search.py | 7 ++++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/sphinx/search/__init__.py b/sphinx/search/__init__.py index dc4ef259910..81ff0746686 100644 --- a/sphinx/search/__init__.py +++ b/sphinx/search/__init__.py @@ -504,7 +504,7 @@ def _visit_nodes(node): elif isinstance(node, nodes.title): title = node.astext() ids = node.parent['ids'] - word_store.titles.append((title, ids[0] if ids and word_store.titles else '')) + word_store.titles.append((title, ids[0] if ids and word_store.titles else None)) word_store.title_words.extend(split(title)) for child in node.children: _visit_nodes(child) diff --git a/sphinx/themes/basic/static/searchtools.js b/sphinx/themes/basic/static/searchtools.js index 753f5cdcde5..0e134912cee 100644 --- a/sphinx/themes/basic/static/searchtools.js +++ b/sphinx/themes/basic/static/searchtools.js @@ -307,7 +307,7 @@ const Search = { results.push([ docNames[file], titles[file] !== title ? `${titles[file]} > ${title}` : title, - id ? "#" + id : "", + id !== null ? "#" + id : "", null, score, filenames[file], diff --git a/tests/test_search.py b/tests/test_search.py index a1159229744..f0c4b1d72a9 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -203,7 +203,7 @@ def test_IndexBuilder(): }, 'alltitles': { 'another_title': [(0, 'another-title'), (1, 'another-title'), (2, 'another-title'), (3, 'another-title')], - 'section_title': [(0, ''), (1, ''), (2, ''), (3, '')], + 'section_title': [(0, None), (1, None), (2, None), (3, None)], }, 'indexentries': {}, } @@ -273,7 +273,7 @@ def test_IndexBuilder(): }, 'alltitles': { 'another_title': [(0, 'another-title'), (1, 'another-title')], - 'section_title': [(0, ''), (1, '')], + 'section_title': [(0, None), (1, None)], }, 'indexentries': {}, } @@ -353,7 +353,8 @@ def assert_is_sorted(item, path): assert_is_sorted(value, f'{path}.{key}') elif isinstance(item, list): if not is_title_tuple_type(item) and path not in lists_not_to_sort: - assert item == sorted(item), f'{err_path} is not sorted' + # sort nulls last; http://stackoverflow.com/questions/19868767/ + assert item == sorted(item, key=lambda x: (x is None, x)), f'{err_path} is not sorted' for i, child in enumerate(item): assert_is_sorted(child, f'{path}[{i}]') From 5c75c81605da037a5aabec8cd7877f74ca87e2e5 Mon Sep 17 00:00:00 2001 From: James Addison Date: Mon, 4 Mar 2024 14:01:29 +0000 Subject: [PATCH 07/15] linting: add ignore-E501 to overlong line --- sphinx/search/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinx/search/__init__.py b/sphinx/search/__init__.py index 81ff0746686..54a928f63b5 100644 --- a/sphinx/search/__init__.py +++ b/sphinx/search/__init__.py @@ -504,7 +504,7 @@ def _visit_nodes(node): elif isinstance(node, nodes.title): title = node.astext() ids = node.parent['ids'] - word_store.titles.append((title, ids[0] if ids and word_store.titles else None)) + word_store.titles.append((title, ids[0] if ids and word_store.titles else None)) # NoQA: E501 word_store.title_words.extend(split(title)) for child in node.children: _visit_nodes(child) From b8177c528bf6ef378b8d29c5d3a71d9b49580478 Mon Sep 17 00:00:00 2001 From: James Addison Date: Fri, 15 Mar 2024 14:00:02 +0000 Subject: [PATCH 08/15] Add CHANGES.rst entry --- CHANGES.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index d49fdd9612e..ad214fc955e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -98,6 +98,9 @@ Bugs fixed Patch by Bénédikt Tran. * #11894: Do not add checksums to css files if building using the htmlhelp builder. Patch by mkay. +* #11961: Omit anchor references from document title entries in the search index, + removing duplication of search results. + Patch by James Addison. Testing ------- From 9fef43ba3ed3c5cb09a264994b55dc1d124e7810 Mon Sep 17 00:00:00 2001 From: James Addison Date: Thu, 13 Jun 2024 12:13:43 +0100 Subject: [PATCH 09/15] [tests] search: regenerate test fixtures using 'utils/generate_js_fixtures.py' script. --- tests/js/fixtures/multiterm/searchindex.js | 2 +- tests/js/fixtures/partial/searchindex.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/js/fixtures/multiterm/searchindex.js b/tests/js/fixtures/multiterm/searchindex.js index b791df93d11..096b97eb7a3 100644 --- a/tests/js/fixtures/multiterm/searchindex.js +++ b/tests/js/fixtures/multiterm/searchindex.js @@ -1 +1 @@ -Search.setIndex({"alltitles": {"Main Page": [[0, "main-page"]]}, "docnames": ["index"], "envversion": {"sphinx": 61, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2}, "filenames": ["index.rst"], "indexentries": {}, "objects": {}, "objnames": {}, "objtypes": {}, "terms": {"At": 0, "adjac": 0, "all": 0, "an": 0, "appear": 0, "applic": 0, "ar": 0, "built": 0, "can": 0, "check": 0, "contain": 0, "do": 0, "document": 0, "doesn": 0, "each": 0, "fixtur": 0, "format": 0, "function": 0, "futur": 0, "html": 0, "i": 0, "includ": 0, "match": 0, "messag": 0, "multipl": 0, "multiterm": 0, "order": 0, "other": 0, "output": 0, "perform": 0, "perhap": 0, "phrase": 0, "project": 0, "queri": 0, "requir": 0, "same": 0, "search": 0, "successfulli": 0, "support": 0, "t": 0, "term": 0, "test": 0, "thi": 0, "time": 0, "us": 0, "when": 0, "write": 0}, "titles": ["Main Page"], "titleterms": {"main": 0, "page": 0}}) \ No newline at end of file +Search.setIndex({"alltitles": {"Main Page": [[0, null]]}, "docnames": ["index"], "envversion": {"sphinx": 61, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2}, "filenames": ["index.rst"], "indexentries": {}, "objects": {}, "objnames": {}, "objtypes": {}, "terms": {"At": 0, "adjac": 0, "all": 0, "an": 0, "appear": 0, "applic": 0, "ar": 0, "built": 0, "can": 0, "check": 0, "contain": 0, "do": 0, "document": 0, "doesn": 0, "each": 0, "fixtur": 0, "format": 0, "function": 0, "futur": 0, "html": 0, "i": 0, "includ": 0, "match": 0, "messag": 0, "multipl": 0, "multiterm": 0, "order": 0, "other": 0, "output": 0, "perform": 0, "perhap": 0, "phrase": 0, "project": 0, "queri": 0, "requir": 0, "same": 0, "search": 0, "successfulli": 0, "support": 0, "t": 0, "term": 0, "test": 0, "thi": 0, "time": 0, "us": 0, "when": 0, "write": 0}, "titles": ["Main Page"], "titleterms": {"main": 0, "page": 0}}) \ No newline at end of file diff --git a/tests/js/fixtures/partial/searchindex.js b/tests/js/fixtures/partial/searchindex.js index 6ccfbd6d07e..6d9206e0988 100644 --- a/tests/js/fixtures/partial/searchindex.js +++ b/tests/js/fixtures/partial/searchindex.js @@ -1 +1 @@ -Search.setIndex({"alltitles": {"sphinx_utils module": [[0, "sphinx-utils-module"]]}, "docnames": ["index"], "envversion": {"sphinx": 61, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2}, "filenames": ["index.rst"], "indexentries": {}, "objects": {}, "objnames": {}, "objtypes": {}, "terms": {"also": 0, "ar": 0, "built": 0, "confirm": 0, "document": 0, "function": 0, "html": 0, "i": 0, "includ": 0, "input": 0, "javascript": 0, "known": 0, "match": 0, "partial": 0, "possibl": 0, "prefix": 0, "project": 0, "provid": 0, "restructuredtext": 0, "sampl": 0, "search": 0, "should": 0, "thi": 0, "titl": 0, "us": 0, "when": 0}, "titles": ["sphinx_utils module"], "titleterms": {"modul": 0, "sphinx_util": 0}}) \ No newline at end of file +Search.setIndex({"alltitles": {"sphinx_utils module": [[0, null]]}, "docnames": ["index"], "envversion": {"sphinx": 61, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2}, "filenames": ["index.rst"], "indexentries": {}, "objects": {}, "objnames": {}, "objtypes": {}, "terms": {"also": 0, "ar": 0, "built": 0, "confirm": 0, "document": 0, "function": 0, "html": 0, "i": 0, "includ": 0, "input": 0, "javascript": 0, "known": 0, "match": 0, "partial": 0, "possibl": 0, "prefix": 0, "project": 0, "provid": 0, "restructuredtext": 0, "sampl": 0, "search": 0, "should": 0, "thi": 0, "titl": 0, "us": 0, "when": 0}, "titles": ["sphinx_utils module"], "titleterms": {"modul": 0, "sphinx_util": 0}}) \ No newline at end of file From ad3488e6facc389569cd6ab11712ec2ec4b72a66 Mon Sep 17 00:00:00 2001 From: James Addison Date: Thu, 13 Jun 2024 12:22:01 +0100 Subject: [PATCH 10/15] [docs] Move changelog entry to correct release, and remove preceding empty newline. --- CHANGES.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 924c090f250..ad76ff3fc01 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -45,6 +45,9 @@ Bugs fixed * #12380: LaTeX: Footnote mark sometimes indicates ``Page N`` where ``N`` is the current page number and the footnote does appear on that same page. Patch by Jean-François B. +* #11961: Omit anchor references from document title entries in the search index, + removing duplication of search results. + Patch by James Addison. Testing ------- @@ -307,10 +310,6 @@ Bugs fixed download cache. Patch by James Addison and Adam Turner. -* #11961: Omit anchor references from document title entries in the search index, - removing duplication of search results. - Patch by James Addison. - Testing ------- From dcdd743a9daa3c12bcc1a17c3660a4ab357f3af4 Mon Sep 17 00:00:00 2001 From: James Addison Date: Fri, 14 Jun 2024 19:45:16 +0100 Subject: [PATCH 11/15] [search] refactor title link ID logic to avoid E501 lint error. --- sphinx/search/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sphinx/search/__init__.py b/sphinx/search/__init__.py index 00022167633..3127c434d5b 100644 --- a/sphinx/search/__init__.py +++ b/sphinx/search/__init__.py @@ -502,9 +502,10 @@ def _visit_nodes(node): elif isinstance(node, nodes.Text): word_store.words.extend(split(node.astext())) elif isinstance(node, nodes.title): - title = node.astext() + title, is_main_title = node.astext(), len(word_store.titles) == 0 ids = node.parent['ids'] - word_store.titles.append((title, ids[0] if ids and word_store.titles else None)) # NoQA: E501 + title_node_id = None if is_main_title else ids[0] if ids else None + word_store.titles.append((title, title_node_id)) word_store.title_words.extend(split(title)) for child in node.children: _visit_nodes(child) From 30d626ecb20da87476ca203d27a0665c16fdfe01 Mon Sep 17 00:00:00 2001 From: James Addison Date: Fri, 14 Jun 2024 19:50:50 +0100 Subject: [PATCH 12/15] [search] lint fixup: allow node-id reference in WordStore.titles to be empty (optional). --- sphinx/environment/__init__.py | 4 ++-- sphinx/search/__init__.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sphinx/environment/__init__.py b/sphinx/environment/__init__.py index 34db01a0f52..c1a4724149d 100644 --- a/sphinx/environment/__init__.py +++ b/sphinx/environment/__init__.py @@ -243,7 +243,7 @@ def __init__(self, app: Sphinx) -> None: # search index data # docname -> title - self._search_index_titles: dict[str, str] = {} + self._search_index_titles: dict[str, str | None] = {} # docname -> filename self._search_index_filenames: dict[str, str] = {} # stemmed words -> set(docname) @@ -251,7 +251,7 @@ def __init__(self, app: Sphinx) -> None: # stemmed words in titles -> set(docname) self._search_index_title_mapping: dict[str, set[str]] = {} # docname -> all titles in document - self._search_index_all_titles: dict[str, list[tuple[str, str]]] = {} + self._search_index_all_titles: dict[str, list[tuple[str, str | None]]] = {} # docname -> list(index entry) self._search_index_index_entries: dict[str, list[tuple[str, str, str]]] = {} # objtype -> index diff --git a/sphinx/search/__init__.py b/sphinx/search/__init__.py index 3127c434d5b..bd1c701962e 100644 --- a/sphinx/search/__init__.py +++ b/sphinx/search/__init__.py @@ -198,7 +198,7 @@ def _is_meta_keywords( @dataclasses.dataclass class WordStore: words: list[str] = dataclasses.field(default_factory=list) - titles: list[tuple[str, str]] = dataclasses.field(default_factory=list) + titles: list[tuple[str, str | None]] = dataclasses.field(default_factory=list) title_words: list[str] = dataclasses.field(default_factory=list) @@ -253,7 +253,7 @@ class IndexBuilder: def __init__(self, env: BuildEnvironment, lang: str, options: dict[str, str], scoring: str) -> None: self.env = env # docname -> title - self._titles: dict[str, str] = env._search_index_titles + self._titles: dict[str, str | None] = env._search_index_titles # docname -> filename self._filenames: dict[str, str] = env._search_index_filenames # stemmed words -> set(docname) @@ -261,7 +261,7 @@ def __init__(self, env: BuildEnvironment, lang: str, options: dict[str, str], sc # stemmed words in titles -> set(docname) self._title_mapping: dict[str, set[str]] = env._search_index_title_mapping # docname -> all titles in document - self._all_titles: dict[str, list[tuple[str, str]]] = env._search_index_all_titles + self._all_titles: dict[str, list[tuple[str, str | None]]] = env._search_index_all_titles # docname -> list(index entry) self._index_entries: dict[str, list[tuple[str, str, str]]] = env._search_index_index_entries # objtype -> index @@ -391,7 +391,7 @@ def freeze(self) -> dict[str, Any]: objtypes = {v: k[0] + ':' + k[1] for (k, v) in self._objtypes.items()} objnames = self._objnames - alltitles: dict[str, list[tuple[int, str]]] = {} + alltitles: dict[str, list[tuple[int, str | None]]] = {} for docname, titlelist in sorted(self._all_titles.items()): for title, titleid in titlelist: alltitles.setdefault(title, []).append((fn2index[docname], titleid)) From c7245d20d5b6d5a280ab450b7b4d9e95464c6128 Mon Sep 17 00:00:00 2001 From: James Addison <55152140+jayaddison@users.noreply.github.com> Date: Tue, 18 Jun 2024 11:43:39 +0100 Subject: [PATCH 13/15] Fixup: remove excess heading delineation characters. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- tests/test_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_search.py b/tests/test_search.py index af86a5e7e82..bf2d3a1e205 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -72,7 +72,7 @@ def is_registered_term(index, keyword): .. test that comments are not indexed: boson another_title -=============== +============= test that non-comments are indexed: fermion ''' From 520a205420790c4cbf83e23afa8d796a789bc93b Mon Sep 17 00:00:00 2001 From: James Addison Date: Mon, 24 Jun 2024 19:19:24 +0100 Subject: [PATCH 14/15] [tests] JavaScript: remove workaround test data from mainline that is no longer required with the fix from this branch in place. --- tests/js/searchtools.js | 9 --------- 1 file changed, 9 deletions(-) diff --git a/tests/js/searchtools.js b/tests/js/searchtools.js index 5e97572fb3e..d020e40d904 100644 --- a/tests/js/searchtools.js +++ b/tests/js/searchtools.js @@ -70,21 +70,12 @@ describe('Basic html theme search', function() { searchParameters = Search._parseQuery('main page'); - // fixme: duplicate result due to https://github.com/sphinx-doc/sphinx/issues/11961 hits = [ [ 'index', 'Main Page', '', null, - 15, - 'index.rst' - ], - [ - 'index', - 'Main Page', - '#main-page', - null, 100, 'index.rst' ] From b349764dc04f758638a436dab553a67175e4c4ff Mon Sep 17 00:00:00 2001 From: James Addison Date: Sat, 6 Jul 2024 20:07:04 +0100 Subject: [PATCH 15/15] Code review feedback: add explanatory comments regarding runtime vs serialized sort-order for title and document terms. --- sphinx/search/__init__.py | 7 +++++++ tests/test_search.py | 4 ++++ 2 files changed, 11 insertions(+) diff --git a/sphinx/search/__init__.py b/sphinx/search/__init__.py index bd1c701962e..ec194ef6e96 100644 --- a/sphinx/search/__init__.py +++ b/sphinx/search/__init__.py @@ -369,6 +369,13 @@ def get_objects(self, fn2index: dict[str, int] return rv def get_terms(self, fn2index: dict[str, int]) -> tuple[dict[str, list[int] | int], dict[str, list[int] | int]]: + """ + Return a mapping of document and title terms to their corresponding sorted document IDs. + + When a term is only found within a single document, then the value for that term will be + an integer value. When a term is found within multiple documents, the value will be a list + of integers. + """ rvs: tuple[dict[str, list[int] | int], dict[str, list[int] | int]] = ({}, {}) for rv, mapping in zip(rvs, (self._mapping, self._title_mapping)): for k, v in mapping.items(): diff --git a/tests/test_search.py b/tests/test_search.py index bf2d3a1e205..a2b01c17b6b 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -171,6 +171,10 @@ def test_IndexBuilder(): 'docname2_1': 'title2_1', 'docname2_2': 'title2_2'} assert index._filenames == {'docname1_1': 'filename1_1', 'docname1_2': 'filename1_2', 'docname2_1': 'filename2_1', 'docname2_2': 'filename2_2'} + # note: element iteration order (sort order) is important when the index + # is frozen (serialized) during build -- however, the _mapping-related + # dictionaries below may be iterated in arbitrary order by Python at + # runtime. assert index._mapping == { 'ar': {'docname1_1', 'docname1_2', 'docname2_1', 'docname2_2'}, 'fermion': {'docname1_1', 'docname1_2', 'docname2_1', 'docname2_2'},