Skip to content

Commit

Permalink
Add titleVector field to AzureCognitiveSearchWrapper class
Browse files Browse the repository at this point in the history
  • Loading branch information
piizei committed Nov 17, 2023
1 parent fbc347c commit f5dbe09
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 6 deletions.
18 changes: 14 additions & 4 deletions confluence_vector_sync/azure_cognitive_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ def create_item(self, item):
"document_id": item["id"],
"space": item["space"]["key"],
"title": item["title"],
"titleVector": self.embedder.embed_query(item["title"]),
"chunk": chunk_text,
"chunkVector": self.embedder.embed_query(chunk_text),
"last_modified_date": last_modified_date,
Expand Down Expand Up @@ -142,22 +143,31 @@ def create_or_update_index(self):
{"name": "space", "type": "Edm.String", "searchable": "true", "retrievable": "true",
"filterable": "true"},
{"name": "title", "type": "Edm.String", "searchable": "true", "retrievable": "true"},
{"name": "titleVector", "type": "Collection(Edm.Single)", "searchable": "true", "retrievable": "true",
"dimensions": 1536, "vectorSearchProfile": "default-vector-profile"},
{"name": "chunk", "type": "Edm.String", "searchable": "true", "retrievable": "true"},
{"name": "chunkVector", "type": "Collection(Edm.Single)", "searchable": "true", "retrievable": "true",
"dimensions": 1536, "vectorSearchConfiguration": "vectorConfig"},
"dimensions": 1536, "vectorSearchProfile": "default-vector-profile"},
{"name": "last_modified_date", "type": "Edm.DateTimeOffset", "searchable": "false",
"retrievable": "true", "filterable": "true"},
{"name": "last_indexed_date", "type": "Edm.DateTimeOffset", "searchable": "false",
"retrievable": "true", "filterable": "true"},
{"name": "url", "type": "Edm.String", "searchable": "false", "retrievable": "true"}
],
"vectorSearch": {
"algorithmConfigurations": [
"algorithms": [
{
"name": "vectorConfig",
"name": "hnsw-config-1",
"kind": "hnsw"
}
]},
],
"profiles": [
{
"name": "default-vector-profile",
"algorithm": "hnsw-config-1",
}
]
},
"semantic": {
"configurations": [
{
Expand Down
2 changes: 1 addition & 1 deletion confluence_vector_sync/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def get_config():
"azure_search_key": os.getenv("AZURE_SEARCH_KEY"),
"azure_search_full_reindex": os.getenv("AZURE_SEARCH_FULL_REINDEX", "false").lower() == "true",
"azure_search_embedding_model": os.getenv("AZURE_SEARCH_EMBEDDING_MODEL", "text-embedding-ada-002"),
"azure_search_api_version": os.getenv("AZURE_SEARCH_API_VERSION", "2023-07-01-Preview"),
"azure_search_api_version": os.getenv("AZURE_SEARCH_API_VERSION", "2023-11-01"),
"azure_search_confluence_index": os.getenv("AZURE_SEARCH_CONFLUENCE_INDEX", "confluence"),
"confluence_url": os.getenv("CONFLUENCE_URL"),
"confluence_user_name": os.getenv("CONFLUENCE_USER_NAME"),
Expand Down
2 changes: 1 addition & 1 deletion confluence_vector_sync/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ def sync():
page["status"] in {"archived", "trashed", "deleted"}]
# Create model of documents in search-index for all included confluence spaces
search = search_from_config(config)
search.chunker = confluence
search.create_or_update_index()
search.chunker = confluence
search.index(changeset={"upsert": current, "remove": archived})
logging.info("Indexing complete")
logging.debug(search.diagnostics)
Expand Down

0 comments on commit f5dbe09

Please sign in to comment.