From 4011c8f68c627e8ed339d8dd29f97b0bce571b1c Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Sun, 26 Jan 2025 12:38:32 +0800 Subject: [PATCH] Fix potential error. (#4650) ### What problem does this PR solve? #4622. In `tag_content` and `tag_query`, the tag score is divided by `all_tags.get(a, 0.0001)`; this change clamps that divisor to at least 1e-6 so that a zero value in `all_tags` cannot trigger a division by zero. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- rag/nlp/search.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rag/nlp/search.py b/rag/nlp/search.py index 5d67d9da7b3..abb69401cfb 100644 --- a/rag/nlp/search.py +++ b/rag/nlp/search.py @@ -465,7 +465,7 @@ def tag_content(self, tenant_id: str, kb_ids: list[str], doc, all_tags, topn_tag if not aggs: return False cnt = np.sum([c for _, c in aggs]) - tag_fea = sorted([(a, round(0.1*(c + 1) / (cnt + S) / (all_tags.get(a, 0.0001)))) for a, c in aggs], + tag_fea = sorted([(a, round(0.1*(c + 1) / (cnt + S) / max(1e-6, all_tags.get(a, 0.0001)))) for a, c in aggs], key=lambda x: x[1] * -1)[:topn_tags] doc[TAG_FLD] = {a: c for a, c in tag_fea if c > 0} return True @@ -481,6 +481,6 @@ def tag_query(self, question: str, tenant_ids: str | list[str], kb_ids: list[str if not aggs: return {} cnt = np.sum([c for _, c in aggs]) - tag_fea = sorted([(a, round(0.1*(c + 1) / (cnt + S) / (all_tags.get(a, 0.0001)))) for a, c in aggs], + tag_fea = sorted([(a, round(0.1*(c + 1) / (cnt + S) / max(1e-6, all_tags.get(a, 0.0001)))) for a, c in aggs], key=lambda x: x[1] * -1)[:topn_tags] return {a: max(1, c) for a, c in tag_fea}