Skip to content

Commit

Permalink
update
Browse files: browse the repository at this point in the history
  • Loading branch information
cyyeh committed Nov 29, 2024
1 parent 040f9d4 commit f894589
Showing 1 changed file with 11 additions and 2 deletions.
13 changes: 11 additions & 2 deletions wren-ai-service/src/pipelines/retrieval/preprocess_sql_data.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -23,17 +23,26 @@ def preprocess(
encoding: tiktoken.Encoding,
) -> Dict:
_token_count = len(encoding.encode(str(sql_data)))
num_rows_used_in_llm = len(sql_data.get("data", []))

if _token_count > 100_000:
sql_data = {
"columns": sql_data.get("columns", []),
"data": sql_data.get("data", [])[:250],
"dtypes": sql_data.get("dtypes", {}),
}

num_rows_used_in_llm = len(sql_data.get("data", []))

return {
"sql_data": sql_data,
"num_rows_used_in_llm": 500,
"num_rows_used_in_llm": num_rows_used_in_llm,
"tokens": _token_count,
}

return {
"sql_data": sql_data,
"num_rows_used_in_llm": 500,
"num_rows_used_in_llm": num_rows_used_in_llm,
"tokens": _token_count,
}

Expand Down

0 comments on commit f894589

Please sign in to comment.