Skip to content

Commit

Permalink
feat: correct bad json by cheap llm
Browse files Browse the repository at this point in the history
  • Loading branch information
doomspec committed Aug 29, 2024
1 parent 41ebb55 commit 0f69cec
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 1 deletion.
27 changes: 27 additions & 0 deletions mllm/utils/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@
# Parse
"""

# Module-level switches for the model-assisted JSON repair fallback.
parse_options = {
    # Model name passed to Chat.complete by parse_json_by_cheap_model.
    "cheap_model": "gpt-4o-mini",
    # When truthy, Parse.dict tries parse_json_by_cheap_model before it
    # gives up and raises ValueError. Off by default.
    "correct_json_by_model": False,
}


class Parse:

@staticmethod
Expand Down Expand Up @@ -44,6 +50,13 @@ def dict(src: str):
except:
pass

if parse_options["correct_json_by_model"]:
try:
res = parse_json_by_cheap_model(json_src)
return res
except:
pass

raise ValueError(f"Invalid json: {src}")

@staticmethod
Expand Down Expand Up @@ -94,3 +107,17 @@ def colon(src: str):
raise ValueError("Invalid colon string")
contents = split[1].strip()
return contents


def parse_json_by_cheap_model(json_src):
    """Ask the configured cheap model to repair a malformed JSON dict.

    The raw text is embedded verbatim between ``<raw_json>`` tags in a
    correction prompt, which is sent through ``Chat.complete`` with
    ``parse="dict"`` using the model named by
    ``parse_options["cheap_model"]``.

    Args:
        json_src: The text to repair; interpolated into the prompt as-is.

    Returns:
        Whatever ``Chat.complete(parse="dict", ...)`` returns for the prompt.
    """
    # Imported at call time rather than at module top -- presumably to avoid
    # a circular import between mllm and this module; TODO confirm.
    from mllm import Chat

    # NOTE(review): if Chat.complete(parse="dict") itself goes through
    # Parse.dict, a bad reply could re-enter this function while
    # "correct_json_by_model" is enabled -- verify against Chat.complete.
    correction_request = f"""
You are required to correct a JSON dict with semantic errors.
<raw_json>
{json_src}
</raw_json>
You should directly output the corrected JSON dict with a minimal modification.
"""
    return Chat(correction_request).complete(
        parse="dict",
        model=parse_options["cheap_model"],
    )
18 changes: 17 additions & 1 deletion test/test_parse.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from mllm import Chat
from mllm.utils.parser import Parse
from mllm.utils.parser import Parse, parse_json_by_cheap_model


def test_parse_quotes():
src = """
Expand Down Expand Up @@ -35,3 +36,18 @@ def test_dict_gen():
chat += prompt
res = chat.complete(parse="dict", cache=False)
assert res == {"a": 1, "b": 2}

def test_model_correct():
    """Check that the cheap model repairs an unquoted key and a multi-line string.

    The input dict has a bare (unquoted) key and a string value that spans
    several physical lines; the result is compared against the exact dict
    expected back from parse_json_by_cheap_model.
    """
    malformed = """
{
no_quote : "
string
with
line
change
"
}
"""
    expected = {'no_quote': '\nstring \nwith \nline \nchange\n'}
    repaired = parse_json_by_cheap_model(malformed)
    assert repaired == expected

0 comments on commit 0f69cec

Please sign in to comment.