Adds support for convert, re-adds READMEs

mlverse · edgararuiz · Oct 14, 2024 · Oct 11, 2024 · Oct 11, 2024 · Oct 12, 2024
commit d3e4d77d0162ecbf7efbef60a90ac732b49b5257
diff --git a/python/README.md b/python/README.md
@@ -0,0 +1,100 @@
+# mall
+
+## Intro
+
+Run multiple LLM predictions against a data frame. The predictions are
+processed row-wise over a specified column. It works using a
+pre-determined one-shot prompt, along with the current row’s content.
+
+## Install
+
+To install from GitHub, use:
+
+``` bash
+pip install "mall @ git+https://git@github.com/edgararuiz/mall.git@python#subdirectory=python"
+```
+
+## Examples
+
+``` python
+import mall 
+import polars as pl
+
+reviews = pl.DataFrame(
+    data=[
+        "This has been the best TV I've ever used. Great screen, and sound.", 
+        "I regret buying this laptop. It is too slow and the keyboard is too noisy",
+        "Not sure how to feel about my new washing machine. Great color, but hard to figure"
+        ],
+    schema=[("review", pl.String)],
+)
+```
+
+## Sentiment
+
+
+``` python
+reviews.llm.sentiment("review")
+```
+
+<small>shape: (3, 2)</small>
+
+| review                           | sentiment  |
+|----------------------------------|------------|
+| str                              | str        |
+| "This has been the best TV I've… | "positive" |
+| "I regret buying this laptop. I… | "negative" |
+| "Not sure how to feel about my … | "neutral"  |
+
+## Summarize
+
+``` python
+reviews.llm.summarize("review", 5)
+```
+
+<small>shape: (3, 2)</small>
+
+| review                           | summary                          |
+|----------------------------------|----------------------------------|
+| str                              | str                              |
+| "This has been the best TV I've… | "it's a great tv"                |
+| "I regret buying this laptop. I… | "laptop not worth the money"     |
+| "Not sure how to feel about my … | "feeling uncertain about new pu… |
+
+## Translate (as in ‘English to French’)
+
+``` python
+reviews.llm.translate("review", "spanish")
+```
+
+<small>shape: (3, 2)</small>
+
+| review                           | translation                      |
+|----------------------------------|----------------------------------|
+| str                              | str                              |
+| "This has been the best TV I've… | "Esta ha sido la mejor TV que h… |
+| "I regret buying this laptop. I… | "Lo lamento comprar este portát… |
+| "Not sure how to feel about my … | "No estoy seguro de cómo sentir… |
+
+## Classify
+
+``` python
+reviews.llm.classify("review", ["computer", "appliance"])
+```
+
+<small>shape: (3, 2)</small>
+
+| review                           | classify    |
+|----------------------------------|-------------|
+| str                              | str         |
+| "This has been the best TV I've… | "appliance" |
+| "I regret buying this laptop. I… | "appliance" |
+| "Not sure how to feel about my … | "appliance" |
+
+## LLM session setup
+
+``` python
+reviews.llm.use(options = dict(seed = 100))
+```
+
+    {'backend': 'ollama', 'model': 'llama3.2', 'options': {'seed': 100}}
diff --git a/python/README.qmd b/python/README.qmd
@@ -0,0 +1,71 @@
+---
+format: gfm
+---
+
+# mall
+
+## Intro
+
+Run multiple LLM predictions against a data frame. The predictions are processed row-wise over a specified column. It works using a pre-determined one-shot prompt, along with the current row’s content.
+
+## Install
+
+To install from GitHub, use:
+
+```bash
+pip install "mall @ git+https://git@github.com/edgararuiz/mall.git@python#subdirectory=python"
+```
+
+## Examples
+
+```{python}
+#| include: false
+import polars as pl
+from polars.dataframe._html import HTMLFormatter
+html_formatter = get_ipython().display_formatter.formatters['text/html']
+html_formatter.for_type(pl.DataFrame, lambda df: "\n".join(HTMLFormatter(df).render()))
+```
+
+
+```{python}
+import mall 
+import polars as pl
+data = mall.MallData
+reviews = data.reviews
+```
+
+```{python}
+#| include: false
+reviews.llm.use(options = dict(seed = 100))
+```
+
+
+## Sentiment
+
+```{python}
+reviews.llm.sentiment("review")
+```
+
+## Summarize
+
+```{python}
+reviews.llm.summarize("review", 5)
+```
+
+## Translate (as in 'English to French')
+
+```{python}
+reviews.llm.translate("review", "spanish")
+```
+
+## Classify
+
+```{python}
+reviews.llm.classify("review", ["computer", "appliance"])
+```
+
+## LLM session setup
+
+```{python}
+reviews.llm.use(options = dict(seed = 100))
+```
diff --git a/python/mall/llm.py b/python/mall/llm.py
@@ -11,7 +11,7 @@ def build_msg(x, msg):
     return out
 
 
-def llm_call(x, msg, use, preview=False, valid_resps=""):
+def llm_call(x, msg, use, preview=False, valid_resps="", convert=None):
 
     call = dict(
         model=use.get("model"),
@@ -41,6 +41,11 @@ def llm_call(x, msg, use, preview=False, valid_resps=""):
         if cache == "":
             cache_record(hash_call, use, call, out)
 
+    if isinstance(convert, dict):
+        for label in convert:
+            if out == label:
+                out = convert.get(label)
+
     if isinstance(valid_resps, list):
         if out not in valid_resps:
             out = None

diff --git a/python/mall/polars.py b/python/mall/polars.py
@@ -1,5 +1,5 @@
 import polars as pl
-from mall.prompt import sentiment, summarize, translate, classify, extract, custom
+from mall.prompt import sentiment, summarize, translate, classify, extract, custom, verify
 from mall.llm import llm_call
 
 
@@ -8,8 +8,8 @@ class MallFrame:
     """Extension to Polars that add ability to use
     an LLM to run batch predictions over a data frame
 
-    We will start by loading the needed libraries, and 
-    set up the data frame that will be used in the 
+    We will start by loading the needed libraries, and
+    set up the data frame that will be used in the
     examples:
 
     ```{python}
@@ -423,7 +423,7 @@ def custom(
         )
         return df
 
-def verify(
+    def verify(
         self,
         col,
         what="",
@@ -439,7 +439,7 @@ def verify(
             The name of the text field to process
 
         what : str
-            The statement or question that needs to be verified against the 
+            The statement or question that needs to be verified against the
             provided text
 
         yes_no : list
@@ -469,18 +469,27 @@ def verify(
         df = map_call(
             df=self._df,
             col=col,
-            msg=verify(what, yes_no, additional=additional),
+            msg=verify(what, additional=additional),
             pred_name=pred_name,
             use=self._use,
             valid_resps=yes_no,
+            convert=dict(yes = yes_no[0], no = yes_no[1]),
         )
         return df
 
-def map_call(df, col, msg, pred_name, use, valid_resps=""):
+
+def map_call(df, col, msg, pred_name, use, valid_resps="", convert=None):
     df = df.with_columns(
         pl.col(col)
         .map_elements(
-            lambda x: llm_call(x, msg, use, False, valid_resps),
+            lambda x: llm_call(
+                x=x,
+                msg=msg,
+                use=use,
+                preview=False,
+                valid_resps=valid_resps,
+                convert=convert,
+            ),
             return_dtype=pl.String,
         )
         .alias(pred_name)