From fad0414c17d4bc32ebc98f50996f9b5cf71e558a Mon Sep 17 00:00:00 2001
From: Morgante Pell <morgantep@google.com>
Date: Tue, 16 Jul 2024 03:31:01 -0700
Subject: [PATCH 1/4] fix: try using replacement approach

---
 crates/core/src/test.rs       | 130 ++++++++++++++++++++++++++++++++++
 crates/language/src/python.rs |   2 +-
 2 files changed, 131 insertions(+), 1 deletion(-)

diff --git a/crates/core/src/test.rs b/crates/core/src/test.rs
index a76b46d2b..81bb91e8c 100644
--- a/crates/core/src/test.rs
+++ b/crates/core/src/test.rs
@@ -12014,6 +12014,136 @@ fn trailing_comma_import_from_python_with_alias() {
     .unwrap();
 }
 
+// refer to https://github.com/getgrit/gritql/issues/416
+#[test]
+fn trailing_comma_after_argument_removal() {
+    run_test_expected({
+        TestArgExpected {
+            pattern: r#"
+                language python
+                `TaskMetadata($args)` where {
+                        $args <: any {
+                            contains `n_samples=$_` as $ns_kwarg where {
+                                $ns_kwarg <: `n_samples = $ns_val` => .
+                            },
+                            contains `avg_character_length=$_` as $avg_kwarg where {
+                                $avg_kwarg <: `avg_character_length = $avg_val` => `stats=GeneralDescriptiveStats(n_samples=$ns_val, avg_character_length=$avg_val)`
+                            },
+                        },
+                    }
+                "#.to_owned(),
+            source: r#"
+                from pydantic import BaseModel
+
+
+                class TaskMetadata(BaseModel):
+                    n_samples: dict[str, int]
+                    avg_character_length: dict[str, float]
+
+
+                if __name__ == "__main__":
+                    TaskMetadata(
+                        name="TbilisiCityHallBitextMining",
+                        dataset={
+                            "path": "jupyterjazz/tbilisi-city-hall-titles",
+                            "revision": "798bb599140565cca2dab8473035fa167e5ee602",
+                        },
+                        description="Parallel news titles from the Tbilisi City Hall website (https://tbilisi.gov.ge/).",
+                        type="BitextMining",
+                        category="s2s",
+                        eval_splits=[_EVAL_SPLIT],
+                        eval_langs=_EVAL_LANGS,
+                        main_score="f1",
+                        domains=["News"],
+                        text_creation="created",
+                        n_samples={_EVAL_SPLIT: 1820},
+                        reference="https://huggingface.co/datasets/jupyterjazz/tbilisi-city-hall-titles",
+                        date=("2024-05-02", "2024-05-03"),
+                        form=["written"],
+                        task_subtypes=[],
+                        license="Not specified",
+                        socioeconomic_status="mixed",
+                        annotations_creators="derived",
+                        dialect=[],
+                        bibtex_citation="",
+                        avg_character_length={_EVAL_SPLIT: 78},
+                    )
+            "#
+                .to_owned(),
+            expected: r#"
+                from pydantic import BaseModel
+
+
+                class TaskMetadata(BaseModel):
+                    n_samples: dict[str, int]
+                    avg_character_length: dict[str, float]
+
+
+                if __name__ == "__main__":
+                    TaskMetadata(
+                        name="TbilisiCityHallBitextMining",
+                        dataset={
+                            "path": "jupyterjazz/tbilisi-city-hall-titles",
+                            "revision": "798bb599140565cca2dab8473035fa167e5ee602",
+                        },
+                        description="Parallel news titles from the Tbilisi City Hall website (https://tbilisi.gov.ge/).",
+                        type="BitextMining",
+                        category="s2s",
+                        eval_splits=[_EVAL_SPLIT],
+                        eval_langs=_EVAL_LANGS,
+                        main_score="f1",
+                        domains=["News"],
+                        text_creation="created",
+
+                        reference="https://huggingface.co/datasets/jupyterjazz/tbilisi-city-hall-titles",
+                        date=("2024-05-02", "2024-05-03"),
+                        form=["written"],
+                        task_subtypes=[],
+                        license="Not specified",
+                        socioeconomic_status="mixed",
+                        annotations_creators="derived",
+                        dialect=[],
+                        bibtex_citation="",
+                        stats=GeneralDescriptiveStats(n_samples={_EVAL_SPLIT: 1820}, avg_character_length={_EVAL_SPLIT: 78}),
+                    )
+            "#
+                .to_owned(),
+        }
+    })
+    .unwrap();
+}
+
+/// Same as above test, but ensures the behavior doesn't depend on line breaks
+#[test]
+fn trailing_comma_after_argument_removal_one_line() {
+    run_test_expected({
+        TestArgExpected {
+            pattern: r#"
+                language python
+                `TaskMetadata($args)` where {
+                        $args <: any {
+                            contains `n_samples=$_` as $ns_kwarg where {
+                                $ns_kwarg <: `n_samples = $ns_val` => .
+                            },
+                            contains `avg_character_length=$_` as $avg_kwarg where {
+                                $avg_kwarg <: `avg_character_length = $avg_val` => `stats=GeneralDescriptiveStats(n_samples=$ns_val, avg_character_length=$avg_val)`
+                            },
+                        },
+                    }
+                "#.to_owned(),
+            source: r#"
+                TaskMetadata(description="Parallel news titles from the Tbilisi City Hall website (https://tbilisi.gov.ge/).", main_score="f1", domains=["News"], text_creation="created", n_samples={_EVAL_SPLIT: 1820}, reference="https://huggingface.co/datasets/jupyterjazz/tbilisi-city-hall-titles")
+            "#
+                .to_owned(),
+            expected: r#"
+                TaskMetadata(description="Parallel news titles from the Tbilisi City Hall website (https://tbilisi.gov.ge/).", main_score="f1", domains=["News"], text_creation="created",   reference="https://huggingface.co/datasets/jupyterjazz/tbilisi-city-hall-titles")
+            "#
+                .to_owned(),
+        }
+    })
+    .unwrap();
+}
+
 #[test]
 fn python_orphaned_from_imports() {
     run_test_expected({
diff --git a/crates/language/src/python.rs b/crates/language/src/python.rs
index ff710feb4..9bab7cad8 100644
--- a/crates/language/src/python.rs
+++ b/crates/language/src/python.rs
@@ -79,7 +79,7 @@ impl Language for Python {
 
     fn check_replacements(&self, n: NodeWithSource<'_>, replacements: &mut Vec<Replacement>) {
         if n.node.is_error() {
-            if n.text().is_ok_and(|t| t == "->") {
+            if n.text().is_ok_and(|t| t == "->") || n.text().is_ok_and(|t| t == ",") {
                 replacements.push(Replacement::new(n.range(), ""));
             }
             return;

From ca46057bc146a7f0914fe7657d2428482c940272 Mon Sep 17 00:00:00 2001
From: Morgante Pell <morgantep@google.com>
Date: Tue, 16 Jul 2024 03:35:47 -0700
Subject: [PATCH 2/4] simpify test

---
 crates/core/src/test.rs | 102 +++++++---------------------------------
 1 file changed, 16 insertions(+), 86 deletions(-)

diff --git a/crates/core/src/test.rs b/crates/core/src/test.rs
index 81bb91e8c..a85690c51 100644
--- a/crates/core/src/test.rs
+++ b/crates/core/src/test.rs
@@ -12022,90 +12022,27 @@ fn trailing_comma_after_argument_removal() {
             pattern: r#"
                 language python
                 `TaskMetadata($args)` where {
-                        $args <: any {
-                            contains `n_samples=$_` as $ns_kwarg where {
-                                $ns_kwarg <: `n_samples = $ns_val` => .
-                            },
-                            contains `avg_character_length=$_` as $avg_kwarg where {
-                                $avg_kwarg <: `avg_character_length = $avg_val` => `stats=GeneralDescriptiveStats(n_samples=$ns_val, avg_character_length=$avg_val)`
-                            },
-                        },
+                        $args <: contains `n_samples=$_` => .
                     }
                 "#.to_owned(),
             source: r#"
-                from pydantic import BaseModel
-
-
-                class TaskMetadata(BaseModel):
-                    n_samples: dict[str, int]
-                    avg_character_length: dict[str, float]
-
-
-                if __name__ == "__main__":
-                    TaskMetadata(
-                        name="TbilisiCityHallBitextMining",
-                        dataset={
-                            "path": "jupyterjazz/tbilisi-city-hall-titles",
-                            "revision": "798bb599140565cca2dab8473035fa167e5ee602",
-                        },
-                        description="Parallel news titles from the Tbilisi City Hall website (https://tbilisi.gov.ge/).",
-                        type="BitextMining",
-                        category="s2s",
-                        eval_splits=[_EVAL_SPLIT],
-                        eval_langs=_EVAL_LANGS,
-                        main_score="f1",
-                        domains=["News"],
-                        text_creation="created",
-                        n_samples={_EVAL_SPLIT: 1820},
-                        reference="https://huggingface.co/datasets/jupyterjazz/tbilisi-city-hall-titles",
-                        date=("2024-05-02", "2024-05-03"),
-                        form=["written"],
-                        task_subtypes=[],
-                        license="Not specified",
-                        socioeconomic_status="mixed",
-                        annotations_creators="derived",
-                        dialect=[],
-                        bibtex_citation="",
-                        avg_character_length={_EVAL_SPLIT: 78},
-                    )
+                TaskMetadata(
+                    description="Parallel news titles from the Tbilisi City Hall website (https://tbilisi.gov.ge/).",
+                    main_score="f1", domains=["News"],
+                    text_creation="created",
+                    n_samples={_EVAL_SPLIT: 1820},
+                    reference="https://huggingface.co/datasets/jupyterjazz/tbilisi-city-hall-titles"
+                )
             "#
                 .to_owned(),
             expected: r#"
-                from pydantic import BaseModel
-
-
-                class TaskMetadata(BaseModel):
-                    n_samples: dict[str, int]
-                    avg_character_length: dict[str, float]
-
-
-                if __name__ == "__main__":
-                    TaskMetadata(
-                        name="TbilisiCityHallBitextMining",
-                        dataset={
-                            "path": "jupyterjazz/tbilisi-city-hall-titles",
-                            "revision": "798bb599140565cca2dab8473035fa167e5ee602",
-                        },
-                        description="Parallel news titles from the Tbilisi City Hall website (https://tbilisi.gov.ge/).",
-                        type="BitextMining",
-                        category="s2s",
-                        eval_splits=[_EVAL_SPLIT],
-                        eval_langs=_EVAL_LANGS,
-                        main_score="f1",
-                        domains=["News"],
-                        text_creation="created",
-
-                        reference="https://huggingface.co/datasets/jupyterjazz/tbilisi-city-hall-titles",
-                        date=("2024-05-02", "2024-05-03"),
-                        form=["written"],
-                        task_subtypes=[],
-                        license="Not specified",
-                        socioeconomic_status="mixed",
-                        annotations_creators="derived",
-                        dialect=[],
-                        bibtex_citation="",
-                        stats=GeneralDescriptiveStats(n_samples={_EVAL_SPLIT: 1820}, avg_character_length={_EVAL_SPLIT: 78}),
-                    )
+                TaskMetadata(
+                    description="Parallel news titles from the Tbilisi City Hall website (https://tbilisi.gov.ge/).",
+                    main_score="f1", domains=["News"],
+                    text_creation="created",
+
+                    reference="https://huggingface.co/datasets/jupyterjazz/tbilisi-city-hall-titles"
+                )
             "#
                 .to_owned(),
         }
@@ -12121,14 +12058,7 @@ fn trailing_comma_after_argument_removal_one_line() {
             pattern: r#"
                 language python
                 `TaskMetadata($args)` where {
-                        $args <: any {
-                            contains `n_samples=$_` as $ns_kwarg where {
-                                $ns_kwarg <: `n_samples = $ns_val` => .
-                            },
-                            contains `avg_character_length=$_` as $avg_kwarg where {
-                                $avg_kwarg <: `avg_character_length = $avg_val` => `stats=GeneralDescriptiveStats(n_samples=$ns_val, avg_character_length=$avg_val)`
-                            },
-                        },
+                        $args <: contains `n_samples=$_` => .
                     }
                 "#.to_owned(),
             source: r#"

From bf1a30f9042eb17c88665dd0cda751ffee2d48f5 Mon Sep 17 00:00:00 2001
From: Alex Ley <alex@scrub-ai.com>
Date: Sat, 26 Oct 2024 23:28:40 +0100
Subject: [PATCH 3/4] fix(python): in `check_replacements()` handle both single
 line and multi line hanging comma after removing kwargs

fixes #416
---
 crates/core/src/test.rs       |  2 +-
 crates/language/src/python.rs | 11 +++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/crates/core/src/test.rs b/crates/core/src/test.rs
index 2ce94d8f7..640fbef9c 100644
--- a/crates/core/src/test.rs
+++ b/crates/core/src/test.rs
@@ -12324,7 +12324,7 @@ fn trailing_comma_after_argument_removal_one_line() {
             "#
                 .to_owned(),
             expected: r#"
-                TaskMetadata(description="Parallel news titles from the Tbilisi City Hall website (https://tbilisi.gov.ge/).", main_score="f1", domains=["News"], text_creation="created",   reference="https://huggingface.co/datasets/jupyterjazz/tbilisi-city-hall-titles")
+                TaskMetadata(description="Parallel news titles from the Tbilisi City Hall website (https://tbilisi.gov.ge/).", main_score="f1", domains=["News"], text_creation="created",  reference="https://huggingface.co/datasets/jupyterjazz/tbilisi-city-hall-titles")
             "#
                 .to_owned(),
         }
diff --git a/crates/language/src/python.rs b/crates/language/src/python.rs
index b2af55f57..3faf4b36e 100644
--- a/crates/language/src/python.rs
+++ b/crates/language/src/python.rs
@@ -79,11 +79,22 @@ impl Language for Python {
 
     fn check_replacements(&self, n: NodeWithSource<'_>, replacements: &mut Vec<Replacement>) {
         if n.node.is_error() {
+            // https://github.com/getgrit/gritql/issues/416 single line example
             if n.text().is_ok_and(|t| t == "->") || n.text().is_ok_and(|t| t == ",") {
                 replacements.push(Replacement::new(n.range(), ""));
             }
             return;
         }
+        if n.node.kind() == "," {
+            // https://github.com/getgrit/gritql/issues/416 multi line example
+            if let Some(prev) = n.node.prev_sibling() {
+                let empty = prev.range().end_byte() == prev.range().start_byte();
+                if prev.kind() == "identifier" && empty {
+                    replacements.push(Replacement::new(n.range(), ""));
+                    return;
+                }
+            }
+        }
         if n.node.kind() == "import_from_statement" {
             if let Some(name_field) = n.node.child_by_field_name("name") {
                 let names_text = name_field

From dcfcf4d0cbdc1789e0abe91790991cdd830f4f18 Mon Sep 17 00:00:00 2001
From: Alex Ley <alex@scrub-ai.com>
Date: Sat, 26 Oct 2024 23:40:59 +0100
Subject: [PATCH 4/4] fix(python): simplify `check_replacements()` to only
 check if the prev node is empty

fixes #416
---
 crates/language/src/python.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/language/src/python.rs b/crates/language/src/python.rs
index 3faf4b36e..5e0171f8a 100644
--- a/crates/language/src/python.rs
+++ b/crates/language/src/python.rs
@@ -89,7 +89,7 @@ impl Language for Python {
             // https://github.com/getgrit/gritql/issues/416 multi line example
             if let Some(prev) = n.node.prev_sibling() {
                 let empty = prev.range().end_byte() == prev.range().start_byte();
-                if prev.kind() == "identifier" && empty {
+                if empty {
                     replacements.push(Replacement::new(n.range(), ""));
                     return;
                 }