From 283d85deb6d36f4868217d31b8edbbe2bb38ad3b Mon Sep 17 00:00:00 2001
From: nntrn <17685332+nntrn@users.noreply.github.com>
Date: Mon, 4 Mar 2024 11:33:18 -0600
Subject: [PATCH] Update examples

---
 _snippets/describe-objects.md |  20 ---
 data-transform.md             |  81 +-----------
 functions/summary.md          |   2 +-
 functions/text.md             |  25 ++++
 functions/unroll.md           |  64 +++++----
 general/data-transform.md     |  80 ++++++++++++
 general/examples.md           |  25 +---
 recipes.jq                    | 238 ++++++++++++++++++++++++++++++++++
 8 files changed, 378 insertions(+), 157 deletions(-)
 delete mode 100644 _snippets/describe-objects.md
 mode change 100644 => 120000 data-transform.md
 create mode 100644 functions/text.md
 create mode 100644 general/data-transform.md
diff --git a/_snippets/describe-objects.md b/_snippets/describe-objects.md
deleted file mode 100644
index d51afd8..0000000
--- a/_snippets/describe-objects.md
+++ /dev/null
@@ -1,20 +0,0 @@
----
-title: Object transform
-description: Recursively describe objects 
-source: https://stackoverflow.com/a/77322330
-input: data/type.json
-output: |
-  [{"name":"id","type":"number","value":123456},
-  {"name":"name","type":"string","value":"Test"},
-  {"name":"arraySimple","type":"array","value":[1,2,3]},
-  {"name":"arrayComplex","type":"array","value":[{"id":38392}]},
-  {"name":"instance","type":"object","value":[{"name":"id","type":"number","value":8310}]},
-  {"name":"isEmpty","type":"boolean","value":false},
-  {"name":"empty","type":"null","value":null}]
----
-
-def describef: {
-  type: type,
-  value : (objects |= (to_entries | map({name: .key} + (.value | describef))))
-};
-describef.value
diff --git a/data-transform.md b/data-transform.md
deleted file mode 100644
index f6944df..0000000
--- a/data-transform.md
+++ /dev/null
@@ -1,80 +0,0 @@
-# Data transformation
-
-## json2ini
-
-Adapted from <a>https://stackoverflow.com/a/76665197</a>
-
-Data used: [nested.json](data/nested.json)
-
-```
-{
-  "server": {
-    "atx": {
-      "user": "annie",
-      "port": 22
-    }
-  },
-  "storage": {
-    "nyc": {
-      "user": "nntrn",
-      "port": 22
-    }
-  }
-}
-```
-
-```sh
-jq --stream -nr ' reduce (inputs | select(has(1))) as [$path, $val]
-  ( {}; .[$path[:-1] | join(".")][$path[-1]] = $val )
-| to_entries[]
-| "[\(.key)]", (.value | to_entries[] | "\(.key) = \(.value)" )
-, ""'
-```
-
-Output
-
-```
-[server.atx]
-user = annie
-port = 22
-
-[storage.nyc]
-user = nntrn
-port = 22
-```
-
-## tsv2json
-
-Adapted from <a>https://stackoverflow.com/a/55996042</a>
-
-Data used: [tmp.tsv](data/tmp.tsv)
-
-```
-foo bar baz
-1   2   3
-4   5   6
-```
-
-```sh
-jq -R 'split("[\\s\\t]+";"x") as $head
-| [inputs | split("[\\s\\t]+";"x")]
-| map( . as $row | reduce (keys|.[]) as $x
-    ( {}; . + {"\($head[$x])":$row[$x]} ) )' data/tmp.tsv
-```
-
-Output
-
-```json
-[
-  {
-    "foo": "1",
-    "bar": "2",
-    "baz": "3"
-  },
-  {
-    "foo": "4",
-    "bar": "5",
-    "baz": "6"
-  }
-]
-```
diff --git a/data-transform.md b/data-transform.md
new file mode 120000
index 0000000..a989839
--- /dev/null
+++ b/data-transform.md
@@ -0,0 +1 @@
+general/data-transform.md
\ No newline at end of file
diff --git a/functions/summary.md b/functions/summary.md
index ca73947..ce2ae9d 100644
--- a/functions/summary.md
+++ b/functions/summary.md
@@ -1,4 +1,4 @@
-# summary
+# Summary
 
 ```jq
 def grouped_summary($item):
diff --git a/functions/text.md b/functions/text.md
new file mode 100644
index 0000000..5ada180
--- /dev/null
+++ b/functions/text.md
@@ -0,0 +1,25 @@
+# Text functions
+
+## Recursively split strings w/ new lines
+
+```jq
+def split_newlines($s): # string containing "\n" -> array of pieces; object input -> its entries; anything else -> $s
+  if ((type == "string") and (($s|tostring|split("\n")|length) > 1)?) 
+  then ($s|tostring|split("[\\r\\n]+([\\s]+)?";"x")) # split on CR/LF runs together with any whitespace that follows them
+  elif (type == "object") then to_entries # note: both type tests look at the input (.), not at $s
+  else $s end; 
+
+def recuse_split_newlines: walk(split_newlines(.)|from_entries? // .);  # apply split_newlines at every level; entry lists are rebuilt into objects when from_entries succeeds
+```
+
+## Quoting
+
+```jq
+def squo: [39]|implode;  # a single apostrophe, built from codepoint 39 instead of a literal quote character
+
+def squote($text): [squo,$text,squo]|join("");  # wrap $text in single quotes
+def dquote($text): "\"\($text)\"";  # wrap $text in double quotes; embedded quotes are not escaped
+
+def unsmart($text): $text | gsub("[“”]";"\"") | gsub("[’‘]";"'");  # replace curly double/single quotes in $text with ASCII " and '
+def unsmart: unsmart(.);  # input form: delegate to unsmart/1 (a bare `unsmart` here is unsmart/0 itself and never terminates)
+```
\ No newline at end of file
diff --git a/functions/unroll.md b/functions/unroll.md
index 1e3ac54..b01a822 100644
--- a/functions/unroll.md
+++ b/functions/unroll.md
@@ -2,15 +2,15 @@
 
 ```jq
 [leaf_paths as $path | {
-    "key": $path | map(tostring) | join("_"),
-    "value": getpath($path)
+  "key": $path | map(tostring) | join("_"),
+  "value": getpath($path)
 }] | from_entries
 ```
 
 ```console
-$ cat data/nested.json|jq '[leaf_paths as $path | {  
-    "key": $path | map(tostring) | join("_"),
-    "value": getpath($path)
+$ cat data/nested.json|jq '[leaf_paths as $path | {
+  "key": $path | map(tostring) | join("_"),
+  "value": getpath($path)
 }] | from_entries
 '
 {
@@ -23,54 +23,52 @@ $ cat data/nested.json|jq '[leaf_paths as $path | {
 
 ```jq
 def categorize:
-    # Returns "object", "array" or "scalar" to indicate the category
-    # of the piped element.
-    if type == "object" then "object" 
-    elif type == "array" then "array" 
-    else "scalar" 
-    end;
+  # Returns "object", "array" or "scalar" to indicate the category
+  # of the piped element.
+  if type == "object" then "object"
+  elif type == "array" then "array"
+  else "scalar"
+  end;
 
 def pluck($category):
-    # Plucks the children of a particular category from piped element.    
-    if categorize != "object"
-    then empty
-    else to_entries[] 
-         | select(.value | categorize == $category) 
-         | .value
-    end;
-    
+  # Plucks the children of a particular category from piped element.
+  if categorize != "object"
+  then empty
+  else to_entries[]
+    | select(.value | categorize == $category)
+    | .value
+  end;
+
 def split:
-    # Splits the piped element's children into arrays, scalars, and objects 
-    # and returns a meta object containing the children seperated by these
-    # keys. If the piped element is a scalar or array, this does not look 
-    # at the children, but just returns that element in the meta object.
-    if categorize == "scalar" then { objects: [], arrays: [], scalars: [.] }
-    elif categorize == "array" then { objects: [], arrays: [.], scalars: [] } 
-    else { objects: [pluck("object")], arrays : [pluck("array")], scalars: [pluck("scalar")] }
-    end;
+  # Splits the piped element's children into arrays, scalars, and objects
+  # and returns a meta object containing the children separated by these
+  # keys. If the piped element is a scalar or array, this does not look
+  # at the children, but just returns that element in the meta object.
+  if categorize == "scalar" then { objects: [], arrays: [], scalars: [.] }
+  elif categorize == "array" then { objects: [], arrays: [.], scalars: [] }
+  else { objects: [pluck("object")], arrays : [pluck("array")], scalars: [pluck("scalar")] }
+  end;
 
 def unwrap:
-  # Unwraps an array recursively, until the elements of the returned array 
+  # Unwraps an array recursively, until the elements of the returned array
   # are either scalars or objects but not arrays. If piped element is not
   # an array, returns the element as is.
-
   if type != "array" then .
   elif length == 0  then empty
   else .[] | unwrap
   end;
 
 def extract($category):
-  # Extracts the elements of a particular category from the piped in array. 
-  # If the piped in element is not an array, this fn acts as filter to 
+  # Extracts the elements of a particular category from the piped in array.
+  # If the piped in element is not an array, this fn acts as filter to
   # only return the element if it is of the desired category.
   unwrap | select(.| categorize == $category);
 
-def unroll: 
+def unroll:
   # Unrolls the passed in object recursively until only scalars are left.
   # Returns a row for each leaf node of tree structure of the object and
   # elements of the row would be all the scalars encountered at all the
   # ancestor levels of this left node.
-
   . | .result += .state.scalars
     | .state.objects += [.state.arrays | extract("object")]
     | .state.objects += [.state.arrays | extract("scalar")]
diff --git a/general/data-transform.md b/general/data-transform.md
new file mode 100644
index 0000000..f6944df
--- /dev/null
+++ b/general/data-transform.md
@@ -0,0 +1,80 @@
+# Data transformation
+
+## json2ini
+
+Adapted from <https://stackoverflow.com/a/76665197>
+
+Data used: [nested.json](data/nested.json)
+
+```
+{
+  "server": {
+    "atx": {
+      "user": "annie",
+      "port": 22
+    }
+  },
+  "storage": {
+    "nyc": {
+      "user": "nntrn",
+      "port": 22
+    }
+  }
+}
+```
+
+```sh
+jq --stream -nr ' reduce (inputs | select(has(1))) as [$path, $val]
+  ( {}; .[$path[:-1] | join(".")][$path[-1]] = $val )
+| to_entries[]
+| "[\(.key)]", (.value | to_entries[] | "\(.key) = \(.value)" )
+, ""'
+```
+
+Output
+
+```
+[server.atx]
+user = annie
+port = 22
+
+[storage.nyc]
+user = nntrn
+port = 22
+```
+
+## tsv2json
+
+Adapted from <https://stackoverflow.com/a/55996042>
+
+Data used: [tmp.tsv](data/tmp.tsv)
+
+```
+foo bar baz
+1   2   3
+4   5   6
+```
+
+```sh
+jq -R 'split("[\\s\\t]+";"x") as $head
+| [inputs | split("[\\s\\t]+";"x")]
+| map( . as $row | reduce (keys|.[]) as $x
+    ( {}; . + {"\($head[$x])":$row[$x]} ) )' data/tmp.tsv
+```
+
+Output
+
+```json
+[
+  {
+    "foo": "1",
+    "bar": "2",
+    "baz": "3"
+  },
+  {
+    "foo": "4",
+    "bar": "5",
+    "baz": "6"
+  }
+]
+```
diff --git a/general/examples.md b/general/examples.md
index 28084c4..a75325f 100644
--- a/general/examples.md
+++ b/general/examples.md
@@ -53,10 +53,10 @@ $ jq '.[] |= if .attr2 then (.attr2 = "bax") else . end'
 [[Source]](https://github.com/stedolan/jq/issues/873#issuecomment-125385615)
 
 
-## Slurpfiles
+## Slurp-file
 
 ```console
-$ jq --slurpfile cars data/cars.json '{titanic: .[0:2],cars:$cars[][0:2]}' data/titanic.json
+$ jq --slurpfile cars data/cars.json '{titanic: .[0:1], cars: $cars[][0:1]}' data/titanic.json
 {
   "titanic": [
     {
@@ -68,16 +68,6 @@ $ jq --slurpfile cars data/cars.json '{titanic: .[0:2],cars:$cars[][0:2]}' data/
       "Siblings_Spouses_Aboard": 1,
       "Parents_Children_Aboard": 0,
       "Fare": 7.25
-    },
-    {
-      "Survived": 1,
-      "Pclass": 1,
-      "Name": "Mrs. John Bradley (Florence Briggs Thayer) Cumings",
-      "Sex": "female",
-      "Age": 38,
-      "Siblings_Spouses_Aboard": 1,
-      "Parents_Children_Aboard": 0,
-      "Fare": 71.2833
     }
   ],
   "cars": [
@@ -91,17 +81,6 @@ $ jq --slurpfile cars data/cars.json '{titanic: .[0:2],cars:$cars[][0:2]}' data/
       "weight_lb_": 3821,
       "0_60_mph_s_": 11,
       "year": 1973
-    },
-    {
-      "name": "AMC Ambassador DPL",
-      "brand": "AMC",
-      "economy_mpg_": 15,
-      "cylinders": 8,
-      "displacement_cc_": 390,
-      "power_hp_": 190,
-      "weight_lb_": 3850,
-      "0_60_mph_s_": 8.5,
-      "year": 1970
     }
   ]
 }
diff --git a/recipes.jq b/recipes.jq
index 6b8f1b3..0cb121b 100644
--- a/recipes.jq
+++ b/recipes.jq
@@ -9,6 +9,159 @@
 #    https://github.com/nntrn/jq-recipes
 #
 
+##########################################################################################
+# _site/functions/barcharts.md  
+##########################################################################################
+
+def barchart($key):  # text bar chart of how often each value of $key occurs in the input array of objects
+  length as $total  # number of rows
+  | map((.[$key] // "null") | tostring)  # null/false/missing values are bucketed under "null"
+  | group_by(.)
+  | (map({ key: .[0], value: length, title_len: (.[0]|tostring|length) }) ) as $columns  # per distinct value: label, count, label width
+  | $columns
+  | sort_by(.value) | reverse  # most frequent first
+  | (max_by(.title_len)|.title_len) as $padding  # width of the widest label
+  |
+  (if (((($columns|length)/$total) > .8) or (($columns|length) > 1000)) then  # too many distinct values: more than 80% of the rows, or over 1000
+    [ "IGNORING <\($key)>: \($columns|length) out of \($total) rows", ""]
+  else [
+    $key,
+    ("-" * ($key|length) ),  # underline as long as the key name
+    map(
+    [
+      .key, (" " * ($padding-.title_len)),  # label, then spaces up to the widest label
+      "\((.value/$total)*100|tostring|.+".000"|.[0:4])%",  # percentage of rows, cut to 4 characters
+      ( if (.value == 1) then "▊" else ("█" * (((.value/$total)*100) + (.value|log)|round)) end),  # bar of round(percent + ln(count)) blocks; one thin block when count is 1
+      .value
+    ] | join(" ")
+    ),
+    ""
+  ] end)
+  | flatten
+  | join("\n");  # a single multi-line string
+
+def run_barchart:
+  # Render one barchart per column and join them with newlines.
+  # Column names are the keys of the first row.
+  . as $rows
+  | (.[0] | keys) as $names
+  | [ $names[] as $name | $rows | barchart($name) ]
+  | join("\n");
+
+##########################################################################################
+# _site/functions/conversion.md  
+##########################################################################################
+
+def to_precision($p):  # truncate (not round) the input number to at most $p decimal places
+  . |tostring|split(".")
+  # a whole number stringifies without a "." part; use "0" as its fraction so split("") never receives null
+  | [.[0], ((.[1] // "0")|split("")|.[0:($p|tonumber)]|join(""))]
+  | join(".") | tonumber;
+
+def humansize(bytes;$p):
+  # Byte count -> "<n>G" / "<n>M" / "<n>K" with $p decimals; 1024 or less is returned as the bare number.
+  def scaled($unit; $suffix): (. / $unit | to_precision($p) | tostring) + $suffix;
+  (bytes | tonumber)
+  | if . > 1073741824 then scaled(1073741824; "G")
+    elif . > 1048576 then scaled(1048576; "M")
+    elif . > 1024 then scaled(1024; "K") else . end;
+
+def humansize(bytes): humansize(bytes;1);  # one-argument form: one decimal place
+
+##########################################################################################
+# _site/functions/json2csv.md  
+##########################################################################################
+
+def json2csv:
+  # CSV header = sorted union of all keys; each object then becomes one row,
+  # with null (an empty field) wherever it lacks a key.
+  (map(keys) | add | unique) as $header | [.[] | [.[$header[]]]] as $body
+  | ($header, $body[]) | @csv;
+
+##########################################################################################
+# _site/functions/pick.md  
+##########################################################################################
+
+def pick(stream):
+  # Build a new value holding only the paths matched by `stream`, each
+  # filled with the value found at that path in the input.
+  . as $src | reduce path(stream) as $p (null; setpath($p; $src | getpath($p)));
+
+##########################################################################################
+# _site/functions/read-history.md  
+##########################################################################################
+
+def history:
+  # Input: an array of strings (presumably the lines of a timestamped shell
+  # history file). A line matching "#<10-12 digits>" becomes an ISO 8601
+  # date; any other line becomes TAB + line + NEWLINE. Everything is joined.
+  def entry: if test("#[0-9]{10,12}") then gsub("#";"") | tonumber | todate else "\t\(.)\n" end;
+  [.[] | entry]
+  | join("");
+
+##########################################################################################
+# _site/functions/summary.md  
+##########################################################################################
+
+def grouped_summary($item):  # {<$item>: {<value of $item>: <row count>, ...}} for an array of objects; null names/values are labelled "blank"
+  {"\($item? // "blank")":group_by(.[$item])|map({"\(.[0][$item]? // "blank")":length})|add};
+
+def summary:  # per-key value counts for an array of objects, leaving out unwieldy keys
+  [ (.[0]|keys)[] as $keys | grouped_summary($keys)]  # key names are taken from the first row only
+  | add
+  | to_entries
+  | map(
+      del(select(((.value//"")|keys[0]|length) > 100)) |  # drop the entry when its first (sorted) value label is longer than 100 characters
+      del(select(((.value//"")|values|length) > 400))     # drop the entry when it has more than 400 distinct values
+    )
+  | map(select(.))  # discard the entries removed above
+  | from_entries;
+
+def summary_wip:  # work-in-progress variant of summary
+  [ (.[0]|keys)[] as $keys | grouped_summary($keys)]
+  | add
+  | to_entries
+  #| map(del(select(((.value//"")|keys|length) > 400)))
+  | map(select(.)|{key,count:(.value|length)})  # keep each key together with its number of distinct values
+  | map(.value |= to_entries);  # NOTE(review): .value is no longer present after the previous step — confirm this is intended
+
+def summary2:
+  # Per key of the first row: how many rows carry each distinct value.
+  # Keys whose distinct-value count reaches 90% of the row count are omitted.
+  . as $rows
+  | [ (.[0] | keys[]) as $k
+      | { key: $k, value: ($rows | map(.[$k]) | group_by(.) | map({"\(.[0])": length}) | add) }
+      | select((.value | to_entries | length) < (.90 * ($rows | length))) ]
+  | from_entries
+  ;
+
+##########################################################################################
+# _site/general/codepoints.md  
+##########################################################################################
+
+def smart_squotes($s):  # wrap $s in single quotes only when it contains whitespace
+  ([39] | implode) as $q | $s | if test("[\\s\\n\\t]";"x") then $q + . + $q else . end;
+
+def smart_dquotes($s):  # JSON-quote $s only when it contains whitespace
+  $s | if test("[\\s\\n\\t]";"x") then @json else . end;
+
+##########################################################################################
+# _site/general/reduce.md  
+##########################################################################################
+
+def tocsv:
+  # Treat the first row as column names and turn each following row into
+  # an object keyed by those names (value taken from the same position).
+  .[0] as $names
+  | [ .[1:][] as $r | $names | with_entries({ key: .value, value: $r[.key] }) ]
+  ;
+
+##########################################################################################
+# _site/general/wrangle.md  
+##########################################################################################
+
+def s: [splits(" +")];  # split the input string on runs of spaces and collect the pieces into an array
+
 ##########################################################################################
 # functions/barcharts.md  
 ##########################################################################################
@@ -175,6 +328,91 @@ def summary2:
   | map(select((.value|to_entries|length)< (.90 * ($data|length))))
   | from_entries;
 
+##########################################################################################
+# functions/text.md  
+##########################################################################################
+
+def split_newlines($s): # string containing "\n" -> array of pieces; object input -> its entries; anything else -> $s
+  if ((type == "string") and (($s|tostring|split("\n")|length) > 1)?) 
+  then ($s|tostring|split("[\\r\\n]+([\\s]+)?";"x")) # split on CR/LF runs together with any whitespace that follows them
+  elif (type == "object") then to_entries # note: both type tests look at the input (.), not at $s
+  else $s end; 
+
+def recuse_split_newlines: walk(split_newlines(.)|from_entries? // .);  # apply split_newlines at every level; entry lists are rebuilt into objects when from_entries succeeds
+
+def squo: [39]|implode;  # a single apostrophe, built from codepoint 39 instead of a literal quote character
+
+def squote($text): [squo,$text,squo]|join("");  # wrap $text in single quotes
+def dquote($text): "\"\($text)\"";  # wrap $text in double quotes; embedded quotes are not escaped
+
+def unsmart($text): $text | gsub("[“”]";"\"") | gsub("[’‘]";"'");  # replace curly double/single quotes in $text with ASCII " and '
+def unsmart: unsmart(.);  # input form: delegate to unsmart/1 (a bare `unsmart` here is unsmart/0 itself and never terminates)
+
+##########################################################################################
+# functions/unroll.md  
+##########################################################################################
+
+def categorize:
+  # Classify the input: containers report their own type name,
+  # every other JSON value is reported as "scalar".
+  type as $t
+  | if $t == "object" or $t == "array" then $t
+    else "scalar"
+    end;
+
+def pluck($category):
+  # Emit, one by one, the member values of the input object whose
+  # category (as reported by `categorize`) equals $category.
+  # Any non-object input emits nothing.
+  if categorize == "object"
+  then .[] | select(categorize == $category)
+  else empty
+  end;
+
+def split:
+  # Sort the input into a {objects, arrays, scalars} record. A scalar or an
+  # array is not inspected: it is stored whole under its own category. An
+  # object contributes its member values, each filed under the category
+  # that `categorize` reports for it.
+  categorize as $kind
+  | if $kind == "object" then { objects: [pluck("object")], arrays: [pluck("array")], scalars: [pluck("scalar")] }
+    else { objects: [], arrays: [select($kind == "array")], scalars: [select($kind == "scalar")] }
+    end;
+
+def unwrap:
+  # Flatten nested arrays into a stream: every non-array value is emitted
+  # as is, and each array is replaced by the unwrapped stream of its
+  # members (so an empty array contributes nothing).
+  if type == "array"
+  then .[] | unwrap
+  else .
+  end;
+
+def extract($category):
+  # Stream the unwrapped (array-flattened) input, keeping only the values
+  # whose category equals $category. A non-array input is treated as a
+  # single value that is either passed through or dropped.
+  unwrap | select(categorize == $category);
+
+def unroll:
+  # Unrolls the passed in object recursively until only scalars are left.
+  # Returns a row for each leaf node of tree structure of the object and
+  # elements of the row would be all the scalars encountered at all the
+  # ancestor levels of this leaf node. Input is a {data, state, result} record.
+  . | .result += .state.scalars                                # add this level's scalars to the row
+    | .state.objects += [.state.arrays | extract("object")]    # objects found inside arrays are queued as well
+    | .state.objects += [.state.arrays | extract("scalar")]    # ...and so are scalars found inside arrays
+    | if (.state.objects | length == 0 )
+      then .result                                             # nothing left to descend into: emit the row
+      else ({ data : .state.objects,
+              state: .state.objects[] | split,                 # yields one record, hence one recursive call, per queued element
+              result: .result
+            } | unroll)
+      end;
+
+def unrolls($data): { data: $data, state: $data| split, result: [] } | unroll ;  # build the initial {data, state, result} record for $data and unroll it
+def unrolls: unrolls(.);  # same, applied to the input
+
 ##########################################################################################
 # general/codepoints.md  
 ##########################################################################################