From 253d927fd72f60e46704249dad49e3f3a442aebb Mon Sep 17 00:00:00 2001
From: Colin Leong <--unset>
Date: Thu, 20 Jun 2024 12:36:38 -0400
Subject: [PATCH 1/5] CDL: add deprecated status to ATIS

---
 src/datasets/ATIS.json | 1 +
 1 file changed, 1 insertion(+)
diff --git a/src/datasets/ATIS.json b/src/datasets/ATIS.json
index eb871d6..d098dc6 100644
--- a/src/datasets/ATIS.json
+++ b/src/datasets/ATIS.json
@@ -10,6 +10,7 @@
   "#items": 292,
   "#samples": "595 Sentences ",
   "#signers": null,
+  "status": "deprecated",
   "license": null,
   "licenseUrl": null
 }

From 06d4c85f87aed3010f5ff8cb290ac7d0baeb7461 Mon Sep 17 00:00:00 2001
From: Colin Leong <--unset>
Date: Thu, 20 Jun 2024 12:37:36 -0400
Subject: [PATCH 2/5] CDL: add feature to check for deprecated datasets and
 skip them. Also comments on datasets.js

---
 src/datasets.js | 119 ++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 99 insertions(+), 20 deletions(-)

diff --git a/src/datasets.js b/src/datasets.js
index 4fc0ef2..63be65f 100644
--- a/src/datasets.js
+++ b/src/datasets.js
@@ -1,27 +1,44 @@
+// CDL: added comments via discussion with ChatGPT 4o: https://chatgpt.com/share/3acd13d8-ddf8-4b71-95af-b7904f806b39
+// then manually spot-checked the ones I wasn't sure about.
+// "*" means relevant docs at the end 
+
+
+// Import the NodeJS "file system" module *
 const fs = require('fs');
 
-function link(title, href) {
-    let s = title;
+// Function to create a markdown link
+function createMarkdownLink(title, href) {
+    let s = title; // Initialize link text with title
 
+    // If href is provided, format the string as a markdown link
     if (href) {
         s = `[${s}](${href})`;
     }
 
-    return s;
+    return s; // Return the formatted link or title
 }
 
+// Function to sanitize text *
 function sanitize(text) {
+    // CDL: return unchanged if falsy. Later, falsy values are replaced with ""
     if (!text) {
         return text;
     }
-    if(typeof text === 'number') {
+    // If text is a number, convert it to a string 
+    if (typeof text === 'number') {
         return String(text);
     }
-    return text.replace(/>/, "\\>")
+    // Replace '>' with escaped version
+    return text.replace(/>/, "\\>");
 }
 
+// Function to get an icon for a feature
 function getIcon(feature) {
+    // Split the feature into type and specificity
+    // CDL: this means that things like pose:OpenPose and pose:MediaPipe get the same icon.
     const [type, specificity] = feature.split(":");
+    
+    // Dictionary mapping feature types to emoji
     const dict = {
         'video': '🎥',
         'pose': '👋',
@@ -31,47 +48,109 @@ function getIcon(feature) {
         'text': '📜',
         'speech': '🔊',
     };
+    
+    // Return an HTML span element with the appropriate emoji
     return `<span title="${feature}">${dict[type]}</span>` || "TODO";
+    // Alternative return statement for using image icons
     // return `![${type}](assets/icons/${type}.png "${feature}")`;
 }
 
+// Function to print a table row
 function printRow(row) {
-    console.log('|', row.join(' | '), '|');
+    console.log('|', row.join(' | '), '|'); // Join row elements with ' | ' and print
 }
 
-
+// Define the path to the datasets directory
 const PATH = "src/datasets/";
 
-const datasets = fs.readdirSync(PATH)
-    .map(fName => String(fs.readFileSync(PATH + fName)))
-    .map(d => JSON.parse(d))
-    .sort((a, b) => a.pub.name.toLowerCase() > b.pub.name.toLowerCase() ? 1 : -1);
-
+// Read the datasets directory and process each file *
+// Colin: => means "Arrow function"* 
+const datasets = fs.readdirSync(PATH) // Read all filenames in the directory * 
+    .map(fName => String(fs.readFileSync(PATH + fName))) // Read each file's content and convert to string *
+    .map(d => JSON.parse(d)) // Parse the JSON content. * 
+    .sort((a, b) => a.pub.name.toLowerCase() > b.pub.name.toLowerCase() ? 1 : -1); // Sort datasets by publication name *
 
+// Define column headers and their lengths for the table
 const columns = ['Dataset', 'Publication', 'Language', 'Features', '#Signs', '#Samples', '#Signers', 'License'];
-const lengths = [4, 7, 3, 2, 2, 5, 2, 5]
-// console.log('<table cellspacing="0" border="1" style="max-width: 100%;">')
+const lengths = [4, 7, 3, 2, 2, 5, 2, 5];
+
+// Print the header row
 printRow(columns); // Header row
-console.log('|' + lengths.map((l) => new Array(l).fill('-').join('')).join(' | ') + '|'); // Divider row
+// Print the divider row with dashes
+console.log('|' + lengths.map((l) => new Array(l).fill('-').join('')).join(' | ') + '|');
 
+// Define an emoji for download link
 const downloadEmoji = '💾';
 
+// Iterate over each dataset to print its details
 for (const dataset of datasets) {
-    let title = link(dataset.pub.name, dataset.pub.url);
+    // CDL: should we even include it?
+    if(dataset.status === "deprecated"){
+        continue; //skip to the next one
+    }
+
+    // Create the title link for the dataset
+    let title = createMarkdownLink(dataset.pub.name, dataset.pub.url);
+    
+    // If the dataset has a loader, add a download link
     if (dataset.loader) {
         const sld = 'https://github.com/sign-language-processing/datasets/tree/master/sign_language_datasets/datasets/' + dataset.loader;
-        title += ' ' + link(downloadEmoji, sld);
+        title += ' ' + createMarkdownLink(downloadEmoji, sld);
     }
 
+
+    // Create a row with the dataset details
+    // CDL: falsy (empty, null, etc) values just replaced with blank strings
     const row = [
         title,
-        dataset.pub.publication ? `@${dataset.pub.publication}` : dataset.pub.year || "",
+        dataset.pub.publication ? `@${dataset.pub.publication}` : dataset.pub.year || "", // add citation syntax. Make/Pandoc later replace with citation
         dataset.language,
         dataset["features"].length ? dataset["features"].map(getIcon).join("") : "TODO",
-        dataset["#items"] ? dataset["#items"].toLocaleString('en-US') : "",
+        dataset["#items"] ? dataset["#items"].toLocaleString('en-US') : "", // if there is an items field, format to standard
         sanitize(dataset["#samples"]) || "",
         dataset["#signers"] || "",
-        link(dataset.license, dataset.licenseUrl)
+        createMarkdownLink(dataset.license, dataset.licenseUrl)
     ];
+    
+    // Print the dataset row
     printRow(row);
 }
+
+// JavaScript notes for non-JS programmers
+
+// Require: similar to "include" or "import"
+// https://www.freecodecamp.org/news/requiring-modules-in-node-js-everything-you-need-to-know-e7fbd119be8/
+
+// Falsy: Includes text with null value, empty strings, etc.
+//      https://www.freecodecamp.org/news/falsy-values-in-javascript/
+//      https://developer.mozilla.org/en-US/docs/Glossary/Falsy
+
+// File system methods
+// https://www.geeksforgeeks.org/node-js-fs-readdirsync-method/
+// https://www.geeksforgeeks.org/node-js-fs-readfilesync-method/
+
+// Locale String: helps you reformat to a standard format.
+//      e.g. 1234 -> 1,234
+// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number/toLocaleString
+
+
+// Sorting an array of strings
+// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/sort
+
+// JSON
+// apparently in JavaScript, support for JavaScript Object Notations is built-in. Neat!
+// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/JSON
+// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/JSON/parse
+
+// JS ternary operator ?
+// Basically an if/else statement. 
+// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Conditional_operator
+
+// Arrow functions =>
+// kinda like a lambda function. For when you want to make a function but NOT name it/keep it around for later
+// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Functions/Arrow_functions
+
+// map: 
+//      Used above to run the same (anonymous) function on everything in the array
+//      "The map() method of Array instances creates a new array populated with the results of calling a provided function on every element in the calling array."
+// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/map
\ No newline at end of file

From 2500d0662374707a297d032edc9f26c406f543fa Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Thu, 20 Jun 2024 16:56:34 -0400
Subject: [PATCH 3/5] CDL: comments on datasets.js v2

---
 src/datasets.js | 88 +++++++++----------------------------------------
 1 file changed, 15 insertions(+), 73 deletions(-)

diff --git a/src/datasets.js b/src/datasets.js
index 63be65f..ebb9d49 100644
--- a/src/datasets.js
+++ b/src/datasets.js
@@ -1,24 +1,17 @@
-// CDL: added comments via discussion with ChatGPT 4o: https://chatgpt.com/share/3acd13d8-ddf8-4b71-95af-b7904f806b39
-// then manually spot-checked the ones I wasn't sure about.
-// "*" means relevant docs at the end 
-
-
-// Import the NodeJS "file system" module *
 const fs = require('fs');
 
-// Function to create a markdown link
+// If href is provided, format the string as a markdown link
 function createMarkdownLink(title, href) {
-    let s = title; // Initialize link text with title
+    let s = title; 
 
-    // If href is provided, format the string as a markdown link
+    
     if (href) {
         s = `[${s}](${href})`;
     }
 
-    return s; // Return the formatted link or title
+    return s; 
 }
 
-// Function to sanitize text *
 function sanitize(text) {
     // CDL: return unchanged if falsy. Later, falsy values are replaced with ""
     if (!text) {
@@ -32,7 +25,7 @@ function sanitize(text) {
     return text.replace(/>/, "\\>");
 }
 
-// Function to get an icon for a feature
+// Colin: gets the proper emoji icon for dataset features.
 function getIcon(feature) {
     // Split the feature into type and specificity
     // CDL: this means that things like pose:OpenPose and pose:MediaPipe get the same icon.
@@ -49,108 +42,57 @@ function getIcon(feature) {
         'speech': '🔊',
     };
     
-    // Return an HTML span element with the appropriate emoji
     return `<span title="${feature}">${dict[type]}</span>` || "TODO";
-    // Alternative return statement for using image icons
     // return `![${type}](assets/icons/${type}.png "${feature}")`;
 }
 
-// Function to print a table row
 function printRow(row) {
-    console.log('|', row.join(' | '), '|'); // Join row elements with ' | ' and print
+    console.log('|', row.join(' | '), '|'); 
 }
 
-// Define the path to the datasets directory
 const PATH = "src/datasets/";
 
-// Read the datasets directory and process each file *
-// Colin: => means "Arrow function"* 
 const datasets = fs.readdirSync(PATH) // Read all filenames in the directory * 
     .map(fName => String(fs.readFileSync(PATH + fName))) // Read each file's content and convert to string *
     .map(d => JSON.parse(d)) // Parse the JSON content. * 
     .sort((a, b) => a.pub.name.toLowerCase() > b.pub.name.toLowerCase() ? 1 : -1); // Sort datasets by publication name *
 
-// Define column headers and their lengths for the table
 const columns = ['Dataset', 'Publication', 'Language', 'Features', '#Signs', '#Samples', '#Signers', 'License'];
 const lengths = [4, 7, 3, 2, 2, 5, 2, 5];
 
-// Print the header row
-printRow(columns); // Header row
-// Print the divider row with dashes
+
+printRow(columns); 
+
 console.log('|' + lengths.map((l) => new Array(l).fill('-').join('')).join(' | ') + '|');
 
-// Define an emoji for download link
 const downloadEmoji = '💾';
 
-// Iterate over each dataset to print its details
 for (const dataset of datasets) {
-    // CDL: should we even include it?
+
     if(dataset.status === "deprecated"){
         continue; //skip to the next one
     }
-
-    // Create the title link for the dataset
+    
     let title = createMarkdownLink(dataset.pub.name, dataset.pub.url);
     
-    // If the dataset has a loader, add a download link
     if (dataset.loader) {
         const sld = 'https://github.com/sign-language-processing/datasets/tree/master/sign_language_datasets/datasets/' + dataset.loader;
         title += ' ' + createMarkdownLink(downloadEmoji, sld);
     }
 
 
-    // Create a row with the dataset details
-    // CDL: falsy (empty, null, etc) values just replaced with blank strings
+    // CDL: note - falsy (empty, null, etc) values just replaced with blank strings
     const row = [
         title,
-        dataset.pub.publication ? `@${dataset.pub.publication}` : dataset.pub.year || "", // add citation syntax. Make/Pandoc later replace with citation
+        dataset.pub.publication ? `@${dataset.pub.publication}` : dataset.pub.year || "", // add citation syntax @citationkey. Make/Pandoc later replace with citation
         dataset.language,
         dataset["features"].length ? dataset["features"].map(getIcon).join("") : "TODO",
-        dataset["#items"] ? dataset["#items"].toLocaleString('en-US') : "", // if there is an items field, format to standard
+        dataset["#items"] ? dataset["#items"].toLocaleString('en-US') : "", 
         sanitize(dataset["#samples"]) || "",
         dataset["#signers"] || "",
         createMarkdownLink(dataset.license, dataset.licenseUrl)
     ];
     
-    // Print the dataset row
+    
     printRow(row);
 }
-
-// JavaScript notes for non-JS programmers
-
-// Require: similar to "include" or "import"
-// https://www.freecodecamp.org/news/requiring-modules-in-node-js-everything-you-need-to-know-e7fbd119be8/
-
-// Falsy: Includes text with null value, empty strings, etc.
-//      https://www.freecodecamp.org/news/falsy-values-in-javascript/
-//      https://developer.mozilla.org/en-US/docs/Glossary/Falsy
-
-// File system methods
-// https://www.geeksforgeeks.org/node-js-fs-readdirsync-method/
-// https://www.geeksforgeeks.org/node-js-fs-readfilesync-method/
-
-// Locale String: helps you reformat to a standard format.
-//      e.g. 1234 -> 1,234
-// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number/toLocaleString
-
-
-// Sorting an array of strings
-// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/sort
-
-// JSON
-// apparently in JavaScript, support for JavaScript Object Notations is built-in. Neat!
-// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/JSON
-// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/JSON/parse
-
-// JS ternary operator ?
-// Basically an if/else statement. 
-// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Conditional_operator
-
-// Arrow functions =>
-// kinda like a lambda function. For when you want to make a function but NOT name it/keep it around for later
-// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Functions/Arrow_functions
-
-// map: 
-//      Used above to run the same (anonymous) function on everything in the array
-//      "The map() method of Array instances creates a new array populated with the results of calling a provided function on every element in the calling array."
-// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/map
\ No newline at end of file

From a3f37f4c45392ba8a61573069bd3ce4588f80304 Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Fri, 21 Jun 2024 09:39:14 -0400
Subject: [PATCH 4/5] CDL: more commenting improvements! Thanks, Amit.

---
 src/datasets.js | 38 ++++++++++++++++++--------------------
 1 file changed, 18 insertions(+), 20 deletions(-)

diff --git a/src/datasets.js b/src/datasets.js
index ebb9d49..bc7fe92 100644
--- a/src/datasets.js
+++ b/src/datasets.js
@@ -2,22 +2,21 @@ const fs = require('fs');
 
 // If href is provided, format the string as a markdown link
 function createMarkdownLink(title, href) {
-    let s = title; 
+    let s = title;
+
 
-    
     if (href) {
         s = `[${s}](${href})`;
     }
 
-    return s; 
+    return s;
 }
 
 function sanitize(text) {
-    // CDL: return unchanged if falsy. Later, falsy values are replaced with ""
     if (!text) {
         return text;
     }
-    // If text is a number, convert it to a string 
+
     if (typeof text === 'number') {
         return String(text);
     }
@@ -30,9 +29,8 @@ function getIcon(feature) {
     // Split the feature into type and specificity
     // CDL: this means that things like pose:OpenPose and pose:MediaPipe get the same icon.
     const [type, specificity] = feature.split(":");
-    
-    // Dictionary mapping feature types to emoji
-    const dict = {
+
+    const featureEmojiDict = {
         'video': '🎥',
         'pose': '👋',
         'mouthing': '👄',
@@ -41,13 +39,13 @@ function getIcon(feature) {
         'text': '📜',
         'speech': '🔊',
     };
-    
-    return `<span title="${feature}">${dict[type]}</span>` || "TODO";
+
+    return `<span title="${feature}">${featureEmojiDict[type]}</span>` || "TODO";
     // return `![${type}](assets/icons/${type}.png "${feature}")`;
 }
 
 function printRow(row) {
-    console.log('|', row.join(' | '), '|'); 
+    console.log('|', row.join(' | '), '|');
 }
 
 const PATH = "src/datasets/";
@@ -60,39 +58,39 @@ const datasets = fs.readdirSync(PATH) // Read all filenames in the directory *
 const columns = ['Dataset', 'Publication', 'Language', 'Features', '#Signs', '#Samples', '#Signers', 'License'];
 const lengths = [4, 7, 3, 2, 2, 5, 2, 5];
 
+printRow(columns);
 
-printRow(columns); 
-
+// divider row
 console.log('|' + lengths.map((l) => new Array(l).fill('-').join('')).join(' | ') + '|');
 
 const downloadEmoji = '💾';
 
 for (const dataset of datasets) {
 
-    if(dataset.status === "deprecated"){
+    if (dataset.status === "deprecated") {
         continue; //skip to the next one
     }
-    
+
     let title = createMarkdownLink(dataset.pub.name, dataset.pub.url);
-    
+
     if (dataset.loader) {
         const sld = 'https://github.com/sign-language-processing/datasets/tree/master/sign_language_datasets/datasets/' + dataset.loader;
         title += ' ' + createMarkdownLink(downloadEmoji, sld);
     }
 
 
-    // CDL: note - falsy (empty, null, etc) values just replaced with blank strings
+    // Note: falsy values (empty, null, etc.) are replaced with blank strings.
     const row = [
         title,
         dataset.pub.publication ? `@${dataset.pub.publication}` : dataset.pub.year || "", // add citation syntax @citationkey. Make/Pandoc later replace with citation
         dataset.language,
         dataset["features"].length ? dataset["features"].map(getIcon).join("") : "TODO",
-        dataset["#items"] ? dataset["#items"].toLocaleString('en-US') : "", 
+        dataset["#items"] ? dataset["#items"].toLocaleString('en-US') : "",
         sanitize(dataset["#samples"]) || "",
         dataset["#signers"] || "",
         createMarkdownLink(dataset.license, dataset.licenseUrl)
     ];
-    
-    
+
+
     printRow(row);
 }

From b563b41375e9ed9654a9be67a237e76d5a2cbd60 Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Mon, 24 Jun 2024 11:51:01 -0400
Subject: [PATCH 5/5] CDL: fix a few more comments in datasets.js

---
 src/datasets.js | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/datasets.js b/src/datasets.js
index bc7fe92..8b800b7 100644
--- a/src/datasets.js
+++ b/src/datasets.js
@@ -24,10 +24,9 @@ function sanitize(text) {
     return text.replace(/>/, "\\>");
 }
 
-// Colin: gets the proper emoji icon for dataset features.
 function getIcon(feature) {
-    // Split the feature into type and specificity
-    // CDL: this means that things like pose:OpenPose and pose:MediaPipe get the same icon.
+    // Split the feature (e.g. "pose:OpenPose") into type and specificity ("pose" and "OpenPose").
+    // This lets specific features that share a type (pose:OpenPose and pose:MediaPipe) get the same icon.
     const [type, specificity] = feature.split(":");
 
     const featureEmojiDict = {
@@ -68,7 +67,7 @@ const downloadEmoji = '💾';
 for (const dataset of datasets) {
 
     if (dataset.status === "deprecated") {
-        continue; //skip to the next one
+        continue;
     }
 
     let title = createMarkdownLink(dataset.pub.name, dataset.pub.url);