From 253d927fd72f60e46704249dad49e3f3a442aebb Mon Sep 17 00:00:00 2001
From: Colin Leong <--unset>
Date: Thu, 20 Jun 2024 12:36:38 -0400
Subject: [PATCH 1/5] CDL: add deprecated status to ATIS
---
src/datasets/ATIS.json | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/datasets/ATIS.json b/src/datasets/ATIS.json
index eb871d6..d098dc6 100644
--- a/src/datasets/ATIS.json
+++ b/src/datasets/ATIS.json
@@ -10,6 +10,7 @@
"#items": 292,
"#samples": "595 Sentences ",
"#signers": null,
+ "status": "deprecated",
"license": null,
"licenseUrl": null
}
From 06d4c85f87aed3010f5ff8cb290ac7d0baeb7461 Mon Sep 17 00:00:00 2001
From: Colin Leong <--unset>
Date: Thu, 20 Jun 2024 12:37:36 -0400
Subject: [PATCH 2/5] CDL: add feature to check for deprecated datasets and
skip them. Also comments on datasets.js
---
src/datasets.js | 119 ++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 99 insertions(+), 20 deletions(-)
diff --git a/src/datasets.js b/src/datasets.js
index 4fc0ef2..63be65f 100644
--- a/src/datasets.js
+++ b/src/datasets.js
@@ -1,27 +1,44 @@
+// CDL: added comments via discussion with ChatGPT 4o: https://chatgpt.com/share/3acd13d8-ddf8-4b71-95af-b7904f806b39
+// then manually spot-checked the ones I wasn't sure about.
+// "*" means relevant docs at the end
+
+
+// Import the NodeJS "file system" module *
const fs = require('fs');
-function link(title, href) {
- let s = title;
+// Function to create a markdown link
+function createMarkdownLink(title, href) {
+ let s = title; // Initialize link text with title
+ // If href is provided, format the string as a markdown link
if (href) {
s = `[${s}](${href})`;
}
- return s;
+ return s; // Return the formatted link or title
}
+// Function to sanitize text *
function sanitize(text) {
+ // CDL: return unchanged if falsy. Later, falsy values are replaced with ""
if (!text) {
return text;
}
- if(typeof text === 'number') {
+ // If text is a number, convert it to a string
+ if (typeof text === 'number') {
return String(text);
}
- return text.replace(/>/, "\\>")
+ // Replace '>' with escaped version
+ return text.replace(/>/, "\\>");
}
+// Function to get an icon for a feature
function getIcon(feature) {
+ // Split the feature into type and specificity
+ // CDL: this means that things like pose:OpenPose and pose:MediaPipe get the same icon.
const [type, specificity] = feature.split(":");
+
+ // Dictionary mapping feature types to emoji
const dict = {
'video': '🎥',
'pose': '👋',
@@ -31,47 +48,109 @@ function getIcon(feature) {
'text': '📜',
'speech': '🔊',
};
+
+ // Return an HTML span element with the appropriate emoji
return `${dict[type]}` || "TODO";
+ // Alternative return statement for using image icons
// return ``;
}
+// Function to print a table row
function printRow(row) {
- console.log('|', row.join(' | '), '|');
+ console.log('|', row.join(' | '), '|'); // Join row elements with ' | ' and print
}
-
+// Define the path to the datasets directory
const PATH = "src/datasets/";
-const datasets = fs.readdirSync(PATH)
- .map(fName => String(fs.readFileSync(PATH + fName)))
- .map(d => JSON.parse(d))
- .sort((a, b) => a.pub.name.toLowerCase() > b.pub.name.toLowerCase() ? 1 : -1);
-
+// Read the datasets directory and process each file *
+// Colin: => means "Arrow function"*
+const datasets = fs.readdirSync(PATH) // Read all filenames in the directory *
+ .map(fName => String(fs.readFileSync(PATH + fName))) // Read each file's content and convert to string *
+ .map(d => JSON.parse(d)) // Parse the JSON content. *
+ .sort((a, b) => a.pub.name.toLowerCase() > b.pub.name.toLowerCase() ? 1 : -1); // Sort datasets by publication name *
+// Define column headers and their lengths for the table
const columns = ['Dataset', 'Publication', 'Language', 'Features', '#Signs', '#Samples', '#Signers', 'License'];
-const lengths = [4, 7, 3, 2, 2, 5, 2, 5]
-// console.log('
')
+const lengths = [4, 7, 3, 2, 2, 5, 2, 5];
+
+// Print the header row
printRow(columns); // Header row
-console.log('|' + lengths.map((l) => new Array(l).fill('-').join('')).join(' | ') + '|'); // Divider row
+// Print the divider row with dashes
+console.log('|' + lengths.map((l) => new Array(l).fill('-').join('')).join(' | ') + '|');
+// Define an emoji for download link
const downloadEmoji = '💾';
+// Iterate over each dataset to print its details
for (const dataset of datasets) {
- let title = link(dataset.pub.name, dataset.pub.url);
+ // CDL: should we even include it?
+ if(dataset.status === "deprecated"){
+ continue; //skip to the next one
+ }
+
+ // Create the title link for the dataset
+ let title = createMarkdownLink(dataset.pub.name, dataset.pub.url);
+
+ // If the dataset has a loader, add a download link
if (dataset.loader) {
const sld = 'https://github.com/sign-language-processing/datasets/tree/master/sign_language_datasets/datasets/' + dataset.loader;
- title += ' ' + link(downloadEmoji, sld);
+ title += ' ' + createMarkdownLink(downloadEmoji, sld);
}
+
+ // Create a row with the dataset details
+ // CDL: falsy (empty, null, etc) values just replaced with blank strings
const row = [
title,
- dataset.pub.publication ? `@${dataset.pub.publication}` : dataset.pub.year || "",
+ dataset.pub.publication ? `@${dataset.pub.publication}` : dataset.pub.year || "", // add citation syntax. Make/Pandoc later replace with citation
dataset.language,
dataset["features"].length ? dataset["features"].map(getIcon).join("") : "TODO",
- dataset["#items"] ? dataset["#items"].toLocaleString('en-US') : "",
+ dataset["#items"] ? dataset["#items"].toLocaleString('en-US') : "", // if there is an items field, format to standard
sanitize(dataset["#samples"]) || "",
dataset["#signers"] || "",
- link(dataset.license, dataset.licenseUrl)
+ createMarkdownLink(dataset.license, dataset.licenseUrl)
];
+
+ // Print the dataset row
printRow(row);
}
+
+// JavaScript notes for non-JS programmers
+
+// Require: similar to "include" or "import"
+// https://www.freecodecamp.org/news/requiring-modules-in-node-js-everything-you-need-to-know-e7fbd119be8/
+
+// Falsy: Includes text with null value, empty strings, etc.
+// https://www.freecodecamp.org/news/falsy-values-in-javascript/
+// https://developer.mozilla.org/en-US/docs/Glossary/Falsy
+
+// File system methods
+// https://www.geeksforgeeks.org/node-js-fs-readdirsync-method/
+// https://www.geeksforgeeks.org/node-js-fs-readfilesync-method/
+
+// Locale String: helps you reformat to a standard format.
+// e.g. 1234 -> 1,234
+// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number/toLocaleString
+
+
+// Sorting an array of strings
+// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/sort
+
+// JSON
+// apparently in JavaScript, support for JavaScript Object Notations is built-in. Neat!
+// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/JSON
+// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/JSON/parse
+
+// JS ternary operator ?
+// Basically an if/else statement.
+// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Conditional_operator
+
+// Arrow functions =>
+// kinda like a lambda function. For when you want to make a function but NOT name it/keep it around for later
+// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Functions/Arrow_functions
+
+// map:
+// Used above to run the same (anonymous) function on everything in the array
+// "The map() method of Array instances creates a new array populated with the results of calling a provided function on every element in the calling array."
+// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/map
\ No newline at end of file
From 2500d0662374707a297d032edc9f26c406f543fa Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Thu, 20 Jun 2024 16:56:34 -0400
Subject: [PATCH 3/5] CDL: comments on datasets.js v2
---
src/datasets.js | 88 +++++++++----------------------------------------
1 file changed, 15 insertions(+), 73 deletions(-)
diff --git a/src/datasets.js b/src/datasets.js
index 63be65f..ebb9d49 100644
--- a/src/datasets.js
+++ b/src/datasets.js
@@ -1,24 +1,17 @@
-// CDL: added comments via discussion with ChatGPT 4o: https://chatgpt.com/share/3acd13d8-ddf8-4b71-95af-b7904f806b39
-// then manually spot-checked the ones I wasn't sure about.
-// "*" means relevant docs at the end
-
-
-// Import the NodeJS "file system" module *
const fs = require('fs');
-// Function to create a markdown link
+// If href is provided, format the string as a markdown link
function createMarkdownLink(title, href) {
- let s = title; // Initialize link text with title
+ let s = title;
- // If href is provided, format the string as a markdown link
+
if (href) {
s = `[${s}](${href})`;
}
- return s; // Return the formatted link or title
+ return s;
}
-// Function to sanitize text *
function sanitize(text) {
// CDL: return unchanged if falsy. Later, falsy values are replaced with ""
if (!text) {
@@ -32,7 +25,7 @@ function sanitize(text) {
return text.replace(/>/, "\\>");
}
-// Function to get an icon for a feature
+// Colin: gets the proper emoji icon for dataset features.
function getIcon(feature) {
// Split the feature into type and specificity
// CDL: this means that things like pose:OpenPose and pose:MediaPipe get the same icon.
@@ -49,108 +42,57 @@ function getIcon(feature) {
'speech': '🔊',
};
- // Return an HTML span element with the appropriate emoji
return `${dict[type]}` || "TODO";
- // Alternative return statement for using image icons
// return ``;
}
-// Function to print a table row
function printRow(row) {
- console.log('|', row.join(' | '), '|'); // Join row elements with ' | ' and print
+ console.log('|', row.join(' | '), '|');
}
-// Define the path to the datasets directory
const PATH = "src/datasets/";
-// Read the datasets directory and process each file *
-// Colin: => means "Arrow function"*
const datasets = fs.readdirSync(PATH) // Read all filenames in the directory *
.map(fName => String(fs.readFileSync(PATH + fName))) // Read each file's content and convert to string *
.map(d => JSON.parse(d)) // Parse the JSON content. *
.sort((a, b) => a.pub.name.toLowerCase() > b.pub.name.toLowerCase() ? 1 : -1); // Sort datasets by publication name *
-// Define column headers and their lengths for the table
const columns = ['Dataset', 'Publication', 'Language', 'Features', '#Signs', '#Samples', '#Signers', 'License'];
const lengths = [4, 7, 3, 2, 2, 5, 2, 5];
-// Print the header row
-printRow(columns); // Header row
-// Print the divider row with dashes
+
+printRow(columns);
+
console.log('|' + lengths.map((l) => new Array(l).fill('-').join('')).join(' | ') + '|');
-// Define an emoji for download link
const downloadEmoji = '💾';
-// Iterate over each dataset to print its details
for (const dataset of datasets) {
- // CDL: should we even include it?
+
if(dataset.status === "deprecated"){
continue; //skip to the next one
}
-
- // Create the title link for the dataset
+
let title = createMarkdownLink(dataset.pub.name, dataset.pub.url);
- // If the dataset has a loader, add a download link
if (dataset.loader) {
const sld = 'https://github.com/sign-language-processing/datasets/tree/master/sign_language_datasets/datasets/' + dataset.loader;
title += ' ' + createMarkdownLink(downloadEmoji, sld);
}
- // Create a row with the dataset details
- // CDL: falsy (empty, null, etc) values just replaced with blank strings
+ // CDL: note - falsy (empty, null, etc) values just replaced with blank strings
const row = [
title,
- dataset.pub.publication ? `@${dataset.pub.publication}` : dataset.pub.year || "", // add citation syntax. Make/Pandoc later replace with citation
+ dataset.pub.publication ? `@${dataset.pub.publication}` : dataset.pub.year || "", // add citation syntax @citationkey. Make/Pandoc later replace with citation
dataset.language,
dataset["features"].length ? dataset["features"].map(getIcon).join("") : "TODO",
- dataset["#items"] ? dataset["#items"].toLocaleString('en-US') : "", // if there is an items field, format to standard
+ dataset["#items"] ? dataset["#items"].toLocaleString('en-US') : "",
sanitize(dataset["#samples"]) || "",
dataset["#signers"] || "",
createMarkdownLink(dataset.license, dataset.licenseUrl)
];
- // Print the dataset row
+
printRow(row);
}
-
-// JavaScript notes for non-JS programmers
-
-// Require: similar to "include" or "import"
-// https://www.freecodecamp.org/news/requiring-modules-in-node-js-everything-you-need-to-know-e7fbd119be8/
-
-// Falsy: Includes text with null value, empty strings, etc.
-// https://www.freecodecamp.org/news/falsy-values-in-javascript/
-// https://developer.mozilla.org/en-US/docs/Glossary/Falsy
-
-// File system methods
-// https://www.geeksforgeeks.org/node-js-fs-readdirsync-method/
-// https://www.geeksforgeeks.org/node-js-fs-readfilesync-method/
-
-// Locale String: helps you reformat to a standard format.
-// e.g. 1234 -> 1,234
-// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number/toLocaleString
-
-
-// Sorting an array of strings
-// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/sort
-
-// JSON
-// apparently in JavaScript, support for JavaScript Object Notations is built-in. Neat!
-// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/JSON
-// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/JSON/parse
-
-// JS ternary operator ?
-// Basically an if/else statement.
-// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Conditional_operator
-
-// Arrow functions =>
-// kinda like a lambda function. For when you want to make a function but NOT name it/keep it around for later
-// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Functions/Arrow_functions
-
-// map:
-// Used above to run the same (anonymous) function on everything in the array
-// "The map() method of Array instances creates a new array populated with the results of calling a provided function on every element in the calling array."
-// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/map
\ No newline at end of file
From a3f37f4c45392ba8a61573069bd3ce4588f80304 Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Fri, 21 Jun 2024 09:39:14 -0400
Subject: [PATCH 4/5] CDL: more commenting improvements! Thanks, Amit.
---
src/datasets.js | 38 ++++++++++++++++++--------------------
1 file changed, 18 insertions(+), 20 deletions(-)
diff --git a/src/datasets.js b/src/datasets.js
index ebb9d49..bc7fe92 100644
--- a/src/datasets.js
+++ b/src/datasets.js
@@ -2,22 +2,21 @@ const fs = require('fs');
// If href is provided, format the string as a markdown link
function createMarkdownLink(title, href) {
- let s = title;
+ let s = title;
+
-
if (href) {
s = `[${s}](${href})`;
}
- return s;
+ return s;
}
function sanitize(text) {
- // CDL: return unchanged if falsy. Later, falsy values are replaced with ""
if (!text) {
return text;
}
- // If text is a number, convert it to a string
+
if (typeof text === 'number') {
return String(text);
}
@@ -30,9 +29,8 @@ function getIcon(feature) {
// Split the feature into type and specificity
// CDL: this means that things like pose:OpenPose and pose:MediaPipe get the same icon.
const [type, specificity] = feature.split(":");
-
- // Dictionary mapping feature types to emoji
- const dict = {
+
+ const featureEmojiDict = {
'video': '🎥',
'pose': '👋',
'mouthing': '👄',
@@ -41,13 +39,13 @@ function getIcon(feature) {
'text': '📜',
'speech': '🔊',
};
-
- return `${dict[type]}` || "TODO";
+
+ return `${featureEmojiDict[type]}` || "TODO";
// return ``;
}
function printRow(row) {
- console.log('|', row.join(' | '), '|');
+ console.log('|', row.join(' | '), '|');
}
const PATH = "src/datasets/";
@@ -60,39 +58,39 @@ const datasets = fs.readdirSync(PATH) // Read all filenames in the directory *
const columns = ['Dataset', 'Publication', 'Language', 'Features', '#Signs', '#Samples', '#Signers', 'License'];
const lengths = [4, 7, 3, 2, 2, 5, 2, 5];
+printRow(columns);
-printRow(columns);
-
+// divider row
console.log('|' + lengths.map((l) => new Array(l).fill('-').join('')).join(' | ') + '|');
const downloadEmoji = '💾';
for (const dataset of datasets) {
- if(dataset.status === "deprecated"){
+ if (dataset.status === "deprecated") {
continue; //skip to the next one
}
-
+
let title = createMarkdownLink(dataset.pub.name, dataset.pub.url);
-
+
if (dataset.loader) {
const sld = 'https://github.com/sign-language-processing/datasets/tree/master/sign_language_datasets/datasets/' + dataset.loader;
title += ' ' + createMarkdownLink(downloadEmoji, sld);
}
- // CDL: note - falsy (empty, null, etc) values just replaced with blank strings
+ // Note: falsy values (empty string, null, etc.) are simply replaced with blank strings.
const row = [
title,
dataset.pub.publication ? `@${dataset.pub.publication}` : dataset.pub.year || "", // add citation syntax @citationkey. Make/Pandoc later replace with citation
dataset.language,
dataset["features"].length ? dataset["features"].map(getIcon).join("") : "TODO",
- dataset["#items"] ? dataset["#items"].toLocaleString('en-US') : "",
+ dataset["#items"] ? dataset["#items"].toLocaleString('en-US') : "",
sanitize(dataset["#samples"]) || "",
dataset["#signers"] || "",
createMarkdownLink(dataset.license, dataset.licenseUrl)
];
-
-
+
+
printRow(row);
}
From b563b41375e9ed9654a9be67a237e76d5a2cbd60 Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Mon, 24 Jun 2024 11:51:01 -0400
Subject: [PATCH 5/5] CDL: fix a few more comments in datasets.js
---
src/datasets.js | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/src/datasets.js b/src/datasets.js
index bc7fe92..8b800b7 100644
--- a/src/datasets.js
+++ b/src/datasets.js
@@ -24,10 +24,9 @@ function sanitize(text) {
return text.replace(/>/, "\\>");
}
-// Colin: gets the proper emoji icon for dataset features.
function getIcon(feature) {
- // Split the feature into type and specificity
- // CDL: this means that things like pose:OpenPose and pose:MediaPipe get the same icon.
+ // Split the feature (e.g. "pose:OpenPose") into its type and specificity ("pose" and "OpenPose").
+ // This lets specific features that share a type (e.g. pose:OpenPose and pose:MediaPipe) get the same icon.
const [type, specificity] = feature.split(":");
const featureEmojiDict = {
@@ -68,7 +67,7 @@ const downloadEmoji = '💾';
for (const dataset of datasets) {
if (dataset.status === "deprecated") {
- continue; //skip to the next one
+ continue;
}
let title = createMarkdownLink(dataset.pub.name, dataset.pub.url);