From 8d5de21c9def02b443a2ae1e20031b2bca2b3b66 Mon Sep 17 00:00:00 2001 From: Colin Leong <--unset> Date: Thu, 20 Jun 2024 12:54:19 -0400 Subject: [PATCH 1/4] CDL: add notes/docs about pandoc version requirement --- Makefile | 4 ++++ README.md | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 96930d4..8e2773e 100644 --- a/Makefile +++ b/Makefile @@ -2,6 +2,10 @@ markdown: dst dst/index.html dst/style.css server: dst dst/style.css dst/index.md dst/sitemap.xml +# CDL: this rule is where the bibliography's @-citations are transformed, I believe, into things like (Someone 2024). +# It requires pandoc 2.11 or newer in order to use --citeproc: +# https://pandoc.org/releases.html#pandoc-2.11-2020-10-11 +# On Ubuntu this may mean going directly to the source repo and downloading/installing the .deb dst/index.html: dst/index.md src/references.bib src/template/index.html dst/style.css pandoc dst/index.md --template src/template/index.html -s --table-of-contents --bibliography=src/references.bib --citeproc --columns 1000 -H src/header.html -V lang=en -o $@ diff --git a/README.md b/README.md index cbea270..3db737f 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ The hosted github page is automatically built on push to master. To build the page locally, run `make`. -Make sure you have [pandoc](https://pandoc.org/) installed. +Make sure you have [pandoc](https://pandoc.org/) installed, version 2.11 or greater. 
## Development To continuously build the page locally, listening to changes, run: From 561888027f183bcd6355eb33aefe9c88421ce617 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Thu, 20 Jun 2024 13:51:06 -0400 Subject: [PATCH 2/4] CDL: type->which in bash script to check if program exists --- src/markdown_fix.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/markdown_fix.sh b/src/markdown_fix.sh index cdd7163..26266d8 100644 --- a/src/markdown_fix.sh +++ b/src/markdown_fix.sh @@ -1,6 +1,6 @@ #!/bin/bash -if type gsed >/dev/null +if which gsed >/dev/null then echo "Using gsed" function ssed { gsed "$@" ;} From 4fd0638085b1241477bb587396e042ce78798889 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Thu, 20 Jun 2024 13:58:45 -0400 Subject: [PATCH 3/4] CDL: add a clearer troubleshooting note --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 3db737f..9b37bc7 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,8 @@ The hosted github page is automatically built on push to master. To build the page locally, run `make`. -Make sure you have [pandoc](https://pandoc.org/) installed, version 2.11 or greater. +Make sure you have [pandoc](https://pandoc.org/) installed (version 2.11 or greater). +If you see "unknown option: --citeproc", your pandoc is too old; you may need to install [the latest version directly](https://github.com/jgm/pandoc/releases/latest) rather than with `apt`. 
## Development To continuously build the page locally, listening to changes, run: From a964ebeafc9047f2d773b2365572fe0b2eb6b987 Mon Sep 17 00:00:00 2001 From: Colin Leong <--unset> Date: Thu, 20 Jun 2024 17:21:03 -0400 Subject: [PATCH 4/4] CDL: updating NCSLGR citation and license --- src/datasets/NCSLGR.json | 6 +++--- src/index.md | 2 +- src/references.bib | 23 ++++++++++++++++------- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/src/datasets/NCSLGR.json b/src/datasets/NCSLGR.json index aad9aad..f9ef6c0 100644 --- a/src/datasets/NCSLGR.json +++ b/src/datasets/NCSLGR.json @@ -2,7 +2,7 @@ "pub": { "name": "NCSLGR", "year": 2007, - "publication": "dataset:databases2007volumes", + "publication": "dataset:Neidle_2020_NCSLGR_ISLRN", "url": "https://www.bu.edu/asllrp/ncslgr.html" }, "loader": "ncslgr", @@ -15,7 +15,7 @@ "#items": null, "#samples": "1,875 sentences", "#signers": 4, - "license": "TODO", - "licenseUrl": null, + "license": "Research Attribution", + "licenseUrl": "https://www.bu.edu/asllrp/data-credits.html", "contact": "carol@bu.edu" } diff --git a/src/index.md b/src/index.md index f94f473..be37082 100644 --- a/src/index.md +++ b/src/index.md @@ -1042,7 +1042,7 @@ are collections of annotated single signs. They are synthesized [@dataset:ebling contain parallel sequences of signs and spoken language. Available continuous sign corpora are extremely limited, containing 4-6 orders of magnitude fewer sentence pairs than similar corpora for spoken language machine translation [@arivazhagan2019massively]. Moreover, while automatic speech recognition (ASR) datasets contain up to 50,000 hours of recordings [@pratap2020mls], the most extensive continuous sign language corpus contains only 1,150 hours, and only 50 of them are publicly available [@dataset:hanke-etal-2020-extending]. 
-These datasets are usually synthesized [@dataset:databases2007volumes;@dataset:Crasborn2008TheCN;@dataset:ko2019neural;@dataset:hanke-etal-2020-extending] or recorded in studio conditions [@dataset:forster2014extensions;@cihan2018neural], which does not account for noise in real-life conditions. Moreover, some contain signed interpretations of spoken language rather than naturally-produced signs, which may not accurately represent native signing since translation is now a part of the discourse event. +These datasets are usually synthesized [@dataset:Neidle_2020_NCSLGR_ISLRN;@dataset:Crasborn2008TheCN;@dataset:ko2019neural;@dataset:hanke-etal-2020-extending] or recorded in studio conditions [@dataset:forster2014extensions;@cihan2018neural], which does not account for noise in real-life conditions. Moreover, some contain signed interpretations of spoken language rather than naturally-produced signs, which may not accurately represent native signing since translation is now a part of the discourse event. ###### Availability {-} diff --git a/src/references.bib b/src/references.bib index e5d4fef..33f1a6c 100644 --- a/src/references.bib +++ b/src/references.bib @@ -73,13 +73,6 @@ @inproceedings{dataset:vintar2012compiling year = {2012} } -@misc{dataset:databases2007volumes, - author = {Databases, NCSLGR}, - publisher = {American Sign Language Linguistic Research Project (Distributed on CD-ROM~…}, - title = {Volumes 2--7}, - year = {2007} -} - @inproceedings{dataset:imashev2020dataset, address = {Online}, author = {Imashev, Alfarabi and @@ -3443,3 +3436,19 @@ @misc{SiMAX2020SignLanguage url = {https://cordis.europa.eu/project/id/778421}, urldate = {2024-06-18} } + +@inproceedings{Vogler2012ANW, + title={A new web interface to facilitate access to corpora: development of the ASLLRP data access interface}, + author={Christian Vogler and C. 
Neidle}, + year={2012}, + url={https://api.semanticscholar.org/CorpusID:58305327} +} + +@misc{dataset:Neidle_2020_NCSLGR_ISLRN, + type = {Languageresource}, + title = {National Center for Sign Language and Gesture Resources (NCSLGR) corpus. ISLRN 833-505-711-564-4}, + author = {Carol Neidle and Stan Sclaroff}, + year = {2012}, + publisher = {Boston University}, + url = {https://www.islrn.org/resources/833-505-711-564-4/} +} \ No newline at end of file