From 8ae69186491d9ba23cf590696654bc3ab1105ef4 Mon Sep 17 00:00:00 2001
From: Emerson Rocha <rocha@ieee.org>
Date: Thu, 11 Nov 2021 16:52:59 -0300
Subject: [PATCH] fititnt/hxltm-action#5: data-normalization #1

---
 .gitignore                        |  5 +++-
 CHANGELOG.md                      | 10 +++++++
 README.md                         |  8 +++---
 scripts/data-original-download.sh |  2 +-
 scripts/data-original-prepare.sh  | 43 +++++++++++++++++++++++++++++--
 5 files changed, 61 insertions(+), 7 deletions(-)
 create mode 100644 CHANGELOG.md

diff --git a/.gitignore b/.gitignore
index 71dbdef..cd87353 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,4 +8,7 @@ data/original/terminologies.zip
 data/original/tico19-testset.zip
 !.gitignore
 !README.md
-tmp/
\ No newline at end of file
+tmp/
+
+# temp
+data/original/terminology/facebook/*.csv
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..5693b7f
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,10 @@
+# Changelog
+
+## [Unreleased]
+### Added
+- TODO
+
+## [0.9.0] - 2021-11-11
+### Added
+- **Fiat lux!**
+- Draft of scripts to download data from TICO-19 original sources
diff --git a/README.md b/README.md
index 6f7c1ce..0ba79bf 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,10 @@
 # tico-19-hxltm
-**[draft] Public domain datasets from Translation Initiative for COVID-19
-on the format HXLTM (Multilingual Terminology in Humanitarian Language Exchange)**
+**[draft] Public domain datasets from the
+[Translation Initiative for COVID-19](https://tico-19.github.io) in the format
+HXLTM (Multilingual Terminology in Humanitarian Language Exchange).**
 
-> TODO: move to @EticaAI organization
+> TODO: move to [@EticaAI](https://github.com/EticaAI) organization and
+  publish on a subdomain.
 
 ## License
 
diff --git a/scripts/data-original-download.sh b/scripts/data-original-download.sh
index 0a34bf1..a98c039 100755
--- a/scripts/data-original-download.sh
+++ b/scripts/data-original-download.sh
@@ -10,7 +10,7 @@
 #
 #       OPTIONS:  ---
 #
-#  REQUIREMENTS:  ---
+#  REQUIREMENTS:  - git
 #          BUGS:  ---
 #         NOTES:  ---
 #       AUTHORS:  Emerson Rocha <rocha[at]ieee.org>
diff --git a/scripts/data-original-prepare.sh b/scripts/data-original-prepare.sh
index d6ba97c..3bf181e 100755
--- a/scripts/data-original-prepare.sh
+++ b/scripts/data-original-prepare.sh
@@ -9,7 +9,8 @@
 #
 #       OPTIONS:  ---
 #
-#  REQUIREMENTS:  ---
+#  REQUIREMENTS:  - rename
+#                 - rsync
 #          BUGS:  ---
 #         NOTES:  ---
 #       AUTHORS:  Emerson Rocha <rocha[at]ieee.org>
@@ -22,6 +23,7 @@
 # ==============================================================================
 set -e
 
+PWD_NOW=$(pwd)
 TMP_DIR="tmp"
 DATA_DIR="data"
 DATA_ORIGINAL_DIR="data/original"
@@ -29,7 +31,44 @@ DATA_ORIGINAL_GIT_DIR="tmp/original-git"
 
 set -x
 rsync --archive --verbose "${DATA_ORIGINAL_GIT_DIR}/data/" "$DATA_ORIGINAL_DIR/"
-set +x
+# set +x
+
+
+# cd "$DATA_ORIGINAL_DIR/terminologies"
+# pwd
+
+# Copy
+find "$DATA_ORIGINAL_DIR/terminologies/" -name 'f_*' -type f -exec cp "{}" "$DATA_ORIGINAL_DIR/terminology/facebook"  \;
+
+# Rename
+# find "$DATA_ORIGINAL_DIR/terminology/facebook/" -name 'f_*' -type f -exec ls "{}"  \;
+
+# rename 's/f_//' "$DATA_ORIGINAL_DIR/terminology/facebook/*.csv"
+
+# find "$DATA_ORIGINAL_DIR/terminology/facebook/" -name 'f_*' -type f -exec rename 's/f_//_' "{}"  \;
+
+
+# echo 'oi'
+# find f_* -type f | sed -n "s/f_//" | xargs print
+# echo 'bye'
+# echo 'oi2'
+# find f_* -type f -exec sed -n "s/f_//" {} \;
+# echo 'bye2'
+# echo 'oi2'
+# find ./ -type f -exec sed -i -e 's/f_//g' {} \;
+# echo 'bye2'
+# find f_* -type f  -print0
+# # ecfind f_* -type f  -print0 | xargs --null -I{} mv {} {}_renamed
+# echo 'bye3'
+
+
+
+# find . -type f |
+# sed -n "s/\(.*\)factory\.py$/& \1service\.py/p" |
+# xargs -p -n 2 mv
+
+# for
+
 
 # if [ ! -d "${DATA_ORIGINAL_DIR}/terminology" ]; then
 #     mkdir "${DATA_ORIGINAL_DIR}/terminology"