From e3d1b5ed03d5ff2e29ad9789314291bcb96ecd92 Mon Sep 17 00:00:00 2001
From: Karim-53 <33978275+Karim-53@users.noreply.github.com>
Date: Mon, 7 Jun 2021 19:49:43 +0200
Subject: [PATCH 1/7] fix --surah argument

---
 download.py      | 24 +++++++++++++-----------
 requirements.txt |  1 +
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/download.py b/download.py
index 6c666a2..aa2bb3c 100644
--- a/download.py
+++ b/download.py
@@ -3,7 +3,11 @@
 A file for downloading audio recordings from the Tarteel V1 dataset.
 Contributed by @kareemn.
 
-Example usage: python download.py -s 1 --use-cache
+Example usage 1: download only the audio related to surah 1 (Al-Fatiha), 140 MB
+python download.py -s 1 --use-cache --keep-downloaded-audio
+
+Example usage 2: download the entire audio dataset
+python download.py --use-cache --keep-downloaded-audio
 """
 
 from argparse import ArgumentParser
@@ -27,7 +31,7 @@
 parser.add_argument('--local-csv-filename', type=str, default='local.csv')
 parser.add_argument('--cache-dir', type=str, default='.cache')
 parser.add_argument('-u', '--use-cache', action='store_true')
-parser.add_argument('-s', '--surah', type=int)
+parser.add_argument('-s', '--surah', type=int, default=0)
 parser.add_argument('-k', '--keep-downloaded-audio', action='store_true')
 parser.add_argument(
     '--log', choices=['DEBUG', 'INFO', 'WARNING', 'CRITICAL'], default='INFO',
@@ -65,7 +69,7 @@ def download_entry_audio(entry, download_audio_dir, raw_audio_dir, use_cache=Tru
 
     # Ensure the proper surah directory structure for the downloaded audio.
     downloaded_ayah_audio_dir = file_utils.prepare_ayah_directory(
-      download_audio_dir, surah_num, ayah_num)
+        download_audio_dir, surah_num, ayah_num)
 
     # Download and save the initially downloaded audio recording to the given path.
     download_recording_from_url(url, downloaded_ayah_audio_dir, use_cache)
@@ -84,13 +88,13 @@ def download_entry_audio(entry, download_audio_dir, raw_audio_dir, use_cache=Tru
     # Prepare all requisite cache directories.
     subcache_directory_names = (DATASET_CSV_CACHE, DOWNLOADED_AUDIO_CACHE, RAW_AUDIO_CACHE)
     csv_cache_dir, downloaded_audio_dir, raw_audio_dir = file_utils.prepare_cache_directories(
-                                                  subcache_directory_names,
-                                                  cache_directory,
-                                                  use_cache)
+        subcache_directory_names,
+        cache_directory,
+        use_cache)
 
     # Create path to dataset csv.
     path_to_dataset_csv = file_utils.get_path_to_dataset_csv(
-      csv_cache_dir, args.local_csv_filename)
+        csv_cache_dir, args.local_csv_filename)
 
     # If we have decided not to use the cache, download the dataset CSV.
     if not use_cache:
@@ -99,7 +103,7 @@ def download_entry_audio(entry, download_audio_dir, raw_audio_dir, use_cache=Tru
     # If csv is not in specified location, then throw an error.
     if not file_utils.does_cached_csv_dataset_exist(path_to_dataset_csv):
         logging.info('Dataset CSV not found at {}. Downloading to location...'.format(
-              path_to_dataset_csv))
+            path_to_dataset_csv))
         download_csv_dataset(args.csv_url, path_to_dataset_csv)
     else:
         logging.info("Using cached copy of dataset csv at {}.".format(path_to_dataset_csv))
@@ -112,9 +116,7 @@ def download_entry_audio(entry, download_audio_dir, raw_audio_dir, use_cache=Tru
 
     # Download the audio in the dataset.
     for entry in tqdm(labeled_entries, desc='Audio Files'):
-        if surah_to_download and entry[0] == str(surah_to_download):
-            download_entry_audio(entry, downloaded_audio_dir, raw_audio_dir, use_cache)
-        else:
+        if surah_to_download == 0 or entry[0] == str(surah_to_download):
             download_entry_audio(entry, downloaded_audio_dir, raw_audio_dir, use_cache)
 
     # If we don't want to keep the raw audio, remove it from the cache.
diff --git a/requirements.txt b/requirements.txt
index 3c49ac6..2f9ab86 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
 deepspeech==0.7.1
 google-cloud-speech==1.3.2
+tensorflow
 librosa
 numpy==1.18.2
 pandas==0.25.3

From dcd679e13e9ce2895677576bc62de69478002993 Mon Sep 17 00:00:00 2001
From: Karim-53 <33978275+Karim-53@users.noreply.github.com>
Date: Mon, 7 Jun 2021 20:25:04 +0200
Subject: [PATCH 2/7] pin tf to a version

Co-Authored-By: Anas Abou Allaban <16828657+piraka9011@users.noreply.github.com>
---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 2f9ab86..a2f8385 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 deepspeech==0.7.1
 google-cloud-speech==1.3.2
-tensorflow
+tensorflow==2.5.0
 librosa
 numpy==1.18.2
 pandas==0.25.3

From dd23dffd8599f8d28148f215a9d2ed3e3283e670 Mon Sep 17 00:00:00 2001
From: Karim-53 <33978275+Karim-53@users.noreply.github.com>
Date: Thu, 10 Jun 2021 18:39:04 +0200
Subject: [PATCH 3/7] refactor

---
 requirements.txt | 1 +
 utils/files.py   | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index a2f8385..5517700 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,7 @@
 deepspeech==0.7.1
 google-cloud-speech==1.3.2
 tensorflow==2.5.0
+dill
 librosa
 numpy==1.18.2
 pandas==0.25.3
diff --git a/utils/files.py b/utils/files.py
index 5db9d2e..907493c 100644
--- a/utils/files.py
+++ b/utils/files.py
@@ -103,7 +103,7 @@ def clean_cache_directories(cache_directory: str = DEFAULT_CACHE_DIRECTORY) -> N
     # If the cache directory doesn't exist, then just make an empty one.
     if not os.path.isdir(cache_directory):
         os.makedirs(cache_directory)
-        
+
     for subdirectory in os.listdir(cache_directory):
         logging.info("Removing cache_subdirectory {}.".format(subdirectory))
         shutil.rmtree(os.path.join(cache_directory, subdirectory))

From b24704c9eca3abba98d19ebbe5f1573e0c21bee3 Mon Sep 17 00:00:00 2001
From: Karim-53 <33978275+Karim-53@users.noreply.github.com>
Date: Thu, 10 Jun 2021 18:39:08 +0200
Subject: [PATCH 4/7] Create .editorconfig

---
 .editorconfig | 9 +++++++++
 1 file changed, 9 insertions(+)
 create mode 100644 .editorconfig

diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..32a1e27
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,9 @@
+root = true
+
+[*]
+charset = utf-8
+end_of_line = lf
+indent_size = 4
+indent_style = space
+insert_final_newline = true
+trim_trailing_whitespace = true
\ No newline at end of file

From e134020a92312a456426e7b77bd6f8d88b61ea50 Mon Sep 17 00:00:00 2001
From: Karim-53 <33978275+Karim-53@users.noreply.github.com>
Date: Thu, 10 Jun 2021 19:24:39 +0200
Subject: [PATCH 5/7] Safe delete environment.yml

---
 CONTRIBUTING.md | 60 ++++++++++++++++++++++---------------------------
 README.md       |  8 +++++--
 environment.yml | 19 ----------------
 3 files changed, 33 insertions(+), 54 deletions(-)
 delete mode 100644 environment.yml

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 884d1f8..eb6157b 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,63 +1,57 @@
 # Contributing
 
-Tarteel-ML is an open-source project, which means you can help us make it better! 
-Check out the Issues tab to see open issues. 
-You're welcome to start with those issues that are tagged with `Good First Issue`, 
+Tarteel-ML is an open-source project, which means you can help us make it better!
+Check out the Issues tab to see open issues.
+You're welcome to start with those issues that are tagged with `Good First Issue`,
 tackle other issues, or create your own issues.
 
 ## Getting started
 Thank you for considering contributing to Tarteel-ML! Here are step-by-step instructions.
 
 ### Installing Dependencies
-Before starting, you will need to install a few dependencies. We use the
+
+1. Before starting, you will need to install a few dependencies. We use the
 [Anaconda Python distribution](https://www.anaconda.com/) for dependency management, and
 our instructions assume you use it to. You can download it at this
 [link](https://www.anaconda.com/download/).
 
-Once you have installed Anaconda and verified it is being used, download and `cd` into the
-Tarteel-ML repository and run the following commands to install all dependencies.
-```commandline
-conda env create -f environment.yml
-```
+2. Once you have installed Anaconda and verified it is being used, clone and `cd` into the
+Tarteel-ML repository.
 
-After this, activate the `tarteel` environment.
-```commandline
-source activate tarteel-ml
-```
 
-You should now be ready to contribute to Tarteel-ML! When you are done, remember to deactivate the
-environment.
+3. We highly recommend creating a specific env for this repo by running the following commands to install all dependencies.
+   ```commandline
+   conda env create --name tarteel-ml --file requirements.txt
+   ```
+
+4. After this, activate the `tarteel-ml` environment.
+   ```commandline
+   source activate tarteel-ml
+   ```
+
+You should now be ready to contribute to Tarteel-ML! When you are done, remember to deactivate the environment.
 ```commandline
 source deactivate tarteel-ml
 ```
 
 
 ### Adding New Dependencies
-Use the `conda install` command to add any new dependencies and ensure that the environment
+Use the `pip install <library-name>` command to add any new dependencies and ensure that the environment
 resolves. Pull requests with new dependencies that break the existing environment for others will be
 rejected.
 
-After adding your new dependencies in Anaconda, add it (with the version number) in `environment.yml`
-under `dependencies`.
-
-#### What if the dependency I want to add isn't in Anaconda?
-For any dependencies not present in Anaconda, there is a way to install with `pip`, the default
-Python library manager. Use the command
-```
-which pip
-```
-ensure that your pip binary is the one installed by Anaconda (the output of the command should be
-similar to `/anaconda3/envs/tarteel/bin/pip`). You should then `pip install` the library and add it
-to the file `environment.yml` under `pip:`.
-
+After installing your new dependencies in your Anaconda environment, add them (with their version numbers) to `requirements.txt`.
 
 ### Conventions
 
 #### Pull Requests
-Whenever submitting a new PR, create a new branch named using the convention `<username>/<issue>`.
-Make sure to include descriptive and clear commit messages, while also referencing any issues your
-PR addresses. Your pull request will be reviewed by the maintainers of this repository, and upon
-approval, will be merged into the master branch. 
+- Whenever submitting a new PR, create a new branch named using the convention `<username>/<issue>`.
+
+- Make sure to include descriptive and clear commit messages, while also referencing any issues your
+PR addresses.
+
+- Your pull request will be reviewed by the maintainers of this repository, and upon
+approval, will be merged into the master branch.
 
 #### Documentation
 Tarteel-ML requires that your code be well-commented and that you explain clearly what your changes
diff --git a/README.md b/README.md
index e4229a9..b829b20 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,8 @@
-# Tarteel Machine Learning 
+
+[![Platform: Linux | MacOS](https://img.shields.io/badge/Platform-Linux|MacOS-lightgrey)]()
+[![Python Version](https://img.shields.io/badge/python-v3.6-blue)]()
+
+# Tarteel Machine Learning
 
 This repo is designed to house code related to Tarteel machine learning related tasks. :microscope:
 
@@ -17,7 +21,7 @@ If you found this repo helpful, please keep it's contributors in your duaa :rais
 
 ## Getting Started :beginner:
 
-### Prerequisites 
+### Prerequisites
 
 We use Python 3.7 for our development.
 However, any Python above 3.6 should work.
diff --git a/environment.yml b/environment.yml
deleted file mode 100644
index 35cbc47..0000000
--- a/environment.yml
+++ /dev/null
@@ -1,19 +0,0 @@
-name: tarteel-ml
-channels:
-  - conda-forge
-  - defaults
-dependencies:
-  - dill=0.2.8.2
-  - ffmpeg=4.0
-  - jupyter=1.0.0
-  - matplotlib=3.0.2
-  - numpy=1.15.4
-  - pandas=0.24.1
-  - pip=19.1
-  - python=3.6.8
-  - requests=2.21.0
-  - scikit-learn=0.20.2
-  - sox=14.4.2
-  - tensorflow=1.12.0
-  - pip:
-    - pyAudioAnalysis==0.2.5

From dbe7e30b66dba25233552934021caa7718284d21 Mon Sep 17 00:00:00 2001
From: Karim-53 <33978275+Karim-53@users.noreply.github.com>
Date: Thu, 10 Jun 2021 19:25:27 +0200
Subject: [PATCH 6/7] update and sort requirements.txt

---
 requirements.txt | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index db8a064..7ab1047 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,15 +1,18 @@
+PyYAML==5.4
 deepspeech==0.7.1
+dill~=0.2.8.2
+ffmpeg==4.0
 google-cloud-speech==1.3.2
-tensorflow==2.5.0
-dill
 librosa
+matplotlib
 numpy==1.18.2
 pandas==0.25.3
+pyAudioAnalysis==0.2.5
 pydub==0.23.1
 python-Levenshtein==0.12.0
-PyYAML==5.4
 requests
 scikit-learn
 soundfile
+sox==14.4.2
+tensorflow==2.5.0
 tqdm==4.43.0
-

From fbaa13d545baa79e9c13bef0434b8e2a69141d62 Mon Sep 17 00:00:00 2001
From: Karim-53 <33978275+Karim-53@users.noreply.github.com>
Date: Thu, 10 Jun 2021 19:44:36 +0200
Subject: [PATCH 7/7] resolve env on Google Colab

---
 requirements.txt | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 7ab1047..6db787f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,18 +1,18 @@
 PyYAML==5.4
 deepspeech==0.7.1
-dill~=0.2.8.2
-ffmpeg==4.0
+dill
+ffmpeg
 google-cloud-speech==1.3.2
 librosa
 matplotlib
-numpy==1.18.2
-pandas==0.25.3
+numpy
+pandas
 pyAudioAnalysis==0.2.5
 pydub==0.23.1
-python-Levenshtein==0.12.0
+python-Levenshtein~=0.12.0
 requests
 scikit-learn
 soundfile
-sox==14.4.2
+sox
 tensorflow==2.5.0
 tqdm==4.43.0