From 5fea686b91ce241708dbf13eabaf5c0d7deb99fe Mon Sep 17 00:00:00 2001
From: SuperKogito <superkogito@gmail.com>
Date: Mon, 13 Feb 2023 22:21:25 +0100
Subject: [PATCH 1/5] edit code to generate files automatically

---
 src/build_project.sh  |  7 ++++
 src/conf.py           |  3 +-
 src/generate_files.py | 91 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 99 insertions(+), 2 deletions(-)
 create mode 100755 src/build_project.sh
 create mode 100644 src/generate_files.py

diff --git a/src/build_project.sh b/src/build_project.sh
new file mode 100755
index 0000000..05f4449
--- /dev/null
+++ b/src/build_project.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+echo "Rst and Md files generations"
+python generate_files.py      
+
+echo "Make html files"
+make html 
diff --git a/src/conf.py b/src/conf.py
index b938dcc..92137ae 100644
--- a/src/conf.py
+++ b/src/conf.py
@@ -36,7 +36,7 @@
     'sphinx.ext.coverage',
     'sphinx.ext.ifconfig',
     'sphinx.ext.viewcode',
-    'sphinx_contributors',
+    #'sphinx_contributors',
     'crate.sphinx.csv',
 ]
 
@@ -59,7 +59,6 @@
 html_theme_options = {
     "github_url": "https://github.com/superkogito/ser-datasets",
     "search_bar_text": "Search this site...",
-    "google_analytics_id": "UA-133660046-1",
 
     "navbar_start": ["navbar-logo"],
     "navbar_center": ["navbar-nav"],
diff --git a/src/generate_files.py b/src/generate_files.py
new file mode 100644
index 0000000..755a97e
--- /dev/null
+++ b/src/generate_files.py
@@ -0,0 +1,91 @@
+
+import csv
+import json 
+from tabulate import tabulate
+
+
+# load datasets
+json_file_path = "ser-datasets.json"
+with open(json_file_path, 'r') as j:
+     content = json.loads(j.read())
+
+# init keys
+keys = ["Dataset", "Year", "Content", "Emotions", "Format", "Size", "Language", "Paper", "Access", "License", "Dataset-link", "Paper-link", "License-link"]
+header = ["Dataset", "Year", "Content", "Emotions", "Format", "Size", "Language", "Paper", "Access", "License"]
+
+md_1 = """***Spoken Emotion Recognition Datasets:*** *A collection of datasets (count=42) for the purpose of emotion recognition/detection in speech.
+The table is chronologically ordered and includes a description of the content of each dataset along with the emotions included.
+The table can be browsed, sorted and searched under https://superkogito.github.io/SER-datasets/*
+"""
+
+md_2 = """## References
+
+- Swain, Monorama & Routray, Aurobinda & Kabisatpathy, Prithviraj, Databases, features and classifiers for speech emotion recognition: a review, International Journal of Speech Technology, [paper](https://www.researchgate.net/publication/322602563_Databases_features_and_classifiers_for_speech_emotion_recognition_a_review#pf19)
+- Dimitrios Ververidis and Constantine Kotropoulos, A State of the Art Review on Emotional Speech Databases, Artificial Intelligence & Information Analysis Laboratory, Department of Informatics Aristotle, University of Thessaloniki, [paper](http://poseidon.csd.auth.gr/papers/PUBLISHED/CONFERENCE/pdf/Ververidis2003b.pdf)
+- A. Pramod Reddy and V. Vijayarajan, Extraction of Emotions from Speech-A Survey, VIT University, International Journal of Applied Engineering Research, [paper](https://www.ripublication.com/ijaer17/ijaerv12n16_46.pdf)
+- Emotional Speech Databases, [document](https://link.springer.com/content/pdf/bbm%3A978-90-481-3129-7%2F1.pdf)
+- Expressive Synthetic Speech, [website](http://emosamples.syntheticspeech.de/)
+- Towards a standard set of acoustic features for the processing of emotion in speech, Technical university Munich, [document](https://asa.scitation.org/doi/pdf/10.1121/1.4739483)
+
+
+## Contribution
+
+- All contributions are welcome! If you know a dataset that belongs here (see [criteria](https://github.com/SuperKogito/SER-datasets/blob/master/CONTRIBUTING.md#criteria)) but is not listed, please feel free to add it. For more information on Contributing, please refer to [CONTRIBUTING.md](https://github.com/SuperKogito/SER-datasets/blob/master/CONTRIBUTING.md).
+
+- If you notice a typo or a mistake, please [report this as an issue](https://github.com/SuperKogito/SER-datasets/issues/new) and help us improve the quality of this list.
+
+
+## Disclaimer
+- The maintainer and the contributors try their best to keep this list up-to-date, and to only include working links (using automated verification with the help of the [urlchecker-action](https://github.com/marketplace/actions/urlchecker-action)). However, we cannot guarantee that all listed links are up-to-date. Read more in [DISCLAIMER.md](https://github.com/SuperKogito/SER-datasets/blob/master/DISCLAIMER.md).
+"""
+
+
+print(" -> Generate Markdown Text")
+def format_md_link(label, link):
+    res = "[{0}]({1})".format(label, link) if "http" in link else label
+    return res
+
+# tabulate
+table = []
+for key, values in content.items():
+    # add elements to row
+    row = [format_md_link(key, values["Dataset-link"])] 
+    row += [values[k] for k in ["Year", "Content", "Emotions", "Format", "Size", "Language"]]
+    row += [format_md_link(values["Paper"], values["Paper-link"]), values["Access"], format_md_link(values["License"], values["License-link"])] 
+    
+    # add styles and add row to table
+    row = ["<sub>{0}</sub>".format(e) for e in row] 
+    table.append(row)
+
+table = tabulate(table, keys, tablefmt="pipe")
+with open("../README.md", "w") as f:
+    f.write(md_1)
+    f.write(table)
+    f.write(md_2)
+
+
+print(" -> Generate Restructured Text")
+def format_rst_link(label, link):
+    res = "`{0} <{1}>`_".format(label, link) if "http" in link else label
+    return res
+
+# tabulate
+table = []
+for key, values in content.items():
+    # add elements to row
+    row = [format_rst_link(key, values["Dataset-link"])] 
+    row += [values[k] for k in ["Year", "Content", "Emotions", "Format", "Size", "Language"]]
+    row += [format_rst_link(values["Paper"], values["Paper-link"]), values["Access"]]
+    row += [format_rst_link(values["License"], values["License-link"])] 
+
+    # format and add row to csv
+    table.append(row) 
+    
+with open('ser-datasets.csv', 'w', encoding='UTF8', newline='') as f:
+    writer = csv.writer(f)
+
+    # write the header
+    writer.writerow(header)
+
+    # write multiple rows
+    writer.writerows(table)

From 525dffd1c0e87d3fb291173aed3f4376b4265922 Mon Sep 17 00:00:00 2001
From: SuperKogito <superkogito@gmail.com>
Date: Mon, 13 Feb 2023 22:21:40 +0100
Subject: [PATCH 2/5] add requirements file

---
 src/requirements.txt | 67 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 src/requirements.txt

diff --git a/src/requirements.txt b/src/requirements.txt
new file mode 100644
index 0000000..5182820
--- /dev/null
+++ b/src/requirements.txt
@@ -0,0 +1,67 @@
+ablog==0.10.25
+alabaster==0.7.12
+Babel==2.10.1
+beautifulsoup4==4.11.1
+bleach==6.0.0
+certifi==2021.10.8
+cffi==1.15.1
+charset-normalizer==2.0.12
+commonmark==0.9.1
+cryptography==39.0.1
+docutils==0.17.1
+entrypoints==0.4
+feedgen==0.9.0
+idna==3.3
+imagesize==1.3.0
+importlib-metadata==4.11.3
+invoke==1.7.0
+jeepney==0.8.0
+Jinja2==3.1.2
+keyring==23.6.0
+latexcodec==2.0.1
+lxml==4.8.0
+MarkupSafe==2.1.1
+nest-asyncio==1.5.6
+packaging==21.3
+pkginfo==1.8.3
+pybtex==0.24.0
+pybtex-docutils==1.0.1
+pycparser==2.21
+pydata-sphinx-theme==0.8.1
+Pygments==2.12.0
+pyparsing==3.0.8
+python-dateutil==2.8.2
+pytz==2022.1
+PyYAML==6.0
+pyzmq==24.0.1
+readme-renderer==35.0
+requests==2.27.1
+requests-toolbelt==0.9.1
+rich==12.5.1
+SecretStorage==3.3.2
+six==1.16.0
+snowballstemmer==2.2.0
+soupsieve==2.3.2.post1
+Sphinx==4.5.0
+sphinx-copybutton==0.5.0
+sphinx-csv-filter==0.4.0
+sphinx-panels==0.6.0
+sphinx-sitemap==2.2.0
+sphinxcontrib-applehelp==1.0.2
+sphinxcontrib-bibtex==2.4.2
+sphinxcontrib-devhelp==1.0.2
+sphinxcontrib-htmlhelp==2.0.0
+sphinxcontrib-jsmath==1.0.1
+sphinxcontrib-pdfembed @ git+https://github.com/SuperKogito/sphinxcontrib-pdfembed@d75fb37f9e4a303888a61f265b568f7729826c4a
+sphinxcontrib-qthelp==1.0.3
+sphinxcontrib-serializinghtml==1.1.5
+sphinxcontrib-tikz==0.4.16
+sphinxemoji==0.2.0
+sphinxext-opengraph==0.6.3
+tabulate==0.9.0
+tornado==6.2
+tqdm==4.64.0
+urllib3==1.26.9
+watchdog==2.1.7
+webencodings==0.5.1
+zipp==3.8.0

From 2c6923caba06772da05850b502746f3554f21246 Mon Sep 17 00:00:00 2001
From: SuperKogito <superkogito@gmail.com>
Date: Mon, 13 Feb 2023 22:22:08 +0100
Subject: [PATCH 3/5] update readme and rst resources

---
 README.md            | 195 ++++++++++---------------------------------
 src/index.rst        | 101 +---------------------
 src/ser-datasets.csv |  86 +++++++++----------
 3 files changed, 88 insertions(+), 294 deletions(-)

diff --git a/README.md b/README.md
index bc5b209..5bd3f97 100644
--- a/README.md
+++ b/README.md
@@ -1,55 +1,50 @@
 ***Spoken Emotion Recognition Datasets:*** *A collection of datasets (count=42) for the purpose of emotion recognition/detection in speech.
 The table is chronologically ordered and includes a description of the content of each dataset along with the emotions included.
 The table can be browsed, sorted and searched under https://superkogito.github.io/SER-datasets/*
-
-| <sub>Dataset</sub>                                                                                | <sub>Year</sub> | <sub>Content</sub>                                                                                                                                    | <sub>Emotions</sub>                                                                                                                                                                                                                                                          | <sub>Format</sub>             | <sub>Size</sub>     | <sub>Language</sub>                                               | <sub>Paper</sub>                                                                                                                                                             | <sub>Access</sub>              | <sub>License</sub>                                                                           |
-|---------------------------------------------------------------------------------------------------|-----------------|-------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------|---------------------|-------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------|----------------------------------------------------------------------------------------------|
-| <sub>[MESD]</sub>                                                                                 | <sub>2022</sub> | <sub>864 audio files of single-word emotional utterances with Mexican cultural shaping.</sub>                                                         | <sub>6 emotions provides single-word utterances for anger, disgust, fear, happiness, neutral, and sadness.</sub>                                                                                                                                                             | <sub>Audio</sub>              | <sub>0,097 GB</sub>    | <sub>Spanish (Mexican)</sub>                                      | <sub>[The Mexican Emotional Speech Database (MESD): elaboration and assessment based on machine learning]</sub>                                                              | <sub>Open</sub>                | <sub>[CC BY 4.0]</sub>       
-|<sub>[MLEnd]</sub>                                                                                 | <sub>2021</sub> | <sub>~32700 audio recordings files produced by 154 speakers. Each audio recording corresponds to one English numeral (from "zero" to "billion")</sub> | <sub> Intonations: neutral, bored, excited and question.</sub>                                                                                                                                                                                                               | <sub>Audio</sub>              | <sub>2.27 GB</sub>     | <sub>--</sub>                                                  | <sub>--</sub>                                                                                                                                                                | <sub>Open</sub>                | <sub>Unknown</sub>                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               |
-|<sub>[ASVP-ESD]</sub>                                                                              | <sub>2021</sub> | <sub>~13285 audio files collected from movies, tv shows and youtube containing speech and non-speech.</sub>                                           | <sub>12 different natural emotions (boredom, neutral, happiness, sadness, anger, fear, surprise, disgust, excitement, pleasure, pain, disappointment) with 2 levels of intensity. </sub>                                                                                     | <sub>Audio</sub>              | <sub> 2 GB </sub>   | <sub>Chinese, English, French, Russian and others</sub>            | <sub>--</sub>                                                                                                                                                                 | <sub> Open access</sub>         | <sub>Unknown</sub>                                                                           |
-| <sub>[ESD]</sub>                                                                                  | <sub>2021</sub>  | <sub>29 hours, 3500 sentences, by 10 native English speakers and 10 native Chinese speakers.</sub>                                                   | <sub>5 emotions: angry, happy, neutral, sad, and surprise.</sub>                                                                                                                                                                                                             | <sub>Audio,  Text</sub>       | <sub> 2.4 GB (zip) </sub> | <sub> Chinese, English </sub>                               | <sub>[Seen And Unseen Emotional Style Transfer For Voice Conversion With A New Emotional Speech Dataset]</sub>                                                               | <sub>Open access</sub>         | <sub>Available under an Academic License </sub>                                              |
-| <sub>[MuSe-CAR]</sub>                                                                             | <sub>2021</sub> | <sub>40 hours, 6,000+ recordings of 25,000+ sentences by 70+ English speakers (see db link for details).</sub>                                        | <sub>continuous emotion dimensions characterized using valence, arousal, and trustworthiness.</sub>                                                                                                                                                                          | <sub>Audio, Video, Text</sub> | <sub> 15 GB </sub>  | <sub> English </sub>                                              | <sub>[The Multimodal Sentiment Analysis in Car Reviews (MuSe-CaR) Dataset: Collection, Insights and Improvements]</sub>                                                      | <sub>Restricted access</sub>   | <sub>Available under an Academic License & Commercial License  </sub>                        |
-| <sub>[MSP-Podcast corpus]</sub>                                                                   | <sub>2020</sub> | <sub>100 hours by over 100 speakers (see db link for details).</sub>                                                                                  | <sub>This corpus is annotated with emotional labels using attribute-based descriptors (activation, dominance and valence) and categorical labels (anger, happiness, sadness, disgust, surprised, fear, contempt, neutral and other). </sub>                                  | <sub>Audio</sub>              | <sub> -- </sub>     | <sub> -- </sub>                                                   | <sub>[The MSP-Conversation Corpus]</sub>                                                                                                                                     | <sub>Restricted access</sub>   | <sub>Available under an Academic License & Commercial License  </sub>                        |
-| <sub>[emotiontts open db]</sub>                                                                   | <sub>2020</sub> | <sub>Recordings and their associated transcriptions by a diverse group of speakers.</sub>                                                             | <sub>4 emotions: general, joy, anger, and sadness.</sub>                                                                                                                                                                                                                     | <sub>Audio, Text</sub>        | <sub>--</sub>       | <sub>Korean</sub>                                                 | <sub>--</sub>                                                                                                                                                                | <sub>Partial open access</sub> | <sub>[CC BY-NC-SA 4.0] </sub>                                                                |
-| <sub>[URDU-Dataset]</sub>                                                                         | <sub>2020</sub> | <sub>400 utterances by 38 speakers (27 male and 11 female).</sub>                                                                                     | <sub>4 emotions: angry, happy, neutral, and sad.</sub>                                                                                                                                                                                                                       | <sub>Audio</sub>              | <sub>~72.1 MB</sub> | <sub>Urdu</sub>                                                   | <sub>[Cross Lingual Speech Emotion Recognition: Urdu vs. Western Languages]</sub>                                                                                            | <sub>Open access</sub>         | <sub>None specified                         </sub>                                           |
-| <sub>[BAVED]</sub>                                                                                | <sub>2020</sub> | <sub>1935 recording by 61 speakers (45 male and 16 female).</sub>                                                                                     | <sub>3 levels of emotion.</sub>                                                                                                                                                                                                                                              | <sub>Audio</sub>              | <sub>~195 MB</sub>  | <sub>Arabic</sub>                                                 | <sub>--</sub>                                                                                                                                                                | <sub>Open access</sub>         | <sub>None specified                         </sub>                                           |
-| <sub>[VIVAE]</sub>                                                                                | <sub>2020</sub> | <sub>non-speech, 1085 audio file by ~12 speakers.</sub>                                                                                               | <sub>non-speech 6 emotions: achievement, anger, fear, pain, pleasure, and surprise with 3 emotional intensities (low, moderate, strong, peak).</sub>                                                                                                                         | <sub>Audio</sub>              | <sub>--</sub>       | <sub>--</sub>                                                     | <sub>--</sub>                                                                                                                                                                | <sub>Restricted access</sub>   | <sub>[CC BY-NC-SA 4.0] </sub>                                                                |
-| <sub>[SEWA]</sub>                                                                                 | <sub>2019</sub> | <sub> more than 2000 minutes of audio-visual data of 398 people (201 male and 197 female) coming from 6 cultures.</sub>                               | <sub> emotions are characterized using valence and arousal.</sub>                                                                                                                                                                                                            | <sub>Audio, Video</sub>       | <sub>--</sub>       | <sub>Chinese, English, German, Greek, Hungarian and Serbian</sub> | <sub>[SEWA DB: A Rich Database for Audio-Visual Emotion and Sentiment Research in the Wild]</sub>                                                                            | <sub>Restricted access</sub>   | <sub>[SEWA EULA]  </sub>                                                                     |
-| <sub>[MELD]</sub>                                                                                 | <sub>2019</sub> | <sub>1400 dialogues and 14000 utterances from Friends TV series  by multiple speakers.</sub>                                                          | <sub>7 emotions: Anger, disgust, sadness, joy, neutral, surprise and fear.  MELD also has sentiment (positive, negative and neutral) annotation  for each utterance.</sub>                                                                                                   | <sub>Audio, Video, Text</sub> | <sub>~10.1 GB</sub> | <sub>English</sub>                                                | <sub>[MELD: A Multimodal Multi-Party Dataset for Emotion Recognition in Conversations]</sub>                                                                                 | <sub>Open access</sub>         | <sub>[MELD: GPL-3.0 License]  </sub>                                                         |
-| <sub>[ShEMO]</sub>                                                                                | <sub>2019</sub> | <sub>3000 semi-natural utterances, equivalent to 3 hours and 25 minutes of speech data from online radio plays by 87 native-Persian speakers. </sub>  | <sub>6 emotions: anger, fear, happiness, sadness, neutral and surprise. </sub>                                                                                                                                                                                               | <sub>Audio</sub>              | <sub>~1014 MB</sub> | <sub>Persian</sub>                                                | <sub>[ShEMO: a large-scale validated database for Persian speech emotion detection]</sub>                                                                                    | <sub>Open access</sub>         | <sub>None specified                                 </sub>                                   |
-| <sub>[DEMoS]</sub>                                                                                | <sub>2019</sub> | <sub>9365 emotional and 332 neutral samples produced by 68 native speakers (23 females, 45 males). </sub>                                             | <sub>7/6 emotions: anger, sadness, happiness, fear, surprise, disgust, and the secondary emotion guilt. </sub>                                                                                                                                                               | <sub>Audio</sub>              | <sub>--</sub>       | <sub>Italian</sub>                                                | <sub>[DEMoS: An Italian emotional speech corpus. Elicitation methods, machine learning, and perception]</sub>                                                                | <sub>Restricted access</sub>   | <sub>EULA: End User License Agreement</sub>                                                  |
-| <sub>[AESDD]</sub>                                                                                | <sub>2018</sub> | <sub>around 500 utterances by a diverse group of actors (over 5 actors) simulating various emotions.</sub>                                             | <sub> 5 emotions: anger, disgust, fear, happiness, and sadness.</sub>                                                                                                                                                                                                        | <sub>Audio</sub>              | <sub>~392 MB</sub>  | <sub>Greek</sub>                                                  | <sub>[Speech Emotion Recognition for Performance Interaction]</sub>                                                                                                          | <sub>Open access</sub>         | <sub>None specified                       </sub>                                             |
-| <sub>[Emov-DB]</sub>                                                                              | <sub>2018</sub> | <sub>Recordings for 4 speakers- 2 males and 2 females.</sub>                                                                                          | <sub>The emotional styles are neutral, sleepiness, anger, disgust and amused.</sub>                                                                                                                                                                                          | <sub>Audio</sub>              | <sub>5.88 GB</sub>  | <sub>English</sub>                                                | <sub>[The emotional voices database: Towards controlling the emotion dimension in voice generation systems]</sub>                                                            | <sub>Open access</sub>         | <sub>None specified                       </sub>                                             |
-| <sub>[RAVDESS]</sub>                                                                              | <sub>2018</sub> | <sub>7356 recordings by 24 actors.</sub>                                                                                                              | <sub>7 emotions: calm, happy, sad, angry, fearful, surprise, and disgust</sub>                                                                                                                                                                                               | <sub>Audio, Video</sub>       | <sub>~24.8 GB</sub> | <sub>English</sub>                                                | <sub>[The Ryerson Audio-Visual Database of Emotional Speech and Song (RAVDESS): A dynamic, multimodal set of facial and vocal expressions in North American English]</sub>   | <sub>Open access</sub>         | <sub>[CC BY-NC-SA 4.0] </sub>                                                                |
-| <sub>[JL corpus]</sub>                                                                            | <sub>2018</sub> | <sub>2400 recording of 240 sentences by 4 actors (2 males and 2 females).</sub>                                                                       | <sub>5 primary emotions: angry, sad, neutral, happy, excited. 5 secondary emotions: anxious, apologetic, pensive, worried, enthusiastic.</sub>                                                                                                                               | <sub>Audio</sub>              | <sub> -- </sub>     | <sub>English</sub>                                                | <sub>[An Open Source Emotional Speech Corpus for Human Robot Interaction Applications]</sub>                                                                                 | <sub>Open access</sub>         | <sub>[CC0 1.0]   </sub>                                                                      |
-| <sub>[CaFE]</sub>                                                                                 | <sub>2018</sub> | <sub>6 different sentences by 12 speakers (6 females + 6 males).</sub>                                                                                | <sub>7 emotions: happy, sad, angry, fearful, surprise, disgust and neutral. Each emotion is acted in 2 different intensities.</sub>                                                                                                                                          | <sub>Audio</sub>              | <sub>~2 GB</sub>    | <sub>French (Canadian)</sub>                                      | <sub>--</sub>                                                                                                                                                                | <sub>Open access</sub>         | <sub>[CC BY-NC-SA 4.0]   </sub>                                                              |
-| <sub>[EmoFilm]</sub>                                                                              | <sub>2018</sub> | <sub>1115 audio instances sentences extracted from various films.</sub>                                                                               | <sub>5 emotions: anger, contempt, happiness, fear, and sadness.</sub>                                                                                                                                                                                                        | <sub>Audio</sub>              | <sub>--</sub>       | <sub>English, Italian & Spanish</sub>                             | <sub>[Categorical vs Dimensional Perception of Italian Emotional Speech]</sub>                                                                                               | <sub>Restricted access</sub>   | <sub>EULA: End User License Agreement   </sub>                                               |
-| <sub>[ANAD]</sub>                                                                                 | <sub>2018</sub> | <sub>1384 recording by multiple speakers.</sub>                                                                                                       | <sub>3 emotions: angry, happy, surprised.</sub>                                                                                                                                                                                                                              | <sub>Audio</sub>              | <sub>~2 GB</sub>    | <sub>Arabic</sub>                                                 | <sub>[Arabic Natural Audio Dataset] </sub>                                                                                                                                   | <sub>Open access</sub>         | <sub>[CC BY-NC-SA 4.0] </sub>                                                                |
-| <sub>[EmoSynth]</sub>                                                                             | <sub>2018</sub> | <sub>144 audio file labelled by 40 listeners.</sub>                                                                                                   | <sub>Emotion (no speech) defined in regard of valence and arousal.</sub>                                                                                                                                                                                                     | <sub>Audio</sub>              | <sub>103.4 MB</sub> | <sub>--</sub>                                                     | <sub>[The Perceived Emotion of Isolated Synthetic Audio: The EmoSynth Dataset and Results] </sub>                                                                            | <sub>Open access</sub>         | <sub>[CC BY 4.0] </sub>                                                                      |
-| <sub>[CMU-MOSEI]</sub>                                                                            | <sub>2018</sub> | <sub>65 hours of annotated video from more than 1000 speakers and 250 topics.</sub>                                                                   | <sub>6 Emotion (happiness, sadness, anger,fear, disgust, surprise) + Likert scale.</sub>                                                                                                                                                                                     | <sub>Audio, Video</sub>       | <sub>--</sub>       | <sub>English</sub>                                                | <sub>[Multi-attention Recurrent Network for Human Communication Comprehension] </sub>                                                                                        | <sub>Open access</sub>         | <sub>[CMU-MOSEI License] </sub>                                                              |
-| <sub>[VERBO]</sub>                                                                                | <sub>2018</sub> | <sub>14 different phrases by 12 speakers (6 female + 6 male) for a total of 1167 recordings. </sub>                                             | <sub>7 emotions: Happiness, Disgust, Fear, Neutral, Anger, Surprise, Sadness </sub>                                                                                                                                                               | <sub>Audio</sub>              | <sub>--</sub>       | <sub>Portuguese</sub>                                                | <sub>[VERBO: Voice Emotion Recognition dataBase in Portuguese Language]</sub>                                                                | <sub>Restricted access</sub>   | <sub>Available for research purposes only</sub> 
-| <sub>[CMU-MOSI]</sub>                                                                             | <sub>2017</sub> | <sub>2199 opinion utterances with annotated sentiment.</sub>                                                                                          | <sub>Sentiment annotated between very negative to very positive in seven Likert steps.</sub>                                                                                                                                                                                 | <sub>Audio, Video</sub>       | <sub>--</sub>       | <sub>English</sub>                                                | <sub>[Multi-attention Recurrent Network for Human Communication Comprehension] </sub>                                                                                        | <sub>Open access</sub>         | <sub>[CMU-MOSI License] </sub>                                                               |
-| <sub>[MSP-IMPROV]</sub>                                                                           | <sub>2017</sub> | <sub>20 sentences by 12 actors.</sub>                                                                                                                 | <sub>4 emotions: angry, sad, happy, neutral, other, without agreement</sub>                                                                                                                                                                                                  | <sub>Audio, Video</sub>       | <sub> -- </sub>     | <sub>English</sub>                                                | <sub>[MSP-IMPROV: An Acted Corpus of Dyadic Interactions to Study Emotion Perception]</sub>                                                                                  | <sub>Restricted access</sub>   | <sub>Available under an Academic License & Commercial License        </sub>                  |
-| <sub>[CREMA-D]</sub>                                                                              | <sub>2017</sub> | <sub>7442 clip of 12 sentences spoken by 91 actors (48 males and 43 females).</sub>                                                                   | <sub>6 emotions: angry, disgusted, fearful, happy, neutral, and sad</sub>                                                                                                                                                                                                    | <sub>Audio, Video</sub>       | <sub> -- </sub>     | <sub>English</sub>                                                | <sub>[CREMA-D: Crowd-sourced Emotional Multimodal Actors Dataset]</sub>                                                                                                      | <sub>Open access</sub>         | <sub>Available under the [Open Database License & Database Content License]  </sub>          |
-| <sub>[Example emotion videos used in investigation of emotion perception in schizophrenia]</sub>  | <sub>2017</sub> | <sub>6 videos:Two example videos from each emotion category (angry, happy and neutral) by one female speaker.</sub>                                   | <sub>3 emotions: angry, happy and neutral.</sub>                                                                                                                                                                                                                             | <sub>Audio, Video</sub>       | <sub>~63 MB</sub>   | <sub>English</sub>                                                | <sub>--</sub>                                                                                                                                                                | <sub>Open access</sub>         | <sub>Available under the [Permitted Non-commercial Re-use with Acknowledgment]      </sub>   |
-| <sub>[EMOVO]</sub>                                                                                | <sub>2014</sub> | <sub>6 actors  who  played  14  sentences.</sub>                                                                                                      | <sub>6 emotions: disgust, fear, anger, joy, surprise, sadness.</sub>                                                                                                                                                                                                         | <sub>Audio</sub>              | <sub> ~355 MB</sub> | <sub>Italian</sub>                                                | <sub>[EMOVO Corpus: an Italian Emotional Speech Database]</sub>                                                                                                              | <sub>Open access</sub>         | <sub>None specified                    </sub>                                                |
-| <sub>[RECOLA]</sub>                                                                               | <sub>2013</sub> | <sub>3.8 hours of recordings by 46 participants.</sub>                                                                                                | <sub>negative and positive sentiment (valence and arousal).</sub>                                                                                                                                                                                                            | <sub>Audio, Video</sub>       | <sub> -- </sub>     | <sub>--</sub>                                                     | <sub>[Introducing the RECOLA Multimodal Corpus of Remote Collaborative and Affective Interactions]</sub>                                                                     | <sub>Restricted access</sub>   | <sub>Available under an Academic License & Commercial License                       </sub>   |
-| <sub>[GEMEP corpus]</sub>                                                                         | <sub>2012</sub> | <sub>Videos of 10 actors portraying 10 states.</sub>                                                                                                      | <sub>12 emotions: amusement, anxiety, cold anger (irritation), despair, hot anger (rage),  fear (panic), interest, joy (elation), pleasure(sensory), pride, relief, and sadness. Plus, 5 additional emotions: admiration, contempt, disgust, surprise, and tenderness.</sub> | <sub>Audio, Video</sub>       | <sub> -- </sub>     | <sub>French</sub>                                                 | <sub>[Introducing the Geneva Multimodal Expression Corpus for Experimental Research on Emotion Perception]</sub>                                                             | <sub>Restricted access</sub>   | <sub>None specified                       </sub>                                             |
-| <sub>[OGVC]</sub>                                                                                 | <sub>2012</sub> | <sub> 9114 spontaneous utterances and 2656 acted utterances by 4 professional actors (two male and two female).</sub>                                 | <sub>9 emotional states: fear, surprise, sadness, disgust, anger, anticipation, joy, acceptance and the neutral state.</sub>                                                                                                                                                 | <sub>Audio</sub>              | <sub>--</sub>       | <sub>Japanese</sub>                                               | <sub>[Naturalistic emotional speech collectionparadigm with online game and its psychological and acoustical assessment]</sub>                                               | <sub>Restricted access</sub>   | <sub>None specified  </sub>                                                                  |
-| <sub>[LEGO corpus]</sub>                                                                          | <sub>2012</sub> | <sub>347 dialogs with 9,083 system-user exchanges.</sub>                                                                                              | <sub>Emotions classified as garbage, non-angry, slightly angry and very angry.</sub>                                                                                                                                                                                         | <sub>Audio</sub>              | <sub>1.1 GB</sub>   | <sub>--</sub>                                                     | <sub>[A Parameterized and Annotated Spoken Dialog Corpus of the CMU Let’s Go Bus Information System]</sub>                                                                   | <sub>Open access</sub>         | <sub>License available with the data. Free of charges for research purposes only.  </sub>    |
-| <sub>[SEMAINE]</sub>                                                                              | <sub>2012</sub> | <sub>95 dyadic conversations from 21 subjects. Each subject converses with another playing one of four characters with emotions.</sub>                | <sub>5 FeelTrace annotations: activation, valence, dominance, power, intensity</sub>                                                                                                                                                                                         | <sub>Audio, Video, Text</sub> | <sub>104 GB</sub>   | <sub>English</sub>                                                | <sub>[The SEMAINE Database: Annotated Multimodal Records of Emotionally Colored Conversations between a Person and a Limited Agent]</sub>                                    | <sub>Restricted access<sub>    | <sub>Academic EULA</sub>                                                                     |
-| <sub>[SAVEE]</sub>                                                                                | <sub>2011</sub> | <sub>480 British English utterances by 4 males actors.</sub>                                                                                          | <sub>7 emotions: anger, disgust, fear, happiness, sadness, surprise and neutral.</sub>                                                                                                                                                                                       | <sub>Audio, Video</sub>       | <sub>--</sub>       | <sub>English (British)</sub>                                      | <sub>[Multimodal Emotion Recognition]</sub>                                                                                                                                  | <sub>Restrictted access</sub>  | <sub>Free of charges for research purposes only.  </sub>                                     |
-| <sub>[TESS]</sub>                                                                                 | <sub>2010</sub> | <sub>2800 recording by 2 actresses.</sub>                                                                                                             | <sub>7 emotions: anger, disgust, fear, happiness, pleasant surprise, sadness, and neutral.</sub>                                                                                                                                                                             | <sub>Audio</sub>              | <sub> -- </sub>     | <sub>English</sub>                                                | <sub>[BEHAVIOURAL FINDINGS FROM THE TORONTO EMOTIONAL SPEECH SET]</sub>                                                                                                      | <sub>Open access</sub>         | <sub>[CC BY-NC-ND 4.0]    </sub>                                                             |
-| <sub>[EEKK]</sub>                                                                                 | <sub>2007</sub> | <sub>26 text passage read by 10 speakers.</sub>                                                                                                       | <sub>4 main emotions: joy, sadness, anger and neutral.</sub>                                                                                                                                                                                                                 | <sub>--</sub>                 | <sub>~352 MB</sub>  | <sub>Estonian</sub>                                               | <sub>[Estonian Emotional Speech Corpus]</sub>                                                                                                                                | <sub>Open access</sub>         | <sub>[CC-BY license]     </sub>                                                              |
-| <sub>[IEMOCAP]</sub>                                                                              | <sub>2007</sub> | <sub>12 hours of audiovisual data by 10 actors.</sub>                                                                                                 | <sub>5 emotions: happiness, anger, sadness, frustration and neutral.</sub>                                                                                                                                                                                                   | <sub>--</sub>                 | <sub>--</sub>       | <sub>English</sub>                                                | <sub>[IEMOCAP: Interactive emotional dyadic motion capture database]</sub>                                                                                                   | <sub>Restricted access</sub>   | <sub>[IEMOCAP license]     </sub>                                                            |
-| <sub>[Keio-ESD]</sub>                                                                             | <sub>2006</sub> | <sub>A set of human speech with vocal emotion spoken by a Japanese male speaker.</sub>                                                                | <sub>47 emotions including angry, joyful, disgusting, downgrading, funny,  worried, gentle, relief, indignation, shameful, etc.</sub>                                                                                                                                        | <sub>Audio</sub>              | <sub> -- </sub>     | <sub>Japanese</sub>                                               | <sub>[EMOTIONAL SPEECH SYNTHESIS USING SUBSPACE CONSTRAINTS IN PROSODY]</sub>                                                                                                | <sub>Restricted access</sub>   | <sub>Available for research purposes only   </sub>                                           |
-| <sub>[EMO-DB]</sub>                                                                               | <sub>2005</sub> | <sub>800 recording spoken by 10 actors (5 males and 5 females).</sub>                                                                                 | <sub>7 emotions: anger, neutral, fear, boredom, happiness, sadness, disgust.</sub>                                                                                                                                                                                           | <sub>Audio</sub>              | <sub> -- </sub>     | <sub>German</sub>                                                 | <sub>[A Database of German Emotional Speech]</sub>                                                                                                                           | <sub>Open access</sub>         | <sub>None specified     </sub>                                                               |
-| <sub>[eNTERFACE05]</sub>                                                                          | <sub>2005</sub> | <sub>Videos by 42 subjects, coming from 14 different nationalities.</sub>                                                                             | <sub>6 emotions: anger, fear, surprise, happiness, sadness and disgust.</sub>                                                                                                                                                                                                | <sub>Audio, Video</sub>       | <sub>~0.8 GB</sub>  | <sub>German</sub>                                                 | <sub>[The eNTERFACE’05 Audio-Visual Emotion Database]</sub>                                                                                                                  | <sub>Open access</sub>         | <sub>Free of charges for research purposes only </sub>                                       |
-| <sub>[DES]</sub>                                                                                  | <sub>2002</sub> | <sub>4 speakers (2 males and 2 females).</sub>                                                                                                        | <sub>5 emotions: neutral,  surprise,  happiness,  sadness  and  anger</sub>                                                                                                                                                                                                  | <sub> -- </sub>               | <sub> -- </sub>     | <sub>Danish</sub>                                                 | <sub>[Documentation of the Danish Emotional Speech Database]</sub>                                                                                                           | <sub> -- </sub>                |  <sub> -- </sub>                                                                             |
-
-
-
-## References
+| Dataset                                                                                                                                           | Year            | Content                                                                                                                                               | Emotions                                                                                                                                                                                                                                                                     | Format                        | Size                    | Language                                                          | Paper                                                                                                                                                                                                                                                                                                                                                     | Access                    | License                                                                                                                                   |
+|:--------------------------------------------------------------------------------------------------------------------------------------------------|:----------------|:------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------|:------------------------|:------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------|:------------------------------------------------------------------------------------------------------------------------------------------|
+| <sub>[MESD](https://data.mendeley.com/datasets/cy34mh68j9/5)</sub>                                                                                | <sub>2022</sub> | <sub>864 audio files of single-word emotional utterances with Mexican cultural shaping.</sub>                                                         | <sub>6 emotions: provides single-word utterances for anger, disgust, fear, happiness, neutral, and sadness.</sub>                                                                                                                                                            | <sub>Audio</sub>              | <sub>0.097 GB</sub>     | <sub>Spanish (Mexican)</sub>                                      | <sub>[The Mexican Emotional Speech Database (MESD): elaboration and assessment based on machine learning](https://pubmed.ncbi.nlm.nih.gov/34891601/)</sub>                                                                                                                                                                                                | <sub>Open</sub>           | <sub>[CC BY 4.0](https://creativecommons.org/licenses/by/4.0/)</sub>                                                                      |
+| <sub>[MLEnd](https://www.kaggle.com/datasets/jesusrequena/mlend-spoken-numerals)</sub>                                                            | <sub>2021</sub> | <sub>~32700 audio recording files produced by 154 speakers. Each audio recording corresponds to one English numeral (from "zero" to "billion")</sub>  | <sub>Intonations: neutral, bored, excited and question</sub>                                                                                                                                                                                                                 | <sub>Audio</sub>              | <sub>2.27 GB</sub>      | <sub>--</sub>                                                     | <sub>--</sub>                                                                                                                                                                                                                                                                                                                                             | <sub>Open</sub>           | <sub>Unknown</sub>                                                                                                                        |
+| <sub>[ASVP-ESD](https://www.kaggle.com/datasets/dejolilandry/asvpesdspeech-nonspeech-emotional-utterances)</sub>                                  | <sub>2021</sub> | <sub>~13285 audio files collected from movies, tv shows and youtube containing speech and non-speech.</sub>                                           | <sub>12 different natural emotions (boredom, neutral, happiness, sadness, anger, fear, surprise, disgust, excitement, pleasure, pain, disappointment) with 2 levels of intensity.</sub>                                                                                      | <sub>Audio</sub>              | <sub>2 GB</sub>         | <sub>Chinese, English, French, Russian and others</sub>           | <sub>--</sub>                                                                                                                                                                                                                                                                                                                                             | <sub>Open</sub>           | <sub>Unknown</sub>                                                                                                                        |
+| <sub>[ESD](https://hltsingapore.github.io/ESD/)</sub>                                                                                             | <sub>2021</sub> | <sub>29 hours, 3500 sentences, by 10 native English speakers and 10 native Chinese speakers.</sub>                                                    | <sub>5 emotions: angry, happy, neutral, sad, and surprise.</sub>                                                                                                                                                                                                             | <sub>Audio,  Text</sub>       | <sub>2.4 GB (zip)</sub> | <sub>Chinese, English</sub>                                       | <sub>[Seen And Unseen Emotional Style Transfer For Voice Conversion With A New Emotional Speech Dataset](https://arxiv.org/pdf/2010.14794.pdf)</sub>                                                                                                                                                                                                      | <sub>Open</sub>           | <sub>Academic License</sub>                                                                                                               |
+| <sub>[MuSe-CAR](https://zenodo.org/record/4134758)</sub>                                                                                          | <sub>2021</sub> | <sub>40 hours, 6,000+ recordings of 25,000+ sentences by 70+ English speakers (see db link for details).</sub>                                        | <sub>continuous emotion dimensions characterized using valence, arousal, and trustworthiness.</sub>                                                                                                                                                                          | <sub>Audio, Video, Text</sub> | <sub>15 GB</sub>        | <sub>English</sub>                                                | <sub>[The Multimodal Sentiment Analysis in Car Reviews (MuSe-CaR) Dataset: Collection, Insights and Improvements](https://arxiv.org/pdf/2101.06053.pdf)</sub>                                                                                                                                                                                             | <sub>Restricted</sub>     | <sub>Academic License & Commercial License</sub>                                                                                          |
+| <sub>[MSP-Podcast corpus](https://ecs.utdallas.edu/research/researchlabs/msp-lab/MSP-Podcast.html)</sub>                                          | <sub>2020</sub> | <sub>100 hours by over 100 speakers (see db link for details).</sub>                                                                                  | <sub>This corpus is annotated with emotional labels using attribute-based descriptors (activation, dominance and valence) and categorical labels (anger, happiness, sadness, disgust, surprised, fear, contempt, neutral and other).</sub>                                   | <sub>Audio</sub>              | <sub>--</sub>           | <sub>--</sub>                                                     | <sub>[The MSP-Conversation Corpus](http://www.interspeech2020.org/index.php?m=content&c=index&a=show&catid=290&id=684)</sub>                                                                                                                                                                                                                              | <sub>Restricted</sub>     | <sub>Academic License & Commercial License</sub>                                                                                          |
+| <sub>[emotiontts open db](https://github.com/emotiontts/emotiontts_open_db)</sub>                                                                 | <sub>2020</sub> | <sub>Recordings and their associated transcriptions by a diverse group of speakers.</sub>                                                             | <sub>4 emotions: general, joy, anger, and sadness.</sub>                                                                                                                                                                                                                     | <sub>Audio, Text</sub>        | <sub>--</sub>           | <sub>Korean</sub>                                                 | <sub>--</sub>                                                                                                                                                                                                                                                                                                                                             | <sub>Partially open</sub> | <sub>[CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/)</sub>                                                          |
+| <sub>[URDU-Dataset](https://github.com/siddiquelatif/urdu-dataset)</sub>                                                                          | <sub>2020</sub> | <sub>400 utterances by 38 speakers (27 male and 11 female).</sub>                                                                                     | <sub>4 emotions: angry, happy, neutral, and sad.</sub>                                                                                                                                                                                                                       | <sub>Audio</sub>              | <sub>0.072 GB</sub>     | <sub>Urdu</sub>                                                   | <sub>[Cross Lingual Speech Emotion Recognition: Urdu vs. Western Languages](https://arxiv.org/pdf/1812.10411.pdf)</sub>                                                                                                                                                                                                                                   | <sub>Open</sub>           | <sub>--</sub>                                                                                                                             |
+| <sub>[BAVED](https://www.kaggle.com/a13x10/basic-arabic-vocal-emotions-dataset)</sub>                                                             | <sub>2020</sub> | <sub>1935 recordings by 61 speakers (45 male and 16 female).</sub>                                                                                    | <sub>3 levels of emotion.</sub>                                                                                                                                                                                                                                              | <sub>Audio</sub>              | <sub>0.195 GB</sub>     | <sub>Arabic</sub>                                                 | <sub>--</sub>                                                                                                                                                                                                                                                                                                                                             | <sub>Open</sub>           | <sub>--</sub>                                                                                                                             |
+| <sub>[VIVAE](https://zenodo.org/record/4066235)</sub>                                                                                             | <sub>2020</sub> | <sub>non-speech, 1085 audio file by 12 speakers.</sub>                                                                                                | <sub>non-speech 6 emotions: achievement, anger, fear, pain, pleasure, and surprise with 3 emotional intensities (low, moderate, strong, peak).</sub>                                                                                                                         | <sub>Audio</sub>              | <sub>--</sub>           | <sub>--</sub>                                                     | <sub>--</sub>                                                                                                                                                                                                                                                                                                                                             | <sub>Restricted</sub>     | <sub>[CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/)</sub>                                                          |
+| <sub>[SEWA](https://db.sewaproject.eu/)</sub>                                                                                                     | <sub>2019</sub> | <sub>more than 2000 minutes of audio-visual data of 398 people (201 male and 197 female) coming from 6 cultures.</sub>                                | <sub>emotions are characterized using valence and arousal.</sub>                                                                                                                                                                                                             | <sub>Audio, Video</sub>       | <sub>--</sub>           | <sub>Chinese, English, German, Greek, Hungarian and Serbian</sub> | <sub>[SEWA DB: A Rich Database for Audio-Visual Emotion and Sentiment Research in the Wild](https://arxiv.org/pdf/1901.02839.pdf)</sub>                                                                                                                                                                                                                   | <sub>Restricted</sub>     | <sub>[SEWA EULA](https://db.sewaproject.eu/media/doc/eula.pdf)</sub>                                                                      |
+| <sub>[MELD](https://affective-meld.github.io/)</sub>                                                                                              | <sub>2019</sub> | <sub>1400 dialogues and 14000 utterances from Friends TV series  by multiple speakers.</sub>                                                          | <sub>7 emotions: Anger, disgust, sadness, joy, neutral, surprise and fear.  MELD also has sentiment (positive, negative and neutral) annotation  for each utterance.</sub>                                                                                                   | <sub>Audio, Video, Text</sub> | <sub>10.1 GB</sub>      | <sub>English</sub>                                                | <sub>[MELD: A Multimodal Multi-Party Dataset for Emotion Recognition in Conversations](https://arxiv.org/pdf/1810.02508.pdf)</sub>                                                                                                                                                                                                                        | <sub>Open</sub>           | <sub>[MELD: GPL-3.0 License](https://github.com/declare-lab/MELD/blob/master/LICENSE)</sub>                                               |
+| <sub>[ShEMO](https://github.com/mansourehk/ShEMO)</sub>                                                                                           | <sub>2019</sub> | <sub>3000 semi-natural utterances, equivalent to 3 hours and 25 minutes of speech data from online radio plays by 87 native-Persian speakers.</sub>   | <sub>6 emotions: anger, fear, happiness, sadness, neutral and surprise.</sub>                                                                                                                                                                                                | <sub>Audio</sub>              | <sub>0.101 GB</sub>     | <sub>Persian</sub>                                                | <sub>[ShEMO: a large-scale validated database for Persian speech emotion detection](https://link.springer.com/article/10.1007/s10579-018-9427-x)</sub>                                                                                                                                                                                                    | <sub>Open</sub>           | <sub>--</sub>                                                                                                                             |
+| <sub>[DEMoS](https://zenodo.org/record/2544829)</sub>                                                                                             | <sub>2019</sub> | <sub>9365 emotional and 332 neutral samples produced by 68 native speakers (23 females, 45 males).</sub>                                              | <sub>7/6 emotions: anger, sadness, happiness, fear, surprise, disgust, and the secondary emotion guilt.</sub>                                                                                                                                                                | <sub>Audio</sub>              | <sub>--</sub>           | <sub>Italian</sub>                                                | <sub>[DEMoS: An Italian emotional speech corpus. Elicitation methods, machine learning, and perception](https://link.springer.com/epdf/10.1007/s10579-019-09450-y?author_access_token=5pf0w_D4k9z28TM6n4PbVPe4RwlQNchNByi7wbcMAY5hiA-aXzXNbZYfsMDDq2CdHD-w5ArAxIwlsk2nC_26pSyEAcu1xlKJ1c9m3JZj2ZlFmlVoCZUTcG3Hq2_2ozMLo3Hq3Y0CHzLdTxihQwch5Q%3D%3D)</sub> | <sub>Restricted</sub>     | <sub>EULA: End User License Agreement</sub>                                                                                               |
+| <sub>[AESDD](http://m3c.web.auth.gr/research/aesdd-speech-emotion-recognition/)</sub>                                                             | <sub>2018</sub> | <sub>around 500 utterances by a diverse group of actors (over 5 actors) simulating various emotions.</sub>                                            | <sub>5 emotions: anger, disgust, fear, happiness, and sadness.</sub>                                                                                                                                                                                                         | <sub>Audio</sub>              | <sub>0.392 GB</sub>     | <sub>Greek</sub>                                                  | <sub>[Speech Emotion Recognition for Performance Interaction](https://www.researchgate.net/publication/326005164_Speech_Emotion_Recognition_for_Performance_Interaction)</sub>                                                                                                                                                                            | <sub>Open</sub>           | <sub>--</sub>                                                                                                                             |
+| <sub>[Emov-DB](https://mega.nz/#F!KBp32apT!gLIgyWf9iQ-yqnWFUFuUHg!mYwUnI4K)</sub>                                                                 | <sub>2018</sub> | <sub>Recordings for 4 speakers- 2 males and 2 females.</sub>                                                                                          | <sub>The emotional styles are neutral, sleepiness, anger, disgust and amused.</sub>                                                                                                                                                                                          | <sub>Audio</sub>              | <sub>5.88 GB</sub>      | <sub>English</sub>                                                | <sub>[The emotional voices database: Towards controlling the emotion dimension in voice generation systems](https://arxiv.org/pdf/1806.09514.pdf)</sub>                                                                                                                                                                                                   | <sub>Open</sub>           | <sub>--</sub>                                                                                                                             |
+| <sub>[RAVDESS](https://zenodo.org/record/1188976#.XrC7a5NKjOR)</sub>                                                                              | <sub>2018</sub> | <sub>7356 recordings by 24 actors.</sub>                                                                                                              | <sub>7 emotions: calm, happy, sad, angry, fearful, surprise, and disgust</sub>                                                                                                                                                                                               | <sub>Audio, Video</sub>       | <sub>24.8 GB</sub>      | <sub>English</sub>                                                | <sub>[The Ryerson Audio-Visual Database of Emotional Speech and Song (RAVDESS): A dynamic, multimodal set of facial and vocal expressions in North American English](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0196391)</sub>                                                                                                     | <sub>Open</sub>           | <sub>[CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/)</sub>                                                          |
+| <sub>[JL corpus](https://www.kaggle.com/tli725/jl-corpus)</sub>                                                                                   | <sub>2018</sub> | <sub>2400 recording of 240 sentences by 4 actors (2 males and 2 females).</sub>                                                                       | <sub>5 primary emotions: angry, sad, neutral, happy, excited. 5 secondary emotions: anxious, apologetic, pensive, worried, enthusiastic.</sub>                                                                                                                               | <sub>Audio</sub>              | <sub>--</sub>           | <sub>English</sub>                                                | <sub>[An Open Source Emotional Speech Corpus for Human Robot Interaction Applications](https://www.isca-speech.org/archive/Interspeech_2018/pdfs/1349.pdf)</sub>                                                                                                                                                                                          | <sub>Open</sub>           | <sub>[CC0 1.0](https://creativecommons.org/publicdomain/zero/1.0/)</sub>                                                                  |
+| <sub>[CaFE](https://zenodo.org/record/1478765)</sub>                                                                                              | <sub>2018</sub> | <sub>6 different sentences by 12 speakers (6 females + 6 males).</sub>                                                                                | <sub>7 emotions: happy, sad, angry, fearful, surprise, disgust and neutral. Each emotion is acted in 2 different intensities.</sub>                                                                                                                                          | <sub>Audio</sub>              | <sub>2 GB</sub>         | <sub>French (Canadian)</sub>                                      | <sub>--</sub>                                                                                                                                                                                                                                                                                                                                             | <sub>Open</sub>           | <sub>[CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/)</sub>                                                          |
+| <sub>[EmoFilm](https://zenodo.org/record/1326428)</sub>                                                                                           | <sub>2018</sub> | <sub>1115 audio instances sentences extracted from various films.</sub>                                                                               | <sub>5 emotions: anger, contempt, happiness, fear, and sadness.</sub>                                                                                                                                                                                                        | <sub>Audio</sub>              | <sub>--</sub>           | <sub>English, Italian & Spanish</sub>                             | <sub>[Categorical vs Dimensional Perception of Italian Emotional Speech](https://pdfs.semanticscholar.org/e70e/fcf7f5b4c366a7b7e2c16267d7f7691a5391.pdf)</sub>                                                                                                                                                                                            | <sub>Restricted</sub>     | <sub>EULA: End User License Agreement</sub>                                                                                               |
+| <sub>[ANAD](https://www.kaggle.com/suso172/arabic-natural-audio-dataset)</sub>                                                                    | <sub>2018</sub> | <sub>1384 recording by multiple speakers.</sub>                                                                                                       | <sub>3 emotions: angry, happy, surprised.</sub>                                                                                                                                                                                                                              | <sub>Audio</sub>              | <sub>2 GB</sub>         | <sub>Arabic</sub>                                                 | <sub>[Arabic Natural Audio Dataset](https://data.mendeley.com/datasets/xm232yxf7t/1)</sub>                                                                                                                                                                                                                                                                | <sub>Open</sub>           | <sub>[CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/)</sub>                                                          |
+| <sub>[EmoSynth](https://zenodo.org/record/3727593)</sub>                                                                                          | <sub>2018</sub> | <sub>144 audio file labelled by 40 listeners.</sub>                                                                                                   | <sub>Emotion (no speech) defined in regard of valence and arousal.</sub>                                                                                                                                                                                                     | <sub>Audio</sub>              | <sub>0.1034 GB</sub>    | <sub>--</sub>                                                     | <sub>[The Perceived Emotion of Isolated Synthetic Audio: The EmoSynth Dataset and Results](https://dl.acm.org/doi/10.1145/3243274.3243277)</sub>                                                                                                                                                                                                          | <sub>Open</sub>           | <sub>[CC BY 4.0](https://creativecommons.org/licenses/by/4.0/)</sub>                                                                      |
+| <sub>[CMU-MOSEI](https://www.amir-zadeh.com/datasets)</sub>                                                                                       | <sub>2018</sub> | <sub>65 hours of annotated video from more than 1000 speakers and 250 topics.</sub>                                                                   | <sub>6 Emotion (happiness, sadness, anger,fear, disgust, surprise) + Likert scale.</sub>                                                                                                                                                                                     | <sub>Audio, Video</sub>       | <sub>--</sub>           | <sub>English</sub>                                                | <sub>[Multi-attention Recurrent Network for Human Communication Comprehension](https://arxiv.org/pdf/1802.00923.pdf)</sub>                                                                                                                                                                                                                                | <sub>Open</sub>           | <sub>[CMU-MOSEI License](https://github.com/A2Zadeh/CMU-MultimodalSDK/blob/master/LICENSE.txt)</sub>                                      |
+| <sub>[VERBO](https://sites.google.com/view/verbodatabase/home)</sub>                                                                              | <sub>2018</sub> | <sub>14 different phrases by 12 speakers (6 female + 6 male) for a total of 1167 recordings.</sub>                                                    | <sub>7 emotions: Happiness, Disgust, Fear, Neutral, Anger, Surprise, Sadness</sub>                                                                                                                                                                                           | <sub>Audio</sub>              | <sub>--</sub>           | <sub>Portuguese</sub>                                             | <sub>[VERBO: Voice Emotion Recognition dataBase in Portuguese Language](https://thescipub.com/pdf/jcssp.2018.1420.1430.pdf)</sub>                                                                                                                                                                                                                         | <sub>Restricted</sub>     | <sub>Available for research purposes only</sub>                                                                                           |
+| <sub>[CMU-MOSI](https://www.amir-zadeh.com/datasets)</sub>                                                                                        | <sub>2017</sub> | <sub>2199 opinion utterances with annotated sentiment.</sub>                                                                                          | <sub>Sentiment annotated between very negative to very positive in seven Likert steps.</sub>                                                                                                                                                                                 | <sub>Audio, Video</sub>       | <sub>--</sub>           | <sub>English</sub>                                                | <sub>[Multi-attention Recurrent Network for Human Communication Comprehension](https://arxiv.org/pdf/1802.00923.pdf)</sub>                                                                                                                                                                                                                                | <sub>Open</sub>           | <sub>[CMU-MOSI License](https://github.com/A2Zadeh/CMU-MultimodalSDK/blob/master/LICENSE.txt)</sub>                                       |
+| <sub>[MSP-IMPROV](https://ecs.utdallas.edu/research/researchlabs/msp-lab/MSP-Improv.html)</sub>                                                   | <sub>2017</sub> | <sub>20 sentences by 12 actors.</sub>                                                                                                                 | <sub>4 emotions: angry, sad, happy, neutral, other, without agreement</sub>                                                                                                                                                                                                  | <sub>Audio, Video</sub>       | <sub>--</sub>           | <sub>English</sub>                                                | <sub>[MSP-IMPROV: An Acted Corpus of Dyadic Interactions to Study Emotion Perception](https://ecs.utdallas.edu/research/researchlabs/msp-lab/publications/Busso_2017.pdf)</sub>                                                                                                                                                                           | <sub>Restricted</sub>     | <sub>Academic License & Commercial License</sub>                                                                                          |
+| <sub>[CREMA-D](https://github.com/CheyneyComputerScience/CREMA-D)</sub>                                                                           | <sub>2017</sub> | <sub>7442 clip of 12 sentences spoken by 91 actors (48 males and 43 females).</sub>                                                                   | <sub>6 emotions: angry, disgusted, fearful, happy, neutral, and sad</sub>                                                                                                                                                                                                    | <sub>Audio, Video</sub>       | <sub>--</sub>           | <sub>English</sub>                                                | <sub>[CREMA-D: Crowd-sourced Emotional Multimodal Actors Dataset](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4313618/)</sub>                                                                                                                                                                                                                            | <sub>Open</sub>           | <sub>[Open Database License & Database Content License](https://github.com/CheyneyComputerScience/CREMA-D/blob/master/LICENSE.txt)</sub>  |
+| <sub>[Example emotion videos used in investigation of emotion perception in schizophrenia](https://espace.library.uq.edu.au/view/UQ:446541)</sub> | <sub>2017</sub> | <sub>6 videos: Two example videos from each emotion category (angry, happy and neutral) by one female speaker.</sub>                                  | <sub>3 emotions: angry, happy and neutral.</sub>                                                                                                                                                                                                                             | <sub>Audio, Video</sub>       | <sub>0.063 GB</sub>     | <sub>English</sub>                                                | <sub>--</sub>                                                                                                                                                                                                                                                                                                                                             | <sub>Open</sub>           | <sub>[Permitted Non-commercial Re-use with Acknowledgment](https://guides.library.uq.edu.au/deposit_your_data/terms_and_conditions)</sub> |
+| <sub>[EMOVO](http://voice.fub.it/activities/corpora/emovo/index.html)</sub>                                                                       | <sub>2014</sub> | <sub>6 actors  who  played  14  sentences.</sub>                                                                                                      | <sub>6 emotions: disgust, fear, anger, joy, surprise, sadness.</sub>                                                                                                                                                                                                         | <sub>Audio</sub>              | <sub>0.355 GB</sub>     | <sub>Italian</sub>                                                | <sub>[EMOVO Corpus: an Italian Emotional Speech Database](https://core.ac.uk/download/pdf/53857389.pdf)</sub>                                                                                                                                                                                                                                             | <sub>Open</sub>           | <sub>--</sub>                                                                                                                             |
+| <sub>[RECOLA](https://diuf.unifr.ch/main/diva/recola/download.html)</sub>                                                                         | <sub>2013</sub> | <sub>3.8 hours of recordings by 46 participants.</sub>                                                                                                | <sub>negative and positive sentiment (valence and arousal).</sub>                                                                                                                                                                                                            | <sub>Audio, Video</sub>       | <sub>--</sub>           | <sub>--</sub>                                                     | <sub>[Introducing the RECOLA Multimodal Corpus of Remote Collaborative and Affective Interactions](https://drive.google.com/file/d/0B2V_I9XKBODhNENKUnZWNFdVXzQ/view)</sub>                                                                                                                                                                               | <sub>Restricted</sub>     | <sub>Academic License & Commercial License</sub>                                                                                          |
+| <sub>[GEMEP corpus](https://www.unige.ch/cisa/gemep)</sub>                                                                                        | <sub>2012</sub> | <sub>Videos of 10 actors portraying 10 states.</sub>                                                                                                  | <sub>12 emotions: amusement, anxiety, cold anger (irritation), despair, hot anger (rage),  fear (panic), interest, joy (elation), pleasure(sensory), pride, relief, and sadness. Plus, 5 additional emotions: admiration, contempt, disgust, surprise, and tenderness.</sub> | <sub>Audio, Video</sub>       | <sub>--</sub>           | <sub>French</sub>                                                 | <sub>[Introducing the Geneva Multimodal Expression Corpus for Experimental Research on Emotion Perception](https://www.researchgate.net/publication/51796867_Introducing_the_Geneva_Multimodal_Expression_Corpus_for_Experimental_Research_on_Emotion_Perception)</sub>                                                                                   | <sub>Restricted</sub>     | <sub>--</sub>                                                                                                                             |
+| <sub>[OGVC](https://sites.google.com/site/ogcorpus/home/en)</sub>                                                                                 | <sub>2012</sub> | <sub>9114 spontaneous utterances and 2656 acted utterances by 4 professional actors (two male and two female).</sub>                                  | <sub>9 emotional states: fear, surprise, sadness, disgust, anger, anticipation, joy, acceptance and the neutral state.</sub>                                                                                                                                                 | <sub>Audio</sub>              | <sub>--</sub>           | <sub>Japanese</sub>                                               | <sub>[Naturalistic emotional speech collection paradigm with online game and its psychological and acoustical assessment](https://www.jstage.jst.go.jp/article/ast/33/6/33_E1175/_pdf)</sub>                                                                                                                                                              | <sub>Restricted</sub>     | <sub>--</sub>                                                                                                                             |
+| <sub>[LEGO corpus](https://www.ultes.eu/ressources/lego-spoken-dialogue-corpus/)</sub>                                                            | <sub>2012</sub> | <sub>347 dialogs with 9,083 system-user exchanges.</sub>                                                                                              | <sub>Emotions classified as garbage, non-angry, slightly angry and very angry.</sub>                                                                                                                                                                                         | <sub>Audio</sub>              | <sub>1.1 GB</sub>       | <sub>--</sub>                                                     | <sub>[A Parameterized and Annotated Spoken Dialog Corpus of the CMU Let’s Go Bus Information System](http://www.lrec-conf.org/proceedings/lrec2012/pdf/333_Paper.pdf)</sub>                                                                                                                                                                               | <sub>Open</sub>           | <sub>License available with the data. Free of charges for research purposes only.</sub>                                                   |
+| <sub>[SEMAINE](https://semaine-db.eu/)</sub>                                                                                                      | <sub>2012</sub> | <sub>95 dyadic conversations from 21 subjects. Each subject converses with another playing one of four characters with emotions.</sub>                | <sub>5 FeelTrace annotations: activation, valence, dominance, power, intensity</sub>                                                                                                                                                                                         | <sub>Audio, Video, Text</sub> | <sub>104 GB</sub>       | <sub>English</sub>                                                | <sub>[The SEMAINE Database: Annotated Multimodal Records of Emotionally Colored Conversations between a Person and a Limited Agent](https://ieeexplore.ieee.org/document/5959155)</sub>                                                                                                                                                                   | <sub>Restricted</sub>     | <sub>Academic EULA</sub>                                                                                                                  |
+| <sub>[SAVEE](http://kahlan.eps.surrey.ac.uk/savee/Database.html)</sub>                                                                            | <sub>2011</sub> | <sub>480 British English utterances by 4 males actors.</sub>                                                                                          | <sub>7 emotions: anger, disgust, fear, happiness, sadness, surprise and neutral.</sub>                                                                                                                                                                                       | <sub>Audio, Video</sub>       | <sub>--</sub>           | <sub>English (British)</sub>                                      | <sub>[Multimodal Emotion Recognition](http://personal.ee.surrey.ac.uk/Personal/P.Jackson/pub/ma10/HaqJackson_MachineAudition10_approved.pdf)</sub>                                                                                                                                                                                                        | <sub>Restricted</sub>     | <sub>Free of charges for research purposes only.</sub>                                                                                    |
+| <sub>[TESS](https://tspace.library.utoronto.ca/handle/1807/24487)</sub>                                                                           | <sub>2010</sub> | <sub>2800 recording by 2 actresses.</sub>                                                                                                             | <sub>7 emotions: anger, disgust, fear, happiness, pleasant surprise, sadness, and neutral.</sub>                                                                                                                                                                             | <sub>Audio</sub>              | <sub>--</sub>           | <sub>English</sub>                                                | <sub>[BEHAVIOURAL FINDINGS FROM THE TORONTO EMOTIONAL SPEECH SET](https://www.semanticscholar.org/paper/BEHAVIOURAL-FINDINGS-FROM-THE-TORONTO-EMOTIONAL-SET-Dupuis-Pichora-Fuller/d7f746b3aee801a353b6929a65d9a34a68e71c6f/figure/2)</sub>                                                                                                                | <sub>Open</sub>           | <sub>[CC BY-NC-ND 4.0](https://creativecommons.org/licenses/by-nc-nd/4.0/)</sub>                                                          |
+| <sub>[EEKK](https://metashare.ut.ee/repository/download/4d42d7a8463411e2a6e4005056b40024a19021a316b54b7fb707757d43d1a889/)</sub>                  | <sub>2007</sub> | <sub>26 text passage read by 10 speakers.</sub>                                                                                                       | <sub>4 main emotions: joy, sadness, anger and neutral.</sub>                                                                                                                                                                                                                 | <sub>--</sub>                 | <sub>0.352 GB</sub>     | <sub>Estonian</sub>                                               | <sub>[Estonian Emotional Speech Corpus](https://www.researchgate.net/publication/261724574_Estonian_Emotional_Speech_Corpus_Release_1)</sub>                                                                                                                                                                                                              | <sub>Open</sub>           | <sub>[CC-BY license](https://metashare.ut.ee/repository/download/4d42d7a8463411e2a6e4005056b40024a19021a316b54b7fb707757d43d1a889/)</sub> |
+| <sub>[IEMOCAP](https://sail.usc.edu/iemocap/iemocap_release.htm)</sub>                                                                            | <sub>2007</sub> | <sub>12 hours of audiovisual data by 10 actors.</sub>                                                                                                 | <sub>5 emotions: happiness, anger, sadness, frustration and neutral.</sub>                                                                                                                                                                                                   | <sub>--</sub>                 | <sub>--</sub>           | <sub>English</sub>                                                | <sub>[IEMOCAP: Interactive emotional dyadic motion capture database](https://sail.usc.edu/iemocap/Busso_2008_iemocap.pdf)</sub>                                                                                                                                                                                                                           | <sub>Restricted</sub>     | <sub>[IEMOCAP license](https://sail.usc.edu/iemocap/Data_Release_Form_IEMOCAP.pdf)</sub>                                                  |
+| <sub>[Keio-ESD](http://research.nii.ac.jp/src/en/Keio-ESD.html)</sub>                                                                             | <sub>2006</sub> | <sub>A set of human speech with vocal emotion spoken by a Japanese male speaker.</sub>                                                                | <sub>47 emotions including angry, joyful, disgusting, downgrading, funny,  worried, gentle, relief, indignation, shameful, etc.</sub>                                                                                                                                        | <sub>Audio</sub>              | <sub>--</sub>           | <sub>Japanese</sub>                                               | <sub>[EMOTIONAL SPEECH SYNTHESIS USING SUBSPACE CONSTRAINTS IN PROSODY](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.420.8899&rep=rep1&type=pdf)</sub>                                                                                                                                                                                        | <sub>Restricted</sub>     | <sub>Available for research purposes only.</sub>                                                                                          |
+| <sub>[EMO-DB](http://emodb.bilderbar.info/index-1280.html)</sub>                                                                                  | <sub>2005</sub> | <sub>800 recording spoken by 10 actors (5 males and 5 females).</sub>                                                                                 | <sub>7 emotions: anger, neutral, fear, boredom, happiness, sadness, disgust.</sub>                                                                                                                                                                                           | <sub>Audio</sub>              | <sub>--</sub>           | <sub>German</sub>                                                 | <sub>[A Database of German Emotional Speech](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.130.8506&rep=rep1&type=pdf)</sub>                                                                                                                                                                                                                   | <sub>Open</sub>           | <sub>--</sub>                                                                                                                             |
+| <sub>[eNTERFACE05](http://www.enterface.net/enterface05/docs/results/databases/project2_database.zip)</sub>                                       | <sub>2005</sub> | <sub>Videos by 42 subjects, coming from 14 different nationalities.</sub>                                                                             | <sub>6 emotions: anger, fear, surprise, happiness, sadness and disgust.</sub>                                                                                                                                                                                                | <sub>Audio, Video</sub>       | <sub>0.8 GB</sub>       | <sub>German</sub>                                                 | <sub>--</sub>                                                                                                                                                                                                                                                                                                                                             | <sub>Open</sub>           | <sub>Free of charges for research purposes only.</sub>                                                                                    |
+| <sub>[DES](http://kom.aau.dk/~tb/speech/Emotions/)</sub>                                                                                          | <sub>2002</sub> | <sub>4 speakers (2 males and 2 females).</sub>                                                                                                        | <sub>5 emotions: neutral,  surprise,  happiness,  sadness  and  anger</sub>                                                                                                                                                                                                  | <sub>--</sub>                 | <sub>--</sub>           | <sub>Danish</sub>                                                 | <sub>[Documentation of the Danish Emotional Speech Database](http://kom.aau.dk/~tb/speech/Emotions/des.pdf)</sub>                                                                                                                                                                                                                                         | <sub>--</sub>             | <sub>--</sub>                                                                                                                             |
+
+## References
 
 - Swain, Monorama & Routray, Aurobinda & Kabisatpathy, Prithviraj, Databases, features and classifiers for speech emotion recognition: a review, International Journal of Speech Technology, [paper](https://www.researchgate.net/publication/322602563_Databases_features_and_classifiers_for_speech_emotion_recognition_a_review#pf19)
 - Dimitrios Ververidis and Constantine Kotropoulos, A State of the Art Review on Emotional Speech Databases, Artificial Intelligence & Information Analysis Laboratory, Department of Informatics Aristotle, University of Thessaloniki, [paper](http://poseidon.csd.auth.gr/papers/PUBLISHED/CONFERENCE/pdf/Ververidis2003b.pdf)
@@ -67,106 +62,4 @@ The table can be browsed, sorted and searched under https://superkogito.github.i
 
 
 ## Disclaimer
-
- - The maintainer and the contributors try their best to keep this list up-to-date, and to only include working links (using automated verification with the help of the [urlchecker-action](https://github.com/marketplace/actions/urlchecker-action)). However, we cannot guarantee that all listed links are up-to-date. Read more in [DISCLAIMER.md](https://github.com/SuperKogito/SER-datasets/blob/master/DISCLAIMER.md).
-
-
-
-
-[//]: # (datasets)
-
-[MESD]: https://data.mendeley.com/datasets/cy34mh68j9/5
-[MLEnd]: https://www.kaggle.com/datasets/jesusrequena/mlend-spoken-numerals
-[ASVP-ESD]: https://www.kaggle.com/datasets/dejolilandry/asvpesdspeech-nonspeech-emotional-utterances
-[ESD]: https://hltsingapore.github.io/ESD/
-[MuSe-CAR]: https://zenodo.org/record/4134758
-[MSP-Podcast corpus]: https://ecs.utdallas.edu/research/researchlabs/msp-lab/MSP-Podcast.html
-[emotiontts open db]: https://github.com/emotiontts/emotiontts_open_db
-[URDU-Dataset]: https://github.com/siddiquelatif/urdu-dataset
-[BAVED]: https://www.kaggle.com/a13x10/basic-arabic-vocal-emotions-dataset
-[VIVAE]: https://zenodo.org/record/4066235
-[SEWA]: https://db.sewaproject.eu/
-[MELD]: https://affective-meld.github.io/
-[ShEMO]: https://github.com/mansourehk/ShEMO
-[DEMoS]: https://zenodo.org/record/2544829
-[VERBO]:https://sites.google.com/view/verbodatabase/home
-[AESDD]: http://m3c.web.auth.gr/research/aesdd-speech-emotion-recognition/
-[Emov-DB]: https://mega.nz/#F!KBp32apT!gLIgyWf9iQ-yqnWFUFuUHg!mYwUnI4K
-[RAVDESS]: https://zenodo.org/record/1188976#.XrC7a5NKjOR
-[JL corpus]: https://www.kaggle.com/tli725/jl-corpus
-[CaFE]: https://zenodo.org/record/1478765
-[EmoFilm]: https://zenodo.org/record/1326428
-[ANAD]: https://www.kaggle.com/suso172/arabic-natural-audio-dataset
-[EmoSynth]: https://zenodo.org/record/3727593
-[CMU-MOSEI]: https://www.amir-zadeh.com/datasets
-[CMU-MOSI]: https://www.amir-zadeh.com/datasets
-[MSP-IMPROV]: https://ecs.utdallas.edu/research/researchlabs/msp-lab/MSP-Improv.html
-[CREMA-D]: https://github.com/CheyneyComputerScience/CREMA-D
-[Example emotion videos used in investigation of emotion perception in schizophrenia]: https://espace.library.uq.edu.au/view/UQ:446541
-[EMOVO]: http://voice.fub.it/activities/corpora/emovo/index.html
-[RECOLA]: https://diuf.unifr.ch/main/diva/recola/download.html
-[GEMEP corpus]: https://www.unige.ch/cisa/gemep
-[OGVC]: https://sites.google.com/site/ogcorpus/home/en
-[LEGO corpus]: https://www.ultes.eu/ressources/lego-spoken-dialogue-corpus/
-[SEMAINE]: https://semaine-db.eu/
-[SAVEE]: http://kahlan.eps.surrey.ac.uk/savee/Database.html
-[TESS]: https://tspace.library.utoronto.ca/handle/1807/24487
-[EEKK]: https://metashare.ut.ee/repository/download/4d42d7a8463411e2a6e4005056b40024a19021a316b54b7fb707757d43d1a889/
-[IEMOCAP]: https://sail.usc.edu/iemocap/iemocap_release.htm
-[Keio-ESD]: http://research.nii.ac.jp/src/en/Keio-ESD.html
-[EMO-DB]: http://emodb.bilderbar.info/index-1280.html
-[eNTERFACE05]: http://www.enterface.net/enterface05/docs/results/databases/project2_database.zip
-[DES]: http://kom.aau.dk/~tb/speech/Emotions/
-
-[//]: # (license)
-
-[CC BY 4.0]: https://creativecommons.org/licenses/by/4.0/
-[CC BY-NC-SA 4.0]: https://creativecommons.org/licenses/by-nc-sa/4.0/
-[CC BY-NC-ND 4.0]: https://creativecommons.org/licenses/by-nc-nd/4.0/
-[CC-BY license]: https://metashare.ut.ee/repository/download/4d42d7a8463411e2a6e4005056b40024a19021a316b54b7fb707757d43d1a889/
-[Permitted Non-commercial Re-use with Acknowledgment]: https://guides.library.uq.edu.au/deposit_your_data/terms_and_conditions
-[Open Database License & Database Content License]: https://github.com/CheyneyComputerScience/CREMA-D/blob/master/LICENSE.txt
-[CC0 1.0]: https://creativecommons.org/publicdomain/zero/1.0/
-[CMU-MOSEI License]: https://github.com/A2Zadeh/CMU-MultimodalSDK/blob/master/LICENSE.txt
-[CMU-MOSI License]: https://github.com/A2Zadeh/CMU-MultimodalSDK/blob/master/LICENSE.txt
-[IEMOCAP license]: https://sail.usc.edu/iemocap/Data_Release_Form_IEMOCAP.pdf
-[SEWA EULA]: https://db.sewaproject.eu/media/doc/eula.pdf
-[Meld: GPL-3.0 License]: https://github.com/declare-lab/MELD/blob/master/LICENSE
-
-[//]: # (papers)
-
-[The Mexican Emotional Speech Database (MESD): elaboration and assessment based on machine learning]: https://pubmed.ncbi.nlm.nih.gov/34891601/
-[Seen And Unseen Emotional Style Transfer For Voice Conversion With A New Emotional Speech Dataset]: https://arxiv.org/pdf/2010.14794.pdf
-[The Multimodal Sentiment Analysis in Car Reviews (MuSe-CaR) Dataset: Collection, Insights and Improvements]: https://arxiv.org/pdf/2101.06053.pdf
-[The MSP-Conversation Corpus]: http://www.interspeech2020.org/index.php?m=content&c=index&a=show&catid=290&id=684
-[Cross Lingual Speech Emotion Recognition: Urdu vs. Western Languages]: https://arxiv.org/pdf/1812.10411.pdf
-[Estonian Emotional Speech Corpus]: https://www.researchgate.net/publication/261724574_Estonian_Emotional_Speech_Corpus_Release_1
-[IEMOCAP: Interactive emotional dyadic motion capture database]: https://sail.usc.edu/iemocap/Busso_2008_iemocap.pdf
-[A Database of German Emotional Speech]: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.130.8506&rep=rep1&type=pdf
-[SEWA DB: A Rich Database for Audio-Visual Emotion and Sentiment Research in the Wild]: https://arxiv.org/pdf/1901.02839.pdf
-[Documentation of the Danish Emotional Speech Database]: http://kom.aau.dk/~tb/speech/Emotions/des.pdf
-[EMOTIONAL SPEECH SYNTHESIS USING SUBSPACE CONSTRAINTS IN PROSODY]: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.420.8899&rep=rep1&type=pdf
-[Naturalistic emotional speech collectionparadigm with online game and its psychological and acoustical assessment]: https://www.jstage.jst.go.jp/article/ast/33/6/33_E1175/_pdf
-[EMOVO Corpus: an Italian Emotional Speech Database]: https://core.ac.uk/download/pdf/53857389.pdf
-[The eNTERFACE’05 Audio-Visual Emotion Database]: http://poseidon.csd.auth.gr/papers/PUBLISHED/CONFERENCE/pdf/Martin06a.pdf
-[Arabic Natural Audio Dataset]: https://data.mendeley.com/datasets/xm232yxf7t/1
-[Introducing the Geneva Multimodal Expression Corpus for Experimental Research on Emotion Perception]: https://www.researchgate.net/publication/51796867_Introducing_the_Geneva_Multimodal_Expression_Corpus_for_Experimental_Research_on_Emotion_Perception
-[Speech Emotion Recognition for Performance Interaction]: https://www.researchgate.net/publication/326005164_Speech_Emotion_Recognition_for_Performance_Interaction
-[MELD: A Multimodal Multi-Party Dataset for Emotion Recognition in Conversations]: https://arxiv.org/pdf/1810.02508.pdf
-[BEHAVIOURAL FINDINGS FROM THE TORONTO EMOTIONAL SPEECH SET]: https://www.semanticscholar.org/paper/BEHAVIOURAL-FINDINGS-FROM-THE-TORONTO-EMOTIONAL-SET-Dupuis-Pichora-Fuller/d7f746b3aee801a353b6929a65d9a34a68e71c6f/figure/2
-[CREMA-D: Crowd-sourced Emotional Multimodal Actors Dataset]: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4313618/
-[DEMoS: An Italian emotional speech corpus. Elicitation methods, machine learning, and perception]: https://link.springer.com/epdf/10.1007/s10579-019-09450-y?author_access_token=5pf0w_D4k9z28TM6n4PbVPe4RwlQNchNByi7wbcMAY5hiA-aXzXNbZYfsMDDq2CdHD-w5ArAxIwlsk2nC_26pSyEAcu1xlKJ1c9m3JZj2ZlFmlVoCZUTcG3Hq2_2ozMLo3Hq3Y0CHzLdTxihQwch5Q%3D%3D
-[VERBO: Voice Emotion Recognition dataBase in Portuguese Language]: https://thescipub.com/pdf/jcssp.2018.1420.1430.pdf
-[A Parameterized and Annotated Spoken Dialog Corpus of the CMU Let’s Go Bus Information System]: http://www.lrec-conf.org/proceedings/lrec2012/pdf/333_Paper.pdf
-[Introducing the RECOLA Multimodal Corpus of Remote Collaborative and Affective Interactions]: https://drive.google.com/file/d/0B2V_I9XKBODhNENKUnZWNFdVXzQ/view
-[Multimodal Emotion Recognition]: http://personal.ee.surrey.ac.uk/Personal/P.Jackson/pub/ma10/HaqJackson_MachineAudition10_approved.pdf
-[The Perceived Emotion of Isolated Synthetic Audio: The EmoSynth Dataset and Results]: https://dl.acm.org/doi/10.1145/3243274.3243277
-[MSP-IMPROV: An Acted Corpus of Dyadic Interactions to Study Emotion Perception]: https://ecs.utdallas.edu/research/researchlabs/msp-lab/publications/Busso_2017.pdf
-[Multi-attention Recurrent Network for Human Communication Comprehension]: https://arxiv.org/pdf/1802.00923.pdf
-[Categorical vs Dimensional Perception of Italian Emotional Speech]: https://pdfs.semanticscholar.org/e70e/fcf7f5b4c366a7b7e2c16267d7f7691a5391.pdf
-[Multi-attention Recurrent Network for Human Communication Comprehension]: https://arxiv.org/pdf/1802.00923.pdf
-[ShEMO: a large-scale validated database for Persian speech emotion detection]: https://link.springer.com/article/10.1007/s10579-018-9427-x
-[The emotional voices database: Towards controlling the emotion dimension in voice generation systems]: https://arxiv.org/pdf/1806.09514.pdf
-[The Ryerson Audio-Visual Database of Emotional Speech and Song (RAVDESS): A dynamic, multimodal set of facial and vocal expressions in North American English]: https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0196391
-[An Open Source Emotional Speech Corpus for Human Robot Interaction Applications]: https://www.isca-speech.org/archive/Interspeech_2018/pdfs/1349.pdf
-[The SEMAINE Database: Annotated Multimodal Records of Emotionally Colored Conversations between a Person and a Limited Agent]: https://ieeexplore.ieee.org/document/5959155
diff --git a/src/index.rst b/src/index.rst
index 21715af..8aae0cd 100644
--- a/src/index.rst
+++ b/src/index.rst
@@ -40,103 +40,4 @@ Disclaimer
 ===========
 
 The maintainer and the contributors try their best to keep this list up-to-date, and to only include working links (using automated verification with the help of the `urlchecker-action <https://github.com/marketplace/actions/urlchecker-action>`_).
-However, we cannot guarantee that all listed links are up-to-date. Read more in `DISCLAIMER.md <https://github.com/SuperKogito/SER-datasets/blob/master/DISCLAIMER.md>`_.
-
-
-.. datasets
-
-.. _`MESD`: https://data.mendeley.com/datasets/cy34mh68j9/5
-.. _`MLEnd`: https://www.kaggle.com/datasets/jesusrequena/mlend-spoken-numerals
-.. _`ASVP-ESD`: https://www.kaggle.com/datasets/dejolilandry/asvpesdspeech-nonspeech-emotional-utterances
-.. _`ESD`: https://hltsingapore.github.io/ESD/
-.. _`MuSe-CAR`: https://zenodo.org/record/4134758
-.. _`MSP-Podcast corpus`: https://ecs.utdallas.edu/research/researchlabs/msp-lab/MSP-Podcast.html
-.. _`emotiontts open db`: https://github.com/emotiontts/emotiontts_open_db
-.. _`URDU-Dataset`: https://github.com/siddiquelatif/urdu-dataset
-.. _`BAVED`: https://www.kaggle.com/a13x10/basic-arabic-vocal-emotions-dataset
-.. _`VIVAE`: https://zenodo.org/record/4066235
-.. _`SEWA`: https://db.sewaproject.eu/
-.. _`MELD`: https://affective-meld.github.io/
-.. _`ShEMO`: https://github.com/mansourehk/ShEMO
-.. _`DEMoS`: https://zenodo.org/record/2544829
-.. _`VERBO`: https://sites.google.com/view/verbodatabase/home
-.. _`AESDD`: http://m3c.web.auth.gr/research/aesdd-speech-emotion-recognition/
-.. _`Emov-DB`: https://mega.nz/#F!KBp32apT!gLIgyWf9iQ-yqnWFUFuUHg!mYwUnI4K
-.. _`RAVDESS`: https://zenodo.org/record/1188976#.XrC7a5NKjOR
-.. _`JL corpus`: https://www.kaggle.com/tli725/jl-corpus
-.. _`CaFE`: https://zenodo.org/record/1478765
-.. _`EmoFilm`: https://zenodo.org/record/1326428
-.. _`ANAD`: https://www.kaggle.com/suso172/arabic-natural-audio-dataset
-.. _`EmoSynth`: https://zenodo.org/record/3727593
-.. _`CMU-MOSEI`: https://www.amir-zadeh.com/datasets
-.. _`CMU-MOSI`: https://www.amir-zadeh.com/datasets
-.. _`MSP-IMPROV`: https://ecs.utdallas.edu/research/researchlabs/msp-lab/MSP-Improv.html
-.. _`CREMA-D`: https://github.com/CheyneyComputerScience/CREMA-D
-.. _`Example emotion videos used in investigation of emotion perception in schizophrenia`: https://espace.library.uq.edu.au/view/UQ:446541
-.. _`EMOVO`: http://voice.fub.it/activities/corpora/emovo/index.html
-.. _`RECOLA`: https://diuf.unifr.ch/main/diva/recola/download.html
-.. _`GEMEP corpus`: https://www.unige.ch/cisa/gemep
-.. _`OGVC`: https://sites.google.com/site/ogcorpus/home/en
-.. _`LEGO corpus`: https://www.ultes.eu/ressources/lego-spoken-dialogue-corpus/
-.. _`SEMAINE`: https://semaine-db.eu/
-.. _`SAVEE`: http://kahlan.eps.surrey.ac.uk/savee/Database.html
-.. _`TESS`: https://tspace.library.utoronto.ca/handle/1807/24487
-.. _`EEKK`: https://metashare.ut.ee/repository/download/4d42d7a8463411e2a6e4005056b40024a19021a316b54b7fb707757d43d1a889/
-.. _`IEMOCAP`: https://sail.usc.edu/iemocap/iemocap_release.htm
-.. _`Keio-ESD`: http://research.nii.ac.jp/src/en/Keio-ESD.html
-.. _`EMO-DB`: http://emodb.bilderbar.info/index-1280.html
-.. _`eNTERFACE05`: http://www.enterface.net/enterface05/docs/results/databases/project2_database.zip
-.. _`DES`: http://kom.aau.dk/~tb/speech/Emotions/
-
-.. license
-
-.. _`CC BY 4.0`: https://creativecommons.org/licenses/by/4.0/
-.. _`CC BY-NC-SA 4.0`: https://creativecommons.org/licenses/by-nc-sa/4.0/
-.. _`CC BY-NC-ND 4.0`: https://creativecommons.org/licenses/by-nc-nd/4.0/
-.. _`CC-BY license`: https://metashare.ut.ee/repository/download/4d42d7a8463411e2a6e4005056b40024a19021a316b54b7fb707757d43d1a889/
-.. _`Permitted Non-commercial Re-use with Acknowledgment`: https://guides.library.uq.edu.au/deposit_your_data/terms_and_conditions
-.. _`Open Database License & Database Content License`: https://github.com/CheyneyComputerScience/CREMA-D/blob/master/LICENSE.txt
-.. _`CC0 1.0`: https://creativecommons.org/publicdomain/zero/1.0/
-.. _`CMU-MOSEI License`: https://github.com/A2Zadeh/CMU-MultimodalSDK/blob/master/LICENSE.txt
-.. _`CMU-MOSI License`: https://github.com/A2Zadeh/CMU-MultimodalSDK/blob/master/LICENSE.txt
-.. _`IEMOCAP license`: https://sail.usc.edu/iemocap/Data_Release_Form_IEMOCAP.pdf
-.. _`SEWA EULA`: https://db.sewaproject.eu/media/doc/eula.pdf
-.. _`Meld: GPL-3.0 License`: https://github.com/declare-lab/MELD/blob/master/LICENSE
-
-.. papers
-
-.. _`The Mexican Emotional Speech Database (MESD): elaboration and assessment based on machine learning`: https://pubmed.ncbi.nlm.nih.gov/34891601/
-.. _`Seen And Unseen Emotional Style Transfer For Voice Conversion With A New Emotional Speech Dataset`: https://arxiv.org/pdf/2010.14794.pdf
-.. _`The Multimodal Sentiment Analysis in Car Reviews (MuSe-CaR) Dataset: Collection, Insights and Improvements`: https://arxiv.org/pdf/2101.06053.pdf
-.. _`The MSP-Conversation Corpus`: http://www.interspeech2020.org/index.php?m=content&c=index&a=show&catid=290&id=684
-.. _`Cross Lingual Speech Emotion Recognition: Urdu vs. Western Languages`: https://arxiv.org/pdf/1812.10411.pdf
-.. _`Estonian Emotional Speech Corpus`: https://www.researchgate.net/publication/261724574_Estonian_Emotional_Speech_Corpus_Release_1
-.. _`IEMOCAP: Interactive emotional dyadic motion capture database`: https://sail.usc.edu/iemocap/Busso_2008_iemocap.pdf
-.. _`A Database of German Emotional Speech`: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.130.8506&rep=rep1&type=pdf
-.. _`SEWA DB: A Rich Database for Audio-Visual Emotion and Sentiment Research in the Wild`: https://arxiv.org/pdf/1901.02839.pdf
-.. _`Documentation of the Danish Emotional Speech Database`: http://kom.aau.dk/~tb/speech/Emotions/des.pdf
-.. _`EMOTIONAL SPEECH SYNTHESIS USING SUBSPACE CONSTRAINTS IN PROSODY`: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.420.8899&rep=rep1&type=pdf
-.. _`Naturalistic emotional speech collectionparadigm with online game and its psychological and acoustical assessment`: https://www.jstage.jst.go.jp/article/ast/33/6/33_E1175/_pdf
-.. _`EMOVO Corpus: an Italian Emotional Speech Database`: https://core.ac.uk/download/pdf/53857389.pdf
-.. _`VERBO: Voice Emotion Recognition dataBase in Portuguese Language`: https://thescipub.com/pdf/jcssp.2018.1420.1430.pdf
-.. _`The eNTERFACE’05 Audio-Visual Emotion Database`: http://poseidon.csd.auth.gr/papers/PUBLISHED/CONFERENCE/pdf/Martin06a.pdf
-.. _`Arabic Natural Audio Dataset`: https://data.mendeley.com/datasets/xm232yxf7t/1
-.. _`Introducing the Geneva Multimodal Expression Corpus for Experimental Research on Emotion Perception`: https://www.researchgate.net/publication/51796867_Introducing_the_Geneva_Multimodal_Expression_Corpus_for_Experimental_Research_on_Emotion_Perception
-.. _`Speech Emotion Recognition for Performance Interaction`: https://www.researchgate.net/publication/326005164_Speech_Emotion_Recognition_for_Performance_Interaction
-.. _`MELD: A Multimodal Multi-Party Dataset for Emotion Recognition in Conversations`: https://arxiv.org/pdf/1810.02508.pdf
-.. _`BEHAVIOURAL FINDINGS FROM THE TORONTO EMOTIONAL SPEECH SET`: https://www.semanticscholar.org/paper/BEHAVIOURAL-FINDINGS-FROM-THE-TORONTO-EMOTIONAL-SET-Dupuis-Pichora-Fuller/d7f746b3aee801a353b6929a65d9a34a68e71c6f/figure/2
-.. _`CREMA-D: Crowd-sourced Emotional Multimodal Actors Dataset`: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4313618/
-.. _`DEMoS: An Italian emotional speech corpus. Elicitation methods, machine learning, and perception`: https://link.springer.com/epdf/10.1007/s10579-019-09450-y?author_access_token=5pf0w_D4k9z28TM6n4PbVPe4RwlQNchNByi7wbcMAY5hiA-aXzXNbZYfsMDDq2CdHD-w5ArAxIwlsk2nC_26pSyEAcu1xlKJ1c9m3JZj2ZlFmlVoCZUTcG3Hq2_2ozMLo3Hq3Y0CHzLdTxihQwch5Q%3D%3D
-.. _`A Parameterized and Annotated Spoken Dialog Corpus of the CMU Let’s Go Bus Information System`: http://www.lrec-conf.org/proceedings/lrec2012/pdf/333_Paper.pdf
-.. _`Introducing the RECOLA Multimodal Corpus of Remote Collaborative and Affective Interactions`: https://drive.google.com/file/d/0B2V_I9XKBODhNENKUnZWNFdVXzQ/view
-.. _`Multimodal Emotion Recognition`: http://personal.ee.surrey.ac.uk/Personal/P.Jackson/pub/ma10/HaqJackson_MachineAudition10_approved.pdf
-.. _`The Perceived Emotion of Isolated Synthetic Audio: The EmoSynth Dataset and Results`: https://dl.acm.org/doi/10.1145/3243274.3243277
-.. _`MSP-IMPROV: An Acted Corpus of Dyadic Interactions to Study Emotion Perception`: https://ecs.utdallas.edu/research/researchlabs/msp-lab/publications/Busso_2017.pdf
-.. _`Multi-attention Recurrent Network for Human Communication Comprehension`: https://arxiv.org/pdf/1802.00923.pdf
-.. _`Categorical vs Dimensional Perception of Italian Emotional Speech`: https://pdfs.semanticscholar.org/e70e/fcf7f5b4c366a7b7e2c16267d7f7691a5391.pdf
-.. _`Multi-attention Recurrent Network for Human Communication Comprehension`: https://arxiv.org/pdf/1802.00923.pdf
-.. _`ShEMO: a large-scale validated database for Persian speech emotion detection`: https://link.springer.com/article/10.1007/s10579-018-9427-x
-.. _`The emotional voices database: Towards controlling the emotion dimension in voice generation systems`: https://arxiv.org/pdf/1806.09514.pdf
-.. _`The Ryerson Audio-Visual Database of Emotional Speech and Song (RAVDESS): A dynamic, multimodal set of facial and vocal expressions in North American English`: https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0196391
-.. _`An Open Source Emotional Speech Corpus for Human Robot Interaction Applications`: https://www.isca-speech.org/archive/Interspeech_2018/pdfs/1349.pdf
-.. _`The SEMAINE Database: Annotated Multimodal Records of Emotionally Colored Conversations between a Person and a Limited Agent`: https://ieeexplore.ieee.org/document/5959155
+However, we cannot guarantee that all listed links are up-to-date. Read more in `DISCLAIMER.md <https://github.com/SuperKogito/SER-datasets/blob/master/DISCLAIMER.md>`_.
\ No newline at end of file
diff --git a/src/ser-datasets.csv b/src/ser-datasets.csv
index f5e57ab..fd7c32f 100644
--- a/src/ser-datasets.csv
+++ b/src/ser-datasets.csv
@@ -1,43 +1,43 @@
-Dataset,Year,Content,Emotions,Format,Size,Language,Paper,Access,License
-`MESD`_,2022,864 audio files of single-word emotional utterances with Mexican cultural shaping.,"6 emotions provides single-word utterances for anger, disgust, fear, happiness, neutral, and sadness.",Audio,"0,097 GB",Spanish (Mexican),`The Mexican Emotional Speech Database (MESD): elaboration and assessment based on machine learning`_,Open,`CC BY 4.0`_
-`MLEnd`_,2021,"~32700 audio recordings files produced by 154 speakers. Each audio recording corresponds to one English numeral (from ""zero"" to ""billion"")","Intonations: neutral, bored, excited and question",Audio,2.27 GB,--,--,Open,Unknown
-`ASVP-ESD`_,2021,"~13285 audio files collected from movies, tv shows and youtube containing speech and non-speech.","12 different natural emotions (boredom, neutral, happiness, sadness, anger, fear, surprise, disgust, excitement, pleasure, pain, disappointment) with 2 levels of intensity.",Audio,2 GB,"Chinese, English, French, Russian and others",--,Open,Unknown
-`ESD`_,2021,"29 hours, 3500 sentences, by 10 native English speakers and 10 native Chinese speakers.","5 emotions: angry, happy, neutral, sad, and surprise.","Audio,  Text",2.4 GB (zip),"Chinese, English",`Seen And Unseen Emotional Style Transfer For Voice Conversion With A New Emotional Speech Dataset`_,Open,Academic License
-`MuSe-CAR`_,2021,"40 hours, 6,000+ recordings of 25,000+ sentences by 70+ English speakers (see db link for details).","continuous emotion dimensions characterized using valence, arousal, and trustworthiness.","Audio, Video, Text",15 GB,English,"`The Multimodal Sentiment Analysis in Car Reviews (MuSe-CaR) Dataset: Collection, Insights and Improvements`_",Restricted,Academic License & Commercial License
-`MSP-Podcast corpus`_,2020,100 hours by over 100 speakers (see db link for details).,"This corpus is annotated with emotional labels using attribute-based descriptors (activation, dominance and valence) and categorical labels (anger, happiness, sadness, disgust, surprised, fear, contempt, neutral and other).",Audio,--,--,`The MSP-Conversation Corpus`_,Restricted,Academic License & Commercial License
-`emotiontts open db`_,2020,Recordings and their associated transcriptions by a diverse group of speakers.,"4 emotions: general, joy, anger, and sadness.","Audio, Text",--,Korean,--,Partial Open,`CC BY-NC-SA 4.0`_
-`URDU-Dataset`_,2020,400 utterances by 38 speakers (27 male and 11 female).,"4 emotions: angry, happy, neutral, and sad.",Audio,0.072 GB,Urdu,`Cross Lingual Speech Emotion Recognition: Urdu vs. Western Languages`_,Open,--
-`BAVED`_,2020,1935 recording by 61 speakers (45 male and 16 female).,3 levels of emotion.,Audio,0.195 GB,Arabic,--,Open,--
-`VIVAE`_,2020,"non-speech, 1085 audio file by 12 speakers.","non-speech 6 emotions: achievement, anger, fear, pain, pleasure, and surprise with 3 emotional intensities (low, moderate, strong, peak).",Audio,--,--,--,Restricted,`CC BY-NC-SA 4.0`_
-`SEWA`_,2019,more than 2000 minutes of audio-visual data of 398 people (201 male and 197 female) coming from 6 cultures.,emotions are characterized using valence and arousal.,"Audio, Video",--,"Chinese, English, German, Greek, Hungarian and Serbian",`SEWA DB: A Rich Database for Audio-Visual Emotion and Sentiment Research in the Wild`_,Restricted,`SEWA EULA`_
-`MELD`_,2019,1400 dialogues and 14000 utterances from Friends TV series  by multiple speakers.,"7 emotions: Anger, disgust, sadness, joy, neutral, surprise and fear.  MELD also has sentiment (positive, negative and neutral) annotation  for each utterance.","Audio, Video, Text",10.1 GB,English,`MELD: A Multimodal Multi-Party Dataset for Emotion Recognition in Conversations`_,Open,`MELD: GPL-3.0 License`_
-`ShEMO`_,2019,"3000 semi-natural utterances, equivalent to 3 hours and 25 minutes of speech data from online radio plays by 87 native-Persian speakers.","6 emotions: anger, fear, happiness, sadness, neutral and surprise.",Audio,0.101 GB,Persian,`ShEMO: a large-scale validated database for Persian speech emotion detection`_,Open,--
-`DEMoS`_,2019,"9365 emotional and 332 neutral samples produced by 68 native speakers (23 females, 45 males).","7/6 emotions: anger, sadness, happiness, fear, surprise, disgust, and the secondary emotion guilt.",Audio,--,Italian,"`DEMoS: An Italian emotional speech corpus. Elicitation methods, machine learning, and perception`_",Restricted,EULA: End User License Agreement
-`AESDD`_,2018,around 500 utterances by a diverse group of actors (over 5 actors) siumlating various emotions.,"5 emotions: anger, disgust, fear, happiness, and sadness.",Audio,0.392 GB,Greek,`Speech Emotion Recognition for Performance Interaction`_,Open,--
-`Emov-DB`_,2018,Recordings for 4 speakers- 2 males and 2 females.,"The emotional styles are neutral, sleepiness, anger, disgust and amused.",Audio,5.88 GB,English,`The emotional voices database: Towards controlling the emotion dimension in voice generation systems`_,Open,--
-`RAVDESS`_,2018,7356 recordings by 24 actors.,"7 emotions: calm, happy, sad, angry, fearful, surprise, and disgust","Audio, Video",24.8 GB,English,"`The Ryerson Audio-Visual Database of Emotional Speech and Song (RAVDESS): A dynamic, multimodal set of facial and vocal expressions in North American English`_",Open,`CC BY-NC-SA 4.0`_
-`JL corpus`_,2018,2400 recording of 240 sentences by 4 actors (2 males and 2 females).,"5 primary emotions: angry, sad, neutral, happy, excited. 5 secondary emotions: anxious, apologetic, pensive, worried, enthusiastic.",Audio,--,English,`An Open Source Emotional Speech Corpus for Human Robot Interaction Applications`_,Open,`CC0 1.0`_
-`CaFE`_,2018,6 different sentences by 12 speakers (6 fmelaes + 6 males).,"7 emotions: happy, sad, angry, fearful, surprise, disgust and neutral. Each emotion is acted in 2 different intensities.",Audio,2 GB,French (Canadian),--,Open,`CC BY-NC-SA 4.0`_
-`EmoFilm`_,2018,1115 audio instances sentences extracted from various films.,"5 emotions: anger, contempt, happiness, fear, and sadness.",Audio,--,"English, Italian & Spanish",`Categorical vs Dimensional Perception of Italian Emotional Speech`_,Restricted,EULA: End User License Agreement
-`ANAD`_,2018,1384 recording by multiple speakers.,"3 emotions: angry, happy, surprised.",Audio,2 GB,Arabic,`Arabic Natural Audio Dataset`_,Open,`CC BY-NC-SA 4.0`_
-`EmoSynth`_,2018,144 audio file labelled by 40 listeners.,Emotion (no speech) defined in regard of valence and arousal.,Audio,0.1034 GB,--,`The Perceived Emotion of Isolated Synthetic Audio: The EmoSynth Dataset and Results`_,Open,`CC BY 4.0`_
-`CMU-MOSEI`_,2018,65 hours of annotated video from more than 1000 speakers and 250 topics.,"6 Emotion (happiness, sadness, anger,fear, disgust, surprise) + Likert scale.","Audio, Video",--,English,`Multi-attention Recurrent Network for Human Communication Comprehension`_,Open,`CMU-MOSEI License`_
-`VERBO`_,2018,14 different phrases by 12 speakers (6 female + 6 male) for a total of 1167 recordings.,"7 emotions: Happiness, Disgust, Fear, Neutral, Anger, Surprise, Sadness",Audio,--,Portuguese,`VERBO: Voice Emotion Recognition dataBase in Portuguese Language`_,Restricted,Available for research purposes only
-`CMU-MOSI`_,2017,2199 opinion utterances with annotated sentiment.,Sentiment annotated between very negative to very positive in seven Likert steps.,"Audio, Video",--,English,`Multi-attention Recurrent Network for Human Communication Comprehension`_,Open,`CMU-MOSI License`_
-`MSP-IMPROV`_,2017,20 sentences by 12 actors.,"4 emotions: angry, sad, happy, neutral, other, without agreement","Audio, Video",--,English,`MSP-IMPROV: An Acted Corpus of Dyadic Interactions to Study Emotion Perception`_,Restricted,Academic License & Commercial License
-`CREMA-D`_,2017,7442 clip of 12 sentences spoken by 91 actors (48 males and 43 females).,"6 emotions: angry, disgusted, fearful, happy, neutral, and sad","Audio, Video",--,English,`CREMA-D: Crowd-sourced Emotional Multimodal Actors Dataset`_,Open,`Open Database License & Database Content License`_
-`Example emotion videos used in investigation of emotion perception in schizophrenia`_,2017,"6 videos:Two example videos from each emotion category (angry, happy and neutral) by one female speaker.","3 emotions: angry, happy and neutral.","Audio, Video",0.063 GB,English,--,Open,`Permitted Non-commercial Re-use with Acknowledgment`_
-`EMOVO`_,2014,6 actors  who  played  14  sentences.,"6 emotions: disgust, fear, anger, joy, surprise, sadness.",Audio,0.355 GB,Italian,`EMOVO Corpus: an Italian Emotional Speech Database`_,Open,--
-`RECOLA`_,2013,3.8 hours of recordings by 46 participants.,negative and positive sentiment (valence and arousal).,"Audio, Video",--,--,`Introducing the RECOLA Multimodal Corpus of Remote Collaborative and Affective Interactions`_,Restricted,Academic License & Commercial License
-`GEMEP corpus`_,2012,Videos10 actors portraying 10 states.,"12 emotions: amusement, anxiety, cold anger (irritation), despair, hot anger (rage),  fear (panic), interest, joy (elation), pleasure(sensory), pride, relief, and sadness. Plus, 5 additional emotions: admiration, contempt, disgust, surprise, and tenderness.","Audio, Video",--,French,`Introducing the Geneva Multimodal Expression Corpus for Experimental Research on Emotion Perception`_,Restricted,--
-`OGVC`_,2012,9114 spontaneous utterances and 2656 acted utterances by 4 professional actors (two male and two female).,"9 emotional states: fear, surprise, sadness, disgust, anger, anticipation, joy, acceptance and the neutral state.",Audio,--,Japanese,`Naturalistic emotional speech collectionparadigm with online game and its psychological and acoustical assessment`_,Restricted,--
-`LEGO corpus`_,2012,"347 dialogs with 9,083 system-user exchanges.","Emotions classified as garbage, non-angry, slightly angry and very angry.",Audio,1.1 GB,--,`A Parameterized and Annotated Spoken Dialog Corpus of the CMU Let’s Go Bus Information System`_,Open,License available with the data. Free of charges for research purposes only.
-`SEMAINE`_,2012,95 dyadic conversations from 21 subjects. Each subject converses with another playing one of four characters with emotions.,"5 FeelTrace annotations: activation, valence, dominance, power, intensity","Audio, Video, Text",104 GB,English,`The SEMAINE Database: Annotated Multimodal Records of Emotionally Colored Conversations between a Person and a Limited Agent`_,Restricted,Academic EULA
-`SAVEE`_,2011,480 British English utterances by 4 males actors.,"7 emotions: anger, disgust, fear, happiness, sadness, surprise and neutral.","Audio, Video",--,English (British),`Multimodal Emotion Recognition`_,Restricted,Free of charges for research purposes only.
-`TESS`_,2010,2800 recording by 2 actresses.,"7 emotions: anger, disgust, fear, happiness, pleasant surprise, sadness, and neutral.",Audio,--,English,`BEHAVIOURAL FINDINGS FROM THE TORONTO EMOTIONAL SPEECH SET`_,Open,`CC BY-NC-ND 4.0`_
-`EEKK`_,2007,26 text passage read by 10 speakers.,"4 main emotions: joy, sadness, anger and neutral.",--,0.352 GB,Estonian,`Estonian Emotional Speech Corpus`_,Open,`CC-BY license`_
-`IEMOCAP`_,2007,12 hours of audiovisual data by 10 actors.,"5 emotions: happiness, anger, sadness, frustration and neutral.",--,--,English,`IEMOCAP: Interactive emotional dyadic motion capture database`_,Restricted,`IEMOCAP license`_
-`Keio-ESD`_,2006,A set of human speech with vocal emotion spoken by a Japanese male speaker.,"47 emotions including angry, joyful, disgusting, downgrading, funny,  worried, gentle, relief, indignation, shameful, etc.",Audio,--,Japanese,`EMOTIONAL SPEECH SYNTHESIS USING SUBSPACE CONSTRAINTS IN PROSODY`_,Restricted,Available for research purposes only
-`EMO-DB`_,2005,800 recording spoken by 10 actors (5 males and 5 females).,"7 emotions: anger, neutral, fear, boredom, happiness, sadness, disgust.",Audio,--,German,`A Database of German Emotional Speech`_,Open,--
-`eNTERFACE05`_,2005,"Videos by 42 subjects, coming from 14 different nationalities.","6 emotions: anger, fear, surprise, happiness, sadness and disgust.","Audio, Video",0.8 GB,German,,Open,Free of charges for research purposes only
-`DES`_,2002,4 speakers (2 males and 2 females).,"5 emotions: neutral,  surprise,  happiness,  sadness  and  anger",--,--,Danish,`Documentation of the Danish Emotional Speech Database`_,,
+Dataset,Year,Content,Emotions,Format,Size,Language,Paper,Access,License
+`MESD <https://data.mendeley.com/datasets/cy34mh68j9/5>`_,2022,864 audio files of single-word emotional utterances with Mexican cultural shaping.,"6 emotions provides single-word utterances for anger, disgust, fear, happiness, neutral, and sadness.",Audio,"0.097 GB",Spanish (Mexican),`The Mexican Emotional Speech Database (MESD): elaboration and assessment based on machine learning <https://pubmed.ncbi.nlm.nih.gov/34891601/>`_,Open,`CC BY 4.0 <https://creativecommons.org/licenses/by/4.0/>`_
+`MLEnd <https://www.kaggle.com/datasets/jesusrequena/mlend-spoken-numerals>`_,2021,"~32700 audio recording files produced by 154 speakers. Each audio recording corresponds to one English numeral (from ""zero"" to ""billion"")","Intonations: neutral, bored, excited and question",Audio,2.27 GB,--,--,Open,Unknown
+`ASVP-ESD <https://www.kaggle.com/datasets/dejolilandry/asvpesdspeech-nonspeech-emotional-utterances>`_,2021,"~13285 audio files collected from movies, tv shows and youtube containing speech and non-speech.","12 different natural emotions (boredom, neutral, happiness, sadness, anger, fear, surprise, disgust, excitement, pleasure, pain, disappointment) with 2 levels of intensity.",Audio,2 GB,"Chinese, English, French, Russian and others",--,Open,Unknown
+`ESD <https://hltsingapore.github.io/ESD/>`_,2021,"29 hours, 3500 sentences, by 10 native English speakers and 10 native Chinese speakers.","5 emotions: angry, happy, neutral, sad, and surprise.","Audio,  Text",2.4 GB (zip),"Chinese, English",`Seen And Unseen Emotional Style Transfer For Voice Conversion With A New Emotional Speech Dataset <https://arxiv.org/pdf/2010.14794.pdf>`_,Open,Academic License
+`MuSe-CAR <https://zenodo.org/record/4134758>`_,2021,"40 hours, 6,000+ recordings of 25,000+ sentences by 70+ English speakers (see db link for details).","continuous emotion dimensions characterized using valence, arousal, and trustworthiness.","Audio, Video, Text",15 GB,English,"`The Multimodal Sentiment Analysis in Car Reviews (MuSe-CaR) Dataset: Collection, Insights and Improvements <https://arxiv.org/pdf/2101.06053.pdf>`_",Restricted,Academic License & Commercial License
+`MSP-Podcast corpus <https://ecs.utdallas.edu/research/researchlabs/msp-lab/MSP-Podcast.html>`_,2020,100 hours by over 100 speakers (see db link for details).,"This corpus is annotated with emotional labels using attribute-based descriptors (activation, dominance and valence) and categorical labels (anger, happiness, sadness, disgust, surprised, fear, contempt, neutral and other).",Audio,--,--,`The MSP-Conversation Corpus <http://www.interspeech2020.org/index.php?m=content&c=index&a=show&catid=290&id=684>`_,Restricted,Academic License & Commercial License
+`emotiontts open db <https://github.com/emotiontts/emotiontts_open_db>`_,2020,Recordings and their associated transcriptions by a diverse group of speakers.,"4 emotions: general, joy, anger, and sadness.","Audio, Text",--,Korean,--,Partially open,`CC BY-NC-SA 4.0 <https://creativecommons.org/licenses/by-nc-sa/4.0/>`_
+`URDU-Dataset <https://github.com/siddiquelatif/urdu-dataset>`_,2020,400 utterances by 38 speakers (27 male and 11 female).,"4 emotions: angry, happy, neutral, and sad.",Audio,0.072 GB,Urdu,`Cross Lingual Speech Emotion Recognition: Urdu vs. Western Languages <https://arxiv.org/pdf/1812.10411.pdf>`_,Open,--
+`BAVED <https://www.kaggle.com/a13x10/basic-arabic-vocal-emotions-dataset>`_,2020,1935 recording by 61 speakers (45 male and 16 female).,3 levels of emotion.,Audio,0.195 GB,Arabic,--,Open,--
+`VIVAE <https://zenodo.org/record/4066235>`_,2020,"non-speech, 1085 audio file by 12 speakers.","non-speech 6 emotions: achievement, anger, fear, pain, pleasure, and surprise with 3 emotional intensities (low, moderate, strong, peak).",Audio,--,--,--,Restricted,`CC BY-NC-SA 4.0 <https://creativecommons.org/licenses/by-nc-sa/4.0/>`_
+`SEWA <https://db.sewaproject.eu/>`_,2019,more than 2000 minutes of audio-visual data of 398 people (201 male and 197 female) coming from 6 cultures.,emotions are characterized using valence and arousal.,"Audio, Video",--,"Chinese, English, German, Greek, Hungarian and Serbian",`SEWA DB: A Rich Database for Audio-Visual Emotion and Sentiment Research in the Wild <https://arxiv.org/pdf/1901.02839.pdf>`_,Restricted,`SEWA EULA <https://db.sewaproject.eu/media/doc/eula.pdf>`_
+`MELD <https://affective-meld.github.io/>`_,2019,1400 dialogues and 14000 utterances from Friends TV series  by multiple speakers.,"7 emotions: Anger, disgust, sadness, joy, neutral, surprise and fear.  MELD also has sentiment (positive, negative and neutral) annotation  for each utterance.","Audio, Video, Text",10.1 GB,English,`MELD: A Multimodal Multi-Party Dataset for Emotion Recognition in Conversations <https://arxiv.org/pdf/1810.02508.pdf>`_,Open,`MELD: GPL-3.0 License <https://github.com/declare-lab/MELD/blob/master/LICENSE>`_
+`ShEMO <https://github.com/mansourehk/ShEMO>`_,2019,"3000 semi-natural utterances, equivalent to 3 hours and 25 minutes of speech data from online radio plays by 87 native-Persian speakers.","6 emotions: anger, fear, happiness, sadness, neutral and surprise.",Audio,0.101 GB,Persian,`ShEMO: a large-scale validated database for Persian speech emotion detection <https://link.springer.com/article/10.1007/s10579-018-9427-x>`_,Open,--
+`DEMoS <https://zenodo.org/record/2544829>`_,2019,"9365 emotional and 332 neutral samples produced by 68 native speakers (23 females, 45 males).","7/6 emotions: anger, sadness, happiness, fear, surprise, disgust, and the secondary emotion guilt.",Audio,--,Italian,"`DEMoS: An Italian emotional speech corpus. Elicitation methods, machine learning, and perception <https://link.springer.com/epdf/10.1007/s10579-019-09450-y?author_access_token=5pf0w_D4k9z28TM6n4PbVPe4RwlQNchNByi7wbcMAY5hiA-aXzXNbZYfsMDDq2CdHD-w5ArAxIwlsk2nC_26pSyEAcu1xlKJ1c9m3JZj2ZlFmlVoCZUTcG3Hq2_2ozMLo3Hq3Y0CHzLdTxihQwch5Q%3D%3D>`_",Restricted,EULA: End User License Agreement
+`AESDD <http://m3c.web.auth.gr/research/aesdd-speech-emotion-recognition/>`_,2018,around 500 utterances by a diverse group of actors (over 5 actors) simulating various emotions.,"5 emotions: anger, disgust, fear, happiness, and sadness.",Audio,0.392 GB,Greek,`Speech Emotion Recognition for Performance Interaction <https://www.researchgate.net/publication/326005164_Speech_Emotion_Recognition_for_Performance_Interaction>`_,Open,--
+`Emov-DB <https://mega.nz/#F!KBp32apT!gLIgyWf9iQ-yqnWFUFuUHg!mYwUnI4K>`_,2018,Recordings for 4 speakers- 2 males and 2 females.,"The emotional styles are neutral, sleepiness, anger, disgust and amused.",Audio,5.88 GB,English,`The emotional voices database: Towards controlling the emotion dimension in voice generation systems <https://arxiv.org/pdf/1806.09514.pdf>`_,Open,--
+`RAVDESS <https://zenodo.org/record/1188976#.XrC7a5NKjOR>`_,2018,7356 recordings by 24 actors.,"7 emotions: calm, happy, sad, angry, fearful, surprise, and disgust","Audio, Video",24.8 GB,English,"`The Ryerson Audio-Visual Database of Emotional Speech and Song (RAVDESS): A dynamic, multimodal set of facial and vocal expressions in North American English <https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0196391>`_",Open,`CC BY-NC-SA 4.0 <https://creativecommons.org/licenses/by-nc-sa/4.0/>`_
+`JL corpus <https://www.kaggle.com/tli725/jl-corpus>`_,2018,2400 recording of 240 sentences by 4 actors (2 males and 2 females).,"5 primary emotions: angry, sad, neutral, happy, excited. 5 secondary emotions: anxious, apologetic, pensive, worried, enthusiastic.",Audio,--,English,`An Open Source Emotional Speech Corpus for Human Robot Interaction Applications <https://www.isca-speech.org/archive/Interspeech_2018/pdfs/1349.pdf>`_,Open,`CC0 1.0 <https://creativecommons.org/publicdomain/zero/1.0/>`_
+`CaFE <https://zenodo.org/record/1478765>`_,2018,6 different sentences by 12 speakers (6 females + 6 males).,"7 emotions: happy, sad, angry, fearful, surprise, disgust and neutral. Each emotion is acted in 2 different intensities.",Audio,2 GB,French (Canadian),--,Open,`CC BY-NC-SA 4.0 <https://creativecommons.org/licenses/by-nc-sa/4.0/>`_
+`EmoFilm <https://zenodo.org/record/1326428>`_,2018,1115 audio instances sentences extracted from various films.,"5 emotions: anger, contempt, happiness, fear, and sadness.",Audio,--,"English, Italian & Spanish",`Categorical vs Dimensional Perception of Italian Emotional Speech <https://pdfs.semanticscholar.org/e70e/fcf7f5b4c366a7b7e2c16267d7f7691a5391.pdf>`_,Restricted,EULA: End User License Agreement
+`ANAD <https://www.kaggle.com/suso172/arabic-natural-audio-dataset>`_,2018,1384 recording by multiple speakers.,"3 emotions: angry, happy, surprised.",Audio,2 GB,Arabic,`Arabic Natural Audio Dataset <https://data.mendeley.com/datasets/xm232yxf7t/1>`_,Open,`CC BY-NC-SA 4.0 <https://creativecommons.org/licenses/by-nc-sa/4.0/>`_
+`EmoSynth <https://zenodo.org/record/3727593>`_,2018,144 audio file labelled by 40 listeners.,Emotion (no speech) defined in regard of valence and arousal.,Audio,0.1034 GB,--,`The Perceived Emotion of Isolated Synthetic Audio: The EmoSynth Dataset and Results <https://dl.acm.org/doi/10.1145/3243274.3243277>`_,Open,`CC BY 4.0 <https://creativecommons.org/licenses/by/4.0/>`_
+`CMU-MOSEI <https://www.amir-zadeh.com/datasets>`_,2018,65 hours of annotated video from more than 1000 speakers and 250 topics.,"6 Emotion (happiness, sadness, anger,fear, disgust, surprise) + Likert scale.","Audio, Video",--,English,`Multi-attention Recurrent Network for Human Communication Comprehension <https://arxiv.org/pdf/1802.00923.pdf>`_,Open,`CMU-MOSEI License <https://github.com/A2Zadeh/CMU-MultimodalSDK/blob/master/LICENSE.txt>`_
+`VERBO <https://sites.google.com/view/verbodatabase/home>`_,2018,14 different phrases by 12 speakers (6 female + 6 male) for a total of 1167 recordings.,"7 emotions: Happiness, Disgust, Fear, Neutral, Anger, Surprise, Sadness",Audio,--,Portuguese,`VERBO: Voice Emotion Recognition dataBase in Portuguese Language <https://thescipub.com/pdf/jcssp.2018.1420.1430.pdf>`_,Restricted,Available for research purposes only
+`CMU-MOSI <https://www.amir-zadeh.com/datasets>`_,2017,2199 opinion utterances with annotated sentiment.,Sentiment annotated between very negative to very positive in seven Likert steps.,"Audio, Video",--,English,`Multi-attention Recurrent Network for Human Communication Comprehension <https://arxiv.org/pdf/1802.00923.pdf>`_,Open,`CMU-MOSI License <https://github.com/A2Zadeh/CMU-MultimodalSDK/blob/master/LICENSE.txt>`_
+`MSP-IMPROV <https://ecs.utdallas.edu/research/researchlabs/msp-lab/MSP-Improv.html>`_,2017,20 sentences by 12 actors.,"4 emotions: angry, sad, happy, neutral, other, without agreement","Audio, Video",--,English,`MSP-IMPROV: An Acted Corpus of Dyadic Interactions to Study Emotion Perception <https://ecs.utdallas.edu/research/researchlabs/msp-lab/publications/Busso_2017.pdf>`_,Restricted,Academic License & Commercial License
+`CREMA-D <https://github.com/CheyneyComputerScience/CREMA-D>`_,2017,7442 clip of 12 sentences spoken by 91 actors (48 males and 43 females).,"6 emotions: angry, disgusted, fearful, happy, neutral, and sad","Audio, Video",--,English,`CREMA-D: Crowd-sourced Emotional Multimodal Actors Dataset <https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4313618/>`_,Open,`Open Database License & Database Content License <https://github.com/CheyneyComputerScience/CREMA-D/blob/master/LICENSE.txt>`_
+`Example emotion videos used in investigation of emotion perception in schizophrenia <https://espace.library.uq.edu.au/view/UQ:446541>`_,2017,"6 videos: Two example videos from each emotion category (angry, happy and neutral) by one female speaker.","3 emotions: angry, happy and neutral.","Audio, Video",0.063 GB,English,--,Open,`Permitted Non-commercial Re-use with Acknowledgment <https://guides.library.uq.edu.au/deposit_your_data/terms_and_conditions>`_
+`EMOVO <http://voice.fub.it/activities/corpora/emovo/index.html>`_,2014,6 actors  who  played  14  sentences.,"6 emotions: disgust, fear, anger, joy, surprise, sadness.",Audio,0.355 GB,Italian,`EMOVO Corpus: an Italian Emotional Speech Database <https://core.ac.uk/download/pdf/53857389.pdf>`_,Open,--
+`RECOLA <https://diuf.unifr.ch/main/diva/recola/download.html>`_,2013,3.8 hours of recordings by 46 participants.,negative and positive sentiment (valence and arousal).,"Audio, Video",--,--,`Introducing the RECOLA Multimodal Corpus of Remote Collaborative and Affective Interactions <https://drive.google.com/file/d/0B2V_I9XKBODhNENKUnZWNFdVXzQ/view>`_,Restricted,Academic License & Commercial License
+`GEMEP corpus <https://www.unige.ch/cisa/gemep>`_,2012,Videos of 10 actors portraying 10 states.,"12 emotions: amusement, anxiety, cold anger (irritation), despair, hot anger (rage),  fear (panic), interest, joy (elation), pleasure(sensory), pride, relief, and sadness. Plus, 5 additional emotions: admiration, contempt, disgust, surprise, and tenderness.","Audio, Video",--,French,`Introducing the Geneva Multimodal Expression Corpus for Experimental Research on Emotion Perception <https://www.researchgate.net/publication/51796867_Introducing_the_Geneva_Multimodal_Expression_Corpus_for_Experimental_Research_on_Emotion_Perception>`_,Restricted,--
+`OGVC <https://sites.google.com/site/ogcorpus/home/en>`_,2012,9114 spontaneous utterances and 2656 acted utterances by 4 professional actors (two male and two female).,"9 emotional states: fear, surprise, sadness, disgust, anger, anticipation, joy, acceptance and the neutral state.",Audio,--,Japanese,`Naturalistic emotional speech collection paradigm with online game and its psychological and acoustical assessment <https://www.jstage.jst.go.jp/article/ast/33/6/33_E1175/_pdf>`_,Restricted,--
+`LEGO corpus <https://www.ultes.eu/ressources/lego-spoken-dialogue-corpus/>`_,2012,"347 dialogs with 9,083 system-user exchanges.","Emotions classified as garbage, non-angry, slightly angry and very angry.",Audio,1.1 GB,--,`A Parameterized and Annotated Spoken Dialog Corpus of the CMU Let’s Go Bus Information System <http://www.lrec-conf.org/proceedings/lrec2012/pdf/333_Paper.pdf>`_,Open,License available with the data. Free of charges for research purposes only.
+`SEMAINE <https://semaine-db.eu/>`_,2012,95 dyadic conversations from 21 subjects. Each subject converses with another playing one of four characters with emotions.,"5 FeelTrace annotations: activation, valence, dominance, power, intensity","Audio, Video, Text",104 GB,English,`The SEMAINE Database: Annotated Multimodal Records of Emotionally Colored Conversations between a Person and a Limited Agent <https://ieeexplore.ieee.org/document/5959155>`_,Restricted,Academic EULA
+`SAVEE <http://kahlan.eps.surrey.ac.uk/savee/Database.html>`_,2011,480 British English utterances by 4 males actors.,"7 emotions: anger, disgust, fear, happiness, sadness, surprise and neutral.","Audio, Video",--,English (British),`Multimodal Emotion Recognition <http://personal.ee.surrey.ac.uk/Personal/P.Jackson/pub/ma10/HaqJackson_MachineAudition10_approved.pdf>`_,Restricted,Free of charges for research purposes only.
+`TESS <https://tspace.library.utoronto.ca/handle/1807/24487>`_,2010,2800 recording by 2 actresses.,"7 emotions: anger, disgust, fear, happiness, pleasant surprise, sadness, and neutral.",Audio,--,English,`BEHAVIOURAL FINDINGS FROM THE TORONTO EMOTIONAL SPEECH SET <https://www.semanticscholar.org/paper/BEHAVIOURAL-FINDINGS-FROM-THE-TORONTO-EMOTIONAL-SET-Dupuis-Pichora-Fuller/d7f746b3aee801a353b6929a65d9a34a68e71c6f/figure/2>`_,Open,`CC BY-NC-ND 4.0 <https://creativecommons.org/licenses/by-nc-nd/4.0/>`_
+`EEKK <https://metashare.ut.ee/repository/download/4d42d7a8463411e2a6e4005056b40024a19021a316b54b7fb707757d43d1a889/>`_,2007,26 text passage read by 10 speakers.,"4 main emotions: joy, sadness, anger and neutral.",--,0.352 GB,Estonian,`Estonian Emotional Speech Corpus <https://www.researchgate.net/publication/261724574_Estonian_Emotional_Speech_Corpus_Release_1>`_,Open,`CC-BY license <https://metashare.ut.ee/repository/download/4d42d7a8463411e2a6e4005056b40024a19021a316b54b7fb707757d43d1a889/>`_
+`IEMOCAP <https://sail.usc.edu/iemocap/iemocap_release.htm>`_,2007,12 hours of audiovisual data by 10 actors.,"5 emotions: happiness, anger, sadness, frustration and neutral.",--,--,English,`IEMOCAP: Interactive emotional dyadic motion capture database <https://sail.usc.edu/iemocap/Busso_2008_iemocap.pdf>`_,Restricted,`IEMOCAP license <https://sail.usc.edu/iemocap/Data_Release_Form_IEMOCAP.pdf>`_
+`Keio-ESD <http://research.nii.ac.jp/src/en/Keio-ESD.html>`_,2006,A set of human speech with vocal emotion spoken by a Japanese male speaker.,"47 emotions including angry, joyful, disgusting, downgrading, funny,  worried, gentle, relief, indignation, shameful, etc.",Audio,--,Japanese,`EMOTIONAL SPEECH SYNTHESIS USING SUBSPACE CONSTRAINTS IN PROSODY <http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.420.8899&rep=rep1&type=pdf>`_,Restricted,Available for research purposes only.
+`EMO-DB <http://emodb.bilderbar.info/index-1280.html>`_,2005,800 recording spoken by 10 actors (5 males and 5 females).,"7 emotions: anger, neutral, fear, boredom, happiness, sadness, disgust.",Audio,--,German,`A Database of German Emotional Speech <http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.130.8506&rep=rep1&type=pdf>`_,Open,--
+`eNTERFACE05 <http://www.enterface.net/enterface05/docs/results/databases/project2_database.zip>`_,2005,"Videos by 42 subjects, coming from 14 different nationalities.","6 emotions: anger, fear, surprise, happiness, sadness and disgust.","Audio, Video",0.8 GB,German,--,Open,Free of charges for research purposes only.
+`DES <http://kom.aau.dk/~tb/speech/Emotions/>`_,2002,4 speakers (2 males and 2 females).,"5 emotions: neutral,  surprise,  happiness,  sadness  and  anger",--,--,Danish,`Documentation of the Danish Emotional Speech Database <http://kom.aau.dk/~tb/speech/Emotions/des.pdf>`_,--,--

From ffe2cc1b37ecd330e286216cefb1ca83f17a98b9 Mon Sep 17 00:00:00 2001
From: SuperKogito <superkogito@gmail.com>
Date: Mon, 13 Feb 2023 22:22:35 +0100
Subject: [PATCH 4/5] add json file for datasets

---
 src/ser-datasets.json | 590 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 590 insertions(+)
 create mode 100644 src/ser-datasets.json

diff --git a/src/ser-datasets.json b/src/ser-datasets.json
new file mode 100644
index 0000000..00f9f0d
--- /dev/null
+++ b/src/ser-datasets.json
@@ -0,0 +1,590 @@
+{
+    "MESD": {
+        "Year": 2022,
+        "Content": "864 audio files of single-word emotional utterances with Mexican cultural shaping.",
+        "Emotions": "6 emotions provides single-word utterances for anger, disgust, fear, happiness, neutral, and sadness.",
+        "Format": "Audio",
+        "Size": "0,097 GB",
+        "Language": "Spanish (Mexican)",
+        "Paper": "The Mexican Emotional Speech Database (MESD): elaboration and assessment based on machine learning",
+        "Access": "Open",
+        "License": "CC BY 4.0",
+        "Dataset-link": "https://data.mendeley.com/datasets/cy34mh68j9/5",
+        "Paper-link": "https://pubmed.ncbi.nlm.nih.gov/34891601/",
+        "License-link": "https://creativecommons.org/licenses/by/4.0/"
+    },
+    "MLEnd": {
+        "Year": 2021,
+        "Content": "~32700 audio recordings files produced by 154 speakers. Each audio recording corresponds to one English numeral (from \"zero\" to \"billion\")",
+        "Emotions": "Intonations: neutral, bored, excited and question",
+        "Format": "Audio",
+        "Size": "2.27 GB",
+        "Language": "--",
+        "Paper": "--",
+        "Access": "Open",
+        "License": "Unknown",
+        "Dataset-link": "https://www.kaggle.com/datasets/jesusrequena/mlend-spoken-numerals",
+        "Paper-link": "--",
+        "License-link": "Unknown"
+    },
+    "ASVP-ESD": {
+        "Year": 2021,
+        "Content": "~13285 audio files collected from movies, tv shows and youtube containing speech and non-speech.",
+        "Emotions": "12 different natural emotions (boredom, neutral, happiness, sadness, anger, fear, surprise, disgust, excitement, pleasure, pain, disappointment) with 2 levels of intensity.",
+        "Format": "Audio",
+        "Size": "2 GB",
+        "Language": "Chinese, English, French, Russian and others",
+        "Paper": "--",
+        "Access": "Open",
+        "License": "Unknown",
+        "Dataset-link": "https://www.kaggle.com/datasets/dejolilandry/asvpesdspeech-nonspeech-emotional-utterances",
+        "Paper-link": "--",
+        "License-link": "Unknown"
+    },
+    "ESD": {
+        "Year": 2021,
+        "Content": "29 hours, 3500 sentences, by 10 native English speakers and 10 native Chinese speakers.",
+        "Emotions": "5 emotions: angry, happy, neutral, sad, and surprise.",
+        "Format": "Audio,  Text",
+        "Size": "2.4 GB (zip)",
+        "Language": "Chinese, English",
+        "Paper": "Seen And Unseen Emotional Style Transfer For Voice Conversion With A New Emotional Speech Dataset",
+        "Access": "Open",
+        "License": "Academic License",
+        "Dataset-link": "https://hltsingapore.github.io/ESD/",
+        "Paper-link": "https://arxiv.org/pdf/2010.14794.pdf",
+        "License-link": "Academic License"
+    },
+    "MuSe-CAR": {
+        "Year": 2021,
+        "Content": "40 hours, 6,000+ recordings of 25,000+ sentences by 70+ English speakers (see db link for details).",
+        "Emotions": "continuous emotion dimensions characterized using valence, arousal, and trustworthiness.",
+        "Format": "Audio, Video, Text",
+        "Size": "15 GB",
+        "Language": "English",
+        "Paper": "The Multimodal Sentiment Analysis in Car Reviews (MuSe-CaR) Dataset: Collection, Insights and Improvements",
+        "Access": "Restricted",
+        "License": "Academic License & Commercial License",
+        "Dataset-link": "https://zenodo.org/record/4134758",
+        "Paper-link": "https://arxiv.org/pdf/2101.06053.pdf",
+        "License-link": "Academic License & Commercial License"
+    },
+    "MSP-Podcast corpus": {
+        "Year": 2020,
+        "Content": "100 hours by over 100 speakers (see db link for details).",
+        "Emotions": "This corpus is annotated with emotional labels using attribute-based descriptors (activation, dominance and valence) and categorical labels (anger, happiness, sadness, disgust, surprised, fear, contempt, neutral and other).",
+        "Format": "Audio",
+        "Size": "--",
+        "Language": "--",
+        "Paper": "The MSP-Conversation Corpus",
+        "Access": "Restricted",
+        "License": "Academic License & Commercial License",
+        "Dataset-link": "https://ecs.utdallas.edu/research/researchlabs/msp-lab/MSP-Podcast.html",
+        "Paper-link": "http://www.interspeech2020.org/index.php?m=content&c=index&a=show&catid=290&id=684",
+        "License-link": "Academic License & Commercial License"
+    },
+    "emotiontts open db": {
+        "Year": 2020,
+        "Content": "Recordings and their associated transcriptions by a diverse group of speakers.",
+        "Emotions": "4 emotions: general, joy, anger, and sadness.",
+        "Format": "Audio, Text",
+        "Size": "--",
+        "Language": "Korean",
+        "Paper": "--",
+        "Access": "Partially open",
+        "License": "CC BY-NC-SA 4.0",
+        "Dataset-link": "https://github.com/emotiontts/emotiontts_open_db",
+        "Paper-link": "--",
+        "License-link": "https://creativecommons.org/licenses/by-nc-sa/4.0/"
+    },
+    "URDU-Dataset": {
+        "Year": 2020,
+        "Content": "400 utterances by 38 speakers (27 male and 11 female).",
+        "Emotions": "4 emotions: angry, happy, neutral, and sad.",
+        "Format": "Audio",
+        "Size": "0.072 GB",
+        "Language": "Urdu",
+        "Paper": "Cross Lingual Speech Emotion Recognition: Urdu vs. Western Languages",
+        "Access": "Open",
+        "License": "--",
+        "Dataset-link": "https://github.com/siddiquelatif/urdu-dataset",
+        "Paper-link": "https://arxiv.org/pdf/1812.10411.pdf",
+        "License-link": "--"
+    },
+    "BAVED": {
+        "Year": 2020,
+        "Content": "1935 recording by 61 speakers (45 male and 16 female).",
+        "Emotions": "3 levels of emotion.",
+        "Format": "Audio",
+        "Size": "0.195 GB",
+        "Language": "Arabic",
+        "Paper": "--",
+        "Access": "Open",
+        "License": "--",
+        "Dataset-link": "https://www.kaggle.com/a13x10/basic-arabic-vocal-emotions-dataset",
+        "Paper-link": "--",
+        "License-link": "--"
+    },
+    "VIVAE": {
+        "Year": 2020,
+        "Content": "non-speech, 1085 audio file by 12 speakers.",
+        "Emotions": "non-speech 6 emotions: achievement, anger, fear, pain, pleasure, and surprise with 4 emotional intensities (low, moderate, strong, peak).",
+        "Format": "Audio",
+        "Size": "--",
+        "Language": "--",
+        "Paper": "--",
+        "Access": "Restricted",
+        "License": "CC BY-NC-SA 4.0",
+        "Dataset-link": "https://zenodo.org/record/4066235",
+        "Paper-link": "--",
+        "License-link": "https://creativecommons.org/licenses/by-nc-sa/4.0/"
+    },
+    "SEWA": {
+        "Year": 2019,
+        "Content": "more than 2000 minutes of audio-visual data of 398 people (201 male and 197 female) coming from 6 cultures.",
+        "Emotions": "emotions are characterized using valence and arousal.",
+        "Format": "Audio, Video",
+        "Size": "--",
+        "Language": "Chinese, English, German, Greek, Hungarian and Serbian",
+        "Paper": "SEWA DB: A Rich Database for Audio-Visual Emotion and Sentiment Research in the Wild",
+        "Access": "Restricted",
+        "License": "SEWA EULA",
+        "Dataset-link": "https://db.sewaproject.eu/",
+        "Paper-link": "https://arxiv.org/pdf/1901.02839.pdf",
+        "License-link": "https://db.sewaproject.eu/media/doc/eula.pdf"
+    },
+    "MELD": {
+        "Year": 2019,
+        "Content": "1400 dialogues and 14000 utterances from Friends TV series  by multiple speakers.",
+        "Emotions": "7 emotions: Anger, disgust, sadness, joy, neutral, surprise and fear.  MELD also has sentiment (positive, negative and neutral) annotation  for each utterance.",
+        "Format": "Audio, Video, Text",
+        "Size": "10.1 GB",
+        "Language": "English",
+        "Paper": "MELD: A Multimodal Multi-Party Dataset for Emotion Recognition in Conversations",
+        "Access": "Open",
+        "License": "MELD: GPL-3.0 License",
+        "Dataset-link": "https://affective-meld.github.io/",
+        "Paper-link": "https://arxiv.org/pdf/1810.02508.pdf",
+        "License-link": "https://github.com/declare-lab/MELD/blob/master/LICENSE"
+    },
+    "ShEMO": {
+        "Year": 2019,
+        "Content": "3000 semi-natural utterances, equivalent to 3 hours and 25 minutes of speech data from online radio plays by 87 native-Persian speakers.",
+        "Emotions": "6 emotions: anger, fear, happiness, sadness, neutral and surprise.",
+        "Format": "Audio",
+        "Size": "0.101 GB",
+        "Language": "Persian",
+        "Paper": "ShEMO: a large-scale validated database for Persian speech emotion detection",
+        "Access": "Open",
+        "License": "--",
+        "Dataset-link": "https://github.com/mansourehk/ShEMO",
+        "Paper-link": "https://link.springer.com/article/10.1007/s10579-018-9427-x",
+        "License-link": "--"
+    },
+    "DEMoS": {
+        "Year": 2019,
+        "Content": "9365 emotional and 332 neutral samples produced by 68 native speakers (23 females, 45 males).",
+        "Emotions": "7/6 emotions: anger, sadness, happiness, fear, surprise, disgust, and the secondary emotion guilt.",
+        "Format": "Audio",
+        "Size": "--",
+        "Language": "Italian",
+        "Paper": "DEMoS: An Italian emotional speech corpus. Elicitation methods, machine learning, and perception",
+        "Access": "Restricted",
+        "License": "EULA: End User License Agreement",
+        "Dataset-link": "https://zenodo.org/record/2544829",
+        "Paper-link": "https://link.springer.com/epdf/10.1007/s10579-019-09450-y?author_access_token=5pf0w_D4k9z28TM6n4PbVPe4RwlQNchNByi7wbcMAY5hiA-aXzXNbZYfsMDDq2CdHD-w5ArAxIwlsk2nC_26pSyEAcu1xlKJ1c9m3JZj2ZlFmlVoCZUTcG3Hq2_2ozMLo3Hq3Y0CHzLdTxihQwch5Q%3D%3D",
+        "License-link": "EULA: End User License Agreement"
+    },
+    "AESDD": {
+        "Year": 2018,
+        "Content": "around 500 utterances by a diverse group of actors (over 5 actors) simulating various emotions.",
+        "Emotions": "5 emotions: anger, disgust, fear, happiness, and sadness.",
+        "Format": "Audio",
+        "Size": "0.392 GB",
+        "Language": "Greek",
+        "Paper": "Speech Emotion Recognition for Performance Interaction",
+        "Access": "Open",
+        "License": "--",
+        "Dataset-link": "http://m3c.web.auth.gr/research/aesdd-speech-emotion-recognition/",
+        "Paper-link": "https://www.researchgate.net/publication/326005164_Speech_Emotion_Recognition_for_Performance_Interaction",
+        "License-link": "--"
+    },
+    "Emov-DB": {
+        "Year": 2018,
+        "Content": "Recordings for 4 speakers- 2 males and 2 females.",
+        "Emotions": "The emotional styles are neutral, sleepiness, anger, disgust and amused.",
+        "Format": "Audio",
+        "Size": "5.88 GB",
+        "Language": "English",
+        "Paper": "The emotional voices database: Towards controlling the emotion dimension in voice generation systems",
+        "Access": "Open",
+        "License": "--",
+        "Dataset-link": "https://mega.nz/#F!KBp32apT!gLIgyWf9iQ-yqnWFUFuUHg!mYwUnI4K",
+        "Paper-link": "https://arxiv.org/pdf/1806.09514.pdf",
+        "License-link": "--"
+    },
+    "RAVDESS": {
+        "Year": 2018,
+        "Content": "7356 recordings by 24 actors.",
+        "Emotions": "7 emotions: calm, happy, sad, angry, fearful, surprise, and disgust",
+        "Format": "Audio, Video",
+        "Size": "24.8 GB",
+        "Language": "English",
+        "Paper": "The Ryerson Audio-Visual Database of Emotional Speech and Song (RAVDESS): A dynamic, multimodal set of facial and vocal expressions in North American English",
+        "Access": "Open",
+        "License": "CC BY-NC-SA 4.0",
+        "Dataset-link": "https://zenodo.org/record/1188976#.XrC7a5NKjOR",
+        "Paper-link": "https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0196391",
+        "License-link": "https://creativecommons.org/licenses/by-nc-sa/4.0/"
+    },
+    "JL corpus": {
+        "Year": 2018,
+        "Content": "2400 recording of 240 sentences by 4 actors (2 males and 2 females).",
+        "Emotions": "5 primary emotions: angry, sad, neutral, happy, excited. 5 secondary emotions: anxious, apologetic, pensive, worried, enthusiastic.",
+        "Format": "Audio",
+        "Size": "--",
+        "Language": "English",
+        "Paper": "An Open Source Emotional Speech Corpus for Human Robot Interaction Applications",
+        "Access": "Open",
+        "License": "CC0 1.0",
+        "Dataset-link": "https://www.kaggle.com/tli725/jl-corpus",
+        "Paper-link": "https://www.isca-speech.org/archive/Interspeech_2018/pdfs/1349.pdf",
+        "License-link": "https://creativecommons.org/publicdomain/zero/1.0/"
+    },
+    "CaFE": {
+        "Year": 2018,
+        "Content": "6 different sentences by 12 speakers (6 females + 6 males).",
+        "Emotions": "7 emotions: happy, sad, angry, fearful, surprise, disgust and neutral. Each emotion is acted in 2 different intensities.",
+        "Format": "Audio",
+        "Size": "2 GB",
+        "Language": "French (Canadian)",
+        "Paper": "--",
+        "Access": "Open",
+        "License": "CC BY-NC-SA 4.0",
+        "Dataset-link": "https://zenodo.org/record/1478765",
+        "Paper-link": "--",
+        "License-link": "https://creativecommons.org/licenses/by-nc-sa/4.0/"
+    },
+    "EmoFilm": {
+        "Year": 2018,
+        "Content": "1115 audio instances sentences extracted from various films.",
+        "Emotions": "5 emotions: anger, contempt, happiness, fear, and sadness.",
+        "Format": "Audio",
+        "Size": "--",
+        "Language": "English, Italian & Spanish",
+        "Paper": "Categorical vs Dimensional Perception of Italian Emotional Speech",
+        "Access": "Restricted",
+        "License": "EULA: End User License Agreement",
+        "Dataset-link": "https://zenodo.org/record/1326428",
+        "Paper-link": "https://pdfs.semanticscholar.org/e70e/fcf7f5b4c366a7b7e2c16267d7f7691a5391.pdf",
+        "License-link": "EULA: End User License Agreement"
+    },
+    "ANAD": {
+        "Year": 2018,
+        "Content": "1384 recording by multiple speakers.",
+        "Emotions": "3 emotions: angry, happy, surprised.",
+        "Format": "Audio",
+        "Size": "2 GB",
+        "Language": "Arabic",
+        "Paper": "Arabic Natural Audio Dataset",
+        "Access": "Open",
+        "License": "CC BY-NC-SA 4.0",
+        "Dataset-link": "https://www.kaggle.com/suso172/arabic-natural-audio-dataset",
+        "Paper-link": "https://data.mendeley.com/datasets/xm232yxf7t/1",
+        "License-link": "https://creativecommons.org/licenses/by-nc-sa/4.0/"
+    },
+    "EmoSynth": {
+        "Year": 2018,
+        "Content": "144 audio file labelled by 40 listeners.",
+        "Emotions": "Emotion (no speech) defined in regard of valence and arousal.",
+        "Format": "Audio",
+        "Size": "0.1034 GB",
+        "Language": "--",
+        "Paper": "The Perceived Emotion of Isolated Synthetic Audio: The EmoSynth Dataset and Results",
+        "Access": "Open",
+        "License": "CC BY 4.0",
+        "Dataset-link": "https://zenodo.org/record/3727593",
+        "Paper-link": "https://dl.acm.org/doi/10.1145/3243274.3243277",
+        "License-link": "https://creativecommons.org/licenses/by/4.0/"
+    },
+    "CMU-MOSEI": {
+        "Year": 2018,
+        "Content": "65 hours of annotated video from more than 1000 speakers and 250 topics.",
+        "Emotions": "6 Emotion (happiness, sadness, anger,fear, disgust, surprise) + Likert scale.",
+        "Format": "Audio, Video",
+        "Size": "--",
+        "Language": "English",
+        "Paper": "Multi-attention Recurrent Network for Human Communication Comprehension",
+        "Access": "Open",
+        "License": "CMU-MOSEI License",
+        "Dataset-link": "https://www.amir-zadeh.com/datasets",
+        "Paper-link": "https://arxiv.org/pdf/1802.00923.pdf",
+        "License-link": "https://github.com/A2Zadeh/CMU-MultimodalSDK/blob/master/LICENSE.txt"
+    },
+    "VERBO": {
+        "Year": 2018,
+        "Content": "14 different phrases by 12 speakers (6 female + 6 male) for a total of 1167 recordings.",
+        "Emotions": "7 emotions: Happiness, Disgust, Fear, Neutral, Anger, Surprise, Sadness",
+        "Format": "Audio",
+        "Size": "--",
+        "Language": "Portuguese",
+        "Paper": "VERBO: Voice Emotion Recognition dataBase in Portuguese Language",
+        "Access": "Restricted",
+        "License": "Available for research purposes only",
+        "Dataset-link": "https://sites.google.com/view/verbodatabase/home",
+        "Paper-link": "https://thescipub.com/pdf/jcssp.2018.1420.1430.pdf",
+        "License-link": "Available for research purposes only"
+    },
+    "CMU-MOSI": {
+        "Year": 2017,
+        "Content": "2199 opinion utterances with annotated sentiment.",
+        "Emotions": "Sentiment annotated between very negative to very positive in seven Likert steps.",
+        "Format": "Audio, Video",
+        "Size": "--",
+        "Language": "English",
+        "Paper": "Multi-attention Recurrent Network for Human Communication Comprehension",
+        "Access": "Open",
+        "License": "CMU-MOSI License",
+        "Dataset-link": "https://www.amir-zadeh.com/datasets",
+        "Paper-link": "https://arxiv.org/pdf/1802.00923.pdf",
+        "License-link": "https://github.com/A2Zadeh/CMU-MultimodalSDK/blob/master/LICENSE.txt"
+    },
+    "MSP-IMPROV": {
+        "Year": 2017,
+        "Content": "20 sentences by 12 actors.",
+        "Emotions": "4 emotions: angry, sad, happy, neutral, other, without agreement",
+        "Format": "Audio, Video",
+        "Size": "--",
+        "Language": "English",
+        "Paper": "MSP-IMPROV: An Acted Corpus of Dyadic Interactions to Study Emotion Perception",
+        "Access": "Restricted",
+        "License": "Academic License & Commercial License",
+        "Dataset-link": "https://ecs.utdallas.edu/research/researchlabs/msp-lab/MSP-Improv.html",
+        "Paper-link": "https://ecs.utdallas.edu/research/researchlabs/msp-lab/publications/Busso_2017.pdf",
+        "License-link": "Academic License & Commercial License"
+    },
+    "CREMA-D": {
+        "Year": 2017,
+        "Content": "7442 clip of 12 sentences spoken by 91 actors (48 males and 43 females).",
+        "Emotions": "6 emotions: angry, disgusted, fearful, happy, neutral, and sad",
+        "Format": "Audio, Video",
+        "Size": "--",
+        "Language": "English",
+        "Paper": "CREMA-D: Crowd-sourced Emotional Multimodal Actors Dataset",
+        "Access": "Open",
+        "License": "Open Database License & Database Content License",
+        "Dataset-link": "https://github.com/CheyneyComputerScience/CREMA-D",
+        "Paper-link": "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4313618/",
+        "License-link": "https://github.com/CheyneyComputerScience/CREMA-D/blob/master/LICENSE.txt"
+    },
+    "Example emotion videos used in investigation of emotion perception in schizophrenia": {
+        "Year": 2017,
+        "Content": "6 videos: Two example videos from each emotion category (angry, happy and neutral) by one female speaker.",
+        "Emotions": "3 emotions: angry, happy and neutral.",
+        "Format": "Audio, Video",
+        "Size": "0.063 GB",
+        "Language": "English",
+        "Paper": "--",
+        "Access": "Open",
+        "License": "Permitted Non-commercial Re-use with Acknowledgment",
+        "Dataset-link": "https://espace.library.uq.edu.au/view/UQ:446541",
+        "Paper-link": "--",
+        "License-link": "https://guides.library.uq.edu.au/deposit_your_data/terms_and_conditions"
+    },
+    "EMOVO": {
+        "Year": 2014,
+        "Content": "6 actors  who  played  14  sentences.",
+        "Emotions": "6 emotions: disgust, fear, anger, joy, surprise, sadness.",
+        "Format": "Audio",
+        "Size": "0.355 GB",
+        "Language": "Italian",
+        "Paper": "EMOVO Corpus: an Italian Emotional Speech Database",
+        "Access": "Open",
+        "License": "--",
+        "Dataset-link": "http://voice.fub.it/activities/corpora/emovo/index.html",
+        "Paper-link": "https://core.ac.uk/download/pdf/53857389.pdf",
+        "License-link": "--"
+    },
+    "RECOLA": {
+        "Year": 2013,
+        "Content": "3.8 hours of recordings by 46 participants.",
+        "Emotions": "negative and positive sentiment (valence and arousal).",
+        "Format": "Audio, Video",
+        "Size": "--",
+        "Language": "--",
+        "Paper": "Introducing the RECOLA Multimodal Corpus of Remote Collaborative and Affective Interactions",
+        "Access": "Restricted",
+        "License": "Academic License & Commercial License",
+        "Dataset-link": "https://diuf.unifr.ch/main/diva/recola/download.html",
+        "Paper-link": "https://drive.google.com/file/d/0B2V_I9XKBODhNENKUnZWNFdVXzQ/view",
+        "License-link": "Academic License & Commercial License"
+    },
+    "GEMEP corpus": {
+        "Year": 2012,
+        "Content": "Videos of 10 actors portraying 10 states.",
+        "Emotions": "12 emotions: amusement, anxiety, cold anger (irritation), despair, hot anger (rage),  fear (panic), interest, joy (elation), pleasure(sensory), pride, relief, and sadness. Plus, 5 additional emotions: admiration, contempt, disgust, surprise, and tenderness.",
+        "Format": "Audio, Video",
+        "Size": "--",
+        "Language": "French",
+        "Paper": "Introducing the Geneva Multimodal Expression Corpus for Experimental Research on Emotion Perception",
+        "Access": "Restricted",
+        "License": "--",
+        "Dataset-link": "https://www.unige.ch/cisa/gemep",
+        "Paper-link": "https://www.researchgate.net/publication/51796867_Introducing_the_Geneva_Multimodal_Expression_Corpus_for_Experimental_Research_on_Emotion_Perception",
+        "License-link": "--"
+    },
+    "OGVC": {
+        "Year": 2012,
+        "Content": "9114 spontaneous utterances and 2656 acted utterances by 4 professional actors (two male and two female).",
+        "Emotions": "9 emotional states: fear, surprise, sadness, disgust, anger, anticipation, joy, acceptance and the neutral state.",
+        "Format": "Audio",
+        "Size": "--",
+        "Language": "Japanese",
+        "Paper": "Naturalistic emotional speech collection paradigm with online game and its psychological and acoustical assessment",
+        "Access": "Restricted",
+        "License": "--",
+        "Dataset-link": "https://sites.google.com/site/ogcorpus/home/en",
+        "Paper-link": "https://www.jstage.jst.go.jp/article/ast/33/6/33_E1175/_pdf",
+        "License-link": "--"
+    },
+    "LEGO corpus": {
+        "Year": 2012,
+        "Content": "347 dialogs with 9,083 system-user exchanges.",
+        "Emotions": "Emotions classified as garbage, non-angry, slightly angry and very angry.",
+        "Format": "Audio",
+        "Size": "1.1 GB",
+        "Language": "--",
+        "Paper": "A Parameterized and Annotated Spoken Dialog Corpus of the CMU Let\u2019s Go Bus Information System",
+        "Access": "Open",
+        "License": "License available with the data. Free of charges for research purposes only.",
+        "Dataset-link": "https://www.ultes.eu/ressources/lego-spoken-dialogue-corpus/",
+        "Paper-link": "http://www.lrec-conf.org/proceedings/lrec2012/pdf/333_Paper.pdf",
+        "License-link": "License available with the data. Free of charges for research purposes only."
+    },
+    "SEMAINE": {
+        "Year": 2012,
+        "Content": "95 dyadic conversations from 21 subjects. Each subject converses with another playing one of four characters with emotions.",
+        "Emotions": "5 FeelTrace annotations: activation, valence, dominance, power, intensity",
+        "Format": "Audio, Video, Text",
+        "Size": "104 GB",
+        "Language": "English",
+        "Paper": "The SEMAINE Database: Annotated Multimodal Records of Emotionally Colored Conversations between a Person and a Limited Agent",
+        "Access": "Restricted",
+        "License": "Academic EULA",
+        "Dataset-link": "https://semaine-db.eu/",
+        "Paper-link": "https://ieeexplore.ieee.org/document/5959155",
+        "License-link": "Academic EULA"
+    },
+    "SAVEE": {
+        "Year": 2011,
+        "Content": "480 British English utterances by 4 males actors.",
+        "Emotions": "7 emotions: anger, disgust, fear, happiness, sadness, surprise and neutral.",
+        "Format": "Audio, Video",
+        "Size": "--",
+        "Language": "English (British)",
+        "Paper": "Multimodal Emotion Recognition",
+        "Access": "Restricted",
+        "License": "Free of charges for research purposes only.",
+        "Dataset-link": "http://kahlan.eps.surrey.ac.uk/savee/Database.html",
+        "Paper-link": "http://personal.ee.surrey.ac.uk/Personal/P.Jackson/pub/ma10/HaqJackson_MachineAudition10_approved.pdf",
+        "License-link": "Free of charges for research purposes only."
+    },
+    "TESS": {
+        "Year": 2010,
+        "Content": "2800 recording by 2 actresses.",
+        "Emotions": "7 emotions: anger, disgust, fear, happiness, pleasant surprise, sadness, and neutral.",
+        "Format": "Audio",
+        "Size": "--",
+        "Language": "English",
+        "Paper": "BEHAVIOURAL FINDINGS FROM THE TORONTO EMOTIONAL SPEECH SET",
+        "Access": "Open",
+        "License": "CC BY-NC-ND 4.0",
+        "Dataset-link": "https://tspace.library.utoronto.ca/handle/1807/24487",
+        "Paper-link": "https://www.semanticscholar.org/paper/BEHAVIOURAL-FINDINGS-FROM-THE-TORONTO-EMOTIONAL-SET-Dupuis-Pichora-Fuller/d7f746b3aee801a353b6929a65d9a34a68e71c6f/figure/2",
+        "License-link": "https://creativecommons.org/licenses/by-nc-nd/4.0/"
+    },
+    "EEKK": {
+        "Year": 2007,
+        "Content": "26 text passage read by 10 speakers.",
+        "Emotions": "4 main emotions: joy, sadness, anger and neutral.",
+        "Format": "--",
+        "Size": "0.352 GB",
+        "Language": "Estonian",
+        "Paper": "Estonian Emotional Speech Corpus",
+        "Access": "Open",
+        "License": "CC-BY license",
+        "Dataset-link": "https://metashare.ut.ee/repository/download/4d42d7a8463411e2a6e4005056b40024a19021a316b54b7fb707757d43d1a889/",
+        "Paper-link": "https://www.researchgate.net/publication/261724574_Estonian_Emotional_Speech_Corpus_Release_1",
+        "License-link": "https://metashare.ut.ee/repository/download/4d42d7a8463411e2a6e4005056b40024a19021a316b54b7fb707757d43d1a889/"
+    },
+    "IEMOCAP": {
+        "Year": 2007,
+        "Content": "12 hours of audiovisual data by 10 actors.",
+        "Emotions": "5 emotions: happiness, anger, sadness, frustration and neutral.",
+        "Format": "--",
+        "Size": "--",
+        "Language": "English",
+        "Paper": "IEMOCAP: Interactive emotional dyadic motion capture database",
+        "Access": "Restricted",
+        "License": "IEMOCAP license",
+        "Dataset-link": "https://sail.usc.edu/iemocap/iemocap_release.htm",
+        "Paper-link": "https://sail.usc.edu/iemocap/Busso_2008_iemocap.pdf",
+        "License-link": "https://sail.usc.edu/iemocap/Data_Release_Form_IEMOCAP.pdf"
+    },
+    "Keio-ESD": {
+        "Year": 2006,
+        "Content": "A set of human speech with vocal emotion spoken by a Japanese male speaker.",
+        "Emotions": "47 emotions including angry, joyful, disgusting, downgrading, funny,  worried, gentle, relief, indignation, shameful, etc.",
+        "Format": "Audio",
+        "Size": "--",
+        "Language": "Japanese",
+        "Paper": "EMOTIONAL SPEECH SYNTHESIS USING SUBSPACE CONSTRAINTS IN PROSODY",
+        "Access": "Restricted",
+        "License": "Available for research purposes only.",
+        "Dataset-link": "http://research.nii.ac.jp/src/en/Keio-ESD.html",
+        "Paper-link": "http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.420.8899&rep=rep1&type=pdf",
+        "License-link": "Available for research purposes only."
+    },
+    "EMO-DB": {
+        "Year": 2005,
+        "Content": "800 recording spoken by 10 actors (5 males and 5 females).",
+        "Emotions": "7 emotions: anger, neutral, fear, boredom, happiness, sadness, disgust.",
+        "Format": "Audio",
+        "Size": "--",
+        "Language": "German",
+        "Paper": "A Database of German Emotional Speech",
+        "Access": "Open",
+        "License": "--",
+        "Dataset-link": "http://emodb.bilderbar.info/index-1280.html",
+        "Paper-link": "http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.130.8506&rep=rep1&type=pdf",
+        "License-link": "--"
+    },
+    "eNTERFACE05": {
+        "Year": 2005,
+        "Content": "Videos by 42 subjects, coming from 14 different nationalities.",
+        "Emotions": "6 emotions: anger, fear, surprise, happiness, sadness and disgust.",
+        "Format": "Audio, Video",
+        "Size": "0.8 GB",
+        "Language": "German",
+        "Paper": "--",
+        "Access": "Open",
+        "License": "Free of charges for research purposes only.",
+        "Dataset-link": "http://www.enterface.net/enterface05/docs/results/databases/project2_database.zip",
+        "Paper-link": "--",
+        "License-link": "Free of charges for research purposes only."
+    },
+    "DES": {
+        "Year": 2002,
+        "Content": "4 speakers (2 males and 2 females).",
+        "Emotions": "5 emotions: neutral,  surprise,  happiness,  sadness  and  anger",
+        "Format": "--",
+        "Size": "--",
+        "Language": "Danish",
+        "Paper": "Documentation of the Danish Emotional Speech Database",
+        "Access": "--",
+        "License": "--",
+        "Dataset-link": "http://kom.aau.dk/~tb/speech/Emotions/",
+        "Paper-link": "http://kom.aau.dk/~tb/speech/Emotions/des.pdf",
+        "License-link": "--"
+    }
+}
\ No newline at end of file

From acf5264a8e081375c12c44e78ba82038c11e4614 Mon Sep 17 00:00:00 2001
From: SuperKogito <superkogito@gmail.com>
Date: Mon, 13 Feb 2023 22:32:10 +0100
Subject: [PATCH 5/5] updata contributions guide

---
 CONTRIBUTING.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index d882db4..0bb78e4 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -22,3 +22,8 @@ please feel free to add it.
 * The dataset should not be provided in an active PR.
 * The dataset should be available for researchers for free.
 * The information about the dataset must be accessible for verification.
+
+## How to contribute
+First, go to `src/` using `cd src`. Then add the dictionary (JSON entry) for the contributed dataset to `src/ser-datasets.json`.
+Make sure the JSON is valid, then run `python generate_files.py` to update the reStructuredText file, the CSV file and the README.
+That's it — congrats, and thank you for your contribution! Now open a PR with your changes. I will review it and then publish the results :))
\ No newline at end of file