From 5fea686b91ce241708dbf13eabaf5c0d7deb99fe Mon Sep 17 00:00:00 2001 From: SuperKogito <superkogito@gmail.com> Date: Mon, 13 Feb 2023 22:21:25 +0100 Subject: [PATCH 1/5] edit code to generate files automatically --- src/build_project.sh | 7 ++++ src/conf.py | 3 +- src/generate_files.py | 91 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 99 insertions(+), 2 deletions(-) create mode 100755 src/build_project.sh create mode 100644 src/generate_files.py diff --git a/src/build_project.sh b/src/build_project.sh new file mode 100755 index 0000000..05f4449 --- /dev/null +++ b/src/build_project.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +echo "Rst and Md files generations" +python generate_files.py + +echo "Make html files" +make html diff --git a/src/conf.py b/src/conf.py index b938dcc..92137ae 100644 --- a/src/conf.py +++ b/src/conf.py @@ -36,7 +36,7 @@ 'sphinx.ext.coverage', 'sphinx.ext.ifconfig', 'sphinx.ext.viewcode', - 'sphinx_contributors', + #'sphinx_contributors', 'crate.sphinx.csv', ] @@ -59,7 +59,6 @@ html_theme_options = { "github_url": "https://github.com/superkogito/ser-datasets", "search_bar_text": "Search this site...", - "google_analytics_id": "UA-133660046-1", "navbar_start": ["navbar-logo"], "navbar_center": ["navbar-nav"], diff --git a/src/generate_files.py b/src/generate_files.py new file mode 100644 index 0000000..755a97e --- /dev/null +++ b/src/generate_files.py @@ -0,0 +1,91 @@ + +import csv +import json +from tabulate import tabulate + + +# load datasets +json_file_path = "ser-datasets.json" +with open(json_file_path, 'r') as j: + content = json.loads(j.read()) + +# init keys +keys = ["Dataset", "Year", "Content", "Emotions", "Format", "Size", "Language", "Paper", "Access", "License", "Dataset-link", "Paper-link", "License-link"] +header = ["Dataset", "Year", "Content", "Emotions", "Format", "Size", "Language", "Paper", "Access", "License"] + +md_1 = """***Spoken Emotion Recognition Datasets:*** *A collection of datasets (count=42) for the 
purpose of emotion recognition/detection in speech. +The table is chronologically ordered and includes a description of the content of each dataset along with the emotions included. +The table can be browsed, sorted and searched under https://superkogito.github.io/SER-datasets/* +""" + +md_2 = """## References + +- Swain, Monorama & Routray, Aurobinda & Kabisatpathy, Prithviraj, Databases, features and classifiers for speech emotion recognition: a review, International Journal of Speech Technology, [paper](https://www.researchgate.net/publication/322602563_Databases_features_and_classifiers_for_speech_emotion_recognition_a_review#pf19) +- Dimitrios Ververidis and Constantine Kotropoulos, A State of the Art Review on Emotional Speech Databases, Artificial Intelligence & Information Analysis Laboratory, Department of Informatics Aristotle, University of Thessaloniki, [paper](http://poseidon.csd.auth.gr/papers/PUBLISHED/CONFERENCE/pdf/Ververidis2003b.pdf) +- A. Pramod Reddy and V. Vijayarajan, Extraction of Emotions from Speech-A Survey, VIT University, International Journal of Applied Engineering Research, [paper](https://www.ripublication.com/ijaer17/ijaerv12n16_46.pdf) +- Emotional Speech Databases, [document](https://link.springer.com/content/pdf/bbm%3A978-90-481-3129-7%2F1.pdf) +- Expressive Synthetic Speech, [website](http://emosamples.syntheticspeech.de/) +- Towards a standard set of acoustic features for the processing of emotion in speech, Technical university Munich, [document](https://asa.scitation.org/doi/pdf/10.1121/1.4739483) + + +## Contribution + +- All contributions are welcome! If you know a dataset that belongs here (see [criteria](https://github.com/SuperKogito/SER-datasets/blob/master/CONTRIBUTING.md#criteria)) but is not listed, please feel free to add it. For more information on Contributing, please refer to [CONTRIBUTING.md](https://github.com/SuperKogito/SER-datasets/blob/master/CONTRIBUTING.md). 
+
+- If you notice a typo or a mistake, please [report this as an issue](https://github.com/SuperKogito/SER-datasets/issues/new) and help us improve the quality of this list.
+
+
+## Disclaimer
+- The maintainer and the contributors try their best to keep this list up-to-date, and to only include working links (using automated verification with the help of the [urlchecker-action](https://github.com/marketplace/actions/urlchecker-action)). However, we cannot guarantee that all listed links are up-to-date. Read more in [DISCLAIMER.md](https://github.com/SuperKogito/SER-datasets/blob/master/DISCLAIMER.md).
+"""
+
+
+print(" -> Generate Markdown Text")
+def format_md_link(label, link):
+    res = "[{0}]({1})".format(label, link) if "http" in link else label
+    return res
+
+# tabulate
+table = []
+for key, values in content.items():
+    # add elements to row
+    row = [format_md_link(key, values["Dataset-link"])]
+    row += [values[k] for k in ["Year", "Content", "Emotions", "Format", "Size", "Language"]]
+    row += [format_md_link(values["Paper"], values["Paper-link"]), values["Access"], format_md_link(values["License"], values["License-link"])]
+
+    # add styles and add row to table
+    row = ["<sub>{0}</sub>".format(e) for e in row]
+    table.append(row)
+
+table = tabulate(table, keys, tablefmt="pipe")
+with open("../README.md", "w") as f:
+    f.write(md_1)
+    f.write(table)
+    f.write(md_2)
+
+
+print(" -> Generate Restructured Text")
+def format_rst_link(label, link):
+    res = "`{0} <{1}>`_".format(label, link) if "http" in link else label
+    return res
+
+# tabulate
+table = []
+for key, values in content.items():
+    # add elements to row
+    row = [format_rst_link(key, values["Dataset-link"])]
+    row += [values[k] for k in ["Year", "Content", "Emotions", "Format", "Size", "Language"]]
+    row += [format_rst_link(values["Paper"], values["Paper-link"]), values["Access"]]
+    row += [format_rst_link(values["License"], values["License-link"])]
+
+    # format and add row to csv
+    table.append(row)
+ 
+with open('ser-datasets.csv', 'w', encoding='UTF8', newline='') as f: + writer = csv.writer(f) + + # write the header + writer.writerow(header) + + # write multiple rows + writer.writerows(table) From 525dffd1c0e87d3fb291173aed3f4376b4265922 Mon Sep 17 00:00:00 2001 From: SuperKogito <superkogito@gmail.com> Date: Mon, 13 Feb 2023 22:21:40 +0100 Subject: [PATCH 2/5] add requirements file --- src/requirements.txt | 67 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 src/requirements.txt diff --git a/src/requirements.txt b/src/requirements.txt new file mode 100644 index 0000000..5182820 --- /dev/null +++ b/src/requirements.txt @@ -0,0 +1,67 @@ +ablog==0.10.25 +alabaster==0.7.12 +Babel==2.10.1 +beautifulsoup4==4.11.1 +bleach==6.0.0 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.12 +commonmark==0.9.1 +cryptography==39.0.1 +docutils==0.17.1 +entrypoints==0.4 +feedgen==0.9.0 +idna==3.3 +imagesize==1.3.0 +importlib-metadata==4.11.3 +invoke==1.7.0 +jeepney==0.8.0 +Jinja2==3.1.2 +keyring==23.6.0 +latexcodec==2.0.1 +lxml==4.8.0 +MarkupSafe==2.1.1 +nest-asyncio==1.5.6 +packaging==21.3 +pkginfo==1.8.3 +pybtex==0.24.0 +pybtex-docutils==1.0.1 +pycparser==2.21 +pydata-sphinx-theme==0.8.1 +Pygments==2.12.0 +pyparsing==3.0.8 +python-dateutil==2.8.2 +pytz==2022.1 +PyYAML==6.0 +pyzmq==24.0.1 +readme-renderer==35.0 +requests==2.27.1 +requests-toolbelt==0.9.1 +rich==12.5.1 +SecretStorage==3.3.2 +six==1.16.0 +snowballstemmer==2.2.0 +soupsieve==2.3.2.post1 +Sphinx==4.5.0 +sphinx-copybutton==0.5.0 +sphinx-csv-filter==0.4.0 +sphinx-panels==0.6.0 +sphinx-sitemap==2.2.0 +sphinxcontrib-applehelp==1.0.2 +sphinxcontrib-bibtex==2.4.2 +sphinxcontrib-devhelp==1.0.2 +sphinxcontrib-htmlhelp==2.0.0 +sphinxcontrib-jsmath==1.0.1 +sphinxcontrib-pdfembed @ git+https://github.com/SuperKogito/sphinxcontrib-pdfembed@d75fb37f9e4a303888a61f265b568f7729826c4a +sphinxcontrib-qthelp==1.0.3 +sphinxcontrib-serializinghtml==1.1.5 
+sphinxcontrib-tikz==0.4.16 +sphinxemoji==0.2.0 +sphinxext-opengraph==0.6.3 +tabulate==0.9.0 +tornado==6.2 +tqdm==4.64.0 +urllib3==1.26.9 +watchdog==2.1.7 +webencodings==0.5.1 +zipp==3.8.0 From 2c6923caba06772da05850b502746f3554f21246 Mon Sep 17 00:00:00 2001 From: SuperKogito <superkogito@gmail.com> Date: Mon, 13 Feb 2023 22:22:08 +0100 Subject: [PATCH 3/5] update readme and rst resources --- README.md | 195 ++++++++++--------------------------------- src/index.rst | 101 +--------------------- src/ser-datasets.csv | 86 +++++++++---------- 3 files changed, 88 insertions(+), 294 deletions(-) diff --git a/README.md b/README.md index bc5b209..5bd3f97 100644 --- a/README.md +++ b/README.md @@ -1,55 +1,50 @@ ***Spoken Emotion Recognition Datasets:*** *A collection of datasets (count=42) for the purpose of emotion recognition/detection in speech. The table is chronologically ordered and includes a description of the content of each dataset along with the emotions included. The table can be browsed, sorted and searched under https://superkogito.github.io/SER-datasets/* - -| <sub>Dataset</sub> | <sub>Year</sub> | <sub>Content</sub> | <sub>Emotions</sub> | <sub>Format</sub> | <sub>Size</sub> | <sub>Language</sub> | <sub>Paper</sub> | <sub>Access</sub> | <sub>License</sub> | 
-|---------------------------------------------------------------------------------------------------|-----------------|-------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------|---------------------|-------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------|----------------------------------------------------------------------------------------------| -| <sub>[MESD]</sub> | <sub>2022</sub> | <sub>864 audio files of single-word emotional utterances with Mexican cultural shaping.</sub> | <sub>6 emotions provides single-word utterances for anger, disgust, fear, happiness, neutral, and sadness.</sub> | <sub>Audio</sub> | <sub>0,097 GB</sub> | <sub>Spanish (Mexican)</sub> | <sub>[The Mexican Emotional Speech Database (MESD): elaboration and assessment based on machine learning]</sub> | <sub>Open</sub> | <sub>[CC BY 4.0]</sub> -|<sub>[MLEnd]</sub> | <sub>2021</sub> | <sub>~32700 audio recordings files produced by 154 speakers. 
Each audio recording corresponds to one English numeral (from "zero" to "billion")</sub> | <sub> Intonations: neutral, bored, excited and question.</sub> | <sub>Audio</sub> | <sub>2.27 GB</sub> | <sub>--</sub> | <sub>--</sub> | <sub>Open</sub> | <sub>Unknown</sub> | -|<sub>[ASVP-ESD]</sub> | <sub>2021</sub> | <sub>~13285 audio files collected from movies, tv shows and youtube containing speech and non-speech.</sub> | <sub>12 different natural emotions (boredom, neutral, happiness, sadness, anger, fear, surprise, disgust, excitement, pleasure, pain, disappointment) with 2 levels of intensity. </sub> | <sub>Audio</sub> | <sub> 2 GB </sub> | <sub>Chinese, English, French, Russian and others<sub> | <sub>--<sub> | <sub> Open access<sub> | <sub>Unknown</sub> | -| <sub>[ESD]</sub> | <sub>2021</sub> | <sub>29 hours, 3500 sentences, by 10 native English speakers and 10 native Chinese speakers.</sub> | <sub>5 emotions: angry, happy, neutral, sad, and surprise.</sub> | <sub>Audio, Text</sub> | <sub> 2.4 GB (zip) </sub> | <sub> Chinese, English </sub> | <sub>[Seen And Unseen Emotional Style Transfer For Voice Conversion With A New Emotional Speech Dataset]</sub> | <sub>Open access</sub> | <sub>Available under an Academic License </sub> | -| <sub>[MuSe-CAR]</sub> | <sub>2021</sub> | <sub>40 hours, 6,000+ recordings of 25,000+ sentences by 70+ English speakers (see db link for details).</sub> | <sub>continuous emotion dimensions characterized using valence, arousal, and trustworthiness.</sub> | <sub>Audio, Video, Text</sub> | <sub> 15 GB </sub> | <sub> English </sub> | <sub>[The Multimodal Sentiment Analysis in Car Reviews (MuSe-CaR) Dataset: Collection, Insights and Improvements]</sub> | <sub>Restricted access</sub> | <sub>Available under an Academic License & Commercial License </sub> | -| <sub>[MSP-Podcast corpus]</sub> | <sub>2020</sub> | <sub>100 hours by over 100 speakers (see db link for details).</sub> | <sub>This corpus is annotated with emotional labels using 
attribute-based descriptors (activation, dominance and valence) and categorical labels (anger, happiness, sadness, disgust, surprised, fear, contempt, neutral and other). </sub> | <sub>Audio</sub> | <sub> -- </sub> | <sub> -- </sub> | <sub>[The MSP-Conversation Corpus]</sub> | <sub>Restricted access</sub> | <sub>Available under an Academic License & Commercial License </sub> | -| <sub>[emotiontts open db]</sub> | <sub>2020</sub> | <sub>Recordings and their associated transcriptions by a diverse group of speakers.</sub> | <sub>4 emotions: general, joy, anger, and sadness.</sub> | <sub>Audio, Text</sub> | <sub>--</sub> | <sub>Korean</sub> | <sub>--</sub> | <sub>Partial open access</sub> | <sub>[CC BY-NC-SA 4.0] </sub> | -| <sub>[URDU-Dataset]</sub> | <sub>2020</sub> | <sub>400 utterances by 38 speakers (27 male and 11 female).</sub> | <sub>4 emotions: angry, happy, neutral, and sad.</sub> | <sub>Audio</sub> | <sub>~72.1 MB</sub> | <sub>Urdu</sub> | <sub>[Cross Lingual Speech Emotion Recognition: Urdu vs. 
Western Languages]</sub> | <sub>Open access</sub> | <sub>None specified </sub> | -| <sub>[BAVED]</sub> | <sub>2020</sub> | <sub>1935 recording by 61 speakers (45 male and 16 female).</sub> | <sub>3 levels of emotion.</sub> | <sub>Audio</sub> | <sub>~195 MB</sub> | <sub>Arabic</sub> | <sub>--</sub> | <sub>Open access</sub> | <sub>None specified </sub> | -| <sub>[VIVAE]</sub> | <sub>2020</sub> | <sub>non-speech, 1085 audio file by ~12 speakers.</sub> | <sub>non-speech 6 emotions: achievement, anger, fear, pain, pleasure, and surprise with 3 emotional intensities (low, moderate, strong, peak).</sub> | <sub>Audio</sub> | <sub>--</sub> | <sub>--</sub> | <sub>--</sub> | <sub>Restricted access</sub> | <sub>[CC BY-NC-SA 4.0] </sub> | -| <sub>[SEWA]</sub> | <sub>2019</sub> | <sub> more than 2000 minutes of audio-visual data of 398 people (201 male and 197 female) coming from 6 cultures.</sub> | <sub> emotions are characterized using valence and arousal.</sub> | <sub>Audio, Video</sub> | <sub>--</sub> | <sub>Chinese, English, German, Greek, Hungarian and Serbian</sub> | <sub>[SEWA DB: A Rich Database for Audio-Visual Emotion and Sentiment Research in the Wild]</sub> | <sub>Restricted access</sub> | <sub>[SEWA EULA] </sub> | -| <sub>[MELD]</sub> | <sub>2019</sub> | <sub>1400 dialogues and 14000 utterances from Friends TV series by multiple speakers.</sub> | <sub>7 emotions: Anger, disgust, sadness, joy, neutral, surprise and fear. MELD also has sentiment (positive, negative and neutral) annotation for each utterance.</sub> | <sub>Audio, Video, Text</sub> | <sub>~10.1 GB</sub> | <sub>English</sub> | <sub>[MELD: A Multimodal Multi-Party Dataset for Emotion Recognition in Conversations]</sub> | <sub>Open access</sub> | <sub>[MELD: GPL-3.0 License] </sub> | -| <sub>[ShEMO]</sub> | <sub>2019</sub> | <sub>3000 semi-natural utterances, equivalent to 3 hours and 25 minutes of speech data from online radio plays by 87 native-Persian speakers. 
</sub> | <sub>6 emotions: anger, fear, happiness, sadness, neutral and surprise. </sub> | <sub>Audio</sub> | <sub>~1014 MB</sub> | <sub>Persian</sub> | <sub>[ShEMO: a large-scale validated database for Persian speech emotion detection]</sub> | <sub>Open access</sub> | <sub>None sepcified </sub> | -| <sub>[DEMoS]</sub> | <sub>2019</sub> | <sub>9365 emotional and 332 neutral samples produced by 68 native speakers (23 females, 45 males). </sub> | <sub>7/6 emotions: anger, sadness, happiness, fear, surprise, disgust, and the secondary emotion guilt. </sub> | <sub>Audio</sub> | <sub>--</sub> | <sub>Italian</sub> | <sub>[DEMoS: An Italian emotional speech corpus. Elicitation methods, machine learning, and perception]</sub> | <sub>Restricted access</sub> | <sub>EULA: End User License Agreement</sub> | -| <sub>[AESDD]</sub> | <sub>2018</sub> | <sub>around 500 utterances by a diverse group of actors (over 5 actors) simlating various emotions.</sub> | <sub> 5 emotions: anger, disgust, fear, happiness, and sadness.</sub> | <sub>Audio</sub> | <sub>~392 MB</sub> | <sub>Greek</sub> | <sub>[Speech Emotion Recognition for Performance Interaction]</sub> | <sub>Open access</sub> | <sub>None specified </sub> | -| <sub>[Emov-DB]</sub> | <sub>2018</sub> | <sub>Recordings for 4 speakers- 2 males and 2 females.</sub> | <sub>The emotional styles are neutral, sleepiness, anger, disgust and amused.</sub> | <sub>Audio</sub> | <sub>5.88 GB</sub> | <sub>English</sub> | <sub>[The emotional voices database: Towards controlling the emotion dimension in voice generation systems]</sub> | <sub>Open access</sub> | <sub>None specified </sub> | -| <sub>[RAVDESS]</sub> | <sub>2018</sub> | <sub>7356 recordings by 24 actors.</sub> | <sub>7 emotions: calm, happy, sad, angry, fearful, surprise, and disgust</sub> | <sub>Audio, Video</sub> | <sub>~24.8 GB</sub> | <sub>English</sub> | <sub>[The Ryerson Audio-Visual Database of Emotional Speech and Song (RAVDESS): A dynamic, multimodal set of facial and vocal 
expressions in North American English]</sub> | <sub>Open access</sub> | <sub>[CC BY-NC-SA 4.0] </sub> | -| <sub>[JL corpus]</sub> | <sub>2018</sub> | <sub>2400 recording of 240 sentences by 4 actors (2 males and 2 females).</sub> | <sub>5 primary emotions: angry, sad, neutral, happy, excited. 5 secondary emotions: anxious, apologetic, pensive, worried, enthusiastic.</sub> | <sub>Audio</sub> | <sub> -- </sub> | <sub>English</sub> | <sub>[An Open Source Emotional Speech Corpus for Human Robot Interaction Applications]</sub> | <sub>Open access</sub> | <sub>[CC0 1.0] </sub> | -| <sub>[CaFE]</sub> | <sub>2018</sub> | <sub>6 different sentences by 12 speakers (6 fmelaes + 6 males).</sub> | <sub>7 emotions: happy, sad, angry, fearful, surprise, disgust and neutral. Each emotion is acted in 2 different intensities.</sub> | <sub>Audio</sub> | <sub>~2 GB</sub> | <sub>French (Canadian)</sub> | <sub>--</sub> | <sub>Open access</sub> | <sub>[CC BY-NC-SA 4.0] </sub> | -| <sub>[EmoFilm]</sub> | <sub>2018</sub> | <sub>1115 audio instances sentences extracted from various films.</sub> | <sub>5 emotions: anger, contempt, happiness, fear, and sadness.</sub> | <sub>Audio</sub> | <sub>--</sub> | <sub>English, Italian & Spanish</sub> | <sub>[Categorical vs Dimensional Perception of Italian Emotional Speech]</sub> | <sub>Restricted access</sub> | <sub>EULA: End User License Agreement </sub> | -| <sub>[ANAD]</sub> | <sub>2018</sub> | <sub>1384 recording by multiple speakers.</sub> | <sub>3 emotions: angry, happy, surprised.</sub> | <sub>Audio</sub> | <sub>~2 GB</sub> | <sub>Arabic</sub> | <sub>[Arabic Natural Audio Dataset] </sub> | <sub>Open access</sub> | <sub>[CC BY-NC-SA 4.0] </sub> | -| <sub>[EmoSynth]</sub> | <sub>2018</sub> | <sub>144 audio file labelled by 40 listeners.</sub> | <sub>Emotion (no speech) defined in regard of valence and arousal.</sub> | <sub>Audio</sub> | <sub>103.4 MB</sub> | <sub>--</sub> | <sub>[The Perceived Emotion of Isolated Synthetic Audio: The EmoSynth 
Dataset and Results] </sub> | <sub>Open access</sub> | <sub>[CC BY 4.0] </sub> | -| <sub>[CMU-MOSEI]</sub> | <sub>2018</sub> | <sub>65 hours of annotated video from more than 1000 speakers and 250 topics.</sub> | <sub>6 Emotion (happiness, sadness, anger,fear, disgust, surprise) + Likert scale.</sub> | <sub>Audio, Video</sub> | <sub>--</sub> | <sub>English</sub> | <sub>[Multi-attention Recurrent Network for Human Communication Comprehension] </sub> | <sub>Open access</sub> | <sub>[CMU-MOSEI License] </sub> | -| <sub>[VERBO]</sub> | <sub>2018</sub> | <sub>14 different phrases by 12 speakers (6 female + 6 male) for a total of 1167 recordings. </sub> | <sub>7 emotions: Happiness, Disgust, Fear, Neutral, Anger, Surprise, Sadness </sub> | <sub>Audio</sub> | <sub>--</sub> | <sub>Portuguese</sub> | <sub>[VERBO: Voice Emotion Recognition dataBase in Portuguese Language]</sub> | <sub>Restricted access</sub> | <sub>Available for research purposes only</sub> -| <sub>[CMU-MOSI]</sub> | <sub>2017</sub> | <sub>2199 opinion utterances with annotated sentiment.</sub> | <sub>Sentiment annotated between very negative to very positive in seven Likert steps.</sub> | <sub>Audio, Video</sub> | <sub>--</sub> | <sub>English</sub> | <sub>[Multi-attention Recurrent Network for Human Communication Comprehension] </sub> | <sub>Open access</sub> | <sub>[CMU-MOSI License] </sub> | -| <sub>[MSP-IMPROV]</sub> | <sub>2017</sub> | <sub>20 sentences by 12 actors.</sub> | <sub>4 emotions: angry, sad, happy, neutral, other, without agreement</sub> | <sub>Audio, Video</sub> | <sub> -- </sub> | <sub>English</sub> | <sub>[MSP-IMPROV: An Acted Corpus of Dyadic Interactions to Study Emotion Perception]</sub> | <sub>Restricted access</sub> | <sub>Available under an Academic License & Commercial License </sub> | -| <sub>[CREMA-D]</sub> | <sub>2017</sub> | <sub>7442 clip of 12 sentences spoken by 91 actors (48 males and 43 females).</sub> | <sub>6 emotions: angry, disgusted, fearful, happy, neutral, and 
sad</sub> | <sub>Audio, Video</sub> | <sub> -- </sub> | <sub>English</sub> | <sub>[CREMA-D: Crowd-sourced Emotional Multimodal Actors Dataset]</sub> | <sub>Open access</sub> | <sub>Available under the [Open Database License & Database Content License] </sub> | -| <sub>[Example emotion videos used in investigation of emotion perception in schizophrenia]</sub> | <sub>2017</sub> | <sub>6 videos:Two example videos from each emotion category (angry, happy and neutral) by one female speaker.</sub> | <sub>3 emotions: angry, happy and neutral.</sub> | <sub>Audio, Video</sub> | <sub>~63 MB</sub> | <sub>English</sub> | <sub>--</sub> | <sub>Open access</sub> | <sub>Available under the [Permitted Non-commercial Re-use with Acknowledgment] </sub> | -| <sub>[EMOVO]</sub> | <sub>2014</sub> | <sub>6 actors who played 14 sentences.</sub> | <sub>6 emotions: disgust, fear, anger, joy, surprise, sadness.</sub> | <sub>Audio</sub> | <sub> ~355 MB</sub> | <sub>Italian</sub> | <sub>[EMOVO Corpus: an Italian Emotional Speech Database]</sub> | <sub>Open access</sub> | <sub>None specified </sub> | -| <sub>[RECOLA]</sub> | <sub>2013</sub> | <sub>3.8 hours of recordings by 46 participants.</sub> | <sub>negative and positive sentiment (valence and arousal).</sub> | <sub>Audio, Video</sub> | <sub> -- </sub> | <sub>--</sub> | <sub>[Introducing the RECOLA Multimodal Corpus of Remote Collaborative and Affective Interactions]</sub> | <sub>Restricted access</sub> | <sub>Available under an Academic License & Commercial License </sub> | -| <sub>[GEMEP corpus]</sub> | <sub>2012</sub> | <sub>Videos10 actors portraying 10 states.</sub> | <sub>12 emotions: amusement, anxiety, cold anger (irritation), despair, hot anger (rage), fear (panic), interest, joy (elation), pleasure(sensory), pride, relief, and sadness. 
Plus, 5 additional emotions: admiration, contempt, disgust, surprise, and tenderness.</sub> | <sub>Audio, Video</sub> | <sub> -- </sub> | <sub>French</sub> | <sub>[Introducing the Geneva Multimodal Expression Corpus for Experimental Research on Emotion Perception]</sub> | <sub>Restricted access</sub> | <sub>None specified </sub> | -| <sub>[OGVC]</sub> | <sub>2012</sub> | <sub> 9114 spontaneous utterances and 2656 acted utterances by 4 professional actors (two male and two female).</sub> | <sub>9 emotional states: fear, surprise, sadness, disgust, anger, anticipation, joy, acceptance and the neutral state.</sub> | <sub>Audio</sub> | <sub>--</sub> | <sub>Japanese</sub> | <sub>[Naturalistic emotional speech collectionparadigm with online game and its psychological and acoustical assessment]</sub> | <sub>Restricted access</sub> | <sub>None specified </sub> | -| <sub>[LEGO corpus]</sub> | <sub>2012</sub> | <sub>347 dialogs with 9,083 system-user exchanges.</sub> | <sub>Emotions classified as garbage, non-angry, slightly angry and very angry.</sub> | <sub>Audio</sub> | <sub>1.1 GB</sub> | <sub>--</sub> | <sub>[A Parameterized and Annotated Spoken Dialog Corpus of the CMU Let’s Go Bus Information System]</sub> | <sub>Open access</sub> | <sub>License available with the data. Free of charges for research purposes only. </sub> | -| <sub>[SEMAINE]</sub> | <sub>2012</sub> | <sub>95 dyadic conversations from 21 subjects. 
Each subject converses with another playing one of four characters with emotions.</sub> | <sub>5 FeelTrace annotations: activation, valence, dominance, power, intensity</sub> | <sub>Audio, Video, Text</sub> | <sub>104 GB</sub> | <sub>English</sub> | <sub>[The SEMAINE Database: Annotated Multimodal Records of Emotionally Colored Conversations between a Person and a Limited Agent]</sub> | <sub>Restricted access<sub> | <sub>Academic EULA</sub> | -| <sub>[SAVEE]</sub> | <sub>2011</sub> | <sub>480 British English utterances by 4 males actors.</sub> | <sub>7 emotions: anger, disgust, fear, happiness, sadness, surprise and neutral.</sub> | <sub>Audio, Video</sub> | <sub>--</sub> | <sub>English (British)</sub> | <sub>[Multimodal Emotion Recognition]</sub> | <sub>Restrictted access</sub> | <sub>Free of charges for research purposes only. </sub> | -| <sub>[TESS]</sub> | <sub>2010</sub> | <sub>2800 recording by 2 actresses.</sub> | <sub>7 emotions: anger, disgust, fear, happiness, pleasant surprise, sadness, and neutral.</sub> | <sub>Audio</sub> | <sub> -- </sub> | <sub>English</sub> | <sub>[BEHAVIOURAL FINDINGS FROM THE TORONTO EMOTIONAL SPEECH SET]</sub> | <sub>Open access</sub> | <sub>[CC BY-NC-ND 4.0] </sub> | -| <sub>[EEKK]</sub> | <sub>2007</sub> | <sub>26 text passage read by 10 speakers.</sub> | <sub>4 main emotions: joy, sadness, anger and neutral.</sub> | <sub>--</sub> | <sub>~352 MB</sub> | <sub>Estonian</sub> | <sub>[Estonian Emotional Speech Corpus]</sub> | <sub>Open access</sub> | <sub>[CC-BY license] </sub> | -| <sub>[IEMOCAP]</sub> | <sub>2007</sub> | <sub>12 hours of audiovisual data by 10 actors.</sub> | <sub>5 emotions: happiness, anger, sadness, frustration and neutral.</sub> | <sub>--</sub> | <sub>--</sub> | <sub>English</sub> | <sub>[IEMOCAP: Interactive emotional dyadic motion capture database]</sub> | <sub>Restricted access</sub> | <sub>[IEMOCAP license] </sub> | -| <sub>[Keio-ESD]</sub> | <sub>2006</sub> | <sub>A set of human speech with vocal emotion 
spoken by a Japanese male speaker.</sub> | <sub>47 emotions including angry, joyful, disgusting, downgrading, funny, worried, gentle, relief, indignation, shameful, etc.</sub> | <sub>Audio</sub> | <sub> -- </sub> | <sub>Japanese</sub> | <sub>[EMOTIONAL SPEECH SYNTHESIS USING SUBSPACE CONSTRAINTS IN PROSODY]</sub> | <sub>Restricted access</sub> | <sub>Available for research purposes only </sub> | -| <sub>[EMO-DB]</sub> | <sub>2005</sub> | <sub>800 recording spoken by 10 actors (5 males and 5 females).</sub> | <sub>7 emotions: anger, neutral, fear, boredom, happiness, sadness, disgust.</sub> | <sub>Audio</sub> | <sub> -- </sub> | <sub>German</sub> | <sub>[A Database of German Emotional Speech]</sub> | <sub>Open access</sub> | <sub>None specified </sub> | -| <sub>[eNTERFACE05]</sub> | <sub>2005</sub> | <sub>Videos by 42 subjects, coming from 14 different nationalities.</sub> | <sub>6 emotions: anger, fear, surprise, happiness, sadness and disgust.</sub> | <sub>Audio, Video</sub> | <sub>~0.8 GB</sub> | <sub>German</sub> | <sub>[The eNTERFACE’05 Audio-Visual Emotion Database]</sub> | <sub>Open access</sub> | <sub>Free of charges for research purposes only </sub> | -| <sub>[DES]</sub> | <sub>2002</sub> | <sub>4 speakers (2 males and 2 females).</sub> | <sub>5 emotions: neutral, surprise, happiness, sadness and anger</sub> | <sub> -- </sub> | <sub> -- </sub> | <sub>Danish</sub> | <sub>[Documentation of the Danish Emotional Speech Database]</sub> | <sub> -- </sub> | <sub> -- </sub> | - - - -## References +| Dataset | Year | Content | Emotions | Format | Size | Language | Paper | Access | License | 
+|:--------------------------------------------------------------------------------------------------------------------------------------------------|:----------------|:------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------|:------------------------|:------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------|:------------------------------------------------------------------------------------------------------------------------------------------| +| <sub>[MESD](https://data.mendeley.com/datasets/cy34mh68j9/5)</sub> | <sub>2022</sub> | <sub>864 audio files of single-word emotional utterances with Mexican cultural shaping.</sub> | <sub>6 emotions provides single-word utterances for anger, disgust, fear, happiness, neutral, and sadness.</sub> | <sub>Audio</sub> | <sub>0,097 GB</sub> | <sub>Spanish (Mexican)</sub> | <sub>[The Mexican Emotional Speech Database (MESD): elaboration and assessment based on machine learning](https://pubmed.ncbi.nlm.nih.gov/34891601/)</sub> | <sub>Open</sub> | <sub>[CC BY 4.0](https://creativecommons.org/licenses/by/4.0/)</sub> | +| <sub>[MLEnd](https://www.kaggle.com/datasets/jesusrequena/mlend-spoken-numerals)</sub> | <sub>2021</sub> | <sub>~32700 audio recordings files 
produced by 154 speakers. Each audio recording corresponds to one English numeral (from "zero" to "billion")</sub> | <sub>Intonations: neutral, bored, excited and question</sub> | <sub>Audio</sub> | <sub>2.27 GB</sub> | <sub>--</sub> | <sub>--</sub> | <sub>Open</sub> | <sub>Unknown</sub> | +| <sub>[ASVP-ESD](https://www.kaggle.com/datasets/dejolilandry/asvpesdspeech-nonspeech-emotional-utterances)</sub> | <sub>2021</sub> | <sub>~13285 audio files collected from movies, tv shows and youtube containing speech and non-speech.</sub> | <sub>12 different natural emotions (boredom, neutral, happiness, sadness, anger, fear, surprise, disgust, excitement, pleasure, pain, disappointment) with 2 levels of intensity.</sub> | <sub>Audio</sub> | <sub>2 GB</sub> | <sub>Chinese, English, French, Russian and others</sub> | <sub>--</sub> | <sub>Open</sub> | <sub>Unknown</sub> | +| <sub>[ESD](https://hltsingapore.github.io/ESD/)</sub> | <sub>2021</sub> | <sub>29 hours, 3500 sentences, by 10 native English speakers and 10 native Chinese speakers.</sub> | <sub>5 emotions: angry, happy, neutral, sad, and surprise.</sub> | <sub>Audio, Text</sub> | <sub>2.4 GB (zip)</sub> | <sub>Chinese, English</sub> | <sub>[Seen And Unseen Emotional Style Transfer For Voice Conversion With A New Emotional Speech Dataset](https://arxiv.org/pdf/2010.14794.pdf)</sub> | <sub>Open</sub> | <sub>Academic License</sub> | +| <sub>[MuSe-CAR](https://zenodo.org/record/4134758)</sub> | <sub>2021</sub> | <sub>40 hours, 6,000+ recordings of 25,000+ sentences by 70+ English speakers (see db link for details).</sub> | <sub>continuous emotion dimensions characterized using valence, arousal, and trustworthiness.</sub> | <sub>Audio, Video, Text</sub> | <sub>15 GB</sub> | <sub>English</sub> | <sub>[The Multimodal Sentiment Analysis in Car Reviews (MuSe-CaR) Dataset: Collection, Insights and Improvements](https://arxiv.org/pdf/2101.06053.pdf)</sub> | <sub>Restricted</sub> | <sub>Academic License & Commercial License</sub> | 
+| <sub>[MSP-Podcast corpus](https://ecs.utdallas.edu/research/researchlabs/msp-lab/MSP-Podcast.html)</sub> | <sub>2020</sub> | <sub>100 hours by over 100 speakers (see db link for details).</sub> | <sub>This corpus is annotated with emotional labels using attribute-based descriptors (activation, dominance and valence) and categorical labels (anger, happiness, sadness, disgust, surprised, fear, contempt, neutral and other).</sub> | <sub>Audio</sub> | <sub>--</sub> | <sub>--</sub> | <sub>[The MSP-Conversation Corpus](http://www.interspeech2020.org/index.php?m=content&c=index&a=show&catid=290&id=684)</sub> | <sub>Restricted</sub> | <sub>Academic License & Commercial License</sub> | +| <sub>[emotiontts open db](https://github.com/emotiontts/emotiontts_open_db)</sub> | <sub>2020</sub> | <sub>Recordings and their associated transcriptions by a diverse group of speakers.</sub> | <sub>4 emotions: general, joy, anger, and sadness.</sub> | <sub>Audio, Text</sub> | <sub>--</sub> | <sub>Korean</sub> | <sub>--</sub> | <sub>Partially open</sub> | <sub>[CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/)</sub> | +| <sub>[URDU-Dataset](https://github.com/siddiquelatif/urdu-dataset)</sub> | <sub>2020</sub> | <sub>400 utterances by 38 speakers (27 male and 11 female).</sub> | <sub>4 emotions: angry, happy, neutral, and sad.</sub> | <sub>Audio</sub> | <sub>0.072 GB</sub> | <sub>Urdu</sub> | <sub>[Cross Lingual Speech Emotion Recognition: Urdu vs. 
Western Languages](https://arxiv.org/pdf/1812.10411.pdf)</sub> | <sub>Open</sub> | <sub>--</sub> | +| <sub>[BAVED](https://www.kaggle.com/a13x10/basic-arabic-vocal-emotions-dataset)</sub> | <sub>2020</sub> | <sub>1935 recording by 61 speakers (45 male and 16 female).</sub> | <sub>3 levels of emotion.</sub> | <sub>Audio</sub> | <sub>0.195 GB</sub> | <sub>Arabic</sub> | <sub>--</sub> | <sub>Open</sub> | <sub>--</sub> | +| <sub>[VIVAE](https://zenodo.org/record/4066235)</sub> | <sub>2020</sub> | <sub>non-speech, 1085 audio file by 12 speakers.</sub> | <sub>non-speech 6 emotions: achievement, anger, fear, pain, pleasure, and surprise with 3 emotional intensities (low, moderate, strong, peak).</sub> | <sub>Audio</sub> | <sub>--</sub> | <sub>--</sub> | <sub>--</sub> | <sub>Restricted</sub> | <sub>[CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/)</sub> | +| <sub>[SEWA](https://db.sewaproject.eu/)</sub> | <sub>2019</sub> | <sub>more than 2000 minutes of audio-visual data of 398 people (201 male and 197 female) coming from 6 cultures.</sub> | <sub>emotions are characterized using valence and arousal.</sub> | <sub>Audio, Video</sub> | <sub>--</sub> | <sub>Chinese, English, German, Greek, Hungarian and Serbian</sub> | <sub>[SEWA DB: A Rich Database for Audio-Visual Emotion and Sentiment Research in the Wild](https://arxiv.org/pdf/1901.02839.pdf)</sub> | <sub>Restricted</sub> | <sub>[SEWA EULA](https://db.sewaproject.eu/media/doc/eula.pdf)</sub> | +| <sub>[MELD](https://affective-meld.github.io/)</sub> | <sub>2019</sub> | <sub>1400 dialogues and 14000 utterances from Friends TV series by multiple speakers.</sub> | <sub>7 emotions: Anger, disgust, sadness, joy, neutral, surprise and fear. 
MELD also has sentiment (positive, negative and neutral) annotation for each utterance.</sub> | <sub>Audio, Video, Text</sub> | <sub>10.1 GB</sub> | <sub>English</sub> | <sub>[MELD: A Multimodal Multi-Party Dataset for Emotion Recognition in Conversations](https://arxiv.org/pdf/1810.02508.pdf)</sub> | <sub>Open</sub> | <sub>[MELD: GPL-3.0 License](https://github.com/declare-lab/MELD/blob/master/LICENSE)</sub> | +| <sub>[ShEMO](https://github.com/mansourehk/ShEMO)</sub> | <sub>2019</sub> | <sub>3000 semi-natural utterances, equivalent to 3 hours and 25 minutes of speech data from online radio plays by 87 native-Persian speakers.</sub> | <sub>6 emotions: anger, fear, happiness, sadness, neutral and surprise.</sub> | <sub>Audio</sub> | <sub>0.101 GB</sub> | <sub>Persian</sub> | <sub>[ShEMO: a large-scale validated database for Persian speech emotion detection](https://link.springer.com/article/10.1007/s10579-018-9427-x)</sub> | <sub>Open</sub> | <sub>--</sub> | +| <sub>[DEMoS](https://zenodo.org/record/2544829)</sub> | <sub>2019</sub> | <sub>9365 emotional and 332 neutral samples produced by 68 native speakers (23 females, 45 males).</sub> | <sub>7/6 emotions: anger, sadness, happiness, fear, surprise, disgust, and the secondary emotion guilt.</sub> | <sub>Audio</sub> | <sub>--</sub> | <sub>Italian</sub> | <sub>[DEMoS: An Italian emotional speech corpus. 
Elicitation methods, machine learning, and perception](https://link.springer.com/epdf/10.1007/s10579-019-09450-y?author_access_token=5pf0w_D4k9z28TM6n4PbVPe4RwlQNchNByi7wbcMAY5hiA-aXzXNbZYfsMDDq2CdHD-w5ArAxIwlsk2nC_26pSyEAcu1xlKJ1c9m3JZj2ZlFmlVoCZUTcG3Hq2_2ozMLo3Hq3Y0CHzLdTxihQwch5Q%3D%3D)</sub> | <sub>Restricted</sub> | <sub>EULA: End User License Agreement</sub> | +| <sub>[AESDD](http://m3c.web.auth.gr/research/aesdd-speech-emotion-recognition/)</sub> | <sub>2018</sub> | <sub>around 500 utterances by a diverse group of actors (over 5 actors) simulating various emotions.</sub> | <sub>5 emotions: anger, disgust, fear, happiness, and sadness.</sub> | <sub>Audio</sub> | <sub>0.392 GB</sub> | <sub>Greek</sub> | <sub>[Speech Emotion Recognition for Performance Interaction](https://www.researchgate.net/publication/326005164_Speech_Emotion_Recognition_for_Performance_Interaction)</sub> | <sub>Open</sub> | <sub>--</sub> | +| <sub>[Emov-DB](https://mega.nz/#F!KBp32apT!gLIgyWf9iQ-yqnWFUFuUHg!mYwUnI4K)</sub> | <sub>2018</sub> | <sub>Recordings for 4 speakers- 2 males and 2 females.</sub> | <sub>The emotional styles are neutral, sleepiness, anger, disgust and amused.</sub> | <sub>Audio</sub> | <sub>5.88 GB</sub> | <sub>English</sub> | <sub>[The emotional voices database: Towards controlling the emotion dimension in voice generation systems](https://arxiv.org/pdf/1806.09514.pdf)</sub> | <sub>Open</sub> | <sub>--</sub> | +| <sub>[RAVDESS](https://zenodo.org/record/1188976#.XrC7a5NKjOR)</sub> | <sub>2018</sub> | <sub>7356 recordings by 24 actors.</sub> | <sub>7 emotions: calm, happy, sad, angry, fearful, surprise, and disgust</sub> | <sub>Audio, Video</sub> | <sub>24.8 GB</sub> | <sub>English</sub> | <sub>[The Ryerson Audio-Visual Database of Emotional Speech and Song (RAVDESS): A dynamic, multimodal set of facial and vocal expressions in North American English](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0196391)</sub> | <sub>Open</sub> | <sub>[CC BY-NC-SA 
4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/)</sub> | +| <sub>[JL corpus](https://www.kaggle.com/tli725/jl-corpus)</sub> | <sub>2018</sub> | <sub>2400 recording of 240 sentences by 4 actors (2 males and 2 females).</sub> | <sub>5 primary emotions: angry, sad, neutral, happy, excited. 5 secondary emotions: anxious, apologetic, pensive, worried, enthusiastic.</sub> | <sub>Audio</sub> | <sub>--</sub> | <sub>English</sub> | <sub>[An Open Source Emotional Speech Corpus for Human Robot Interaction Applications](https://www.isca-speech.org/archive/Interspeech_2018/pdfs/1349.pdf)</sub> | <sub>Open</sub> | <sub>[CC0 1.0](https://creativecommons.org/publicdomain/zero/1.0/)</sub> | +| <sub>[CaFE](https://zenodo.org/record/1478765)</sub> | <sub>2018</sub> | <sub>6 different sentences by 12 speakers (6 females + 6 males).</sub> | <sub>7 emotions: happy, sad, angry, fearful, surprise, disgust and neutral. Each emotion is acted in 2 different intensities.</sub> | <sub>Audio</sub> | <sub>2 GB</sub> | <sub>French (Canadian)</sub> | <sub>--</sub> | <sub>Open</sub> | <sub>[CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/)</sub> | +| <sub>[EmoFilm](https://zenodo.org/record/1326428)</sub> | <sub>2018</sub> | <sub>1115 audio instances sentences extracted from various films.</sub> | <sub>5 emotions: anger, contempt, happiness, fear, and sadness.</sub> | <sub>Audio</sub> | <sub>--</sub> | <sub>English, Italian & Spanish</sub> | <sub>[Categorical vs Dimensional Perception of Italian Emotional Speech](https://pdfs.semanticscholar.org/e70e/fcf7f5b4c366a7b7e2c16267d7f7691a5391.pdf)</sub> | <sub>Restricted</sub> | <sub>EULA: End User License Agreement</sub> | +| <sub>[ANAD](https://www.kaggle.com/suso172/arabic-natural-audio-dataset)</sub> | <sub>2018</sub> | <sub>1384 recording by multiple speakers.</sub> | <sub>3 emotions: angry, happy, surprised.</sub> | <sub>Audio</sub> | <sub>2 GB</sub> | <sub>Arabic</sub> | <sub>[Arabic Natural Audio 
Dataset](https://data.mendeley.com/datasets/xm232yxf7t/1)</sub> | <sub>Open</sub> | <sub>[CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/)</sub> | +| <sub>[EmoSynth](https://zenodo.org/record/3727593)</sub> | <sub>2018</sub> | <sub>144 audio file labelled by 40 listeners.</sub> | <sub>Emotion (no speech) defined in regard of valence and arousal.</sub> | <sub>Audio</sub> | <sub>0.1034 GB</sub> | <sub>--</sub> | <sub>[The Perceived Emotion of Isolated Synthetic Audio: The EmoSynth Dataset and Results](https://dl.acm.org/doi/10.1145/3243274.3243277)</sub> | <sub>Open</sub> | <sub>[CC BY 4.0](https://creativecommons.org/licenses/by/4.0/)</sub> | +| <sub>[CMU-MOSEI](https://www.amir-zadeh.com/datasets)</sub> | <sub>2018</sub> | <sub>65 hours of annotated video from more than 1000 speakers and 250 topics.</sub> | <sub>6 Emotion (happiness, sadness, anger,fear, disgust, surprise) + Likert scale.</sub> | <sub>Audio, Video</sub> | <sub>--</sub> | <sub>English</sub> | <sub>[Multi-attention Recurrent Network for Human Communication Comprehension](https://arxiv.org/pdf/1802.00923.pdf)</sub> | <sub>Open</sub> | <sub>[CMU-MOSEI License](https://github.com/A2Zadeh/CMU-MultimodalSDK/blob/master/LICENSE.txt)</sub> | +| <sub>[VERBO](https://sites.google.com/view/verbodatabase/home)</sub> | <sub>2018</sub> | <sub>14 different phrases by 12 speakers (6 female + 6 male) for a total of 1167 recordings.</sub> | <sub>7 emotions: Happiness, Disgust, Fear, Neutral, Anger, Surprise, Sadness</sub> | <sub>Audio</sub> | <sub>--</sub> | <sub>Portuguese</sub> | <sub>[VERBO: Voice Emotion Recognition dataBase in Portuguese Language](https://thescipub.com/pdf/jcssp.2018.1420.1430.pdf)</sub> | <sub>Restricted</sub> | <sub>Available for research purposes only</sub> | +| <sub>[CMU-MOSI](https://www.amir-zadeh.com/datasets)</sub> | <sub>2017</sub> | <sub>2199 opinion utterances with annotated sentiment.</sub> | <sub>Sentiment annotated between very negative to very positive in seven 
Likert steps.</sub> | <sub>Audio, Video</sub> | <sub>--</sub> | <sub>English</sub> | <sub>[Multi-attention Recurrent Network for Human Communication Comprehension](https://arxiv.org/pdf/1802.00923.pdf)</sub> | <sub>Open</sub> | <sub>[CMU-MOSI License](https://github.com/A2Zadeh/CMU-MultimodalSDK/blob/master/LICENSE.txt)</sub> | +| <sub>[MSP-IMPROV](https://ecs.utdallas.edu/research/researchlabs/msp-lab/MSP-Improv.html)</sub> | <sub>2017</sub> | <sub>20 sentences by 12 actors.</sub> | <sub>4 emotions: angry, sad, happy, neutral, other, without agreement</sub> | <sub>Audio, Video</sub> | <sub>--</sub> | <sub>English</sub> | <sub>[MSP-IMPROV: An Acted Corpus of Dyadic Interactions to Study Emotion Perception](https://ecs.utdallas.edu/research/researchlabs/msp-lab/publications/Busso_2017.pdf)</sub> | <sub>Restricted</sub> | <sub>Academic License & Commercial License</sub> | +| <sub>[CREMA-D](https://github.com/CheyneyComputerScience/CREMA-D)</sub> | <sub>2017</sub> | <sub>7442 clip of 12 sentences spoken by 91 actors (48 males and 43 females).</sub> | <sub>6 emotions: angry, disgusted, fearful, happy, neutral, and sad</sub> | <sub>Audio, Video</sub> | <sub>--</sub> | <sub>English</sub> | <sub>[CREMA-D: Crowd-sourced Emotional Multimodal Actors Dataset](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4313618/)</sub> | <sub>Open</sub> | <sub>[Open Database License & Database Content License](https://github.com/CheyneyComputerScience/CREMA-D/blob/master/LICENSE.txt)</sub> | +| <sub>[Example emotion videos used in investigation of emotion perception in schizophrenia](https://espace.library.uq.edu.au/view/UQ:446541)</sub> | <sub>2017</sub> | <sub>6 videos:Two example videos from each emotion category (angry, happy and neutral) by one female speaker.</sub> | <sub>3 emotions: angry, happy and neutral.</sub> | <sub>Audio, Video</sub> | <sub>0.063 GB</sub> | <sub>English</sub> | <sub>--</sub> | <sub>Open</sub> | <sub>[Permitted Non-commercial Re-use with 
Acknowledgment](https://guides.library.uq.edu.au/deposit_your_data/terms_and_conditions)</sub> | +| <sub>[EMOVO](http://voice.fub.it/activities/corpora/emovo/index.html)</sub> | <sub>2014</sub> | <sub>6 actors who played 14 sentences.</sub> | <sub>6 emotions: disgust, fear, anger, joy, surprise, sadness.</sub> | <sub>Audio</sub> | <sub>0.355 GB</sub> | <sub>Italian</sub> | <sub>[EMOVO Corpus: an Italian Emotional Speech Database](https://core.ac.uk/download/pdf/53857389.pdf)</sub> | <sub>Open</sub> | <sub>--</sub> | +| <sub>[RECOLA](https://diuf.unifr.ch/main/diva/recola/download.html)</sub> | <sub>2013</sub> | <sub>3.8 hours of recordings by 46 participants.</sub> | <sub>negative and positive sentiment (valence and arousal).</sub> | <sub>Audio, Video</sub> | <sub>--</sub> | <sub>--</sub> | <sub>[Introducing the RECOLA Multimodal Corpus of Remote Collaborative and Affective Interactions](https://drive.google.com/file/d/0B2V_I9XKBODhNENKUnZWNFdVXzQ/view)</sub> | <sub>Restricted</sub> | <sub>Academic License & Commercial License</sub> | +| <sub>[GEMEP corpus](https://www.unige.ch/cisa/gemep)</sub> | <sub>2012</sub> | <sub>Videos: 10 actors portraying 10 states.</sub> | <sub>12 emotions: amusement, anxiety, cold anger (irritation), despair, hot anger (rage), fear (panic), interest, joy (elation), pleasure (sensory), pride, relief, and sadness. 
Plus, 5 additional emotions: admiration, contempt, disgust, surprise, and tenderness.</sub> | <sub>Audio, Video</sub> | <sub>--</sub> | <sub>French</sub> | <sub>[Introducing the Geneva Multimodal Expression Corpus for Experimental Research on Emotion Perception](https://www.researchgate.net/publication/51796867_Introducing_the_Geneva_Multimodal_Expression_Corpus_for_Experimental_Research_on_Emotion_Perception)</sub> | <sub>Restricted</sub> | <sub>--</sub> | +| <sub>[OGVC](https://sites.google.com/site/ogcorpus/home/en)</sub> | <sub>2012</sub> | <sub>9114 spontaneous utterances and 2656 acted utterances by 4 professional actors (two male and two female).</sub> | <sub>9 emotional states: fear, surprise, sadness, disgust, anger, anticipation, joy, acceptance and the neutral state.</sub> | <sub>Audio</sub> | <sub>--</sub> | <sub>Japanese</sub> | <sub>[Naturalistic emotional speech collection paradigm with online game and its psychological and acoustical assessment](https://www.jstage.jst.go.jp/article/ast/33/6/33_E1175/_pdf)</sub> | <sub>Restricted</sub> | <sub>--</sub> | +| <sub>[LEGO corpus](https://www.ultes.eu/ressources/lego-spoken-dialogue-corpus/)</sub> | <sub>2012</sub> | <sub>347 dialogs with 9,083 system-user exchanges.</sub> | <sub>Emotions classified as garbage, non-angry, slightly angry and very angry.</sub> | <sub>Audio</sub> | <sub>1.1 GB</sub> | <sub>--</sub> | <sub>[A Parameterized and Annotated Spoken Dialog Corpus of the CMU Let’s Go Bus Information System](http://www.lrec-conf.org/proceedings/lrec2012/pdf/333_Paper.pdf)</sub> | <sub>Open</sub> | <sub>License available with the data. Free of charges for research purposes only.</sub> | +| <sub>[SEMAINE](https://semaine-db.eu/)</sub> | <sub>2012</sub> | <sub>95 dyadic conversations from 21 subjects. 
Each subject converses with another playing one of four characters with emotions.</sub> | <sub>5 FeelTrace annotations: activation, valence, dominance, power, intensity</sub> | <sub>Audio, Video, Text</sub> | <sub>104 GB</sub> | <sub>English</sub> | <sub>[The SEMAINE Database: Annotated Multimodal Records of Emotionally Colored Conversations between a Person and a Limited Agent](https://ieeexplore.ieee.org/document/5959155)</sub> | <sub>Restricted</sub> | <sub>Academic EULA</sub> | +| <sub>[SAVEE](http://kahlan.eps.surrey.ac.uk/savee/Database.html)</sub> | <sub>2011</sub> | <sub>480 British English utterances by 4 males actors.</sub> | <sub>7 emotions: anger, disgust, fear, happiness, sadness, surprise and neutral.</sub> | <sub>Audio, Video</sub> | <sub>--</sub> | <sub>English (British)</sub> | <sub>[Multimodal Emotion Recognition](http://personal.ee.surrey.ac.uk/Personal/P.Jackson/pub/ma10/HaqJackson_MachineAudition10_approved.pdf)</sub> | <sub>Restricted</sub> | <sub>Free of charges for research purposes only.</sub> | +| <sub>[TESS](https://tspace.library.utoronto.ca/handle/1807/24487)</sub> | <sub>2010</sub> | <sub>2800 recording by 2 actresses.</sub> | <sub>7 emotions: anger, disgust, fear, happiness, pleasant surprise, sadness, and neutral.</sub> | <sub>Audio</sub> | <sub>--</sub> | <sub>English</sub> | <sub>[BEHAVIOURAL FINDINGS FROM THE TORONTO EMOTIONAL SPEECH SET](https://www.semanticscholar.org/paper/BEHAVIOURAL-FINDINGS-FROM-THE-TORONTO-EMOTIONAL-SET-Dupuis-Pichora-Fuller/d7f746b3aee801a353b6929a65d9a34a68e71c6f/figure/2)</sub> | <sub>Open</sub> | <sub>[CC BY-NC-ND 4.0](https://creativecommons.org/licenses/by-nc-nd/4.0/)</sub> | +| <sub>[EEKK](https://metashare.ut.ee/repository/download/4d42d7a8463411e2a6e4005056b40024a19021a316b54b7fb707757d43d1a889/)</sub> | <sub>2007</sub> | <sub>26 text passage read by 10 speakers.</sub> | <sub>4 main emotions: joy, sadness, anger and neutral.</sub> | <sub>--</sub> | <sub>0.352 GB</sub> | <sub>Estonian</sub> | 
<sub>[Estonian Emotional Speech Corpus](https://www.researchgate.net/publication/261724574_Estonian_Emotional_Speech_Corpus_Release_1)</sub> | <sub>Open</sub> | <sub>[CC-BY license](https://metashare.ut.ee/repository/download/4d42d7a8463411e2a6e4005056b40024a19021a316b54b7fb707757d43d1a889/)</sub> | +| <sub>[IEMOCAP](https://sail.usc.edu/iemocap/iemocap_release.htm)</sub> | <sub>2007</sub> | <sub>12 hours of audiovisual data by 10 actors.</sub> | <sub>5 emotions: happiness, anger, sadness, frustration and neutral.</sub> | <sub>--</sub> | <sub>--</sub> | <sub>English</sub> | <sub>[IEMOCAP: Interactive emotional dyadic motion capture database](https://sail.usc.edu/iemocap/Busso_2008_iemocap.pdf)</sub> | <sub>Restricted</sub> | <sub>[IEMOCAP license](https://sail.usc.edu/iemocap/Data_Release_Form_IEMOCAP.pdf)</sub> | +| <sub>[Keio-ESD](http://research.nii.ac.jp/src/en/Keio-ESD.html)</sub> | <sub>2006</sub> | <sub>A set of human speech with vocal emotion spoken by a Japanese male speaker.</sub> | <sub>47 emotions including angry, joyful, disgusting, downgrading, funny, worried, gentle, relief, indignation, shameful, etc.</sub> | <sub>Audio</sub> | <sub>--</sub> | <sub>Japanese</sub> | <sub>[EMOTIONAL SPEECH SYNTHESIS USING SUBSPACE CONSTRAINTS IN PROSODY](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.420.8899&rep=rep1&type=pdf)</sub> | <sub>Restricted</sub> | <sub>Available for research purposes only.</sub> | +| <sub>[EMO-DB](http://emodb.bilderbar.info/index-1280.html)</sub> | <sub>2005</sub> | <sub>800 recording spoken by 10 actors (5 males and 5 females).</sub> | <sub>7 emotions: anger, neutral, fear, boredom, happiness, sadness, disgust.</sub> | <sub>Audio</sub> | <sub>--</sub> | <sub>German</sub> | <sub>[A Database of German Emotional Speech](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.130.8506&rep=rep1&type=pdf)</sub> | <sub>Open</sub> | <sub>--</sub> | +| 
<sub>[eNTERFACE05](http://www.enterface.net/enterface05/docs/results/databases/project2_database.zip)</sub> | <sub>2005</sub> | <sub>Videos by 42 subjects, coming from 14 different nationalities.</sub> | <sub>6 emotions: anger, fear, surprise, happiness, sadness and disgust.</sub> | <sub>Audio, Video</sub> | <sub>0.8 GB</sub> | <sub>German</sub> | <sub>--</sub> | <sub>Open</sub> | <sub>Free of charges for research purposes only.</sub> | +| <sub>[DES](http://kom.aau.dk/~tb/speech/Emotions/)</sub> | <sub>2002</sub> | <sub>4 speakers (2 males and 2 females).</sub> | <sub>5 emotions: neutral, surprise, happiness, sadness and anger</sub> | <sub>--</sub> | <sub>--</sub> | <sub>Danish</sub> | <sub>[Documentation of the Danish Emotional Speech Database](http://kom.aau.dk/~tb/speech/Emotions/des.pdf)</sub> | <sub>--</sub> | <sub>--</sub> |## References - Swain, Monorama & Routray, Aurobinda & Kabisatpathy, Prithviraj, Databases, features and classifiers for speech emotion recognition: a review, International Journal of Speech Technology, [paper](https://www.researchgate.net/publication/322602563_Databases_features_and_classifiers_for_speech_emotion_recognition_a_review#pf19) - Dimitrios Ververidis and Constantine Kotropoulos, A State of the Art Review on Emotional Speech Databases, Artificial Intelligence & Information Analysis Laboratory, Department of Informatics Aristotle, University of Thessaloniki, [paper](http://poseidon.csd.auth.gr/papers/PUBLISHED/CONFERENCE/pdf/Ververidis2003b.pdf) @@ -67,106 +62,4 @@ The table can be browsed, sorted and searched under https://superkogito.github.i ## Disclaimer - - The mainter and the contributors try their best to keep this list up-to-date, and to only include working links (using automated verification with the help of the [urlchecker-action](https://github.com/marketplace/actions/urlchecker-action)). However, we cannot guarantee that all listed links are up-to-date. 
Read more in [DISCLAIMER.md](https://github.com/SuperKogito/SER-datasets/blob/master/DISCLAIMER.md). - - - - -[//]: # (datasets) - -[MESD]: https://data.mendeley.com/datasets/cy34mh68j9/5 -[MLEnd]: https://www.kaggle.com/datasets/jesusrequena/mlend-spoken-numerals -[ASVP-ESD]: https://www.kaggle.com/datasets/dejolilandry/asvpesdspeech-nonspeech-emotional-utterances -[ESD]: https://hltsingapore.github.io/ESD/ -[MuSe-CAR]: https://zenodo.org/record/4134758 -[MSP-Podcast corpus]: https://ecs.utdallas.edu/research/researchlabs/msp-lab/MSP-Podcast.html -[emotiontts open db]: https://github.com/emotiontts/emotiontts_open_db -[URDU-Dataset]: https://github.com/siddiquelatif/urdu-dataset -[BAVED]: https://www.kaggle.com/a13x10/basic-arabic-vocal-emotions-dataset -[VIVAE]: https://zenodo.org/record/4066235 -[SEWA]: https://db.sewaproject.eu/ -[MELD]: https://affective-meld.github.io/ -[ShEMO]: https://github.com/mansourehk/ShEMO -[DEMoS]: https://zenodo.org/record/2544829 -[VERBO]:https://sites.google.com/view/verbodatabase/home -[AESDD]: http://m3c.web.auth.gr/research/aesdd-speech-emotion-recognition/ -[Emov-DB]: https://mega.nz/#F!KBp32apT!gLIgyWf9iQ-yqnWFUFuUHg!mYwUnI4K -[RAVDESS]: https://zenodo.org/record/1188976#.XrC7a5NKjOR -[JL corpus]: https://www.kaggle.com/tli725/jl-corpus -[CaFE]: https://zenodo.org/record/1478765 -[EmoFilm]: https://zenodo.org/record/1326428 -[ANAD]: https://www.kaggle.com/suso172/arabic-natural-audio-dataset -[EmoSynth]: https://zenodo.org/record/3727593 -[CMU-MOSEI]: https://www.amir-zadeh.com/datasets -[CMU-MOSI]: https://www.amir-zadeh.com/datasets -[MSP-IMPROV]: https://ecs.utdallas.edu/research/researchlabs/msp-lab/MSP-Improv.html -[CREMA-D]: https://github.com/CheyneyComputerScience/CREMA-D -[Example emotion videos used in investigation of emotion perception in schizophrenia]: https://espace.library.uq.edu.au/view/UQ:446541 -[EMOVO]: http://voice.fub.it/activities/corpora/emovo/index.html -[RECOLA]: 
https://diuf.unifr.ch/main/diva/recola/download.html -[GEMEP corpus]: https://www.unige.ch/cisa/gemep -[OGVC]: https://sites.google.com/site/ogcorpus/home/en -[LEGO corpus]: https://www.ultes.eu/ressources/lego-spoken-dialogue-corpus/ -[SEMAINE]: https://semaine-db.eu/ -[SAVEE]: http://kahlan.eps.surrey.ac.uk/savee/Database.html -[TESS]: https://tspace.library.utoronto.ca/handle/1807/24487 -[EEKK]: https://metashare.ut.ee/repository/download/4d42d7a8463411e2a6e4005056b40024a19021a316b54b7fb707757d43d1a889/ -[IEMOCAP]: https://sail.usc.edu/iemocap/iemocap_release.htm -[Keio-ESD]: http://research.nii.ac.jp/src/en/Keio-ESD.html -[EMO-DB]: http://emodb.bilderbar.info/index-1280.html -[eNTERFACE05]: http://www.enterface.net/enterface05/docs/results/databases/project2_database.zip -[DES]: http://kom.aau.dk/~tb/speech/Emotions/ - -[//]: # (license) - -[CC BY 4.0]: https://creativecommons.org/licenses/by/4.0/ -[CC BY-NC-SA 4.0]: https://creativecommons.org/licenses/by-nc-sa/4.0/ -[CC BY-NC-ND 4.0]: https://creativecommons.org/licenses/by-nc-nd/4.0/ -[CC-BY license]: https://metashare.ut.ee/repository/download/4d42d7a8463411e2a6e4005056b40024a19021a316b54b7fb707757d43d1a889/ -[Permitted Non-commercial Re-use with Acknowledgment]: https://guides.library.uq.edu.au/deposit_your_data/terms_and_conditions -[Open Database License & Database Content License]: https://github.com/CheyneyComputerScience/CREMA-D/blob/master/LICENSE.txt -[CC0 1.0]: https://creativecommons.org/publicdomain/zero/1.0/ -[CMU-MOSEI License]: https://github.com/A2Zadeh/CMU-MultimodalSDK/blob/master/LICENSE.txt -[CMU-MOSI License]: https://github.com/A2Zadeh/CMU-MultimodalSDK/blob/master/LICENSE.txt -[IEMOCAP license]: https://sail.usc.edu/iemocap/Data_Release_Form_IEMOCAP.pdf -[SEWA EULA]: https://db.sewaproject.eu/media/doc/eula.pdf -[Meld: GPL-3.0 License]: https://github.com/declare-lab/MELD/blob/master/LICENSE - -[//]: # (papers) - -[The Mexican Emotional Speech Database (MESD): elaboration and 
assessment based on machine learning]: https://pubmed.ncbi.nlm.nih.gov/34891601/ -[Seen And Unseen Emotional Style Transfer For Voice Conversion With A New Emotional Speech Dataset]: https://arxiv.org/pdf/2010.14794.pdf -[The Multimodal Sentiment Analysis in Car Reviews (MuSe-CaR) Dataset: Collection, Insights and Improvements]: https://arxiv.org/pdf/2101.06053.pdf -[The MSP-Conversation Corpus]: http://www.interspeech2020.org/index.php?m=content&c=index&a=show&catid=290&id=684 -[Cross Lingual Speech Emotion Recognition: Urdu vs. Western Languages]: https://arxiv.org/pdf/1812.10411.pdf -[Estonian Emotional Speech Corpus]: https://www.researchgate.net/publication/261724574_Estonian_Emotional_Speech_Corpus_Release_1 -[IEMOCAP: Interactive emotional dyadic motion capture database]: https://sail.usc.edu/iemocap/Busso_2008_iemocap.pdf -[A Database of German Emotional Speech]: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.130.8506&rep=rep1&type=pdf -[SEWA DB: A Rich Database for Audio-Visual Emotion and Sentiment Research in the Wild]: https://arxiv.org/pdf/1901.02839.pdf -[Documentation of the Danish Emotional Speech Database]: http://kom.aau.dk/~tb/speech/Emotions/des.pdf -[EMOTIONAL SPEECH SYNTHESIS USING SUBSPACE CONSTRAINTS IN PROSODY]: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.420.8899&rep=rep1&type=pdf -[Naturalistic emotional speech collectionparadigm with online game and its psychological and acoustical assessment]: https://www.jstage.jst.go.jp/article/ast/33/6/33_E1175/_pdf -[EMOVO Corpus: an Italian Emotional Speech Database]: https://core.ac.uk/download/pdf/53857389.pdf -[The eNTERFACE’05 Audio-Visual Emotion Database]: http://poseidon.csd.auth.gr/papers/PUBLISHED/CONFERENCE/pdf/Martin06a.pdf -[Arabic Natural Audio Dataset]: https://data.mendeley.com/datasets/xm232yxf7t/1 -[Introducing the Geneva Multimodal Expression Corpus for Experimental Research on Emotion Perception]: 
https://www.researchgate.net/publication/51796867_Introducing_the_Geneva_Multimodal_Expression_Corpus_for_Experimental_Research_on_Emotion_Perception -[Speech Emotion Recognition for Performance Interaction]: https://www.researchgate.net/publication/326005164_Speech_Emotion_Recognition_for_Performance_Interaction -[MELD: A Multimodal Multi-Party Dataset for Emotion Recognition in Conversations]: https://arxiv.org/pdf/1810.02508.pdf -[BEHAVIOURAL FINDINGS FROM THE TORONTO EMOTIONAL SPEECH SET]: https://www.semanticscholar.org/paper/BEHAVIOURAL-FINDINGS-FROM-THE-TORONTO-EMOTIONAL-SET-Dupuis-Pichora-Fuller/d7f746b3aee801a353b6929a65d9a34a68e71c6f/figure/2 -[CREMA-D: Crowd-sourced Emotional Multimodal Actors Dataset]: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4313618/ -[DEMoS: An Italian emotional speech corpus. Elicitation methods, machine learning, and perception]: https://link.springer.com/epdf/10.1007/s10579-019-09450-y?author_access_token=5pf0w_D4k9z28TM6n4PbVPe4RwlQNchNByi7wbcMAY5hiA-aXzXNbZYfsMDDq2CdHD-w5ArAxIwlsk2nC_26pSyEAcu1xlKJ1c9m3JZj2ZlFmlVoCZUTcG3Hq2_2ozMLo3Hq3Y0CHzLdTxihQwch5Q%3D%3D -[VERBO: Voice Emotion Recognition dataBase in Portuguese Language]: https://thescipub.com/pdf/jcssp.2018.1420.1430.pdf -[A Parameterized and Annotated Spoken Dialog Corpus of the CMU Let’s Go Bus Information System]: http://www.lrec-conf.org/proceedings/lrec2012/pdf/333_Paper.pdf -[Introducing the RECOLA Multimodal Corpus of Remote Collaborative and Affective Interactions]: https://drive.google.com/file/d/0B2V_I9XKBODhNENKUnZWNFdVXzQ/view -[Multimodal Emotion Recognition]: http://personal.ee.surrey.ac.uk/Personal/P.Jackson/pub/ma10/HaqJackson_MachineAudition10_approved.pdf -[The Perceived Emotion of Isolated Synthetic Audio: The EmoSynth Dataset and Results]: https://dl.acm.org/doi/10.1145/3243274.3243277 -[MSP-IMPROV: An Acted Corpus of Dyadic Interactions to Study Emotion Perception]: https://ecs.utdallas.edu/research/researchlabs/msp-lab/publications/Busso_2017.pdf 
-[Multi-attention Recurrent Network for Human Communication Comprehension]: https://arxiv.org/pdf/1802.00923.pdf -[Categorical vs Dimensional Perception of Italian Emotional Speech]: https://pdfs.semanticscholar.org/e70e/fcf7f5b4c366a7b7e2c16267d7f7691a5391.pdf -[Multi-attention Recurrent Network for Human Communication Comprehension]: https://arxiv.org/pdf/1802.00923.pdf -[ShEMO: a large-scale validated database for Persian speech emotion detection]: https://link.springer.com/article/10.1007/s10579-018-9427-x -[The emotional voices database: Towards controlling the emotion dimension in voice generation systems]: https://arxiv.org/pdf/1806.09514.pdf -[The Ryerson Audio-Visual Database of Emotional Speech and Song (RAVDESS): A dynamic, multimodal set of facial and vocal expressions in North American English]: https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0196391 -[An Open Source Emotional Speech Corpus for Human Robot Interaction Applications]: https://www.isca-speech.org/archive/Interspeech_2018/pdfs/1349.pdf -[The SEMAINE Database: Annotated Multimodal Records of Emotionally Colored Conversations between a Person and a Limited Agent]: https://ieeexplore.ieee.org/document/5959155 diff --git a/src/index.rst b/src/index.rst index 21715af..8aae0cd 100644 --- a/src/index.rst +++ b/src/index.rst @@ -40,103 +40,4 @@ Disclaimer =========== The maintainer and the contributors try their best to keep this list up-to-date, and to only include working links (using automated verification with the help of the `urlchecker-action <https://github.com/marketplace/actions/urlchecker-action>`_). -However, we cannot guarantee that all listed links are up-to-date. Read more in `DISCLAIMER.md <https://github.com/SuperKogito/SER-datasets/blob/master/DISCLAIMER.md>`_. - - -.. datasets - -.. _`MESD`: https://data.mendeley.com/datasets/cy34mh68j9/5 -.. _`MLEnd`: https://www.kaggle.com/datasets/jesusrequena/mlend-spoken-numerals -.. 
_`ASVP-ESD`: https://www.kaggle.com/datasets/dejolilandry/asvpesdspeech-nonspeech-emotional-utterances -.. _`ESD`: https://hltsingapore.github.io/ESD/ -.. _`MuSe-CAR`: https://zenodo.org/record/4134758 -.. _`MSP-Podcast corpus`: https://ecs.utdallas.edu/research/researchlabs/msp-lab/MSP-Podcast.html -.. _`emotiontts open db`: https://github.com/emotiontts/emotiontts_open_db -.. _`URDU-Dataset`: https://github.com/siddiquelatif/urdu-dataset -.. _`BAVED`: https://www.kaggle.com/a13x10/basic-arabic-vocal-emotions-dataset -.. _`VIVAE`: https://zenodo.org/record/4066235 -.. _`SEWA`: https://db.sewaproject.eu/ -.. _`MELD`: https://affective-meld.github.io/ -.. _`ShEMO`: https://github.com/mansourehk/ShEMO -.. _`DEMoS`: https://zenodo.org/record/2544829 -.. _`VERBO`: https://sites.google.com/view/verbodatabase/home -.. _`AESDD`: http://m3c.web.auth.gr/research/aesdd-speech-emotion-recognition/ -.. _`Emov-DB`: https://mega.nz/#F!KBp32apT!gLIgyWf9iQ-yqnWFUFuUHg!mYwUnI4K -.. _`RAVDESS`: https://zenodo.org/record/1188976#.XrC7a5NKjOR -.. _`JL corpus`: https://www.kaggle.com/tli725/jl-corpus -.. _`CaFE`: https://zenodo.org/record/1478765 -.. _`EmoFilm`: https://zenodo.org/record/1326428 -.. _`ANAD`: https://www.kaggle.com/suso172/arabic-natural-audio-dataset -.. _`EmoSynth`: https://zenodo.org/record/3727593 -.. _`CMU-MOSEI`: https://www.amir-zadeh.com/datasets -.. _`CMU-MOSI`: https://www.amir-zadeh.com/datasets -.. _`MSP-IMPROV`: https://ecs.utdallas.edu/research/researchlabs/msp-lab/MSP-Improv.html -.. _`CREMA-D`: https://github.com/CheyneyComputerScience/CREMA-D -.. _`Example emotion videos used in investigation of emotion perception in schizophrenia`: https://espace.library.uq.edu.au/view/UQ:446541 -.. _`EMOVO`: http://voice.fub.it/activities/corpora/emovo/index.html -.. _`RECOLA`: https://diuf.unifr.ch/main/diva/recola/download.html -.. _`GEMEP corpus`: https://www.unige.ch/cisa/gemep -.. _`OGVC`: https://sites.google.com/site/ogcorpus/home/en -.. 
_`LEGO corpus`: https://www.ultes.eu/ressources/lego-spoken-dialogue-corpus/ -.. _`SEMAINE`: https://semaine-db.eu/ -.. _`SAVEE`: http://kahlan.eps.surrey.ac.uk/savee/Database.html -.. _`TESS`: https://tspace.library.utoronto.ca/handle/1807/24487 -.. _`EEKK`: https://metashare.ut.ee/repository/download/4d42d7a8463411e2a6e4005056b40024a19021a316b54b7fb707757d43d1a889/ -.. _`IEMOCAP`: https://sail.usc.edu/iemocap/iemocap_release.htm -.. _`Keio-ESD`: http://research.nii.ac.jp/src/en/Keio-ESD.html -.. _`EMO-DB`: http://emodb.bilderbar.info/index-1280.html -.. _`eNTERFACE05`: http://www.enterface.net/enterface05/docs/results/databases/project2_database.zip -.. _`DES`: http://kom.aau.dk/~tb/speech/Emotions/ - -.. license - -.. _`CC BY 4.0`: https://creativecommons.org/licenses/by/4.0/ -.. _`CC BY-NC-SA 4.0`: https://creativecommons.org/licenses/by-nc-sa/4.0/ -.. _`CC BY-NC-ND 4.0`: https://creativecommons.org/licenses/by-nc-nd/4.0/ -.. _`CC-BY license`: https://metashare.ut.ee/repository/download/4d42d7a8463411e2a6e4005056b40024a19021a316b54b7fb707757d43d1a889/ -.. _`Permitted Non-commercial Re-use with Acknowledgment`: https://guides.library.uq.edu.au/deposit_your_data/terms_and_conditions -.. _`Open Database License & Database Content License`: https://github.com/CheyneyComputerScience/CREMA-D/blob/master/LICENSE.txt -.. _`CC0 1.0`: https://creativecommons.org/publicdomain/zero/1.0/ -.. _`CMU-MOSEI License`: https://github.com/A2Zadeh/CMU-MultimodalSDK/blob/master/LICENSE.txt -.. _`CMU-MOSI License`: https://github.com/A2Zadeh/CMU-MultimodalSDK/blob/master/LICENSE.txt -.. _`IEMOCAP license`: https://sail.usc.edu/iemocap/Data_Release_Form_IEMOCAP.pdf -.. _`SEWA EULA`: https://db.sewaproject.eu/media/doc/eula.pdf -.. _`Meld: GPL-3.0 License`: https://github.com/declare-lab/MELD/blob/master/LICENSE - -.. papers - -.. _`The Mexican Emotional Speech Database (MESD): elaboration and assessment based on machine learning`: https://pubmed.ncbi.nlm.nih.gov/34891601/ -.. 
_`Seen And Unseen Emotional Style Transfer For Voice Conversion With A New Emotional Speech Dataset`: https://arxiv.org/pdf/2010.14794.pdf -.. _`The Multimodal Sentiment Analysis in Car Reviews (MuSe-CaR) Dataset: Collection, Insights and Improvements`: https://arxiv.org/pdf/2101.06053.pdf -.. _`The MSP-Conversation Corpus`: http://www.interspeech2020.org/index.php?m=content&c=index&a=show&catid=290&id=684 -.. _`Cross Lingual Speech Emotion Recognition: Urdu vs. Western Languages`: https://arxiv.org/pdf/1812.10411.pdf -.. _`Estonian Emotional Speech Corpus`: https://www.researchgate.net/publication/261724574_Estonian_Emotional_Speech_Corpus_Release_1 -.. _`IEMOCAP: Interactive emotional dyadic motion capture database`: https://sail.usc.edu/iemocap/Busso_2008_iemocap.pdf -.. _`A Database of German Emotional Speech`: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.130.8506&rep=rep1&type=pdf -.. _`SEWA DB: A Rich Database for Audio-Visual Emotion and Sentiment Research in the Wild`: https://arxiv.org/pdf/1901.02839.pdf -.. _`Documentation of the Danish Emotional Speech Database`: http://kom.aau.dk/~tb/speech/Emotions/des.pdf -.. _`EMOTIONAL SPEECH SYNTHESIS USING SUBSPACE CONSTRAINTS IN PROSODY`: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.420.8899&rep=rep1&type=pdf -.. _`Naturalistic emotional speech collectionparadigm with online game and its psychological and acoustical assessment`: https://www.jstage.jst.go.jp/article/ast/33/6/33_E1175/_pdf -.. _`EMOVO Corpus: an Italian Emotional Speech Database`: https://core.ac.uk/download/pdf/53857389.pdf -.. _`VERBO: Voice Emotion Recognition dataBase in Portuguese Language`: https://thescipub.com/pdf/jcssp.2018.1420.1430.pdf -.. _`The eNTERFACE’05 Audio-Visual Emotion Database`: http://poseidon.csd.auth.gr/papers/PUBLISHED/CONFERENCE/pdf/Martin06a.pdf -.. _`Arabic Natural Audio Dataset`: https://data.mendeley.com/datasets/xm232yxf7t/1 -.. 
_`Introducing the Geneva Multimodal Expression Corpus for Experimental Research on Emotion Perception`: https://www.researchgate.net/publication/51796867_Introducing_the_Geneva_Multimodal_Expression_Corpus_for_Experimental_Research_on_Emotion_Perception -.. _`Speech Emotion Recognition for Performance Interaction`: https://www.researchgate.net/publication/326005164_Speech_Emotion_Recognition_for_Performance_Interaction -.. _`MELD: A Multimodal Multi-Party Dataset for Emotion Recognition in Conversations`: https://arxiv.org/pdf/1810.02508.pdf -.. _`BEHAVIOURAL FINDINGS FROM THE TORONTO EMOTIONAL SPEECH SET`: https://www.semanticscholar.org/paper/BEHAVIOURAL-FINDINGS-FROM-THE-TORONTO-EMOTIONAL-SET-Dupuis-Pichora-Fuller/d7f746b3aee801a353b6929a65d9a34a68e71c6f/figure/2 -.. _`CREMA-D: Crowd-sourced Emotional Multimodal Actors Dataset`: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4313618/ -.. _`DEMoS: An Italian emotional speech corpus. Elicitation methods, machine learning, and perception`: https://link.springer.com/epdf/10.1007/s10579-019-09450-y?author_access_token=5pf0w_D4k9z28TM6n4PbVPe4RwlQNchNByi7wbcMAY5hiA-aXzXNbZYfsMDDq2CdHD-w5ArAxIwlsk2nC_26pSyEAcu1xlKJ1c9m3JZj2ZlFmlVoCZUTcG3Hq2_2ozMLo3Hq3Y0CHzLdTxihQwch5Q%3D%3D -.. _`A Parameterized and Annotated Spoken Dialog Corpus of the CMU Let’s Go Bus Information System`: http://www.lrec-conf.org/proceedings/lrec2012/pdf/333_Paper.pdf -.. _`Introducing the RECOLA Multimodal Corpus of Remote Collaborative and Affective Interactions`: https://drive.google.com/file/d/0B2V_I9XKBODhNENKUnZWNFdVXzQ/view -.. _`Multimodal Emotion Recognition`: http://personal.ee.surrey.ac.uk/Personal/P.Jackson/pub/ma10/HaqJackson_MachineAudition10_approved.pdf -.. _`The Perceived Emotion of Isolated Synthetic Audio: The EmoSynth Dataset and Results`: https://dl.acm.org/doi/10.1145/3243274.3243277 -.. 
_`MSP-IMPROV: An Acted Corpus of Dyadic Interactions to Study Emotion Perception`: https://ecs.utdallas.edu/research/researchlabs/msp-lab/publications/Busso_2017.pdf -.. _`Multi-attention Recurrent Network for Human Communication Comprehension`: https://arxiv.org/pdf/1802.00923.pdf -.. _`Categorical vs Dimensional Perception of Italian Emotional Speech`: https://pdfs.semanticscholar.org/e70e/fcf7f5b4c366a7b7e2c16267d7f7691a5391.pdf -.. _`Multi-attention Recurrent Network for Human Communication Comprehension`: https://arxiv.org/pdf/1802.00923.pdf -.. _`ShEMO: a large-scale validated database for Persian speech emotion detection`: https://link.springer.com/article/10.1007/s10579-018-9427-x -.. _`The emotional voices database: Towards controlling the emotion dimension in voice generation systems`: https://arxiv.org/pdf/1806.09514.pdf -.. _`The Ryerson Audio-Visual Database of Emotional Speech and Song (RAVDESS): A dynamic, multimodal set of facial and vocal expressions in North American English`: https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0196391 -.. _`An Open Source Emotional Speech Corpus for Human Robot Interaction Applications`: https://www.isca-speech.org/archive/Interspeech_2018/pdfs/1349.pdf -.. _`The SEMAINE Database: Annotated Multimodal Records of Emotionally Colored Conversations between a Person and a Limited Agent`: https://ieeexplore.ieee.org/document/5959155 +However, we cannot guarantee that all listed links are up-to-date. Read more in `DISCLAIMER.md <https://github.com/SuperKogito/SER-datasets/blob/master/DISCLAIMER.md>`_. 
\ No newline at end of file diff --git a/src/ser-datasets.csv b/src/ser-datasets.csv index f5e57ab..fd7c32f 100644 --- a/src/ser-datasets.csv +++ b/src/ser-datasets.csv @@ -1,43 +1,43 @@ -Dataset,Year,Content,Emotions,Format,Size,Language,Paper,Access,License -`MESD`_,2022,864 audio files of single-word emotional utterances with Mexican cultural shaping.,"6 emotions provides single-word utterances for anger, disgust, fear, happiness, neutral, and sadness.",Audio,"0,097 GB",Spanish (Mexican),`The Mexican Emotional Speech Database (MESD): elaboration and assessment based on machine learning`_,Open,`CC BY 4.0`_ -`MLEnd`_,2021,"~32700 audio recordings files produced by 154 speakers. Each audio recording corresponds to one English numeral (from ""zero"" to ""billion"")","Intonations: neutral, bored, excited and question",Audio,2.27 GB,--,--,Open,Unknown -`ASVP-ESD`_,2021,"~13285 audio files collected from movies, tv shows and youtube containing speech and non-speech.","12 different natural emotions (boredom, neutral, happiness, sadness, anger, fear, surprise, disgust, excitement, pleasure, pain, disappointment) with 2 levels of intensity.",Audio,2 GB,"Chinese, English, French, Russian and others",--,Open,Unknown -`ESD`_,2021,"29 hours, 3500 sentences, by 10 native English speakers and 10 native Chinese speakers.","5 emotions: angry, happy, neutral, sad, and surprise.","Audio, Text",2.4 GB (zip),"Chinese, English",`Seen And Unseen Emotional Style Transfer For Voice Conversion With A New Emotional Speech Dataset`_,Open,Academic License -`MuSe-CAR`_,2021,"40 hours, 6,000+ recordings of 25,000+ sentences by 70+ English speakers (see db link for details).","continuous emotion dimensions characterized using valence, arousal, and trustworthiness.","Audio, Video, Text",15 GB,English,"`The Multimodal Sentiment Analysis in Car Reviews (MuSe-CaR) Dataset: Collection, Insights and Improvements`_",Restricted,Academic License & Commercial License -`MSP-Podcast corpus`_,2020,100 hours 
by over 100 speakers (see db link for details).,"This corpus is annotated with emotional labels using attribute-based descriptors (activation, dominance and valence) and categorical labels (anger, happiness, sadness, disgust, surprised, fear, contempt, neutral and other).",Audio,--,--,`The MSP-Conversation Corpus`_,Restricted,Academic License & Commercial License -`emotiontts open db`_,2020,Recordings and their associated transcriptions by a diverse group of speakers.,"4 emotions: general, joy, anger, and sadness.","Audio, Text",--,Korean,--,Partial Open,`CC BY-NC-SA 4.0`_ -`URDU-Dataset`_,2020,400 utterances by 38 speakers (27 male and 11 female).,"4 emotions: angry, happy, neutral, and sad.",Audio,0.072 GB,Urdu,`Cross Lingual Speech Emotion Recognition: Urdu vs. Western Languages`_,Open,-- -`BAVED`_,2020,1935 recording by 61 speakers (45 male and 16 female).,3 levels of emotion.,Audio,0.195 GB,Arabic,--,Open,-- -`VIVAE`_,2020,"non-speech, 1085 audio file by 12 speakers.","non-speech 6 emotions: achievement, anger, fear, pain, pleasure, and surprise with 3 emotional intensities (low, moderate, strong, peak).",Audio,--,--,--,Restricted,`CC BY-NC-SA 4.0`_ -`SEWA`_,2019,more than 2000 minutes of audio-visual data of 398 people (201 male and 197 female) coming from 6 cultures.,emotions are characterized using valence and arousal.,"Audio, Video",--,"Chinese, English, German, Greek, Hungarian and Serbian",`SEWA DB: A Rich Database for Audio-Visual Emotion and Sentiment Research in the Wild`_,Restricted,`SEWA EULA`_ -`MELD`_,2019,1400 dialogues and 14000 utterances from Friends TV series by multiple speakers.,"7 emotions: Anger, disgust, sadness, joy, neutral, surprise and fear. 
MELD also has sentiment (positive, negative and neutral) annotation for each utterance.","Audio, Video, Text",10.1 GB,English,`MELD: A Multimodal Multi-Party Dataset for Emotion Recognition in Conversations`_,Open,`MELD: GPL-3.0 License`_ -`ShEMO`_,2019,"3000 semi-natural utterances, equivalent to 3 hours and 25 minutes of speech data from online radio plays by 87 native-Persian speakers.","6 emotions: anger, fear, happiness, sadness, neutral and surprise.",Audio,0.101 GB,Persian,`ShEMO: a large-scale validated database for Persian speech emotion detection`_,Open,-- -`DEMoS`_,2019,"9365 emotional and 332 neutral samples produced by 68 native speakers (23 females, 45 males).","7/6 emotions: anger, sadness, happiness, fear, surprise, disgust, and the secondary emotion guilt.",Audio,--,Italian,"`DEMoS: An Italian emotional speech corpus. Elicitation methods, machine learning, and perception`_",Restricted,EULA: End User License Agreement -`AESDD`_,2018,around 500 utterances by a diverse group of actors (over 5 actors) siumlating various emotions.,"5 emotions: anger, disgust, fear, happiness, and sadness.",Audio,0.392 GB,Greek,`Speech Emotion Recognition for Performance Interaction`_,Open,-- -`Emov-DB`_,2018,Recordings for 4 speakers- 2 males and 2 females.,"The emotional styles are neutral, sleepiness, anger, disgust and amused.",Audio,5.88 GB,English,`The emotional voices database: Towards controlling the emotion dimension in voice generation systems`_,Open,-- -`RAVDESS`_,2018,7356 recordings by 24 actors.,"7 emotions: calm, happy, sad, angry, fearful, surprise, and disgust","Audio, Video",24.8 GB,English,"`The Ryerson Audio-Visual Database of Emotional Speech and Song (RAVDESS): A dynamic, multimodal set of facial and vocal expressions in North American English`_",Open,`CC BY-NC-SA 4.0`_ -`JL corpus`_,2018,2400 recording of 240 sentences by 4 actors (2 males and 2 females).,"5 primary emotions: angry, sad, neutral, happy, excited. 
5 secondary emotions: anxious, apologetic, pensive, worried, enthusiastic.",Audio,--,English,`An Open Source Emotional Speech Corpus for Human Robot Interaction Applications`_,Open,`CC0 1.0`_ -`CaFE`_,2018,6 different sentences by 12 speakers (6 fmelaes + 6 males).,"7 emotions: happy, sad, angry, fearful, surprise, disgust and neutral. Each emotion is acted in 2 different intensities.",Audio,2 GB,French (Canadian),--,Open,`CC BY-NC-SA 4.0`_ -`EmoFilm`_,2018,1115 audio instances sentences extracted from various films.,"5 emotions: anger, contempt, happiness, fear, and sadness.",Audio,--,"English, Italian & Spanish",`Categorical vs Dimensional Perception of Italian Emotional Speech`_,Restricted,EULA: End User License Agreement -`ANAD`_,2018,1384 recording by multiple speakers.,"3 emotions: angry, happy, surprised.",Audio,2 GB,Arabic,`Arabic Natural Audio Dataset`_,Open,`CC BY-NC-SA 4.0`_ -`EmoSynth`_,2018,144 audio file labelled by 40 listeners.,Emotion (no speech) defined in regard of valence and arousal.,Audio,0.1034 GB,--,`The Perceived Emotion of Isolated Synthetic Audio: The EmoSynth Dataset and Results`_,Open,`CC BY 4.0`_ -`CMU-MOSEI`_,2018,65 hours of annotated video from more than 1000 speakers and 250 topics.,"6 Emotion (happiness, sadness, anger,fear, disgust, surprise) + Likert scale.","Audio, Video",--,English,`Multi-attention Recurrent Network for Human Communication Comprehension`_,Open,`CMU-MOSEI License`_ -`VERBO`_,2018,14 different phrases by 12 speakers (6 female + 6 male) for a total of 1167 recordings.,"7 emotions: Happiness, Disgust, Fear, Neutral, Anger, Surprise, Sadness",Audio,--,Portuguese,`VERBO: Voice Emotion Recognition dataBase in Portuguese Language`_,Restricted,Available for research purposes only -`CMU-MOSI`_,2017,2199 opinion utterances with annotated sentiment.,Sentiment annotated between very negative to very positive in seven Likert steps.,"Audio, Video",--,English,`Multi-attention Recurrent Network for Human Communication 
Comprehension`_,Open,`CMU-MOSI License`_ -`MSP-IMPROV`_,2017,20 sentences by 12 actors.,"4 emotions: angry, sad, happy, neutral, other, without agreement","Audio, Video",--,English,`MSP-IMPROV: An Acted Corpus of Dyadic Interactions to Study Emotion Perception`_,Restricted,Academic License & Commercial License -`CREMA-D`_,2017,7442 clip of 12 sentences spoken by 91 actors (48 males and 43 females).,"6 emotions: angry, disgusted, fearful, happy, neutral, and sad","Audio, Video",--,English,`CREMA-D: Crowd-sourced Emotional Multimodal Actors Dataset`_,Open,`Open Database License & Database Content License`_ -`Example emotion videos used in investigation of emotion perception in schizophrenia`_,2017,"6 videos:Two example videos from each emotion category (angry, happy and neutral) by one female speaker.","3 emotions: angry, happy and neutral.","Audio, Video",0.063 GB,English,--,Open,`Permitted Non-commercial Re-use with Acknowledgment`_ -`EMOVO`_,2014,6 actors who played 14 sentences.,"6 emotions: disgust, fear, anger, joy, surprise, sadness.",Audio,0.355 GB,Italian,`EMOVO Corpus: an Italian Emotional Speech Database`_,Open,-- -`RECOLA`_,2013,3.8 hours of recordings by 46 participants.,negative and positive sentiment (valence and arousal).,"Audio, Video",--,--,`Introducing the RECOLA Multimodal Corpus of Remote Collaborative and Affective Interactions`_,Restricted,Academic License & Commercial License -`GEMEP corpus`_,2012,Videos10 actors portraying 10 states.,"12 emotions: amusement, anxiety, cold anger (irritation), despair, hot anger (rage), fear (panic), interest, joy (elation), pleasure(sensory), pride, relief, and sadness. 
Plus, 5 additional emotions: admiration, contempt, disgust, surprise, and tenderness.","Audio, Video",--,French,`Introducing the Geneva Multimodal Expression Corpus for Experimental Research on Emotion Perception`_,Restricted,-- -`OGVC`_,2012,9114 spontaneous utterances and 2656 acted utterances by 4 professional actors (two male and two female).,"9 emotional states: fear, surprise, sadness, disgust, anger, anticipation, joy, acceptance and the neutral state.",Audio,--,Japanese,`Naturalistic emotional speech collectionparadigm with online game and its psychological and acoustical assessment`_,Restricted,-- -`LEGO corpus`_,2012,"347 dialogs with 9,083 system-user exchanges.","Emotions classified as garbage, non-angry, slightly angry and very angry.",Audio,1.1 GB,--,`A Parameterized and Annotated Spoken Dialog Corpus of the CMU Let’s Go Bus Information System`_,Open,License available with the data. Free of charges for research purposes only. -`SEMAINE`_,2012,95 dyadic conversations from 21 subjects. Each subject converses with another playing one of four characters with emotions.,"5 FeelTrace annotations: activation, valence, dominance, power, intensity","Audio, Video, Text",104 GB,English,`The SEMAINE Database: Annotated Multimodal Records of Emotionally Colored Conversations between a Person and a Limited Agent`_,Restricted,Academic EULA -`SAVEE`_,2011,480 British English utterances by 4 males actors.,"7 emotions: anger, disgust, fear, happiness, sadness, surprise and neutral.","Audio, Video",--,English (British),`Multimodal Emotion Recognition`_,Restricted,Free of charges for research purposes only. 
-`TESS`_,2010,2800 recording by 2 actresses.,"7 emotions: anger, disgust, fear, happiness, pleasant surprise, sadness, and neutral.",Audio,--,English,`BEHAVIOURAL FINDINGS FROM THE TORONTO EMOTIONAL SPEECH SET`_,Open,`CC BY-NC-ND 4.0`_ -`EEKK`_,2007,26 text passage read by 10 speakers.,"4 main emotions: joy, sadness, anger and neutral.",--,0.352 GB,Estonian,`Estonian Emotional Speech Corpus`_,Open,`CC-BY license`_ -`IEMOCAP`_,2007,12 hours of audiovisual data by 10 actors.,"5 emotions: happiness, anger, sadness, frustration and neutral.",--,--,English,`IEMOCAP: Interactive emotional dyadic motion capture database`_,Restricted,`IEMOCAP license`_ -`Keio-ESD`_,2006,A set of human speech with vocal emotion spoken by a Japanese male speaker.,"47 emotions including angry, joyful, disgusting, downgrading, funny, worried, gentle, relief, indignation, shameful, etc.",Audio,--,Japanese,`EMOTIONAL SPEECH SYNTHESIS USING SUBSPACE CONSTRAINTS IN PROSODY`_,Restricted,Available for research purposes only -`EMO-DB`_,2005,800 recording spoken by 10 actors (5 males and 5 females).,"7 emotions: anger, neutral, fear, boredom, happiness, sadness, disgust.",Audio,--,German,`A Database of German Emotional Speech`_,Open,-- -`eNTERFACE05`_,2005,"Videos by 42 subjects, coming from 14 different nationalities.","6 emotions: anger, fear, surprise, happiness, sadness and disgust.","Audio, Video",0.8 GB,German,,Open,Free of charges for research purposes only -`DES`_,2002,4 speakers (2 males and 2 females).,"5 emotions: neutral, surprise, happiness, sadness and anger",--,--,Danish,`Documentation of the Danish Emotional Speech Database`_,, +Dataset,Year,Content,Emotions,Format,Size,Language,Paper,Access,License +`MESD <https://data.mendeley.com/datasets/cy34mh68j9/5>`_,2022,864 audio files of single-word emotional utterances with Mexican cultural shaping.,"6 emotions provides single-word utterances for anger, disgust, fear, happiness, neutral, and sadness.",Audio,"0,097 GB",Spanish (Mexican),`The 
Mexican Emotional Speech Database (MESD): elaboration and assessment based on machine learning <https://pubmed.ncbi.nlm.nih.gov/34891601/>`_,Open,`CC BY 4.0 <https://creativecommons.org/licenses/by/4.0/>`_ +`MLEnd <https://www.kaggle.com/datasets/jesusrequena/mlend-spoken-numerals>`_,2021,"~32700 audio recordings files produced by 154 speakers. Each audio recording corresponds to one English numeral (from ""zero"" to ""billion"")","Intonations: neutral, bored, excited and question",Audio,2.27 GB,--,--,Open,Unknown +`ASVP-ESD <https://www.kaggle.com/datasets/dejolilandry/asvpesdspeech-nonspeech-emotional-utterances>`_,2021,"~13285 audio files collected from movies, tv shows and youtube containing speech and non-speech.","12 different natural emotions (boredom, neutral, happiness, sadness, anger, fear, surprise, disgust, excitement, pleasure, pain, disappointment) with 2 levels of intensity.",Audio,2 GB,"Chinese, English, French, Russian and others",--,Open,Unknown +`ESD <https://hltsingapore.github.io/ESD/>`_,2021,"29 hours, 3500 sentences, by 10 native English speakers and 10 native Chinese speakers.","5 emotions: angry, happy, neutral, sad, and surprise.","Audio, Text",2.4 GB (zip),"Chinese, English",`Seen And Unseen Emotional Style Transfer For Voice Conversion With A New Emotional Speech Dataset <https://arxiv.org/pdf/2010.14794.pdf>`_,Open,Academic License +`MuSe-CAR <https://zenodo.org/record/4134758>`_,2021,"40 hours, 6,000+ recordings of 25,000+ sentences by 70+ English speakers (see db link for details).","continuous emotion dimensions characterized using valence, arousal, and trustworthiness.","Audio, Video, Text",15 GB,English,"`The Multimodal Sentiment Analysis in Car Reviews (MuSe-CaR) Dataset: Collection, Insights and Improvements <https://arxiv.org/pdf/2101.06053.pdf>`_",Restricted,Academic License & Commercial License +`MSP-Podcast corpus <https://ecs.utdallas.edu/research/researchlabs/msp-lab/MSP-Podcast.html>`_,2020,100 hours by over 100 speakers 
(see db link for details).,"This corpus is annotated with emotional labels using attribute-based descriptors (activation, dominance and valence) and categorical labels (anger, happiness, sadness, disgust, surprised, fear, contempt, neutral and other).",Audio,--,--,`The MSP-Conversation Corpus <http://www.interspeech2020.org/index.php?m=content&c=index&a=show&catid=290&id=684>`_,Restricted,Academic License & Commercial License +`emotiontts open db <https://github.com/emotiontts/emotiontts_open_db>`_,2020,Recordings and their associated transcriptions by a diverse group of speakers.,"4 emotions: general, joy, anger, and sadness.","Audio, Text",--,Korean,--,Partially open,`CC BY-NC-SA 4.0 <https://creativecommons.org/licenses/by-nc-sa/4.0/>`_ +`URDU-Dataset <https://github.com/siddiquelatif/urdu-dataset>`_,2020,400 utterances by 38 speakers (27 male and 11 female).,"4 emotions: angry, happy, neutral, and sad.",Audio,0.072 GB,Urdu,`Cross Lingual Speech Emotion Recognition: Urdu vs. Western Languages <https://arxiv.org/pdf/1812.10411.pdf>`_,Open,-- +`BAVED <https://www.kaggle.com/a13x10/basic-arabic-vocal-emotions-dataset>`_,2020,1935 recording by 61 speakers (45 male and 16 female).,3 levels of emotion.,Audio,0.195 GB,Arabic,--,Open,-- +`VIVAE <https://zenodo.org/record/4066235>`_,2020,"non-speech, 1085 audio file by 12 speakers.","non-speech 6 emotions: achievement, anger, fear, pain, pleasure, and surprise with 3 emotional intensities (low, moderate, strong, peak).",Audio,--,--,--,Restricted,`CC BY-NC-SA 4.0 <https://creativecommons.org/licenses/by-nc-sa/4.0/>`_ +`SEWA <https://db.sewaproject.eu/>`_,2019,more than 2000 minutes of audio-visual data of 398 people (201 male and 197 female) coming from 6 cultures.,emotions are characterized using valence and arousal.,"Audio, Video",--,"Chinese, English, German, Greek, Hungarian and Serbian",`SEWA DB: A Rich Database for Audio-Visual Emotion and Sentiment Research in the Wild 
<https://arxiv.org/pdf/1901.02839.pdf>`_,Restricted,`SEWA EULA <https://db.sewaproject.eu/media/doc/eula.pdf>`_ +`MELD <https://affective-meld.github.io/>`_,2019,1400 dialogues and 14000 utterances from Friends TV series by multiple speakers.,"7 emotions: Anger, disgust, sadness, joy, neutral, surprise and fear. MELD also has sentiment (positive, negative and neutral) annotation for each utterance.","Audio, Video, Text",10.1 GB,English,`MELD: A Multimodal Multi-Party Dataset for Emotion Recognition in Conversations <https://arxiv.org/pdf/1810.02508.pdf>`_,Open,`MELD: GPL-3.0 License <https://github.com/declare-lab/MELD/blob/master/LICENSE>`_ +`ShEMO <https://github.com/mansourehk/ShEMO>`_,2019,"3000 semi-natural utterances, equivalent to 3 hours and 25 minutes of speech data from online radio plays by 87 native-Persian speakers.","6 emotions: anger, fear, happiness, sadness, neutral and surprise.",Audio,0.101 GB,Persian,`ShEMO: a large-scale validated database for Persian speech emotion detection <https://link.springer.com/article/10.1007/s10579-018-9427-x>`_,Open,-- +`DEMoS <https://zenodo.org/record/2544829>`_,2019,"9365 emotional and 332 neutral samples produced by 68 native speakers (23 females, 45 males).","7/6 emotions: anger, sadness, happiness, fear, surprise, disgust, and the secondary emotion guilt.",Audio,--,Italian,"`DEMoS: An Italian emotional speech corpus. 
Elicitation methods, machine learning, and perception <https://link.springer.com/epdf/10.1007/s10579-019-09450-y?author_access_token=5pf0w_D4k9z28TM6n4PbVPe4RwlQNchNByi7wbcMAY5hiA-aXzXNbZYfsMDDq2CdHD-w5ArAxIwlsk2nC_26pSyEAcu1xlKJ1c9m3JZj2ZlFmlVoCZUTcG3Hq2_2ozMLo3Hq3Y0CHzLdTxihQwch5Q%3D%3D>`_",Restricted,EULA: End User License Agreement +`AESDD <http://m3c.web.auth.gr/research/aesdd-speech-emotion-recognition/>`_,2018,around 500 utterances by a diverse group of actors (over 5 actors) simulating various emotions.,"5 emotions: anger, disgust, fear, happiness, and sadness.",Audio,0.392 GB,Greek,`Speech Emotion Recognition for Performance Interaction <https://www.researchgate.net/publication/326005164_Speech_Emotion_Recognition_for_Performance_Interaction>`_,Open,-- +`Emov-DB <https://mega.nz/#F!KBp32apT!gLIgyWf9iQ-yqnWFUFuUHg!mYwUnI4K>`_,2018,Recordings for 4 speakers- 2 males and 2 females.,"The emotional styles are neutral, sleepiness, anger, disgust and amused.",Audio,5.88 GB,English,`The emotional voices database: Towards controlling the emotion dimension in voice generation systems <https://arxiv.org/pdf/1806.09514.pdf>`_,Open,-- +`RAVDESS <https://zenodo.org/record/1188976#.XrC7a5NKjOR>`_,2018,7356 recordings by 24 actors.,"7 emotions: calm, happy, sad, angry, fearful, surprise, and disgust","Audio, Video",24.8 GB,English,"`The Ryerson Audio-Visual Database of Emotional Speech and Song (RAVDESS): A dynamic, multimodal set of facial and vocal expressions in North American English <https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0196391>`_",Open,`CC BY-NC-SA 4.0 <https://creativecommons.org/licenses/by-nc-sa/4.0/>`_ +`JL corpus <https://www.kaggle.com/tli725/jl-corpus>`_,2018,2400 recording of 240 sentences by 4 actors (2 males and 2 females).,"5 primary emotions: angry, sad, neutral, happy, excited. 
5 secondary emotions: anxious, apologetic, pensive, worried, enthusiastic.",Audio,--,English,`An Open Source Emotional Speech Corpus for Human Robot Interaction Applications <https://www.isca-speech.org/archive/Interspeech_2018/pdfs/1349.pdf>`_,Open,`CC0 1.0 <https://creativecommons.org/publicdomain/zero/1.0/>`_ +`CaFE <https://zenodo.org/record/1478765>`_,2018,6 different sentences by 12 speakers (6 females + 6 males).,"7 emotions: happy, sad, angry, fearful, surprise, disgust and neutral. Each emotion is acted in 2 different intensities.",Audio,2 GB,French (Canadian),--,Open,`CC BY-NC-SA 4.0 <https://creativecommons.org/licenses/by-nc-sa/4.0/>`_ +`EmoFilm <https://zenodo.org/record/1326428>`_,2018,1115 audio instances sentences extracted from various films.,"5 emotions: anger, contempt, happiness, fear, and sadness.",Audio,--,"English, Italian & Spanish",`Categorical vs Dimensional Perception of Italian Emotional Speech <https://pdfs.semanticscholar.org/e70e/fcf7f5b4c366a7b7e2c16267d7f7691a5391.pdf>`_,Restricted,EULA: End User License Agreement +`ANAD <https://www.kaggle.com/suso172/arabic-natural-audio-dataset>`_,2018,1384 recording by multiple speakers.,"3 emotions: angry, happy, surprised.",Audio,2 GB,Arabic,`Arabic Natural Audio Dataset <https://data.mendeley.com/datasets/xm232yxf7t/1>`_,Open,`CC BY-NC-SA 4.0 <https://creativecommons.org/licenses/by-nc-sa/4.0/>`_ +`EmoSynth <https://zenodo.org/record/3727593>`_,2018,144 audio file labelled by 40 listeners.,Emotion (no speech) defined in regard of valence and arousal.,Audio,0.1034 GB,--,`The Perceived Emotion of Isolated Synthetic Audio: The EmoSynth Dataset and Results <https://dl.acm.org/doi/10.1145/3243274.3243277>`_,Open,`CC BY 4.0 <https://creativecommons.org/licenses/by/4.0/>`_ +`CMU-MOSEI <https://www.amir-zadeh.com/datasets>`_,2018,65 hours of annotated video from more than 1000 speakers and 250 topics.,"6 Emotion (happiness, sadness, anger,fear, disgust, surprise) + Likert scale.","Audio, 
Video",--,English,`Multi-attention Recurrent Network for Human Communication Comprehension <https://arxiv.org/pdf/1802.00923.pdf>`_,Open,`CMU-MOSEI License <https://github.com/A2Zadeh/CMU-MultimodalSDK/blob/master/LICENSE.txt>`_ +`VERBO <https://sites.google.com/view/verbodatabase/home>`_,2018,14 different phrases by 12 speakers (6 female + 6 male) for a total of 1167 recordings.,"7 emotions: Happiness, Disgust, Fear, Neutral, Anger, Surprise, Sadness",Audio,--,Portuguese,`VERBO: Voice Emotion Recognition dataBase in Portuguese Language <https://thescipub.com/pdf/jcssp.2018.1420.1430.pdf>`_,Restricted,Available for research purposes only +`CMU-MOSI <https://www.amir-zadeh.com/datasets>`_,2017,2199 opinion utterances with annotated sentiment.,Sentiment annotated between very negative to very positive in seven Likert steps.,"Audio, Video",--,English,`Multi-attention Recurrent Network for Human Communication Comprehension <https://arxiv.org/pdf/1802.00923.pdf>`_,Open,`CMU-MOSI License <https://github.com/A2Zadeh/CMU-MultimodalSDK/blob/master/LICENSE.txt>`_ +`MSP-IMPROV <https://ecs.utdallas.edu/research/researchlabs/msp-lab/MSP-Improv.html>`_,2017,20 sentences by 12 actors.,"4 emotions: angry, sad, happy, neutral, other, without agreement","Audio, Video",--,English,`MSP-IMPROV: An Acted Corpus of Dyadic Interactions to Study Emotion Perception <https://ecs.utdallas.edu/research/researchlabs/msp-lab/publications/Busso_2017.pdf>`_,Restricted,Academic License & Commercial License +`CREMA-D <https://github.com/CheyneyComputerScience/CREMA-D>`_,2017,7442 clip of 12 sentences spoken by 91 actors (48 males and 43 females).,"6 emotions: angry, disgusted, fearful, happy, neutral, and sad","Audio, Video",--,English,`CREMA-D: Crowd-sourced Emotional Multimodal Actors Dataset <https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4313618/>`_,Open,`Open Database License & Database Content License <https://github.com/CheyneyComputerScience/CREMA-D/blob/master/LICENSE.txt>`_ +`Example 
emotion videos used in investigation of emotion perception in schizophrenia <https://espace.library.uq.edu.au/view/UQ:446541>`_,2017,"6 videos: Two example videos from each emotion category (angry, happy and neutral) by one female speaker.","3 emotions: angry, happy and neutral.","Audio, Video",0.063 GB,English,--,Open,`Permitted Non-commercial Re-use with Acknowledgment <https://guides.library.uq.edu.au/deposit_your_data/terms_and_conditions>`_ +`EMOVO <http://voice.fub.it/activities/corpora/emovo/index.html>`_,2014,6 actors who played 14 sentences.,"6 emotions: disgust, fear, anger, joy, surprise, sadness.",Audio,0.355 GB,Italian,`EMOVO Corpus: an Italian Emotional Speech Database <https://core.ac.uk/download/pdf/53857389.pdf>`_,Open,-- +`RECOLA <https://diuf.unifr.ch/main/diva/recola/download.html>`_,2013,3.8 hours of recordings by 46 participants.,negative and positive sentiment (valence and arousal).,"Audio, Video",--,--,`Introducing the RECOLA Multimodal Corpus of Remote Collaborative and Affective Interactions <https://drive.google.com/file/d/0B2V_I9XKBODhNENKUnZWNFdVXzQ/view>`_,Restricted,Academic License & Commercial License +`GEMEP corpus <https://www.unige.ch/cisa/gemep>`_,2012,Videos: 10 actors portraying 10 states.,"12 emotions: amusement, anxiety, cold anger (irritation), despair, hot anger (rage), fear (panic), interest, joy (elation), pleasure(sensory), pride, relief, and sadness. 
Plus, 5 additional emotions: admiration, contempt, disgust, surprise, and tenderness.","Audio, Video",--,French,`Introducing the Geneva Multimodal Expression Corpus for Experimental Research on Emotion Perception <https://www.researchgate.net/publication/51796867_Introducing_the_Geneva_Multimodal_Expression_Corpus_for_Experimental_Research_on_Emotion_Perception>`_,Restricted,-- +`OGVC <https://sites.google.com/site/ogcorpus/home/en>`_,2012,9114 spontaneous utterances and 2656 acted utterances by 4 professional actors (two male and two female).,"9 emotional states: fear, surprise, sadness, disgust, anger, anticipation, joy, acceptance and the neutral state.",Audio,--,Japanese,`Naturalistic emotional speech collectionparadigm with online game and its psychological and acoustical assessment <https://www.jstage.jst.go.jp/article/ast/33/6/33_E1175/_pdf>`_,Restricted,-- +`LEGO corpus <https://www.ultes.eu/ressources/lego-spoken-dialogue-corpus/>`_,2012,"347 dialogs with 9,083 system-user exchanges.","Emotions classified as garbage, non-angry, slightly angry and very angry.",Audio,1.1 GB,--,`A Parameterized and Annotated Spoken Dialog Corpus of the CMU Let’s Go Bus Information System <http://www.lrec-conf.org/proceedings/lrec2012/pdf/333_Paper.pdf>`_,Open,License available with the data. Free of charges for research purposes only. +`SEMAINE <https://semaine-db.eu/>`_,2012,95 dyadic conversations from 21 subjects. 
Each subject converses with another playing one of four characters with emotions.,"5 FeelTrace annotations: activation, valence, dominance, power, intensity","Audio, Video, Text",104 GB,English,`The SEMAINE Database: Annotated Multimodal Records of Emotionally Colored Conversations between a Person and a Limited Agent <https://ieeexplore.ieee.org/document/5959155>`_,Restricted,Academic EULA +`SAVEE <http://kahlan.eps.surrey.ac.uk/savee/Database.html>`_,2011,480 British English utterances by 4 males actors.,"7 emotions: anger, disgust, fear, happiness, sadness, surprise and neutral.","Audio, Video",--,English (British),`Multimodal Emotion Recognition <http://personal.ee.surrey.ac.uk/Personal/P.Jackson/pub/ma10/HaqJackson_MachineAudition10_approved.pdf>`_,Restricted,Free of charges for research purposes only. +`TESS <https://tspace.library.utoronto.ca/handle/1807/24487>`_,2010,2800 recording by 2 actresses.,"7 emotions: anger, disgust, fear, happiness, pleasant surprise, sadness, and neutral.",Audio,--,English,`BEHAVIOURAL FINDINGS FROM THE TORONTO EMOTIONAL SPEECH SET <https://www.semanticscholar.org/paper/BEHAVIOURAL-FINDINGS-FROM-THE-TORONTO-EMOTIONAL-SET-Dupuis-Pichora-Fuller/d7f746b3aee801a353b6929a65d9a34a68e71c6f/figure/2>`_,Open,`CC BY-NC-ND 4.0 <https://creativecommons.org/licenses/by-nc-nd/4.0/>`_ +`EEKK <https://metashare.ut.ee/repository/download/4d42d7a8463411e2a6e4005056b40024a19021a316b54b7fb707757d43d1a889/>`_,2007,26 text passage read by 10 speakers.,"4 main emotions: joy, sadness, anger and neutral.",--,0.352 GB,Estonian,`Estonian Emotional Speech Corpus <https://www.researchgate.net/publication/261724574_Estonian_Emotional_Speech_Corpus_Release_1>`_,Open,`CC-BY license <https://metashare.ut.ee/repository/download/4d42d7a8463411e2a6e4005056b40024a19021a316b54b7fb707757d43d1a889/>`_ +`IEMOCAP <https://sail.usc.edu/iemocap/iemocap_release.htm>`_,2007,12 hours of audiovisual data by 10 actors.,"5 emotions: happiness, anger, sadness, frustration and 
neutral.",--,--,English,`IEMOCAP: Interactive emotional dyadic motion capture database <https://sail.usc.edu/iemocap/Busso_2008_iemocap.pdf>`_,Restricted,`IEMOCAP license <https://sail.usc.edu/iemocap/Data_Release_Form_IEMOCAP.pdf>`_ +`Keio-ESD <http://research.nii.ac.jp/src/en/Keio-ESD.html>`_,2006,A set of human speech with vocal emotion spoken by a Japanese male speaker.,"47 emotions including angry, joyful, disgusting, downgrading, funny, worried, gentle, relief, indignation, shameful, etc.",Audio,--,Japanese,`EMOTIONAL SPEECH SYNTHESIS USING SUBSPACE CONSTRAINTS IN PROSODY <http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.420.8899&rep=rep1&type=pdf>`_,Restricted,Available for research purposes only. +`EMO-DB <http://emodb.bilderbar.info/index-1280.html>`_,2005,800 recording spoken by 10 actors (5 males and 5 females).,"7 emotions: anger, neutral, fear, boredom, happiness, sadness, disgust.",Audio,--,German,`A Database of German Emotional Speech <http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.130.8506&rep=rep1&type=pdf>`_,Open,-- +`eNTERFACE05 <http://www.enterface.net/enterface05/docs/results/databases/project2_database.zip>`_,2005,"Videos by 42 subjects, coming from 14 different nationalities.","6 emotions: anger, fear, surprise, happiness, sadness and disgust.","Audio, Video",0.8 GB,German,--,Open,Free of charges for research purposes only. 
+`DES <http://kom.aau.dk/~tb/speech/Emotions/>`_,2002,4 speakers (2 males and 2 females).,"5 emotions: neutral, surprise, happiness, sadness and anger",--,--,Danish,`Documentation of the Danish Emotional Speech Database <http://kom.aau.dk/~tb/speech/Emotions/des.pdf>`_,--,-- From ffe2cc1b37ecd330e286216cefb1ca83f17a98b9 Mon Sep 17 00:00:00 2001 From: SuperKogito <superkogito@gmail.com> Date: Mon, 13 Feb 2023 22:22:35 +0100 Subject: [PATCH 4/5] add json file for datasets --- src/ser-datasets.json | 590 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 590 insertions(+) create mode 100644 src/ser-datasets.json diff --git a/src/ser-datasets.json b/src/ser-datasets.json new file mode 100644 index 0000000..00f9f0d --- /dev/null +++ b/src/ser-datasets.json @@ -0,0 +1,590 @@ +{ + "MESD": { + "Year": 2022, + "Content": "864 audio files of single-word emotional utterances with Mexican cultural shaping.", + "Emotions": "6 emotions provides single-word utterances for anger, disgust, fear, happiness, neutral, and sadness.", + "Format": "Audio", + "Size": "0,097 GB", + "Language": "Spanish (Mexican)", + "Paper": "The Mexican Emotional Speech Database (MESD): elaboration and assessment based on machine learning", + "Access": "Open", + "License": "CC BY 4.0", + "Dataset-link": "https://data.mendeley.com/datasets/cy34mh68j9/5", + "Paper-link": "https://pubmed.ncbi.nlm.nih.gov/34891601/", + "License-link": "https://creativecommons.org/licenses/by/4.0/" + }, + "MLEnd": { + "Year": 2021, + "Content": "~32700 audio recordings files produced by 154 speakers. 
Each audio recording corresponds to one English numeral (from \"zero\" to \"billion\")", + "Emotions": "Intonations: neutral, bored, excited and question", + "Format": "Audio", + "Size": "2.27 GB", + "Language": "--", + "Paper": "--", + "Access": "Open", + "License": "Unknown", + "Dataset-link": "https://www.kaggle.com/datasets/jesusrequena/mlend-spoken-numerals", + "Paper-link": "--", + "License-link": "Unknown" + }, + "ASVP-ESD": { + "Year": 2021, + "Content": "~13285 audio files collected from movies, tv shows and youtube containing speech and non-speech.", + "Emotions": "12 different natural emotions (boredom, neutral, happiness, sadness, anger, fear, surprise, disgust, excitement, pleasure, pain, disappointment) with 2 levels of intensity.", + "Format": "Audio", + "Size": "2 GB", + "Language": "Chinese, English, French, Russian and others", + "Paper": "--", + "Access": "Open", + "License": "Unknown", + "Dataset-link": "https://www.kaggle.com/datasets/dejolilandry/asvpesdspeech-nonspeech-emotional-utterances", + "Paper-link": "--", + "License-link": "Unknown" + }, + "ESD": { + "Year": 2021, + "Content": "29 hours, 3500 sentences, by 10 native English speakers and 10 native Chinese speakers.", + "Emotions": "5 emotions: angry, happy, neutral, sad, and surprise.", + "Format": "Audio, Text", + "Size": "2.4 GB (zip)", + "Language": "Chinese, English", + "Paper": "Seen And Unseen Emotional Style Transfer For Voice Conversion With A New Emotional Speech Dataset", + "Access": "Open", + "License": "Academic License", + "Dataset-link": "https://hltsingapore.github.io/ESD/", + "Paper-link": "https://arxiv.org/pdf/2010.14794.pdf", + "License-link": "Academic License" + }, + "MuSe-CAR": { + "Year": 2021, + "Content": "40 hours, 6,000+ recordings of 25,000+ sentences by 70+ English speakers (see db link for details).", + "Emotions": "continuous emotion dimensions characterized using valence, arousal, and trustworthiness.", + "Format": "Audio, Video, Text", + "Size": "15 
GB", + "Language": "English", + "Paper": "The Multimodal Sentiment Analysis in Car Reviews (MuSe-CaR) Dataset: Collection, Insights and Improvements", + "Access": "Restricted", + "License": "Academic License & Commercial License", + "Dataset-link": "https://zenodo.org/record/4134758", + "Paper-link": "https://arxiv.org/pdf/2101.06053.pdf", + "License-link": "Academic License & Commercial License" + }, + "MSP-Podcast corpus": { + "Year": 2020, + "Content": "100 hours by over 100 speakers (see db link for details).", + "Emotions": "This corpus is annotated with emotional labels using attribute-based descriptors (activation, dominance and valence) and categorical labels (anger, happiness, sadness, disgust, surprised, fear, contempt, neutral and other).", + "Format": "Audio", + "Size": "--", + "Language": "--", + "Paper": "The MSP-Conversation Corpus", + "Access": "Restricted", + "License": "Academic License & Commercial License", + "Dataset-link": "https://ecs.utdallas.edu/research/researchlabs/msp-lab/MSP-Podcast.html", + "Paper-link": "http://www.interspeech2020.org/index.php?m=content&c=index&a=show&catid=290&id=684", + "License-link": "Academic License & Commercial License" + }, + "emotiontts open db": { + "Year": 2020, + "Content": "Recordings and their associated transcriptions by a diverse group of speakers.", + "Emotions": "4 emotions: general, joy, anger, and sadness.", + "Format": "Audio, Text", + "Size": "--", + "Language": "Korean", + "Paper": "--", + "Access": "Partially open", + "License": "CC BY-NC-SA 4.0", + "Dataset-link": "https://github.com/emotiontts/emotiontts_open_db", + "Paper-link": "--", + "License-link": "https://creativecommons.org/licenses/by-nc-sa/4.0/" + }, + "URDU-Dataset": { + "Year": 2020, + "Content": "400 utterances by 38 speakers (27 male and 11 female).", + "Emotions": "4 emotions: angry, happy, neutral, and sad.", + "Format": "Audio", + "Size": "0.072 GB", + "Language": "Urdu", + "Paper": "Cross Lingual Speech Emotion Recognition: 
Urdu vs. Western Languages", + "Access": "Open", + "License": "--", + "Dataset-link": "https://github.com/siddiquelatif/urdu-dataset", + "Paper-link": "https://arxiv.org/pdf/1812.10411.pdf", + "License-link": "--" + }, + "BAVED": { + "Year": 2020, + "Content": "1935 recording by 61 speakers (45 male and 16 female).", + "Emotions": "3 levels of emotion.", + "Format": "Audio", + "Size": "0.195 GB", + "Language": "Arabic", + "Paper": "--", + "Access": "Open", + "License": "--", + "Dataset-link": "https://www.kaggle.com/a13x10/basic-arabic-vocal-emotions-dataset", + "Paper-link": "--", + "License-link": "--" + }, + "VIVAE": { + "Year": 2020, + "Content": "non-speech, 1085 audio file by 12 speakers.", + "Emotions": "non-speech 6 emotions: achievement, anger, fear, pain, pleasure, and surprise with 3 emotional intensities (low, moderate, strong, peak).", + "Format": "Audio", + "Size": "--", + "Language": "--", + "Paper": "--", + "Access": "Restricted", + "License": "CC BY-NC-SA 4.0", + "Dataset-link": "https://zenodo.org/record/4066235", + "Paper-link": "--", + "License-link": "https://creativecommons.org/licenses/by-nc-sa/4.0/" + }, + "SEWA": { + "Year": 2019, + "Content": "more than 2000 minutes of audio-visual data of 398 people (201 male and 197 female) coming from 6 cultures.", + "Emotions": "emotions are characterized using valence and arousal.", + "Format": "Audio, Video", + "Size": "--", + "Language": "Chinese, English, German, Greek, Hungarian and Serbian", + "Paper": "SEWA DB: A Rich Database for Audio-Visual Emotion and Sentiment Research in the Wild", + "Access": "Restricted", + "License": "SEWA EULA", + "Dataset-link": "https://db.sewaproject.eu/", + "Paper-link": "https://arxiv.org/pdf/1901.02839.pdf", + "License-link": "https://db.sewaproject.eu/media/doc/eula.pdf" + }, + "MELD": { + "Year": 2019, + "Content": "1400 dialogues and 14000 utterances from Friends TV series by multiple speakers.", + "Emotions": "7 emotions: Anger, disgust, sadness, joy, 
neutral, surprise and fear. MELD also has sentiment (positive, negative and neutral) annotation for each utterance.", + "Format": "Audio, Video, Text", + "Size": "10.1 GB", + "Language": "English", + "Paper": "MELD: A Multimodal Multi-Party Dataset for Emotion Recognition in Conversations", + "Access": "Open", + "License": "MELD: GPL-3.0 License", + "Dataset-link": "https://affective-meld.github.io/", + "Paper-link": "https://arxiv.org/pdf/1810.02508.pdf", + "License-link": "https://github.com/declare-lab/MELD/blob/master/LICENSE" + }, + "ShEMO": { + "Year": 2019, + "Content": "3000 semi-natural utterances, equivalent to 3 hours and 25 minutes of speech data from online radio plays by 87 native-Persian speakers.", + "Emotions": "6 emotions: anger, fear, happiness, sadness, neutral and surprise.", + "Format": "Audio", + "Size": "0.101 GB", + "Language": "Persian", + "Paper": "ShEMO: a large-scale validated database for Persian speech emotion detection", + "Access": "Open", + "License": "--", + "Dataset-link": "https://github.com/mansourehk/ShEMO", + "Paper-link": "https://link.springer.com/article/10.1007/s10579-018-9427-x", + "License-link": "--" + }, + "DEMoS": { + "Year": 2019, + "Content": "9365 emotional and 332 neutral samples produced by 68 native speakers (23 females, 45 males).", + "Emotions": "7/6 emotions: anger, sadness, happiness, fear, surprise, disgust, and the secondary emotion guilt.", + "Format": "Audio", + "Size": "--", + "Language": "Italian", + "Paper": "DEMoS: An Italian emotional speech corpus. 
Elicitation methods, machine learning, and perception", + "Access": "Restricted", + "License": "EULA: End User License Agreement", + "Dataset-link": "https://zenodo.org/record/2544829", + "Paper-link": "https://link.springer.com/epdf/10.1007/s10579-019-09450-y?author_access_token=5pf0w_D4k9z28TM6n4PbVPe4RwlQNchNByi7wbcMAY5hiA-aXzXNbZYfsMDDq2CdHD-w5ArAxIwlsk2nC_26pSyEAcu1xlKJ1c9m3JZj2ZlFmlVoCZUTcG3Hq2_2ozMLo3Hq3Y0CHzLdTxihQwch5Q%3D%3D", + "License-link": "EULA: End User License Agreement" + }, + "AESDD": { + "Year": 2018, + "Content": "around 500 utterances by a diverse group of actors (over 5 actors) siumlating various emotions.", + "Emotions": "5 emotions: anger, disgust, fear, happiness, and sadness.", + "Format": "Audio", + "Size": "0.392 GB", + "Language": "Greek", + "Paper": "Speech Emotion Recognition for Performance Interaction", + "Access": "Open", + "License": "--", + "Dataset-link": "http://m3c.web.auth.gr/research/aesdd-speech-emotion-recognition/", + "Paper-link": "https://www.researchgate.net/publication/326005164_Speech_Emotion_Recognition_for_Performance_Interaction", + "License-link": "--" + }, + "Emov-DB": { + "Year": 2018, + "Content": "Recordings for 4 speakers- 2 males and 2 females.", + "Emotions": "The emotional styles are neutral, sleepiness, anger, disgust and amused.", + "Format": "Audio", + "Size": "5.88 GB", + "Language": "English", + "Paper": "The emotional voices database: Towards controlling the emotion dimension in voice generation systems", + "Access": "Open", + "License": "--", + "Dataset-link": "https://mega.nz/#F!KBp32apT!gLIgyWf9iQ-yqnWFUFuUHg!mYwUnI4K", + "Paper-link": "https://arxiv.org/pdf/1806.09514.pdf", + "License-link": "--" + }, + "RAVDESS": { + "Year": 2018, + "Content": "7356 recordings by 24 actors.", + "Emotions": "7 emotions: calm, happy, sad, angry, fearful, surprise, and disgust", + "Format": "Audio, Video", + "Size": "24.8 GB", + "Language": "English", + "Paper": "The Ryerson Audio-Visual Database of Emotional 
Speech and Song (RAVDESS): A dynamic, multimodal set of facial and vocal expressions in North American English", + "Access": "Open", + "License": "CC BY-NC-SA 4.0", + "Dataset-link": "https://zenodo.org/record/1188976#.XrC7a5NKjOR", + "Paper-link": "https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0196391", + "License-link": "https://creativecommons.org/licenses/by-nc-sa/4.0/" + }, + "JL corpus": { + "Year": 2018, + "Content": "2400 recordings of 240 sentences by 4 actors (2 males and 2 females).", + "Emotions": "5 primary emotions: angry, sad, neutral, happy, excited. 5 secondary emotions: anxious, apologetic, pensive, worried, enthusiastic.", + "Format": "Audio", + "Size": "--", + "Language": "English", + "Paper": "An Open Source Emotional Speech Corpus for Human Robot Interaction Applications", + "Access": "Open", + "License": "CC0 1.0", + "Dataset-link": "https://www.kaggle.com/tli725/jl-corpus", + "Paper-link": "https://www.isca-speech.org/archive/Interspeech_2018/pdfs/1349.pdf", + "License-link": "https://creativecommons.org/publicdomain/zero/1.0/" + }, + "CaFE": { + "Year": 2018, + "Content": "6 different sentences by 12 speakers (6 females + 6 males).", + "Emotions": "7 emotions: happy, sad, angry, fearful, surprise, disgust and neutral. 
Each emotion is acted in 2 different intensities.", + "Format": "Audio", + "Size": "2 GB", + "Language": "French (Canadian)", + "Paper": "--", + "Access": "Open", + "License": "CC BY-NC-SA 4.0", + "Dataset-link": "https://zenodo.org/record/1478765", + "Paper-link": "--", + "License-link": "https://creativecommons.org/licenses/by-nc-sa/4.0/" + }, + "EmoFilm": { + "Year": 2018, + "Content": "1115 audio instances sentences extracted from various films.", + "Emotions": "5 emotions: anger, contempt, happiness, fear, and sadness.", + "Format": "Audio", + "Size": "--", + "Language": "English, Italian & Spanish", + "Paper": "Categorical vs Dimensional Perception of Italian Emotional Speech", + "Access": "Restricted", + "License": "EULA: End User License Agreement", + "Dataset-link": "https://zenodo.org/record/1326428", + "Paper-link": "https://pdfs.semanticscholar.org/e70e/fcf7f5b4c366a7b7e2c16267d7f7691a5391.pdf", + "License-link": "EULA: End User License Agreement" + }, + "ANAD": { + "Year": 2018, + "Content": "1384 recording by multiple speakers.", + "Emotions": "3 emotions: angry, happy, surprised.", + "Format": "Audio", + "Size": "2 GB", + "Language": "Arabic", + "Paper": "Arabic Natural Audio Dataset", + "Access": "Open", + "License": "CC BY-NC-SA 4.0", + "Dataset-link": "https://www.kaggle.com/suso172/arabic-natural-audio-dataset", + "Paper-link": "https://data.mendeley.com/datasets/xm232yxf7t/1", + "License-link": "https://creativecommons.org/licenses/by-nc-sa/4.0/" + }, + "EmoSynth": { + "Year": 2018, + "Content": "144 audio file labelled by 40 listeners.", + "Emotions": "Emotion (no speech) defined in regard of valence and arousal.", + "Format": "Audio", + "Size": "0.1034 GB", + "Language": "--", + "Paper": "The Perceived Emotion of Isolated Synthetic Audio: The EmoSynth Dataset and Results", + "Access": "Open", + "License": "CC BY 4.0", + "Dataset-link": "https://zenodo.org/record/3727593", + "Paper-link": "https://dl.acm.org/doi/10.1145/3243274.3243277", + 
"License-link": "https://creativecommons.org/licenses/by/4.0/" + }, + "CMU-MOSEI": { + "Year": 2018, + "Content": "65 hours of annotated video from more than 1000 speakers and 250 topics.", + "Emotions": "6 Emotion (happiness, sadness, anger,fear, disgust, surprise) + Likert scale.", + "Format": "Audio, Video", + "Size": "--", + "Language": "English", + "Paper": "Multi-attention Recurrent Network for Human Communication Comprehension", + "Access": "Open", + "License": "CMU-MOSEI License", + "Dataset-link": "https://www.amir-zadeh.com/datasets", + "Paper-link": "https://arxiv.org/pdf/1802.00923.pdf", + "License-link": "https://github.com/A2Zadeh/CMU-MultimodalSDK/blob/master/LICENSE.txt" + }, + "VERBO": { + "Year": 2018, + "Content": "14 different phrases by 12 speakers (6 female + 6 male) for a total of 1167 recordings.", + "Emotions": "7 emotions: Happiness, Disgust, Fear, Neutral, Anger, Surprise, Sadness", + "Format": "Audio", + "Size": "--", + "Language": "Portuguese", + "Paper": "VERBO: Voice Emotion Recognition dataBase in Portuguese Language", + "Access": "Restricted", + "License": "Available for research purposes only", + "Dataset-link": "https://sites.google.com/view/verbodatabase/home", + "Paper-link": "https://thescipub.com/pdf/jcssp.2018.1420.1430.pdf", + "License-link": "Available for research purposes only" + }, + "CMU-MOSI": { + "Year": 2017, + "Content": "2199 opinion utterances with annotated sentiment.", + "Emotions": "Sentiment annotated between very negative to very positive in seven Likert steps.", + "Format": "Audio, Video", + "Size": "--", + "Language": "English", + "Paper": "Multi-attention Recurrent Network for Human Communication Comprehension", + "Access": "Open", + "License": "CMU-MOSI License", + "Dataset-link": "https://www.amir-zadeh.com/datasets", + "Paper-link": "https://arxiv.org/pdf/1802.00923.pdf", + "License-link": "https://github.com/A2Zadeh/CMU-MultimodalSDK/blob/master/LICENSE.txt" + }, + "MSP-IMPROV": { + "Year": 2017, + 
"Content": "20 sentences by 12 actors.", + "Emotions": "4 emotions: angry, sad, happy, neutral, other, without agreement", + "Format": "Audio, Video", + "Size": "--", + "Language": "English", + "Paper": "MSP-IMPROV: An Acted Corpus of Dyadic Interactions to Study Emotion Perception", + "Access": "Restricted", + "License": "Academic License & Commercial License", + "Dataset-link": "https://ecs.utdallas.edu/research/researchlabs/msp-lab/MSP-Improv.html", + "Paper-link": "https://ecs.utdallas.edu/research/researchlabs/msp-lab/publications/Busso_2017.pdf", + "License-link": "Academic License & Commercial License" + }, + "CREMA-D": { + "Year": 2017, + "Content": "7442 clips of 12 sentences spoken by 91 actors (48 males and 43 females).", + "Emotions": "6 emotions: angry, disgusted, fearful, happy, neutral, and sad", + "Format": "Audio, Video", + "Size": "--", + "Language": "English", + "Paper": "CREMA-D: Crowd-sourced Emotional Multimodal Actors Dataset", + "Access": "Open", + "License": "Open Database License & Database Content License", + "Dataset-link": "https://github.com/CheyneyComputerScience/CREMA-D", + "Paper-link": "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4313618/", + "License-link": "https://github.com/CheyneyComputerScience/CREMA-D/blob/master/LICENSE.txt" + }, + "Example emotion videos used in investigation of emotion perception in schizophrenia": { + "Year": 2017, + "Content": "6 videos: Two example videos from each emotion category (angry, happy and neutral) by one female speaker.", + "Emotions": "3 emotions: angry, happy and neutral.", + "Format": "Audio, Video", + "Size": "0.063 GB", + "Language": "English", + "Paper": "--", + "Access": "Open", + "License": "Permitted Non-commercial Re-use with Acknowledgment", + "Dataset-link": "https://espace.library.uq.edu.au/view/UQ:446541", + "Paper-link": "--", + "License-link": "https://guides.library.uq.edu.au/deposit_your_data/terms_and_conditions" + }, + "EMOVO": { + "Year": 2014, + "Content": "6 actors who 
played 14 sentences.", + "Emotions": "6 emotions: disgust, fear, anger, joy, surprise, sadness.", + "Format": "Audio", + "Size": "0.355 GB", + "Language": "Italian", + "Paper": "EMOVO Corpus: an Italian Emotional Speech Database", + "Access": "Open", + "License": "--", + "Dataset-link": "http://voice.fub.it/activities/corpora/emovo/index.html", + "Paper-link": "https://core.ac.uk/download/pdf/53857389.pdf", + "License-link": "--" + }, + "RECOLA": { + "Year": 2013, + "Content": "3.8 hours of recordings by 46 participants.", + "Emotions": "negative and positive sentiment (valence and arousal).", + "Format": "Audio, Video", + "Size": "--", + "Language": "--", + "Paper": "Introducing the RECOLA Multimodal Corpus of Remote Collaborative and Affective Interactions", + "Access": "Restricted", + "License": "Academic License & Commercial License", + "Dataset-link": "https://diuf.unifr.ch/main/diva/recola/download.html", + "Paper-link": "https://drive.google.com/file/d/0B2V_I9XKBODhNENKUnZWNFdVXzQ/view", + "License-link": "Academic License & Commercial License" + }, + "GEMEP corpus": { + "Year": 2012, + "Content": "Videos of 10 actors portraying 10 states.", + "Emotions": "12 emotions: amusement, anxiety, cold anger (irritation), despair, hot anger (rage), fear (panic), interest, joy (elation), pleasure (sensory), pride, relief, and sadness. 
Plus, 5 additional emotions: admiration, contempt, disgust, surprise, and tenderness.", + "Format": "Audio, Video", + "Size": "--", + "Language": "French", + "Paper": "Introducing the Geneva Multimodal Expression Corpus for Experimental Research on Emotion Perception", + "Access": "Restricted", + "License": "--", + "Dataset-link": "https://www.unige.ch/cisa/gemep", + "Paper-link": "https://www.researchgate.net/publication/51796867_Introducing_the_Geneva_Multimodal_Expression_Corpus_for_Experimental_Research_on_Emotion_Perception", + "License-link": "--" + }, + "OGVC": { + "Year": 2012, + "Content": "9114 spontaneous utterances and 2656 acted utterances by 4 professional actors (two male and two female).", + "Emotions": "9 emotional states: fear, surprise, sadness, disgust, anger, anticipation, joy, acceptance and the neutral state.", + "Format": "Audio", + "Size": "--", + "Language": "Japanese", + "Paper": "Naturalistic emotional speech collection paradigm with online game and its psychological and acoustical assessment", + "Access": "Restricted", + "License": "--", + "Dataset-link": "https://sites.google.com/site/ogcorpus/home/en", + "Paper-link": "https://www.jstage.jst.go.jp/article/ast/33/6/33_E1175/_pdf", + "License-link": "--" + }, + "LEGO corpus": { + "Year": 2012, + "Content": "347 dialogs with 9,083 system-user exchanges.", + "Emotions": "Emotions classified as garbage, non-angry, slightly angry and very angry.", + "Format": "Audio", + "Size": "1.1 GB", + "Language": "--", + "Paper": "A Parameterized and Annotated Spoken Dialog Corpus of the CMU Let\u2019s Go Bus Information System", + "Access": "Open", + "License": "License available with the data. Free of charges for research purposes only.", + "Dataset-link": "https://www.ultes.eu/ressources/lego-spoken-dialogue-corpus/", + "Paper-link": "http://www.lrec-conf.org/proceedings/lrec2012/pdf/333_Paper.pdf", + "License-link": "License available with the data. Free of charges for research purposes only." 
+ }, + "SEMAINE": { + "Year": 2012, + "Content": "95 dyadic conversations from 21 subjects. Each subject converses with another playing one of four characters with emotions.", + "Emotions": "5 FeelTrace annotations: activation, valence, dominance, power, intensity", + "Format": "Audio, Video, Text", + "Size": "104 GB", + "Language": "English", + "Paper": "The SEMAINE Database: Annotated Multimodal Records of Emotionally Colored Conversations between a Person and a Limited Agent", + "Access": "Restricted", + "License": "Academic EULA", + "Dataset-link": "https://semaine-db.eu/", + "Paper-link": "https://ieeexplore.ieee.org/document/5959155", + "License-link": "Academic EULA" + }, + "SAVEE": { + "Year": 2011, + "Content": "480 British English utterances by 4 males actors.", + "Emotions": "7 emotions: anger, disgust, fear, happiness, sadness, surprise and neutral.", + "Format": "Audio, Video", + "Size": "--", + "Language": "English (British)", + "Paper": "Multimodal Emotion Recognition", + "Access": "Restricted", + "License": "Free of charges for research purposes only.", + "Dataset-link": "http://kahlan.eps.surrey.ac.uk/savee/Database.html", + "Paper-link": "http://personal.ee.surrey.ac.uk/Personal/P.Jackson/pub/ma10/HaqJackson_MachineAudition10_approved.pdf", + "License-link": "Free of charges for research purposes only." 
+ }, + "TESS": { + "Year": 2010, + "Content": "2800 recording by 2 actresses.", + "Emotions": "7 emotions: anger, disgust, fear, happiness, pleasant surprise, sadness, and neutral.", + "Format": "Audio", + "Size": "--", + "Language": "English", + "Paper": "BEHAVIOURAL FINDINGS FROM THE TORONTO EMOTIONAL SPEECH SET", + "Access": "Open", + "License": "CC BY-NC-ND 4.0", + "Dataset-link": "https://tspace.library.utoronto.ca/handle/1807/24487", + "Paper-link": "https://www.semanticscholar.org/paper/BEHAVIOURAL-FINDINGS-FROM-THE-TORONTO-EMOTIONAL-SET-Dupuis-Pichora-Fuller/d7f746b3aee801a353b6929a65d9a34a68e71c6f/figure/2", + "License-link": "https://creativecommons.org/licenses/by-nc-nd/4.0/" + }, + "EEKK": { + "Year": 2007, + "Content": "26 text passage read by 10 speakers.", + "Emotions": "4 main emotions: joy, sadness, anger and neutral.", + "Format": "--", + "Size": "0.352 GB", + "Language": "Estonian", + "Paper": "Estonian Emotional Speech Corpus", + "Access": "Open", + "License": "CC-BY license", + "Dataset-link": "https://metashare.ut.ee/repository/download/4d42d7a8463411e2a6e4005056b40024a19021a316b54b7fb707757d43d1a889/", + "Paper-link": "https://www.researchgate.net/publication/261724574_Estonian_Emotional_Speech_Corpus_Release_1", + "License-link": "https://metashare.ut.ee/repository/download/4d42d7a8463411e2a6e4005056b40024a19021a316b54b7fb707757d43d1a889/" + }, + "IEMOCAP": { + "Year": 2007, + "Content": "12 hours of audiovisual data by 10 actors.", + "Emotions": "5 emotions: happiness, anger, sadness, frustration and neutral.", + "Format": "--", + "Size": "--", + "Language": "English", + "Paper": "IEMOCAP: Interactive emotional dyadic motion capture database", + "Access": "Restricted", + "License": "IEMOCAP license", + "Dataset-link": "https://sail.usc.edu/iemocap/iemocap_release.htm", + "Paper-link": "https://sail.usc.edu/iemocap/Busso_2008_iemocap.pdf", + "License-link": "https://sail.usc.edu/iemocap/Data_Release_Form_IEMOCAP.pdf" + }, + "Keio-ESD": { + 
"Year": 2006, + "Content": "A set of human speech with vocal emotion spoken by a Japanese male speaker.", + "Emotions": "47 emotions including angry, joyful, disgusting, downgrading, funny, worried, gentle, relief, indignation, shameful, etc.", + "Format": "Audio", + "Size": "--", + "Language": "Japanese", + "Paper": "EMOTIONAL SPEECH SYNTHESIS USING SUBSPACE CONSTRAINTS IN PROSODY", + "Access": "Restricted", + "License": "Available for research purposes only.", + "Dataset-link": "http://research.nii.ac.jp/src/en/Keio-ESD.html", + "Paper-link": "http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.420.8899&rep=rep1&type=pdf", + "License-link": "Available for research purposes only." + }, + "EMO-DB": { + "Year": 2005, + "Content": "800 recording spoken by 10 actors (5 males and 5 females).", + "Emotions": "7 emotions: anger, neutral, fear, boredom, happiness, sadness, disgust.", + "Format": "Audio", + "Size": "--", + "Language": "German", + "Paper": "A Database of German Emotional Speech", + "Access": "Open", + "License": "--", + "Dataset-link": "http://emodb.bilderbar.info/index-1280.html", + "Paper-link": "http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.130.8506&rep=rep1&type=pdf", + "License-link": "--" + }, + "eNTERFACE05": { + "Year": 2005, + "Content": "Videos by 42 subjects, coming from 14 different nationalities.", + "Emotions": "6 emotions: anger, fear, surprise, happiness, sadness and disgust.", + "Format": "Audio, Video", + "Size": "0.8 GB", + "Language": "German", + "Paper": "--", + "Access": "Open", + "License": "Free of charges for research purposes only.", + "Dataset-link": "http://www.enterface.net/enterface05/docs/results/databases/project2_database.zip", + "Paper-link": "--", + "License-link": "Free of charges for research purposes only." 
+ }, + "DES": { + "Year": 2002, + "Content": "4 speakers (2 males and 2 females).", + "Emotions": "5 emotions: neutral, surprise, happiness, sadness and anger", + "Format": "--", + "Size": "--", + "Language": "Danish", + "Paper": "Documentation of the Danish Emotional Speech Database", + "Access": "--", + "License": "--", + "Dataset-link": "http://kom.aau.dk/~tb/speech/Emotions/", + "Paper-link": "http://kom.aau.dk/~tb/speech/Emotions/des.pdf", + "License-link": "--" + } +} \ No newline at end of file From acf5264a8e081375c12c44e78ba82038c11e4614 Mon Sep 17 00:00:00 2001 From: SuperKogito <superkogito@gmail.com> Date: Mon, 13 Feb 2023 22:32:10 +0100 Subject: [PATCH 5/5] updata contributions guide --- CONTRIBUTING.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d882db4..0bb78e4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -22,3 +22,8 @@ please feel free to add it. * The dataset should not be provided in an active PR. * The dataset should be available for researchers for free. * The information about the dataset must be accessible for verification. + +## How to contribute +First go to `src/` using `cd src`. Then add a the dictionary / part json data of the contributed dataset to `src/ser-datasets`. +Make sure the json is valid, then run the `python generate_files.py` to update the restructured text file, csv file and the README. +That's it, Congrats! and thank you for your contribution. Now open a PR with your changes. I will review it and then publish the results :)) \ No newline at end of file