diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index d4709b64..00000000 --- a/.travis.yml +++ /dev/null @@ -1,10 +0,0 @@ -language: ruby -rvm: - - 2.2 -before_script: - - gem install awesome_bot -script: - - site404=www.datawrangling.com,getglue-data.s3.amazonaws.com,archive.org/details/2011-05-calufa-twitter-sql,www.stats4stem.org,lib.stat.cmu.edu,http://www.oecd.org/document/0,census.gov/acs/www/data_documentation/data_release_info/ - - whtlist=travis,crawdad.cs.dartmouth.edu,data.nasdaq.com,137.189.35.203/WebUI/CatDatabase/catData.html,numbrary.com,www.cmr.osu.edu,gutenberg.org,donnees.gouv.qc.ca,data.rio.rj.gov.br,ntrl.ntis.gov,openflights.org,www.data.gov.bc.ca,earthdata.nasa,pgp-hms,cru.uea.ac.uk,networkdata.ics,datos.argentina,data.gov.ie,isi.edu,data.go.id,wiki.dbpedia,www.laval.ca,www.wunderground.com,data.lexingtonky.gov,arcgis,bixi - - site503=datamob.org,research.microsoft.com - - awesome_bot README.rst --allow-dupe --allow-redirect --set-timeout 5 --allow-timeout --white-list $site404,$whtlist,$site503 diff --git a/Government.rst b/Government.rst deleted file mode 100644 index 26555da3..00000000 --- a/Government.rst +++ /dev/null @@ -1,103 +0,0 @@ -Government ----------- - -* `Alberta, Province of Canada <http://open.alberta.ca>`_ -* `Antwerp, Belgium <http://opendata.antwerpen.be/datasets>`_ -* `Argentina (non official) <http://datar.noip.me/>`_ -* `Argentina <http://datos.argentina.gob.ar/>`_ -* `Austin, TX, US <https://data.austintexas.gov/>`_ -* `Australia (abs.gov.au) <http://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/3301.02009?OpenDocument>`_ -* `Australia (data.gov.au) <https://data.gov.au/>`_ -* `Austria (data.gv.at) <https://www.data.gv.at/>`_ -* `Baton Rouge, LA, US <https://data.brla.gov/>`_ -* `Belgium <http://data.gov.be/>`_ -* `Brazil <http://dados.gov.br/dataset>`_ -* `Buenos Aires, Argentina <http://data.buenosaires.gob.ar/>`_ -* `Calgary, AB, Canada 
<https://data.calgary.ca/OpenData/Pages/DatasetListingAlphabetical.aspx>`_ -* `Cambridge, MA, US <https://data.cambridgema.gov/>`_ -* `Canada <http://open.canada.ca/en?lang=En&n=5BCD274E-1>`_ -* `Chicago <https://data.cityofchicago.org/>`_ -* `Chile <http://datos.gob.cl/dataset>`_ -* `Dallas Open Data <https://www.dallasopendata.com/>`_ -* `DataBC - data from the Province of British Columbia <http://www.data.gov.bc.ca/>`_ -* `Denver Open Data <http://data.denvergov.org//>`_ -* `Durham, NC Open Data <https://opendurham.nc.gov/explore/>`_ -* `Edmonton, AB, Canada <https://data.edmonton.ca/>`_ -* `England LGInform <http://lginform.local.gov.uk/>`_ -* `EuroStat <http://ec.europa.eu/eurostat/data/database>`_ -* `FedStats <http://fedstats.sites.usa.gov/>`_ -* `Finland <https://www.opendata.fi/en>`_ -* `France <https://www.data.gouv.fr/en/datasets/>`_ -* `Fredericton, NB, Canada <http://www.fredericton.ca/en/citygovernment/Catalogue.asp>`_ -* `Gatineau, QC, Canada <http://www.gatineau.ca/donneesouvertes/default_fr.aspx>`_ -* `Germany <https://www-genesis.destatis.de/genesis/online>`_ -* `Ghent, Belgium <https://data.stad.gent/datasets>`_ -* `Glasgow, Scotland, UK <https://data.glasgow.gov.uk/>`_ -* `Greece <http://www.data.gov.gr/>`_ -* `Guardian world governments <http://www.guardian.co.uk/world-government-data>`_ -* `Halifax, NS, Canada <http://www.halifax.ca/opendata/index.php>`_ -* `Helsinki Region, Finland <http://www.hri.fi/en/>`_ -* `Hong Kong, China <https://data.gov.hk/en/>`_ -* `Houston Open Data <http://data.ohouston.org>`_ -* `Indian Government Data <https://data.gov.in/>`_ -* `Indonesian Data Portal <http://data.go.id/>`_ -* `Ireland's Open Data Portal <https://data.gov.ie/data>`_ -* `Japan <http://www.e-stat.go.jp/SG1/estat/eStatTopPortalE.do>`_ -* `Laval, QC, Canada <http://www.laval.ca/Pages/Fr/Citoyens/donnees.aspx>`_ -* `Lexington, KY <http://data.lexingtonky.gov/>`_ -* `London Datastore, UK <http://data.london.gov.uk/dataset>`_ -* `London, ON, Canada 
<http://www.london.ca/city-hall/open-data/Pages/default.aspx>`_ -* `Los Angeles Open Data <https://data.lacity.org/>`_ -* `MassGIS, Massachusetts, U.S. <http://www.mass.gov/anf/research-and-tech/it-serv-and-support/application-serv/office-of-geographic-information-massgis/>`_ -* `Mexico <http://catalogo.datos.gob.mx/dataset>`_ -* `Missisauga, ON, Canada <http://www.mississauga.ca/portal/residents/publicationsopendatacatalogue>`_ -* `Moldova <http://data.gov.md/>`_ -* `Moncton, NB, Canada <http://www.moncton.ca/Government/Terms_of_use/Open_Data_Purpose/Data_Catalogue.htm>`_ -* `Montreal, QC, Canada <http://donnees.ville.montreal.qc.ca/>`_ -* `Netherlands <https://data.overheid.nl/>`_ -* `New Zealand <http://www.stats.govt.nz/browse_for_stats.aspx>`_ -* `NYC betanyc <http://betanyc.us/>`_ -* `NYC Open Data <https://nycplatform.socrata.com/>`_ -* `OECD <https://data.oecd.org/>`_ -* `Oklahoma <https://data.ok.gov/>`_ -* `Open Government Data (OGD) Platform India <https://data.gov.in/>`_ -* `Oregon <https://data.oregon.gov/>`_ -* `Ottawa, ON, Canada <http://data.ottawa.ca/en/>`_ -* `Portland, Oregon <https://www.portlandoregon.gov/28130>`_ -* `Portugal - Pordata organization <http://www.pordata.pt/en/Home>`_ -* `Puerto Rico Government <https://data.pr.gov//>`_ -* `Quebec City, QC, Canada <http://donnees.ville.quebec.qc.ca/>`_ -* `Quebec Province of Canada <http://donnees.gouv.qc.ca/>`_ -* `Regina SK, Canada <http://open.regina.ca/>`_ -* `Rio de Janeiro, Brazil <http://data.rio.rj.gov.br/>`_ -* `Romania <http://data.gov.ro/>`_ -* `Russia <http://data.gov.ru>`_ -* `San Francisco Data sets <http://datasf.org/>`_ -* `Saskatchewan, Province of Canada <http://opendatask.ca/data/>`_ -* `Seattle <https://data.seattle.gov/>`_ -* `Singapore Government Data <https://data.gov.sg/>`_ -* `South Africa <http://beta2.statssa.gov.za/>`_ -* `South Africa Trade Statistics <http://www.econostatistics.co.za/>`_ -* `State of Utah, US <https://opendata.utah.gov/>`_ -* `Switzerland 
<http://www.opendata.admin.ch/>`_ -* `Taiwan <http://data.gov.tw/>`_ -* `Taiwan g0v <http://data.g0v.tw/>`_ -* `Texas Open Data <https://data.texas.gov/>`_ -* `The World Bank <http://wdronline.worldbank.org/>`_ -* `Toronto, ON, Canada <http://www1.toronto.ca/wps/portal/contentonly?vgnextoid=1a66e03bb8d1e310VgnVCM10000071d60f89RCRD>`_ -* `U.K. Government Data <http://data.gov.uk/data>`_ -* `U.S. American Community Survey <http://www.census.gov/acs/www/data_documentation/data_release_info/>`_ -* `U.S. CDC Public Health datasets <http://www.cdc.gov/nchs/data_access/ftp_data.htm>`_ -* `U.S. Census Bureau <http://www.census.gov/data.html>`_ -* `U.S. Department of Housing and Urban Development (HUD) <http://www.huduser.gov/portal/datasets/pdrdatas.html>`_ -* `U.S. Federal Government Agencies <http://www.data.gov/metrics>`_ -* `U.S. Federal Government Data Catalog <http://catalog.data.gov/dataset>`_ -* `U.S. Food and Drug Administration (FDA) <https://open.fda.gov/index.html>`_ -* `U.S. National Center for Education Statistics (NCES) <http://nces.ed.gov/>`_ -* `U.S. Open Government <http://www.data.gov/open-gov/>`_ -* `UK 2011 Census Open Atlas Project <http://www.alex-singleton.com/r/2014/02/05/2011-census-open-atlas-project-version-two/>`_ -* `United Nations <http://data.un.org/>`_ -* `Uruguay <https://catalogodatos.gub.uy/>`_ -* `Vancouver, BC Open Data Catalog <http://data.vancouver.ca/datacatalogue/>`_ -* `Victoria, BC, Canada <http://www.victoria.ca/EN/main/city/open-data-catalogue.html>`_ -* `Vienna, Austria <https://open.wien.gv.at/site/open-data/>`_ \ No newline at end of file diff --git a/README.rst b/README.rst old mode 100755 new mode 100644 index dee2bf20..fa55f149 --- a/README.rst +++ b/README.rst @@ -1,550 +1,1270 @@ Awesome Public Datasets ======================= + .. image:: https://cdn.rawgit.com/sindresorhus/awesome/d7305f38d29fed78fa85652e3a63e154dd8e8829/media/badge.svg :alt: Awesome :target: https://github.com/sindresorhus/awesome -.. 
image:: https://travis-ci.org/caesar0301/awesome-public-datasets.svg - :target: https://travis-ci.org/caesar0301/awesome-public-datasets - -`This list of public data sources <https://github.com/caesar0301/awesome-public-datasets>`_ -are collected and tidied from blogs, answers, and user responses. -Most of the data sets listed below are free, however, some are not. -Other amazingly awesome lists can be found in the -`awesome-awesomeness <https://github.com/bayandin/awesome-awesomeness>`_ and -`sindresorhus's awesome <https://github.com/sindresorhus/awesome>`_ list. - -.. contents:: Table of Contents -Agriculture ------------- -* `U.S. Department of Agriculture's PLANTS Database <http://www.plants.usda.gov/dl_all.html>`_ +.. |OK_ICON| image:: https://raw.githubusercontent.com/awesomedata/apd-core/master/deploy/ok-24.png +.. |FIXME_ICON| image:: https://raw.githubusercontent.com/awesomedata/apd-core/master/deploy/fixme-24.png -Biology -------- +**NOTICE**: This repo is automatically generated by `apd-core <https://github.com/awesomedata/apd-core/tree/master/core>`_. +Please **DO NOT** modify this file directly. We have provided +`a new way <https://github.com/awesomedata/apd-core/wiki/HOW_TO_CONTRIBUTE-%E5%A6%82%E4%BD%95%E8%B4%A1%E7%8C%AE>`_ +to contribute to Awesome Public Datasets. Pull requests made directly against this repo are no longer accepted. 
-* `1000 Genomes <http://www.1000genomes.org/data>`_ -* `American Gut (Microbiome Project) <https://github.com/biocore/American-Gut>`_ -* `Broad Cancer Cell Line Encyclopedia (CCLE) <http://www.broadinstitute.org/ccle/home>`_ -* `Cell Image Library <http://www.cellimagelibrary.org>`_ -* `Collaborative Research in Computational Neuroscience (CRCNS) <http://crcns.org/data-sets>`_ -* `Complete Genomics Public Data <http://www.completegenomics.com/public-data/69-genomes/>`_ -* `EBI ArrayExpress <http://www.ebi.ac.uk/arrayexpress/>`_ -* `EBI Protein Data Bank in Europe <http://www.ebi.ac.uk/pdbe/emdb/index.html/>`_ -* `ENCODE project <https://www.encodeproject.org>`_ -* `Ensembl Genomes <http://ensemblgenomes.org/info/genomes>`_ -* `Gene Expression Omnibus (GEO) <http://www.ncbi.nlm.nih.gov/geo/>`_ -* `Gene Ontology (GO) <http://geneontology.org/page/download-annotations>`_ -* `Global Biotic Interactions (GloBI) <https://github.com/jhpoelen/eol-globi-data/wiki#accessing-species-interaction-data>`_ -* `Harvard Medical School (HMS) LINCS Project <http://lincs.hms.harvard.edu>`_ -* `Human Genome Diversity Project <http://www.hagsc.org/hgdp/files.html>`_ -* `Human Microbiome Project (HMP) <http://www.hmpdacc.org/reference_genomes/reference_genomes.php>`_ -* `ICOS PSP Benchmark <http://ico2s.org/datasets/psp_benchmark.html>`_ -* `International HapMap Project <http://hapmap.ncbi.nlm.nih.gov/downloads/index.html.en>`_ -* `Journal of Cell Biology DataViewer <http://jcb-dataviewer.rupress.org>`_ -* `MIT Cancer Genomics Data <http://www.broadinstitute.org/cgi-bin/cancer/datasets.cgi>`_ -* `NCBI Proteins <http://www.ncbi.nlm.nih.gov/guide/proteins/#databases>`_ -* `NCBI Taxonomy <http://www.ncbi.nlm.nih.gov/taxonomy>`_ -* `NeuroData <http://neurodata.io>`_ -* `NIH Microarray data <http://bit.do/VVW6>`_ or `FTP <ftp://ftp.ncbi.nih.gov/pub/geo/DATA/supplementary/series/GSE6532/>`_ -* `OpenSNP genotypes data <https://opensnp.org/>`_ -* `Pathguid - Protein-Protein Interactions Catalog 
<http://www.pathguide.org/>`_ -* `Protein Data Bank <http://www.rcsb.org/>`_ -* `Psychiatric Genomics Consortium <https://www.med.unc.edu/pgc/downloads>`_ -* `PubChem Project <https://pubchem.ncbi.nlm.nih.gov/>`_ -* `PubGene (now Coremine Medical) <http://www.pubgene.org/>`_ -* `Sanger Catalogue of Somatic Mutations in Cancer (COSMIC) <http://cancer.sanger.ac.uk/cosmic>`_ -* `Sanger Genomics of Drug Sensitivity in Cancer Project (GDSC) <http://www.cancerrxgene.org/>`_ -* `Sequence Read Archive(SRA) <http://www.ncbi.nlm.nih.gov/Traces/sra/>`_ -* `Stanford Microarray Data <http://smd.stanford.edu/>`_ -* `Stowers Institute Original Data Repository <http://www.stowers.org/research/publications/odr>`_ -* `Systems Science of Biological Dynamics (SSBD) Database <http://ssbd.qbic.riken.jp>`_ -* `Temple University Hospital EEG Database <https://www.nedcdata.org/drupal/node/12>`_ -* `The Cancer Genome Atlas (TCGA), available via Broad GDAC <https://gdac.broadinstitute.org/>`_ -* `The Catalogue of Life <http://www.catalogueoflife.org/content/annual-checklist-archive>`_ -* `The Personal Genome Project <http://www.personalgenomes.org/>`_ or `PGP <https://my.pgp-hms.org/public_genetic_data>`_ -* `UCSC Public Data <http://hgdownload.soe.ucsc.edu/downloads.html>`_ -* `Universal Protein Resource (UnitProt) <http://www.uniprot.org/downloads>`_ -* `UniGene <http://www.ncbi.nlm.nih.gov/unigene>`_ - - -Climate/Weather ---------------- - -* `Australian Weather <http://www.bom.gov.au/climate/dwo/>`_ -* `Brazilian Weather - Historical data (In Portuguese) <http://sinda.crn2.inpe.br/PCD/SITE/novo/site/>`_ -* `Canadian Meteorological Centre <http://weather.gc.ca/grib/index_e.html>`_ -* `Climate Data from UEA (updated monthly) <https://crudata.uea.ac.uk/cru/data/temperature/#datter and ftp://ftp.cmdl.noaa.gov/>`_ -* `European Climate Assessment & Dataset <http://eca.knmi.nl/>`_ -* `Global Climate Data Since 1929 <http://en.tutiempo.net/climate>`_ -* `NASA Global Imagery Browse Services 
<https://wiki.earthdata.nasa.gov/display/GIBS>`_ -* `NOAA Bering Sea Climate <http://www.beringclimate.noaa.gov/>`_ -* `NOAA Climate Datasets <http://www.ncdc.noaa.gov/data-access/quick-links>`_ -* `NOAA Realtime Weather Models <http://www.ncdc.noaa.gov/data-access/model-data/model-datasets/numerical-weather-prediction>`_ -* `The World Bank Open Data Resources for Climate Change <http://data.worldbank.org/developers/climate-data-api>`_ -* `UEA Climatic Research Unit <http://www.cru.uea.ac.uk/data>`_ -* `WorldClim - Global Climate Data <http://www.worldclim.org>`_ -* `WU Historical Weather Worldwide <https://www.wunderground.com/history/index.html>`_ - - -Complex Networks ----------------- +* |OK_ICON| I am well. +* |FIXME_ICON| Please fix me. -* `AMiner Citation Network Dataset <http://aminer.org/citation>`_ -* `CrossRef DOI URLs <https://archive.org/details/doi-urls>`_ -* `DBLP Citation dataset <https://kdl.cs.umass.edu/display/public/DBLP>`_ -* `NBER Patent Citations <http://nber.org/patents/>`_ -* `Network Repository with Interactive Exploratory Analysis Tools <http://networkrepository.com/>`_ -* `NIST complex networks data collection <http://math.nist.gov/~RPozo/complex_datasets.html>`_ -* `Protein-protein interaction network <http://vlado.fmf.uni-lj.si/pub/networks/data/bio/Yeast/Yeast.htm>`_ -* `PyPI and Maven Dependency Network <https://ogirardot.wordpress.com/2013/01/31/sharing-pypimaven-dependency-data/>`_ -* `Scopus Citation Database <https://www.elsevier.com/solutions/scopus>`_ -* `Small Network Data <http://www-personal.umich.edu/~mejn/netdata/>`_ -* `Stanford GraphBase (Steven Skiena) <http://www3.cs.stonybrook.edu/~algorith/implement/graphbase/implement.shtml>`_ -* `Stanford Large Network Dataset Collection <http://snap.stanford.edu/data/>`_ -* `Stanford Longitudinal Network Data Sources <http://stanford.edu/group/sonia/dataSources/index.html>`_ -* `The Koblenz Network Collection <http://konect.uni-koblenz.de/>`_ -* `The Laboratory for Web 
Algorithmics (UNIMI) <http://law.di.unimi.it/datasets.php>`_ -* `The Nexus Network Repository <http://nexus.igraph.org/>`_ -* `UCI Network Data Repository <https://networkdata.ics.uci.edu/resources.php>`_ -* `UFL sparse matrix collection <http://www.cise.ufl.edu/research/sparse/matrices/>`_ -* `WSU Graph Database <http://www.eecs.wsu.edu/mgd/gdb.html>`_ -* `DIMACS Road Networks Collection <http://www.dis.uniroma1.it/challenge9/download.shtml>`_ - -Computer Networks ------------------ +`This is a list of topic-centric public data sources <https://github.com/awesomedata/awesome-public-datasets>`_ +of high quality. They are collected and tidied from blogs, answers, and user responses. +Most of the data sets listed below are free; however, some are not. +Other amazingly awesome lists can be found in `sindresorhus's awesome <https://github.com/sindresorhus/awesome>`_ list. -* `3.5B Web Pages from CommonCraw 2012 <http://www.bigdatanews.com/profiles/blogs/big-data-set-3-5-billion-web-pages-made-available-for-all-of-us>`_ -* `53.5B Web clicks of 100K users in Indiana Univ. <http://cnets.indiana.edu/groups/nan/webtraffic/click-dataset/>`_ -* `CAIDA Internet Datasets <http://www.caida.org/data/overview/>`_ -* `ClueWeb09 - 1B web pages <http://lemurproject.org/clueweb09/>`_ -* `ClueWeb12 - 733M web pages <http://lemurproject.org/clueweb12/>`_ -* `CommonCrawl Web Data over 7 years <http://commoncrawl.org/the-data/get-started/>`_ -* `CRAWDAD Wireless datasets from Dartmouth Univ. <https://crawdad.cs.dartmouth.edu/>`_ -* `Criteo click-through data <http://labs.criteo.com/2015/03/criteo-releases-its-new-dataset/>`_ -* `Open Mobile Data by MobiPerf <https://console.developers.google.com/storage/openmobiledata_public/>`_ -* `Rapid7 Sonar Internet Scans <https://sonar.labs.rapid7.com/>`_ -* `UCSD Network Telescope, IPv4 /8 net <http://www.caida.org/projects/network_telescope/>`_ +.. contents:: **Table of Contents** -Contextual Data + +Agriculture +----------- + +* |OK_ICON| `U.S. 
Department of Agriculture's Nutrient Database <https://www.ars.usda.gov/northeast-area/beltsville-md/beltsville-human-nutrition-research-center/nutrient-data-laboratory/docs/sr28-download-files/>`_ + +* |OK_ICON| `U.S. Department of Agriculture's PLANTS Database <http://www.plants.usda.gov/dl_all.html>`_ + +Biology +------- + +* |OK_ICON| `1000 Genomes <http://www.1000genomes.org/data>`_ + +* |OK_ICON| `American Gut (Microbiome Project) <https://github.com/biocore/American-Gut>`_ + +* |OK_ICON| `Broad Bioimage Benchmark Collection (BBBC) <https://www.broadinstitute.org/bbbc>`_ + +* |OK_ICON| `Broad Cancer Cell Line Encyclopedia (CCLE) <http://www.broadinstitute.org/ccle/home>`_ + +* |OK_ICON| `Cell Image Library <http://www.cellimagelibrary.org>`_ + +* |OK_ICON| `Complete Genomics Public Data <http://www.completegenomics.com/public-data/69-genomes/>`_ + +* |OK_ICON| `EBI ArrayExpress <http://www.ebi.ac.uk/arrayexpress/>`_ + +* |OK_ICON| `EBI Protein Data Bank in Europe <http://www.ebi.ac.uk/pdbe/emdb/index.html/>`_ + +* |OK_ICON| `ENCODE project <https://www.encodeproject.org>`_ + +* |OK_ICON| `Electron Microscopy Pilot Image Archive (EMPIAR) <http://www.ebi.ac.uk/pdbe/emdb/empiar/>`_ + +* |OK_ICON| `Ensembl Genomes <http://ensemblgenomes.org/info/genomes>`_ + +* |OK_ICON| `Gene Expression Omnibus (GEO) <http://www.ncbi.nlm.nih.gov/geo/>`_ + +* |OK_ICON| `Gene Ontology (GO) <http://geneontology.org/page/download-annotations>`_ + +* |OK_ICON| `Global Biotic Interactions (GloBI) <https://github.com/jhpoelen/eol-globi-data/wiki#accessing-species-interaction-data>`_ + +* |OK_ICON| `Harvard Medical School (HMS) LINCS Project <http://lincs.hms.harvard.edu>`_ + +* |OK_ICON| `Human Genome Diversity Project <http://www.hagsc.org/hgdp/files.html>`_ + +* |OK_ICON| `Human Microbiome Project (HMP) <http://www.hmpdacc.org/reference_genomes/reference_genomes.php>`_ + +* |OK_ICON| `ICOS PSP Benchmark <http://ico2s.org/datasets/psp_benchmark.html>`_ + +* |OK_ICON| `International 
HapMap Project <http://hapmap.ncbi.nlm.nih.gov/downloads/index.html.en>`_ + +* |OK_ICON| `Journal of Cell Biology DataViewer <http://jcb-dataviewer.rupress.org>`_ + +* |OK_ICON| `KEGG - KEGG is a database resource for understanding high-level functions [...] <http://www.genome.jp/kegg/>`_ + +* |OK_ICON| `MIT Cancer Genomics Data <http://www.broadinstitute.org/cgi-bin/cancer/datasets.cgi>`_ + +* |OK_ICON| `NCBI Proteins <http://www.ncbi.nlm.nih.gov/guide/proteins/#databases>`_ + +* |OK_ICON| `NCBI Taxonomy <http://www.ncbi.nlm.nih.gov/taxonomy>`_ + +* |OK_ICON| `NCI Genomic Data Commons <https://gdc-portal.nci.nih.gov>`_ + +* |FIXME_ICON| `NIH Microarray data <http://bit.do/VVW6>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//Biology/NIH-Microarray-data.yml>`_] + +* |OK_ICON| `OpenSNP genotypes data <https://opensnp.org/>`_ + +* |OK_ICON| `Pathguide - Protein-Protein Interactions Catalog <http://www.pathguide.org/>`_ + +* |OK_ICON| `Protein Data Bank <http://www.rcsb.org/>`_ + +* |OK_ICON| `Psychiatric Genomics Consortium <https://www.med.unc.edu/pgc/downloads>`_ + +* |OK_ICON| `PubChem Project <https://pubchem.ncbi.nlm.nih.gov/>`_ + +* |OK_ICON| `PubGene (now Coremine Medical) <http://www.pubgene.org/>`_ + +* |OK_ICON| `Sanger Catalogue of Somatic Mutations in Cancer (COSMIC) <http://cancer.sanger.ac.uk/cosmic>`_ + +* |OK_ICON| `Sanger Genomics of Drug Sensitivity in Cancer Project (GDSC) <http://www.cancerrxgene.org/>`_ + +* |OK_ICON| `Sequence Read Archive(SRA) <http://www.ncbi.nlm.nih.gov/Traces/sra/>`_ + +* |FIXME_ICON| `Stanford Microarray Data <http://smd.stanford.edu/>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//Biology/Stanford-Microarray-Data.yml>`_] + +* |OK_ICON| `Stowers Institute Original Data Repository <http://www.stowers.org/research/publications/odr>`_ + +* |OK_ICON| `Systems Science of Biological Dynamics (SSBD) Database <http://ssbd.qbic.riken.jp>`_ + +* |OK_ICON| `The Cancer Genome Atlas (TCGA), 
available via Broad GDAC <https://gdac.broadinstitute.org/>`_ + +* |OK_ICON| `The Catalogue of Life <http://www.catalogueoflife.org/content/annual-checklist-archive>`_ + +* |OK_ICON| `The Personal Genome Project <http://www.personalgenomes.org/>`_ + +* |OK_ICON| `UCSC Public Data <http://hgdownload.soe.ucsc.edu/downloads.html>`_ + +* |OK_ICON| `UniGene <http://www.ncbi.nlm.nih.gov/unigene>`_ + +* |OK_ICON| `Universal Protein Resource (UniProt) <http://www.uniprot.org/downloads>`_ + +Climate+Weather --------------- - -* `Context-aware data sets from five domains <http://students.depaul.edu/~yzheng8/DataSets.html#Data>`_ or `GitHub <https://github.com/irecsys/CARSKit/tree/master/context-aware_data_sets>`_ - - -Data Challenges + +* |OK_ICON| `Actuaries Climate Index <http://actuariesclimateindex.org/data/>`_ + +* |OK_ICON| `Australian Weather <http://www.bom.gov.au/climate/dwo/>`_ + +* |OK_ICON| `Aviation Weather Center - Consistent, timely and accurate weather [...] <https://aviationweather.gov/adds/dataserver>`_ + +* |OK_ICON| `Brazilian Weather - Historical data (In Portuguese) <http://sinda.crn2.inpe.br/PCD/SITE/novo/site/>`_ + +* |OK_ICON| `Canadian Meteorological Centre <http://weather.gc.ca/grib/index_e.html>`_ + +* |OK_ICON| `Climate Data from UEA (updated monthly) <https://crudata.uea.ac.uk/cru/data/temperature/#datter and ftp://ftp.cmdl.noaa.gov/>`_ + +* |FIXME_ICON| `European Climate Assessment & Dataset <http://eca.knmi.nl/>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//Climate+Weather/European-Climate-Assessment-&-Dataset.yml>`_] + +* |OK_ICON| `Global Climate Data Since 1929 <http://en.tutiempo.net/climate>`_ + +* |OK_ICON| `NASA Global Imagery Browse Services <https://wiki.earthdata.nasa.gov/display/GIBS>`_ + +* |OK_ICON| `NOAA Bering Sea Climate <http://www.beringclimate.noaa.gov/>`_ + +* |OK_ICON| `NOAA Climate Datasets <http://www.ncdc.noaa.gov/data-access/quick-links>`_ + +* |OK_ICON| `NOAA Realtime Weather Models 
<http://www.ncdc.noaa.gov/data-access/model-data/model-datasets/numerical-weather-prediction>`_ + +* |OK_ICON| `NOAA SURFRAD Meteorology and Radiation Datasets <https://www.esrl.noaa.gov/gmd/grad/stardata.html>`_ + +* |OK_ICON| `The World Bank Open Data Resources for Climate Change <http://data.worldbank.org/developers/climate-data-api>`_ + +* |OK_ICON| `UEA Climatic Research Unit <http://www.cru.uea.ac.uk/data>`_ + +* |OK_ICON| `WU Historical Weather Worldwide <https://www.wunderground.com/history/index.html>`_ + +* |OK_ICON| `WorldClim - Global Climate Data <http://www.worldclim.org>`_ + +ComplexNetworks --------------- - -* `Challenges in Machine Learning <http://www.chalearn.org/>`_ -* `CrowdANALYTIX dataX <http://data.crowdanalytix.com>`_ -* `D4D Challenge of Orange <http://www.d4d.orange.com/en/home>`_ -* `DrivenData Competitions for Social Good <http://www.drivendata.org/>`_ -* `ICWSM Data Challenge (since 2009) <http://icwsm.cs.umbc.edu/>`_ -* `Kaggle Competition Data <https://www.kaggle.com/>`_ -* `KDD Cup by Tencent 2012 <http://www.kddcup2012.org/>`_ -* `Localytics Data Visualization Challenge <https://github.com/localytics/data-viz-challenge>`_ -* `Netflix Prize <http://www.netflixprize.com/leaderboard>`_ -* `Space Apps Challenge <https://2015.spaceappschallenge.org>`_ -* `Telecom Italia Big Data Challenge <https://dandelion.eu/datamine/open-big-data/>`_ -* `Yelp Dataset Challenge <http://www.yelp.com/dataset_challenge>`_ -* `Bruteforce Database <https://github.com/duyetdev/bruteforce-database>`_ - + +* |OK_ICON| `AMiner Citation Network Dataset <http://aminer.org/citation>`_ + +* |OK_ICON| `CrossRef DOI URLs <https://archive.org/details/doi-urls>`_ + +* |FIXME_ICON| `DBLP Citation dataset <https://kdl.cs.umass.edu/display/public/DBLP>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//ComplexNetworks/DBLP-Citation-dataset.yml>`_] + +* |OK_ICON| `DIMACS Road Networks Collection <http://www.dis.uniroma1.it/challenge9/download.shtml>`_ + 
+* |OK_ICON| `NBER Patent Citations <http://nber.org/patents/>`_ + +* |OK_ICON| `NIST complex networks data collection <http://math.nist.gov/~RPozo/complex_datasets.html>`_ + +* |OK_ICON| `Network Repository with Interactive Exploratory Analysis Tools <http://networkrepository.com/>`_ + +* |OK_ICON| `Protein-protein interaction network <http://vlado.fmf.uni-lj.si/pub/networks/data/bio/Yeast/Yeast.htm>`_ + +* |OK_ICON| `PyPI and Maven Dependency Network <https://ogirardot.wordpress.com/2013/01/31/sharing-pypimaven-dependency-data/>`_ + +* |OK_ICON| `Scopus Citation Database <https://www.elsevier.com/solutions/scopus>`_ + +* |OK_ICON| `Small Network Data <http://www-personal.umich.edu/~mejn/netdata/>`_ + +* |OK_ICON| `Stanford GraphBase <http://www3.cs.stonybrook.edu/~algorith/implement/graphbase/implement.shtml>`_ + +* |OK_ICON| `Stanford Large Network Dataset Collection <http://snap.stanford.edu/data/>`_ + +* |OK_ICON| `Stanford Longitudinal Network Data Sources <http://stanford.edu/group/sonia/dataSources/index.html>`_ + +* |OK_ICON| `The Koblenz Network Collection <http://konect.uni-koblenz.de/>`_ + +* |OK_ICON| `The Laboratory for Web Algorithmics (UNIMI) <http://law.di.unimi.it/datasets.php>`_ + +* |FIXME_ICON| `The Nexus Network Repository <http://nexus.igraph.org/>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//ComplexNetworks/The-Nexus-Network-Repository.yml>`_] + +* |OK_ICON| `UCI Network Data Repository <https://networkdata.ics.uci.edu/resources.php>`_ + +* |OK_ICON| `UFL sparse matrix collection <http://www.cise.ufl.edu/research/sparse/matrices/>`_ + +* |OK_ICON| `WSU Graph Database <http://www.eecs.wsu.edu/mgd/gdb.html>`_ + +ComputerNetworks +---------------- + +* |OK_ICON| `3.5B Web Pages from CommonCrawl 2012 <http://www.bigdatanews.com/profiles/blogs/big-data-set-3-5-billion-web-pages-made-available-for-all-of-us>`_ + +* |OK_ICON| `53.5B Web clicks of 100K users in Indiana Univ. 
<http://cnets.indiana.edu/groups/nan/webtraffic/click-dataset/>`_ + +* |OK_ICON| `CAIDA Internet Datasets <http://www.caida.org/data/overview/>`_ + +* |OK_ICON| `CRAWDAD Wireless datasets from Dartmouth Univ. <https://crawdad.cs.dartmouth.edu/>`_ + +* |OK_ICON| `ClueWeb09 - 1B web pages <http://lemurproject.org/clueweb09/>`_ + +* |OK_ICON| `ClueWeb12 - 733M web pages <http://lemurproject.org/clueweb12/>`_ + +* |OK_ICON| `CommonCrawl Web Data over 7 years <http://commoncrawl.org/the-data/get-started/>`_ + +* |OK_ICON| `Criteo click-through data <http://labs.criteo.com/2015/03/criteo-releases-its-new-dataset/>`_ + +* |OK_ICON| `Internet-Wide Scan Data Repository <https://scans.io/>`_ + +* |OK_ICON| `OONI: Open Observatory of Network Interference - Internet censorship data <https://ooni.torproject.org/data/>`_ + +* |OK_ICON| `Open Mobile Data by MobiPerf <https://console.developers.google.com/storage/openmobiledata_public/>`_ + +* |OK_ICON| `Rapid7 Sonar Internet Scans <https://sonar.labs.rapid7.com/>`_ + +* |OK_ICON| `UCSD Network Telescope, IPv4 /8 net <http://www.caida.org/projects/network_telescope/>`_ + +DataChallenges +-------------- + +* |OK_ICON| `Bruteforce Database <https://github.com/duyetdev/bruteforce-database>`_ + +* |OK_ICON| `Challenges in Machine Learning <http://www.chalearn.org/>`_ + +* |OK_ICON| `CrowdANALYTIX dataX <http://data.crowdanalytix.com>`_ + +* |FIXME_ICON| `D4D Challenge of Orange <http://www.d4d.orange.com/en/home>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//DataChallenges/D4D-Challenge-of-Orange.yml>`_] + +* |OK_ICON| `DrivenData Competitions for Social Good <http://www.drivendata.org/>`_ + +* |FIXME_ICON| `ICWSM Data Challenge (since 2009) <http://icwsm.cs.umbc.edu/>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//DataChallenges/ICWSM-Data-Challenge-since-2009.yml>`_] + +* |OK_ICON| `KDD Cup by Tencent 2012 <http://www.kddcup2012.org/>`_ + +* |OK_ICON| `Kaggle Competition Data 
<https://www.kaggle.com/>`_ + +* |OK_ICON| `Localytics Data Visualization Challenge <https://github.com/localytics/data-viz-challenge>`_ + +* |OK_ICON| `Netflix Prize <http://netflixprize.com/leaderboard.html>`_ + +* |OK_ICON| `Space Apps Challenge <https://2015.spaceappschallenge.org>`_ + +* |OK_ICON| `Telecom Italia Big Data Challenge <https://dandelion.eu/datamine/open-big-data/>`_ + +* |OK_ICON| `TravisTorrent Dataset - MSR'2017 Mining Challenge <https://travistorrent.testroots.org/>`_ + +* |OK_ICON| `TunedIT - Data mining & machine learning data sets, algorithms, challenges <http://tunedit.org/challenges/>`_ + +* |OK_ICON| `Yelp Dataset Challenge <http://www.yelp.com/dataset_challenge>`_ + +EarthScience +------------ + +* |OK_ICON| `AQUASTAT - Global water resources and uses <http://www.fao.org/nr/water/aquastat/data/query/index.html?lang=en>`_ + +* |OK_ICON| `BODC - marine data of ~22K vars <https://www.bodc.ac.uk/data/>`_ + +* |OK_ICON| `EOSDIS - NASA's earth observing system data <http://sedac.ciesin.columbia.edu/data/sets/browse>`_ + +* |OK_ICON| `Earth Models <http://www.earthmodels.org/>`_ + +* |OK_ICON| `Integrated Marine Observing System (IMOS) - roughly 30TB of ocean measurements <https://imos.aodn.org.au>`_ + +* |OK_ICON| `Marinexplore - Open Oceanographic Data <http://marinexplore.org/>`_ + +* |OK_ICON| `Smithsonian Institution Global Volcano and Eruption Database <http://volcano.si.edu/>`_ + +* |OK_ICON| `USGS Earthquake Archives <http://earthquake.usgs.gov/earthquakes/search/>`_ + Economics --------- - -* `American Economic Ass (AEA) <https://www.aeaweb.org/RFE/toc.php?show=complete>`_ -* `EconData from UMD <http://inforumweb.umd.edu/econdata/econdata.html>`_ -* `Economic Freedom of the World Data <http://www.freetheworld.com/datasets_efw.html>`_ -* `Historical MacroEconomc Statistics <http://www.historicalstatistics.org/>`_ -* `International Trade Statistics <http://www.econostatistics.co.za/>`_ -* `Internet Product Code Database 
<http://www.upcdatabase.com/>`_ -* `Joint External Debt Data Hub <http://www.jedh.org/>`_ -* `Jon Haveman International Trade Data Links <http://www.macalester.edu/research/economics/PAGE/HAVEMAN/Trade.Resources/TradeData.html>`_ -* `OpenCorporates Database of Companies in the World <https://opencorporates.com/>`_ -* `Our World in Data <http://ourworldindata.org/>`_ -* `SciencesPo World Trade Gravity Datasets <http://econ.sciences-po.fr/thierry-mayer/data>`_ -* `The Atlas of Economic Complexity <http://atlas.cid.harvard.edu>`_ -* `The Center for International Data <http://cid.econ.ucdavis.edu>`_ -* `The Observatory of Economic Complexity <http://atlas.media.mit.edu/en/>`_ -* `UN Commodity Trade Statistics <http://comtrade.un.org/db/>`_ -* `UN Human Development Reports <http://hdr.undp.org/en>`_ - - + +* |OK_ICON| `American Economic Association (AEA) <https://www.aeaweb.org/resources/data>`_ + +* |OK_ICON| `EconData from UMD <http://inforumweb.umd.edu/econdata/econdata.html>`_ + +* |FIXME_ICON| `Economic Freedom of the World Data <http://www.freetheworld.com/datasets_efw.html>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//Economics/Economic-Freedom-of-the-World-Data.yml>`_] + +* |OK_ICON| `Historical Macroeconomic Statistics <http://www.historicalstatistics.org/>`_ + +* |OK_ICON| `INFORUM - Interindustry Forecasting at the University of Maryland <http://inforumweb.umd.edu/>`_ + +* |OK_ICON| `International Economics Database <http://widukind.cepremap.org/>`_ + +* |OK_ICON| `International Trade Statistics <http://www.econostatistics.co.za/>`_ + +* |OK_ICON| `Internet Product Code Database <http://www.upcdatabase.com/>`_ + +* |OK_ICON| `Joint External Debt Data Hub <http://www.jedh.org/>`_ + +* |OK_ICON| `Jon Haveman International Trade Data Links <http://www.macalester.edu/research/economics/PAGE/HAVEMAN/Trade.Resources/TradeData.html>`_ + +* |OK_ICON| `OpenCorporates Database of Companies in the World <https://opencorporates.com/>`_ + +* 
|OK_ICON| `Our World in Data <http://ourworldindata.org/>`_ + +* |OK_ICON| `SciencesPo World Trade Gravity Datasets <http://econ.sciences-po.fr/thierry-mayer/data>`_ + +* |OK_ICON| `The Atlas of Economic Complexity <http://atlas.cid.harvard.edu>`_ + +* |OK_ICON| `The Center for International Data <http://cid.econ.ucdavis.edu>`_ + +* |OK_ICON| `The Observatory of Economic Complexity <http://atlas.media.mit.edu/en/>`_ + +* |OK_ICON| `UN Commodity Trade Statistics <http://comtrade.un.org/db/>`_ + +* |OK_ICON| `UN Human Development Reports <http://hdr.undp.org/en>`_ + Education ------------- - -* `Student Data from Free Code Camp <http://academictorrents.com/details/030b10dad0846b5aecc3905692890fb02404adbf>`_ - - +--------- + +* |OK_ICON| `College Scorecard Data <https://collegescorecard.ed.gov/data/>`_ + +* |OK_ICON| `Student Data from Free Code Camp <http://academictorrents.com/details/030b10dad0846b5aecc3905692890fb02404adbf>`_ + Energy ------ - -* `AMPds <http://ampds.org/>`_ -* `BLUEd <http://nilm.cmubi.org/>`_ -* `COMBED <http://combed.github.io/>`_ -* `Dataport <https://dataport.pecanstreet.org/>`_ -* `ECO <http://www.vs.inf.ethz.ch/res/show.html?what=eco-data>`_ -* `EIA <http://www.eia.gov/electricity/data/eia923/>`_ -* `HFED <http://hfed.github.io/>`_ -* `iAWE <http://iawe.github.io/>`_ -* `Plaid <http://plaidplug.com/>`_ -* `REDD <http://redd.csail.mit.edu/>`_ -* `UK-Dale <http://www.doc.ic.ac.uk/~dk3810/data/>`_ - - + +* |OK_ICON| `AMPds <http://ampds.org/>`_ + +* |OK_ICON| `BLUEd <http://nilm.cmubi.org/>`_ + +* |OK_ICON| `COMBED <http://combed.github.io/>`_ + +* |OK_ICON| `DRED <http://www.st.ewi.tudelft.nl/~akshay/dred/>`_ + +* |OK_ICON| `ECO <http://www.vs.inf.ethz.ch/res/show.html?what=eco-data>`_ + +* |OK_ICON| `EIA <http://www.eia.gov/electricity/data/eia923/>`_ + +* |OK_ICON| `Global Power Plant Database - The Global Power Plant Database is a [...] 
<http://datasets.wri.org/dataset/globalpowerplantdatabase>`_ + +* |OK_ICON| `HES - Household Electricity Study, UK <http://randd.defra.gov.uk/Default.aspx?Menu=Menu&Module=More&Location=None&ProjectID=17359&FromSearch=Y&Publisher=1&SearchText=EV0702&SortString=ProjectCode&SortOrder=Asc&Paging=10#Description>`_ + +* |OK_ICON| `HFED <http://hfed.github.io/>`_ + +* |OK_ICON| `PLAID - The Plug Load Appliance Identification Dataset <http://plaidplug.com/>`_ + +* |OK_ICON| `REDD <http://redd.csail.mit.edu/>`_ + +* |OK_ICON| `Tracebase <https://www.tracebase.org>`_ + +* |OK_ICON| `UK-DALE - UK Domestic Appliance-Level Electricity <http://www.doc.ic.ac.uk/~dk3810/data/>`_ + +* |OK_ICON| `WHITED <http://nilmworkshop.org/2016/proceedings/Poster_ID18.pdf>`_ + +* |OK_ICON| `iAWE <http://iawe.github.io/>`_ + Finance ------- - -* `CBOE Futures Exchange <http://cfe.cboe.com/Data/>`_ -* `Google Finance <https://www.google.com/finance>`_ -* `Google Trends <http://www.google.com/trends?q=google&ctab=0&geo=all&date=all&sort=0>`_ -* `NASDAQ <https://data.nasdaq.com/>`_ -* `OANDA <http://www.oanda.com/>`_ -* `OSU Financial data <http://fisher.osu.edu/fin/fdf/osudata.htm>`_ -* `Quandl <https://www.quandl.com/>`_ -* `St Louis Federal <https://research.stlouisfed.org/fred2/>`_ -* `Yahoo Finance <http://finance.yahoo.com/>`_ - - -Geology -------- - -* `Earth Models <http://www.earthmodels.org/>`_ -* `Smithsonian Institution Global Volcano and Eruption Database <http://volcano.si.edu/>`_ -* `USGS Earthquake Archives <http://earthquake.usgs.gov/earthquakes/search/>`_ - - -GIS/Environment ---------------- - -* `BODC - marine data of ~22K vars <http://www.bodc.ac.uk/data/where_to_find_data/>`_ -* `Cambridge, MA, US, GIS data on GitHub <http://cambridgegis.github.io/gisdata.html>`_ -* `EOSDIS - NASA's earth observing system data <http://sedac.ciesin.columbia.edu/data/sets/browse>`_ -* `Factual Global Location Data <https://www.factual.com/>`_ -* `Geo Spatial Data from ASU 
<http://geodacenter.asu.edu/datalist/>`_ -* `Geo Wiki Project - Citizen-driven Environmental Monitoring <http://geo-wiki.org/>`_ -* `GeoFabrik - OSM data extracted to a variety of formats and areas <http://download.geofabrik.de/>`_ -* `GeoNames Worldwide <http://www.geonames.org/>`_ -* `Global Administrative Areas Database (GADM) <http://www.gadm.org/>`_ -* `Homeland Infrastructure Foundation-Level Data <https://hifld-dhs-gii.opendata.arcgis.com/>`_ -* `Integrated Marine Observing System (IMOS) - roughly 30TB of ocean measurements <https://imos.aodn.org.au>`_ or `on S3 <http://imos-data.s3-website-ap-southeast-2.amazonaws.com/>`_ -* `International Institute for Systems Analysis - GIS Datasets <http://www.iiasa.ac.at/web/home/research/modelsData/Models--Tools--Data.en.html>`_ -* `Landsat 8 on AWS <https://aws.amazon.com/public-data-sets/landsat/>`_ -* `List of all countries in all languages <https://github.com/umpirsky/country-list>`_ -* `Marinexplore - Open Oceanographic Data <http://marinexplore.org/>`_ -* `National Weather Service GIS Data Portal <http://www.nws.noaa.gov/gis/>`_ -* `Natural Earth - vectors and rasters of the world <http://www.naturalearthdata.com/>`_ -* `OpenAddresses <http://openaddresses.io/>`_ -* `OpenStreetMap (OSM) <http://wiki.openstreetmap.org/wiki/Downloading_data>`_ -* `Pleiades - Gazetteer and graph of ancient places <http://pleiades.stoa.org/>`_ -* `Reverse Geocoder using OSM data <https://github.com/kno10/reversegeocode>`_ & `additional high-resolution data files <http://data.ub.uni-muenchen.de/61/>`_ -* `TIGER/Line - U.S. boundaries and roads <http://www.census.gov/geo/maps-data/data/tiger-line.html>`_ -* `TwoFishes - Foursquare's coarse geocoder <https://github.com/foursquare/twofishes>`_ -* `TZ Timezones shapfiles <http://efele.net/maps/tz/world/>`_ -* `UN Environmental Data <http://geodata.grid.unep.ch/>`_ -* `World boundaries from the U.S. 
Department of State <https://hiu.state.gov/data/data.aspx>`_ -* `World countries in multiple formats <https://github.com/mledoze/countries>`_ - + +* |FIXME_ICON| `CBOE Futures Exchange <http://cfe.cboe.com/Data/>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//Finance/CBOE-Futures-Exchange.yml>`_] + +* |OK_ICON| `Google Finance <https://www.google.com/finance>`_ + +* |OK_ICON| `Google Trends <http://www.google.com/trends?q=google&ctab=0&geo=all&date=all&sort=0>`_ + +* |OK_ICON| `NASDAQ <https://data.nasdaq.com/>`_ + +* |OK_ICON| `NYSE Market Data <ftp://ftp.nyxdata.com>`_ + +* |OK_ICON| `OANDA <http://www.oanda.com/>`_ + +* |OK_ICON| `OSU Financial data <http://fisher.osu.edu/fin/fdf/osudata.htm>`_ + +* |OK_ICON| `Quandl <https://www.quandl.com/>`_ + +* |OK_ICON| `St Louis Federal <https://research.stlouisfed.org/fred2/>`_ + +* |OK_ICON| `Yahoo Finance <http://finance.yahoo.com/>`_ + +GIS +--- + +* |OK_ICON| `ArcGIS Open Data portal <http://opendata.arcgis.com/>`_ + +* |OK_ICON| `Cambridge, MA, US, GIS data on GitHub <http://cambridgegis.github.io/gisdata.html>`_ + +* |FIXME_ICON| `Factual Global Location Data <https://www.factual.com/>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//GIS/Factual-Global-Location-Data.yml>`_] + +* |OK_ICON| `Geo Maps - High Quality GeoJSON maps programmatically generated <https://github.com/simonepri/geo-maps>`_ + +* |OK_ICON| `Geo Spatial Data from ASU <http://geodacenter.asu.edu/datalist/>`_ + +* |OK_ICON| `Geo Wiki Project - Citizen-driven Environmental Monitoring <http://geo-wiki.org/>`_ + +* |OK_ICON| `GeoFabrik - OSM data extracted to a variety of formats and areas <http://download.geofabrik.de/>`_ + +* |OK_ICON| `GeoNames Worldwide <http://www.geonames.org/>`_ + +* |FIXME_ICON| `Global Administrative Areas Database (GADM) <http://www.gadm.org/>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//GIS/Global-Administrative-Areas-Database-GADM.yml>`_] + +* |OK_ICON| 
`Homeland Infrastructure Foundation-Level Data <https://hifld-geoplatform.opendata.arcgis.com/>`_ + +* |OK_ICON| `Landsat 8 on AWS <https://aws.amazon.com/public-data-sets/landsat/>`_ + +* |OK_ICON| `List of all countries in all languages <https://github.com/umpirsky/country-list>`_ + +* |OK_ICON| `National Weather Service GIS Data Portal <http://www.nws.noaa.gov/gis/>`_ + +* |OK_ICON| `Natural Earth - vectors and rasters of the world <http://www.naturalearthdata.com/>`_ + +* |OK_ICON| `OpenAddresses <http://openaddresses.io/>`_ + +* |OK_ICON| `OpenStreetMap (OSM) <http://wiki.openstreetmap.org/wiki/Downloading_data>`_ + +* |OK_ICON| `Pleiades - Gazetteer and graph of ancient places <http://pleiades.stoa.org/>`_ + +* |OK_ICON| `Reverse Geocoder using OSM data <https://github.com/kno10/reversegeocode>`_ + +* |FIXME_ICON| `TIGER/Line - U.S. boundaries and roads <http://www.census.gov/geo/maps-data/data/tiger-line.html>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//GIS/TIGER-Line.yml>`_] + +* |OK_ICON| `TZ Timezones shapefiles <http://efele.net/maps/tz/world/>`_ + +* |OK_ICON| `TwoFishes - Foursquare's coarse geocoder <https://github.com/foursquare/twofishes>`_ + +* |OK_ICON| `UN Environmental Data <http://geodata.grid.unep.ch/>`_ + +* |FIXME_ICON| `World boundaries from the U.S.
Department of State <https://hiu.state.gov/data/data.aspx>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//GIS/World-boundaries-from--the-U.S.-Department-of-State.yml>`_] + +* |OK_ICON| `World countries in multiple formats <https://github.com/mledoze/countries>`_ + Government ---------- - -* `OpenDataSoft's list of 1,600 open data portals <https://www.opendatasoft.com/a-comprehensive-list-of-all-open-data-portals-around-the-world/>`_ -* `A list of cities and countries contributed by community <https://github.com/caesar0301/awesome-public-datasets/blob/master/Government.rst>`_ - - + +* |OK_ICON| `Alberta, Province of Canada <http://open.alberta.ca>`_ + +* |OK_ICON| `Antwerp, Belgium <http://opendata.antwerpen.be/datasets>`_ + +* |OK_ICON| `Argentina (non official) <http://datar.noip.me/>`_ + +* |OK_ICON| `Datos Argentina - Portal de datos abiertos de la República Argentina. [...] <http://datos.gob.ar/>`_ + +* |OK_ICON| `Austin, TX, US <https://data.austintexas.gov/>`_ + +* |OK_ICON| `Australia (abs.gov.au) <http://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/3301.02009?OpenDocument>`_ + +* |OK_ICON| `Australia (data.gov.au) <https://data.gov.au/>`_ + +* |OK_ICON| `Austria (data.gv.at) <https://www.data.gv.at/>`_ + +* |OK_ICON| `Baton Rouge, LA, US <https://data.brla.gov/>`_ + +* |OK_ICON| `Belgium <http://data.gov.be/>`_ + +* |OK_ICON| `Brazil <http://dados.gov.br/dataset>`_ + +* |OK_ICON| `Buenos Aires, Argentina <http://data.buenosaires.gob.ar/>`_ + +* |FIXME_ICON| `Calgary, AB, Canada <https://data.calgary.ca/OpenData/Pages/DatasetListingAlphabetical.aspx>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//Government/Calgary-AB-Canada.yml>`_] + +* |OK_ICON| `Cambridge, MA, US <https://data.cambridgema.gov/>`_ + +* |OK_ICON| `Canada <http://open.canada.ca/>`_ + +* |OK_ICON| `Chicago <https://data.cityofchicago.org/>`_ + +* |OK_ICON| `Chile <http://datos.gob.cl/dataset>`_ + +* |OK_ICON| `Dallas Open Data 
<https://www.dallasopendata.com/>`_ + +* |OK_ICON| `DataBC - data from the Province of British Columbia <http://www.data.gov.bc.ca/>`_ + +* |OK_ICON| `Denver Open Data <http://data.denvergov.org//>`_ + +* |OK_ICON| `Durham, NC Open Data <https://opendurham.nc.gov/explore/>`_ + +* |OK_ICON| `Edmonton, AB, Canada <https://data.edmonton.ca/>`_ + +* |OK_ICON| `England LGInform <http://lginform.local.gov.uk/>`_ + +* |OK_ICON| `EuroStat <http://ec.europa.eu/eurostat/data/database>`_ + +* |OK_ICON| `EveryPolitician - Ongoing project collating and sharing data on every [...] <http://everypolitician.org/>`_ + +* |OK_ICON| `FedStats <http://fedstats.sites.usa.gov/>`_ + +* |OK_ICON| `Finland <https://www.opendata.fi/en>`_ + +* |OK_ICON| `France <https://www.data.gouv.fr/en/datasets/>`_ + +* |OK_ICON| `Fredericton, NB, Canada <http://www.fredericton.ca/en/citygovernment/Catalogue.asp>`_ + +* |OK_ICON| `Gatineau, QC, Canada <http://www.gatineau.ca/donneesouvertes/default_fr.aspx>`_ + +* |OK_ICON| `Germany <https://www-genesis.destatis.de/genesis/online>`_ + +* |OK_ICON| `Ghent, Belgium <https://data.stad.gent/data>`_ + +* |OK_ICON| `Glasgow, Scotland, UK <https://data.glasgow.gov.uk/>`_ + +* |OK_ICON| `Greece <http://www.data.gov.gr/>`_ + +* |OK_ICON| `Guardian world governments <http://www.guardian.co.uk/world-government-data>`_ + +* |FIXME_ICON| `Halifax, NS, Canada <http://www.halifax.ca/opendata/index.php>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//Government/Halifax-NS-Canada.yml>`_] + +* |OK_ICON| `Helsinki Region, Finland <http://www.hri.fi/en/>`_ + +* |OK_ICON| `Hong Kong, China <https://data.gov.hk/en/>`_ + +* |FIXME_ICON| `Houston Open Data <http://data.ohouston.org>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//Government/Houston-Open-Data.yml>`_] + +* |OK_ICON| `Indian Government Data <https://data.gov.in/>`_ + +* |OK_ICON| `Indonesian Data Portal <http://data.go.id/>`_ + +* |OK_ICON| `Ireland's Open Data Portal 
<https://data.gov.ie/data>`_ + +* |OK_ICON| `Italy - Il Portale dati.gov.it è il catalogo nazionale dei metadati [...] <https://www.dati.gov.it/>`_ + +* |OK_ICON| `Japan <http://www.e-stat.go.jp/SG1/estat/eStatTopPortalE.do>`_ + +* |OK_ICON| `Laval, QC, Canada <http://www.laval.ca/Pages/Fr/Citoyens/donnees.aspx>`_ + +* |OK_ICON| `Lexington, KY <http://data.lexingtonky.gov/>`_ + +* |OK_ICON| `London Datastore, UK <http://data.london.gov.uk/dataset>`_ + +* |OK_ICON| `London, ON, Canada <http://www.london.ca/city-hall/open-data/Pages/default.aspx>`_ + +* |OK_ICON| `Los Angeles Open Data <https://data.lacity.org/>`_ + +* |OK_ICON| `Luxembourg - Luxembourgish Open Data Portal <https://data.public.lu/en/>`_ + +* |OK_ICON| `MassGIS, Massachusetts, U.S. <http://www.mass.gov/anf/research-and-tech/it-serv-and-support/application-serv/office-of-geographic-information-massgis/>`_ + +* |OK_ICON| `Metropolitan Transportation Commission (MTC), California, US <http://mtc.ca.gov/tools-resources/data-tools/open-data-library>`_ + +* |OK_ICON| `Mexico <http://catalogo.datos.gob.mx/dataset>`_ + +* |OK_ICON| `Mississauga, ON, Canada <http://www.mississauga.ca/portal/residents/publicationsopendatacatalogue>`_ + +* |OK_ICON| `Moldova <http://data.gov.md/>`_ + +* |OK_ICON| `Moncton, NB, Canada <http://www.moncton.ca/Government/Terms_of_use/Open_Data_Purpose/Data_Catalogue.htm>`_ + +* |OK_ICON| `Montreal, QC, Canada <http://donnees.ville.montreal.qc.ca/>`_ + +* |OK_ICON| `Mountain View, California, US (GIS) <http://data-mountainview.opendata.arcgis.com/>`_ + +* |FIXME_ICON| `NYC Open Data <https://nycplatform.socrata.com/>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//Government/NYC-Open-Data.yml>`_] + +* |OK_ICON| `NYC betanyc <http://betanyc.us/>`_ + +* |OK_ICON| `Netherlands <https://data.overheid.nl/>`_ + +* |OK_ICON| `New Zealand <http://www.stats.govt.nz/browse_for_stats.aspx>`_ + +* |OK_ICON| `OECD <https://data.oecd.org/>`_ + +* |OK_ICON| `Oakland, California,
US <https://data.oaklandnet.com/>`_ + +* |OK_ICON| `Oklahoma <https://data.ok.gov/>`_ + +* |OK_ICON| `Open Data for Africa <http://opendataforafrica.org/>`_ + +* |OK_ICON| `Open Government Data (OGD) Platform India <https://data.gov.in/>`_ + +* |OK_ICON| `OpenDataSoft's list of 1,600 open data <https://www.opendatasoft.com/a-comprehensive-list-of-all-open-data-portals-around-the-world/>`_ + +* |OK_ICON| `Oregon <https://data.oregon.gov/>`_ + +* |OK_ICON| `Ottawa, ON, Canada <http://data.ottawa.ca/en/>`_ + +* |OK_ICON| `Palo Alto, California, US <http://data.cityofpaloalto.org/home>`_ + +* |OK_ICON| `OpenDataPhilly - OpenDataPhilly is a catalog of open data in the [...] <https://www.opendataphilly.org/>`_ + +* |OK_ICON| `Portland, Oregon <https://www.portlandoregon.gov/28130>`_ + +* |OK_ICON| `Portugal - Pordata organization <http://www.pordata.pt/en/Home>`_ + +* |OK_ICON| `Puerto Rico Government <https://data.pr.gov//>`_ + +* |OK_ICON| `Quebec City, QC, Canada <http://donnees.ville.quebec.qc.ca/>`_ + +* |FIXME_ICON| `Quebec Province of Canada <http://donnees.gouv.qc.ca/>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//Government/Quebec-Province-of-Canada.yml>`_] + +* |OK_ICON| `Regina SK, Canada <http://open.regina.ca/>`_ + +* |OK_ICON| `Rio de Janeiro, Brazil <http://data.rio.rj.gov.br/>`_ + +* |OK_ICON| `Romania <http://data.gov.ro/>`_ + +* |OK_ICON| `Russia <http://data.gov.ru>`_ + +* |OK_ICON| `San Antonio, TX - Community Information Now - CI:Now is a nonprofit [...] 
<http://cinow.info/>`_ + +* |OK_ICON| `San Francisco Data sets <http://datasf.org/>`_ + +* |OK_ICON| `San Jose, California, US <http://data.sanjoseca.gov/home/>`_ + +* |OK_ICON| `San Mateo County, California, US <https://data.smcgov.org/>`_ + +* |OK_ICON| `Saskatchewan, Province of Canada <http://opendatask.ca/data/>`_ + +* |OK_ICON| `Seattle <https://data.seattle.gov/>`_ + +* |OK_ICON| `Singapore Government Data <https://data.gov.sg/>`_ + +* |OK_ICON| `South Africa Trade Statistics <http://www.econostatistics.co.za/>`_ + +* |OK_ICON| `South Africa <http://beta2.statssa.gov.za/>`_ + +* |OK_ICON| `State of Utah, US <https://opendata.utah.gov/>`_ + +* |OK_ICON| `Switzerland <http://www.opendata.admin.ch/>`_ + +* |OK_ICON| `Taiwan g0v <http://data.g0v.tw/>`_ + +* |OK_ICON| `Taiwan <http://data.gov.tw/>`_ + +* |OK_ICON| `Tel-Aviv Open Data <https://opendata.tel-aviv.gov.il/index_en.html#/>`_ + +* |OK_ICON| `Texas Open Data <https://data.texas.gov/>`_ + +* |FIXME_ICON| `The World Bank <http://wdronline.worldbank.org/>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//Government/The-World-Bank.yml>`_] + +* |FIXME_ICON| `Toronto, ON, Canada <http://www1.toronto.ca/wps/portal/contentonly?vgnextoid=1a66e03bb8d1e310VgnVCM10000071d60f89RCRD>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//Government/Toronto-ON-Canada.yml>`_] + +* |OK_ICON| `Tunisia <http://www.data.gov.tn/>`_ + +* |OK_ICON| `U.K. Government Data <http://data.gov.uk/data>`_ + +* |OK_ICON| `U.S. American Community Survey <https://www.census.gov/programs-surveys/acs/data.html/>`_ + +* |OK_ICON| `U.S. CDC Public Health datasets <https://www.cdc.gov/nchs/data_access/ftp_data.htm>`_ + +* |OK_ICON| `U.S. Census Bureau <http://www.census.gov/data.html>`_ + +* |OK_ICON| `U.S. Department of Housing and Urban Development (HUD) <http://www.huduser.gov/portal/datasets/pdrdatas.html>`_ + +* |OK_ICON| `U.S. 
Federal Government Agencies <http://www.data.gov/metrics>`_ + +* |OK_ICON| `U.S. Federal Government Data Catalog <http://catalog.data.gov/dataset>`_ + +* |OK_ICON| `U.S. Food and Drug Administration (FDA) <https://open.fda.gov/index.html>`_ + +* |OK_ICON| `U.S. National Center for Education Statistics (NCES) <http://nces.ed.gov/>`_ + +* |OK_ICON| `U.S. Open Government <http://www.data.gov/open-gov/>`_ + +* |FIXME_ICON| `UK 2011 Census Open Atlas Project <http://www.alex-singleton.com/r/2014/02/05/2011-census-open-atlas-project-version-two/>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//Government/UK-2011-Census-Open-Atlas-Project.yml>`_] + +* |OK_ICON| `U.S. Patent and Trademark Office (USPTO) Bulk Data Products <https://www.uspto.gov/learning-and-resources/bulk-data-products>`_ + +* |OK_ICON| `Uganda Bureau of Statistics <http://www.ubos.org/unda/index.php/catalog>`_ + +* |OK_ICON| `United Nations <http://data.un.org/>`_ + +* |OK_ICON| `Uruguay <https://catalogodatos.gub.uy/>`_ + +* |OK_ICON| `Valley Transportation Authority (VTA), California, US <https://data.vta.org/>`_ + +* |OK_ICON| `Vancouver, BC Open Data Catalog <http://data.vancouver.ca/datacatalogue/>`_ + +* |FIXME_ICON| `Victoria, BC, Canada <http://www.victoria.ca/EN/main/city/open-data-catalogue.html>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//Government/Victoria-BC-Canada.yml>`_] + +* |OK_ICON| `Vienna, Austria <https://open.wien.gv.at/site/open-data/>`_ + Healthcare ---------- - -* `EHDP Large Health Data Sets <http://www.ehdp.com/vitalnet/datasets.htm>`_ -* `Gapminder World demographic databases <http://www.gapminder.org/data/>`_ -* `Medicare Coverage Database (MCD), U.S. 
<https://www.cms.gov/medicare-coverage-database/>`_ -* `Medicare Data Engine of medicare.gov Data <https://data.medicare.gov/>`_ -* `Medicare Data File <http://go.cms.gov/19xxPN4>`_ -* `MeSH, the vocabulary thesaurus used for indexing articles for PubMed <https://www.nlm.nih.gov/mesh/filelist.html>`_ -* `Number of Ebola Cases and Deaths in Affected Countries (2014) <https://data.hdx.rwlabs.org/dataset/ebola-cases-2014>`_ -* `Open-ODS (structure of the UK NHS) <http://www.openods.co.uk>`_ -* `OpenPaymentsData, Healthcare financial relationship data <https://openpaymentsdata.cms.gov>`_ -* `The Cancer Genome Atlas project (TCGA) <https://tcga-data.nci.nih.gov/tcga/tcgaDownload.jsp>`_ and `BigQuery table <http://google-genomics.readthedocs.org/en/latest/use_cases/discover_public_data/isb_cgc_data.html>`_ -* `World Health Organization Global Health Observatory <http://www.who.int/gho/en/>`_ - - -Image Processing ----------------- - -* `10k US Adult Faces Database <http://wilmabainbridge.com/facememorability2.html>`_ -* `2GB of Photos of Cats <http://137.189.35.203/WebUI/CatDatabase/catData.html>`_ or `Archive version <https://web.archive.org/web/20150520175645/http://137.189.35.203/WebUI/CatDatabase/catData.html>`_ -* `Affective Image Classification <http://www.imageemotion.org/>`_ -* `Animals with attributes <http://attributes.kyb.tuebingen.mpg.de/>`_ -* `Face Recognition Benchmark <http://www.face-rec.org/databases/>`_ -* `ImageNet (in WordNet hierarchy) <http://www.image-net.org/>`_ -* `Indoor Scene Recognition <http://web.mit.edu/torralba/www/indoor.html>`_ -* `International Affective Picture System, UFL <http://csea.phhp.ufl.edu/media/iapsmessage.html>`_ -* `Massive Visual Memory Stimuli, MIT <http://cvcl.mit.edu/MM/stimuli.html>`_ -* `Several Shape-from-Silhouette Datasets <http://kaiwolf.no-ip.org/3d-model-repository.html>`_ -* `Stanford Dogs Dataset <http://vision.stanford.edu/aditya86/ImageNetDogs/>`_ -* `SUN database, MIT 
<http://groups.csail.mit.edu/vision/SUN/hierarchy.html>`_ -* `The Oxford-IIIT Pet Dataset <http://www.robots.ox.ac.uk/~vgg/data/pets/>`_ -* `YouTube Faces Database <http://www.cs.tau.ac.il/~wolf/ytfaces/>`_ -* `Adience Unfiltered faces for gender and age classification <http://www.openu.ac.il/home/hassner/Adience/data.html>`_ -* `The Action Similarity Labeling (ASLAN) Challenge <http://www.openu.ac.il/home/hassner/data/ASLAN/ASLAN.html>`_ -* `Violent-Flows - Crowd Violence \ Non-violence Database and benchmark <http://www.openu.ac.il/home/hassner/data/violentflows/>`_ - -Machine Learning ----------------- - -* `Delve Datasets for classification and regression (Univ. of Toronto) <http://www.cs.toronto.edu/~delve/data/datasets.html>`_ -* `Discogs Monthly Data <http://data.discogs.com/>`_ -* `eBay Online Auctions (2012) <http://www.modelingonlineauctions.com/datasets>`_ -* `IMDb Database <http://www.imdb.com/interfaces>`_ -* `Keel Repository for classification, regression and time series <http://sci2s.ugr.es/keel/datasets.php>`_ -* `Labeled Faces in the Wild (LFW) <http://vis-www.cs.umass.edu/lfw/>`_ -* `Lending Club Loan Data <https://www.lendingclub.com/info/download-data.action>`_ -* `Machine Learning Data Set Repository <http://mldata.org/>`_ -* `Million Song Dataset <http://labrosa.ee.columbia.edu/millionsong/>`_ -* `More Song Datasets <http://labrosa.ee.columbia.edu/millionsong/pages/additional-datasets>`_ -* `MovieLens Data Sets <http://grouplens.org/datasets/movielens/>`_ -* `RDataMining - "R and Data Mining" ebook data <http://www.rdatamining.com/data>`_ -* `Registered Meteorites on Earth <http://healthintelligence.drupalgardens.com/content/registered-meteorites-has-impacted-earth-visualized>`_ -* `Restaurants Health Score Data in San Francisco <http://missionlocal.org/san-francisco-restaurant-health-inspections/>`_ -* `UCI Machine Learning Repository <http://archive.ics.uci.edu/ml/>`_ -* `Yahoo! 
Ratings and Classification Data <http://webscope.sandbox.yahoo.com/catalog.php?datatype=r>`_ - - + +* |OK_ICON| `Composition of Foods Raw, Processed, Prepared USDA National Nutrient Database for Standard [...] <https://data.nal.usda.gov/dataset/composition-foods-raw-processed-prepared-usda-national-nutrient-database-standard-reference-release-27>`_ + +* |OK_ICON| `EHDP Large Health Data Sets <http://www.ehdp.com/vitalnet/datasets.htm>`_ + +* |OK_ICON| `GDC - GDC supports several cancer genome programs for CCG, TCGA, TARGET etc. <https://gdc.cancer.gov/>`_ + +* |OK_ICON| `Gapminder World demographic databases <http://www.gapminder.org/data/>`_ + +* |OK_ICON| `MeSH, the vocabulary thesaurus used for indexing articles for PubMed <https://www.nlm.nih.gov/mesh/filelist.html>`_ + +* |OK_ICON| `Medicare Coverage Database (MCD), U.S. <https://www.cms.gov/medicare-coverage-database/>`_ + +* |OK_ICON| `Medicare Data Engine of medicare.gov Data <https://data.medicare.gov/>`_ + +* |OK_ICON| `Medicare Data File <http://go.cms.gov/19xxPN4>`_ + +* |FIXME_ICON| `Number of Ebola Cases and Deaths in Affected Countries (2014) <https://data.hdx.rwlabs.org/dataset/ebola-cases-2014>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//Healthcare/Number-of-Ebola-Cases-and-Deaths-in-Affected-Countries-2014.yml>`_] + +* |OK_ICON| `Open-ODS (structure of the UK NHS) <http://www.openods.co.uk>`_ + +* |OK_ICON| `OpenPaymentsData, Healthcare financial relationship data <https://openpaymentsdata.cms.gov>`_ + +* |OK_ICON| `PhysioBank Databases - A large and growing archive of physiological data. 
<https://www.physionet.org/physiobank/database/>`_ + +* |OK_ICON| `The Cancer Imaging Archive (TCIA) <https://www.cancerimagingarchive.net>`_ + +* |OK_ICON| `The Cancer Genome Atlas project (TCGA) <https://portal.gdc.cancer.gov/>`_ + +* |OK_ICON| `World Health Organization Global Health Observatory <http://www.who.int/gho/en/>`_ + +ImageProcessing +--------------- + +* |OK_ICON| `10k US Adult Faces Database <http://wilmabainbridge.com/facememorability2.html>`_ + +* |FIXME_ICON| `2GB of Photos of Cats <http://137.189.35.203/WebUI/CatDatabase/catData.html>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//ImageProcessing/2GB-of-Photos-of-Cats.yml>`_] + +* |OK_ICON| `Adience Unfiltered faces for gender and age classification <http://www.openu.ac.il/home/hassner/Adience/data.html>`_ + +* |OK_ICON| `Affective Image Classification <http://www.imageemotion.org/>`_ + +* |OK_ICON| `Animals with attributes <http://attributes.kyb.tuebingen.mpg.de/>`_ + +* |OK_ICON| `Caltech Pedestrian Detection Benchmark <http://www.vision.caltech.edu/Image_Datasets/CaltechPedestrians/>`_ + +* |OK_ICON| `Chars74K dataset - Character Recognition in Natural Images (both English [...] 
<http://www.ee.surrey.ac.uk/CVSSP/demos/chars74k/>`_ + +* |OK_ICON| `Face Recognition Benchmark <http://www.face-rec.org/databases/>`_ + +* |OK_ICON| `Flickr: 32 Class Brand Logos <http://www.multimedia-computing.de/flickrlogos/>`_ + +* |OK_ICON| `GDXray - X-ray images for X-ray testing and Computer Vision <http://dmery.ing.puc.cl/index.php/material/gdxray/>`_ + +* |FIXME_ICON| `ImageNet (in WordNet hierarchy) <http://www.image-net.org/>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//ImageProcessing/ImageNet.yml>`_] + +* |OK_ICON| `Indoor Scene Recognition <http://web.mit.edu/torralba/www/indoor.html>`_ + +* |OK_ICON| `International Affective Picture System, UFL <http://csea.phhp.ufl.edu/media/iapsmessage.html>`_ + +* |OK_ICON| `MNIST database of handwritten digits, near 1 million examples <http://yann.lecun.com/exdb/mnist/>`_ + +* |OK_ICON| `Massive Visual Memory Stimuli, MIT <http://cvcl.mit.edu/MM/stimuli.html>`_ + +* |OK_ICON| `SUN database, MIT <http://groups.csail.mit.edu/vision/SUN/hierarchy.html>`_ + +* |FIXME_ICON| `Several Shape-from-Silhouette Datasets <http://kaiwolf.no-ip.org/3d-model-repository.html>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//ImageProcessing/Several-Shape-from-Silhouette-Datasets.yml>`_] + +* |OK_ICON| `Stanford Dogs Dataset <http://vision.stanford.edu/aditya86/ImageNetDogs/>`_ + +* |OK_ICON| `The Action Similarity Labeling (ASLAN) Challenge <http://www.openu.ac.il/home/hassner/data/ASLAN/ASLAN.html>`_ + +* |OK_ICON| `The Oxford-IIIT Pet Dataset <http://www.robots.ox.ac.uk/~vgg/data/pets/>`_ + +* |OK_ICON| `Violent-Flows - Crowd Violence / Non-violence Database and benchmark <http://www.openu.ac.il/home/hassner/data/violentflows/>`_ + +* |OK_ICON| `Visual genome <http://visualgenome.org/api/v0/api_home.html>`_ + +* |OK_ICON| `YouTube Faces Database <http://www.cs.tau.ac.il/~wolf/ytfaces/>`_ + +MachineLearning +--------------- + +* |OK_ICON| `Context-aware data sets from five domains 
<https://github.com/irecsys/CARSKit/tree/master/context-aware_data_sets>`_ + +* |OK_ICON| `Delve Datasets for classification and regression <http://www.cs.toronto.edu/~delve/data/datasets.html>`_ + +* |OK_ICON| `Discogs Monthly Data <http://data.discogs.com/>`_ + +* |OK_ICON| `Free Music Archive <https://github.com/mdeff/fma>`_ + +* |OK_ICON| `IMDb Database <http://www.imdb.com/interfaces>`_ + +* |OK_ICON| `Keel Repository for classification, regression and time series <http://sci2s.ugr.es/keel/datasets.php>`_ + +* |OK_ICON| `Labeled Faces in the Wild (LFW) <http://vis-www.cs.umass.edu/lfw/>`_ + +* |OK_ICON| `Lending Club Loan Data <https://www.lendingclub.com/info/download-data.action>`_ + +* |OK_ICON| `Machine Learning Data Set Repository <http://mldata.org/>`_ + +* |OK_ICON| `Million Song Dataset <http://labrosa.ee.columbia.edu/millionsong/>`_ + +* |OK_ICON| `More Song Datasets <http://labrosa.ee.columbia.edu/millionsong/pages/additional-datasets>`_ + +* |OK_ICON| `MovieLens Data Sets <http://grouplens.org/datasets/movielens/>`_ + +* |OK_ICON| `New Yorker caption contest ratings <https://github.com/nextml/caption-contest-data>`_ + +* |OK_ICON| `RDataMining - "R and Data Mining" ebook data <http://www.rdatamining.com/data>`_ + +* |OK_ICON| `Registered Meteorites on Earth <http://publichealthintelligence.org/content/registered-meteorites-has-impacted-earth-visualized>`_ + +* |FIXME_ICON| `Restaurants Health Score Data in San Francisco <http://missionlocal.org/san-francisco-restaurant-health-inspections/>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//MachineLearning/Restaurants-Health-Score-Data-in-San-Francisco.yml>`_] + +* |OK_ICON| `UCI Machine Learning Repository <http://archive.ics.uci.edu/ml/>`_ + +* |FIXME_ICON| `Yahoo! 
Ratings and Classification Data <http://webscope.sandbox.yahoo.com/catalog.php?datatype=r>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//MachineLearning/Yahoo-Ratings-and-Classification-Data.yml>`_] + +* |OK_ICON| `YouTube-BoundingBoxes <https://research.google.com/youtube-bb/>`_ + +* |OK_ICON| `Youtube 8m <https://research.google.com/youtube8m/download.html>`_ + +* |OK_ICON| `eBay Online Auctions (2012) <http://www.modelingonlineauctions.com/datasets>`_ + Museums ------- - -* `Canada Science and Technology Museums Corporation's Open Data <http://techno-science.ca/en/data.php>`_ -* `Cooper-Hewitt's Collection Database <https://github.com/cooperhewitt/collection>`_ -* `Minneapolis Institute of Arts metadata <https://github.com/artsmia/collection>`_ -* `Natural History Museum (London) Data Portal <http://data.nhm.ac.uk/>`_ -* `Rijksmuseum Historical Art Collection <https://www.rijksmuseum.nl/en/api>`_ -* `Tate Collection metadata <https://github.com/tategallery/collection>`_ -* `The Getty vocabularies <http://vocab.getty.edu>`_ - - -Natural Language ----------------- - -* `Blogger Corpus <http://u.cs.biu.ac.il/~koppel/BlogCorpus.htm>`_ -* `CLiPS Stylometry Investigation Corpus <http://www.clips.uantwerpen.be/datasets/csi-corpus>`_ -* `ClueWeb09 FACC <http://lemurproject.org/clueweb09/FACC1/>`_ -* `ClueWeb12 FACC <http://lemurproject.org/clueweb12/FACC1/>`_ -* `DBpedia - 4.58M things with 583M facts <http://wiki.dbpedia.org/Datasets>`_ -* `Flickr Personal Taxonomies <http://www.isi.edu/~lerman/downloads/flickr/flickr_taxonomies.html>`_ -* `Freebase.com of people, places, and things <http://www.freebase.com/>`_ -* `Google Books Ngrams (2.2TB) <https://aws.amazon.com/datasets/google-books-ngrams/>`_ -* `Google Web 5gram (1TB, 2006) <https://catalog.ldc.upenn.edu/LDC2006T13>`_ -* `Gutenberg eBooks List <http://www.gutenberg.org/wiki/Gutenberg:Offline_Catalogs>`_ -* `Hansards text chunks of Canadian Parliament 
<http://www.isi.edu/natural-language/download/hansard/>`_ -* `Machine Comprehension Test (MCTest) of text from Microsoft Research <http://research.microsoft.com/en-us/um/redmond/projects/mctest/index.html>`_ -* `Machine Translation of European languages <http://statmt.org/wmt11/translation-task.html#download>`_ -* `Personae Corpus <http://www.clips.uantwerpen.be/datasets/personae-corpus>`_ -* `SaudiNewsNet Collection of Saudi Newspaper Articles (Arabic, 30K articles) <https://github.com/ParallelMazen/SaudiNewsNet>`_ -* `SMS Spam Collection in English <http://www.dt.fee.unicamp.br/~tiago/smsspamcollection/>`_ -* `USENET postings corpus of 2005~2011 <http://www.psych.ualberta.ca/~westburylab/downloads/usenetcorpus.download.html>`_ -* `Wikidata - Wikipedia databases <https://www.wikidata.org/wiki/Wikidata:Database_download>`_ -* `Wikipedia Links data - 40 Million Entities in Context <https://code.google.com/p/wiki-links/downloads/list>`_ -* `WordNet databases and tools <http://wordnet.princeton.edu/wordnet/download/>`_ - - + +* |OK_ICON| `Canada Science and Technology Museums Corporation's Open Data <http://techno-science.ca/en/data.php>`_ + +* |OK_ICON| `Cooper-Hewitt's Collection Database <https://github.com/cooperhewitt/collection>`_ + +* |OK_ICON| `Minneapolis Institute of Arts metadata <https://github.com/artsmia/collection>`_ + +* |OK_ICON| `Natural History Museum (London) Data Portal <http://data.nhm.ac.uk/>`_ + +* |OK_ICON| `Rijksmuseum Historical Art Collection <https://www.rijksmuseum.nl/en/api>`_ + +* |OK_ICON| `Tate Collection metadata <https://github.com/tategallery/collection>`_ + +* |OK_ICON| `The Getty vocabularies <http://vocab.getty.edu>`_ + +NaturalLanguage +--------------- + +* |OK_ICON| `Automatic Keyphrase Extraction <https://github.com/snkim/AutomaticKeyphraseExtraction/>`_ + +* |OK_ICON| `Blogger Corpus <http://u.cs.biu.ac.il/~koppel/BlogCorpus.htm>`_ + +* |OK_ICON| `CLiPS Stylometry Investigation Corpus 
<http://www.clips.uantwerpen.be/datasets/csi-corpus>`_ + +* |OK_ICON| `ClueWeb09 FACC <http://lemurproject.org/clueweb09/FACC1/>`_ + +* |OK_ICON| `ClueWeb12 FACC <http://lemurproject.org/clueweb12/FACC1/>`_ + +* |OK_ICON| `DBpedia - 4.58M things with 583M facts <http://wiki.dbpedia.org/Datasets>`_ + +* |OK_ICON| `Flickr Personal Taxonomies <http://www.isi.edu/~lerman/downloads/flickr/flickr_taxonomies.html>`_ + +* |OK_ICON| `Freebase of people, places, and things <http://www.freebase.com/>`_ + +* |OK_ICON| `Google Books Ngrams (2.2TB) <https://aws.amazon.com/datasets/google-books-ngrams/>`_ + +* |OK_ICON| `Google MC-AFP - Generated based on the public available Gigaword dataset [...] <https://github.com/google/mcafp>`_ + +* |OK_ICON| `Google Web 5gram (1TB, 2006) <https://catalog.ldc.upenn.edu/LDC2006T13>`_ + +* |OK_ICON| `Gutenberg eBooks List <http://www.gutenberg.org/wiki/Gutenberg:Offline_Catalogs>`_ + +* |OK_ICON| `Hansards text chunks of Canadian Parliament <http://www.isi.edu/natural-language/download/hansard/>`_ + +* |OK_ICON| `Microsoft MAchine Reading COmprehension Dataset (or MS MARCO) <http://www.msmarco.org/dataset.aspx>`_ + +* |OK_ICON| `Machine Comprehension Test (MCTest) of text from Microsoft Research <http://research.microsoft.com/en-us/um/redmond/projects/mctest/index.html>`_ + +* |OK_ICON| `Machine Translation of European languages <http://statmt.org/wmt11/translation-task.html#download>`_ + +* |FIXME_ICON| `Making Sense of Microposts 2013 - Concept Extraction <http://oak.dcs.shef.ac.uk/msm2013/challenge.html>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//NaturalLanguage/Making-Sense-of-Microposts-2013.yml>`_] + +* |OK_ICON| `Making Sense of Microposts 2016 - Named Entity rEcognition and Linking <http://microposts2016.seas.upenn.edu/challenge.html>`_ + +* |OK_ICON| `Multi-Domain Sentiment Dataset (version 2.0) <http://www.cs.jhu.edu/~mdredze/datasets/sentiment/>`_ + +* |OK_ICON| `Open Multilingual Wordnet 
<http://compling.hss.ntu.edu.sg/omw/>`_ + +* |OK_ICON| `POS/NER/Chunk annotated data <https://github.com/aritter/twitter_nlp/tree/master/data/annotated>`_ + +* |OK_ICON| `Personae Corpus <http://www.clips.uantwerpen.be/datasets/personae-corpus>`_ + +* |OK_ICON| `SMS Spam Collection in English <http://www.dt.fee.unicamp.br/~tiago/smsspamcollection/>`_ + +* |OK_ICON| `SaudiNewsNet Collection of Saudi Newspaper Articles (Arabic, 30K articles) <https://github.com/ParallelMazen/SaudiNewsNet>`_ + +* |OK_ICON| `Stanford Question Answering Dataset (SQuAD) <https://rajpurkar.github.io/SQuAD-explorer/>`_ + +* |OK_ICON| `USENET postings corpus of 2005~2011 <http://www.psych.ualberta.ca/~westburylab/downloads/usenetcorpus.download.html>`_ + +* |OK_ICON| `Universal Dependencies <http://universaldependencies.org>`_ + +* |OK_ICON| `Webhose - News/Blogs in multiple languages <https://webhose.io/datasets>`_ + +* |OK_ICON| `Wikidata - Wikipedia databases <https://www.wikidata.org/wiki/Wikidata:Database_download>`_ + +* |OK_ICON| `Wikipedia Links data - 40 Million Entities in Context <https://code.google.com/p/wiki-links/downloads/list>`_ + +* |FIXME_ICON| `WordNet databases and tools <http://wordnet.princeton.edu/wordnet/download/>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//NaturalLanguage/WordNet-databases-and-tools.yml>`_] + +Neuroscience +------------ + +* |OK_ICON| `Allen Institute Datasets <http://www.brain-map.org/>`_ + +* |OK_ICON| `Brain Catalogue <http://braincatalogue.org/>`_ + +* |OK_ICON| `Brainomics <http://brainomics.cea.fr/localizer>`_ + +* |FIXME_ICON| `CodeNeuro Datasets <http://datasets.codeneuro.org/>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//Neuroscience/CodeNeuro-Datasets.yml>`_] + +* |OK_ICON| `Collaborative Research in Computational Neuroscience (CRCNS) <http://crcns.org/data-sets>`_ + +* |OK_ICON| `FCP-INDI <http://fcon_1000.projects.nitrc.org/index.html>`_ + +* |OK_ICON| `Human Connectome Project 
<http://www.humanconnectome.org/data/>`_ + +* |OK_ICON| `NDAR <https://ndar.nih.gov/>`_ + +* |OK_ICON| `NIMH Data Archive <http://data-archive.nimh.nih.gov/>`_ + +* |OK_ICON| `NeuroData <http://neurodata.io>`_ + +* |OK_ICON| `Neuroelectro <http://neuroelectro.org/>`_ + +* |OK_ICON| `OASIS <http://www.oasis-brains.org/>`_ + +* |OK_ICON| `OpenfMRI <https://openfmri.org/>`_ + +* |OK_ICON| `Study Forrest <http://studyforrest.org>`_ + Physics ------- - -* `CERN Open Data Portal <http://opendata.cern.ch/>`_ -* `Crystallography Open Database <http://www.crystallography.net/>`_ -* `NASA Exoplanet Archive <http://exoplanetarchive.ipac.caltech.edu/>`_ -* `NSSDC (NASA) data of 550 space spacecraft <http://nssdc.gsfc.nasa.gov/nssdc/obtaining_data.html>`_ -* `Sloan Digital Sky Survey (SDSS) - Mapping the Universe <http://www.sdss.org/>`_ - - -Psychology/Cognition + +* |OK_ICON| `CERN Open Data Portal <http://opendata.cern.ch/>`_ + +* |OK_ICON| `Crystallography Open Database <http://www.crystallography.net/>`_ + +* |OK_ICON| `IceCube - South Pole Neutrino Observatory <http://icecube.wisc.edu/science/data>`_ + +* |OK_ICON| `NASA Exoplanet Archive <http://exoplanetarchive.ipac.caltech.edu/>`_ + +* |OK_ICON| `NSSDC (NASA) data of 550 spacecraft <http://nssdc.gsfc.nasa.gov/nssdc/obtaining_data.html>`_ + +* |OK_ICON| `Sloan Digital Sky Survey (SDSS) - Mapping the Universe <http://www.sdss.org/>`_ + +Psychology+Cognition -------------------- - -* `OSU Cognitive Modeling Repository Datasets <http://www.cmr.osu.edu/browse/datasets>`_ - - -Public Domains + +* |FIXME_ICON| `OSU Cognitive Modeling Repository Datasets <http://www.cmr.osu.edu/browse/datasets>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//Psychology+Cognition/OSU-Cognitive-Modeling-Repository-Datasets.yml>`_] + +PublicDomains +------------- + +* |OK_ICON| `Amazon <http://aws.amazon.com/datasets/>`_ + +* |OK_ICON| `Archive.org Datasets <https://archive.org/details/datasets>`_ + +* |OK_ICON|
`Archive-it from Internet Archive <https://www.archive-it.org/explore?show=Collections>`_ + +* |OK_ICON| `CMU JASA data archive <http://lib.stat.cmu.edu/jasadata/>`_ + +* |OK_ICON| `CMU StatLab collections <http://lib.stat.cmu.edu/datasets/>`_ + +* |OK_ICON| `Data.World <https://data.world>`_ + +* |OK_ICON| `Data360 <http://www.data360.org/index.aspx>`_ + +* |OK_ICON| `Enigma Public <https://public.enigma.com/>`_ + +* |OK_ICON| `Google <http://www.google.com/publicdata/directory>`_ + +* |FIXME_ICON| `Infochimps <http://www.infochimps.com/>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//PublicDomains/Infochimps.yml>`_] + +* |OK_ICON| `KDNuggets Data Collections <http://www.kdnuggets.com/datasets/index.html>`_ + +* |FIXME_ICON| `Microsoft Azure Data Market Free DataSets <http://datamarket.azure.com/browse/data?price=free>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//PublicDomains/Microsoft-Azure-Data-Market-Free-DataSets.yml>`_] + +* |OK_ICON| `Microsoft Data Science for Research <http://aka.ms/Data-Science>`_ + +* |FIXME_ICON| `Numbrary <http://numbrary.com/>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//PublicDomains/Numbray.yml>`_] + +* |OK_ICON| `Open Library Data Dumps <https://openlibrary.org/developers/dumps>`_ + +* |OK_ICON| `Reddit Datasets <https://www.reddit.com/r/datasets>`_ + +* |OK_ICON| `RevolutionAnalytics Collection <http://packages.revolutionanalytics.com/datasets/>`_ + +* |OK_ICON| `Sample R data sets <http://stat.ethz.ch/R-manual/R-patched/library/datasets/html/00Index.html>`_ + +* |OK_ICON| `StatSci.org <http://www.statsci.org/datasets.html>`_ + +* |FIXME_ICON| `Stats4Stem R data sets <http://www.stats4stem.org/data-sets.html>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//PublicDomains/Stats4Stem-R-data-sets.yml>`_] + +* |OK_ICON| `The Washington Post List <http://www.washingtonpost.com/wp-srv/metro/data/datapost.html>`_ + +* |OK_ICON| `UCLA SOCR data
collection <http://wiki.stat.ucla.edu/socr/index.php/SOCR_Data>`_ + +* |OK_ICON| `UFO Reports <http://www.nuforc.org/webreports.html>`_ + +* |OK_ICON| `Wikileaks 911 pager intercepts <https://911.wikileaks.org/files/index.html>`_ + +* |FIXME_ICON| `Yahoo Webscope <http://webscope.sandbox.yahoo.com/catalog.php>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//PublicDomains/Yahoo-Webscope.yml>`_] + +SearchEngines +------------- + +* |OK_ICON| `Academic Torrents of data sharing from UMB <http://academictorrents.com/>`_ + +* |OK_ICON| `DataMarket (Qlik) <https://datamarket.com/data/list/?q=all>`_ + +* |OK_ICON| `Datahub.io <https://datahub.io/dataset>`_ + +* |OK_ICON| `Harvard Dataverse Network of scientific data <https://dataverse.harvard.edu/>`_ + +* |OK_ICON| `ICPSR (UMICH) <http://www.icpsr.umich.edu/icpsrweb/ICPSR/index.jsp>`_ + +* |OK_ICON| `Institute of Education Sciences <http://eric.ed.gov>`_ + +* |FIXME_ICON| `National Technical Reports Library <http://www.ntis.gov/products/ntrl/>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//SearchEngines/National-Technical-Reports-Library.yml>`_] + +* |OK_ICON| `Open Data Certificates (beta) <https://certificates.theodi.org/en/datasets>`_ + +* |OK_ICON| `OpenDataNetwork - A search engine of all Socrata powered data portals <http://www.opendatanetwork.com/>`_ + +* |OK_ICON| `Statista.com - statistics and Studies <http://www.statista.com/>`_ + +* |OK_ICON| `Zenodo - An open dependable home for the long-tail of science <https://zenodo.org/collection/datasets>`_ + +SocialNetworks -------------- - -* `Amazon <http://aws.amazon.com/datasets/>`_ -* `Archive-it from Internet Archive <https://www.archive-it.org/explore?show=Collections>`_ -* `Archive.org Datasets <https://archive.org/details/datasets>`_ -* `CMU JASA data archive <http://lib.stat.cmu.edu/jasadata/>`_ -* `CMU StatLab collections <http://lib.stat.cmu.edu/datasets/>`_ -* `Data360 <http://www.data360.org/index.aspx>`_ -* 
`Datamob.org <http://datamob.org/datasets>`_ -* `Google <http://www.google.com/publicdata/directory>`_ -* `Infochimps <http://www.infochimps.com/>`_ -* `KDNuggets Data Collections <http://www.kdnuggets.com/datasets/index.html>`_ -* `Microsoft Azure Data Market Free DataSets <http://datamarket.azure.com/browse/data?price=free>`_ -* `Numbray <http://numbrary.com/>`_ -* `Open Library Data Dumps <https://openlibrary.org/developers/dumps>`_ -* `Reddit Datasets <https://www.reddit.com/r/datasets>`_ -* `RevolutionAnalytics Collection <http://packages.revolutionanalytics.com/datasets/>`_ -* `Sample R data sets <http://stat.ethz.ch/R-manual/R-patched/library/datasets/html/00Index.html>`_ -* `Stats4Stem R data sets <http://www.stats4stem.org/data-sets.html>`_ -* `StatSci.org <http://www.statsci.org/datasets.html>`_ -* `The Washington Post List <http://www.washingtonpost.com/wp-srv/metro/data/datapost.html>`_ -* `UCLA SOCR data collection <http://wiki.stat.ucla.edu/socr/index.php/SOCR_Data>`_ -* `UFO Reports <http://www.nuforc.org/webreports.html>`_ -* `Wikileaks 911 pager intercepts <https://911.wikileaks.org/files/index.html>`_ -* `Yahoo Webscope <http://webscope.sandbox.yahoo.com/catalog.php>`_ - - -Search Engines + +* |OK_ICON| `72 hours #gamergate Twitter Scrape <http://waxy.org/random/misc/gamergate_tweets.csv>`_ + +* |OK_ICON| `Ancestry.com Forum Dataset over 10 years <http://www.cs.cmu.edu/~jelsas/data/ancestry.com/>`_ + +* |OK_ICON| `CMU Enron Email of 150 users <http://www.cs.cmu.edu/~enron/>`_ + +* |OK_ICON| `Cheng-Caverlee-Lee September 2009 - January 2010 Twitter Scrape <https://archive.org/details/twitter_cikm_2010>`_ + +* |OK_ICON| `EDRM Enron EMail of 151 users, hosted on S3 <https://aws.amazon.com/datasets/enron-email-data/>`_ + +* |OK_ICON| `Facebook Data Scrape (2005) <https://archive.org/details/oxford-2005-facebook-matrix>`_ + +* |OK_ICON| `Facebook Social Networks from LAW (since 2007) <http://law.di.unimi.it/datasets.php>`_ + +* |OK_ICON| `Foursquare 
from UMN/Sarwat (2013) <https://archive.org/details/201309_foursquare_dataset_umn>`_ + +* |OK_ICON| `GitHub Collaboration Archive <https://www.githubarchive.org/>`_ + +* |OK_ICON| `Google Scholar citation relations <http://www3.cs.stonybrook.edu/~leman/data/gscholar.db>`_ + +* |OK_ICON| `High-Resolution Contact Networks from Wearable Sensors <http://www.sociopatterns.org/datasets/>`_ + +* |OK_ICON| `Indie Map: social graph and crawl of top IndieWeb sites <http://www.indiemap.org/>`_ + +* |FIXME_ICON| `Mobile Social Networks from UMASS <https://kdl.cs.umass.edu/display/public/Mobile+Social+Networks>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//SocialNetworks/Mobile-Social-Networks-from-UMASS.yml>`_] + +* |OK_ICON| `Network Twitter Data <http://snap.stanford.edu/data/higgs-twitter.html>`_ + +* |OK_ICON| `Reddit Comments <https://www.reddit.com/r/datasets/comments/3bxlg7/i_have_every_publicly_available_reddit_comment/>`_ + +* |OK_ICON| `Skytrax' Air Travel Reviews Dataset <https://github.com/quankiquanki/skytrax-reviews-dataset>`_ + +* |OK_ICON| `Social Twitter Data <http://snap.stanford.edu/data/egonets-Twitter.html>`_ + +* |OK_ICON| `SourceForge.net Research Data <http://www3.nd.edu/~oss/Data/data.html>`_ + +* |OK_ICON| `Twitter Data for Online Reputation Management <http://nlp.uned.es/replab2013/>`_ + +* |OK_ICON| `Twitter Data for Sentiment Analysis <http://help.sentiment140.com/for-students/>`_ + +* |OK_ICON| `Twitter Graph of entire Twitter site <http://an.kaist.ac.kr/traces/WWW2010.html>`_ + +* |FIXME_ICON| `Twitter Scrape Calufa May 2011 <http://archive.org/details/2011-05-calufa-twitter-sql>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//SocialNetworks/Twitter-Scrape-Calufa-May-2011.yml>`_] + +* |OK_ICON| `UNIMI/LAW Social Network Datasets <http://law.di.unimi.it/datasets.php>`_ + +* |FIXME_ICON| `Yahoo! 
Graph and Social Data <http://webscope.sandbox.yahoo.com/catalog.php?datatype=g>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//SocialNetworks/Yahoo-Graph-and-Social-Data.yml>`_] + +* |OK_ICON| `Youtube Video Social Graph in 2007,2008 <http://netsg.cs.sfu.ca/youtubedata/>`_ + +SocialSciences -------------- - -* `Academic Torrents of data sharing from UMB <http://academictorrents.com/>`_ -* `Datahub.io <https://datahub.io/dataset>`_ -* `DataMarket (Qlik) <https://datamarket.com/data/list/?q=all>`_ -* `Harvard Dataverse Network of scientific data <https://dataverse.harvard.edu/>`_ -* `ICPSR (UMICH) <http://www.icpsr.umich.edu/icpsrweb/ICPSR/index.jsp>`_ -* `Institute of Education Sciences <http://eric.ed.gov>`_ -* `National Technical Reports Library <http://www.ntis.gov/products/ntrl/>`_ -* `Open Data Certificates (beta) <https://certificates.theodi.org/en/datasets>`_ -* `OpenDataNetwork - A search engine of all Socrata powered data portals <http://www.opendatanetwork.com/>`_ -* `Statista.com - statistics and Studies <http://www.statista.com/>`_ -* `Zenodo - An open dependable home for the long-tail of science <https://zenodo.org/collection/datasets>`_ - - -Social Networks ---------------- - -* `72 hours #gamergate Twitter Scrape <http://waxy.org/random/misc/gamergate_tweets.csv>`_ -* `Ancestry.com Forum Dataset over 10 years <http://www.cs.cmu.edu/~jelsas/data/ancestry.com/>`_ -* `Cheng-Caverlee-Lee September 2009 - January 2010 Twitter Scrape <https://archive.org/details/twitter_cikm_2010>`_ -* `CMU Enron Email of 150 users <http://www.cs.cmu.edu/~enron/>`_ -* `EDRM Enron EMail of 151 users, hosted on S3 <https://aws.amazon.com/datasets/enron-email-data/>`_ -* `Facebook Data Scrape (2005) <https://archive.org/details/oxford-2005-facebook-matrix>`_ -* `Facebook Social Networks from LAW (since 2007) <http://law.di.unimi.it/datasets.php>`_ -* `Foursquare from UMN/Sarwat (2013) <https://archive.org/details/201309_foursquare_dataset_umn>`_ -* 
`GetGlue - users rating TV shows <http://getglue-data.s3.amazonaws.com/getglue_sample.tar.gz>`_ -* `GitHub Collaboration Archive <https://www.githubarchive.org/>`_ -* `Google Scholar citation relations <http://www3.cs.stonybrook.edu/~leman/data/gscholar.db>`_ -* `High-Resolution Contact Networks from Wearable Sensors <http://www.sociopatterns.org/datasets/>`_ -* `Mobile Social Networks from UMASS <https://kdl.cs.umass.edu/display/public/Mobile+Social+Networks>`_ -* `Network Twitter Data <http://snap.stanford.edu/data/higgs-twitter.html>`_ -* `Reddit Comments <https://www.reddit.com/r/datasets/comments/3bxlg7/i_have_every_publicly_available_reddit_comment/>`_ -* `Skytrax' Air Travel Reviews Dataset <https://github.com/quankiquanki/skytrax-reviews-dataset>`_ -* `Social Twitter Data <http://snap.stanford.edu/data/egonets-Twitter.html>`_ -* `SourceForge.net Research Data <http://www3.nd.edu/~oss/Data/data.html>`_ -* `Twitter Data for Sentiment Analysis <http://help.sentiment140.com/for-students/>`_ -* `Twitter Data for Online Reputation Management <http://nlp.uned.es/replab2013/>`_ -* `Twitter Graph of entire Twitter site <http://an.kaist.ac.kr/traces/WWW2010.html>`_ -* `Twitter Scrape Calufa May 2011 <http://archive.org/details/2011-05-calufa-twitter-sql>`_ -* `UNIMI/LAW Social Network Datasets <http://law.di.unimi.it/datasets.php>`_ -* `Yahoo! 
Graph and Social Data <http://webscope.sandbox.yahoo.com/catalog.php?datatype=g>`_ -* `Youtube Video Social Graph in 2007,2008 <http://netsg.cs.sfu.ca/youtubedata/>`_ - - -Social Sciences ---------------- - -* `ACLED (Armed Conflict Location & Event Data Project) <http://www.acleddata.com/>`_ -* `Canadian Legal Information Institute <https://www.canlii.org/en/index.php>`_ -* `Center for Systemic Peace Datasets - Conflict Trends, Polities, State Fragility, etc <http://www.systemicpeace.org/>`_ -* `Correlates of War Project <http://www.correlatesofwar.org/>`_ -* `Cryptome Conspiracy Theory Items <http://cryptome.org>`_ -* `Datacards <http://datacards.org>`_ -* `European Social Survey <http://www.europeansocialsurvey.org/data/>`_ -* `FBI Hate Crime 2013 - aggregated data <https://github.com/emorisse/FBI-Hate-Crime-Statistics/tree/master/2013>`_ -* `GDELT Global Events Database <http://gdeltproject.org/data.html>`_ -* `General Social Survey (GSS) since 1972 <http://gss.norc.org>`_ -* `German Social Survey <http://www.gesis.org/en/home/>`_ -* `Global Religious Futures Project <http://www.globalreligiousfutures.org/>`_ -* `Humanitarian Data Exchange <https://data.hdx.rwlabs.org/>`_ -* `Institute for Demographic Studies <http://www.ined.fr/en/>`_ -* `International Networks Archive <http://www.princeton.edu/~ina/>`_ -* `International Social Survey Program ISSP <http://www.issp.org>`_ -* `International Studies Compendium Project <http://www.isacompendium.com/public/>`_ -* `James McGuire Cross National Data <http://jmcguire.faculty.wesleyan.edu/welcome/cross-national-data/>`_ -* `MacroData Guide by Norsk samfunnsvitenskapelig datatjeneste <http://nsd.uib.no>`_ -* `MIT Reality Mining Dataset <http://realitycommons.media.mit.edu/realitymining.html>`_ -* `Open Crime and Policing Data in England, Wales and Northern Ireland <https://data.police.uk/data/>`_ -* `Paul Hensel General International Data Page <http://www.paulhensel.org/dataintl.html>`_ -* `PewResearch Internet Survey 
Project <http://www.pewinternet.org/datasets/pages/2/>`_ -* `PewResearch Society Data Collection <http://www.pewresearch.org/data/download-datasets/>`_ -* `Political Polarity Data <http://www3.cs.stonybrook.edu/~leman/data/14-icwsm-political-polarity-data.zip>`_ -* `StackExchange Data Explorer <http://data.stackexchange.com/help>`_ -* `Terrorism Research and Analysis Consortium <http://www.trackingterrorism.org/>`_ -* `Texas Inmates Executed Since 1984 <http://www.tdcj.state.tx.us/death_row/dr_executed_offenders.html>`_ -* `Titanic Survival Data Set <https://github.com/caesar0301/awesome-public-datasets/tree/master/Datasets>`_ -* `UCB's Archive of Social Science Data (D-Lab) <http://ucdata.berkeley.edu/>`_ -* `UCLA Social Sciences Data Archive <http://dataarchives.ss.ucla.edu/Home.DataPortals.htm>`_ -* `UN Civil Society Database <http://esango.un.org/civilsociety/>`_ -* `Universities Worldwide <http://univ.cc/>`_ -* `UPJOHN for Labor Employment Research <http://www.upjohn.org/services/resources/employment-research-data-center>`_ -* `World Bank Data <http://data.worldbank.org/>`_ -* `WorldPop project - Worldwide human population distributions <http://www.worldpop.org.uk/data/get_data/>`_ - - + +* |OK_ICON| `ACLED (Armed Conflict Location & Event Data Project) <http://www.acleddata.com/>`_ + +* |FIXME_ICON| `Canadian Legal Information Institute <https://www.canlii.org/en/index.php>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//SocialSciences/Canadian-Legal-Information-Institute.yml>`_] + +* |OK_ICON| `Center for Systemic Peace Datasets - Conflict Trends, Polities, State Fragility, etc <http://www.systemicpeace.org/>`_ + +* |OK_ICON| `Correlates of War Project <http://www.correlatesofwar.org/>`_ + +* |OK_ICON| `Cryptome Conspiracy Theory Items <http://cryptome.org>`_ + +* |FIXME_ICON| `Datacards <http://datacards.org>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//SocialSciences/Datacards.yml>`_] + +* |OK_ICON| `European 
Social Survey <http://www.europeansocialsurvey.org/data/>`_ + +* |OK_ICON| `FBI Hate Crime 2013 - aggregated data <https://github.com/emorisse/FBI-Hate-Crime-Statistics/tree/master/2013>`_ + +* |FIXME_ICON| `Fragile States Index <http://fsi.fundforpeace.org/data>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//SocialSciences/Fragile-States-Index.yml>`_] + +* |OK_ICON| `GDELT Global Events Database <http://gdeltproject.org/data.html>`_ + +* |OK_ICON| `General Social Survey (GSS) since 1972 <http://gss.norc.org>`_ + +* |OK_ICON| `German Social Survey <http://www.gesis.org/en/home/>`_ + +* |OK_ICON| `Global Religious Futures Project <http://www.globalreligiousfutures.org/>`_ + +* |OK_ICON| `Gun Violence Data - A comprehensive, accessible database that contains [...] <https://github.com/jamesqo/gun-violence-data>`_ + +* |FIXME_ICON| `Humanitarian Data Exchange <https://data.hdx.rwlabs.org/>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//SocialSciences/Humanitarian-Data-Exchange.yml>`_] + +* |OK_ICON| `INFORM Index for Risk Management <http://www.inform-index.org/Results/Global>`_ + +* |OK_ICON| `Institute for Demographic Studies <http://www.ined.fr/en/>`_ + +* |OK_ICON| `International Networks Archive <http://www.princeton.edu/~ina/>`_ + +* |OK_ICON| `International Social Survey Program ISSP <http://www.issp.org>`_ + +* |OK_ICON| `International Studies Compendium Project <http://www.isacompendium.com/public/>`_ + +* |OK_ICON| `James McGuire Cross National Data <http://jmcguire.faculty.wesleyan.edu/welcome/cross-national-data/>`_ + +* |OK_ICON| `MIT Reality Mining Dataset <http://realitycommons.media.mit.edu/realitymining.html>`_ + +* |OK_ICON| `MacroData Guide by Norsk samfunnsvitenskapelig datatjeneste <http://nsd.uib.no>`_ + +* |OK_ICON| `Minnesota Population Center <https://www.ipums.org/>`_ + +* |OK_ICON| `Notre Dame Global Adaptation Index (ND-GAIN) <http://index.gain.org/about/download>`_ + +* |OK_ICON| `Open Crime and
Policing Data in England, Wales and Northern Ireland <https://data.police.uk/data/>`_ + +* |OK_ICON| `OpenSanctions - A global database of persons and companies of political, [...] <http://www.opensanctions.org/#downloads>`_ + +* |OK_ICON| `Paul Hensel General International Data Page <http://www.paulhensel.org/dataintl.html>`_ + +* |FIXME_ICON| `PewResearch Internet Survey Project <http://www.pewinternet.org/datasets/pages/2/>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//SocialSciences/PewResearch-Internet-Survey-Project.yml>`_] + +* |OK_ICON| `PewResearch Society Data Collection <http://www.pewresearch.org/data/download-datasets/>`_ + +* |OK_ICON| `Political Polarity Data <http://www3.cs.stonybrook.edu/~leman/data/14-icwsm-political-polarity-data.zip>`_ + +* |OK_ICON| `StackExchange Data Explorer <http://data.stackexchange.com/help>`_ + +* |OK_ICON| `Terrorism Research and Analysis Consortium <http://www.trackingterrorism.org/>`_ + +* |OK_ICON| `Texas Inmates Executed Since 1984 <http://www.tdcj.state.tx.us/death_row/dr_executed_offenders.html>`_ + +* |OK_ICON| `Titanic Survival Data Set <https://github.com/awesomedata/awesome-public-datasets/tree/master/Datasets>`_ + +* |OK_ICON| `UCB's Archive of Social Science Data (D-Lab) <http://ucdata.berkeley.edu/>`_ + +* |FIXME_ICON| `UCLA Social Sciences Data Archive <http://dataarchives.ss.ucla.edu/Home.DataPortals.htm>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//SocialSciences/UCLA-Social-Sciences-Data-Archive.yml>`_] + +* |OK_ICON| `UN Civil Society Database <http://esango.un.org/civilsociety/>`_ + +* |OK_ICON| `UPJOHN for Labor Employment Research <http://www.upjohn.org/services/resources/employment-research-data-center>`_ + +* |OK_ICON| `Universities Worldwide <http://univ.cc/>`_ + +* |OK_ICON| `Uppsala Conflict Data Program <http://ucdp.uu.se/>`_ + +* |OK_ICON| `World Bank Open Data <http://data.worldbank.org/>`_ + +* |OK_ICON| `WorldPop project - Worldwide human 
population distributions <http://www.worldpop.org.uk/data/get_data/>`_ + Software -------- - -* `FLOSSmole data about free, libre, and open source software development <http://flossdata.syr.edu/data/>`_ - + +* |OK_ICON| `FLOSSmole data about free, libre, and open source software development <http://flossdata.syr.edu/data/>`_ + +* |OK_ICON| `Libraries.io Open Source Repository and Dependency Metadata <https://doi.org/10.5281/zenodo.1068916>`_ + Sports ------ - -* `Basketball (NBA/NCAA/Euro) Player Database and Statistics <http://www.draftexpress.com/stats.php>`_ -* `Betfair Historical Exchange Data <http://data.betfair.com/>`_ -* `Cricsheet Matches (cricket) <http://cricsheet.org/>`_ -* `Ergast Formula 1, from 1950 up to date (API) <http://ergast.com/mrd/db>`_ -* `Football/Soccer resources (data and APIs) <http://www.jokecamp.com/blog/guide-to-football-and-soccer-data-and-apis/>`_ -* `Lahman's Baseball Database <http://www.seanlahman.com/baseball-archive/statistics/>`_ -* `Pinhooker: Thoroughbred Bloodstock Sale Data <https://github.com/phillc73/pinhooker>`_ -* `Retrosheet Baseball Statistics <http://www.retrosheet.org/game.htm>`_ - - -Time Series ------------ - -* `Databanks International Cross National Time Series Data Archive <http://www.cntsdata.com>`_ -* `Hard Drive Failure Rates <https://www.backblaze.com/hard-drive-test-data.html>`_ -* `Heart Rate Time Series from MIT <http://ecg.mit.edu/time-series/>`_ -* `Time Series Data Library (TSDL) from MU <https://datamarket.com/data/list/?q=provider:tsdl>`_ -* `UC Riverside Time Series Dataset <http://www.cs.ucr.edu/~eamonn/time_series_data/>`_ - - + +* |OK_ICON| `Betfair Historical Exchange Data <http://data.betfair.com/>`_ + +* |OK_ICON| `Cricsheet Matches (cricket) <http://cricsheet.org/>`_ + +* |OK_ICON| `Ergast Formula 1, from 1950 up to date (API) <http://ergast.com/mrd/db>`_ + +* |OK_ICON| `Football/Soccer resources (data and APIs) <http://www.jokecamp.com/blog/guide-to-football-and-soccer-data-and-apis/>`_ + 
+* |FIXME_ICON| `Lahman's Baseball Database <http://www.seanlahman.com/baseball-archive/statistics/>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core//Sports/Lahmans-Baseball-Database.yml>`_] + +* |OK_ICON| `Pinhooker: Thoroughbred Bloodstock Sale Data <https://github.com/phillc73/pinhooker>`_ + +* |OK_ICON| `Retrosheet Baseball Statistics <http://www.retrosheet.org/game.htm>`_ + +* |OK_ICON| `Tennis database of rankings, results, and stats for ATP <https://github.com/JeffSackmann/tennis_atp>`_ + +* |OK_ICON| `Tennis database of rankings, results, and stats for WTA <https://github.com/JeffSackmann/tennis_wta>`_ + +TimeSeries +---------- + +* |OK_ICON| `Databanks International Cross National Time Series Data Archive <http://www.cntsdata.com>`_ + +* |OK_ICON| `Hard Drive Failure Rates <https://www.backblaze.com/hard-drive-test-data.html>`_ + +* |OK_ICON| `Heart Rate Time Series from MIT <http://ecg.mit.edu/time-series/>`_ + +* |OK_ICON| `Time Series Data Library (TSDL) from MU <https://datamarket.com/data/list/?q=provider:tsdl>`_ + +* |OK_ICON| `UC Riverside Time Series Dataset <http://www.cs.ucr.edu/~eamonn/time_series_data/>`_ + Transportation -------------- - -* `Airlines OD Data 1987-2008 <http://stat-computing.org/dataexpo/2009/the-data.html>`_ -* `Bay Area Bike Share Data <http://www.bayareabikeshare.com/open-data>`_ -* `Bike Share Systems (BSS) collection <https://github.com/BetaNYC/Bike-Share-Data-Best-Practices/wiki/Bike-Share-Data-Systems>`_ -* `GeoLife GPS Trajectory from Microsoft Research <http://research.microsoft.com/en-us/downloads/b16d359d-d164-469e-9fd4-daa38f2b2e13/>`_ -* `German train system by Deutsche Bahn <http://data.deutschebahn.com/datasets/>`_ -* `Hubway Million Rides in MA <http://hubwaydatachallenge.org/trip-history-data/>`_ -* `Marine Traffic - ship tracks, port calls and more <http://www.marinetraffic.com/de/ais-api-services>`_ -* `Montreal BIXI Bike Share <https://montreal.bixi.com/donn%C3%A9es-libre-service>`_ -* 
`NYC Taxi Trip Data 2009- <http://www.nyc.gov/html/tlc/html/about/trip_record_data.shtml>`_ -* `NYC Taxi Trip Data 2013 (FOIA/FOILed) <https://archive.org/details/nycTaxiTripData2013>`_ -* `NYC Uber trip data April 2014 to September 2014 <https://github.com/fivethirtyeight/uber-tlc-foil-response>`_ -* `Open Traffic collection <https://github.com/graphhopper/open-traffic-collection>`_ -* `OpenFlights - airport, airline and route data <http://openflights.org/data.html>`_ -* `Philadelphia Bike Share Stations (JSON) <https://www.rideindego.com/stations/json/>`_ -* `Plane Crash Database, since 1920 <http://www.planecrashinfo.com/database.htm>`_ -* `RITA Airline On-Time Performance data <http://www.transtats.bts.gov/Tables.asp?DB_ID=120>`_ -* `RITA/BTS transport data collection (TranStat) <http://www.transtats.bts.gov/DataIndex.asp>`_ -* `Toronto Bike Share Stations (XML file) <http://www.bikesharetoronto.com/data/stations/bikeStations.xml>`_ -* `Transport for London (TFL) <https://tfl.gov.uk/info-for/open-data-users/our-feeds>`_ -* `Travel Tracker Survey (TTS) for Chicago <http://www.cmap.illinois.gov/data/transportation/travel-tracker-survey>`_ -* `U.S. Bureau of Transportation Statistics (BTS) <http://www.rita.dot.gov/bts/>`_ -* `U.S. Domestic Flights 1990 to 2009 <http://academictorrents.com/details/a2ccf94bbb4af222bf8e69dad60a68a29f310d9a>`_ -* `U.S. 
Freight Analysis Framework since 2007 <http://ops.fhwa.dot.gov/freight/freight_analysis/faf/index.htm>`_ + +* |OK_ICON| `Airlines OD Data 1987-2008 <http://stat-computing.org/dataexpo/2009/the-data.html>`_ + +* |OK_ICON| `Bay Area Bike Share Data <http://www.bayareabikeshare.com/open-data>`_ + +* |OK_ICON| `Bike Share Systems (BSS) collection <https://github.com/BetaNYC/Bike-Share-Data-Best-Practices/wiki/Bike-Share-Data-Systems>`_ + +* |OK_ICON| `GeoLife GPS Trajectory from Microsoft Research <http://research.microsoft.com/en-us/downloads/b16d359d-d164-469e-9fd4-daa38f2b2e13/>`_ + +* |OK_ICON| `German train system by Deutsche Bahn <http://data.deutschebahn.com/datasets/>`_ + +* |OK_ICON| `Hubway Million Rides in MA <http://hubwaydatachallenge.org/trip-history-data/>`_ + +* |OK_ICON| `Montreal BIXI Bike Share <https://montreal.bixi.com/en/open-data>`_ + +* |OK_ICON| `NYC Taxi Trip Data 2009- <http://www.nyc.gov/html/tlc/html/about/trip_record_data.shtml>`_ + +* |OK_ICON| `NYC Taxi Trip Data 2013 (FOIA/FOILed) <https://archive.org/details/nycTaxiTripData2013>`_ + +* |OK_ICON| `NYC Uber trip data April 2014 to September 2014 <https://github.com/fivethirtyeight/uber-tlc-foil-response>`_ + +* |OK_ICON| `Open Traffic collection <https://github.com/graphhopper/open-traffic-collection>`_ + +* |OK_ICON| `OpenFlights - airport, airline and route data <http://openflights.org/data.html>`_ + +* |FIXME_ICON| `Philadelphia Bike Share Stations (JSON) <https://www.rideindego.com/stations/json/>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core/Transportation/Philadelphia-Bike-Share-Stations-JSON.yml>`_] + +* |OK_ICON| `Plane Crash Database, since 1920 <http://www.planecrashinfo.com/database.htm>`_ + +* |OK_ICON| `RITA Airline On-Time Performance data <http://www.transtats.bts.gov/Tables.asp?DB_ID=120>`_ + +* |OK_ICON| `RITA/BTS transport data collection (TranStat) <http://www.transtats.bts.gov/DataIndex.asp>`_ + +* |FIXME_ICON| `Toronto Bike Share Stations (XML 
file) <http://www.bikesharetoronto.com/data/stations/bikeStations.xml>`_ [`fixme <https://github.com/awesomedata/apd-core/tree/master/core/Transportation/Toronto-Bike-Share-Stations-XML-file.yml>`_] + +* |OK_ICON| `Transport for London (TFL) <https://tfl.gov.uk/info-for/open-data-users/our-open-data>`_ + +* |OK_ICON| `Travel Tracker Survey (TTS) for Chicago <http://www.cmap.illinois.gov/data/transportation/travel-tracker-survey>`_ + +* |OK_ICON| `U.S. Bureau of Transportation Statistics (BTS) <http://www.rita.dot.gov/bts/>`_ + +* |OK_ICON| `U.S. Domestic Flights 1990 to 2009 <http://academictorrents.com/details/a2ccf94bbb4af222bf8e69dad60a68a29f310d9a>`_ + +* |OK_ICON| `U.S. Freight Analysis Framework since 2007 <http://ops.fhwa.dot.gov/freight/freight_analysis/faf/index.htm>`_ Complementary Collections ------------------------- * `Data Packaged Core Datasets <https://github.com/datasets/>`_ + * `Database of Scientific Code Contributions <https://mozillascience.org/collaborate>`_ + +* A growing collection of public datasets: `CoolDatasets <http://cooldatasets.com/>`_ + * DataWrangling: `Some Datasets Available on the Web <http://www.datawrangling.com/some-datasets-available-on-the-web>`_ + * Inside-r: `Finding Data on the Internet <http://www.inside-r.org/howto/finding-data-internet>`_ + * OpenDataMonitor: `An overview of available open data resources in Europe <http://opendatamonitor.eu>`_ + * Quora: `Where can I find large datasets open to the public? <http://www.quora.com/Where-can-I-find-large-datasets-open-to-the-public>`_ + * RS.io: `100+ Interesting Data Sets for Statistics <http://rs.io/100-interesting-data-sets-for-statistics/>`_ + * StaTrek: `Leveraging open data to understand urban lives <http://xiaming.me/posts/2014/10/23/leveraging-open-data-to-understand-urban-lives/>`_