Skip to content

Commit

Permalink
add the data prep by Greg
Browse files Browse the repository at this point in the history
  • Loading branch information
brunj7 committed Mar 4, 2024
1 parent b14790a commit 14648b8
Show file tree
Hide file tree
Showing 11 changed files with 3,842 additions and 0 deletions.
979 changes: 979 additions & 0 deletions data/01_ASDN_Readme.txt

Large diffs are not rendered by default.

407 changes: 407 additions & 0 deletions data/ASDN_Bird_eggs.csv

Large diffs are not rendered by default.

1,548 changes: 1,548 additions & 0 deletions data/ASDN_Bird_nests.csv

Large diffs are not rendered by default.

442 changes: 442 additions & 0 deletions data/ASDN_Camp_assignment.csv

Large diffs are not rendered by default.

Binary file added data/asdn-er-diagram.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added data/bird_database.duckdb
Binary file not shown.
3 changes: 3 additions & 0 deletions data/build-database
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash
# Rebuild bird_database.duckdb from scratch by running schema-build-script.sql
# through the DuckDB command-line client.

# Stop at the first failing command instead of carrying on with later steps.
set -euo pipefail

# Work from the directory that holds this script: the database file, the SQL
# script, and the CSV files the SQL script loads are all referenced by
# relative path, so they only resolve when this is the current directory.
cd "$(dirname "${BASH_SOURCE[0]}")"

# Remove any previous build first; the SQL script uses plain CREATE TABLE,
# which would fail on tables that already exist in an old database file.
rm -f bird_database.duckdb
duckdb bird_database.duckdb < schema-build-script.sql
270 changes: 270 additions & 0 deletions data/personnel.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,270 @@
Abbreviation,Name
agottesman,Aaron Gottesman
asterling,Abby Sterling
abradley,Adam Bradley
akneidel,Alan Kneidel
akataluk-primeau,Alannah Kataluk-Primeau
aissisgaitok,Albert Issisgaitok
amatsyna,Alexander Matsyna
asoto,Alfredo Soto
abuckley,Alvan Buckley
ataqtaq,Amanda Taqtaq
aguerra,Ana Guerra
amines,Anaka Mines
apopovkina,Anastasia Popovkina
abankert,Andrew Bankert
adoll,Andrew Doll
aperry,Andrew Perry
arbankert,Andrew R. Bankert
asjohnson,Andrew S. Johnson
abaltensperger,Andy Baltensperger
ajohnson,Andy Johnson
asmith,Angus Smith
anja,Anja
acorkery,Anne Corkery
amould,Anthony Mould
ancook,Ashley Nicole Cook
atufts,Ashley Tufts
amylnikova,Asya Mylnikova
blalibert,Benoît Laliberté
bkaselow,Billy Kaselow
bwinn,Brad Winn
bmwalker,Bradley M. Walker
bwilkinson,Bradley Wilkinson
bverheijen,Bram Verheijen
bhiggins,Brendan Higgins
bksandercock,Brett K. Sandercock
bharrington,Brian Harrington
brobinson,Brian Robinson
bdalziel,Bronwyn Dalziel
bhill,Brooke Hill
cdavis,Caitlin Davis
cbishop,Caitlyn Bishop
cmorcos,Camila Morcos
clahoda,Candace Lahoda
cjrguerra,Carlos Jose Ruiz Guerra
clishman,Carmen Lishman
codwyer,Carol O'Dwyer
cdoucet,Catherine Doucet
cpham,Catherine Pham
cwoodworth,Chelsea Woodworth
cgratto-trevor,Cheri Gratto-Trevor
chojnowski,Cheryl Hojnowski
csmith,Chris Smith
cbrunner,Christina Brunner
canderson,Christine Anderson
chelms,Clinton Helms
cmurchison,Colleen Murchison
dgough,Danielle Gough
dedwards,Darryl Edwards
dmcgeachy,Dave Mcgeachy
dsaalfeld,Dave Saalfeld
dattagutaluk,David Attagutaluk
dblank,David B. Lank
dhodkinson,David Hodkinson
dpavlik,David Pavlik
dward,David Ward
dturner,Devin Turner
dsolovyeva,Diana Solovyeva
dborden,Diane Borden
dfillman,Don Fillman
djleandry,Don-Jean Léandry
dmcrae,Doug McRae
dkessler,Dylan Kessler
ecorp,Edward Corp
epierce,Elin Pierce
epayne,Elizabeth Payne
epero,Ellen Pero
egirard,Eloise Girard
echalifour,Émilie Chalifour
edastrous,Emilie D'Astrous
eweiser,Emily Weiser
edavis,Emma Davis
ereed,Eric Reed
estroud,Eric Stroud
eescajeda,Erica Escajeda
enol,Erica Nol
ekwon,Eunbi Kwon
fylin,Fang-Yee Lin
fsgagnon,Fanny Senez Gagnon
fsanders,Felicia Sanders
fsmith,Fletcher Smith
frousseaux,Francois Rousseaux
fstetler,Frank Stetler
gmccune,Gennyne Mccune
gpavlyukov,Georgiy Pavlyukov
gracey,Gerry Racey
gholmes,Gillian Holmes
gdanilov,Gleb Danilov
gsedash,Gleb Sedash
ggilchrist,Grant Gilchrist
gsolovyev,Grigory Solovyev
gszor,Guillaume Szor
hspecht,Hannah Specht
hcraig,Heather Craig
hhelling,Heidi Helling
hbatcheller,Hope Batcheller
idavies,Ian Davies
ijong,Ian Jong
jeschenroeder,Jackman Eschenroeder
jkorol,Jaimlyn Korol
jcaville,James Caville
jhughey,Jared Hughey
jmarin,Javier Marin
jiron,Jean Iron
jflamarre,Jean-François Lamarre
jrjulian,Jean-Remi Julian
jzamuido,Jeisson Zamuido
jcoughlan,Jennifer Coughlan
jgale,Jennifer Gale
jkardiak,Jennifer Kardiak
jcunningham,Jenny Cunningham
jhixon,Jerrid Hixon
jhupp,Jerry Hupp
jrobinson,Jill Robinson
jsauer,Jim Sauer
jpanipak,Joanna Panipak
jliebezeit,Joe Liebezeit
jbety,Joël Bêty
jperz,Johanna Perz
jbrunjes,John Brunjes
jdiener,John Diener
jlautenbach,Jonathan Lautenbach
jotis,José Otis
jbelliveau,Julie Belliveau
jdanby,Julie Danby
jkelso,Julie Kelso
jwebber,Julie Webber
jloshchagina,Julya Loshchagina
kaward,Kara-Anne Ward
kreischke,Karissa Reischke
kmatsyna,Katya Matsyna
knuyaviak,Kayla Nuyaviak
ksheimreif,Kayla Sheimreif
kabraham,Ken Abraham
kkalasz,Kevin Kalasz
kkardynal,Kevin Kardynal
kpietrzak,Kevin Pietrzak
kbennett,Kim Bennett
kjones,Kim Jones
kgrond,Kirsten Grond
kbeckmann,Kristina Beckmann
kbertrand,Kyle Bertrand
khogrefe,Kyle Hogrefe
lkoloski,Laura Koloski
lmckinnon,Laura McKinnon
lpaquette,Laurence Paquette
lrenzel,Leah Renzel
lmercier,Léonie Mercier
lyen,Leslie Yen
lworing,Lewis W. Oring
lbachellier,Lisa Bachellier
lkennedy,Lisa Kennedy
lpirie,Lisa Pirie
lpollock,Lisa Pollock
lgoodrick,Lizzie Goodrick
ljolicoeur,Ludovic Jolicoeur
lpadegimas,Lukas Padegimas
lburlingame,Luke Burlingame
mvheyden,Madeline Vander Heyden
mmcconnell,Madison McConnell
myyang,Man-Yu Yang (Mona)
magiroux,Marie-Andrée Giroux
mcfrenette,Marie-Christine Frenette
mtrudel,Marion Trudel
mdodds,Mark Dodds
mgibson,Mark Gibson
mlafaver,Mark Lafaver
mpeck,Mark Peck
mpatenaude-monette,Martin Patenaude-Monette
mheung,Mary Heung
mjones,Mary Jones
mbirarda,Matt Birarda
mmichaud,Matt Michaud
msmith,Matt Smith
mmudge,Mckenzie Mudge
mmccloskey,Meagan Mccloskey
mboldenow,Megan Boldenow
mchown,Melanie Chown
mvezina,Melanie Vezina
mwilson,Melanie Wilson
mmcgarvey,Metta Mcgarvey
mbwunder,Michael B. Wunder
mevers,Michael Evers
mharing,Michaela Haring
mballvanzee,Michelangelo Ballvanzee
mallen,Mike Allen
mburrell,Mike Burrell
mqrunnut,Mike Qrunnut
mettuvgii,Mikhail Ettuvgii
mtrottier-paquet,Myriam Trottier-Paquet
nmveld,Naomi Manin'T Veld
nhentze,Nathan Hentze
nkirby,Nathan Kirby
nrsenner,Nathan R. Senner
nlecomte,Nicolas Lecomte
ncook,Nicole Cook
nmacdonald,Nicole MacDonald
nboulanger-lapointe,Noémie Boulanger-Lapointe
ojames,Oliver James
ohicks,Olivia Hicks
oguy,Other Guy
proyer-boutin,Pascal Royer-Boutin
pherzog,Patrick Herzog
psmith,Paul Smith
pwoodard,Paul Woodard
pchilton,Penelope Chilton
pbertrand,Philippe Bertrand
rhildebrandt,Reid Hildebrandt
rlanctot,Richard Lanctot
rgates,River Gates
rnakoochee,Roberta Nakoochee
rhunnewell,Robin Hunnewell
radugan,Ronan A. Dugan
rharned,Ronnie Harned
rchurchwell,Roy Churchwell
rhammond,Ruby Hammond
rburner,Ryan Burner
sfranks,Samantha Franks
sjeswiet,Sarah Jeswiet
sneima,Sarah Neima
ssaalfeld,Sarah Saalfeld
sflemming,Scott Flemming
sfreeman,Scott Freeman
svartanyan,Sergey Vartanyan
scarvey,Shannon Carvey
smbillerman,Shawn M. Billerman
sskinner,Shelby Skinner
sschulte,Shiloh Schulte
sbuckell,Simon Buckell
ssapora,Slade Sapora
sbrown,Stephen Brown
sparmiter,Stephen Parmiter
sbennett,Steve Bennett
shart,Steve Hart
skendall,Steve Kendall
skolbe,Steve Kolbe
svissault,Steve Vissault
ssheedy,Sydney Sheedy
tdoyle,Terry Doyle
tfarrugia,Thomas Farrugia
triecke,Thomas Riecke
tmankis,Tobias Mankis
tstclair,Toby St Clair
tesson,Tommy Esson
tkydd,Tyler Kydd
tdonnelly,Tyrone Donnelly
vamarualik,Valerie Amarualik
vloverti,Vanessa Loverti
vbaranyuk,Vasily Baranyuk
vkokhanova,Vera Kokhanova
vputinski,Victoria Putinski
vjirinec,Vitek Jirinec
wkennerley,Will Kennerley
wenglish,Willow English
yhhsu,Yu-Hsun Hsu (Echo)
zharris,Zack Harris
zlebrun-southcott,Zoe Lebrun-Southcott
cmontgomerie,Claire Montgomerie
emagnuson,Emily Magnuson
mcorrell,Maureen Correll
76 changes: 76 additions & 0 deletions data/schema-build-script.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
.nullvalue -NULL-

-- NOTE: the .nullvalue line above is a DuckDB CLI dot-command, not SQL; it
-- sets the text the CLI prints for NULL values in query output.

-- Species: lookup table keyed by species Code; loaded from species.csv.
CREATE TABLE Species (
    Code            VARCHAR,
    Common_name     VARCHAR NOT NULL,
    Scientific_name VARCHAR,  -- left nullable: some source rows have no value
    Relevance       VARCHAR,
    PRIMARY KEY (Code),
    UNIQUE (Common_name)
);
COPY Species FROM 'species.csv' (HEADER TRUE);

-- Site: one row per study site, keyed by site Code; loaded from site.csv.
-- Area is the total study plot area in hectares (per the site.csv header).
CREATE TABLE Site (
    Code      VARCHAR,
    Site_name VARCHAR NOT NULL,
    Location  VARCHAR NOT NULL,
    Latitude  FLOAT   NOT NULL,
    Longitude FLOAT   NOT NULL,
    Area      FLOAT   NOT NULL,
    PRIMARY KEY (Code),
    UNIQUE (Site_name),
    -- no two sites may share the same coordinates
    UNIQUE (Latitude, Longitude),
    -- coordinates must be valid decimal degrees
    CHECK (Latitude >= -90 AND Latitude <= 90),
    CHECK (Longitude >= -180 AND Longitude <= 180),
    CHECK (Area > 0)
);
-- The CSV header row is skipped, not used for matching: its names differ from
-- the column names above, so the file's column order must match the table's.
COPY Site FROM 'site.csv' (HEADER TRUE);

-- Personnel: maps each person's Abbreviation (the key other tables reference
-- as Observer) to a full Name; loaded from personnel.csv.
CREATE TABLE Personnel (
    Abbreviation VARCHAR,
    Name         VARCHAR NOT NULL,
    PRIMARY KEY (Abbreviation),
    UNIQUE (Name)
);
COPY Personnel FROM 'personnel.csv' (HEADER TRUE);

-- Camp_assignment: (Year, Site, Observer) rows with optional Start/"End"
-- dates; loaded from ASDN_Camp_assignment.csv. No primary key is declared.
CREATE TABLE Camp_assignment (
    Year     INTEGER NOT NULL,
    Site     VARCHAR NOT NULL,
    Observer VARCHAR NOT NULL,
    Start    DATE,
    "End"    DATE,  -- quoted because END is an SQL keyword
    CHECK (Year >= 1950 AND Year <= 2015),
    -- Start and "End" are nullable; when either is NULL the checks below
    -- evaluate to NULL, which does not count as a violation.
    CHECK (Start <= "End"),
    -- both dates must fall inside the calendar year given by Year
    CHECK (
        Start >= CAST(Year || '-01-01' AS DATE)
        AND Start <= CAST(Year || '-12-31' AS DATE)
    ),
    CHECK (
        "End" >= CAST(Year || '-01-01' AS DATE)
        AND "End" <= CAST(Year || '-12-31' AS DATE)
    ),
    FOREIGN KEY (Site) REFERENCES Site (Code),
    FOREIGN KEY (Observer) REFERENCES Personnel (Abbreviation)
);
COPY Camp_assignment FROM 'ASDN_Camp_assignment.csv' (HEADER TRUE);

-- Bird_nests: one row per nest, keyed by Nest_ID; loaded from
-- ASDN_Bird_nests.csv. References Site, Species and (optionally) Personnel.
CREATE TABLE Bird_nests (
    Book_page  VARCHAR,
    Year       INTEGER NOT NULL,
    Site       VARCHAR NOT NULL,
    Nest_ID    VARCHAR,
    Species    VARCHAR NOT NULL,
    Observer   VARCHAR,
    Date_found DATE    NOT NULL,
    how_found  VARCHAR,
    Clutch_max INTEGER,
    floatAge   FLOAT,
    ageMethod  VARCHAR,
    PRIMARY KEY (Nest_ID),
    CHECK (Year >= 1950 AND Year <= 2015),
    -- the discovery date must fall inside the calendar year given by Year
    CHECK (
        Date_found >= CAST(Year || '-01-01' AS DATE)
        AND Date_found <= CAST(Year || '-12-31' AS DATE)
    ),
    -- the remaining columns are nullable; a NULL value satisfies its check
    CHECK (how_found IN ('searcher', 'rope', 'bander')),
    CHECK (Clutch_max >= 0 AND Clutch_max <= 20),
    CHECK (floatAge >= 0 AND floatAge <= 30),
    CHECK (ageMethod IN ('float', 'lay', 'hatch')),
    FOREIGN KEY (Site) REFERENCES Site (Code),
    FOREIGN KEY (Species) REFERENCES Species (Code),
    FOREIGN KEY (Observer) REFERENCES Personnel (Abbreviation)
);
COPY Bird_nests FROM 'ASDN_Bird_nests.csv' (HEADER TRUE);

-- Bird_eggs: one row per egg, keyed by (Nest_ID, Egg_num); loaded from
-- ASDN_Bird_eggs.csv. Every egg must belong to a nest in Bird_nests.
CREATE TABLE Bird_eggs (
    Book_page VARCHAR,
    Year      INTEGER NOT NULL,
    Site      VARCHAR NOT NULL,
    Nest_ID   VARCHAR NOT NULL,
    Egg_num   INTEGER NOT NULL,
    Length    FLOAT   NOT NULL,
    Width     FLOAT   NOT NULL,
    PRIMARY KEY (Nest_ID, Egg_num),
    CHECK (Year >= 1950 AND Year <= 2015),
    CHECK (Egg_num >= 1 AND Egg_num <= 20),
    -- measurements must be strictly between 0 and 100 (units not stated here)
    CHECK (Length > 0 AND Length < 100),
    CHECK (Width > 0 AND Width < 100),
    FOREIGN KEY (Site) REFERENCES Site (Code),
    FOREIGN KEY (Nest_ID) REFERENCES Bird_nests (Nest_ID)
);
COPY Bird_eggs FROM 'ASDN_Bird_eggs.csv' (HEADER TRUE);
17 changes: 17 additions & 0 deletions data/site.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
Code,Site name,Location,Latitude,Longitude,Total_Study_Plot_Area_(ha)
barr,Barrow,"Alaska, USA",71.3,-156.6,220.4
burn,Burntpoint Creek,"Ontario, Canada",55.2,-84.3,63.0
bylo,Bylot Island,"Nunavut, Canada",73.2,-80.0,723.6
cakr,Cape Krusenstern,"Alaska, USA",67.1,-163.5,54.1
cari,Canning River Delta,"Alaska, USA",70.1,-145.8,722.0
chau,Chaun River Delta,"Chukotka, Russia",68.8,170.6,248.2
chur,Churchill,"Manitoba, Canada",58.7,-93.8,866.9
coat,Coats Island,"Nunavut, Canada",62.9,-82.5,1239.1
colv,Colville River Delta,"Alaska, USA",70.4,-150.7,324.8
eaba,East Bay,"Nunavut, Canada",64.0,-81.7,1205.5
iglo,Igloolik,"Nunavut, Canada",69.4,-81.6,59.8
ikpi,Ikpikpuk,"Alaska, USA",70.6,-154.7,174.1
lkri,Lower Khatanga River,"Krasnoyarsk, Russia",72.9,106.1,270.9
made,Mackenzie River Delta,"Northwest Territories, Canada",69.4,-135.0,667.3
nome,Nome,"Alaska, USA",64.4,-164.9,90.1
prba,Prudhoe Bay,"Alaska, USA",70.3,-148.6,120.0
Loading

0 comments on commit 14648b8

Please sign in to comment.