-
Notifications
You must be signed in to change notification settings - Fork 0
/
DB-create.sql
2120 lines (1814 loc) · 65.7 KB
/
DB-create.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
file: DB-setup.sql
authors: Daniel Mangin
Mandy Chan
Andrew Voorhees
Start Date: 6/26/2015
End Usage:
Description: Used to create all of the tables for Digital Democracy
note: this will only work on the currently used database
/*****************************************************************************/
/*
Represents a state. e.g. California, Arizona
Sources: None (Done manually)
*/
CREATE TABLE IF NOT EXISTS State (
abbrev VARCHAR(2), -- eg CA, AZ
country VARCHAR(200), -- eg United States
name VARCHAR(200), -- eg Caliornia, Arizona
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
dr_id INTEGER UNIQUE AUTO_INCREMENT,
PRIMARY KEY (abbrev)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/*
The start and end year for a specific legislative session. E.g. most recent CA session was
2015-2016.
Sources: None (Done manually)
*/
CREATE TABLE IF NOT EXISTS Session (
state VARCHAR(2),
start_year YEAR,
end_year YEAR,
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
PRIMARY KEY (state, start_year, end_year),
FOREIGN KEY (state) REFERENCES State(abbrev)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/*
Legislators are elected to represent specific districts.
Sources: CA: refactored_Get_Districts.py
NY: ny_import_districts.py
*/
CREATE TABLE IF NOT EXISTS District (
state VARCHAR(2),
house VARCHAR(100),
did INTEGER,
note VARCHAR(40) DEFAULT '',
year INTEGER,
region TEXT,
geoData MEDIUMTEXT,
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
dr_id INTEGER UNIQUE AUTO_INCREMENT,
PRIMARY KEY(state, house, did, year),
FOREIGN KEY (state) REFERENCES State(abbrev),
FOREIGN KEY (house, state) REFERENCES House(name, state)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/* One to one relationship with District. Contains extra district information. I just
did not want to confuse this with geographic info, but could totally be one table.
Sources: One time add
*/
CREATE TABLE DistrictInfo (
district INT,
house VARCHAR(200),
republicans INT,
democrats INT,
year INT,
state VARCHAR(2),
PRIMARY KEY (district, house, state, year)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/*
A house in a legislature. Necessary because different states can have
different names for their houses.
Sources: None (Done manually)
*/
CREATE TABLE IF NOT EXISTS House (
name VARCHAR(100), -- Name for the house. eg Assembly, Senate
state VARCHAR(2),
type VARCHAR(100),
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
dr_id INTEGER UNIQUE AUTO_INCREMENT,
PRIMARY KEY (name, state),
FOREIGN KEY (state) REFERENCES State(abbrev)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/* Entity::Person
Describes any person. Can be a Legislator, GeneralPublic, Lobbyist, etc.
Sources: CA: Transcription Tool,
refactored_legislator_migrate.py, refactored_Contributions.py,
refactored_Get_Committees_Web.py, refactored_Cal-Access-Accessor.py
NY: Transcription Tool,
ny_import_legislators.py, ny_import_lobbyists.py
*/
CREATE TABLE IF NOT EXISTS Person (
pid INTEGER AUTO_INCREMENT, -- Person id
last VARCHAR(50) NOT NULL, -- last name
middle VARCHAR(50), -- middle name
first VARCHAR(50) NOT NULL, -- first name
source VARCHAR(255),
image VARCHAR(256), -- path to image (if exists)
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
dr_changed INT DEFAULT NOW() ON UPDATE NOW(),
dr_changed_ts INT AS (UNIX_TIMESTAMP(dr_changed)),
PRIMARY KEY (pid)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/* Tracks person ids given to us by open states. Used exclusively by importation scripts at
the moment.
Sources: FL: fl_import_legislators.py
CA: No Data
NY: No Data
*/
CREATE TABLE IF NOT EXISTS AlternateId (
pid INTEGER, -- Person pid
alt_id VARCHAR(15), -- eg "CAL000001" for OpenStates
source VARCHAR(200), -- eg "OpenStates"
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
PRIMARY KEY (alt_id),
FOREIGN KEY (pid) REFERENCES Person(pid)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/*
Used to capture the affiliation between a person and a state.
Sources: CA: Transcription Tool,
refactored_legislator_migrate.py, refactored_Contributions.py,
refactored_Get_Committees_Web.py, refactored_Cal-Access-Accessor.py
NY: Transcription Tool,
ny_import_legislators.py, ny_import_lobbyists.py
*/
CREATE TABLE IF NOT EXISTS PersonStateAffiliation (
pid INTEGER,
state VARCHAR(2),
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
PRIMARY KEY (pid, state),
FOREIGN KEY (pid) REFERENCES Person(pid),
FOREIGN KEY (state) REFERENCES State(abbrev)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/* Entity::Organizations
Organizations are companies or organizations.
Sources: CA: Transcription Tool,
refactored_Contributions.py, refactored_Cal-Access-Accessor.py
NY: Transcription Tool,
ny_import_lobbyist.py
*/
CREATE TABLE IF NOT EXISTS Organizations (
oid INTEGER AUTO_INCREMENT, -- Organization id
name VARCHAR(200), -- name
type INTEGER DEFAULT 0, -- type (not fleshed out yet)
city VARCHAR(200), -- city
stateHeadquartered VARCHAR(2), -- U.S. state, where it's based
analysis_flag BOOL DEFAULT FALSE,
source VARCHAR(255),
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
PRIMARY KEY (oid),
FOREIGN KEY (stateHeadquarterd) REFERENCES State(abbrev),
INDEX name_idx (name)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/*
Holds information about merged organizations. We
use this because we want to retain the names of Organizations
we think should be considered the same.
*/
CREATE TABLE IF NOT EXISTS MergedOrgs (
oid INT,
merged_name VARCHAR(255),
PRIMARY KEY (oid, merged_name),
FOREIGN KEY (oid) REFERENCES Organizations(oid)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/*
Tracks the relationship between Organizations and their affiliated states.
Sources: CA: Transcription Tool,
refactored_Contributions.py, refactored_Cal-Access-Accessor.py
NY: Transcription Tool,
ny_import_lobbyist.py
*/
CREATE TABLE IF NOT EXISTS OrganizationStateAffiliation (
oid INTEGER,
state VARCHAR(2),
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts int(11) GENERATED ALWAYS AS ((to_seconds(`lastTouched`) - to_seconds('1970-01-01'))) VIRTUAL,
PRIMARY KEY (oid, state),
FOREIGN KEY (oid) REFERENCES Organizations(oid),
FOREIGN KEY (state) REFERENCES State(abbrev)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/*
Holds information on governors and lieutenant-governors.
Sources: All filled by hand
*/
CREATE TABLE IF NOT EXISTS Governor (
pid INTEGER,
office ENUM('Governor', 'Lieutenant Governor'),
mailing_address VARCHAR(255),
city VARCHAR(255),
zip VARCHAR(20),
phone VARCHAR(20),
fax VARCHAR(20),
email VARCHAR(255),
state VARCHAR(2),
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
PRIMARY KEY (pid),
FOREIGN KEY (pid) REFERENCES Person(pid),
FOREIGN KEY (state) REFERENCES State(abbrev)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/* Entity::Legislator
A legislator has a description and bio and several contact information.
Sources: CA: refactored_legislator_migrate.py
NY: ny_import_legislators.py, ny_import_spreadsheet_data.py
*/
CREATE TABLE IF NOT EXISTS Legislator (
pid INTEGER, -- Person id (ref. Person.pid)
description VARCHAR(1000), -- description
twitter_handle VARCHAR(100), -- twitter handle (ex: @example)
capitol_phone VARCHAR(30), -- phone number (format: (xxx) xxx-xxxx)
capitol_fax VARCHAR(30), -- fax number (format: (xxx) xxx-xxxx)
website_url VARCHAR(200), -- url
room_number VARCHAR(10), -- room number
email VARCHAR(255),
email_form_link VARCHAR(200), -- email link
state VARCHAR(2), -- state where term was served
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
dr_id INTEGER UNIQUE AUTO_INCREMENT,
PRIMARY KEY (pid, state),
FOREIGN KEY (pid) REFERENCES Person(pid),
FOREIGN KEY (state) REFERENCES State(abbrev),
INDEX state_idx (state)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/* Weak Entity::Term:
Legislators have Terms. For each term a legislator serves, keep track of
what district, house, and party they are associated with because legislators
can change those every term.
Sources: CA: refactored_legislator_migrate.py
NY: ny_import_legislators.py
*/
CREATE TABLE IF NOT EXISTS Term (
pid INTEGER, -- Person id (ref. Person.pised)
official_bio TEXT,
year YEAR, -- year served
district INTEGER(3), -- district legislator served in
house VARCHAR(100), -- house they serve in,
party ENUM('Republican', 'Democrat', 'Other'),
start DATE, -- start date of term
start_ts INT(11) AS (UNIX_TIMESTAMP(start)), -- Used by Drupal
end DATE, -- end date of term
end_ts INT(11) AS (UNIX_TIMESTAMP(end)), -- Used by Drupal
current_term TINYINT(4), -- Whether this is a current term
state VARCHAR(2), -- state where term was served
-- caucus VARCHAR(200), -- group that generally votes together. Not currently in use
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
dr_id INTEGER UNIQUE AUTO_INCREMENT,
PRIMARY KEY (pid, year, house, state),
FOREIGN KEY (pid) REFERENCES Legislator(pid), -- change to
FOREIGN KEY (house, state) REFERENCES House(name, state),
FOREIGN KEY (state) REFERENCES State(abbrev)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/* Entity::Committee
When a bill is introduced in either the Senate or the House, it is sent to a
standing committee for study and to receive public comment. The committee
makes an initial determination if the proposal should go forward in the
legislature. If it votes to do so, the committee can suggest amendments to
the bill, approve it for further action by the full Senate or House, or
disapprove it.
Sources: CA: refractored_Get_Committees_Web.py
NY: ny_import_committees.py, ny_import_committees_scrape_assembly.py
*/
CREATE TABLE IF NOT EXISTS Committee (
cid INTEGER(3) AUTO_INCREMENT, -- Committee id
session_year YEAR,
current_flag BOOL,
house VARCHAR(200) NOT NULL,
name VARCHAR(200) NOT NULL, -- committee name
short_name VARCHAR(200) NOT NULL, -- committee name
type VARCHAR(100),
room VARCHAR(255),
phone VARCHAR(30),
fax VARCHAR(30),
email VARCHAR(256),
state VARCHAR(2),
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
dr_changed INT DEFAULT NOW() ON UPDATE NOW(),
dr_changed_ts INT AS (UNIX_TIMESTAMP(dr_changed)),
PRIMARY KEY (cid),
UNIQUE (session_year, house, name, state),
UNIQUE (cid, session_year, house, state), -- This shouldn't be necessary, but we have
-- redundant info in servesOn and I want to enforce a foreign key
FOREIGN KEY (state) REFERENCES State(abbrev),
FOREIGN KEY (house, state) REFERENCES House(name, state),
FOREIGN KEY (name, house, state) REFERENCES CommitteeNames(name, house, state),
INDEX name_idx (name),
INDEX stae_idx (state)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/*The same committees can exist across multiple session years and they have their own pages,
so the site needs away to track committees over time. This table capture the many-to-one relationship
from Committee to CommitteeNames*/
CREATE TABLE IF NOT EXISTS CommitteeNames (
cn_id INTEGER AUTO_INCREMENT,
name VARCHAR(200) NOT NULL,
house VARCHAR(200),
state VARCHAR(2),
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
PRIMARY KEY (cn_id),
UNIQUE (name, house, state),
FOREIGN KEY (house, state) REFERENCES House(name, state),
FOREIGN KEY (state) REFERENCES State(abbrev)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/* Relationship::servesOn(many-to-many) << [Committee, Term]
A legislator (in a specific term) can serve on one or more committees.
Sources: CA: refractored_Get_Committees_Web.py
NY: ny_import_committees.py, ny_import_committees_scrape_assembly.py
*/
CREATE TABLE IF NOT EXISTS servesOn (
pid INTEGER, -- Person id (ref. Person.pid)
year YEAR, -- The session year
house VARCHAR(100),
cid INTEGER(3), -- Committee id (ref. Committee.cid)
position ENUM('Chair', 'Vice-Chair', 'Co-Chair', 'Member'),
current_flag BOOL,
start_date DATE,
end_date DATE,
state VARCHAR(2),
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
dr_id INTEGER UNIQUE AUTO_INCREMENT,
PRIMARY KEY (pid, year, house, state, cid),
FOREIGN KEY (cid, year, house, state) REFERENCES Committee(cid, year, house, state),
-- ^^ This key is really gross, but we have this data in servesOn and I need
-- to enforce this constraint to avoid problems.
FOREIGN KEY (cid) REFERENCES Committee(cid),
FOREIGN KEY (house, state) REFERENCES House(name, state),
FOREIGN KEY (state) REFERENCES State(abbrev)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/*
Similar to servesOn, but the relationship between staff members and committees.
Sources: TODO write a script that does this
*/
CREATE TABLE IF NOT EXISTS ConsultantServesOn (
pid INTEGER, -- Person id (ref. Person.pid)
session_year YEAR, -- year served
cid INTEGER(3), -- Committee id (ref. Committee.cid)
position VARCHAR(255) NOT NULL,
current_flag BOOL,
start_date DATE,
start_date_ts INT,
end_date DATE DEFAULT NULL,
end_date_ts INT,
state VARCHAR(2),
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
dr_id INTEGER UNIQUE AUTO_INCREMENT,
PRIMARY KEY (pid, cid, start_date),
FOREIGN KEY (cid) REFERENCES Committee(cid),
FOREIGN KEY (pid) REFERENCES LegislativeStaff(pid),
FOREIGN KEY (state) REFERENCES State(abbrev),
INDEX start_date_ts_idx (start_date_ts),
INDEX end_date_ts_idx (end_date_ts),
INDEX current_flag_idx (current_flag_idx)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/* Entity::Bill
A legislator (Senator/Assembly Member) or Committee can author a bill. It
goes through the legislative process and changes states and versions multiple
times. The house is where the bill was introduced in. The session indicates
what legislative session was occurring when the bill was introduced.
Sources: CA: refactored_Bill_Extract
NY: ny_import_bills.py
*/
CREATE TABLE IF NOT EXISTS Bill (
bid VARCHAR(23), -- Bill id (concat of state+years+session+type+number)
type VARCHAR(5) NOT NULL, -- bill type abbreviation
number INTEGER NOT NULL, -- bill number
billState VARCHAR(255) NOT NULL,
status VARCHAR(60), -- current bill status
house VARCHAR(100),
session INTEGER(1), -- 0: Normal session, 1: Special session
sessionYear YEAR(4),
visibility_flag BOOLEAN DEFAULT 0,
state VARCHAR(2),
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
dr_id INTEGER UNIQUE AUTO_INCREMENT,
dr_changed INT DEFAULT NOW() ON UPDATE NOW(),
dr_changed_ts INT AS (UNIX_TIMESTAMP(dr_changed)),
PRIMARY KEY (bid),
FOREIGN KEY (state) REFERENCES State(abbrev),
INDEX name (type, number),
INDEX state_idx (state)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/*
Bills go through different versions. This table captures a specific version of a bill. Vids do
not necessarily follow a logical order, but they do always contain the bid
Sources: CA: refactored_Bill_Extract.py
NY: ny_import_bills.py
*/
CREATE TABLE IF NOT EXISTS BillVersion (
vid VARCHAR(33),
bid VARCHAR(23),
date DATE,
date_ts INT(11) AS (UNIX_TIMESTAMP(date_ts)), -- Used by Drupal
billState VARCHAR(255) NOT NULL,
subject TEXT,
appropriation BOOLEAN,
substantive_changes BOOLEAN,
title TEXT,
digest MEDIUMTEXT,
text MEDIUMTEXT,
state VARCHAR(2),
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
dr_id INTEGER UNIQUE AUTO_INCREMENT,
PRIMARY KEY (vid),
FOREIGN KEY (bid) REFERENCES Bill(bid),
FOREIGN KEY (state) REFERENCES State(abbrev)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/* Entity::Hearing
There are many hearings per day. A bill is presented during a hearing and
testimonies may be heard in support or opposition to the bill. During the
hearing, a committee will vote on the bill.
Sources: CA: Transcription Tool,
ca_agenda.py
NY: Transcription Tool
*/
CREATE TABLE IF NOT EXISTS Hearing (
hid INTEGER AUTO_INCREMENT, -- Hearing id
date DATE, -- date of hearing
date_ts INT(11) AS (UNIX_TIMESTAMP(date)), -- Used by Drupal
type ENUM('Regular', 'Budget', 'Informational', 'Summary') DEFAULT 'Regular',
session_year YEAR NOT NULL,
state VARCHAR(2),
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
dr_changed INT DEFAULT NOW() ON UPDATE NOW(),
dr_changed_ts INT AS (UNIX_TIMESTAMP(dr_changed)),
PRIMARY KEY (hid),
FOREIGN KEY (state) REFERENCES State(abbrev),
INDEX session_year_idx (session_year),
INDEX date_idx (date),
INDEX date_ts_idx (date_ts),
INDEX state_idx (state)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/* Relationship::CommitteeHearings(many-to-many) << [Committee, Hearing]
After the bill is introduced, a bill is assigned a policy committee according
to subject area. During the committee hearing, the author presents the bill to
the committee. Testimonies may be heard in support or opposition to the bill.
The committee then votes on whether to pass the bill out of the committee, or
that it be passed as amended.
Sources: CA: ca_agenda.py
NY: Transcription Tool
*/
CREATE TABLE IF NOT EXISTS CommitteeHearings (
cid INTEGER, -- Committee id (ref. Committee.cid)
hid INTEGER, -- Hearing id (ref. Hearing.hid)
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
dr_id INTEGER UNIQUE AUTO_INCREMENT,
PRIMARY KEY (cid, hid),
FOREIGN KEY (cid) REFERENCES Committee(cid),
FOREIGN KEY (hid) REFERENCES Hearing(hid)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/*
Used to hold the many-to-many relationship btw Hearings and Bills.
Sources: CA: ca_agenda.py
NY: No data
*/
CREATE TABLE IF NOT EXISTS HearingAgenda (
hid INTEGER, -- Hearing id (ref. Hearing.hid)
bid VARCHAR(23),
date_created DATE, -- The date the agenda info was posted
date_created_ts INT(11) AS (UNIX_TIMESTAMP(date_created)), -- Used by Drupal
current_flag TINYINT(1), -- Whether this is the most recent agenda
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
dr_id INTEGER UNIQUE AUTO_INCREMENT,
PRIMARY KEY (hid, bid, date_created),
FOREIGN KEY (bid) REFERENCES Bill(bid),
FOREIGN KEY (hid) REFERENCES Hearing(hid)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/*
Actions that take place on bills. E.g. "first time read", "Moved to floor"
Sources: CA: refactored_Action_Extract_Aug.py
NY: ny_import_actions.py
*/
CREATE TABLE IF NOT EXISTS Action (
bid VARCHAR(23),
date DATE,
date_ts INT(11) AS (UNIX_TIMESTAMP(date)), -- Used by Drupal
text TEXT,
seq_num INT,
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
dr_id INTEGER UNIQUE AUTO_INCREMENT,
FOREIGN KEY (bid) REFERENCES Bill(bid),
INDEX date_ts_idx (date_ts)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/*
Info for the videos of the hearings hosted on the site.
Sources: CA: Transcription Tool
NY: Transcription Tool
*/
CREATE TABLE IF NOT EXISTS Video (
vid INTEGER AUTO_INCREMENT,
fileId VARCHAR(50), -- formerly youtubeId. Our name for file
hid INTEGER,
position INTEGER,
startOffset INTEGER,
duration INTEGER,
srtFlag TINYINT(1) DEFAULT 0,
state VARCHAR(2),
source ENUM("YouTube", "Local", "Other"),
doVFT TINYINT(1) DEFAULT 0,
rank TINYINT(1) DEFAULT 0,
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
PRIMARY KEY (vid),
FOREIGN KEY (hid) REFERENCES Hearing(hid),
FOREIGN KEY (state) REFERENCES State(abbrev),
INDEX state_idx (state)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/*
Used for diarization of Vidoes
Sources: CA: Transcription Tool
NY: Transcription Tool
*/
CREATE TABLE IF NOT EXISTS Video_ttml (
vid INTEGER,
version INTEGER DEFAULT 0,
ttml MEDIUMTEXT,
source VARCHAR(20) DEFAULT 0,
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
dr_id INTEGER UNIQUE AUTO_INCREMENT,
FOREIGN KEY (vid) REFERENCES Video(vid)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/*
The portion of a Hearing where a specific bill was discussed.
Sources: CA: Transcription Tool
NY: Transcription Tool
*/
CREATE TABLE IF NOT EXISTS BillDiscussion (
did INTEGER AUTO_INCREMENT,
bid VARCHAR(23),
hid INTEGER NOT NULL,
startVideo INTEGER,
startTime INTEGER,
endVideo INTEGER,
endTime INTEGER,
numVideos INTEGER(4),
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
dr_changed INT DEFAULT NOW() ON UPDATE NOW(),
dr_changed_ts INT AS (UNIX_TIMESTAMP(dr_changed)),
PRIMARY KEY (did),
UNIQUE KEY (bid, startVideo, startTime),
FOREIGN KEY (bid) REFERENCES Bill(bid),
FOREIGN KEY (hid) REFERENCES Hearing(hid),
FOREIGN KEY (startVideo) REFERENCES Video(vid),
FOREIGN KEY (endVideo) REFERENCES Video(vid)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/*
Specific motions on bills. E.g. "Motion to ammend", "Motion to table"
Sources: CA: refactored_Motion_Extract.py
NY: No Data
*/
CREATE TABLE IF NOT EXISTS Motion (
mid INTEGER(20),
text TEXT,
doPass TINYINT(1),
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
dr_id INTEGER UNIQUE AUTO_INCREMENT,
PRIMARY KEY (mid)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/*
The aggregated vote information for a specific vote on a bill.
Sources: CA: refactored_Vote_Extract.py
NY: ny_import_billvotes.py
*/
CREATE TABLE IF NOT EXISTS BillVoteSummary (
voteId INTEGER AUTO_INCREMENT,
bid VARCHAR(23),
mid INTEGER(20),
cid INTEGER,
VoteDate DATETIME,
VoteDate_ts INT(11) AS (UNIX_TIMESTAMP(VoteDate)), -- Used by Drupal
ayes INTEGER,
naes INTEGER,
abstain INTEGER,
result VARCHAR(20),
VoteDateSeq INT,
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
PRIMARY KEY(voteId),
FOREIGN KEY (mid) REFERENCES Motion(mid),
FOREIGN KEY (bid) REFERENCES Bill(bid),
FOREIGN KEY (cid) REFERENCES Committee(cid),
INDEX VoteDate_ts_idex (VoteDate_ts)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/*
The vote information of a specific legislators on a given vote.
Sources: CA: refactored_Vote_Extract.py
NY: ny_import_billvotes.py
*/
CREATE TABLE IF NOT EXISTS BillVoteDetail (
pid INTEGER,
voteId INTEGER,
result VARCHAR(20),
state VARCHAR(2),
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
dr_id INTEGER UNIQUE AUTO_INCREMENT,
PRIMARY KEY(pid, voteId),
FOREIGN KEY (pid) REFERENCES Legislator(pid),
FOREIGN KEY (state) REFERENCES State(abbrev),
FOREIGN KEY (voteId) REFERENCES BillVoteSummary(voteId)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/*
The legislators that author specific bills.
Sources: CA: refactored_Author_Extract.py
NY: ny_import_authors.py
**Note that for now contribution is either always lead author
or blank
*/
CREATE TABLE IF NOT EXISTS authors (
pid INTEGER,
bid VARCHAR(23),
vid VARCHAR(33),
contribution ENUM('Lead Author', 'Principal Coauthor', 'Coauthor') DEFAULT 'Coauthor',
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
dr_id INTEGER UNIQUE AUTO_INCREMENT,
PRIMARY KEY (pid, bid, vid),
FOREIGN KEY (pid) REFERENCES Legislator(pid), -- change to Person
FOREIGN KEY (bid, vid) REFERENCES BillVersion(bid, vid)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/*
This is just the different author rolls enumerated. So why isn't it an enum Andrew?
Don't ask too many questions.
Sources: CA: TODO something should fill this
NY: No data
*/
CREATE TABLE IF NOT EXISTS BillSponsorRolls (
roll VARCHAR(100),
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
dr_id INTEGER UNIQUE AUTO_INCREMENT,
PRIMARY KEY (roll)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/*
Table basically just the same info as authors, but it clarifies their
role. We have a second table as not to confuse the druple scripts that
pull author names. Ideally we role this into authors soon.
Sources: CA: TODO something should fill this
NY: No data
*/
CREATE TABLE IF NOT EXISTS BillSponsors (
pid INTEGER,
bid VARCHAR(23),
vid VARCHAR(33),
contribution VARCHAR(100),
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
dr_id INTEGER UNIQUE AUTO_INCREMENT,
PRIMARY KEY (pid, bid, vid, contribution),
FOREIGN KEY (pid) REFERENCES Legislator(pid),
FOREIGN KEY (bid, vid) REFERENCES BillVersion(bid, vid),
FOREIGN KEY (contribution) REFERENCES BillSponsorRolls(roll)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/*
Bills can be authored by legislators or whole committees. This table captures the case where
a bill was authored by a committee.
Sources: CA: refactored_Author_Extract.py
NY: ny_import_committeeauthors.py
*/
CREATE TABLE IF NOT EXISTS CommitteeAuthors(
cid INTEGER,
bid VARCHAR(23),
vid VARCHAR(33),
state VARCHAR(2),
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
dr_id INTEGER UNIQUE AUTO_INCREMENT,
PRIMARY KEY(cid, bid, vid),
FOREIGN KEY (bid) REFERENCES Bill(bid),
FOREIGN KEY (cid) REFERENCES Committee(cid),
FOREIGN KEY (vid) REFERENCES BillVersion(vid),
FOREIGN KEY (state) REFERENCES State(abbrev)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/*
The individual blocks of text our transcripts are broken up into.
Sources: CA: Transcription Tool
NY: Transcription Tool
*/
CREATE TABLE IF NOT EXISTS Utterance (
uid INTEGER AUTO_INCREMENT,
vid INTEGER,
pid INTEGER,
time INTEGER,
endTime INTEGER,
text TEXT,
current BOOLEAN NOT NULL,
finalized BOOLEAN NOT NULL,
type ENUM('Author', 'Testimony', 'Discussion'),
alignment ENUM('For', 'Against', 'For_if_amend', 'Against_unless_amend', 'Neutral', 'Indeterminate', 'NA'),
dataFlag INTEGER DEFAULT 0,
diarizationTag VARCHAR(5) DEFAULT '',
did INT,
state VARCHAR(2),
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
dr_changed INT DEFAULT NOW() ON UPDATE NOW(),
dr_changed_ts INT AS (UNIX_TIMESTAMP(dr_changed))
PRIMARY KEY (uid),
UNIQUE KEY (uid, vid, pid, current, time),
FOREIGN KEY (pid) REFERENCES Person(pid),
FOREIGN KEY (vid) REFERENCES Video(vid),
FOREIGN KEY (did) REFERENCES BillDiscussion(did),
FOREIGN KEY (state) REFERENCES State(abbrev),
INDEX state_idx (state)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/*
Whenever an organization gives something to a legislator, that must be recorded. This table
keeps track of all those "gifts".
Sources: CA: Hans provides this data
NY: None
*/
CREATE TABLE IF NOT EXISTS Gift (
RecordId INTEGER AUTO_INCREMENT,
pid INTEGER,
schedule ENUM('D', 'E'), -- D is a normal gift whereas E is a travel gift
sourceName VARCHAR(50),
activity VARCHAR(40),
city VARCHAR(30),
cityState VARCHAR(10),
value DOUBLE,
giftDate DATE,
giftDate_ts INT(11) AS (UNIX_TIMESTAMP(giftDate)), -- Used by Drupal
sessionYear YEAR,
reimbursed TINYINT(1),
giftIncomeFlag TINYINT(1) DEFAULT 0,
speechFlag TINYINT(1) DEFAULT 0,
description VARCHAR(80),
oid INT, -- Just matched from sourceName
state VARCHAR(2),
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
PRIMARY KEY(RecordId),
FOREIGN KEY (oid) REFERENCES Organizations(oid),
FOREIGN KEY (pid) REFERENCES Person(pid),
FOREIGN KEY (state) REFERENCES State(abbrev),
INDEX session_year_idx (session_year)
)
ENGINE = INNODB
CHARACTER SET utf8 COLLATE utf8_general_ci;
/*
These are formal contributions to legislators campaigns.
Sources: CA: refactored_Contributions.py
NY: TODO ??
*/
CREATE TABLE IF NOT EXISTS Contribution (
id VARCHAR(20),
pid INTEGER, -- The legislator
year INTEGER,
date DATETIME,
date_ts INT(11) AS (UNIX_TIMESTAMP(date)), -- Used by Drupal
sesssionYear YEAR,
house VARCHAR(10),
donorName VARCHAR(255), -- Uhh this should probs tie to Person at some point
donorOrg VARCHAR(255),
amount DOUBLE,
oid INT, -- just matched from donorOrg
state VARCHAR(2),
lastTouched TIMESTAMP DEFAULT NOW() ON UPDATE NOW(),
lastTouched_ts INT(11) AS (UNIX_TIMESTAMP(lastTouched)),
PRIMARY KEY(id),
FOREIGN KEY (oid) REFERENCES Organizations(oid),