Skip to content

Commit

Permalink
Merge branch 'upgrade-csvkit' into add-workflow-to-gen-data
Browse files Browse the repository at this point in the history
  • Loading branch information
ChenglimEar committed Apr 14, 2024
2 parents 44d37b0 + 1f54189 commit ffdc7e6
Show file tree
Hide file tree
Showing 319 changed files with 53,175 additions and 163,312 deletions.
3 changes: 2 additions & 1 deletion .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
FROM mcr.microsoft.com/vscode/devcontainers/python:3.9-bullseye
#FROM mcr.microsoft.com/vscode/devcontainers/python:3.9-bullseye
FROM mcr.microsoft.com/vscode/devcontainers/python:3.9-bookworm

RUN curl -fsSL https://aka.ms/install-azd.sh | bash

Expand Down
4 changes: 3 additions & 1 deletion .devcontainer/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ services:
network_mode: service:db

db:
image: postgres:16.0-bullseye
#image: postgres:16.0-bullseye
#image: postgres:latest
image: postgres:15.4
restart: unless-stopped
volumes:
- postgres-data:/var/lib/postgresql/data
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ downloads/cached-db
inputs
.local
**/__pycache__
build/candidates.xlsx
30 changes: 15 additions & 15 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ process: process.rb
# todo: remove RUBYOPT variable when activerecord fixes deprecation warnings
echo 'delete from calculations;'| psql $(DATABASE_NAME)
rm -rf build && RUBYOPT="-W:no-deprecated -W:no-experimental" bundle exec ruby process.rb
python bin/create-digests.py
python bin/report-candidates.py
bin/report-schema $(DATABASE_NAME)
bin/create-digests
bin/report-candidates
git --no-pager diff build/digests.json

download-netfile-v2:
Expand Down Expand Up @@ -70,46 +71,45 @@ import: recreatedb
import-cached: recreatedb
cat downloads/cached-db/$(DATABASE_NAME).sql | psql $(DATABASE_NAME)

import-spreadsheets: prep-import-spreadsheets do-import-spreadsheets
import-spreadsheets: do-import-spreadsheets
./bin/make_view

prep-import-spreadsheets:
echo 'DROP VIEW "Measure_Expenditures";' | psql $(DATABASE_NAME)
echo 'DROP VIEW "all_contributions" CASCADE;' | psql $(DATABASE_NAME)
echo 'DROP VIEW "independent_candidate_expenditures";' | psql $(DATABASE_NAME)


do-import-spreadsheets:
echo 'DROP TABLE IF EXISTS candidates;' | psql $(DATABASE_NAME)
echo 'DROP TABLE IF EXISTS candidates CASCADE;' | psql $(DATABASE_NAME)
./bin/create-table $(DATABASE_NAME) $(CSV_PATH) candidates
csvsql --db postgresql:///$(DATABASE_NAME) --insert --no-create --no-inference $(CSV_PATH)/candidates.csv
echo 'ALTER TABLE "candidates" ADD COLUMN id SERIAL PRIMARY KEY;' | psql $(DATABASE_NAME)
./bin/remove-whitespace $(DATABASE_NAME) candidates Candidate
./bin/remove-whitespace $(DATABASE_NAME) candidates Committee_Name
./bin/remove-whitespace $(DATABASE_NAME) candidates Facebook
./bin/remove-whitespace $(DATABASE_NAME) candidates Instagram
./bin/remove-whitespace $(DATABASE_NAME) candidates Twitter
./bin/remove-whitespace $(DATABASE_NAME) candidates Bio

echo 'DROP TABLE IF EXISTS referendums;' | psql $(DATABASE_NAME)
echo 'DROP TABLE IF EXISTS referendums CASCADE;' | psql $(DATABASE_NAME)
./bin/create-table $(DATABASE_NAME) $(CSV_PATH) referendums
csvsql --db postgresql:///$(DATABASE_NAME) --insert --no-create --no-inference $(CSV_PATH)/referendums.csv
echo 'ALTER TABLE "referendums" ADD COLUMN id SERIAL PRIMARY KEY;' | psql $(DATABASE_NAME)
./bin/remove-whitespace $(DATABASE_NAME) referendums Short_Title
./bin/remove-whitespace $(DATABASE_NAME) referendums Summary

echo 'DROP TABLE IF EXISTS name_to_number;' | psql $(DATABASE_NAME)
echo 'DROP TABLE IF EXISTS name_to_number CASCADE;' | psql $(DATABASE_NAME)
./bin/create-table $(DATABASE_NAME) $(CSV_PATH) name_to_number
csvsql --db postgresql:///$(DATABASE_NAME) --insert --no-create --no-inference $(CSV_PATH)/name_to_number.csv

echo 'DROP TABLE IF EXISTS committees;' | psql $(DATABASE_NAME)
echo 'DROP TABLE IF EXISTS committees CASCADE;' | psql $(DATABASE_NAME)
./bin/create-table $(DATABASE_NAME) $(CSV_PATH) committees
csvsql --db postgresql:///$(DATABASE_NAME) --insert --no-create --no-inference $(CSV_PATH)/committees.csv
echo 'ALTER TABLE "committees" ADD COLUMN id SERIAL PRIMARY KEY;' | psql $(DATABASE_NAME)
./bin/remove-whitespace $(DATABASE_NAME) committees Filer_NamL

echo 'DROP TABLE IF EXISTS office_elections;' | psql $(DATABASE_NAME)
echo 'DROP TABLE IF EXISTS office_elections CASCADE;' | psql $(DATABASE_NAME)
./bin/create-table $(DATABASE_NAME) $(CSV_PATH) office_elections
csvsql --db postgresql:///$(DATABASE_NAME) --insert --no-create --no-inference downloads/csv/office_elections.csv
echo 'ALTER TABLE "office_elections" ALTER COLUMN title TYPE varchar(50);' | psql $(DATABASE_NAME)
echo 'ALTER TABLE "office_elections" ADD COLUMN id SERIAL PRIMARY KEY;' | psql $(DATABASE_NAME)

echo 'DROP TABLE IF EXISTS elections;' | psql $(DATABASE_NAME)
echo 'DROP TABLE IF EXISTS elections CASCADE;' | psql $(DATABASE_NAME)
./bin/create-table $(DATABASE_NAME) $(CSV_PATH) elections
csvsql --db postgresql:///$(DATABASE_NAME) --insert --no-create --no-inference downloads/csv/elections.csv
echo 'ALTER TABLE "elections" ADD COLUMN id SERIAL PRIMARY KEY;' | psql $(DATABASE_NAME)
Expand Down
14 changes: 13 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,19 @@ gem install pg bundler
bundle install
```

**Note:** There appears to be a problem on Macs with Apple silicon (M-series) chips.
If you get the following error when running `make import`:
```
ImportError: You don't appear to have the necessary database backend installed for connection string you're trying to use. Available backends include:
PostgreSQL: pip install psycopg2
```
Try the following:
```
pip uninstall psycopg2-binary
pip install psycopg2-binary --no-cache-dir
```

### Codespaces

This repository is set up to work in a container under Codespaces. In other words, you can start up an environment that is already set up without having to do any of the installation steps required to set up a local environment. This can be used as a way to troubleshoot code before it is committed to the production pipeline. The following information may be helpful to get started using Codespaces:
Expand Down Expand Up @@ -175,5 +188,4 @@ wget: command not found
Run `brew install wget`.



[form_460]: http://www.fppc.ca.gov/content/dam/fppc/NS-Documents/TAD/Campaign%20Forms/460.pdf
1 change: 1 addition & 0 deletions bin/create-digests.py → bin/create-digests
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/usr/bin/env python
import os
import json
import hashlib
Expand Down
18 changes: 12 additions & 6 deletions bin/create-table
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,24 @@ if [ $# -ne 3 ]; then
fi

DATABASE_NAME=$1
CSV_PATH=$2
csv_path=$2
table_name=$3
filename_glob=$csv_path'/*'${table_name}'.csv'

# create schema file if it's not there
DBSCHEMA_FILEPATH="dbschema/$table_name.sql"
if [ ! -f $DBSCHEMA_FILEPATH ]; then
echo "Creating db schema file $DBSCHEMA_FILEPATH for table $table_name"
csvsql -i postgresql --tables $table_name $CSV_PATH/$table_name.csv > $DBSCHEMA_FILEPATH
csvstack $filename_glob 2> /dev/null | \
csvsql -i postgresql --tables $table_name > $DBSCHEMA_FILEPATH
fi

# create table
echo "Creating $table_name using $DBSCHEMA_FILEPATH"
psql --dbname $DATABASE_NAME -f $DBSCHEMA_FILEPATH
#psql --dbname $DATABASE_NAME -c '\d "'$table_name'"'
# create table if it's not there
if psql $DATABASE_NAME -c '\d "'${table_name}'"' >/dev/null 2>&1; then
echo "Table $table_name exists, so no need to create"
else
echo "Creating $table_name using $DBSCHEMA_FILEPATH"
psql --dbname $DATABASE_NAME -f $DBSCHEMA_FILEPATH
fi


19 changes: 4 additions & 15 deletions bin/import-file
Original file line number Diff line number Diff line change
Expand Up @@ -30,22 +30,11 @@ if psql disclosure-backend -c '\d "'${table_name}'"' >/dev/null 2>&1; then
fi

if ls $filename_glob 2>/dev/null >/dev/null; then
# create schema file if it's not there
DBSCHEMA_FILEPATH="dbschema/$table_name.sql"
if [ ! -f $DBSCHEMA_FILEPATH ]; then
echo "Creating db schema file $DBSCHEMA_FILEPATH for table $table_name"
csvstack $filename_glob 2> /dev/null | \
csvsql -i postgresql --tables $table_name > $DBSCHEMA_FILEPATH
fi
# create table if it's not there
if [ "$table_exists" = '' ]; then
echo "Creating $table_name using $DBSCHEMA_FILEPATH"
psql --dbname $DATABASE_NAME -f $DBSCHEMA_FILEPATH
#psql --dbname $DATABASE_NAME -c '\d "'$table_name'"'
table_exists=true
fi
./bin/create-table $DATABASE_NAME $csv_path $table_name

# insert data
csvstack $filename_glob 2> /dev/null | \
csvsql --db postgresql:///$DATABASE_NAME --tables $table_name --insert --no-inference ${table_exists:+--no-create}
csvsql --db postgresql:///$DATABASE_NAME --tables $table_name --insert --no-inference --no-create
echo -n ' Removing empty Tran_Date... '
./bin/clean "$DATABASE_NAME" "$table_name"
if [ "$fix_pending" = "1" ]; then
Expand Down
23 changes: 20 additions & 3 deletions bin/make_view
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,13 @@ CREATE VIEW independent_candidate_expenditures AS
ON c.election_name = e.name
JOIN
(
SELECT DISTINCT ON ("Filer_ID") "Filer_ID", "Filer_NamL"
FROM committees
-- TODO: possible join on "Ballot_Measure_Election"
SELECT "Filer_ID", "Filer_NamL"
FROM (
SELECT "Filer_ID", "Filer_NamL", ROW_NUMBER() OVER (PARTITION BY "Filer_ID" ORDER BY "Ballot_Measure_Election" DESC NULLS LAST) AS rn
FROM committees
) AS c
WHERE rn=1
) committee
ON committee."Filer_ID" = all_data."Filer_ID"
WHERE (e."Start_Date" IS NULL OR "Exp_Date" >= e."Start_Date")
Expand All @@ -198,10 +203,22 @@ CREATE VIEW independent_candidate_expenditures AS
AND "FPPC" IS NOT NULL
AND "Cand_NamL" IS NOT NULL;
-- Remove summary data that is covered by a report that includes that time period
DROP VIEW IF EXISTS clean_summary CASCADE;
CREATE VIEW clean_summary AS
SELECT * from "Summary" s
WHERE
NOT EXISTS
(select t."Rpt_Date" from "Summary" t where s."Filer_ID" = t."Filer_ID"
and t."From_Date" <= s."From_Date" and s."Thru_Date" <= t."Thru_Date"
and (t."From_Date" <> s."From_Date" or s."Thru_Date" <> t."Thru_Date")
)
;
DROP VIEW IF EXISTS candidate_summary;
CREATE VIEW candidate_summary AS
SELECT election_name, "Candidate", s.*
FROM "Summary" s, candidates c
FROM clean_summary s, candidates c
WHERE cast ("FPPC" as character varying) = "Filer_ID"
AND ("Start_Date" IS NULL OR "From_Date" >= "Start_Date")
AND ("End_Date" IS NULL OR "Thru_Date" <= "End_Date");
Expand Down
9 changes: 2 additions & 7 deletions bin/remove-whitespace
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,9 @@ database_name=$1
table_name=$2
column_name=$3

# trim leading and trailing white spaces
cat <<-QUERY | psql ${database_name}
\\set ON_ERROR_STOP on
UPDATE "$table_name" t SET "$column_name" = REGEXP_REPLACE("$column_name", '\s+$', '');
QUERY

cat <<-QUERY | psql ${database_name}
\\set ON_ERROR_STOP on
UPDATE "$table_name" t SET "$column_name" = REGEXP_REPLACE("$column_name", '^\s+', '');
UPDATE "$table_name" t SET "$column_name" = REGEXP_REPLACE(REGEXP_REPLACE("$column_name", '^\s+', ''), '\s+$', '');
QUERY
1 change: 1 addition & 0 deletions bin/report-candidates.py → bin/report-candidates
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/usr/bin/env python
import os
import json
import hashlib
Expand Down
14 changes: 14 additions & 0 deletions bin/report-schema
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash
# Usage: ./bin/report-schema [database name]
#   ./bin/report-schema disclosure-backend
#
# Writes the schema (DDL only, no row data) of the given PostgreSQL database
# to build/schema.sql via `pg_dump --schema-only`.
#
# Arguments:
#   $1  name of the database to dump
#
# Exit status:
#   1 when the argument count is wrong; otherwise, because of `set -e`,
#   non-zero if pg_dump or the output redirection fails.
set -e

if [ $# -ne 1 ]; then
  echo 'Usage: ./bin/report-schema [database name]'
  exit 1
fi

DATABASE_NAME=$1

# Quote the database name so it reaches pg_dump as a single argument, with no
# word splitting or glob expansion. The build/ directory must already exist:
# this script does not create it, and the redirection fails if it is missing.
pg_dump --schema-only "$DATABASE_NAME" > build/schema.sql

13 changes: 9 additions & 4 deletions bin/travis-deploy
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,16 @@ This is an automated update by travis-ci at
'"$(date)"'
[skip ci]'
# Push to the same branch instead of master
git push \
"https://$GITHUB_AUTH_TOKEN@github.com/caciviclab/disclosure-backend-static.git" \
HEAD:master \
HEAD:${TRAVIS_BRANCH} \
| sed -e "s/$GITHUB_AUTH_TOKEN/[removed]/"

make upload-cache
if [ "${TRAVIS_BRANCH}" = "master" -a ! "${TRAVIS_EVENT_TYPE}" = "pull_request" ]; then
# only upload cache if we're merging onto the main branch
make upload-cache
fi
}

if [ "${TRAVIS_EVENT_TYPE}" = "pull_request" ]; then
Expand All @@ -42,8 +46,9 @@ elif [ ! -d "build" ]; then
echo "The 'build' directory is missing. Bailing!"
elif git diff --exit-code --quiet; then
echo "No changes to deploy!"
elif [ "${TRAVIS_BRANCH}" = "master" -a ! "${TRAVIS_EVENT_TYPE}" = "pull_request" ]; then
elif [ ! "${TRAVIS_EVENT_TYPE}" = "pull_request" ]; then
echo "Deploying build on all branches when build directory changed and not pull request build"
deploy
else
echo "Not deploying since not a build on master branch"
echo "Not deploying build on pull requests"
fi
4 changes: 1 addition & 3 deletions build/_candidates/oakland/2016-11-08/noni-session.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@ name: Noni Session
occupation: Assistant Librarian
party_affiliation: Democrat
photo_url: https://s3-us-west-1.amazonaws.com/odca-candidate-photos/Noni-Session2.png
twitter_url: 'NoniSession
'
twitter_url: NoniSession
votersedge_url: http://votersedge.org/ca/en/ballot/election/area/42/contests/contest/13236/candidate/130758?&county=Alameda%20County&election_authority_id=1
website_url: http://www.nonifordistrict3.com/
---
2 changes: 1 addition & 1 deletion build/_candidates/oakland/2018-11-06/jesse-a-j-smith.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ is_incumbent: false
name: Jesse A.J. Smith
occupation: Writer
photo_url: Jesse-Smith.png
twitter_url: 'OakSmith2018 '
twitter_url: OakSmith2018
votersedge_url: https://votersedge.org/ca/en/ballot/election/area/73/contests/contest/17342/candidate/139775?&county=alameda%20county&election_authority_id=1
website_url: https://oaksmith2018.com/
---
2 changes: 1 addition & 1 deletion build/_candidates/oakland/2018-11-06/joseph-tanios.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ name: Joseph Tanios
occupation: Construction Inspection Supervisor
photo_url: Joseph-Tanios.png
public_funding_received: "$7,463"
twitter_url: 'taniosfor '
twitter_url: taniosfor
votersedge_url: https://votersedge.org/ca/en/ballot/election/area/73/contests/contest/17340/candidate/139761?&county=alameda%20county&election_authority_id=1
website_url: https://www.joetanios.com/
---
2 changes: 1 addition & 1 deletion build/_candidates/oakland/2018-11-06/loren-taylor.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ name: Loren Taylor
occupation: Entrepreneur/Non-Profit Boardmember
photo_url: Loren_Taylor.png
public_funding_received: "$18,345"
twitter_url: 'lorenmtaylor '
twitter_url: lorenmtaylor
votersedge_url: https://votersedge.org/ca/en/ballot/election/area/73/contests/contest/17341/candidate/139766?&county=alameda%20county&election_authority_id=1
website_url: https://www.lorentaylor.org/
---
2 changes: 1 addition & 1 deletion build/_candidates/oakland/2018-11-06/natasha-middleton.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ name: Natasha Middleton
occupation: Management Analyst
photo_url: Natasha-Middleton.png
public_funding_received: "$18,345"
twitter_url: 'MiddletonNat '
twitter_url: MiddletonNat
votersedge_url: https://votersedge.org/ca/en/ballot/election/area/73/contests/contest/17341/candidate/139764?&county=alameda%20county&election_authority_id=1
website_url: https://www.natashaforoakland.com/
---
2 changes: 1 addition & 1 deletion build/_candidates/oakland/2018-11-06/nayeli-maxson.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ name: Nayeli Maxson
occupation: Executive Director/Attorney
photo_url: Nayeli-Maxson.png
public_funding_received: "$18,345"
twitter_url: 'nayelimax '
twitter_url: nayelimax
votersedge_url: https://votersedge.org/ca/en/ballot/election/area/73/contests/contest/17340/candidate/139758?&county=alameda%20county&election_authority_id=1
website_url: https://nayeliforoakland.com/
---
2 changes: 1 addition & 1 deletion build/_candidates/oakland/2018-11-06/shanthi-gonzales.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ is_incumbent: true
name: Shanthi Gonzales
occupation: School Board Director
photo_url: Shanthi-Gonzales.png
twitter_url: 'ShanthiGonzales '
twitter_url: ShanthiGonzales
votersedge_url: https://votersedge.org/ca/en/ballot/election/area/73/contests/contest/17850?&county=alameda%20county&election_authority_id=1
website_url: http://gonzalesforschools.nationbuilder.com/
---
2 changes: 1 addition & 1 deletion build/_candidates/oakland/2018-11-06/sheng-thao.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ name: Sheng Thao
occupation: Chief of Staff
photo_url: Sheng-Thao.png
public_funding_received: "$18,345"
twitter_url: 'sheng_tha0 '
twitter_url: sheng_tha0
votersedge_url: https://votersedge.org/ca/en/ballot/election/area/73/contests/contest/17340/candidate/139762?&county=alameda%20county&election_authority_id=1
website_url: https://www.shengforoakland.com/
---
4 changes: 1 addition & 3 deletions build/_candidates/oakland/2020-11-03/derreck-b-johnson.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ photo_url: derreck_johnson_sub.jpg
twitter_url: derreckbjohnson
votersedge_url: https://votersedge.org/ca/en/ballot/election/87-f810b9/address/null/zip/94611/contests/contest/21265/candidate/151385?cty=ca%2falm
website_url: https://www.johnsonforoakland.com/
facebook_url: |2-
https://www.facebook.com/derreckbjohnson/
facebook_url: https://www.facebook.com/derreckbjohnson/
instagram_url: derreckbjohnson
---
4 changes: 2 additions & 2 deletions build/_committees/1284523.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
---
filer_id: '1284523'
name: ALAMEDA LABOR COUNCIL, AFL-CIO SOLIDARITY PAC
name: Alameda Labor Council, AFL-CIO Solidarity PAC
candidate_controlled_id: ''
data_warning:
title: ALAMEDA LABOR COUNCIL, AFL-CIO SOLIDARITY PAC
title: Alameda Labor Council, AFL-CIO Solidarity PAC
---
4 changes: 2 additions & 2 deletions build/_committees/1294190.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
---
filer_id: '1294190'
name: Unity PAC, a sponsored committee of the Alameda Labor Council, AFL-CIO
name: Unity PAC, a Sponsored Committee of the Alameda Labor Council, AFL-CIO
candidate_controlled_id: ''
data_warning:
title: Unity PAC, a sponsored committee of the Alameda Labor Council, AFL-CIO
title: Unity PAC, a Sponsored Committee of the Alameda Labor Council, AFL-CIO
---
Loading

0 comments on commit ffdc7e6

Please sign in to comment.