-
-
Notifications
You must be signed in to change notification settings - Fork 33
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
6d60e93
commit 5476677
Showing
42 changed files
with
12,838 additions
and
4,960 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
-- Detach peers and visits from their protocol sets, then remove the
-- protocol tables. Everything runs in one transaction.
BEGIN;

-- Remove the foreign key and the column it constrains in a single statement.
ALTER TABLE peers
    DROP CONSTRAINT fk_peers_protocols_set_id,
    DROP COLUMN protocols_set_id;

ALTER TABLE visits
    DROP CONSTRAINT fk_visits_protocols_set_id,
    DROP COLUMN protocols_set_id;

-- Remove the set table and the protocol lookup table.
DROP TABLE protocols_sets;
DROP TABLE protocols;

COMMIT;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
-- Begin the transaction
BEGIN;

-- intarray provides the sort()/uniq() helpers and the integer-array operator
-- classes used by the array columns created in this migration.
CREATE EXTENSION IF NOT EXISTS intarray;

-- One row per distinct protocol the crawler came across.
CREATE TABLE protocols
(
    -- Surrogate key of this protocol
    id         SERIAL PRIMARY KEY,
    -- Time of the last update; used to retrieve the ID after an upsert operation
    updated_at TIMESTAMPTZ   NOT NULL,
    -- Time this protocol was created
    created_at TIMESTAMPTZ   NOT NULL,
    -- The protocol string itself
    protocol   VARCHAR(1000) NOT NULL,

    -- Each protocol string may be stored only once
    CONSTRAINT uq_protocols_protocol UNIQUE (protocol)
);
|
||
-- Copy every 'protocol' entry of the properties table into the protocols
-- table, keeping the original timestamps.
INSERT INTO protocols (protocol, updated_at, created_at)
SELECT
    p.value,
    p.updated_at,
    p.created_at
FROM properties AS p
WHERE p.property = 'protocol';
|
||
-- The set of protocols a peer supports rarely changes between crawls, yet the
-- visits_x_properties table grows with every visit. This table stores each
-- particular combination of protocols once; a visit then links to one row.
CREATE TABLE protocols_sets
(
    -- Surrogate key of this set of protocols
    id           SERIAL PRIMARY KEY,
    -- IDs of the protocols contained in this set
    protocol_ids INT[] NOT NULL,

    -- Reject a second row that holds an identical array
    EXCLUDE USING GIST (protocol_ids WITH =)
);

-- Allow efficient lookups of particular protocol sets.
CREATE INDEX idx_protocols_sets_protocol_ids
    ON protocols_sets USING GIN (protocol_ids);
|
||
-- Scratch table, dropped at commit: one row per visit holding the sorted,
-- de-duplicated array of protocol IDs recorded for that visit.
CREATE TEMP TABLE visits_agg_protocols ON COMMIT DROP AS
SELECT
    visit_id,
    uniq(sort(array_agg(prot.id))) AS protocols_set
FROM visits_x_properties AS vxp
INNER JOIN properties AS p
    ON p.id = vxp.property_id
INNER JOIN protocols AS prot
    ON prot.protocol = p.value
WHERE p.property = 'protocol'
GROUP BY visit_id;

-- Scratch table, dropped at commit: every protocol set exactly once.
CREATE TEMP TABLE distinct_visits_agg_protocols ON COMMIT DROP AS
SELECT DISTINCT vap.protocols_set
FROM visits_agg_protocols AS vap;

-- Persist the distinct sets in the protocols_sets table.
INSERT INTO protocols_sets (protocol_ids)
SELECT dvap.protocols_set
FROM distinct_visits_agg_protocols AS dvap;
|
||
-- Nullable link from a visit to the set of protocols recorded for it.
ALTER TABLE visits
    ADD COLUMN protocols_set_id INT;

-- For every visit in visits_agg_protocols, find the stored set whose array
-- equals the visit's aggregated array and record that set's ID on the visit.
UPDATE visits
SET protocols_set_id = ps.id
FROM visits_agg_protocols AS ag
INNER JOIN protocols_sets AS ps
    ON ps.protocol_ids = ag.protocols_set
WHERE ag.visit_id = visits.id;

-- Added after the backfill so the link is enforced from here on.
ALTER TABLE visits
    ADD CONSTRAINT fk_visits_protocols_set_id
        FOREIGN KEY (protocols_set_id)
        REFERENCES protocols_sets (id)
        ON DELETE NO ACTION;
|
||
|
||
-- Scratch table, dropped at commit: one row per peer holding the sorted,
-- de-duplicated array of protocol IDs recorded for that peer.
CREATE TEMP TABLE peers_agg_protocols ON COMMIT DROP AS
SELECT
    peer_id,
    uniq(sort(array_agg(prot.id))) AS protocols_set
FROM peers_x_properties AS pxp
INNER JOIN properties AS p
    ON p.id = pxp.property_id
INNER JOIN protocols AS prot
    ON prot.protocol = p.value
WHERE p.property = 'protocol'
GROUP BY peer_id;

-- Up to this point protocols_sets has only been filled from visits. Store any
-- set that occurs for a peer but for no visit as well; otherwise the UPDATE
-- below finds no matching row and silently leaves protocols_set_id NULL for
-- that peer.
INSERT INTO protocols_sets (protocol_ids)
SELECT DISTINCT pap.protocols_set
FROM peers_agg_protocols AS pap
WHERE NOT EXISTS (
    SELECT 1
    FROM protocols_sets AS ps
    WHERE ps.protocol_ids = pap.protocols_set
);

-- Nullable link from a peer to its set of protocols.
ALTER TABLE peers
    ADD COLUMN protocols_set_id INT;

-- For every peer in peers_agg_protocols, find the stored set whose array
-- equals the peer's aggregated array and record that set's ID on the peer.
UPDATE peers
SET protocols_set_id = ps.id
FROM peers_agg_protocols AS pap
INNER JOIN protocols_sets AS ps
    ON ps.protocol_ids = pap.protocols_set
WHERE pap.peer_id = peers.id;

-- Added after the backfill so the link is enforced from here on.
ALTER TABLE peers
    ADD CONSTRAINT fk_peers_protocols_set_id
        FOREIGN KEY (protocols_set_id)
        REFERENCES protocols_sets (id)
        ON DELETE NO ACTION;

-- End the transaction
COMMIT;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
-- Detach peers and visits from their agent version, then remove the
-- agent_versions table. Everything runs in one transaction.
BEGIN;

-- Remove the foreign key and the column it constrains in a single statement.
ALTER TABLE peers
    DROP CONSTRAINT fk_peers_agent_version_id,
    DROP COLUMN agent_version_id;

ALTER TABLE visits
    DROP CONSTRAINT fk_visits_agent_version_id,
    DROP COLUMN agent_version_id;

DROP TABLE agent_versions;

COMMIT;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
-- Begin the transaction
BEGIN;

-- One row per distinct agent version string.
CREATE TABLE agent_versions
(
    -- Surrogate key of this agent version
    id            SERIAL PRIMARY KEY,
    -- Time of the last update; used to retrieve the ID after an upsert operation
    updated_at    TIMESTAMPTZ   NOT NULL,
    -- Time this agent version was created
    created_at    TIMESTAMPTZ   NOT NULL,
    -- The agent version string itself
    agent_version VARCHAR(1000) NOT NULL,

    -- Each agent version string may be stored only once
    CONSTRAINT uq_agent_versions_agent_version UNIQUE (agent_version)
);

-- Copy every 'agent_version' entry of the properties table into the new
-- table, keeping the original timestamps.
INSERT INTO agent_versions (agent_version, updated_at, created_at)
SELECT
    p.value,
    p.updated_at,
    p.created_at
FROM properties AS p
WHERE p.property = 'agent_version';
|
||
-- Nullable link from a visit to the agent version recorded for it.
ALTER TABLE visits
    ADD COLUMN agent_version_id INT;

-- Resolve each visit's 'agent_version' property to its agent_versions row and
-- store that row's ID on the visit.
WITH visit_agent_versions AS (
    SELECT
        visit_id,
        av.id AS agent_version_id
    FROM visits_x_properties AS vxp
    INNER JOIN properties AS p
        ON p.id = vxp.property_id
    INNER JOIN agent_versions AS av
        ON av.agent_version = p.value
    WHERE p.property = 'agent_version'
)
UPDATE visits
SET agent_version_id = vav.agent_version_id
FROM visit_agent_versions AS vav
WHERE visits.id = vav.visit_id;

-- Added after the backfill so the link is enforced from here on.
ALTER TABLE visits
    ADD CONSTRAINT fk_visits_agent_version_id
        FOREIGN KEY (agent_version_id)
        REFERENCES agent_versions (id)
        ON DELETE NO ACTION;
|
||
|
||
-- Nullable link from a peer to its agent version.
ALTER TABLE peers
    ADD COLUMN agent_version_id INT;

-- Resolve each peer's 'agent_version' property to its agent_versions row and
-- store that row's ID on the peer.
WITH peer_agent_versions AS (
    SELECT
        peer_id,
        av.id AS agent_version_id
    FROM peers_x_properties AS pxp
    INNER JOIN properties AS p
        ON p.id = pxp.property_id
    INNER JOIN agent_versions AS av
        ON av.agent_version = p.value
    WHERE p.property = 'agent_version'
)
UPDATE peers
SET agent_version_id = pav.agent_version_id
FROM peer_agent_versions AS pav
WHERE peers.id = pav.peer_id;

-- Added after the backfill so the link is enforced from here on.
ALTER TABLE peers
    ADD CONSTRAINT fk_peers_agent_version_id
        FOREIGN KEY (agent_version_id)
        REFERENCES agent_versions (id)
        ON DELETE NO ACTION;

-- End the transaction
COMMIT;
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
-- Begin the transaction
BEGIN;

-- Move the existing table out of the way so its rows stay readable while the
-- replacement table is created under the original name.
ALTER TABLE crawl_properties
    RENAME TO crawl_properties_old;

-- Replacement table: each row carries a count for one crawl and refers to a
-- protocol, an agent version or a dial error through a typed, nullable column.
CREATE TABLE crawl_properties
(
    id               SERIAL PRIMARY KEY,
    -- Plain INT instead of SERIAL: this column holds a reference to
    -- crawls (id), so it must never be filled from an auto-generated
    -- sequence of its own.
    crawl_id         INT         NOT NULL,
    protocol_id      INT,
    agent_version_id INT,
    error            dial_error,
    count            INT         NOT NULL,
    created_at       TIMESTAMPTZ NOT NULL,
    updated_at       TIMESTAMPTZ NOT NULL
);
|
||
-- Attach all three foreign keys in one statement. Rows disappear together with
-- their crawl (CASCADE); the referenced protocol and agent version rows use
-- NO ACTION.
ALTER TABLE crawl_properties
    ADD CONSTRAINT fk_crawl_properties_crawl_id
        FOREIGN KEY (crawl_id)
        REFERENCES crawls (id)
        ON DELETE CASCADE,
    ADD CONSTRAINT fk_crawl_properties_protocol_id
        FOREIGN KEY (protocol_id)
        REFERENCES protocols (id)
        ON DELETE NO ACTION,
    ADD CONSTRAINT fk_crawl_properties_agent_version_id
        FOREIGN KEY (agent_version_id)
        REFERENCES agent_versions (id)
        ON DELETE NO ACTION;
|
||
-- Copy the 'protocol' rows of the old table, resolving the property value to
-- the matching protocols row.
INSERT INTO crawl_properties (crawl_id, protocol_id, count, created_at, updated_at)
SELECT
    cp.crawl_id,
    prot.id AS protocol_id,
    count,
    cp.created_at,
    cp.updated_at
FROM crawl_properties_old AS cp
INNER JOIN properties AS p
    ON p.id = cp.property_id
INNER JOIN protocols AS prot
    ON prot.protocol = p.value
WHERE p.property = 'protocol';

-- Copy the 'agent_version' rows, resolving the property value to the matching
-- agent_versions row.
INSERT INTO crawl_properties (crawl_id, agent_version_id, count, created_at, updated_at)
SELECT
    cp.crawl_id,
    av.id AS agent_version_id,
    count,
    cp.created_at,
    cp.updated_at
FROM crawl_properties_old AS cp
INNER JOIN properties AS p
    ON p.id = cp.property_id
INNER JOIN agent_versions AS av
    ON av.agent_version = p.value
WHERE p.property = 'agent_version';

-- Copy the 'error' rows, casting the property value to the dial_error type.
INSERT INTO crawl_properties (crawl_id, error, count, created_at, updated_at)
SELECT
    cp.crawl_id,
    p.value::dial_error,
    count,
    cp.created_at,
    cp.updated_at
FROM crawl_properties_old AS cp
INNER JOIN properties AS p
    ON p.id = cp.property_id
WHERE p.property = 'error';

-- The renamed original is no longer needed once its rows have been copied.
DROP TABLE crawl_properties_old;

-- End the transaction
COMMIT;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
-- Down migration intentionally not implemented.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
-- Remove the generic properties table together with the two tables that
-- linked it to visits and peers.
BEGIN;

DROP TABLE
    visits_x_properties,
    peers_x_properties,
    properties;

COMMIT;
10 changes: 10 additions & 0 deletions
10
migrations/000041_create_multi_addresses_set_table.down.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
-- Detach visits from their multi address set, then remove the set table.
BEGIN;

-- Remove the foreign key and the column it constrains in a single statement.
ALTER TABLE visits
    DROP CONSTRAINT fk_visits_multi_addresses_set_id,
    DROP COLUMN multi_addresses_set_id;

DROP TABLE multi_addresses_sets;

COMMIT;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
-- Begin the transaction
BEGIN;

-- Each particular combination of multi addresses is stored once in this table.
CREATE TABLE multi_addresses_sets
(
    -- Surrogate key of this set of multi addresses
    id                SERIAL PRIMARY KEY,
    created_at        TIMESTAMPTZ NOT NULL,
    updated_at        TIMESTAMPTZ NOT NULL,
    -- IDs of the multi addresses contained in this set
    multi_address_ids INT[]       NOT NULL,

    -- Reject a second row that holds an identical array
    EXCLUDE USING GIST (multi_address_ids gist__intbig_ops WITH =)
);
|
||
-- Scratch table, dropped at commit: one row per visit holding the sorted,
-- de-duplicated array of multi address IDs recorded for that visit.
CREATE TEMP TABLE visits_agg_multi_addresses ON COMMIT DROP AS
SELECT
    visit_id,
    uniq(sort(array_agg(multi_address_id))) AS multi_address_ids
FROM visits_x_multi_addresses
GROUP BY visit_id;

-- Scratch table, dropped at commit: every multi address set exactly once.
CREATE TEMP TABLE distinct_visits_agg_multi_addresses ON COMMIT DROP AS
SELECT DISTINCT vama.multi_address_ids
FROM visits_agg_multi_addresses AS vama;

-- Persist the distinct sets, stamping both timestamps with the current time.
INSERT INTO multi_addresses_sets (multi_address_ids, updated_at, created_at)
SELECT
    dvama.multi_address_ids,
    NOW(),
    NOW()
FROM distinct_visits_agg_multi_addresses AS dvama;
|
||
|
||
-- Nullable link from a visit to its set of multi addresses.
ALTER TABLE visits
    ADD COLUMN multi_addresses_set_id INT;

-- For every visit in visits_agg_multi_addresses, find the stored set whose
-- array equals the visit's aggregated array and record that set's ID.
UPDATE visits
SET multi_addresses_set_id = mas.id
FROM visits_agg_multi_addresses AS ag
INNER JOIN multi_addresses_sets AS mas
    ON mas.multi_address_ids = ag.multi_address_ids
WHERE ag.visit_id = visits.id;

-- Added after the backfill so the link is enforced from here on.
ALTER TABLE visits
    ADD CONSTRAINT fk_visits_multi_addresses_set_id
        FOREIGN KEY (multi_addresses_set_id)
        REFERENCES multi_addresses_sets (id)
        ON DELETE NO ACTION;

-- GIN index on the array column of the set table.
CREATE INDEX idx_multi_addresses_sets_multi_address_ids
    ON multi_addresses_sets USING GIN (multi_address_ids);

-- End the transaction
COMMIT;
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
-- Remove the visits_x_multi_addresses table inside a transaction.
BEGIN;

DROP TABLE visits_x_multi_addresses;

COMMIT;
Oops, something went wrong.