Skip to content
Keith Moss edited this page Jan 5, 2023 · 1 revision

Welcome to the Scremsong wiki!

Standard reporting SQL

Per X minutes reporting of tweets on election day

WITH 
	config AS (
		SELECT 
			'8 hour'::interval AS utc_diff_interval,
			to_timestamp('2019-05-18 00:00:00', 'YYYY-MM-DD HH24:MI:SS') AS timestamp_start_awst,
			to_timestamp('2019-05-18 23:59:59', 'YYYY-MM-DD HH24:MI:SS') AS timestamp_end_awst,
--			to_timestamp('2019-05-03 00:00:00', 'YYYY-MM-DD HH24:MI:SS') AS timestamp_start_awst,
--			to_timestamp('2019-05-17 23:59:59', 'YYYY-MM-DD HH24:MI:SS') AS timestamp_end_awst,
			'15 minute'::interval AS bin_by_step_interval
	),
	election_tweets AS (
		SELECT 
			*,
			COALESCE(data->'extended_tweet'->>'full_text', data->>'full_text', data->>'text') AS tweet_text,
			to_timestamp(data->>'created_at', 'Dy Mon DD HH24:MI:SS +0000 YYYY') + config.utc_diff_interval AS created_at_awst 
		FROM scremsong.app_tweets 
		CROSS JOIN config 
		WHERE to_timestamp(data->>'created_at', 'Dy Mon DD HH24:MI:SS +0000 YYYY') + config.utc_diff_interval BETWEEN config.timestamp_start_awst AND config.timestamp_end_awst
	),
	tweets AS (
--		Tweets from us
		SELECT created_at_awst FROM election_tweets WHERE data->'user'->>'screen_name' = 'DemSausage'
--		Tweets from us that got replies
--		SELECT created_at_awst FROM election_tweets WHERE data->'user'->>'screen_name' = 'DemSausage' AND tweet_id IN (SELECT data->>'in_reply_to_status_id_str' FROM election_tweets WHERE data->>'in_reply_to_status_id_str' IS NOT NULL)
--		Tweets from us that got no replies
--		SELECT created_at_awst FROM election_tweets WHERE data->'user'->>'screen_name' = 'DemSausage' AND tweet_id NOT IN (SELECT data->>'in_reply_to_status_id_str' FROM election_tweets WHERE data->>'in_reply_to_status_id_str' IS NOT NULL)
--		Tweets in reply to us
--		SELECT created_at_awst FROM election_tweets WHERE data->>'in_reply_to_screen_name' = 'DemSausage'
--		Tweets mentioning us
--		SELECT created_at_awst FROM election_tweets WHERE data->'user'->>'screen_name' != 'DemSausage' AND tweet_text ILIKE '%@DemSausage%'
--		All tweets sent to triage
--		SELECT created_at_awst, column_id FROM election_tweets WHERE column_id IS NOT NULL
--		All tweets sent to triage that got triaged
--		SELECT created_at_awst, column_id FROM election_tweets WHERE column_id IS NOT NULL AND state != 'Active'
--		All tweets ingested
--		SELECT created_at_awst FROM election_tweets
	),
	timestamp_minmax AS (
		SELECT 
			date_trunc('hour', MIN(created_at_awst) - '1 hour'::interval) AS min_timestamp, 
			date_trunc('hour', MAX(created_at_awst)) + '1 hour'::interval AS max_timestamp 
		FROM tweets
	),
	timestamp_range AS (
		SELECT 
			GREATEST(min_timestamp, config.timestamp_start_awst) AS min_timestamp,
			LEAST(max_timestamp, config.timestamp_end_awst) AS max_timestamp
		FROM timestamp_minmax
		CROSS JOIN config 
	),
	timestamp_series AS (
		SELECT 
			generate_series(min_timestamp, max_timestamp, config.bin_by_step_interval) AS interval_start 
		FROM timestamp_range
		CROSS JOIN config 
	)
-- Generate data for the reporting period
SELECT 
	COALESCE(COUNT(tweets.*), 0) AS sum_tweets, TO_CHAR(interval_start, 'HH24:MI') AS hh_mm
FROM tweets
CROSS JOIN config 
RIGHT JOIN timestamp_series ON tweets.created_at_awst BETWEEN interval_start AND interval_start + (config.bin_by_step_interval - '1 second'::interval)
GROUP BY interval_start
ORDER BY interval_start;

-- Generate tweet triage data for the reporting period
--SELECT 
--	jsonb_array_elements_text(scremsong.app_socialcolumns.search_phrases), COALESCE(COUNT(tweets.*), 0) AS sum_tweets, TO_CHAR(interval_start, 'HH24:MI') AS hh_mm
--FROM tweets
--CROSS JOIN config 
--RIGHT JOIN scremsong.app_socialcolumns ON scremsong.app_socialcolumns.id = column_id
--RIGHT JOIN timestamp_series ON tweets.created_at_awst BETWEEN interval_start AND interval_start + (config.bin_by_step_interval - '1 second'::interval)
--GROUP BY scremsong.app_socialcolumns.id, interval_start
--ORDER BY interval_start, scremsong.app_socialcolumns.priority;

-- Total tweets in reporting period
--SELECT 
--	COUNT(tweets.*) AS ttl_tweets
--FROM tweets;

Inspect tweets between two dates

WITH tweets AS (
	SELECT 
		COALESCE(data->'extended_tweet'->>'full_text', data->>'full_text', data->>'text') AS tweet_text, *
	FROM scremsong.app_tweets 
	WHERE to_timestamp(data->>'created_at', 'Dy Mon DD HH24:MI:SS +0000 YYYY') BETWEEN '2019-05-17 16:00:00+00:00' AND '2019-05-17 16:00:00+00:00'::timestamp + '1 day'::interval
)
SELECT *
FROM tweets
WHERE 
--	data->>'text' ILIKE '%@DemSausage%';
--	data->'user'->>'screen_name' = 'DemSausage' AND column_id IS NOT NULL;
--	data->'user'->>'screen_name' = 'DemSausage';
	data->>'in_reply_to_screen_name' = 'DemSausage';
--	data->'user'->>'screen_name' != 'DemSausage' AND tweet_text ILIKE '%@DemSausage%';