diff --git a/.gitignore b/.gitignore index 0d6142f..a1f0790 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ google-credentials.json google-credentials-shared.json +data/*.csv data/*/*.csv data/*/*.csv.gz diff --git a/README.md b/README.md index f64396a..eebb3da 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,8 @@ OPENAI_API_KEY="sk__________" GOOGLE_APPLICATION_CREDENTIALS="/path/to/openai-embeddings-2023/google-credentials.json" BUCKET_NAME="my-bucket" + +DATASET_ADDRESS="my_project.my_dataset" ``` ## Usage @@ -62,7 +64,7 @@ python -m app.openai_service ``` -### Dataset Loading +### Embeddings per User (v1) Demonstrate ability to load the dataset: @@ -70,8 +72,6 @@ Demonstrate ability to load the dataset: python -m app.dataset ``` -### Data Analysis - Perform machine learning and other analyses on the data: OpenAI Embeddings: @@ -87,6 +87,13 @@ Word2Vec Embeddings: + [Classification](app/word2vec_classification/README.md) +### Embeddings per Tweet (v1) + +OpenAI Embeddings: + + + [Fetching Embeddings](app/openai_embeddings/per_tweet/README.md) + + ## Testing ```sh diff --git a/app/bq_service.py b/app/bq_service.py new file mode 100644 index 0000000..2574008 --- /dev/null +++ b/app/bq_service.py @@ -0,0 +1,82 @@ +# https://raw.githubusercontent.com/s2t2/tweet-analysis-2023/main/app/bq_service.py + +import os +from datetime import datetime + +from dotenv import load_dotenv +from google.cloud import bigquery +#from google.cloud.bigquery import QueryJobConfig, ScalarQueryParameter +from pandas import DataFrame + +from app.google_apis import GOOGLE_APPLICATION_CREDENTIALS # implicit check by google.cloud + +load_dotenv() + +#GOOGLE_APPLICATION_CREDENTIALS = os.getenv("GOOGLE_APPLICATION_CREDENTIALS") # implicit check by google.cloud + +# used by child classes only, defined here for convenience +DATASET_ADDRESS = os.getenv("DATASET_ADDRESS", default="tweet-collector-py.impeachment_development") # "MY_PROJECT.MY_DATASET" + + +class BigQueryService(): + + def __init__(self, client=None, dataset_address=DATASET_ADDRESS): + self.client = client or bigquery.Client() + self.dataset_address = dataset_address + + def execute_query(self, sql, verbose=True): + if verbose == True: + print(sql) + job = self.client.query(sql) + return job.result() + + def query_to_df(self, sql, verbose=True): + """high-level wrapper to return a DataFrame""" + results = self.execute_query(sql, verbose=verbose) + records = [dict(row) for row in list(results)] + df = DataFrame(records) + return df + + @staticmethod + def split_into_batches(my_list, batch_size=10_000): + """Splits a list into evenly sized batches""" + # h/t: https://stackoverflow.com/questions/312443/how-do-you-split-a-list-into-evenly-sized-chunks + for i in range(0, len(my_list), batch_size): + yield my_list[i : i + batch_size] + + @staticmethod + def generate_timestamp(dt=None): + """Formats datetime object for storing in BigQuery. Uses current time by default. 
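+
+        Example (illustrative):
+            BigQueryService.generate_timestamp(datetime(2023, 7, 24, 12, 0, 5)) #=> "2023-07-24 12:00:05"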
""" + dt = dt or datetime.now() + return dt.strftime("%Y-%m-%d %H:%M:%S") + + def insert_records_in_batches(self, table, records, batch_size=5_000): + """ + Inserts records in batches because attempting to insert too many rows at once + may result in google.api_core.exceptions.BadRequest: 400 + + Params: + table (table ID string, Table, or TableReference) + records (list of dictionaries) + """ + rows_to_insert = [list(d.values()) for d in records] + #errors = self.client.insert_rows(table, rows_to_insert) + errors = [] + batches = list(BigQueryService.split_into_batches(rows_to_insert, batch_size=batch_size)) + for batch in batches: + errors += self.client.insert_rows(table, batch) + return errors + + + +if __name__ == "__main__": + + service = BigQueryService() + client = service.client + print("PROJECT:", client.project) + + print("DATASETS:") + datasets = list(client.list_datasets()) + for ds in datasets: + #print("...", ds.project, ds.dataset_id) + print("...", ds.reference) diff --git a/app/google_apis.py b/app/google_apis.py new file mode 100644 index 0000000..02ddc60 --- /dev/null +++ b/app/google_apis.py @@ -0,0 +1,11 @@ + + + + +import os + +from dotenv import load_dotenv + +load_dotenv() + +GOOGLE_APPLICATION_CREDENTIALS = os.getenv("GOOGLE_APPLICATION_CREDENTIALS") # implicit check by google.cloud diff --git a/app/model_storage.py b/app/model_storage.py index a18803f..ce153f4 100644 --- a/app/model_storage.py +++ b/app/model_storage.py @@ -5,9 +5,11 @@ from google.cloud import storage as gcs from dotenv import load_dotenv +from app.google_apis import GOOGLE_APPLICATION_CREDENTIALS # implicit check by google.cloud + load_dotenv() -GOOGLE_APPLICATION_CREDENTIALS = os.getenv("GOOGLE_APPLICATION_CREDENTIALS") # implicit check by google.cloud for env var +#GOOGLE_APPLICATION_CREDENTIALS = os.getenv("GOOGLE_APPLICATION_CREDENTIALS") # implicit check by google.cloud for env var #PROJECT_ID = os.getenv("GOOGLE_PROJECT_NAME") # "my-project" BUCKET_NAME = os.getenv("BUCKET_NAME") # "my-bucket" needs to be globally unique! diff --git a/app/openai_embeddings_v2/README.md b/app/openai_embeddings_v2/README.md new file mode 100644 index 0000000..a25d5a3 --- /dev/null +++ b/app/openai_embeddings_v2/README.md @@ -0,0 +1,226 @@ +# OpenAI Embeddings (v2) + +Get embeddings, not only per user, but also per tweet, so we can compare the two approaches. Pull a new sample of tweets for the users we have been analyzing, but this time make sure to keep track of which tweets are being used, which will aid comparisons. + +## Setup + +Migrations, as necessary. 
Here we create a table of all tweets from each user in the sample: + +```sql +--CREATE TABLE `tweet-collector-py.impeachment_production.botometer_sample_max_10` as ( +--CREATE TABLE `tweet-collector-py.impeachment_production.botometer_sample_max_50` as ( +CREATE TABLE `tweet-collector-py.impeachment_production.botometer_sample` as ( + WITH ranked_tweets AS ( + SELECT + u.user_id, t.status_id, t.status_text, t.created_at, + ROW_NUMBER() OVER (PARTITION BY u.user_id ORDER BY RAND()) AS row_num + FROM ( + SELECT DISTINCT user_id + FROM `tweet-collector-py.impeachment_production.botometer_sample_openai_tweet_embeddings_20230724` + ) u + JOIN `tweet-collector-py.impeachment_production.tweets_v2` t on t.user_id = u.user_id + ORDER BY u.user_id, t.created_at + --LIMIT 10 + ) + + SELECT user_id, row_num, + status_id, status_text, created_at, + FROM ranked_tweets + -- WHERE row_num <= 10 -- MAX_TWEETS_PER_USER + -- WHERE row_num <= 50 -- MAX_TWEETS_PER_USER + +); +``` + + +How to sample from this table (choose a `MAX_TWEETS_PER_USER`, which we set as 50 by default): + +```sql +SELECT + count(distinct user_id) as user_count -- 7566 + ,count(distinct status_id) as status_count -- 183727 +FROM `tweet-collector-py.impeachment_production.botometer_sample` +WHERE row_num <= 50 -- MAX_TWEETS_PER_USER +``` + +```sql +CREATE TABLE `tweet-collector-py.impeachment_production.botometer_sample_max_50` as ( + SELECT * + FROM `tweet-collector-py.impeachment_production.botometer_sample` + WHERE row_num <= 50 + ORDER BY user_id, row_num +) +``` + +The 7,566 users in this sample have 183,727 tweets. + +Unique table of texts with identifiers: + +```sql +DROP TABLE IF EXISTS `tweet-collector-py.impeachment_production.botometer_sample_max_50_texts_map`; +CREATE TABLE IF NOT EXISTS `tweet-collector-py.impeachment_production.botometer_sample_max_50_texts_map` as ( + --WITH texts_map as ( + SELECT --s.user_id, s.row_num, s.status_id, s.status_text, s.created_at + ROW_NUMBER() OVER () AS status_text_id + ,s.status_text + ,count(DISTINCT s.status_id) as status_count + ,array_agg(DISTINCT s.status_id) as status_ids + ,count(DISTINCT s.user_id) as user_count + ,array_agg(DISTINCT s.user_id) as user_ids + FROM `tweet-collector-py.impeachment_production.botometer_sample` s + WHERE s.row_num <= 50 -- MAX_TWEETS_PER_USER + GROUP BY 2 + --ORDER BY status_count desc + --) + --SELECT status_text, status_count, status_id + --FROM texts_map, + --UNNEST(status_ids) AS status_id +) +``` + +Of the 183,727 tweets in this sample, there are 80,205 unique texts. 
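+
+Before fetching embeddings, it can help to sanity-check this dedup ratio from Python. The sketch below is illustrative (it is not one of the scripts in this diff); it uses the `BigQueryService` added in `app/bq_service.py` and assumes the two tables created above exist under your `DATASET_ADDRESS`:
+
+```python
+from app.bq_service import BigQueryService
+
+bq = BigQueryService()
+
+sql = f"""
+    SELECT
+        count(distinct s.status_id) as status_count
+        ,count(distinct txt.status_text_id) as text_count
+    FROM `{bq.dataset_address}.botometer_sample` s
+    JOIN `{bq.dataset_address}.botometer_sample_max_50_texts_map` txt
+        ON s.status_text = txt.status_text
+    WHERE s.row_num <= 50 -- MAX_TWEETS_PER_USER
+"""
+
+df = bq.query_to_df(sql)
+print(df)  # expect the 183,727 statuses to collapse to 80,205 unique texts
+```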
+ +Migrate table to receive text embeddings: + +```sql +CREATE TABLE IF NOT EXISTS `tweet-collector-py.impeachment_production.botometer_sample_max_50_openai_text_embeddings` ( + status_text_id INT64, + embeddings ARRAY +) +``` + +Migrate table to receive user embeddings: + +```sql +DROP TABLE IF EXISTS `tweet-collector-py.impeachment_production.botometer_sample_max_50_openai_user_embeddings`; +CREATE TABLE IF NOT EXISTS `tweet-collector-py.impeachment_production.botometer_sample_max_50_openai_user_embeddings` ( + user_id INT64, + embeddings ARRAY +) +``` + +## Embeddings + +### User level Embeddings + +Fetch user-level embeddings, and store in BQ: + +```sh +python -m app.openai_embeddings.per_user + +USERS_LIMIT=10 python -m app.openai_embeddings.per_user +USERS_LIMIT=100 python -m app.openai_embeddings.per_user +USERS_LIMIT=1000 python -m app.openai_embeddings.per_user +``` + +Monitoring the results: + +```sql +SELECT + count(distinct s.user_id) as user_count + ,count(distinct case when emb.user_id is not null then s.user_id end) as users_collected + ,count(distinct case when emb.user_id is not null then s.user_id end) / count(distinct s.user_id) as pct_collected +FROM `tweet-collector-py.impeachment_production.botometer_sample` s +LEFT JOIN `tweet-collector-py.impeachment_production.botometer_sample_max_50_openai_user_embeddings` emb + ON s.user_id = emb.user_id + +``` + + +### Tweet level Embeddings + +Fetch tweet-level embeddings, and store in BQ: + +```sh +python -m app.openai_embeddings.per_tweet.embeddings_job + +TEXTS_LIMIT=10 python -m app.openai_embeddings.per_tweet +TEXTS_LIMIT=1500 python -m app.openai_embeddings.per_tweet +TEXTS_LIMIT=10000 python -m app.openai_embeddings.per_tweet +TEXTS_LIMIT=250000 python -m app.openai_embeddings.per_tweet +``` + +Monitoring the results: + +```sql +SELECT count(distinct status_text_id) as text_count +FROM `tweet-collector-py.impeachment_production.botometer_sample_max_50_openai_text_embeddings` emb +``` + + +Reconstruct table of embedding per status (as they were originally fetched for each distinct text): + + +```sql +CREATE TABLE `tweet-collector-py.impeachment_production.botometer_sample_max_50_openai_status_embeddings` as ( + WITH lookup_table as ( + SELECT txt.status_text_id ,status_id + FROM `tweet-collector-py.impeachment_production.botometer_sample_max_50_texts_map` txt, + UNNEST(txt.status_ids) as status_id + ) + + SELECT txt.status_id, txt.status_text_id, emb.embeddings + FROM lookup_table txt + JOIN `tweet-collector-py.impeachment_production.botometer_sample_max_50_openai_text_embeddings` emb + ON txt.status_text_id = emb.status_text_id + ORDER BY 2 + --LIMIT 10 + +) +``` + +Looks like we may have some duplicates, so update the table to remove dups: + +```sql +-- SELECT status_id, count(*) as row_count +-- FROM `tweet-collector-py.impeachment_production.botometer_sample_max_50_openai_status_embeddings` +-- GROUP BY 1 +-- HAVING row_count > 1 +-- ORDER BY 2 DESC +-- -- 14652 example status ids: 1212493877673779200, 1212848708171321344, 1217970948529364992 + +-- SELECT status_id, status_text_id, count(*) as row_count +-- FROM `tweet-collector-py.impeachment_production.botometer_sample_max_50_openai_status_embeddings` +-- GROUP BY 1,2 +-- HAVING row_count > 1 +-- ORDER BY 2 DESC +-- -- 14652 dups, example status ids: 1212493877673779200, 1212848708171321344, 1217970948529364992 + +CREATE TABLE `tweet-collector-py.impeachment_production.botometer_sample_max_50_openai_status_embeddings_v2` as ( + -- DE-DUPLICATED :-) + SELECT 
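+      -- any_value() arbitrarily keeps one embeddings array per (status_id, status_text_id)
+      -- pair, collapsing the duplicate rows identified above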
status_id, status_text_id, any_value(embeddings) as embeddings + FROM `tweet-collector-py.impeachment_production.botometer_sample_max_50_openai_status_embeddings` + GROUP BY 1,2 +) + +--SELECT count(distinct status_id) as status_count +--FROM `tweet-collector-py.impeachment_production.botometer_sample_max_50_openai_status_embeddings_v2` +---- 183727 +``` + +Add the user-level info back to the table for convenience of future queries. Can always not select it later. + +```sql +CREATE TABLE `tweet-collector-py.impeachment_production.botometer_sample_max_50_openai_status_embeddings_v3` as ( + SELECT s.user_id, s.status_id, s.status_text, s.created_at + , array_length(emb.embeddings) as embeds_length + ,emb.embeddings + FROM `tweet-collector-py.impeachment_production.botometer_sample` s + JOIN `tweet-collector-py.impeachment_production.botometer_sample_max_50_openai_status_embeddings_v2` emb + ON s.status_id = emb.status_id + -- LIMIT 10000 +) + +``` + +The contents of the embeddings alone are greater than the BQ export limit of 1GB, so we have to [export to GCS](https://cloud.google.com/bigquery/docs/exporting-data), or stream via notebook. + + + +## Exporting CSV files to Drive + +See [notebooks](/notebooks/openai_embeddings_v2/README.md). + +## Analysis + +See [notebooks](/notebooks/openai_embeddings_v2/README.md). diff --git a/app/openai_embeddings_v2/per_tweet.py b/app/openai_embeddings_v2/per_tweet.py new file mode 100644 index 0000000..55d0b52 --- /dev/null +++ b/app/openai_embeddings_v2/per_tweet.py @@ -0,0 +1,66 @@ + + +import os +from dotenv import load_dotenv + +from app.bq_service import BigQueryService +from app.openai_service import OpenAIService + + +load_dotenv() + +TEXTS_LIMIT = os.getenv("TEXTS_LIMIT") + + +if __name__ == "__main__": + + bq = BigQueryService() + print(bq) + print("DATASET ADDRESS:", bq.dataset_address) + + print("---------------") + print("TEXTS...") + #print("LIMIT: ", TEXTS_LIMIT) + + sql = f""" + -- FETCH STATUSES WE HAVEN'T ALREADY RETRIEVED EMBEDDINGS FOR + SELECT DISTINCT txt.status_text_id, txt.status_text --, emb.status_text_id + FROM `{bq.dataset_address}.botometer_sample_max_50_texts_map` txt + LEFT JOIN `{bq.dataset_address}.botometer_sample_max_50_openai_text_embeddings` emb + ON emb.status_text_id = txt.status_text_id + WHERE emb.status_text_id IS NULL + ORDER BY txt.status_text_id + """ + + if TEXTS_LIMIT: + texts_limit = int(TEXTS_LIMIT) + sql += f" LIMIT {texts_limit} " + + df = bq.query_to_df(sql) + print(len(df)) + if df.empty: + print("NO MORE TEXTS TO PROCESS... GOODBYE!") + exit() + + print("---------------") + print("EMBEDDINGS...") + texts = df["status_text"].tolist() + + ai = OpenAIService() + embeddings = ai.get_embeddings_in_dynamic_batches(texts, batch_char_limit=15_000) + #print(len(embeddings)) + + df["embeddings"] = embeddings + records = df[["status_text_id", "embeddings"]].to_dict("records") + + print("---------------") + print("SAVING...") + + embeddings_table = bq.client.get_table(f"{bq.dataset_address}.botometer_sample_max_50_openai_text_embeddings") # API call! 
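+    # NOTE: the embeddings table must already exist (run the migrations in the README);
+    # otherwise get_table() raises google.api_core.exceptions.NotFound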
+ errors = bq.insert_records_in_batches(embeddings_table, records, batch_size=50) # running into google api issues with larger batches - there are so many embeddings for each row, so we lower the batch count substantially + if any(errors): + print("ERRORS:") + print(errors) + + print("---------------") + print("JOB COMPLETE!") diff --git a/app/openai_embeddings_v2/per_user.py b/app/openai_embeddings_v2/per_user.py new file mode 100644 index 0000000..b350c29 --- /dev/null +++ b/app/openai_embeddings_v2/per_user.py @@ -0,0 +1,82 @@ + + +import os +from dotenv import load_dotenv + +from app.bq_service import BigQueryService +from app.openai_service import OpenAIService + + +load_dotenv() + +USERS_LIMIT = os.getenv("USERS_LIMIT") + +MAX_TWEETS_PER_USER = 50 + + +if __name__ == "__main__": + + bq = BigQueryService() + print(bq) + print("DATASET ADDRESS:", bq.dataset_address) + + print("---------------") + print("USERS...") + #print("LIMIT: ", USERS_LIMIT) + + sql = f""" + -- FETCH USERS WE HAVEN'T ALREADY RETRIEVED EMBEDDINGS FOR + + WITH users_sample as ( + SELECT + s.user_id --,min(s.row_num) as row_min, max(s.row_num) as row_max + ,count(distinct s.status_id) as status_count_max_50 + ,array_agg(distinct s.status_id) as status_ids + ,string_agg(distinct s.status_text, " ") as status_texts + FROM `{bq.dataset_address}.botometer_sample` s + WHERE s.row_num <= {int(MAX_TWEETS_PER_USER)} + GROUP BY 1 + -- ORDER BY user_id + ) + + SELECT u.user_id, u.status_count_max_50, u.status_ids, u.status_texts + FROM users_sample u + LEFT JOIN `{bq.dataset_address}.botometer_sample_max_50_openai_user_embeddings` emb + ON u.user_id = emb.user_id + WHERE emb.user_id IS NULL + ORDER BY u.user_id + """ + + if USERS_LIMIT: + users_limit = int(USERS_LIMIT) + sql += f" LIMIT {users_limit} " + + df = bq.query_to_df(sql) + print(len(df)) + if df.empty: + print("NO MORE USERS TO PROCESS... GOODBYE!") + exit() + + print("---------------") + print("EMBEDDINGS...") + texts = df["status_texts"].tolist() + + ai = OpenAIService() + embeddings = ai.get_embeddings_in_dynamic_batches(texts, batch_char_limit=15_000) + #print(len(embeddings)) + + df["embeddings"] = embeddings + records = df[["user_id", "embeddings"]].to_dict("records") + + print("---------------") + print("SAVING...") + + embeddings_table_name = f"{bq.dataset_address}.botometer_sample_max_50_openai_user_embeddings" + embeddings_table = bq.client.get_table(embeddings_table_name) # API call! + errors = bq.insert_records_in_batches(embeddings_table, records, batch_size=50) # running into google api issues with larger batches - there are so many embeddings for each row, so we lower the batch count substantially + if any(errors): + print("ERRORS:") + print(errors) + + print("---------------") + print("JOB COMPLETE!") diff --git a/app/openai_service.py b/app/openai_service.py index 7f4a94c..79446fb 100644 --- a/app/openai_service.py +++ b/app/openai_service.py @@ -186,7 +186,7 @@ def get_embeddings_in_dynamic_batches(self, texts, batch_char_limit=30_000, slee embeddings = [] counter = 1 for texts_batch in dynamic_batches(texts, batch_char_limit=batch_char_limit): - print(counter, len(texts_batch)) + print("BATCH:", counter, "SIZE:", len(texts_batch)) # retry loop while True: try: @@ -197,6 +197,11 @@ def get_embeddings_in_dynamic_batches(self, texts, batch_char_limit=30_000, slee print(f"... Rate limit reached. Sleeping for {sleep_seconds} seconds.") sleep(sleep_seconds) # retry the same batch + except openai.error.ServiceUnavailableError as err: + print(f"... 
Service Unavailz. Sleeping for {sleep_seconds} seconds.") + print(err) + sleep(sleep_seconds) + # retry the same batch counter += 1 return embeddings diff --git a/notebooks/1_Botometer_Users_Sample_and_OpenAI_Embeddings_20230704.ipynb b/notebooks/openai_embeddings_v1/1_Botometer_Users_Sample_and_OpenAI_Embeddings_20230704.ipynb similarity index 100% rename from notebooks/1_Botometer_Users_Sample_and_OpenAI_Embeddings_20230704.ipynb rename to notebooks/openai_embeddings_v1/1_Botometer_Users_Sample_and_OpenAI_Embeddings_20230704.ipynb diff --git a/notebooks/1_botometer_users_sample_and_openai_embeddings_20230704.py b/notebooks/openai_embeddings_v1/1_botometer_users_sample_and_openai_embeddings_20230704.py similarity index 100% rename from notebooks/1_botometer_users_sample_and_openai_embeddings_20230704.py rename to notebooks/openai_embeddings_v1/1_botometer_users_sample_and_openai_embeddings_20230704.py diff --git a/notebooks/2_Embeddings_Data_Export.ipynb b/notebooks/openai_embeddings_v1/2_Embeddings_Data_Export.ipynb similarity index 100% rename from notebooks/2_Embeddings_Data_Export.ipynb rename to notebooks/openai_embeddings_v1/2_Embeddings_Data_Export.ipynb diff --git a/notebooks/2_embeddings_data_export.py b/notebooks/openai_embeddings_v1/2_embeddings_data_export.py similarity index 100% rename from notebooks/2_embeddings_data_export.py rename to notebooks/openai_embeddings_v1/2_embeddings_data_export.py diff --git a/notebooks/3_Merging_Remaining_BOM_Scores.ipynb b/notebooks/openai_embeddings_v1/3_Merging_Remaining_BOM_Scores.ipynb similarity index 100% rename from notebooks/3_Merging_Remaining_BOM_Scores.ipynb rename to notebooks/openai_embeddings_v1/3_Merging_Remaining_BOM_Scores.ipynb diff --git a/notebooks/3_merging_remaining_bom_scores.py b/notebooks/openai_embeddings_v1/3_merging_remaining_bom_scores.py similarity index 100% rename from notebooks/3_merging_remaining_bom_scores.py rename to notebooks/openai_embeddings_v1/3_merging_remaining_bom_scores.py diff --git a/notebooks/Analysis_Single_Results_File_v4.ipynb b/notebooks/openai_embeddings_v1/Analysis_Single_Results_File_v4.ipynb similarity index 100% rename from notebooks/Analysis_Single_Results_File_v4.ipynb rename to notebooks/openai_embeddings_v1/Analysis_Single_Results_File_v4.ipynb diff --git a/notebooks/README.md b/notebooks/openai_embeddings_v1/README.md similarity index 99% rename from notebooks/README.md rename to notebooks/openai_embeddings_v1/README.md index 3db7bf6..af78fdf 100644 --- a/notebooks/README.md +++ b/notebooks/openai_embeddings_v1/README.md @@ -4,7 +4,7 @@ -# Notebooks and Code +# Notebooks and Code (v1) This section provides a walk-through of the methods, with working code for reference. The process starts with three Python notebooks and follows up with Python scripts in a larger code repository. 
diff --git a/notebooks/analysis_single_results_file/bars_fourway_label.png b/notebooks/openai_embeddings_v1/analysis_single_results_file/bars_fourway_label.png similarity index 100% rename from notebooks/analysis_single_results_file/bars_fourway_label.png rename to notebooks/openai_embeddings_v1/analysis_single_results_file/bars_fourway_label.png diff --git a/notebooks/analysis_single_results_file/bars_is_bom_astroturf.png b/notebooks/openai_embeddings_v1/analysis_single_results_file/bars_is_bom_astroturf.png similarity index 100% rename from notebooks/analysis_single_results_file/bars_is_bom_astroturf.png rename to notebooks/openai_embeddings_v1/analysis_single_results_file/bars_is_bom_astroturf.png diff --git a/notebooks/analysis_single_results_file/bars_is_bot.png b/notebooks/openai_embeddings_v1/analysis_single_results_file/bars_is_bot.png similarity index 100% rename from notebooks/analysis_single_results_file/bars_is_bot.png rename to notebooks/openai_embeddings_v1/analysis_single_results_file/bars_is_bot.png diff --git a/notebooks/analysis_single_results_file/bars_is_factual.png b/notebooks/openai_embeddings_v1/analysis_single_results_file/bars_is_factual.png similarity index 100% rename from notebooks/analysis_single_results_file/bars_is_factual.png rename to notebooks/openai_embeddings_v1/analysis_single_results_file/bars_is_factual.png diff --git a/notebooks/analysis_single_results_file/bars_is_toxic.png b/notebooks/openai_embeddings_v1/analysis_single_results_file/bars_is_toxic.png similarity index 100% rename from notebooks/analysis_single_results_file/bars_is_toxic.png rename to notebooks/openai_embeddings_v1/analysis_single_results_file/bars_is_toxic.png diff --git a/notebooks/analysis_single_results_file/bars_opinion_community.png b/notebooks/openai_embeddings_v1/analysis_single_results_file/bars_opinion_community.png similarity index 100% rename from notebooks/analysis_single_results_file/bars_opinion_community.png rename to notebooks/openai_embeddings_v1/analysis_single_results_file/bars_opinion_community.png diff --git a/notebooks/analysis_single_results_file/dumbbells_all.png b/notebooks/openai_embeddings_v1/analysis_single_results_file/dumbbells_all.png similarity index 100% rename from notebooks/analysis_single_results_file/dumbbells_all.png rename to notebooks/openai_embeddings_v1/analysis_single_results_file/dumbbells_all.png diff --git a/notebooks/analysis_single_results_file_v4.py b/notebooks/openai_embeddings_v1/analysis_single_results_file_v4.py similarity index 100% rename from notebooks/analysis_single_results_file_v4.py rename to notebooks/openai_embeddings_v1/analysis_single_results_file_v4.py diff --git a/notebooks/openai_embeddings_v2/Exporting_Embeddings_to_Drive_20240201_v3.ipynb b/notebooks/openai_embeddings_v2/Exporting_Embeddings_to_Drive_20240201_v3.ipynb new file mode 100644 index 0000000..4d3e7ce --- /dev/null +++ b/notebooks/openai_embeddings_v2/Exporting_Embeddings_to_Drive_20240201_v3.ipynb @@ -0,0 +1,3596 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "collapsed_sections": [ + "FF154lGK_1N6", + "EuDR5mjnq3fV" + ], + "toc_visible": true, + "machine_shape": "hm" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "a9534258cd5d4015abe53b5cc42bea56": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ac0e74c8f58e49c6bb3fd4c1c1896f88", + "IPY_MODEL_adb6537bd87a414bb31ea9b94a5cea32", + "IPY_MODEL_81f254cc026e4d00a32635eb396c9ddf" + ], + "layout": "IPY_MODEL_039ed011d14044338c28dce8b1d5e4c4" + } + }, + "ac0e74c8f58e49c6bb3fd4c1c1896f88": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d0dc7020e8d14f468b3492c069634a45", + "placeholder": "​", + "style": "IPY_MODEL_322b984ee24e4027a9c2e08d862434b3", + "value": "Downloading: 100%" + } + }, + "adb6537bd87a414bb31ea9b94a5cea32": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8992d57e6b884e38bdcbe229b021cf1d", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_218cf7dbd973421e9bdcbfade62433b5", + "value": 1 + } + }, + "81f254cc026e4d00a32635eb396c9ddf": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d33ad5423db544268200ce28b0736a0b", + "placeholder": "​", + "style": "IPY_MODEL_aa4ab83e43404f3181111e4fafb2d586", + "value": "" + } + }, + "039ed011d14044338c28dce8b1d5e4c4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + 
"object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d0dc7020e8d14f468b3492c069634a45": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "322b984ee24e4027a9c2e08d862434b3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "8992d57e6b884e38bdcbe229b021cf1d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "218cf7dbd973421e9bdcbfade62433b5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "d33ad5423db544268200ce28b0736a0b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "aa4ab83e43404f3181111e4fafb2d586": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "39f9a290d00b42a2b880aa81eec76fa6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_25dfedf9a54843bb80caaeed5ab6db34", + "IPY_MODEL_a7ff0760fb244fb7a4903b85c7bc7ad6", + "IPY_MODEL_56a8b786d34e4742a2450df0be1dbae8" + ], + "layout": "IPY_MODEL_d4b9ca844460431fb72287a7356486f6" + } + }, + "25dfedf9a54843bb80caaeed5ab6db34": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6b7a4772df3c46cc9be56e1fe11a5e88", + "placeholder": "​", + "style": "IPY_MODEL_efdd95d3dd5b4cedba40aa01b6cb94b9", + "value": "Downloading: 100%" + } + }, + "a7ff0760fb244fb7a4903b85c7bc7ad6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_cfd1c20bbcbd4c62bf430d7c7f4624e5", + "max": 183815, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_e265dcb3e51c49d3ad32c8dbf73cdf30", + "value": 183815 + } + }, + "56a8b786d34e4742a2450df0be1dbae8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f238e606a590433b8db7da9c314b3f00", + "placeholder": "​", + "style": "IPY_MODEL_f65e9b709410414eae7a36a53527de65", + "value": "" + } + }, + "d4b9ca844460431fb72287a7356486f6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6b7a4772df3c46cc9be56e1fe11a5e88": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "efdd95d3dd5b4cedba40aa01b6cb94b9": { + 
"model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "cfd1c20bbcbd4c62bf430d7c7f4624e5": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e265dcb3e51c49d3ad32c8dbf73cdf30": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "f238e606a590433b8db7da9c314b3f00": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f65e9b709410414eae7a36a53527de65": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": 
"1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "_-39w0IS18f-" + }, + "source": [ + "We fetched OpenAI embeddings and stored on BQ. Let's download a CSV file to drive for further analysis." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ymoi-E5OjZD5" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Google Drive" + ], + "metadata": { + "id": "FF154lGK_1N6" + } + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "from google.colab import drive\n", + "\n", + "drive.mount('/content/drive')\n", + "print(os.getcwd(), os.listdir(os.getcwd()))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "i_eMkJ5fpKDp", + "outputId": "8b5deedd-9b30-499d-a9d4-43cb37d83864" + }, + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Mounted at /content/drive\n", + "/content ['.config', 'drive', 'sample_data']\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "LNuZpKWOGmFZ" + }, + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5OKjyFQ0owen", + "outputId": "82bef74a-d6e3-410a-d066-d4dd791c5e25" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/content/drive/MyDrive/Research/DS Research Shared 2024\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 2 + } + ], + "source": [ + "# you might need to create a google drive SHORTCUT that has this same path\n", + "# ... 
or update the path to use your own google drive organization\n", + "#DIRPATH = '/content/drive/MyDrive/Research/Disinfo Research Shared 2022'\n", + "#DIRPATH = '/content/drive/MyDrive/Research/DS Research Shared 2023'\n", + "DIRPATH = '/content/drive/MyDrive/Research/DS Research Shared 2024'\n", + "\n", + "print(DIRPATH)\n", + "os.path.isdir(DIRPATH)" + ] + }, + { + "cell_type": "code", + "source": [ + "DATA_DIRPATH = os.path.join(DIRPATH, \"projects\", \"Impeachment 2020 Embeddings\", \"data\")\n", + "os.path.isdir(DATA_DIRPATH)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "jjkYs5KJ99LX", + "outputId": "667d345f-a72e-4631-d555-5deaa2b89277" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D7AHRh645FX3" + }, + "source": [ + "### BigQuery Service" + ] + }, + { + "cell_type": "code", + "source": [ + "from google.colab import auth\n", + "\n", + "# asks you to login\n", + "auth.authenticate_user()" + ], + "metadata": { + "id": "rfJKRImngZAw" + }, + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "yOz8eD9JkA7-" + }, + "source": [ + "from google.cloud import bigquery\n", + "from pandas import DataFrame, read_gbq\n", + "\n", + "\n", + "PROJECT_ID = \"tweet-collector-py\"\n", + "\n", + "class BigQueryService():\n", + " def __init__(self, project_id=PROJECT_ID):\n", + " self.project_id = project_id\n", + " self.client = bigquery.Client(project=self.project_id)\n", + "\n", + " def execute_query(self, sql, verbose=True):\n", + " if verbose == True:\n", + " print(sql)\n", + " job = self.client.query(sql)\n", + " return job.result()\n", + "\n", + " #def query_to_df(self, sql, verbose=True):\n", + " # \"\"\"high-level wrapper to return a DataFrame\"\"\"\n", + " # results = self.execute_query(sql, verbose=verbose)\n", + " # return DataFrame([dict(row) for row in results])\n", + "\n", + " def query_to_df(self, sql, verbose=True):\n", + " \"\"\"high-level wrapper to return a DataFrame\"\"\"\n", + " if verbose == True:\n", + " print(sql)\n", + " # https://pandas.pydata.org/docs/reference/api/pandas.read_gbq.html#pandas-read-gbq\n", + " #return read_gbq(sql, project_id=self.project_id) # progress_bar_type=\"tqdm_notebook\"\n", + " #progress_bar_type=\"tqdm_notebook\"\n", + " return read_gbq(sql, project_id=self.project_id, progress_bar_type=\"tqdm_notebook\")\n", + "\n", + "\n" + ], + "execution_count": 5, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "-qBZo9ezksZz", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "9129e2b5-6b1b-4866-c1b3-7c65f4e338e3" + }, + "source": [ + "bq = BigQueryService()\n", + "print(bq)" + ], + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "<__main__.BigQueryService object at 0x785284479510>\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(\"DATASETS:\")\n", + "datasets = list(bq.client.list_datasets())\n", + "for ds in datasets:\n", + " #print(\"...\", ds.project, ds.dataset_id)\n", + " print(\"...\", ds.reference)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "RZU3pXwscjGG", + "outputId": "56343dd5-b460-4245-ea47-dae3d7e32422" + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + 
"DATASETS:\n", + "... tweet-collector-py.analysis_2021\n", + "... tweet-collector-py.analysis_2021_development\n", + "... tweet-collector-py.collection_2021\n", + "... tweet-collector-py.disinfo_2021_development\n", + "... tweet-collector-py.disinfo_2021_production\n", + "... tweet-collector-py.election_2020_analysis\n", + "... tweet-collector-py.election_2020_development\n", + "... tweet-collector-py.election_2020_production\n", + "... tweet-collector-py.f1_racing_2023_development\n", + "... tweet-collector-py.f1_racing_2023_production\n", + "... tweet-collector-py.impeachment_2021_development\n", + "... tweet-collector-py.impeachment_2021_production\n", + "... tweet-collector-py.impeachment_backup\n", + "... tweet-collector-py.impeachment_development\n", + "... tweet-collector-py.impeachment_production\n", + "... tweet-collector-py.impeachment_test\n", + "... tweet-collector-py.jan6_committee_development\n", + "... tweet-collector-py.jan6_committee_production\n", + "... tweet-collector-py.transition_2021_development\n", + "... tweet-collector-py.transition_2021_production\n", + "... tweet-collector-py.truth_2023_development\n", + "... tweet-collector-py.truth_2023_production\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Helper Functions" + ], + "metadata": { + "id": "P29oXQGyVQys" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Unpacking Embeddings" + ], + "metadata": { + "id": "ynQMv14-qE9O" + } + }, + { + "cell_type": "code", + "source": [ + "import json\n", + "from pandas import DataFrame\n", + "\n", + "\n", + "def unpack(embeddings_str):\n", + " \"\"\"Takes a string value containing an array of OpenAI embeddings,\n", + " and returns a list of floats.\n", + " \"\"\"\n", + " if isinstance(embeddings_str, str):\n", + " return json.loads(embeddings_str)\n", + " else:\n", + " return embeddings_str\n", + "\n", + "\n", + "def unpacked(df, col_prefix=\"openai\"):\n", + " \"\"\"Takes a dataframe witha single column of OpenAI embeddings,\n", + " and unpacks them into their own separate columns,\n", + " and returns a modified version of the original dataframe,\n", + " with the original embeddings column replaced by the new unpacked columns\n", + " \"\"\"\n", + "\n", + " print(\"UNPACKING...\")\n", + " embeds = df[\"embeddings\"].apply(unpack)\n", + " print(type(embeds))\n", + "\n", + " print(\"RECONSTRUCTING...\")\n", + " embeds = DataFrame(embeds.values.tolist())\n", + " embeds.columns = [f\"{col_prefix}_{col}\" for col in embeds.columns]\n", + " embeds.index = df.index\n", + " print(embeds.shape)\n", + " #embeds.head()\n", + "\n", + " print(\"MERGING...\")\n", + " df_unpacked = df.merge(embeds, left_index=True, right_index=True)\n", + " df_unpacked.drop(columns=[\"embeddings\"], inplace=True)\n", + " print(df_unpacked.shape)\n", + " return df_unpacked\n", + "\n" + ], + "metadata": { + "id": "wgjELMJBVTaa" + }, + "execution_count": 30, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Embeddings" + ], + "metadata": { + "id": "OQulQbvbFpHo" + } + }, + { + "cell_type": "code", + "source": [ + "DATASET_ADDRESS = \"tweet-collector-py.impeachment_production\"" + ], + "metadata": { + "id": "tkyUzzQsCFRN" + }, + "execution_count": 8, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "sql = f\"\"\"\n", + " SELECT\n", + " count(distinct s.user_id) as user_count\n", + " ,count(distinct s.status_id) as status_count\n", + " FROM `{DATASET_ADDRESS}.botometer_sample` s\n", + " JOIN 
`{DATASET_ADDRESS}.botometer_sample_max_50_openai_status_embeddings_v2` emb\n", + " ON s.status_id = emb.status_id\n", + "\"\"\"\n", + "bq.query_to_df(sql, verbose=False)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 113, + "referenced_widgets": [ + "a9534258cd5d4015abe53b5cc42bea56", + "ac0e74c8f58e49c6bb3fd4c1c1896f88", + "adb6537bd87a414bb31ea9b94a5cea32", + "81f254cc026e4d00a32635eb396c9ddf", + "039ed011d14044338c28dce8b1d5e4c4", + "d0dc7020e8d14f468b3492c069634a45", + "322b984ee24e4027a9c2e08d862434b3", + "8992d57e6b884e38bdcbe229b021cf1d", + "218cf7dbd973421e9bdcbfade62433b5", + "d33ad5423db544268200ce28b0736a0b", + "aa4ab83e43404f3181111e4fafb2d586" + ] + }, + "id": "lVEKM1LiB4i2", + "outputId": "a5fd9b78-0b3c-431d-c02e-ed95da064244" + }, + "execution_count": 9, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Downloading: 0%| |" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "a9534258cd5d4015abe53b5cc42bea56" + } + }, + "metadata": {} + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " user_count status_count\n", + "0 7566 183727" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_countstatus_count
07566183727
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## User Embeddings" + ], + "metadata": { + "id": "TJUWWC48HcGk" + } + }, + { + "cell_type": "markdown", + "source": [ + "7566 users" + ], + "metadata": { + "id": "CGpJ-kDaHfi5" + } + }, + { + "cell_type": "code", + "source": [ + "sql = f\"\"\"\n", + " SELECT\n", + " u.user_id, u.created_on\n", + " --, u.screen_name_count, u.screen_names, split(u.screen_names, \",\")[0] as screen_name\n", + " ,u.status_count, u.rt_count\n", + " ,u.is_bot --, u.bot_rt_network\n", + " ,u.opinion_community --, u.avg_score_lr, avg_score_nb, avg_score_bert\n", + " , u.is_q --, u.q_status_count\n", + " --, u.follower_count, u.follower_count_b, u.follower_count_h\n", + " --, u.friend_count, u.friend_count_b, u.friend_count_h\n", + "\n", + " ,u.avg_toxicity --, u.avg_severe_toxicity, u.avg_insult, u.avg_obscene, u.avg_threat, u.avg_identity_hate\n", + " , u.avg_fact_score -- ,u.fact_scored_count\n", + "\n", + " ,u.bom_astroturf, u.bom_overall --, u.bom_cap --,u.bom_lookup_count\n", + " --,u.bom_fake_follower, u.bom_financial, u.bom_other, u.bom_self_declared, u.bom_spammer\n", + "\n", + " ,emb.embeddings\n", + "\n", + " FROM `{DATASET_ADDRESS}.user_details_v20240128_slim` u\n", + " JOIN `{DATASET_ADDRESS}.botometer_sample_max_50_openai_user_embeddings` emb\n", + " ON emb.user_id = u.user_id\n", + " -- LIMIT 10\n", + "\"\"\"\n", + "\n", + "users_df = bq.query_to_df(sql, verbose=False)\n", + "print(users_df.shape)" + ], + "metadata": { + "id": "KoCuPC6FHdoZ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "users_df.head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 313 + }, + "id": "yzm8dWf_Xm3N", + "outputId": "36d24c1b-0aae-48e8-f4cc-4a4e8c216fbf" + }, + "execution_count": 20, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " user_id created_on status_count rt_count is_bot \\\n", + "0 3420436216 2015-08-13 555 540 True \n", + "1 108121958 2010-01-24 2 2 False \n", + "2 3038308638 2015-02-23 755 665 True \n", + "3 332396536 2011-07-09 951 951 True \n", + "4 955082522479808512 2018-01-21 570 533 True \n", + "\n", + " opinion_community is_q avg_toxicity avg_fact_score bom_astroturf \\\n", + "0 0 False 0.056113 1.983193 0.295 \n", + "1 0 False 0.456710 NaN 0.580 \n", + "2 0 False 0.069860 3.401786 0.970 \n", + "3 1 False 0.044264 2.304511 0.580 \n", + "4 0 False 0.049325 4.714286 0.355 \n", + "\n", + " bom_overall embeddings \n", + "0 0.190 [-0.018801862373948097, -0.007904230616986752,... \n", + "1 0.110 [-0.030551623553037643, -0.0053298575803637505... \n", + "2 0.970 [-0.007297390140593052, 0.0010276929242536426,... \n", + "3 0.750 [-0.01834747940301895, -0.007322159130126238, ... \n", + "4 0.225 [-0.024803657084703445, 0.007516898214817047, ... " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_idcreated_onstatus_countrt_countis_botopinion_communityis_qavg_toxicityavg_fact_scorebom_astroturfbom_overallembeddings
034204362162015-08-13555540True0False0.0561131.9831930.2950.190[-0.018801862373948097, -0.007904230616986752,...
11081219582010-01-2422False0False0.456710NaN0.5800.110[-0.030551623553037643, -0.0053298575803637505...
230383086382015-02-23755665True0False0.0698603.4017860.9700.970[-0.007297390140593052, 0.0010276929242536426,...
33323965362011-07-09951951True1False0.0442642.3045110.5800.750[-0.01834747940301895, -0.007322159130126238, ...
49550825224798085122018-01-21570533True0False0.0493254.7142860.3550.225[-0.024803657084703445, 0.007516898214817047, ...
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 20 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Saving CSV to drive:" + ], + "metadata": { + "id": "1TYFGOn7Ow-P" + } + }, + { + "cell_type": "code", + "source": [ + "csv_filepath = os.path.join(DATA_DIRPATH, \"botometer_sample_max_50_openai_user_embeddings.csv.gz\")\n", + "users_df.to_csv(csv_filepath, index=False, compression=\"gzip\")" + ], + "metadata": { + "id": "V5m_ZmDFHeLx" + }, + "execution_count": 21, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### ... Unpacked" + ], + "metadata": { + "id": "D0A_V2nXWoIm" + } + }, + { + "cell_type": "code", + "source": [ + "users_df_unpacked = unpacked(users_df)\n", + "print(users_df.shape)\n", + "users_df_unpacked.head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 359 + }, + "id": "sucTnQwdW6vH", + "outputId": "cc8eac4c-8a73-4f6c-da63-5ab167763510" + }, + "execution_count": 31, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "UNPACKING...\n", + "\n", + "RECONSTRUCTING...\n", + "(7566, 1536)\n", + "MERGING...\n", + "(7566, 1547)\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " user_id created_on status_count rt_count is_bot \\\n", + "0 3420436216 2015-08-13 555 540 True \n", + "1 108121958 2010-01-24 2 2 False \n", + "2 3038308638 2015-02-23 755 665 True \n", + "3 332396536 2011-07-09 951 951 True \n", + "4 955082522479808512 2018-01-21 570 533 True \n", + "\n", + " opinion_community is_q avg_toxicity avg_fact_score bom_astroturf ... \\\n", + "0 0 False 0.056113 1.983193 0.295 ... \n", + "1 0 False 0.456710 NaN 0.580 ... \n", + "2 0 False 0.069860 3.401786 0.970 ... \n", + "3 1 False 0.044264 2.304511 0.580 ... \n", + "4 0 False 0.049325 4.714286 0.355 ... \n", + "\n", + " openai_1526 openai_1527 openai_1528 openai_1529 openai_1530 \\\n", + "0 -0.001867 -0.013167 0.020885 -0.022568 -0.033631 \n", + "1 0.017651 -0.009439 0.024375 -0.032553 -0.042185 \n", + "2 -0.026273 -0.008139 0.030285 -0.029902 -0.030887 \n", + "3 -0.005520 -0.005288 0.017071 -0.033637 -0.040202 \n", + "4 0.009959 0.004695 0.005555 -0.012851 -0.032229 \n", + "\n", + " openai_1531 openai_1532 openai_1533 openai_1534 openai_1535 \n", + "0 0.016153 0.024127 -0.017519 0.002636 -0.039838 \n", + "1 0.013782 0.011320 -0.014862 -0.010413 -0.020359 \n", + "2 0.022481 -0.005476 -0.016279 -0.010138 -0.021454 \n", + "3 0.041773 -0.009370 0.003352 0.009391 -0.042671 \n", + "4 0.031443 0.008163 -0.018501 -0.008724 -0.042027 \n", + "\n", + "[5 rows x 1547 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_idcreated_onstatus_countrt_countis_botopinion_communityis_qavg_toxicityavg_fact_scorebom_astroturf...openai_1526openai_1527openai_1528openai_1529openai_1530openai_1531openai_1532openai_1533openai_1534openai_1535
034204362162015-08-13555540True0False0.0561131.9831930.295...-0.001867-0.0131670.020885-0.022568-0.0336310.0161530.024127-0.0175190.002636-0.039838
11081219582010-01-2422False0False0.456710NaN0.580...0.017651-0.0094390.024375-0.032553-0.0421850.0137820.011320-0.014862-0.010413-0.020359
230383086382015-02-23755665True0False0.0698603.4017860.970...-0.026273-0.0081390.030285-0.029902-0.0308870.022481-0.005476-0.016279-0.010138-0.021454
33323965362011-07-09951951True1False0.0442642.3045110.580...-0.005520-0.0052880.017071-0.033637-0.0402020.041773-0.0093700.0033520.009391-0.042671
49550825224798085122018-01-21570533True0False0.0493254.7142860.355...0.0099590.0046950.005555-0.012851-0.0322290.0314430.008163-0.018501-0.008724-0.042027
\n", + "

5 rows × 1547 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 31 + } + ] + }, + { + "cell_type": "code", + "source": [ + "csv_filepath = os.path.join(DATA_DIRPATH, \"botometer_sample_max_50_openai_user_embeddings_unpacked.csv.gz\")\n", + "users_df_unpacked.to_csv(csv_filepath, index=False, compression=\"gzip\")" + ], + "metadata": { + "id": "6Ll2G8XpXa2O" + }, + "execution_count": 32, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4t48ewACjXQy" + }, + "source": [ + "## Tweet Embeddings" + ] + }, + { + "cell_type": "markdown", + "source": [ + "183K statuses" + ], + "metadata": { + "id": "5sJsvSTWCVVX" + } + }, + { + "cell_type": "markdown", + "source": [ + "Wow wow wow this is taking a long time (1hr +...) to stream the data down over the network..." + ], + "metadata": { + "id": "4gIJd0h_-rXO" + } + }, + { + "cell_type": "markdown", + "source": [ + "Re-doing with the statuses table v2, that has duplicate lookups removed (row per unique status)...\n", + "\n", + "Re-doing with statuses table v3, which has status texts as well..." + ], + "metadata": { + "id": "Ho6uZl7csvkf" + } + }, + { + "cell_type": "code", + "source": [ + "sql = f\"\"\"\n", + " SELECT user_id, status_id, status_text, created_at, embeds_length, embeddings\n", + " FROM `{DATASET_ADDRESS}.botometer_sample_max_50_openai_status_embeddings_v3`\n", + " -- LIMIT 10000\n", + "\"\"\"\n", + "\n", + "tweets_df = bq.query_to_df(sql, verbose=True)\n", + "print(tweets_df.shape)\n", + "tweets_df.head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 692, + "referenced_widgets": [ + "39f9a290d00b42a2b880aa81eec76fa6", + "25dfedf9a54843bb80caaeed5ab6db34", + "a7ff0760fb244fb7a4903b85c7bc7ad6", + "56a8b786d34e4742a2450df0be1dbae8", + "d4b9ca844460431fb72287a7356486f6", + "6b7a4772df3c46cc9be56e1fe11a5e88", + "efdd95d3dd5b4cedba40aa01b6cb94b9", + "cfd1c20bbcbd4c62bf430d7c7f4624e5", + "e265dcb3e51c49d3ad32c8dbf73cdf30", + "f238e606a590433b8db7da9c314b3f00", + "f65e9b709410414eae7a36a53527de65" + ] + }, + "id": "VYBVlVBN9tIf", + "outputId": "9a24052f-7a26-4ba8-ceb1-7d74deabf42d" + }, + "execution_count": 33, + "outputs": [ + { + "metadata": { + "tags": null + }, + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " SELECT user_id, status_id, status_text, created_at, embeds_length, embeddings\n", + " FROM `tweet-collector-py.impeachment_production.botometer_sample_max_50_openai_status_embeddings_v3` \n", + " -- LIMIT 10000\n", + "\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "39f9a290d00b42a2b880aa81eec76fa6", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Downloading: 0%| |" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(183815, 6)\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " user_id status_id \\\n", + "0 897845802701377536 1221540755451392001 \n", + "1 935739601301458947 1223458629837295619 \n", + "2 571774622 1217445781663363072 \n", + "3 384679808 1223705594818748416 \n", + "4 701264221653217281 1218459840277729281 \n", + "\n", + " status_text \\\n", + "0 Doubt it..It appears they all have gone the wa... \n", + "1 RT @Wyn1745: Democrats are ‘setting the stage’... \n", + "2 RT @sarahdwire: I’m loathe to insert myself in... \n", + "3 RT @RepRatcliffe: We warned them...As Schiff a... \n", + "4 RT @chipfranklin: Because \"impeachment\" in the... 
\n", + "\n", + " created_at embeds_length \\\n", + "0 2020-01-26 21:09:45+00:00 1536 \n", + "1 2020-02-01 04:10:42+00:00 1536 \n", + "2 2020-01-15 13:57:48+00:00 1536 \n", + "3 2020-02-01 20:32:03+00:00 1536 \n", + "4 2020-01-18 09:07:18+00:00 1536 \n", + "\n", + " embeddings \n", + "0 [-0.020428381860256195, -0.006719687487930059,... \n", + "1 [-0.03668860346078873, -0.0074811591766774654,... \n", + "2 [-0.033381544053554535, -0.006886449176818132,... \n", + "3 [-0.008476617746055126, -0.007363526616245508,... \n", + "4 [-0.009453612379729748, 0.017376383766531944, ... " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_idstatus_idstatus_textcreated_atembeds_lengthembeddings
08978458027013775361221540755451392001Doubt it..It appears they all have gone the wa...2020-01-26 21:09:45+00:001536[-0.020428381860256195, -0.006719687487930059,...
19357396013014589471223458629837295619RT @Wyn1745: Democrats are ‘setting the stage’...2020-02-01 04:10:42+00:001536[-0.03668860346078873, -0.0074811591766774654,...
25717746221217445781663363072RT @sarahdwire: I’m loathe to insert myself in...2020-01-15 13:57:48+00:001536[-0.033381544053554535, -0.006886449176818132,...
33846798081223705594818748416RT @RepRatcliffe: We warned them...As Schiff a...2020-02-01 20:32:03+00:001536[-0.008476617746055126, -0.007363526616245508,...
47012642216532172811218459840277729281RT @chipfranklin: Because \"impeachment\" in the...2020-01-18 09:07:18+00:001536[-0.009453612379729748, 0.017376383766531944, ...
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 33 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tweets_df.head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 556 + }, + "id": "IuWH0hQ_rNVd", + "outputId": "662aaecc-17a2-4e96-e477-72d4b4cc3489" + }, + "execution_count": 35, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " user_id status_id \\\n", + "0 897845802701377536 1221540755451392001 \n", + "1 935739601301458947 1223458629837295619 \n", + "2 571774622 1217445781663363072 \n", + "3 384679808 1223705594818748416 \n", + "4 701264221653217281 1218459840277729281 \n", + "\n", + " status_text \\\n", + "0 Doubt it..It appears they all have gone the wa... \n", + "1 RT @Wyn1745: Democrats are ‘setting the stage’... \n", + "2 RT @sarahdwire: I’m loathe to insert myself in... \n", + "3 RT @RepRatcliffe: We warned them...As Schiff a... \n", + "4 RT @chipfranklin: Because \"impeachment\" in the... \n", + "\n", + " created_at embeds_length \\\n", + "0 2020-01-26 21:09:45+00:00 1536 \n", + "1 2020-02-01 04:10:42+00:00 1536 \n", + "2 2020-01-15 13:57:48+00:00 1536 \n", + "3 2020-02-01 20:32:03+00:00 1536 \n", + "4 2020-01-18 09:07:18+00:00 1536 \n", + "\n", + " embeddings \n", + "0 [-0.020428381860256195, -0.006719687487930059,... \n", + "1 [-0.03668860346078873, -0.0074811591766774654,... \n", + "2 [-0.033381544053554535, -0.006886449176818132,... \n", + "3 [-0.008476617746055126, -0.007363526616245508,... \n", + "4 [-0.009453612379729748, 0.017376383766531944, ... " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_idstatus_idstatus_textcreated_atembeds_lengthembeddings
08978458027013775361221540755451392001Doubt it..It appears they all have gone the wa...2020-01-26 21:09:45+00:001536[-0.020428381860256195, -0.006719687487930059,...
19357396013014589471223458629837295619RT @Wyn1745: Democrats are ‘setting the stage’...2020-02-01 04:10:42+00:001536[-0.03668860346078873, -0.0074811591766774654,...
25717746221217445781663363072RT @sarahdwire: I’m loathe to insert myself in...2020-01-15 13:57:48+00:001536[-0.033381544053554535, -0.006886449176818132,...
33846798081223705594818748416RT @RepRatcliffe: We warned them...As Schiff a...2020-02-01 20:32:03+00:001536[-0.008476617746055126, -0.007363526616245508,...
47012642216532172811218459840277729281RT @chipfranklin: Because \"impeachment\" in the...2020-01-18 09:07:18+00:001536[-0.009453612379729748, 0.017376383766531944, ...
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 35 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Saving CSV to drive:" + ], + "metadata": { + "id": "hkOZyyS2SbEt" + } + }, + { + "cell_type": "code", + "source": [ + "csv_filepath = os.path.join(DATA_DIRPATH, \"botometer_sample_max_50_openai_status_embeddings_v3.csv.gz\")\n", + "tweets_df.to_csv(csv_filepath, index=False, compression=\"gzip\")" + ], + "metadata": { + "id": "_QvWANEiSbE3" + }, + "execution_count": 36, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### ... Unpacked" + ], + "metadata": { + "id": "y-0275gvxwO4" + } + }, + { + "cell_type": "code", + "source": [ + "unpacked_tweets_df = unpacked(tweets_df)\n", + "unpacked_tweets_df.head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 689 + }, + "id": "IY02mObwyH8-", + "outputId": "fa0d10c2-fb12-40ca-a6c2-f7f546ae8e38" + }, + "execution_count": 37, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "UNPACKING...\n", + "\n", + "RECONSTRUCTING...\n", + "(183815, 1536)\n", + "MERGING...\n", + "(183815, 1541)\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " user_id status_id \\\n", + "0 897845802701377536 1221540755451392001 \n", + "1 935739601301458947 1223458629837295619 \n", + "2 571774622 1217445781663363072 \n", + "3 384679808 1223705594818748416 \n", + "4 701264221653217281 1218459840277729281 \n", + "\n", + " status_text \\\n", + "0 Doubt it..It appears they all have gone the wa... \n", + "1 RT @Wyn1745: Democrats are ‘setting the stage’... \n", + "2 RT @sarahdwire: I’m loathe to insert myself in... \n", + "3 RT @RepRatcliffe: We warned them...As Schiff a... \n", + "4 RT @chipfranklin: Because \"impeachment\" in the... \n", + "\n", + " created_at embeds_length openai_0 openai_1 openai_2 \\\n", + "0 2020-01-26 21:09:45+00:00 1536 -0.020428 -0.006720 0.007308 \n", + "1 2020-02-01 04:10:42+00:00 1536 -0.036689 -0.007481 0.007968 \n", + "2 2020-01-15 13:57:48+00:00 1536 -0.033382 -0.006886 -0.003244 \n", + "3 2020-02-01 20:32:03+00:00 1536 -0.008477 -0.007364 0.000919 \n", + "4 2020-01-18 09:07:18+00:00 1536 -0.009454 0.017376 0.007016 \n", + "\n", + " openai_3 openai_4 ... openai_1526 openai_1527 openai_1528 \\\n", + "0 -0.022157 -0.041841 ... 0.014616 0.004705 0.012661 \n", + "1 -0.006632 -0.022805 ... -0.001696 0.002522 0.020397 \n", + "2 -0.015834 0.000172 ... 0.001027 0.002464 0.002013 \n", + "3 -0.006435 0.008101 ... -0.028269 0.003193 0.015056 \n", + "4 -0.020075 -0.023674 ... -0.013590 0.015564 0.005130 \n", + "\n", + " openai_1529 openai_1530 openai_1531 openai_1532 openai_1533 \\\n", + "0 -0.020974 -0.003458 0.045166 0.029871 -0.021186 \n", + "1 -0.046374 -0.046611 0.021068 -0.000085 -0.003701 \n", + "2 -0.032766 -0.034265 0.006545 0.014804 0.003027 \n", + "3 -0.015333 -0.028137 0.032510 0.010327 -0.013621 \n", + "4 0.003077 -0.029167 0.015523 0.017914 -0.008789 \n", + "\n", + " openai_1534 openai_1535 \n", + "0 -0.003376 -0.024937 \n", + "1 -0.015370 -0.019213 \n", + "2 -0.001518 -0.030946 \n", + "3 -0.007686 -0.016216 \n", + "4 -0.019767 -0.042353 \n", + "\n", + "[5 rows x 1541 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_idstatus_idstatus_textcreated_atembeds_lengthopenai_0openai_1openai_2openai_3openai_4...openai_1526openai_1527openai_1528openai_1529openai_1530openai_1531openai_1532openai_1533openai_1534openai_1535
08978458027013775361221540755451392001Doubt it..It appears they all have gone the wa...2020-01-26 21:09:45+00:001536-0.020428-0.0067200.007308-0.022157-0.041841...0.0146160.0047050.012661-0.020974-0.0034580.0451660.029871-0.021186-0.003376-0.024937
19357396013014589471223458629837295619RT @Wyn1745: Democrats are ‘setting the stage’...2020-02-01 04:10:42+00:001536-0.036689-0.0074810.007968-0.006632-0.022805...-0.0016960.0025220.020397-0.046374-0.0466110.021068-0.000085-0.003701-0.015370-0.019213
25717746221217445781663363072RT @sarahdwire: I’m loathe to insert myself in...2020-01-15 13:57:48+00:001536-0.033382-0.006886-0.003244-0.0158340.000172...0.0010270.0024640.002013-0.032766-0.0342650.0065450.0148040.003027-0.001518-0.030946
33846798081223705594818748416RT @RepRatcliffe: We warned them...As Schiff a...2020-02-01 20:32:03+00:001536-0.008477-0.0073640.000919-0.0064350.008101...-0.0282690.0031930.015056-0.015333-0.0281370.0325100.010327-0.013621-0.007686-0.016216
47012642216532172811218459840277729281RT @chipfranklin: Because \"impeachment\" in the...2020-01-18 09:07:18+00:001536-0.0094540.0173760.007016-0.020075-0.023674...-0.0135900.0155640.0051300.003077-0.0291670.0155230.017914-0.008789-0.019767-0.042353
\n", + "

5 rows × 1541 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 37 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_parquet.html\n", + "\n", + "pq_filepath = os.path.join(DATA_DIRPATH, \"botometer_sample_max_50_openai_status_embeddings_v3_unpacked.parquet.gzip\")\n", + "unpacked_tweets_df.to_parquet(pq_filepath, compression=\"gzip\")" + ], + "metadata": { + "id": "2QaBlr5cYWIa" + }, + "execution_count": 39, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "csv_filepath = os.path.join(DATA_DIRPATH, \"botometer_sample_max_50_openai_status_embeddings_v3_unpacked.csv.gz\")\n", + "unpacked_tweets_df.to_csv(csv_filepath, index=False, compression=\"gzip\")" + ], + "metadata": { + "id": "GWc253mgrSx2" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "#arrow_filepath = os.path.join(DATA_DIRPATH, \"botometer_sample_max_50_openai_status_embeddings_v3_unpacked.arrow\")\n", + "#df.to_feather(arrow_filepath)" + ], + "metadata": { + "id": "SjU2t7PJXyEC" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Scratch Work" + ], + "metadata": { + "id": "EuDR5mjnq3fV" + } + }, + { + "cell_type": "code", + "source": [ + "##from pandas import concat\n", + "##\n", + "##limit = 1_000\n", + "##offset = 0\n", + "##\n", + "##all = DataFrame()\n", + "##\n", + "##while offset < 5_500:\n", + "## sql = f\"\"\"\n", + "## SELECT s.user_id, s.status_id, s.status_text, s.created_at, emb.embeddings\n", + "## FROM `{DATASET_ADDRESS}.botometer_sample` s\n", + "## JOIN `{DATASET_ADDRESS}.botometer_sample_max_50_openai_status_embeddings` emb\n", + "## ON s.status_id = emb.status_id\n", + "## LIMIT {int(limit)}\n", + "## OFFSET {int(offset)}\n", + "## \"\"\"\n", + "##\n", + "## batch = bq.query_to_df(sql, verbose=True)\n", + "## print(tweets_df.shape)\n", + "## if batch.empty:\n", + "## print(\"ALL DONE!\")\n", + "## break\n", + "##\n", + "## concat(all, batch)\n", + "## offset += limit\n", + "\n", + "" + ], + "metadata": { + "id": "B9bIY-wb-fHb" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Compressed Table" + ], + "metadata": { + "id": "zDo0Yqm2ujxN" + } + }, + { + "cell_type": "markdown", + "source": [ + "https://cloud.google.com/bigquery/docs/exporting-data#bigquery_extract_table_compressed-python" + ], + "metadata": { + "id": "P6ACmvi1ulKu" + } + }, + { + "cell_type": "code", + "source": [ + "# from google.cloud import bigquery\n", + "# client = bigquery.Client()\n", + "# bucket_name = 'my-bucket'\n", + "\n", + "#destination_uri = \"gs://{}/{}\".format(bucket_name, \"shakespeare.csv.gz\")\n", + "#dataset_ref = bigquery.DatasetReference(project, dataset_id)\n", + "#table_ref = dataset_ref.table(\"shakespeare\")\n", + "#job_config = bigquery.job.ExtractJobConfig()\n", + "#job_config.compression = bigquery.Compression.GZIP\n", + "#\n", + "#extract_job = client.extract_table(\n", + "# table_ref,\n", + "# destination_uri,\n", + "# # Location must match that of the source table.\n", + "# location=\"US\",\n", + "# job_config=job_config,\n", + "#) # API request\n", + "#extract_job.result() # Waits for job to complete." 
+ ], + "metadata": { + "id": "g7p7KRN7ulhz" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# from google.cloud import bigquery\n", + "# client = bigquery.Client()\n", + "# bucket_name = 'my-bucket'\n", + "\n", + "\n", + "#from google.cloud import bigquery\n", + "#\n", + "#\n", + "##ds_ref = bigquery.DatasetReference(PROJECT_ID, DATASET_ADDRESS)\n", + "#DATASET_ID = \"impeachment_production\"\n", + "#ds_ref = bigquery.DatasetReference(PROJECT_ID, DATASET_ID)\n", + "#table_ref = ds_ref.table(\"botometer_sample_max_50_openai_status_embeddings_v3\")\n", + "#\n", + "#job_config = bigquery.job.ExtractJobConfig()\n", + "#job_config.compression = bigquery.Compression.GZIP\n", + "#\n", + "#BUCKET_NAME = \"impeachment-analysis-2020\"\n", + "##destination_uri = f\"gs://{BUCKET_NAME}/impeachment_production/botometer_sample_max_50_openai_status_embeddings_v4.csv.gz\"\n", + "##> too large to be exported to a single file. Specify a uri including a * to shard export. See 'Exporting data into one or more files' in https://cloud.google.com/bigquery/docs/exporting-data.\n", + "#destination_uri = f\"gs://{BUCKET_NAME}/impeachment_production/botometer_sample_max_50_openai_status_embeddings_v4_*.csv.gz\"\n", + "#\n", + "#client = bq.client\n", + "#extract_job = client.extract_table(\n", + "# table_ref,\n", + "# destination_uri,\n", + "# # Location must match that of the source table.\n", + "# location=\"US\",\n", + "# job_config=job_config,\n", + "#) # API request\n", + "#extract_job.result() # Waits for job to complete." + ], + "metadata": { + "id": "Iz1mp4C1ujG0" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/notebooks/openai_embeddings_v2/Impeachment_2020_Embeddings_Analysis_Template_(20240129).ipynb b/notebooks/openai_embeddings_v2/Impeachment_2020_Embeddings_Analysis_Template_(20240129).ipynb new file mode 100644 index 0000000..1b4265b --- /dev/null +++ b/notebooks/openai_embeddings_v2/Impeachment_2020_Embeddings_Analysis_Template_(20240129).ipynb @@ -0,0 +1,1955 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "collapsed_sections": [ + "Ymoi-E5OjZD5" + ], + "toc_visible": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "_-39w0IS18f-" + }, + "source": [ + "We fetched user-level and tweet-level OpenAI embeddings and stored on BQ, and copied the data to CSV files on Drive.\n", + "\n", + "This notebook provides an example of how to load those CSV files. Feel free to make a copy of this notebook and perform your own analyses." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ymoi-E5OjZD5" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Google Drive" + ], + "metadata": { + "id": "FF154lGK_1N6" + } + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "from google.colab import drive\n", + "\n", + "drive.mount('/content/drive')\n", + "print(os.getcwd(), os.listdir(os.getcwd()))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "i_eMkJ5fpKDp", + "outputId": "b82c2891-d6b0-45ce-ff4b-dddf370e6716" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Mounted at /content/drive\n", + "/content ['.config', 'drive', 'sample_data']\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5OKjyFQ0owen", + "outputId": "d148f498-8af6-4de7-90c1-1072a0309607" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/content/drive/MyDrive/Research/DS Research Shared 2024\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ], + "source": [ + "# you might need to create a google drive SHORTCUT that has this same path\n", + "# ... or update the path to use your own google drive organization\n", + "#DIRPATH = '/content/drive/MyDrive/Research/Disinfo Research Shared 2022'\n", + "#DIRPATH = '/content/drive/MyDrive/Research/DS Research Shared 2023'\n", + "DIRPATH = '/content/drive/MyDrive/Research/DS Research Shared 2024'\n", + "\n", + "print(DIRPATH)\n", + "os.path.isdir(DIRPATH)" + ] + }, + { + "cell_type": "markdown", + "source": [ + "New project-based directory structure for 2024:\n", + "\n", + "https://drive.google.com/drive/folders/1SuXkqVT400uZ2OYFGGV8SYBf7NhtBo5k?usp=drive_link" + ], + "metadata": { + "id": "dNCNBPJkg9St" + } + }, + { + "cell_type": "code", + "source": [ + "DATA_DIRPATH = os.path.join(DIRPATH, \"projects\", \"Impeachment 2020 Embeddings\", \"data\")\n", + "os.path.isdir(DATA_DIRPATH)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "jjkYs5KJ99LX", + "outputId": "3ca78c5f-4fa0-4519-b126-e02403785ec9" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "os.listdir(DATA_DIRPATH)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "x9QGLQH_dUGV", + "outputId": "abc2ba42-9476-453f-fc95-70eea47f31e6" + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['botometer_sample_max_50_openai_user_embeddings.csv.gz',\n", + " 'botometer_sample_max_50_openai_user_embeddings_unpacked.csv.gz',\n", + " 'botometer_sample_max_50_openai_status_embeddings_v3.csv.gz',\n", + " 'botometer_sample_max_50_openai_status_embeddings_v3_unpacked.parquet.gzip',\n", + " 'botometer_sample_max_50_openai_status_embeddings_v3_unpacked.csv.gz']" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "The \"unpacked\" versions have a column per embedding, and are generally easier to work with.\n", + "\n", + "The files we will be working with are:\n", + " + 
\"botometer_sample_max_50_openai_user_embeddings_unpacked.csv.gz\" and\n", + " + \"botometer_sample_max_50_openai_status_embeddings_v3_unpacked.parquet.gzip\"." + ], + "metadata": { + "id": "JCNrEG7vhOKo" + } + }, + { + "cell_type": "markdown", + "source": [ + "## User Embeddings" + ], + "metadata": { + "id": "TJUWWC48HcGk" + } + }, + { + "cell_type": "markdown", + "source": [ + "7566 users" + ], + "metadata": { + "id": "CGpJ-kDaHfi5" + } + }, + { + "cell_type": "markdown", + "source": [ + "Loading CSV from drive:" + ], + "metadata": { + "id": "1TYFGOn7Ow-P" + } + }, + { + "cell_type": "code", + "source": [ + "from pandas import read_csv\n", + "\n", + "csv_filepath = os.path.join(DATA_DIRPATH, \"botometer_sample_max_50_openai_user_embeddings_unpacked.csv.gz\")\n", + "users_df = read_csv(csv_filepath, compression=\"gzip\")\n", + "print(users_df.shape)\n", + "print(users_df.columns)\n", + "users_df.head()" + ], + "metadata": { + "id": "V5m_ZmDFHeLx", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 416 + }, + "outputId": "ad620cd6-6ecb-408a-ec34-c75cf0718e8d" + }, + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(7566, 1547)\n", + "Index(['user_id', 'created_on', 'status_count', 'rt_count', 'is_bot',\n", + " 'opinion_community', 'is_q', 'avg_toxicity', 'avg_fact_score',\n", + " 'bom_astroturf',\n", + " ...\n", + " 'openai_1526', 'openai_1527', 'openai_1528', 'openai_1529',\n", + " 'openai_1530', 'openai_1531', 'openai_1532', 'openai_1533',\n", + " 'openai_1534', 'openai_1535'],\n", + " dtype='object', length=1547)\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " user_id created_on status_count rt_count is_bot \\\n", + "0 3420436216 2015-08-13 555 540 True \n", + "1 108121958 2010-01-24 2 2 False \n", + "2 3038308638 2015-02-23 755 665 True \n", + "3 332396536 2011-07-09 951 951 True \n", + "4 955082522479808512 2018-01-21 570 533 True \n", + "\n", + " opinion_community is_q avg_toxicity avg_fact_score bom_astroturf ... \\\n", + "0 0 False 0.056113 1.983193 0.295 ... \n", + "1 0 False 0.456710 NaN 0.580 ... \n", + "2 0 False 0.069860 3.401786 0.970 ... \n", + "3 1 False 0.044264 2.304511 0.580 ... \n", + "4 0 False 0.049325 4.714286 0.355 ... \n", + "\n", + " openai_1526 openai_1527 openai_1528 openai_1529 openai_1530 \\\n", + "0 -0.001867 -0.013167 0.020885 -0.022568 -0.033631 \n", + "1 0.017651 -0.009439 0.024375 -0.032553 -0.042185 \n", + "2 -0.026273 -0.008139 0.030285 -0.029902 -0.030887 \n", + "3 -0.005520 -0.005288 0.017071 -0.033637 -0.040202 \n", + "4 0.009959 0.004695 0.005555 -0.012851 -0.032229 \n", + "\n", + " openai_1531 openai_1532 openai_1533 openai_1534 openai_1535 \n", + "0 0.016153 0.024127 -0.017519 0.002636 -0.039838 \n", + "1 0.013782 0.011320 -0.014862 -0.010413 -0.020359 \n", + "2 0.022481 -0.005476 -0.016279 -0.010138 -0.021454 \n", + "3 0.041773 -0.009370 0.003352 0.009391 -0.042671 \n", + "4 0.031443 0.008163 -0.018501 -0.008724 -0.042027 \n", + "\n", + "[5 rows x 1547 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_idcreated_onstatus_countrt_countis_botopinion_communityis_qavg_toxicityavg_fact_scorebom_astroturf...openai_1526openai_1527openai_1528openai_1529openai_1530openai_1531openai_1532openai_1533openai_1534openai_1535
034204362162015-08-13555540True0False0.0561131.9831930.295...-0.001867-0.0131670.020885-0.022568-0.0336310.0161530.024127-0.0175190.002636-0.039838
11081219582010-01-2422False0False0.456710NaN0.580...0.017651-0.0094390.024375-0.032553-0.0421850.0137820.011320-0.014862-0.010413-0.020359
230383086382015-02-23755665True0False0.0698603.4017860.970...-0.026273-0.0081390.030285-0.029902-0.0308870.022481-0.005476-0.016279-0.010138-0.021454
33323965362011-07-09951951True1False0.0442642.3045110.580...-0.005520-0.0052880.017071-0.033637-0.0402020.041773-0.0093700.0033520.009391-0.042671
49550825224798085122018-01-21570533True0False0.0493254.7142860.355...0.0099590.0046950.005555-0.012851-0.0322290.0314430.008163-0.018501-0.008724-0.042027
\n", + "

5 rows × 1547 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "source": [ + "users_df[\"user_id\"].nunique()" + ], + "metadata": { + "id": "nQGfxCyBHeIi", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "e3d7456a-6c0d-4424-8d8d-64bca24c552f" + }, + "execution_count": 13, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "7566" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "code", + "source": [ + "users_df[\"is_bot\"].value_counts()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JIwbbnB71suN", + "outputId": "dad6b8ba-2ab5-49b2-a957-934272d76e84" + }, + "execution_count": 14, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "False 4466\n", + "True 3100\n", + "Name: is_bot, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "code", + "source": [ + "users_df[\"opinion_community\"].value_counts()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Yi8Qlxi_1spO", + "outputId": "0589fbde-5029-41a4-a234-0fd47e3823a9" + }, + "execution_count": 15, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 4891\n", + "1 2675\n", + "Name: opinion_community, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "code", + "source": [ + "users_df[\"avg_fact_score\"].info()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dG4-L7nDeQC-", + "outputId": "cba8adc6-210e-419d-f766-677a66174714" + }, + "execution_count": 16, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "RangeIndex: 7566 entries, 0 to 7565\n", + "Series name: avg_fact_score\n", + "Non-Null Count Dtype \n", + "-------------- ----- \n", + "3292 non-null float64\n", + "dtypes: float64(1)\n", + "memory usage: 59.2 KB\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "n-pHzzQyi-dT" + } + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "HRKFH1UTi-Yu" + } + }, + { + "cell_type": "code", + "source": [ + "\n", + "\n", + "from pandas import isnull\n", + "\n", + "def add_labels(users_df):\n", + " # APPLY SAME LABELS AS THE ORIGINAL SOURCE CODE\n", + " # https://github.com/s2t2/openai-embeddings-2023/blob/1b8372dd36982009df5d4a80871f4c182ada743d/notebooks/2_embeddings_data_export.py#L51\n", + " # https://github.com/s2t2/openai-embeddings-2023/blob/main/app/dataset.py#L37-L64\n", + "\n", + " # labels:\n", + " users_df[\"opinion_label\"] = users_df[\"opinion_community\"].map({0:\"Anti-Trump\", 1:\"Pro-Trump\"})\n", + " users_df[\"bot_label\"] = users_df[\"is_bot\"].map({True:\"Bot\", False:\"Human\"})\n", + " users_df[\"fourway_label\"] = users_df[\"opinion_label\"] + \" \" + users_df[\"bot_label\"]\n", + "\n", + " # language toxicity scores (0 low - 1 high)\n", + " toxic_threshold = 0.1\n", + " users_df[\"is_toxic\"] = users_df[\"avg_toxicity\"] >= toxic_threshold\n", + " users_df[\"is_toxic\"] = users_df[\"is_toxic\"].map({True: 1, False :0 })\n", + " users_df[\"toxic_label\"] = users_df[\"is_toxic\"].map({1: \"Toxic\", 0 :\"Normal\" })\n", + "\n", + " # fact check / media quality scores (1 low - 5 high)\n", + " fact_threshold = 3.0\n", + " users_df[\"is_factual\"] = users_df[\"avg_fact_score\"].apply(lambda score: score if isnull(score) else score >= 
fact_threshold)\n", + "\n", + " # botometer binary and labels:\n", + " users_df[\"is_bom_overall\"] = users_df[\"bom_overall\"].round()\n", + " users_df[\"is_bom_astroturf\"] = users_df[\"bom_astroturf\"].round()\n", + " users_df[\"bom_overall_label\"] = users_df[\"is_bom_overall\"].map({1:\"Bot\", 0:\"Human\"})\n", + " users_df[\"bom_astroturf_label\"] = users_df[\"is_bom_astroturf\"].map({1:\"Bot\", 0:\"Human\"})\n", + " users_df[\"bom_overall_fourway_label\"] = users_df[\"opinion_label\"] + \" \" + users_df[\"bom_overall_label\"]\n", + " users_df[\"bom_astroturf_fourway_label\"] = users_df[\"opinion_label\"] + \" \" + users_df[\"bom_astroturf_label\"]\n", + "\n", + " return users_df\n", + "\n", + "\n", + "users_df = add_labels(users_df)\n", + "print(users_df.shape)\n", + "print(users_df.columns.tolist())\n", + "users_df.head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 309 + }, + "id": "jK9I2mpri_ER", + "outputId": "724101e9-f34c-4363-f680-57f71ba15bb7" + }, + "execution_count": 29, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(7566, 1559)\n", + "['user_id', 'created_on', 'status_count', 'rt_count', 'is_bot', 'opinion_community', 'is_q', 'avg_toxicity', 'avg_fact_score', 'bom_astroturf', 'bom_overall', 'openai_0', 'openai_1', 'openai_2', 'openai_3', 'openai_4', 'openai_5', 'openai_6', 'openai_7', 'openai_8', 'openai_9', 'openai_10', 'openai_11', 'openai_12', 'openai_13', 'openai_14', 'openai_15', 'openai_16', 'openai_17', 'openai_18', 'openai_19', 'openai_20', 'openai_21', 'openai_22', 'openai_23', 'openai_24', 'openai_25', 'openai_26', 'openai_27', 'openai_28', 'openai_29', 'openai_30', 'openai_31', 'openai_32', 'openai_33', 'openai_34', 'openai_35', 'openai_36', 'openai_37', 'openai_38', 'openai_39', 'openai_40', 'openai_41', 'openai_42', 'openai_43', 'openai_44', 'openai_45', 'openai_46', 'openai_47', 'openai_48', 'openai_49', 'openai_50', 'openai_51', 'openai_52', 'openai_53', 'openai_54', 'openai_55', 'openai_56', 'openai_57', 'openai_58', 'openai_59', 'openai_60', 'openai_61', 'openai_62', 'openai_63', 'openai_64', 'openai_65', 'openai_66', 'openai_67', 'openai_68', 'openai_69', 'openai_70', 'openai_71', 'openai_72', 'openai_73', 'openai_74', 'openai_75', 'openai_76', 'openai_77', 'openai_78', 'openai_79', 'openai_80', 'openai_81', 'openai_82', 'openai_83', 'openai_84', 'openai_85', 'openai_86', 'openai_87', 'openai_88', 'openai_89', 'openai_90', 'openai_91', 'openai_92', 'openai_93', 'openai_94', 'openai_95', 'openai_96', 'openai_97', 'openai_98', 'openai_99', 'openai_100', 'openai_101', 'openai_102', 'openai_103', 'openai_104', 'openai_105', 'openai_106', 'openai_107', 'openai_108', 'openai_109', 'openai_110', 'openai_111', 'openai_112', 'openai_113', 'openai_114', 'openai_115', 'openai_116', 'openai_117', 'openai_118', 'openai_119', 'openai_120', 'openai_121', 'openai_122', 'openai_123', 'openai_124', 'openai_125', 'openai_126', 'openai_127', 'openai_128', 'openai_129', 'openai_130', 'openai_131', 'openai_132', 'openai_133', 'openai_134', 'openai_135', 'openai_136', 'openai_137', 'openai_138', 'openai_139', 'openai_140', 'openai_141', 'openai_142', 'openai_143', 'openai_144', 'openai_145', 'openai_146', 'openai_147', 'openai_148', 'openai_149', 'openai_150', 'openai_151', 'openai_152', 'openai_153', 'openai_154', 'openai_155', 'openai_156', 'openai_157', 'openai_158', 'openai_159', 'openai_160', 'openai_161', 'openai_162', 'openai_163', 'openai_164', 'openai_165', 'openai_166', 'openai_167', 
'openai_168', 'openai_169', 'openai_170', 'openai_171', 'openai_172', 'openai_173', 'openai_174', 'openai_175', 'openai_176', 'openai_177', 'openai_178', 'openai_179', 'openai_180', 'openai_181', 'openai_182', 'openai_183', 'openai_184', 'openai_185', 'openai_186', 'openai_187', 'openai_188', 'openai_189', 'openai_190', 'openai_191', 'openai_192', 'openai_193', 'openai_194', 'openai_195', 'openai_196', 'openai_197', 'openai_198', 'openai_199', 'openai_200', 'openai_201', 'openai_202', 'openai_203', 'openai_204', 'openai_205', 'openai_206', 'openai_207', 'openai_208', 'openai_209', 'openai_210', 'openai_211', 'openai_212', 'openai_213', 'openai_214', 'openai_215', 'openai_216', 'openai_217', 'openai_218', 'openai_219', 'openai_220', 'openai_221', 'openai_222', 'openai_223', 'openai_224', 'openai_225', 'openai_226', 'openai_227', 'openai_228', 'openai_229', 'openai_230', 'openai_231', 'openai_232', 'openai_233', 'openai_234', 'openai_235', 'openai_236', 'openai_237', 'openai_238', 'openai_239', 'openai_240', 'openai_241', 'openai_242', 'openai_243', 'openai_244', 'openai_245', 'openai_246', 'openai_247', 'openai_248', 'openai_249', 'openai_250', 'openai_251', 'openai_252', 'openai_253', 'openai_254', 'openai_255', 'openai_256', 'openai_257', 'openai_258', 'openai_259', 'openai_260', 'openai_261', 'openai_262', 'openai_263', 'openai_264', 'openai_265', 'openai_266', 'openai_267', 'openai_268', 'openai_269', 'openai_270', 'openai_271', 'openai_272', 'openai_273', 'openai_274', 'openai_275', 'openai_276', 'openai_277', 'openai_278', 'openai_279', 'openai_280', 'openai_281', 'openai_282', 'openai_283', 'openai_284', 'openai_285', 'openai_286', 'openai_287', 'openai_288', 'openai_289', 'openai_290', 'openai_291', 'openai_292', 'openai_293', 'openai_294', 'openai_295', 'openai_296', 'openai_297', 'openai_298', 'openai_299', 'openai_300', 'openai_301', 'openai_302', 'openai_303', 'openai_304', 'openai_305', 'openai_306', 'openai_307', 'openai_308', 'openai_309', 'openai_310', 'openai_311', 'openai_312', 'openai_313', 'openai_314', 'openai_315', 'openai_316', 'openai_317', 'openai_318', 'openai_319', 'openai_320', 'openai_321', 'openai_322', 'openai_323', 'openai_324', 'openai_325', 'openai_326', 'openai_327', 'openai_328', 'openai_329', 'openai_330', 'openai_331', 'openai_332', 'openai_333', 'openai_334', 'openai_335', 'openai_336', 'openai_337', 'openai_338', 'openai_339', 'openai_340', 'openai_341', 'openai_342', 'openai_343', 'openai_344', 'openai_345', 'openai_346', 'openai_347', 'openai_348', 'openai_349', 'openai_350', 'openai_351', 'openai_352', 'openai_353', 'openai_354', 'openai_355', 'openai_356', 'openai_357', 'openai_358', 'openai_359', 'openai_360', 'openai_361', 'openai_362', 'openai_363', 'openai_364', 'openai_365', 'openai_366', 'openai_367', 'openai_368', 'openai_369', 'openai_370', 'openai_371', 'openai_372', 'openai_373', 'openai_374', 'openai_375', 'openai_376', 'openai_377', 'openai_378', 'openai_379', 'openai_380', 'openai_381', 'openai_382', 'openai_383', 'openai_384', 'openai_385', 'openai_386', 'openai_387', 'openai_388', 'openai_389', 'openai_390', 'openai_391', 'openai_392', 'openai_393', 'openai_394', 'openai_395', 'openai_396', 'openai_397', 'openai_398', 'openai_399', 'openai_400', 'openai_401', 'openai_402', 'openai_403', 'openai_404', 'openai_405', 'openai_406', 'openai_407', 'openai_408', 'openai_409', 'openai_410', 'openai_411', 'openai_412', 'openai_413', 'openai_414', 'openai_415', 'openai_416', 'openai_417', 'openai_418', 'openai_419', 'openai_420', 
'openai_421', 'openai_422', 'openai_423', 'openai_424', 'openai_425', 'openai_426', 'openai_427', 'openai_428', 'openai_429', 'openai_430', 'openai_431', 'openai_432', 'openai_433', 'openai_434', 'openai_435', 'openai_436', 'openai_437', 'openai_438', 'openai_439', 'openai_440', 'openai_441', 'openai_442', 'openai_443', 'openai_444', 'openai_445', 'openai_446', 'openai_447', 'openai_448', 'openai_449', 'openai_450', 'openai_451', 'openai_452', 'openai_453', 'openai_454', 'openai_455', 'openai_456', 'openai_457', 'openai_458', 'openai_459', 'openai_460', 'openai_461', 'openai_462', 'openai_463', 'openai_464', 'openai_465', 'openai_466', 'openai_467', 'openai_468', 'openai_469', 'openai_470', 'openai_471', 'openai_472', 'openai_473', 'openai_474', 'openai_475', 'openai_476', 'openai_477', 'openai_478', 'openai_479', 'openai_480', 'openai_481', 'openai_482', 'openai_483', 'openai_484', 'openai_485', 'openai_486', 'openai_487', 'openai_488', 'openai_489', 'openai_490', 'openai_491', 'openai_492', 'openai_493', 'openai_494', 'openai_495', 'openai_496', 'openai_497', 'openai_498', 'openai_499', 'openai_500', 'openai_501', 'openai_502', 'openai_503', 'openai_504', 'openai_505', 'openai_506', 'openai_507', 'openai_508', 'openai_509', 'openai_510', 'openai_511', 'openai_512', 'openai_513', 'openai_514', 'openai_515', 'openai_516', 'openai_517', 'openai_518', 'openai_519', 'openai_520', 'openai_521', 'openai_522', 'openai_523', 'openai_524', 'openai_525', 'openai_526', 'openai_527', 'openai_528', 'openai_529', 'openai_530', 'openai_531', 'openai_532', 'openai_533', 'openai_534', 'openai_535', 'openai_536', 'openai_537', 'openai_538', 'openai_539', 'openai_540', 'openai_541', 'openai_542', 'openai_543', 'openai_544', 'openai_545', 'openai_546', 'openai_547', 'openai_548', 'openai_549', 'openai_550', 'openai_551', 'openai_552', 'openai_553', 'openai_554', 'openai_555', 'openai_556', 'openai_557', 'openai_558', 'openai_559', 'openai_560', 'openai_561', 'openai_562', 'openai_563', 'openai_564', 'openai_565', 'openai_566', 'openai_567', 'openai_568', 'openai_569', 'openai_570', 'openai_571', 'openai_572', 'openai_573', 'openai_574', 'openai_575', 'openai_576', 'openai_577', 'openai_578', 'openai_579', 'openai_580', 'openai_581', 'openai_582', 'openai_583', 'openai_584', 'openai_585', 'openai_586', 'openai_587', 'openai_588', 'openai_589', 'openai_590', 'openai_591', 'openai_592', 'openai_593', 'openai_594', 'openai_595', 'openai_596', 'openai_597', 'openai_598', 'openai_599', 'openai_600', 'openai_601', 'openai_602', 'openai_603', 'openai_604', 'openai_605', 'openai_606', 'openai_607', 'openai_608', 'openai_609', 'openai_610', 'openai_611', 'openai_612', 'openai_613', 'openai_614', 'openai_615', 'openai_616', 'openai_617', 'openai_618', 'openai_619', 'openai_620', 'openai_621', 'openai_622', 'openai_623', 'openai_624', 'openai_625', 'openai_626', 'openai_627', 'openai_628', 'openai_629', 'openai_630', 'openai_631', 'openai_632', 'openai_633', 'openai_634', 'openai_635', 'openai_636', 'openai_637', 'openai_638', 'openai_639', 'openai_640', 'openai_641', 'openai_642', 'openai_643', 'openai_644', 'openai_645', 'openai_646', 'openai_647', 'openai_648', 'openai_649', 'openai_650', 'openai_651', 'openai_652', 'openai_653', 'openai_654', 'openai_655', 'openai_656', 'openai_657', 'openai_658', 'openai_659', 'openai_660', 'openai_661', 'openai_662', 'openai_663', 'openai_664', 'openai_665', 'openai_666', 'openai_667', 'openai_668', 'openai_669', 'openai_670', 'openai_671', 'openai_672', 'openai_673', 
'openai_674', 'openai_675', 'openai_676', 'openai_677', 'openai_678', 'openai_679', 'openai_680', 'openai_681', 'openai_682', 'openai_683', 'openai_684', 'openai_685', 'openai_686', 'openai_687', 'openai_688', 'openai_689', 'openai_690', 'openai_691', 'openai_692', 'openai_693', 'openai_694', 'openai_695', 'openai_696', 'openai_697', 'openai_698', 'openai_699', 'openai_700', 'openai_701', 'openai_702', 'openai_703', 'openai_704', 'openai_705', 'openai_706', 'openai_707', 'openai_708', 'openai_709', 'openai_710', 'openai_711', 'openai_712', 'openai_713', 'openai_714', 'openai_715', 'openai_716', 'openai_717', 'openai_718', 'openai_719', 'openai_720', 'openai_721', 'openai_722', 'openai_723', 'openai_724', 'openai_725', 'openai_726', 'openai_727', 'openai_728', 'openai_729', 'openai_730', 'openai_731', 'openai_732', 'openai_733', 'openai_734', 'openai_735', 'openai_736', 'openai_737', 'openai_738', 'openai_739', 'openai_740', 'openai_741', 'openai_742', 'openai_743', 'openai_744', 'openai_745', 'openai_746', 'openai_747', 'openai_748', 'openai_749', 'openai_750', 'openai_751', 'openai_752', 'openai_753', 'openai_754', 'openai_755', 'openai_756', 'openai_757', 'openai_758', 'openai_759', 'openai_760', 'openai_761', 'openai_762', 'openai_763', 'openai_764', 'openai_765', 'openai_766', 'openai_767', 'openai_768', 'openai_769', 'openai_770', 'openai_771', 'openai_772', 'openai_773', 'openai_774', 'openai_775', 'openai_776', 'openai_777', 'openai_778', 'openai_779', 'openai_780', 'openai_781', 'openai_782', 'openai_783', 'openai_784', 'openai_785', 'openai_786', 'openai_787', 'openai_788', 'openai_789', 'openai_790', 'openai_791', 'openai_792', 'openai_793', 'openai_794', 'openai_795', 'openai_796', 'openai_797', 'openai_798', 'openai_799', 'openai_800', 'openai_801', 'openai_802', 'openai_803', 'openai_804', 'openai_805', 'openai_806', 'openai_807', 'openai_808', 'openai_809', 'openai_810', 'openai_811', 'openai_812', 'openai_813', 'openai_814', 'openai_815', 'openai_816', 'openai_817', 'openai_818', 'openai_819', 'openai_820', 'openai_821', 'openai_822', 'openai_823', 'openai_824', 'openai_825', 'openai_826', 'openai_827', 'openai_828', 'openai_829', 'openai_830', 'openai_831', 'openai_832', 'openai_833', 'openai_834', 'openai_835', 'openai_836', 'openai_837', 'openai_838', 'openai_839', 'openai_840', 'openai_841', 'openai_842', 'openai_843', 'openai_844', 'openai_845', 'openai_846', 'openai_847', 'openai_848', 'openai_849', 'openai_850', 'openai_851', 'openai_852', 'openai_853', 'openai_854', 'openai_855', 'openai_856', 'openai_857', 'openai_858', 'openai_859', 'openai_860', 'openai_861', 'openai_862', 'openai_863', 'openai_864', 'openai_865', 'openai_866', 'openai_867', 'openai_868', 'openai_869', 'openai_870', 'openai_871', 'openai_872', 'openai_873', 'openai_874', 'openai_875', 'openai_876', 'openai_877', 'openai_878', 'openai_879', 'openai_880', 'openai_881', 'openai_882', 'openai_883', 'openai_884', 'openai_885', 'openai_886', 'openai_887', 'openai_888', 'openai_889', 'openai_890', 'openai_891', 'openai_892', 'openai_893', 'openai_894', 'openai_895', 'openai_896', 'openai_897', 'openai_898', 'openai_899', 'openai_900', 'openai_901', 'openai_902', 'openai_903', 'openai_904', 'openai_905', 'openai_906', 'openai_907', 'openai_908', 'openai_909', 'openai_910', 'openai_911', 'openai_912', 'openai_913', 'openai_914', 'openai_915', 'openai_916', 'openai_917', 'openai_918', 'openai_919', 'openai_920', 'openai_921', 'openai_922', 'openai_923', 'openai_924', 'openai_925', 'openai_926', 
'openai_927', 'openai_928', 'openai_929', 'openai_930', 'openai_931', 'openai_932', 'openai_933', 'openai_934', 'openai_935', 'openai_936', 'openai_937', 'openai_938', 'openai_939', 'openai_940', 'openai_941', 'openai_942', 'openai_943', 'openai_944', 'openai_945', 'openai_946', 'openai_947', 'openai_948', 'openai_949', 'openai_950', 'openai_951', 'openai_952', 'openai_953', 'openai_954', 'openai_955', 'openai_956', 'openai_957', 'openai_958', 'openai_959', 'openai_960', 'openai_961', 'openai_962', 'openai_963', 'openai_964', 'openai_965', 'openai_966', 'openai_967', 'openai_968', 'openai_969', 'openai_970', 'openai_971', 'openai_972', 'openai_973', 'openai_974', 'openai_975', 'openai_976', 'openai_977', 'openai_978', 'openai_979', 'openai_980', 'openai_981', 'openai_982', 'openai_983', 'openai_984', 'openai_985', 'openai_986', 'openai_987', 'openai_988', 'openai_989', 'openai_990', 'openai_991', 'openai_992', 'openai_993', 'openai_994', 'openai_995', 'openai_996', 'openai_997', 'openai_998', 'openai_999', 'openai_1000', 'openai_1001', 'openai_1002', 'openai_1003', 'openai_1004', 'openai_1005', 'openai_1006', 'openai_1007', 'openai_1008', 'openai_1009', 'openai_1010', 'openai_1011', 'openai_1012', 'openai_1013', 'openai_1014', 'openai_1015', 'openai_1016', 'openai_1017', 'openai_1018', 'openai_1019', 'openai_1020', 'openai_1021', 'openai_1022', 'openai_1023', 'openai_1024', 'openai_1025', 'openai_1026', 'openai_1027', 'openai_1028', 'openai_1029', 'openai_1030', 'openai_1031', 'openai_1032', 'openai_1033', 'openai_1034', 'openai_1035', 'openai_1036', 'openai_1037', 'openai_1038', 'openai_1039', 'openai_1040', 'openai_1041', 'openai_1042', 'openai_1043', 'openai_1044', 'openai_1045', 'openai_1046', 'openai_1047', 'openai_1048', 'openai_1049', 'openai_1050', 'openai_1051', 'openai_1052', 'openai_1053', 'openai_1054', 'openai_1055', 'openai_1056', 'openai_1057', 'openai_1058', 'openai_1059', 'openai_1060', 'openai_1061', 'openai_1062', 'openai_1063', 'openai_1064', 'openai_1065', 'openai_1066', 'openai_1067', 'openai_1068', 'openai_1069', 'openai_1070', 'openai_1071', 'openai_1072', 'openai_1073', 'openai_1074', 'openai_1075', 'openai_1076', 'openai_1077', 'openai_1078', 'openai_1079', 'openai_1080', 'openai_1081', 'openai_1082', 'openai_1083', 'openai_1084', 'openai_1085', 'openai_1086', 'openai_1087', 'openai_1088', 'openai_1089', 'openai_1090', 'openai_1091', 'openai_1092', 'openai_1093', 'openai_1094', 'openai_1095', 'openai_1096', 'openai_1097', 'openai_1098', 'openai_1099', 'openai_1100', 'openai_1101', 'openai_1102', 'openai_1103', 'openai_1104', 'openai_1105', 'openai_1106', 'openai_1107', 'openai_1108', 'openai_1109', 'openai_1110', 'openai_1111', 'openai_1112', 'openai_1113', 'openai_1114', 'openai_1115', 'openai_1116', 'openai_1117', 'openai_1118', 'openai_1119', 'openai_1120', 'openai_1121', 'openai_1122', 'openai_1123', 'openai_1124', 'openai_1125', 'openai_1126', 'openai_1127', 'openai_1128', 'openai_1129', 'openai_1130', 'openai_1131', 'openai_1132', 'openai_1133', 'openai_1134', 'openai_1135', 'openai_1136', 'openai_1137', 'openai_1138', 'openai_1139', 'openai_1140', 'openai_1141', 'openai_1142', 'openai_1143', 'openai_1144', 'openai_1145', 'openai_1146', 'openai_1147', 'openai_1148', 'openai_1149', 'openai_1150', 'openai_1151', 'openai_1152', 'openai_1153', 'openai_1154', 'openai_1155', 'openai_1156', 'openai_1157', 'openai_1158', 'openai_1159', 'openai_1160', 'openai_1161', 'openai_1162', 'openai_1163', 'openai_1164', 'openai_1165', 'openai_1166', 'openai_1167', 
'openai_1168', 'openai_1169', 'openai_1170', 'openai_1171', 'openai_1172', 'openai_1173', 'openai_1174', 'openai_1175', 'openai_1176', 'openai_1177', 'openai_1178', 'openai_1179', 'openai_1180', 'openai_1181', 'openai_1182', 'openai_1183', 'openai_1184', 'openai_1185', 'openai_1186', 'openai_1187', 'openai_1188', 'openai_1189', 'openai_1190', 'openai_1191', 'openai_1192', 'openai_1193', 'openai_1194', 'openai_1195', 'openai_1196', 'openai_1197', 'openai_1198', 'openai_1199', 'openai_1200', 'openai_1201', 'openai_1202', 'openai_1203', 'openai_1204', 'openai_1205', 'openai_1206', 'openai_1207', 'openai_1208', 'openai_1209', 'openai_1210', 'openai_1211', 'openai_1212', 'openai_1213', 'openai_1214', 'openai_1215', 'openai_1216', 'openai_1217', 'openai_1218', 'openai_1219', 'openai_1220', 'openai_1221', 'openai_1222', 'openai_1223', 'openai_1224', 'openai_1225', 'openai_1226', 'openai_1227', 'openai_1228', 'openai_1229', 'openai_1230', 'openai_1231', 'openai_1232', 'openai_1233', 'openai_1234', 'openai_1235', 'openai_1236', 'openai_1237', 'openai_1238', 'openai_1239', 'openai_1240', 'openai_1241', 'openai_1242', 'openai_1243', 'openai_1244', 'openai_1245', 'openai_1246', 'openai_1247', 'openai_1248', 'openai_1249', 'openai_1250', 'openai_1251', 'openai_1252', 'openai_1253', 'openai_1254', 'openai_1255', 'openai_1256', 'openai_1257', 'openai_1258', 'openai_1259', 'openai_1260', 'openai_1261', 'openai_1262', 'openai_1263', 'openai_1264', 'openai_1265', 'openai_1266', 'openai_1267', 'openai_1268', 'openai_1269', 'openai_1270', 'openai_1271', 'openai_1272', 'openai_1273', 'openai_1274', 'openai_1275', 'openai_1276', 'openai_1277', 'openai_1278', 'openai_1279', 'openai_1280', 'openai_1281', 'openai_1282', 'openai_1283', 'openai_1284', 'openai_1285', 'openai_1286', 'openai_1287', 'openai_1288', 'openai_1289', 'openai_1290', 'openai_1291', 'openai_1292', 'openai_1293', 'openai_1294', 'openai_1295', 'openai_1296', 'openai_1297', 'openai_1298', 'openai_1299', 'openai_1300', 'openai_1301', 'openai_1302', 'openai_1303', 'openai_1304', 'openai_1305', 'openai_1306', 'openai_1307', 'openai_1308', 'openai_1309', 'openai_1310', 'openai_1311', 'openai_1312', 'openai_1313', 'openai_1314', 'openai_1315', 'openai_1316', 'openai_1317', 'openai_1318', 'openai_1319', 'openai_1320', 'openai_1321', 'openai_1322', 'openai_1323', 'openai_1324', 'openai_1325', 'openai_1326', 'openai_1327', 'openai_1328', 'openai_1329', 'openai_1330', 'openai_1331', 'openai_1332', 'openai_1333', 'openai_1334', 'openai_1335', 'openai_1336', 'openai_1337', 'openai_1338', 'openai_1339', 'openai_1340', 'openai_1341', 'openai_1342', 'openai_1343', 'openai_1344', 'openai_1345', 'openai_1346', 'openai_1347', 'openai_1348', 'openai_1349', 'openai_1350', 'openai_1351', 'openai_1352', 'openai_1353', 'openai_1354', 'openai_1355', 'openai_1356', 'openai_1357', 'openai_1358', 'openai_1359', 'openai_1360', 'openai_1361', 'openai_1362', 'openai_1363', 'openai_1364', 'openai_1365', 'openai_1366', 'openai_1367', 'openai_1368', 'openai_1369', 'openai_1370', 'openai_1371', 'openai_1372', 'openai_1373', 'openai_1374', 'openai_1375', 'openai_1376', 'openai_1377', 'openai_1378', 'openai_1379', 'openai_1380', 'openai_1381', 'openai_1382', 'openai_1383', 'openai_1384', 'openai_1385', 'openai_1386', 'openai_1387', 'openai_1388', 'openai_1389', 'openai_1390', 'openai_1391', 'openai_1392', 'openai_1393', 'openai_1394', 'openai_1395', 'openai_1396', 'openai_1397', 'openai_1398', 'openai_1399', 'openai_1400', 'openai_1401', 'openai_1402', 'openai_1403', 'openai_1404', 
'openai_1405', 'openai_1406', 'openai_1407', 'openai_1408', 'openai_1409', 'openai_1410', 'openai_1411', 'openai_1412', 'openai_1413', 'openai_1414', 'openai_1415', 'openai_1416', 'openai_1417', 'openai_1418', 'openai_1419', 'openai_1420', 'openai_1421', 'openai_1422', 'openai_1423', 'openai_1424', 'openai_1425', 'openai_1426', 'openai_1427', 'openai_1428', 'openai_1429', 'openai_1430', 'openai_1431', 'openai_1432', 'openai_1433', 'openai_1434', 'openai_1435', 'openai_1436', 'openai_1437', 'openai_1438', 'openai_1439', 'openai_1440', 'openai_1441', 'openai_1442', 'openai_1443', 'openai_1444', 'openai_1445', 'openai_1446', 'openai_1447', 'openai_1448', 'openai_1449', 'openai_1450', 'openai_1451', 'openai_1452', 'openai_1453', 'openai_1454', 'openai_1455', 'openai_1456', 'openai_1457', 'openai_1458', 'openai_1459', 'openai_1460', 'openai_1461', 'openai_1462', 'openai_1463', 'openai_1464', 'openai_1465', 'openai_1466', 'openai_1467', 'openai_1468', 'openai_1469', 'openai_1470', 'openai_1471', 'openai_1472', 'openai_1473', 'openai_1474', 'openai_1475', 'openai_1476', 'openai_1477', 'openai_1478', 'openai_1479', 'openai_1480', 'openai_1481', 'openai_1482', 'openai_1483', 'openai_1484', 'openai_1485', 'openai_1486', 'openai_1487', 'openai_1488', 'openai_1489', 'openai_1490', 'openai_1491', 'openai_1492', 'openai_1493', 'openai_1494', 'openai_1495', 'openai_1496', 'openai_1497', 'openai_1498', 'openai_1499', 'openai_1500', 'openai_1501', 'openai_1502', 'openai_1503', 'openai_1504', 'openai_1505', 'openai_1506', 'openai_1507', 'openai_1508', 'openai_1509', 'openai_1510', 'openai_1511', 'openai_1512', 'openai_1513', 'openai_1514', 'openai_1515', 'openai_1516', 'openai_1517', 'openai_1518', 'openai_1519', 'openai_1520', 'openai_1521', 'openai_1522', 'openai_1523', 'openai_1524', 'openai_1525', 'openai_1526', 'openai_1527', 'openai_1528', 'openai_1529', 'openai_1530', 'openai_1531', 'openai_1532', 'openai_1533', 'openai_1534', 'openai_1535', 'is_factual', 'opinion_label', 'bot_label', 'fourway_label', 'is_toxic', 'toxic_label', 'is_bom_overall', 'is_bom_astroturf', 'bom_overall_label', 'bom_astroturf_label', 'bom_overall_fourway_label', 'bom_astroturf_fourway_label']\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " user_id created_on status_count rt_count is_bot \\\n", + "0 3420436216 2015-08-13 555 540 True \n", + "1 108121958 2010-01-24 2 2 False \n", + "2 3038308638 2015-02-23 755 665 True \n", + "3 332396536 2011-07-09 951 951 True \n", + "4 955082522479808512 2018-01-21 570 533 True \n", + "\n", + " opinion_community is_q avg_toxicity avg_fact_score bom_astroturf ... \\\n", + "0 0 False 0.056113 1.983193 0.295 ... \n", + "1 0 False 0.456710 NaN 0.580 ... \n", + "2 0 False 0.069860 3.401786 0.970 ... \n", + "3 1 False 0.044264 2.304511 0.580 ... \n", + "4 0 False 0.049325 4.714286 0.355 ... 
\n", + "\n", + " bot_label fourway_label is_toxic toxic_label is_bom_overall \\\n", + "0 Bot Anti-Trump Bot 0 Normal 0.0 \n", + "1 Human Anti-Trump Human 1 Toxic 0.0 \n", + "2 Bot Anti-Trump Bot 0 Normal 1.0 \n", + "3 Bot Pro-Trump Bot 0 Normal 1.0 \n", + "4 Bot Anti-Trump Bot 0 Normal 0.0 \n", + "\n", + " is_bom_astroturf bom_overall_label bom_astroturf_label \\\n", + "0 0.0 Human Human \n", + "1 1.0 Human Bot \n", + "2 1.0 Bot Bot \n", + "3 1.0 Bot Bot \n", + "4 0.0 Human Human \n", + "\n", + " bom_overall_fourway_label bom_astroturf_fourway_label \n", + "0 Anti-Trump Human Anti-Trump Human \n", + "1 Anti-Trump Human Anti-Trump Bot \n", + "2 Anti-Trump Bot Anti-Trump Bot \n", + "3 Pro-Trump Bot Pro-Trump Bot \n", + "4 Anti-Trump Human Anti-Trump Human \n", + "\n", + "[5 rows x 1559 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_idcreated_onstatus_countrt_countis_botopinion_communityis_qavg_toxicityavg_fact_scorebom_astroturf...bot_labelfourway_labelis_toxictoxic_labelis_bom_overallis_bom_astroturfbom_overall_labelbom_astroturf_labelbom_overall_fourway_labelbom_astroturf_fourway_label
034204362162015-08-13555540True0False0.0561131.9831930.295...BotAnti-Trump Bot0Normal0.00.0HumanHumanAnti-Trump HumanAnti-Trump Human
11081219582010-01-2422False0False0.456710NaN0.580...HumanAnti-Trump Human1Toxic0.01.0HumanBotAnti-Trump HumanAnti-Trump Bot
230383086382015-02-23755665True0False0.0698603.4017860.970...BotAnti-Trump Bot0Normal1.01.0BotBotAnti-Trump BotAnti-Trump Bot
33323965362011-07-09951951True1False0.0442642.3045110.580...BotPro-Trump Bot0Normal1.01.0BotBotPro-Trump BotPro-Trump Bot
49550825224798085122018-01-21570533True0False0.0493254.7142860.355...BotAnti-Trump Bot0Normal0.00.0HumanHumanAnti-Trump HumanAnti-Trump Human
\n", + "

5 rows × 1559 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 29 + } + ] + }, + { + "cell_type": "code", + "source": [ + "users_df[\"is_factual\"].value_counts()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "CU_qpBVcjFD4", + "outputId": "b7b3e9d9-73e3-4c5c-a775-2bf87ee4bd09" + }, + "execution_count": 24, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "False 1696\n", + "True 1596\n", + "Name: is_factual, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 24 + } + ] + }, + { + "cell_type": "code", + "source": [ + "users_df[\"is_toxic\"].value_counts()\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "pbaa5rTJh5NY", + "outputId": "6d98e7a3-a734-44ef-8d48-eca658d42c95" + }, + "execution_count": 25, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 6132\n", + "1 1434\n", + "Name: is_toxic, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ] + }, + { + "cell_type": "code", + "source": [ + "users_df[\"bot_label\"].value_counts()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "T31nFSuniKdY", + "outputId": "0513619c-f8ac-402c-bef8-7880506b33dc" + }, + "execution_count": 26, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Human 4466\n", + "Bot 3100\n", + "Name: bot_label, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 26 + } + ] + }, + { + "cell_type": "code", + "source": [ + "users_df[\"opinion_label\"].value_counts()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4QX5FgjMk3E0", + "outputId": "c68929d4-2e82-4037-9b14-813efda2b105" + }, + "execution_count": 27, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Anti-Trump 4891\n", + "Pro-Trump 2675\n", + "Name: opinion_label, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 27 + } + ] + }, + { + "cell_type": "code", + "source": [ + "users_df[\"fourway_label\"].value_counts()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wKHKOfGplAv8", + "outputId": "7394ad50-5a86-4e98-cf4f-fb6cf42839e8" + }, + "execution_count": 30, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Anti-Trump Human 3010\n", + "Anti-Trump Bot 1881\n", + "Pro-Trump Human 1456\n", + "Pro-Trump Bot 1219\n", + "Name: fourway_label, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 30 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4t48ewACjXQy" + }, + "source": [ + "## Tweet Embeddings" + ] + }, + { + "cell_type": "markdown", + "source": [ + "183K statuses:" + ], + "metadata": { + "id": "5sJsvSTWCVVX" + } + }, + { + "cell_type": "code", + "source": [ + "from pandas import read_parquet\n", + "\n", + "pq_filepath = os.path.join(DATA_DIRPATH, \"botometer_sample_max_50_openai_status_embeddings_v3_unpacked.parquet.gzip\")\n", + "statuses_df = read_parquet(pq_filepath)\n", + "print(statuses_df.shape)\n", + "print(statuses_df.columns)\n", + "statuses_df.head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 728 + }, + "id": "wy-OIPg_eYX-", + "outputId": "dd50b10f-a81b-4217-d794-bb0bd9f14c53" + }, + "execution_count": 12, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(183815, 1541)\n", + "Index(['user_id', 'status_id', 'status_text', 'created_at', 
'embeds_length',\n", + " 'openai_0', 'openai_1', 'openai_2', 'openai_3', 'openai_4',\n", + " ...\n", + " 'openai_1526', 'openai_1527', 'openai_1528', 'openai_1529',\n", + " 'openai_1530', 'openai_1531', 'openai_1532', 'openai_1533',\n", + " 'openai_1534', 'openai_1535'],\n", + " dtype='object', length=1541)\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " user_id status_id \\\n", + "0 897845802701377536 1221540755451392001 \n", + "1 935739601301458947 1223458629837295619 \n", + "2 571774622 1217445781663363072 \n", + "3 384679808 1223705594818748416 \n", + "4 701264221653217281 1218459840277729281 \n", + "\n", + " status_text \\\n", + "0 Doubt it..It appears they all have gone the wa... \n", + "1 RT @Wyn1745: Democrats are ‘setting the stage’... \n", + "2 RT @sarahdwire: I’m loathe to insert myself in... \n", + "3 RT @RepRatcliffe: We warned them...As Schiff a... \n", + "4 RT @chipfranklin: Because \"impeachment\" in the... \n", + "\n", + " created_at embeds_length openai_0 openai_1 openai_2 \\\n", + "0 2020-01-26 21:09:45+00:00 1536 -0.020428 -0.006720 0.007308 \n", + "1 2020-02-01 04:10:42+00:00 1536 -0.036689 -0.007481 0.007968 \n", + "2 2020-01-15 13:57:48+00:00 1536 -0.033382 -0.006886 -0.003244 \n", + "3 2020-02-01 20:32:03+00:00 1536 -0.008477 -0.007364 0.000919 \n", + "4 2020-01-18 09:07:18+00:00 1536 -0.009454 0.017376 0.007016 \n", + "\n", + " openai_3 openai_4 ... openai_1526 openai_1527 openai_1528 \\\n", + "0 -0.022157 -0.041841 ... 0.014616 0.004705 0.012661 \n", + "1 -0.006632 -0.022805 ... -0.001696 0.002522 0.020397 \n", + "2 -0.015834 0.000172 ... 0.001027 0.002464 0.002013 \n", + "3 -0.006435 0.008101 ... -0.028269 0.003193 0.015056 \n", + "4 -0.020075 -0.023674 ... -0.013590 0.015564 0.005130 \n", + "\n", + " openai_1529 openai_1530 openai_1531 openai_1532 openai_1533 \\\n", + "0 -0.020974 -0.003458 0.045166 0.029871 -0.021186 \n", + "1 -0.046374 -0.046611 0.021068 -0.000085 -0.003701 \n", + "2 -0.032766 -0.034265 0.006545 0.014804 0.003027 \n", + "3 -0.015333 -0.028137 0.032510 0.010327 -0.013621 \n", + "4 0.003077 -0.029167 0.015523 0.017914 -0.008789 \n", + "\n", + " openai_1534 openai_1535 \n", + "0 -0.003376 -0.024937 \n", + "1 -0.015370 -0.019213 \n", + "2 -0.001518 -0.030946 \n", + "3 -0.007686 -0.016216 \n", + "4 -0.019767 -0.042353 \n", + "\n", + "[5 rows x 1541 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_idstatus_idstatus_textcreated_atembeds_lengthopenai_0openai_1openai_2openai_3openai_4...openai_1526openai_1527openai_1528openai_1529openai_1530openai_1531openai_1532openai_1533openai_1534openai_1535
08978458027013775361221540755451392001Doubt it..It appears they all have gone the wa...2020-01-26 21:09:45+00:001536-0.020428-0.0067200.007308-0.022157-0.041841...0.0146160.0047050.012661-0.020974-0.0034580.0451660.029871-0.021186-0.003376-0.024937
19357396013014589471223458629837295619RT @Wyn1745: Democrats are ‘setting the stage’...2020-02-01 04:10:42+00:001536-0.036689-0.0074810.007968-0.006632-0.022805...-0.0016960.0025220.020397-0.046374-0.0466110.021068-0.000085-0.003701-0.015370-0.019213
25717746221217445781663363072RT @sarahdwire: I’m loathe to insert myself in...2020-01-15 13:57:48+00:001536-0.033382-0.006886-0.003244-0.0158340.000172...0.0010270.0024640.002013-0.032766-0.0342650.0065450.0148040.003027-0.001518-0.030946
33846798081223705594818748416RT @RepRatcliffe: We warned them...As Schiff a...2020-02-01 20:32:03+00:001536-0.008477-0.0073640.000919-0.0064350.008101...-0.0282690.0031930.015056-0.015333-0.0281370.0325100.010327-0.013621-0.007686-0.016216
47012642216532172811218459840277729281RT @chipfranklin: Because \"impeachment\" in the...2020-01-18 09:07:18+00:001536-0.0094540.0173760.007016-0.020075-0.023674...-0.0135900.0155640.0051300.003077-0.0291670.0155230.017914-0.008789-0.019767-0.042353
\n", + "

5 rows × 1541 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 12 + } + ] + }, + { + "cell_type": "code", + "source": [ + "statuses_df[\"user_id\"].nunique()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NGVktpyCkgJM", + "outputId": "8ec77e42-6b02-4c89-adb7-dfed2a6ded67" + }, + "execution_count": 70, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "7566" + ] + }, + "metadata": {}, + "execution_count": 70 + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "TlotZx1R-fMZ" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/notebooks/openai_embeddings_v2/README.md b/notebooks/openai_embeddings_v2/README.md new file mode 100644 index 0000000..f3877c9 --- /dev/null +++ b/notebooks/openai_embeddings_v2/README.md @@ -0,0 +1,13 @@ + + + + + + +# OpenAI Embeddings (v2) + +This supercedes earlier approach to fetch embeddings. In this second attempt we are grabbing user-level as well as tweet-level embeddings, to compare the difference in these approaches. + +The "Exporting Embeddings" notebook takes embeddings stored in BigQuery (see app/openai_embeddings_v2/README.md), and exports them to CSV / parquet files on Google Drive for easier and cheaper access + +The "Analysis Template" notebook provides an example of how to load the files from drive for further analysis. diff --git a/notebooks/openai_embeddings_v2/exporting_embeddings_to_drive_20240201_v3.py b/notebooks/openai_embeddings_v2/exporting_embeddings_to_drive_20240201_v3.py new file mode 100644 index 0000000..2f3adcd --- /dev/null +++ b/notebooks/openai_embeddings_v2/exporting_embeddings_to_drive_20240201_v3.py @@ -0,0 +1,314 @@ +# -*- coding: utf-8 -*- +"""Exporting Embeddings to Drive - 20240201 - v3 + +Automatically generated by Colaboratory. + +Original file is located at + https://colab.research.google.com/drive/1tFWFj1yUgGxS-8WveeSnpEdrsgiG4-jh + +We fetched OpenAI embeddings and stored on BQ. Let's download a CSV file to drive for further analysis. + +## Setup + +### Google Drive +""" + +import os +from google.colab import drive + +drive.mount('/content/drive') +print(os.getcwd(), os.listdir(os.getcwd())) + + + +# you might need to create a google drive SHORTCUT that has this same path +# ... 
or update the path to use your own google drive organization +#DIRPATH = '/content/drive/MyDrive/Research/Disinfo Research Shared 2022' +#DIRPATH = '/content/drive/MyDrive/Research/DS Research Shared 2023' +DIRPATH = '/content/drive/MyDrive/Research/DS Research Shared 2024' + +print(DIRPATH) +os.path.isdir(DIRPATH) + +DATA_DIRPATH = os.path.join(DIRPATH, "projects", "Impeachment 2020 Embeddings", "data") +os.path.isdir(DATA_DIRPATH) + +"""### BigQuery Service""" + +from google.colab import auth + +# asks you to login +auth.authenticate_user() + +from google.cloud import bigquery +from pandas import DataFrame, read_gbq + + +PROJECT_ID = "tweet-collector-py" + +class BigQueryService(): + def __init__(self, project_id=PROJECT_ID): + self.project_id = project_id + self.client = bigquery.Client(project=self.project_id) + + def execute_query(self, sql, verbose=True): + if verbose == True: + print(sql) + job = self.client.query(sql) + return job.result() + + #def query_to_df(self, sql, verbose=True): + # """high-level wrapper to return a DataFrame""" + # results = self.execute_query(sql, verbose=verbose) + # return DataFrame([dict(row) for row in results]) + + def query_to_df(self, sql, verbose=True): + """high-level wrapper to return a DataFrame""" + if verbose == True: + print(sql) + # https://pandas.pydata.org/docs/reference/api/pandas.read_gbq.html#pandas-read-gbq + #return read_gbq(sql, project_id=self.project_id) # progress_bar_type="tqdm_notebook" + #progress_bar_type="tqdm_notebook" + return read_gbq(sql, project_id=self.project_id, progress_bar_type="tqdm_notebook") + +bq = BigQueryService() +print(bq) + +print("DATASETS:") +datasets = list(bq.client.list_datasets()) +for ds in datasets: + #print("...", ds.project, ds.dataset_id) + print("...", ds.reference) + +"""## Helper Functions + +### Unpacking Embeddings +""" + +import json +from pandas import DataFrame + + +def unpack(embeddings_str): + """Takes a string value containing an array of OpenAI embeddings, + and returns a list of floats. 
+ """ + if isinstance(embeddings_str, str): + return json.loads(embeddings_str) + else: + return embeddings_str + + +def unpacked(df, col_prefix="openai"): + """Takes a dataframe with a single column of OpenAI embeddings, + and unpacks them into their own separate columns, + and returns a modified version of the original dataframe, + with the original embeddings column replaced by the new unpacked columns. + """ + + print("UNPACKING...") + embeds = df["embeddings"].apply(unpack) + print(type(embeds)) + + print("RECONSTRUCTING...") + embeds = DataFrame(embeds.values.tolist()) + embeds.columns = [f"{col_prefix}_{col}" for col in embeds.columns] + embeds.index = df.index + print(embeds.shape) + #embeds.head() + + print("MERGING...") + df_unpacked = df.merge(embeds, left_index=True, right_index=True) + df_unpacked.drop(columns=["embeddings"], inplace=True) + print(df_unpacked.shape) + return df_unpacked + +"""# Embeddings""" + +DATASET_ADDRESS = "tweet-collector-py.impeachment_production" + +sql = f""" + SELECT + count(distinct s.user_id) as user_count + ,count(distinct s.status_id) as status_count + FROM `{DATASET_ADDRESS}.botometer_sample` s + JOIN `{DATASET_ADDRESS}.botometer_sample_max_50_openai_status_embeddings_v2` emb + ON s.status_id = emb.status_id +""" +bq.query_to_df(sql, verbose=False) + +"""## User Embeddings + +7566 users +""" + +sql = f""" + SELECT + u.user_id, u.created_on + --, u.screen_name_count, u.screen_names, split(u.screen_names, ",")[0] as screen_name + ,u.status_count, u.rt_count + ,u.is_bot --, u.bot_rt_network + ,u.opinion_community --, u.avg_score_lr, avg_score_nb, avg_score_bert + , u.is_q --, u.q_status_count + --, u.follower_count, u.follower_count_b, u.follower_count_h + --, u.friend_count, u.friend_count_b, u.friend_count_h + + ,u.avg_toxicity --, u.avg_severe_toxicity, u.avg_insult, u.avg_obscene, u.avg_threat, u.avg_identity_hate + , u.avg_fact_score -- ,u.fact_scored_count + + ,u.bom_astroturf, u.bom_overall --, u.bom_cap --,u.bom_lookup_count + --,u.bom_fake_follower, u.bom_financial, u.bom_other, u.bom_self_declared, u.bom_spammer + + ,emb.embeddings + + FROM `{DATASET_ADDRESS}.user_details_v20240128_slim` u + JOIN `{DATASET_ADDRESS}.botometer_sample_max_50_openai_user_embeddings` emb + ON emb.user_id = u.user_id + -- LIMIT 10 +""" + +users_df = bq.query_to_df(sql, verbose=False) +print(users_df.shape) + +users_df.head() + +"""Saving CSV to drive:""" + +csv_filepath = os.path.join(DATA_DIRPATH, "botometer_sample_max_50_openai_user_embeddings.csv.gz") +users_df.to_csv(csv_filepath, index=False, compression="gzip") + +"""### ... Unpacked""" + +users_df_unpacked = unpacked(users_df) +print(users_df_unpacked.shape) +users_df_unpacked.head() + +csv_filepath = os.path.join(DATA_DIRPATH, "botometer_sample_max_50_openai_user_embeddings_unpacked.csv.gz") +users_df_unpacked.to_csv(csv_filepath, index=False, compression="gzip") + +"""## Tweet Embeddings + +183K statuses + +Wow wow wow this is taking a long time (1hr +...) to stream the data down over the network... + +Re-doing with the statuses table v2, which has duplicate lookups removed (row per unique status)... + +Re-doing with statuses table v3, which has status texts as well...
+""" + +sql = f""" + SELECT user_id, status_id, status_text, created_at, embeds_length, embeddings + FROM `{DATASET_ADDRESS}.botometer_sample_max_50_openai_status_embeddings_v3` + -- LIMIT 10000 +""" + +tweets_df = bq.query_to_df(sql, verbose=True) +print(tweets_df.shape) +tweets_df.head() + +tweets_df.head() + +"""Saving CSV to drive:""" + +csv_filepath = os.path.join(DATA_DIRPATH, "botometer_sample_max_50_openai_status_embeddings_v3.csv.gz") +tweets_df.to_csv(csv_filepath, index=False, compression="gzip") + +"""### ... Unpacked""" + +unpacked_tweets_df = unpacked(tweets_df) +unpacked_tweets_df.head() + +# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_parquet.html + +pq_filepath = os.path.join(DATA_DIRPATH, "botometer_sample_max_50_openai_status_embeddings_v3_unpacked.parquet.gzip") +unpacked_tweets_df.to_parquet(pq_filepath, compression="gzip") + +csv_filepath = os.path.join(DATA_DIRPATH, "botometer_sample_max_50_openai_status_embeddings_v3_unpacked.csv.gz") +unpacked_tweets_df.to_csv(csv_filepath, index=False, compression="gzip") + +#arrow_filepath = os.path.join(DATA_DIRPATH, "botometer_sample_max_50_openai_status_embeddings_v3_unpacked.arrow") +#df.to_feather(arrow_filepath) + +"""## Scratch Work""" + +##from pandas import concat +## +##limit = 1_000 +##offset = 0 +## +##all = DataFrame() +## +##while offset < 5_500: +## sql = f""" +## SELECT s.user_id, s.status_id, s.status_text, s.created_at, emb.embeddings +## FROM `{DATASET_ADDRESS}.botometer_sample` s +## JOIN `{DATASET_ADDRESS}.botometer_sample_max_50_openai_status_embeddings` emb +## ON s.status_id = emb.status_id +## LIMIT {int(limit)} +## OFFSET {int(offset)} +## """ +## +## batch = bq.query_to_df(sql, verbose=True) +## print(tweets_df.shape) +## if batch.empty: +## print("ALL DONE!") +## break +## +## concat(all, batch) +## offset += limit + +"""### Compressed Table + +https://cloud.google.com/bigquery/docs/exporting-data#bigquery_extract_table_compressed-python +""" + +# from google.cloud import bigquery +# client = bigquery.Client() +# bucket_name = 'my-bucket' + +#destination_uri = "gs://{}/{}".format(bucket_name, "shakespeare.csv.gz") +#dataset_ref = bigquery.DatasetReference(project, dataset_id) +#table_ref = dataset_ref.table("shakespeare") +#job_config = bigquery.job.ExtractJobConfig() +#job_config.compression = bigquery.Compression.GZIP +# +#extract_job = client.extract_table( +# table_ref, +# destination_uri, +# # Location must match that of the source table. +# location="US", +# job_config=job_config, +#) # API request +#extract_job.result() # Waits for job to complete. + +# from google.cloud import bigquery +# client = bigquery.Client() +# bucket_name = 'my-bucket' + + +#from google.cloud import bigquery +# +# +##ds_ref = bigquery.DatasetReference(PROJECT_ID, DATASET_ADDRESS) +#DATASET_ID = "impeachment_production" +#ds_ref = bigquery.DatasetReference(PROJECT_ID, DATASET_ID) +#table_ref = ds_ref.table("botometer_sample_max_50_openai_status_embeddings_v3") +# +#job_config = bigquery.job.ExtractJobConfig() +#job_config.compression = bigquery.Compression.GZIP +# +#BUCKET_NAME = "impeachment-analysis-2020" +##destination_uri = f"gs://{BUCKET_NAME}/impeachment_production/botometer_sample_max_50_openai_status_embeddings_v4.csv.gz" +##> too large to be exported to a single file. Specify a uri including a * to shard export. See 'Exporting data into one or more files' in https://cloud.google.com/bigquery/docs/exporting-data. 
+#destination_uri = f"gs://{BUCKET_NAME}/impeachment_production/botometer_sample_max_50_openai_status_embeddings_v4_*.csv.gz" +# +#client = bq.client +#extract_job = client.extract_table( +# table_ref, +# destination_uri, +# # Location must match that of the source table. +# location="US", +# job_config=job_config, +#) # API request +#extract_job.result() # Waits for job to complete. \ No newline at end of file diff --git a/notebooks/openai_embeddings_v2/impeachment_2020_embeddings_analysis_template_(20240129).py b/notebooks/openai_embeddings_v2/impeachment_2020_embeddings_analysis_template_(20240129).py new file mode 100644 index 0000000..726c12d --- /dev/null +++ b/notebooks/openai_embeddings_v2/impeachment_2020_embeddings_analysis_template_(20240129).py @@ -0,0 +1,134 @@ +# -*- coding: utf-8 -*- +"""Impeachment 2020 Embeddings Analysis Template (20240129) + +Automatically generated by Colaboratory. + +Original file is located at + https://colab.research.google.com/drive/1dAlLxG-SbQNzBVLyD84a9x_6xlBUPQjQ + +We fetched user-level and tweet-level OpenAI embeddings and stored on BQ, and copied the data to CSV files on Drive. + +This notebook provides an example of how to load those CSV files. Feel free to make a copy of this notebook and perform your own analyses. + +## Setup + +### Google Drive +""" + +import os +from google.colab import drive + +drive.mount('/content/drive') +print(os.getcwd(), os.listdir(os.getcwd())) + +# you might need to create a google drive SHORTCUT that has this same path +# ... or update the path to use your own google drive organization +#DIRPATH = '/content/drive/MyDrive/Research/Disinfo Research Shared 2022' +#DIRPATH = '/content/drive/MyDrive/Research/DS Research Shared 2023' +DIRPATH = '/content/drive/MyDrive/Research/DS Research Shared 2024' + +print(DIRPATH) +os.path.isdir(DIRPATH) + +"""New project-based directory structure for 2024: + +https://drive.google.com/drive/folders/1SuXkqVT400uZ2OYFGGV8SYBf7NhtBo5k?usp=drive_link +""" + +DATA_DIRPATH = os.path.join(DIRPATH, "projects", "Impeachment 2020 Embeddings", "data") +os.path.isdir(DATA_DIRPATH) + +os.listdir(DATA_DIRPATH) + +"""The "unpacked" versions have a column per embedding, and are generally easier to work with. + +The files we will be working with are: + + "botometer_sample_max_50_openai_user_embeddings_unpacked.csv.gz" and + + "botometer_sample_max_50_openai_status_embeddings_v3_unpacked.parquet.gzip". 
+ +## User Embeddings + +7566 users + +Loading CSV from drive: +""" + +from pandas import read_csv + +csv_filepath = os.path.join(DATA_DIRPATH, "botometer_sample_max_50_openai_user_embeddings_unpacked.csv.gz") +users_df = read_csv(csv_filepath, compression="gzip") +print(users_df.shape) +print(users_df.columns) +users_df.head() + +users_df["user_id"].nunique() + +users_df["is_bot"].value_counts() + +users_df["opinion_community"].value_counts() + +users_df["avg_fact_score"].info() + +from pandas import isnull + +def add_labels(users_df): + # APPLY SAME LABELS AS THE ORIGINAL SOURCE CODE + # https://github.com/s2t2/openai-embeddings-2023/blob/1b8372dd36982009df5d4a80871f4c182ada743d/notebooks/2_embeddings_data_export.py#L51 + # https://github.com/s2t2/openai-embeddings-2023/blob/main/app/dataset.py#L37-L64 + + # labels: + users_df["opinion_label"] = users_df["opinion_community"].map({0:"Anti-Trump", 1:"Pro-Trump"}) + users_df["bot_label"] = users_df["is_bot"].map({True:"Bot", False:"Human"}) + users_df["fourway_label"] = users_df["opinion_label"] + " " + users_df["bot_label"] + + # language toxicity scores (0 low - 1 high) + toxic_threshold = 0.1 + users_df["is_toxic"] = users_df["avg_toxicity"] >= toxic_threshold + users_df["is_toxic"] = users_df["is_toxic"].map({True: 1, False :0 }) + users_df["toxic_label"] = users_df["is_toxic"].map({1: "Toxic", 0 :"Normal" }) + + # fact check / media quality scores (1 low - 5 high) + fact_threshold = 3.0 + users_df["is_factual"] = users_df["avg_fact_score"].apply(lambda score: score if isnull(score) else score >= fact_threshold) + + # botometer binary and labels: + users_df["is_bom_overall"] = users_df["bom_overall"].round() + users_df["is_bom_astroturf"] = users_df["bom_astroturf"].round() + users_df["bom_overall_label"] = users_df["is_bom_overall"].map({1:"Bot", 0:"Human"}) + users_df["bom_astroturf_label"] = users_df["is_bom_astroturf"].map({1:"Bot", 0:"Human"}) + users_df["bom_overall_fourway_label"] = users_df["opinion_label"] + " " + users_df["bom_overall_label"] + users_df["bom_astroturf_fourway_label"] = users_df["opinion_label"] + " " + users_df["bom_astroturf_label"] + + return users_df + + +users_df = add_labels(users_df) +print(users_df.shape) +print(users_df.columns.tolist()) +users_df.head() + +users_df["is_factual"].value_counts() + +users_df["is_toxic"].value_counts() + +users_df["bot_label"].value_counts() + +users_df["opinion_label"].value_counts() + +users_df["fourway_label"].value_counts() + +"""## Tweet Embeddings + +183K statuses: +""" + +from pandas import read_parquet + +pq_filepath = os.path.join(DATA_DIRPATH, "botometer_sample_max_50_openai_status_embeddings_v3_unpacked.parquet.gzip") +statuses_df = read_parquet(pq_filepath) +print(statuses_df.shape) +print(statuses_df.columns) +statuses_df.head() + +statuses_df["user_id"].nunique() + diff --git a/requirements.txt b/requirements.txt index f027615..93a1a81 100644 --- a/requirements.txt +++ b/requirements.txt @@ -34,6 +34,8 @@ gensim # model storage: google-cloud-storage +# data storage: +google-cloud-bigquery #==3.2.0 # automated tests: pytest
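
Looking ahead, here is a minimal, illustrative sketch of the kind of comparison these exports enable: fitting the same simple classifier on the user-level embeddings and on the tweet-level embeddings. It assumes `users_df` and `statuses_df` have been loaded from the unpacked files as in the Analysis Template above and that scikit-learn is installed; the `is_bot` target, the 20,000-tweet subsample, the logistic regression model, and the variable names are arbitrary choices for illustration, not something prescribed by the notebooks themselves.

```python
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GroupShuffleSplit, train_test_split
from sklearn.metrics import accuracy_score

EMBED_COLS = [f"openai_{i}" for i in range(1536)]  # openai_0 ... openai_1535

# 1) User-level embeddings: one row (and one embedding) per user.
x_train, x_test, y_train, y_test = train_test_split(
    users_df[EMBED_COLS], users_df["is_bot"], test_size=0.2, random_state=99
)
user_model = LogisticRegression(max_iter=1_000)
user_model.fit(x_train, y_train)
print("USER-LEVEL ACCURACY:", accuracy_score(y_test, user_model.predict(x_test)))

# 2) Tweet-level embeddings: label each tweet with its author's user-level label.
#    (If the merge comes back empty, align the user_id dtypes first, e.g. astype(str).)
tweets_labeled = statuses_df.merge(users_df[["user_id", "is_bot"]], on="user_id", how="inner")
tweets_labeled = tweets_labeled.sample(20_000, random_state=99)  # subsample for a quick pass

# Split by user_id so the same account never lands in both train and test.
splitter = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=99)
train_idx, test_idx = next(splitter.split(tweets_labeled, groups=tweets_labeled["user_id"]))
train, test = tweets_labeled.iloc[train_idx], tweets_labeled.iloc[test_idx]

tweet_model = LogisticRegression(max_iter=1_000)
tweet_model.fit(train[EMBED_COLS], train["is_bot"])
print("TWEET-LEVEL ACCURACY:", accuracy_score(test["is_bot"], tweet_model.predict(test[EMBED_COLS])))
```

Grouping the tweet-level split by `user_id` keeps an account's tweets from landing on both sides of the train/test boundary, which would otherwise inflate the tweet-level score.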