Skip to content

Commit

Permalink
Merge pull request #23 from EleutherAI/categories_across_scale_time
Browse files Browse the repository at this point in the history
Categories Across Scale & Time
  • Loading branch information
Kyle1668 authored Jun 4, 2024
2 parents 0c4e010 + 5504999 commit 5fc689a
Show file tree
Hide file tree
Showing 22 changed files with 5,334 additions and 323 deletions.
18 changes: 18 additions & 0 deletions working_dirs/kyle/results_analysis/calculate_kl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from scipy.stats import entropy

def calculate_kl_divergence(all_memories_and_sample, memories_dataset, duplication_count, num_bins=50, random_state=None):
    """Estimate the KL divergence in generation perplexity at one duplication count.

    Rows whose ``"sequence_duplicates"`` equals ``duplication_count`` are
    selected from both frames. The rows from ``all_memories_and_sample`` are
    bootstrap-resampled (same size, with replacement). The
    ``"generation_perplexity"`` values of both groups are then binned over a
    shared set of histogram edges, and ``KL(memories || bootstrap sample)`` is
    computed on the binned distributions, i.e. the memories are treated as the
    true distribution.

    The samples are histogrammed first because ``scipy.stats.entropy(pk, qk)``
    interprets its arguments as (unnormalized) probability masses per event,
    not as draws from a distribution. Feeding it raw perplexity values makes
    the result depend on sample ordering and yields 0 for any two constant
    samples, however far apart they are.

    Args:
        all_memories_and_sample: DataFrame with ``"sequence_duplicates"`` and
            ``"generation_perplexity"`` columns; source of the bootstrap sample.
        memories_dataset: DataFrame with the same two columns; its perplexities
            define the true distribution.
        duplication_count: Value of ``"sequence_duplicates"`` to select.
        num_bins: Number of equal-width histogram bins shared by both samples.
        random_state: Optional seed / RNG forwarded to ``DataFrame.sample`` so
            the bootstrap draw can be made reproducible.

    Returns:
        A dict with keys ``"Duplicates"`` (the given ``duplication_count``) and
        ``"KL Divergence"`` (a float, in nats). The divergence is NaN when
        either group has no finite perplexity values at this duplication count.
    """
    # Local import: the module itself only imports scipy.stats.entropy.
    import numpy as np

    # Pseudo-count added to every bin so that bins which are empty in the
    # reference sample do not turn the divergence into +inf.
    pseudo_count = 1e-10

    def _result(value):
        return {
            "Duplicates": duplication_count,
            "KL Divergence": value
        }

    def _finite_perplexities(rows):
        values = rows["generation_perplexity"].to_numpy(dtype=float)
        return values[np.isfinite(values)]

    reference_rows = all_memories_and_sample[all_memories_and_sample["sequence_duplicates"] == duplication_count]
    memory_rows = memories_dataset[memories_dataset["sequence_duplicates"] == duplication_count]
    if len(reference_rows) == 0 or len(memory_rows) == 0:
        return _result(float("nan"))

    # Bootstrap: resample the reference rows with replacement, keeping the size.
    bootstrap_rows = reference_rows.sample(frac=1, replace=True, random_state=random_state)

    memory_perplexities = _finite_perplexities(memory_rows)
    reference_perplexities = _finite_perplexities(bootstrap_rows)
    if memory_perplexities.size == 0 or reference_perplexities.size == 0:
        return _result(float("nan"))

    # Both histograms must share bin edges so that bin i means the same
    # perplexity interval in each distribution.
    bin_edges = np.histogram_bin_edges(
        np.concatenate([memory_perplexities, reference_perplexities]),
        bins=num_bins,
    )
    memory_counts, _ = np.histogram(memory_perplexities, bins=bin_edges)
    reference_counts, _ = np.histogram(reference_perplexities, bins=bin_edges)

    # entropy() normalizes both vectors to sum to 1, so the two groups do not
    # need to be downsampled to the same number of rows.
    kl_divergence = entropy(memory_counts + pseudo_count, reference_counts + pseudo_count)
    return _result(float(kl_divergence))
Binary file not shown.
674 changes: 674 additions & 0 deletions working_dirs/kyle/results_analysis/hist.ipynb

Large diffs are not rendered by default.

Binary file not shown.
1,296 changes: 1,296 additions & 0 deletions working_dirs/kyle/results_analysis/ppl_dup_figure.ipynb

Large diffs are not rendered by default.

2,784 changes: 2,496 additions & 288 deletions working_dirs/kyle/results_analysis/results_analysis.ipynb

Large diffs are not rendered by default.

885 changes: 850 additions & 35 deletions working_dirs/kyle/results_analysis/results_analysis_whole_sequence.ipynb

Large diffs are not rendered by default.

Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 comments on commit 5fc689a

Please sign in to comment.