Skip to content

Commit

Permalink
Adds single sentence summarization (#3)
Browse files Browse the repository at this point in the history
* Adds single sentence summarization

* Expand Python version compatibility

* Formatting

* Address flake

* [0.1.5] Adds single sentence summarization

* formatting
  • Loading branch information
crodriguez1a authored Jul 1, 2020
1 parent 3e85b39 commit 0be2c6a
Show file tree
Hide file tree
Showing 7 changed files with 444 additions and 23 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ tfhub/
notebooks/*_experiment.ipynb

dist/
bpe_summarizer.egg-info
186 changes: 186 additions & 0 deletions notebooks/Evaluation Single Sentence.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"env: TFHUB_CACHE_DIR=tfhub/\n"
]
}
],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
"%env TFHUB_CACHE_DIR=tfhub/"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import sys\n",
"module_path = os.path.abspath(os.path.join('..'))\n",
"if module_path not in sys.path:\n",
" sys.path.append(module_path)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib\n",
"matplotlib.use(\"agg\")\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"\n",
"import numpy as np\n",
"import tensorflow as tf\n",
"from transformers import pipeline\n",
"import torch\n",
"from tqdm import tqdm\n",
"\n",
"from src.bpe_summarizer import bpe_summarize, sentencizer\n",
"from src.utils import remove_stopwords\n",
"from validation import scicummnet_validation, similarity_score"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'notification repeated overdraft'"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"\"\"\n",
"The default percentile is 99, but the max percentile allowable for \n",
"a single sentence is derived from the mean of the token values\n",
"\"\"\"\n",
"bpe_summarize(\"I received a notification today about being subject to extended holds due to repeated overdrafts.\")"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'notification subject extended holds repeated overdraft'"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"bpe_summarize(\n",
" \"I received a notification today about being subject to extended holds due to repeated overdrafts.\", \n",
" percentile=50\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'difference golden potatoes yellow potatoes'"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sample = \"what's the difference between golden potatoes and yellow potatoes?\"\n",
"result = bpe_summarize(\n",
" sample, \n",
" percentile=50\n",
")\n",
"result"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"what's difference golden potatoes yellow potatoes?\""
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"re.sub(r\"\\s{2,}\", \" \", remove_stopwords(sample))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
12 changes: 2 additions & 10 deletions notebooks/Evaluation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,7 @@
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:absl:Using tfhub/ to cache modules.\n"
]
}
],
"outputs": [],
"source": [
"import re\n",
"\n",
Expand All @@ -66,7 +58,7 @@
"from tqdm import tqdm\n",
"\n",
"from src.bpe_summarizer import bpe_summarize, sentencizer\n",
"from validation import scicummnet_validation, rouge_metric, similarity_score, mean_rouge_fscore"
"from validation import scicummnet_validation, similarity_score, mean_rouge_fscore"
]
},
{
Expand Down
Loading

0 comments on commit 0be2c6a

Please sign in to comment.