\n",
+ " \n",
- " express\n",
+ " [CLS]\n",
" \n",
" \n",
- " \n",
- " my\n",
+ " amazing\n",
" \n",
" \n",
- " \n",
- " appreciation\n",
+ " movie\n",
" \n",
" \n",
- " \n",
" .\n",
" \n",
" \n",
- " \n",
- " <\n",
- " \n",
- " \n",
- " \n",
- " br\n",
- " \n",
- " \n",
- " \n",
- " /\n",
+ " some\n",
" \n",
" \n",
" \n",
- " >\n",
- " \n",
- " \n",
- " \n",
- " <\n",
- " \n",
- " \n",
- " \n",
- " br\n",
- " \n",
- " \n",
- " \n",
- " /\n",
- " \n",
- " \n",
- " \n",
- " >\n",
- " \n",
- " \n",
- " \n",
- " spoiler\n",
- " \n",
- " \n",
- " \n",
- " :\n",
- " \n",
- " \n",
- " \n",
- " this\n",
+ " of\n",
" \n",
" \n",
- " \n",
- " movie\n",
+ " the\n",
" \n",
" \n",
- " \n",
- " doesn\n",
+ " script\n",
" \n",
" \n",
- " \n",
- " '\n",
+ " writing\n",
" \n",
" \n",
- " \n",
- " t\n",
+ " could\n",
" \n",
" \n",
- " \n",
" have\n",
" \n",
" \n",
- " \n",
- " a\n",
+ " been\n",
" \n",
" \n",
- " \n",
- " goofs\n",
+ " better\n",
" \n",
" \n",
- " \n",
- " section\n",
+ " (\n",
" \n",
" \n",
- " \n",
- " .\n",
+ " some\n",
" \n",
" \n",
" \n",
- " wonder\n",
- " \n",
- " \n",
- " \n",
- " ,\n",
- " \n",
- " \n",
- " \n",
- " didn\n",
- " \n",
- " \n",
- " \n",
- " '\n",
- " \n",
- " \n",
- " \n",
- " t\n",
- " \n",
- " \n",
- " \n",
- " anybody\n",
+ " cliched\n",
" \n",
" \n",
- " \n",
- " notice\n",
- " \n",
- " \n",
- " \n",
- " that\n",
+ " language\n",
" \n",
" \n",
- " \n",
- " hand\n",
+ " )\n",
" \n",
" \n",
- " \n",
- " in\n",
+ " .\n",
" \n",
" \n",
- " \n",
- " the\n",
+ " joyce\n",
" \n",
" \n",
- " \n",
- " 2\n",
+ " '\n",
" \n",
" \n",
- " \n",
- " part\n",
+ " s\n",
" \n",
" \n",
- " \n",
- " when\n",
+ " "\n",
" \n",
" \n",
- " \n",
" the\n",
" \n",
" \n",
- " \n",
- " kidnappers\n",
- " \n",
- " \n",
- " \n",
- " decided\n",
- " \n",
- " \n",
- " \n",
- " to\n",
- " \n",
- " \n",
- " \n",
- " go\n",
- " \n",
- " \n",
- " \n",
- " home\n",
- " \n",
- " \n",
- " \n",
- " ?\n",
- " \n",
- " \n",
" \n",
- " looks\n",
+ " dead\n",
" \n",
" \n",
- " \n",
- " like\n",
+ " "\n",
" \n",
" \n",
- " \n",
- " a\n",
+ " is\n",
" \n",
" \n",
- " \n",
- " part\n",
+ " alluded\n",
" \n",
" \n",
" \n",
- " of\n",
+ " to\n",
" \n",
" \n",
- " \n",
- " crew\n",
+ " throughout\n",
" \n",
" \n",
- " \n",
- " ,\n",
+ " the\n",
" \n",
" \n",
- " \n",
- " hehe\n",
+ " movie\n",
" \n",
" \n",
- " \n",
" .\n",
" \n",
" \n",
- " \n",
- " i\n",
- " \n",
- " \n",
- " \n",
- " know\n",
- " \n",
- " \n",
- " \n",
- " i\n",
- " \n",
- " \n",
- " \n",
- " should\n",
- " \n",
- " \n",
- " \n",
- " better\n",
- " \n",
- " \n",
- " \n",
- " post\n",
- " \n",
- " \n",
- " \n",
- " this\n",
- " \n",
- " \n",
- " \n",
- " in\n",
- " \n",
- " \n",
- " \n",
- " forums\n",
- " \n",
- " \n",
- " \n",
- " ,\n",
+ " beautiful\n",
" \n",
" \n",
- " \n",
- " but\n",
+ " scenery\n",
" \n",
" \n",
- " \n",
- " i\n",
+ " and\n",
" \n",
" \n",
- " \n",
- " don\n",
+ " great\n",
" \n",
" \n",
- " \n",
- " '\n",
+ " acting\n",
" \n",
" \n",
- " \n",
- " t\n",
+ " .\n",
" \n",
" \n",
- " \n",
- " agree\n",
+ " very\n",
" \n",
" \n",
- " \n",
- " with\n",
+ " poetic\n",
" \n",
" \n",
- " \n",
- " some\n",
+ " .\n",
" \n",
" \n",
- " \n",
- " policies\n",
+ " highly\n",
" \n",
" \n",
- " \n",
- " here\n",
+ " recommend\n",
" \n",
" \n",
- " \n",
" .\n",
" \n",
@@ -3132,7 +657,7 @@
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": 10,
"metadata": {},
"outputs": [
{
@@ -3178,7 +703,7 @@
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
@@ -3194,7 +719,7 @@
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": 12,
"metadata": {},
"outputs": [
{
@@ -3574,7 +1099,7 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 13,
"metadata": {},
"outputs": [
{
@@ -3650,18 +1175,14 @@
"output_type": "stream",
"text": [
"Loading Thermostat configuration: multi_nli-bert-occ\n",
- "Dataset path is D:\\Working Student\\repo\\thermostat\\src\\thermostat\\dataset.py\n",
- "Additional parameters for loading: {}\n",
"Loading Thermostat configuration: multi_nli-bert-lig\n",
- "Dataset path is D:\\Working Student\\repo\\thermostat\\src\\thermostat\\dataset.py\n",
- "Additional parameters for loading: {}\n",
"Downloading and preparing dataset thermostat/multi_nli-bert-lig to C:\\Users\\49176\\.cache\\huggingface\\datasets\\thermostat\\multi_nli-bert-lig\\1.0.1\\0cbe93e1fbe5b8ed0217559442d8b49a80fd4c2787185f2d7940817c67d8707b...\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "032671160c2b41f99c2683f3579196df",
+ "model_id": "98a45c0de0b04800b1974202833db926",
"version_major": 2,
"version_minor": 0
},
@@ -3692,15 +1213,13 @@
"text": [
"Dataset thermostat downloaded and prepared to C:\\Users\\49176\\.cache\\huggingface\\datasets\\thermostat\\multi_nli-bert-lig\\1.0.1\\0cbe93e1fbe5b8ed0217559442d8b49a80fd4c2787185f2d7940817c67d8707b. Subsequent calls will reuse this data.\n",
"Loading Thermostat configuration: multi_nli-bert-lime\n",
- "Dataset path is D:\\Working Student\\repo\\thermostat\\src\\thermostat\\dataset.py\n",
- "Additional parameters for loading: {}\n",
"Downloading and preparing dataset thermostat/multi_nli-bert-lime to C:\\Users\\49176\\.cache\\huggingface\\datasets\\thermostat\\multi_nli-bert-lime\\1.0.1\\0cbe93e1fbe5b8ed0217559442d8b49a80fd4c2787185f2d7940817c67d8707b...\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "419d61c9f31343c3ac6b55bf386170d9",
+ "model_id": "96404ef25f5040a5af1358793cc69d19",
"version_major": 2,
"version_minor": 0
},
@@ -4289,9 +1808,67 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 16,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Loading Thermostat configuration: imdb-bert-lime\n",
+ "Downloading and preparing dataset thermostat/imdb-bert-lime to C:\\Users\\49176\\.cache\\huggingface\\datasets\\thermostat\\imdb-bert-lime\\1.0.1\\0cbe93e1fbe5b8ed0217559442d8b49a80fd4c2787185f2d7940817c67d8707b...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "017d3fe023654275a9bdf63662a4b2a6",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/276M [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "0 examples [00:00, ? examples/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Dataset thermostat downloaded and prepared to C:\\Users\\49176\\.cache\\huggingface\\datasets\\thermostat\\imdb-bert-lime\\1.0.1\\0cbe93e1fbe5b8ed0217559442d8b49a80fd4c2787185f2d7940817c67d8707b. Subsequent calls will reuse this data.\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Reusing dataset thermostat (C:\\Users\\49176\\.cache\\huggingface\\datasets\\thermostat\\imdb-bert-lig\\1.0.1\\0cbe93e1fbe5b8ed0217559442d8b49a80fd4c2787185f2d7940817c67d8707b)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Loading Thermostat configuration: imdb-bert-lig\n"
+ ]
+ }
+ ],
"source": [
"imdb_lime = thermostat.load(\"imdb-bert-lime\")\n",
"imdb_intg = thermostat.load(\"imdb-bert-lig\")"
@@ -4308,7 +1885,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 17,
"metadata": {},
"outputs": [
{
diff --git a/setup.py b/setup.py
index d3bb012..9fb083a 100644
--- a/setup.py
+++ b/setup.py
@@ -28,7 +28,7 @@
setup(
name="thermostat-datasets",
- version="1.0.2", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
+ version="1.0.2.1", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
description="Collection of NLP model explanations and accompanying analysis tools",
long_description="Thermostat is a large collection of NLP model explanations and accompanying analysis tools. "
"Combines explainability methods from the captum library with Hugging Face's datasets and "
diff --git a/src/thermostat/data/thermostat_configs.py b/src/thermostat/data/thermostat_configs.py
index bd50c58..e7728a1 100644
--- a/src/thermostat/data/thermostat_configs.py
+++ b/src/thermostat/data/thermostat_configs.py
@@ -1,7 +1,7 @@
import datasets
-_VERSION = datasets.Version('1.0.1', '')
+_VERSION = datasets.Version('1.0.2', '')
# Base arguments for any dataset
diff --git a/src/thermostat/visualize.py b/src/thermostat/visualize.py
index 34b2753..041fb93 100644
--- a/src/thermostat/visualize.py
+++ b/src/thermostat/visualize.py
@@ -3,7 +3,7 @@
import os
import pandas as pd
import torch
-from datasets import tqdm
+from tqdm import tqdm
from spacy import displacy
from spacy.util import is_in_jupyter
from transformers import AutoTokenizer