Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scipy version update #257

Merged
merged 10 commits into from
Dec 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 29 additions & 20 deletions convokit/__init__.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,31 @@
from .model import *
from .util import *
from .coordination import *
from .politenessStrategies import *
from .transformer import *
from .convokitPipeline import *
from .hyperconvo import *
from .speakerConvoDiversity import *
from .text_processing import *
from .phrasing_motifs import *
from .prompt_types import *
from .classifier import *
from .ranker import *
from .forecaster import *
from .fighting_words import *
from .paired_prediction import *
from .bag_of_words import *
from .expected_context_framework import *
from .surprise import *
from .convokitConfig import *
import warnings

try:
from .model import *
from .util import *
from .coordination import *
from .politenessStrategies import *
from .transformer import *
from .convokitPipeline import *
from .hyperconvo import *
from .speakerConvoDiversity import *
from .text_processing import *
from .phrasing_motifs import *
from .prompt_types import *
from .classifier import *
from .ranker import *
from .forecaster import *
from .fighting_words import *
from .paired_prediction import *
from .bag_of_words import *
from .expected_context_framework import *
from .surprise import *
from .convokitConfig import *
except Exception as e:
print(f"An error occurred: {e}")
warnings.warn(
"If you are using ConvoKit with Google Colab, incorrect versions of some packages (ex. scipy) may be imported while runtime start. To fix the issue, restart the session and run all codes again. Thank you!"
)


# __path__ = __import__('pkgutil').extend_path(__path__, __name__)
8 changes: 3 additions & 5 deletions convokit/model/conversation.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@ class Conversation(CorpusComponent):
:param meta: Table of initial values for conversation-level metadata

:ivar id: the ID of the Conversation
:ivar meta: A dictionary-like view object providing read-write access to
conversation-level metadata.
:ivar meta: A dictionary-like view object providing read-write access to conversation-level metadata.
"""

def __init__(
Expand Down Expand Up @@ -67,9 +66,8 @@ def iter_utterances(
"""
Get utterances in the Corpus, with an optional selector that filters for Utterances that should be included.

:param selector: a (lambda) function that takes an Utterance and returns True or False (i.e. include / exclude).
By default, the selector includes all Utterances in the Conversation.
:return: a generator of Utterances
:param selector: a (lambda) function that takes an Utterance and returns True or False (i.e. include / exclude). By default, the selector includes all Utterances in the Conversation.
:return: a generator of Utterances
"""
for ut_id in self._utterance_ids:
utt = self._owner.get_utterance(ut_id)
Expand Down
18 changes: 10 additions & 8 deletions convokit/model/corpusComponent.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ def set_data(self, property_name, value):
def retrieve_meta(self, key: str):
"""
Retrieves a value stored under the key of the metadata of corpus object

:param key: name of metadata attribute
:return: value
"""
Expand All @@ -137,6 +138,7 @@ def retrieve_meta(self, key: str):
def add_meta(self, key: str, value) -> None:
"""
Adds a key-value pair to the metadata of the corpus object

:param key: name of metadata attribute
:param value: value of metadata attribute
:return: None
Expand All @@ -148,11 +150,10 @@ def get_vector(
):
"""
Get the vector stored as `vector_name` for this object.

:param vector_name: name of vector
:param as_dataframe: whether to return the vector as a dataframe (True) or in its raw array form (False). False
by default.
:param columns: optional list of named columns of the vector to include. All columns returned otherwise. This
parameter is only used if as_dataframe is set to True
:param as_dataframe: whether to return the vector as a dataframe (True) or in its raw array form (False). False by default.
:param columns: optional list of named columns of the vector to include. All columns returned otherwise. This parameter is only used if as_dataframe is set to True
:return: a numpy / scipy array
"""
if vector_name not in self.vectors:
Expand All @@ -166,10 +167,10 @@ def get_vector(

def add_vector(self, vector_name: str):
"""
Logs in the Corpus component object's internal vectors list that the component object has a vector row
associated with it in the vector matrix named `vector_name`.
Transformers that add vectors to the Corpus should use this to update the relevant component objects during
the transform() step.
Logs in the Corpus component object's internal vectors list that the component object has a vector row associated with it in the vector matrix named `vector_name`.

Transformers that add vectors to the Corpus should use this to update the relevant component objects during the transform() step.

:param vector_name: name of vector matrix
:return: None
"""
Expand All @@ -182,6 +183,7 @@ def has_vector(self, vector_name: str):
def delete_vector(self, vector_name: str):
"""
Delete a vector associated with this Corpus component object.

:param vector_name:
:return: None
"""
Expand Down
7 changes: 2 additions & 5 deletions convokit/prompt_types/promptTypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,11 +127,8 @@ def fit(
Fits a PromptTypes model for a corpus -- that is, learns latent representations of prompt and response terms, as well as prompt types.

:param corpus: Corpus
:param prompt_selector: a boolean function of signature `filter(utterance)` that determines which
utterances will be considered as prompts in the fit step. defaults to using all utterances which have a response.
:param reference_selector: a boolean function of signature `filter(utterance)` that determines which utterances
will be considered as responses in the fit step. defaults to using all utterances which are responses to a
prompt.
:param prompt_selector: a boolean function of signature `filter(utterance)` that determines which utterances will be considered as prompts in the fit step. defaults to using all utterances which have a response.
:param reference_selector: a boolean function of signature `filter(utterance)` that determines which utterances will be considered as responses in the fit step. defaults to using all utterances which are responses to a prompt.

:return: None
"""
Expand Down
2 changes: 2 additions & 0 deletions docs/source/data_format.rst
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ utterances.jsonl contains a list of such utterances. An example utterance is sho
::

{'id': '200', 'speaker': 'mr. srinivasan', 'conversation_id': '145', 'reply_to': '199', 'timestamp': None, 'text': 'It -- it does.', 'meta': {'case': '02-1472', 'side': 'respondent'}}

::


Expand Down Expand Up @@ -121,6 +122,7 @@ As an example, the corpus-level metadata for the Reddit corpus (small) is shown
::

"overall-index": {"subreddit": "<class 'str'>", "num_posts": "<class 'int'>", "num_comments": "<class 'int'>", "num_speakers": "<class 'int'>"}

::


Expand Down
6 changes: 3 additions & 3 deletions docs/source/supreme.rst
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Supreme Court Oral Arguments Corpus
==============================
=======================================


A collection of cases from the U.S. Supreme Court, along with transcripts of oral arguments. Contains approximately 1,700,000 utterances over 8,000 oral arguments transcripts from 7,700 cases.
Expand All @@ -14,7 +14,7 @@ The following examples use this corpus:
* `computing linguistic coordination <https://github.com/CornellNLP/ConvoKit/blob/master/examples/coordination/examples.ipynb>`_

Some considerations regarding case and voting information
-------------------------------------------------
---------------------------------------------------------------

Each case in the data can have multiple conversations, corresponding to multiple sessions of oral arguments heard. For convenience, we include information for each conversation about how justices voted in the corresponding *case*, meaning that vote information will be repeated across each conversation corresponding to a case. The case metadata file also lists vote information.

Expand Down Expand Up @@ -126,7 +126,7 @@ Case information
* transcripts: a list of transcript names, URLs and IDs (corresponding to the IDs of conversations in the corpus).

Citation and other versions
^^^^^^^^^^^^^
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

This corpus extends a `smaller dataset <https://confluence.cornell.edu/display/llresearch/Supreme+Court+Dialogs+Corpus>`_ of oral arguments that we previously released together with `Echoes of power\: Language effects and power differences in social interaction <https://www.cs.cornell.edu/~cristian/Echoes_of_power.html>`_. Cristian Danescu-Niculescu-Mizil, Bo Pang, Lillian Lee and Jon Kleinberg. WWW 2012. Please cite the Echoes of Powers paper if you use either version of the corpus. If you use the ConvoKit version please additionally cite: `ConvoKit\: A Toolkit for the Analysis of Conversations <https://www.cs.cornell.edu/~cristian/ConvoKit_Demo_Paper_files/convokit-demo-paper.pdf>`_. Jonathan P. Chang, Caleb Chiam, Liye Fu, Andrew Wang, Justine Zhang, Cristian Danescu-Niculescu-Mizil. Proceedings of SIGDIAL. 2020.

Expand Down
8 changes: 8 additions & 0 deletions docs/source/troubleshooting.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,14 @@ General checks
Issues
^^^^^^

**Google Colab User Note**

Running ConvoKit in Google Colab can trigger an error with the scipy package, which is likely due to the Colab runtime preloading an older version that
is not compatible with other packages. When installing ConvoKit in a Colab environment, a user warning message should be displayed if the error occurs.
The error can be easily fixed by restarting the Colab runtime session and running the cells again.

-----------------------------

**Error Associated with Numpy 2.0.0**

The release of `numpy 2.0.0 <https://numpy.org/devdocs/release/2.0.0-notes.html>`_ is exciting,
Expand Down
23 changes: 12 additions & 11 deletions examples/Introduction_to_ConvoKit.ipynb

Large diffs are not rendered by default.

Large diffs are not rendered by default.

9 changes: 5 additions & 4 deletions examples/hyperconvo/hyperconvo_demo.ipynb

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions examples/hyperconvo/predictive_tasks.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,8 @@
"metadata": {},
"outputs": [],
"source": [
"threads_corpus = corpus.reindex_conversations(new_convo_roots=top_level_utterance_ids, \n",
"threads_corpus = Corpus.reindex_conversations(source_corpus=corpus, \n",
" new_convo_roots=top_level_utterance_ids, \n",
" preserve_convo_meta=True,\n",
" preserve_corpus_meta=False)"
]
Expand Down Expand Up @@ -226,7 +227,7 @@
"source": [
"## volume is the number of unique users in the first 10 comments\n",
"for convo in threads_corpus.iter_conversations():\n",
" convo.meta['volume'] = len(set([utt.user for utt in convo.get_chronological_utterance_list()[:10]]))"
" convo.meta['volume'] = len(set([utt.speaker for utt in convo.get_chronological_utterance_list()[:10]]))"
]
},
{
Expand Down
4 changes: 2 additions & 2 deletions examples/merging/corpus_merge_demo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@
}
],
"source": [
"corpus3 = corpus1.merge(corpus2)"
"corpus3 = Corpus.merge(corpus1, corpus2)"
]
},
{
Expand Down Expand Up @@ -325,7 +325,7 @@
}
],
"source": [
"corpus6 = corpus4.merge(corpus5)"
"corpus6 = Corpus.merge(corpus4, corpus5)"
]
},
{
Expand Down

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion examples/text-processing/text_preprocessing_demo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -786,7 +786,7 @@
"metadata": {},
"outputs": [],
"source": [
"adhoc_utt = prep.transform_utterance(adhoc_utt)"
"adhoc_utt = prep.transform_utterance(corpus.random_utterance())"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion examples/vectors/bag-of-words-demo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -794,7 +794,7 @@
}
],
"source": [
"threads_corpus = corpus.reindex_conversations(new_convo_roots=top_level_comment_ids)"
"threads_corpus = corpus.reindex_conversations(corpus, new_convo_roots=top_level_comment_ids)"
]
},
{
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
},
install_requires=[
"matplotlib>=3.0.0",
"scipy>1.14",
"pandas>=1.5.0",
"numpy>=2.0.0",
"msgpack-numpy>=0.4.3.2",
Expand Down
Loading