From f2a92bd7e360e39a4439e4d97540fd68f2721451 Mon Sep 17 00:00:00 2001
From: Michael
Date: Mon, 29 Jan 2024 10:04:39 +0100
Subject: [PATCH] Add reference to entropy implementation used (#3229)

* Add reference to entropy implementation used

Making it more clear that the entropy implementation in NLTK is the one based on the Shannon-McMillan-Breiman theorem, as used and referenced by Jurafsky and Jordan Boyd-Graber.

* Consistently use full names

Co-authored-by: Ilia Kurenkov

* Consistency with other docstrings in the module

Co-authored-by: Ilia Kurenkov

---------

Co-authored-by: Ilia Kurenkov
---
 nltk/lm/api.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/nltk/lm/api.py b/nltk/lm/api.py
index 5ebdf38ed4..1da6719165 100644
--- a/nltk/lm/api.py
+++ b/nltk/lm/api.py
@@ -163,6 +163,9 @@ def context_counts(self, context):
     def entropy(self, text_ngrams):
         """Calculate cross-entropy of model for given evaluation text.
 
+        This implementation is based on the Shannon-McMillan-Breiman theorem,
+        as used and referenced by Dan Jurafsky and Jordan Boyd-Graber.
+
         :param Iterable(tuple(str)) text_ngrams: A sequence of ngram tuples.
         :rtype: float