From f2a92bd7e360e39a4439e4d97540fd68f2721451 Mon Sep 17 00:00:00 2001
From: Michael <michael.bauwens@ucll.be>
Date: Mon, 29 Jan 2024 10:04:39 +0100
Subject: [PATCH] Add reference to entropy implementation used (#3229)

* Add reference to entropy implementation used

Make it clearer that the entropy implementation in NLTK is the one based on the Shannon-McMillan-Breiman theorem, as used and referenced by Dan Jurafsky and Jordan Boyd-Graber.

* Consistently use full names

Co-authored-by: Ilia Kurenkov <ilia.kurenkov@gmail.com>

* Consistency with other docstrings in the module

Co-authored-by: Ilia Kurenkov <ilia.kurenkov@gmail.com>

---------

Co-authored-by: Ilia Kurenkov <ilia.kurenkov@gmail.com>
---
 nltk/lm/api.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/nltk/lm/api.py b/nltk/lm/api.py
index 5ebdf38ed4..1da6719165 100644
--- a/nltk/lm/api.py
+++ b/nltk/lm/api.py
@@ -163,6 +163,9 @@ def context_counts(self, context):
     def entropy(self, text_ngrams):
         """Calculate cross-entropy of model for given evaluation text.
 
+        This implementation is based on the Shannon-McMillan-Breiman theorem,
+        as used and referenced by Dan Jurafsky and Jordan Boyd-Graber.
+
         :param Iterable(tuple(str)) text_ngrams: A sequence of ngram tuples.
         :rtype: float