diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c17bf25..f8b2178 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -62,4 +62,4 @@ jobs:
       - name: Run posthog tests
         run: |
-          python setup.py test
+          pytest --verbose --timeout=30
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5b2fffe..887b21c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+## 3.1.0 - 2023-12-04
+
+1. Increase maximum event size and batch size
+
 ## 3.0.2 - 2023-08-17
 
 1. Returns the current flag property with $feature_flag_called events, to make it easier to use in experiments
diff --git a/posthog/consumer.py b/posthog/consumer.py
index 5e403e8..794c6cc 100644
--- a/posthog/consumer.py
+++ b/posthog/consumer.py
@@ -12,11 +12,12 @@
 except ImportError:
     from Queue import Empty
 
-MAX_MSG_SIZE = 32 << 10
-# Our servers only accept batches less than 500KB. Here limit is set slightly
-# lower to leave space for extra data that will be added later, eg. "sentAt".
-BATCH_SIZE_LIMIT = 475000
+MAX_MSG_SIZE = 900 * 1024  # 900KiB per event
+
+# The maximum request body size is currently 20MiB; let's be conservative
+# in case we want to lower it in the future.
+BATCH_SIZE_LIMIT = 5 * 1024 * 1024
 
 
 class Consumer(Thread):
@@ -104,7 +105,7 @@ def next(self):
                 item = queue.get(block=True, timeout=self.flush_interval - elapsed)
                 item_size = len(json.dumps(item, cls=DatetimeSerializer).encode())
                 if item_size > MAX_MSG_SIZE:
-                    self.log.error("Item exceeds 32kb limit, dropping. (%s)", str(item))
+                    self.log.error("Item exceeds 900KiB limit, dropping. (%s)", str(item))
                     continue
                 items.append(item)
                 total_size += item_size
diff --git a/posthog/test/test_consumer.py b/posthog/test/test_consumer.py
index 58600b6..f7e6d96 100644
--- a/posthog/test/test_consumer.py
+++ b/posthog/test/test_consumer.py
@@ -145,15 +145,20 @@ def test_pause(self):
     def test_max_batch_size(self):
         q = Queue()
         consumer = Consumer(q, TEST_API_KEY, flush_at=100000, flush_interval=3)
-        track = {"type": "track", "event": "python event", "distinct_id": "distinct_id"}
+        properties = {}
+        for n in range(0, 500):
+            properties[str(n)] = "one_long_property_value_to_build_a_big_event"
+        track = {"type": "track", "event": "python event", "distinct_id": "distinct_id", "properties": properties}
         msg_size = len(json.dumps(track).encode())
-        # number of messages in a maximum-size batch
-        n_msgs = int(475000 / msg_size)
+        # Let's capture 8MB of data to trigger two batches
+        n_msgs = int(8_000_000 / msg_size)
 
         def mock_post_fn(_, data, **kwargs):
             res = mock.Mock()
             res.status_code = 200
-            self.assertTrue(len(data.encode()) < 500000, "batch size (%d) exceeds 500KB limit" % len(data.encode()))
+            request_size = len(data.encode())
+            # A batch closes after the first message that pushes it past BATCH_SIZE_LIMIT, so allow a 10% margin
+            self.assertTrue(request_size < (5 * 1024 * 1024) * 1.1, "batch size (%d) higher than limit" % request_size)
             return res
 
         with mock.patch("posthog.request._session.post", side_effect=mock_post_fn) as mock_post:
diff --git a/posthog/version.py b/posthog/version.py
index 749af35..51aac19 100644
--- a/posthog/version.py
+++ b/posthog/version.py
@@ -1,4 +1,4 @@
-VERSION = "3.0.2"
+VERSION = "3.1.0"
 
 if __name__ == "__main__":
     print(VERSION, end="")  # noqa: T201
diff --git a/setup.py b/setup.py
index b87480f..26acf6f 100644
--- a/setup.py
+++ b/setup.py
@@ -24,7 +24,7 @@
         "flake8-print",
         "pre-commit",
     ],
-    "test": ["mock>=2.0.0", "freezegun==0.3.15", "pylint", "flake8", "coverage", "pytest"],
+    "test": ["mock>=2.0.0", "freezegun==0.3.15", "pylint", "flake8", "coverage", "pytest", "pytest-timeout"],
     "sentry": ["sentry-sdk", "django"],
 }