Skip to content

Commit

Permalink
tests: base test examples and strategies
Browse files Browse the repository at this point in the history
  • Loading branch information
Edward-Jackson-ONS committed Aug 20, 2024
1 parent 1f83415 commit 0c7659f
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 0 deletions.
37 changes: 37 additions & 0 deletions tests/readers/base/strategies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""Composite strategies for testing the base reader."""

from hypothesis import strategies as st
from langchain.docstore.document import Document

from ...common import SEARCH_TERMS, ST_FREE_TEXT


@st.composite
def st_terms_and_texts(draw, terms=SEARCH_TERMS):
    """Draw a list of at most one search term alongside some free text.

    A boolean is also drawn: when it is true, the drawn term (if any) is
    appended to the free text after a single space; when it is false, the
    free text is returned as drawn. The term list is returned either way.
    """

    chosen = draw(st.lists(st.sampled_from(terms), max_size=1))
    free_text = draw(ST_FREE_TEXT)
    include_term = draw(st.booleans())

    if not include_term:
        return chosen, free_text

    # `chosen` may be empty, in which case joining leaves the text as is.
    return chosen, " ".join([free_text, *chosen])


@st.composite
def st_chunks_contains_responses(draw):
    """Draw document chunks, containment flags, and responses for a test.

    Returns a list of one to five documents, a list holding one boolean
    per document (the first is always ``True``), and a list holding one
    free-text draw for every ``True`` flag.
    """

    documents = draw(
        st.lists(
            ST_FREE_TEXT.map(lambda text: Document(page_content=text)),
            min_size=1,
            max_size=5,
        )
    )

    # The first chunk is always flagged; the remaining flags are drawn.
    flags = [True]
    for _ in documents[1:]:
        flags.append(draw(st.booleans()))

    # Only flagged chunks get a response.
    responses = []
    for flag in flags:
        if flag is True:
            responses.append(draw(ST_FREE_TEXT))

    return documents, flags, responses
31 changes: 31 additions & 0 deletions tests/readers/base/test_examples.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""Example tests for the base reader class."""

import requests
from bs4 import BeautifulSoup

from ...common import ToyReader


def test_does_not_match_for_extra_abbreviations():
    """Check that words merely containing "ONS" are not flagged as terms."""

    near_misses = [
        "The ONSR is the Only National Sandwich Ranking.",
        "I AM UNLUCKY! SOME MIGHT SAY I AM DONSY!",
    ]
    checker = ToyReader(urls=[], terms=["ONS"])

    for sentence in near_misses:
        flagged = checker.check_contains_terms(sentence)
        assert not flagged


def test_81_add_ons_not_matched():
    """Ensure the example from #81 does not match.

    Downloads the page at a fixed URL, extracts its text, and asserts the
    reader does not report the term "ONS" in it. Requires network access.
    """

    reader = ToyReader(urls=[], terms=["ONS"])
    url = "https://theyworkforyou.com/wrans/?id=2024-04-12.21381.h"

    # Bound the wait so an unresponsive server fails the test instead of
    # hanging the whole suite; requests applies no timeout by default.
    response = requests.get(url, timeout=30)

    # An error page would let the negative assertion below pass vacuously,
    # so fail loudly if the page could not actually be fetched.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, "html.parser")

    assert not reader.check_contains_terms(soup.get_text())

0 comments on commit 0c7659f

Please sign in to comment.