Skip to content

Commit

Permalink
fix: Fix TypeError in record generation (#47)
Browse files Browse the repository at this point in the history
* fix: Fix `TypeError` in record generation

* Lint YAML files

* Seed database

* Lint

* Run explicitly as a script

* Fix script path

* Test on supported MongoDB versions

* Lint

* Fix datetime type
  • Loading branch information
edgarrmondragon authored Dec 17, 2024
1 parent de2cddd commit 2810211
Show file tree
Hide file tree
Showing 6 changed files with 170 additions and 29 deletions.
66 changes: 56 additions & 10 deletions .github/workflows/validate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@ jobs:
- '3.13'
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Install poetry
run: pipx install poetry
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
with:
python-version: ${{ matrix.python-version }}
cache: poetry
Expand All @@ -39,7 +39,7 @@ jobs:
poetry env use ${{ matrix.python-version }}
poetry install
- name: Run pre-commit checks
uses: pre-commit/action@v3.0.0
uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
smoke-test:
runs-on: ubuntu-latest
strategy:
Expand All @@ -54,9 +54,9 @@ jobs:
- '3.13'
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
with:
python-version: ${{ matrix.python-version }}
- name: Install meltano
Expand All @@ -71,21 +71,25 @@ jobs:
fail-fast: false
matrix:
mongo-version:
- 3.6
- 4.4
- '8.0'
python-version:
- '3.8'
- '3.9'
- '3.10'
- '3.11'
- '3.12'
- '3.13'
include:
- mongo-version: '6.0'
python-version: '3.13'
- mongo-version: '7.0'
python-version: '3.13'
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Install poetry
run: pipx install poetry
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
with:
python-version: ${{ matrix.python-version }}
cache: poetry
Expand All @@ -94,11 +98,53 @@ jobs:
poetry env use ${{ matrix.python-version }}
poetry install
- name: Start MongoDB
uses: supercharge/mongodb-github-action@1.9.0
uses: supercharge/mongodb-github-action@5a87bd81f88e2a8b195f8b7b656f5cda1350815a # 1.11.0
with:
mongodb-version: ${{ matrix.mongo-version }}
mongodb-username: admin
mongodb-password: password
mongodb-db: test
- name: Test with pytest
run: poetry run pytest
# End-to-end job: starts MongoDB, seeds it with fake data, then runs the tap through Meltano.
integration:
runs-on: ubuntu-latest
name: Meltano integration test 🐉
# Connection values shared by the MongoDB action, the seed script and the tap settings below.
env:
DATABASE: test
USERNAME: admin
PASSWORD: password
strategy:
matrix:
# Versions are quoted so they stay strings (e.g. '6.0' rather than the number 6).
mongodb-version:
- '6.0'
- '7.0'
- '8.0'
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Start MongoDB
uses: supercharge/mongodb-github-action@5a87bd81f88e2a8b195f8b7b656f5cda1350815a # 1.11.0
with:
mongodb-version: ${{ matrix.mongodb-version }}
mongodb-username: ${{ env.USERNAME }}
mongodb-password: ${{ env.PASSWORD }}
mongodb-db: ${{ env.DATABASE }}
- uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
with:
python-version: 3.x
- uses: astral-sh/setup-uv@38f3f104447c67c051c4a08e39b64a148898af3a # v4.2.0
with:
version: '>=0.5'
- name: Install Meltano
run: uv tool install meltano
# Set Meltano's venv.backend setting to uv.
- run: meltano config meltano set venv.backend uv
# Populate the users and posts collections via scripts/seed.py before the tap runs.
- run: >
uv run scripts/seed.py
--host localhost
--port 27017
--database ${{ env.DATABASE }}
--username ${{ env.USERNAME }}
--password ${{ env.PASSWORD }}
# Run the tap against the seeded database, writing to target-jsonl.
- run: meltano run tap-mongodb target-jsonl
env:
TAP_MONGODB_DATABASE: ${{ env.DATABASE }}
TAP_MONGODB_MONGODB_CONNECTION_STRING: mongodb://${{ env.USERNAME }}:${{ env.PASSWORD }}@localhost:27017/
2 changes: 1 addition & 1 deletion .python-version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.11.1
3.13
51 changes: 34 additions & 17 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ mypy = "^1.0.0"
isort = "^5.11.5"
singer-sdk = { version = "^0.42.1", extras = ["testing"] }
pylint = "^3.0.0a6"
faker = "^33.1.0"

[tool.poetry.extras]
s3 = ["fs-s3fs"]
Expand Down
77 changes: 77 additions & 0 deletions scripts/seed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# /// script
# dependencies = [
# "faker",
# "pymongo",
# ]
# ///

"""Seed the MongoDB database with fake data."""

import argparse

import pymongo
from faker import Faker


class Params(argparse.Namespace):  # pylint: disable=too-few-public-methods
    """Typed view of the options accepted by the seed script.

    An instance is handed to ``ArgumentParser.parse_args(namespace=...)`` so
    the parsed values can be read as annotated attributes.
    """

    # Where the MongoDB server is reachable.
    host: str
    port: int

    # Optional credentials used to authenticate.
    username: str
    password: str

    # Database that receives the generated collections.
    database: str


def seed():
    """Seed a MongoDB database with fake data.

    Connection settings come from the command line: ``--host``, ``--port``,
    ``--database`` (required), ``--username`` and ``--password``. The
    ``users`` and ``posts`` collections are emptied before new documents are
    inserted, so every run starts from a clean slate.

    - users: 100
    - posts: 1000, each pointing at one of the generated users
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", default="localhost", help="MongoDB host")
    parser.add_argument("--database", required=True, help="MongoDB database")
    parser.add_argument("--username", default=None, help="MongoDB username")
    parser.add_argument("--password", default=None, help="MongoDB password")
    # type=int keeps a port given on the command line consistent with the
    # ``port: int`` annotation on Params (argparse yields str otherwise).
    parser.add_argument("--port", type=int, default=27017, help="MongoDB port")
    args = parser.parse_args(namespace=Params())

    fake = Faker()

    # Credentials are passed as keyword arguments instead of being formatted
    # into a URI: omitted credentials stay None (no "None:None@" in the URI)
    # and special characters in them need no percent-escaping.
    # The context manager closes the client once seeding is done.
    with pymongo.MongoClient(
        host=args.host,
        port=args.port,
        username=args.username,
        password=args.password,
    ) as client:
        db = client[args.database]

        users = db["users"]
        users.delete_many({})
        # A list is enough here: every inserted_id is already unique.
        users_oids = []

        for _ in range(100):
            result = users.insert_one(
                {
                    "name": fake.name(),
                    "address": fake.address(),
                    "email": fake.email(),
                    "joined_at": fake.date_time_this_decade(),
                }
            )
            users_oids.append(result.inserted_id)

        posts = db["posts"]
        posts.delete_many({})
        for _ in range(1000):
            created_at = fake.date_time_this_decade()
            # The update time is drawn starting from created_at, so it is
            # never earlier than the creation time.
            updated_at = fake.date_time_between_dates(created_at)
            posts.insert_one(
                {
                    "title": fake.sentence(),
                    "content": fake.text(),
                    "user_id": fake.random_element(users_oids),
                    "created_at": created_at,
                    "updated_at": updated_at,
                }
            )


# Seed only when executed as a script (e.g. ``uv run scripts/seed.py``), not on import.
if __name__ == "__main__":
    seed()
2 changes: 1 addition & 1 deletion tap_mongodb/streams.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def _generate_record_messages(self, record: dict) -> Generator[singer.RecordMess
Record message objects.
"""
extracted_at: datetime = record.pop("_sdc_extracted_at", utc_now())
pop_deselected_record_properties(record, self.schema, self.mask, self.logger)
pop_deselected_record_properties(record, self.schema, self.mask)
record = conform_record_data_types(
stream_name=self.name,
record=record,
Expand Down

0 comments on commit 2810211

Please sign in to comment.