Skip to content

Commit

Permalink
Merge pull request #4 from keboola/adamvyborny-COM-1435-rewrite-tests
Browse files Browse the repository at this point in the history
Rewrite tests
  • Loading branch information
AdamVyborny authored May 10, 2022
2 parents 2e21045 + 0ee1b9d commit 80689d0
Show file tree
Hide file tree
Showing 424 changed files with 9,980 additions and 531 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ jobs:
strategy:
fail-fast: false
matrix:
mongodb: [ latest, 5.0, 4.4, 3.2 ]
mongodb: [ latest, 5.0, 4.4, 3.6 ]
steps:
-
name: 'Check out the repo'
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
/.idea
/data
.env
.phpunit.result.cache
18 changes: 17 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,14 @@ COPY docker/php-prod.ini /usr/local/etc/php/php.ini
COPY docker/composer-install.sh /tmp/composer-install.sh

RUN apt-get update && apt-get install -y --no-install-recommends \
libicu-dev \
libssl-dev \
git \
ssh \
locales \
unzip \
&& rm -r /var/lib/apt/lists/* \
wget \
&& rm -r /var/lib/apt/lists/* \
&& sed -i 's/^# *\(en_US.UTF-8\)/\1/' /etc/locale.gen \
&& locale-gen \
&& chmod +x /tmp/composer-install.sh \
Expand All @@ -24,6 +28,18 @@ ENV LANGUAGE=en_US.UTF-8
ENV LANG=en_US.UTF-8
ENV LC_ALL=en_US.UTF-8

RUN wget https://fastdl.mongodb.org/tools/db/mongodb-database-tools-debian10-x86_64-100.5.2.deb \
&& wget https://downloads.mongodb.com/compass/mongodb-mongosh_1.3.1_amd64.deb \
&& apt install ./mongodb-database-tools-debian10-x86_64-100.5.2.deb \
&& apt install ./mongodb-mongosh_1.3.1_amd64.deb

# Intl is required for league/uri
RUN docker-php-ext-configure intl \
&& docker-php-ext-install intl

RUN pecl install mongodb \
&& docker-php-ext-enable mongodb

## Composer - deps always cached unless changed
# First copy only composer files
COPY composer.* /code/
Expand Down
191 changes: 189 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,196 @@

> Docker application for exporting data from MongoDB. It is a simple wrapper around the `mongoexport` command, which exports data from the specified database and collection. The exported data is then processed by php-csvmap.
# Usage
## Configuration

> TODO
The configuration file `config.json` contains the following properties under the `parameters` key:
- `db` - object (required): Configuration of the connection.
- `protocol` - string (optional): One of `mongodb` (default), `mongodb+srv` or `custom_uri`.
- **Additional parameters if `protocol` = `custom_uri`**:
- `uri` - string:
- [MongoDB Connection String](https://docs.mongodb.com/manual/reference/connection-string/)
- E.g. `mongodb://user@localhost,localhost:27018,localhost:27019/db?replicaSet=test&ssl=true`.
- The password must not be part of the URI. It must be provided, encrypted, in the `#password` item.
- `#password` - string (required): Password for user specified in `uri`.
- **Additional parameters if `protocol` = `mongodb` or `mongodb+srv`**:
- `host` - string (required):
- If `protocol` = `mongodb`, then value is hostname of MongoDB server.
- If `protocol` = `mongodb+srv`, then value is [DNS Seedlist Connection Format](https://docs.mongodb.com/manual/reference/connection-string/#dns-seedlist-connection-format).
- `port` - string (optional): Server port (default port is `27017`).
- `database` - string (required): Database to connect to.
- `authenticationDatabase` - string (optional): [Authentication database](https://docs.mongodb.com/manual/reference/program/mongo/#authentication-options) for `user`.
- `user` - string (optional): User with correct access rights.
- `#password` - string (optional): Password for the given `user`. Either both `user` and `#password` must be specified, or neither.
- `ssh` - object (optional): Settings for SSH tunnel.
- `enabled` - bool (required): Enables SSH tunnel.
- `sshHost` - string (required): IP address or hostname of SSH server.
- `sshPort` - integer (optional): SSH server port (default port is `22`).
- `localPort` - integer (optional): SSH tunnel local port in the Docker container (default `33006`).
- `user` - string (optional): SSH user (default same as `db.user`).
- `compression` - bool (optional): Enables SSH tunnel compression (default `false`).
- `keys` - object (optional): SSH keys.
- `public` - string (optional): Public SSH key.
- `#private` - string (optional): Private SSH key.
- `quiet` - boolean (optional): Default `false`. If `true`, `--quiet` is passed to the `mongoexport` command to hide logs. This should help with the `Failed: EOF` problem, [read more](https://stackoverflow.com/a/39122219).
- `exports` - object[] (required): [Exports configuration](https://help.keboola.com/components/extractors/database/mongodb/#configure-exports).
- `enabled` - boolean (optional): Default `true`.
- `id` - scalar (required): Internal `id` of the export.
- `name` - string (required): Name of the output CSV file.
- `collection` - string (required): Represents the collection name in your MongoDB database.
- `query`- string (optional):
- JSON string specifying a query that limits which documents are included in the exported data.
- Must be specified in a [strict format](https://help.keboola.com/components/extractors/database/mongodb/#strict-format).
- `incremental` - boolean (optional): Enables [Incremental Loading](https://help.keboola.com/storage/tables/#incremental-loading). Default `false`.
- `incrementalFetchingColumn` - string (optional): Name of the column used for [Incremental Fetching](https://help.keboola.com/components/extractors/database/#incremental-fetching).
- `sort`- string (optional):
- JSON string specifying the order of documents in exported data.
- Must be specified in a [strict format](https://help.keboola.com/components/extractors/database/mongodb/#strict-format).
- `limit`- string (optional): Limits the number of exported documents.
- `mode` - enum (optional)
- `mapping` (default) - Values are exported using specified `mapping`, [read more](https://help.keboola.com/components/extractors/database/mongodb/#configure-mapping).
- `raw` - Documents are exported as plain JSON strings, [read more](https://help.keboola.com/components/extractors/database/mongodb/#raw-export-mode).
- `mapping` - string - required for `mode` = `mapping`, [read more](https://help.keboola.com/components/extractors/database/mongodb/#configure-mapping).
- `includeParentInPK` - boolean (optional): Default `false`
- Intended for `mapping` mode and ignored in `raw` mode.
- If `false`
- PK of sub-document depends ONLY on sub-document content,
- ... so same PK is generated for sub-documents with same content, but from different parent document
- this is legacy/default behaviour
- If `true`
- PK of sub-document depends on content AND hash of parent document
- ... so different PK is generated for sub-documents with same content, but from different parent document
- this is new behaviour, the UI automatically turns it on for new configs

### Protocol

#### mongodb://

When `parameters.db.protocol` is not defined or is set to `mongodb`, the extractor connects to a single MongoDB node.

```json
{
"parameters": {
"db": {
"host": "127.0.0.1",
"port": 27017,
"database": "test",
"user": "username",
"#password": "password"
},
"exports": "..."
}
}
```

#### mongodb+srv://

When `parameters.db.protocol` = `mongodb+srv`, the extractor connects to a
MongoDB cluster using [DNS Seedlist Connection Format](https://docs.mongodb.com/manual/reference/connection-string/#dns-seedlist-connection-format).

```json
{
"parameters": {
"db": {
"protocol": "mongodb+srv",
"host": "mongodb.cluster.local",
"database": "test",
"user": "username",
"#password": "password"
},
"exports": "..."
}
}
```

#### Custom URI

When `parameters.db.protocol` = `custom_uri`, the extractor connects to the URI defined in `parameters.db.uri`:
- The password is not part of the URI; it must be provided, encrypted, in the `#password` item.
- `host`, `port`, `database`, `authenticationDatabase` are included in `uri` and must not be defined in separate items.
- Custom URI cannot be used with SSH tunnel.

```json
{
"parameters": {
"db": {
"protocol": "custom_uri",
"uri": "mongodb://user@localhost,localhost:27018,localhost:27019/db?replicaSet=test&ssl=true",
"#password": "password"
},
"exports": "..."
}
}
```

### Example
```json
{
"parameters": {
"db": {
"host": "127.0.0.1",
"port": 27017,
"database": "test",
"user": "username",
"#password": "password",
"ssh": {
"enabled": true,
"sshHost": "mongodb",
"sshPort": 22,
"user": "root",
"keys": {
"public": "ssh-rsa ...your public key...",
"private": "-----BEGIN RSA PRIVATE KEY-----\n...your private key...\n-----END RSA PRIVATE KEY-----\n"
}
}
},
"exports": [
{
"name": "bronx-bakeries-westchester",
"collection": "restaurants",
"query": "{borough: \"Bronx\", \"address.street\": \"Westchester Avenue\"}",
"incremental": true,
"mapping": {
"_id.$oid": {
"type": "column",
"mapping": {
"destination": "id",
"primaryKey": true
}
},
"name": "name",
"address": {
"type": "table",
"destination": "bakeries-coords",
"parentKey": {
"destination": "bakeries_id"
},
"tableMapping": {
"coord.0": "w",
"coord.1": "n",
"zipcode": {
"type": "column",
"mapping": {
"destination": "zipcode",
"primaryKey": true
}
},
"street": "street"
}
}
}
}
]
}
}
```

## Output

After a successful extraction, there are several CSV files containing the exported data. The first output
file is named after the `name` parameter in the export configuration. Other files are named after the `destination`
parameter in the mapping section.

Also, there is a manifest file for each export.

## Development

Expand Down
31 changes: 25 additions & 6 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,31 @@
"description": "Keboola Extractor for MongoDB",
"license": "MIT",
"require": {
"ext-json": "*",
"ext-intl": "*",
"ext-mongodb": "*",
"keboola/csv": "^3.0",
"keboola/csvmap": "^2.1",
"keboola/php-component": "^9.0",
"keboola/ssh-tunnel": "^2.0",
"league/uri": "^6.2",
"league/uri-components": "^2.2",
"monolog/monolog": "^2.0",
"nette/utils": "^2.5",
"php": "^8.1",
"keboola/php-component": "^9.0"
"symfony/config": "^5.0",
"symfony/console": "^5.0",
"symfony/filesystem": "^5.0",
"symfony/process": "^5.0",
"symfony/serializer": "^5.0"
},
"require-dev": {
"php-parallel-lint/php-parallel-lint": "^1.3",
"keboola/coding-standard": ">=7.0.2",
"keboola/datadir-tests": "^5.3",
"keboola/php-temp": "^2.0",
"phpstan/phpstan": "^1.4",
"phpunit/phpunit": "^9.5",
"symfony/process": "^5.0"
"phpunit/phpunit": "^9.5"
},
"autoload": {
"psr-4": {
Expand All @@ -23,7 +37,8 @@
"autoload-dev": {
"psr-4": {
"MongoExtractor\\Tests\\": "tests/phpunit/",
"MongoExtractor\\FunctionalTests\\": "tests/functional/"
"MongoExtractor\\FunctionalTests\\": "tests/functional/",
"MongoExtractor\\Tests\\Traits\\": "tests/traits/"
}
},
"scripts": {
Expand All @@ -34,7 +49,7 @@
"@tests-datadir"
],

"phpstan": "phpstan analyse ./src ./tests --level=max --no-progress -c phpstan.neon",
"phpstan": "phpstan analyse ./src tests --level=max --no-progress -c phpstan.neon",
"phpcs": "phpcs -n --ignore=vendor --extensions=php .",
"phpcbf": "phpcbf -n --ignore=vendor --extensions=php .",
"phplint": "parallel-lint -j 10 --exclude vendor .",
Expand All @@ -44,9 +59,13 @@
"@phpstan",
"@tests"
],
"build-temp": [
"@phplint",
"@tests"
],
"ci": [
"@composer validate --no-check-publish --no-check-all",
"@build"
"@build-temp"
]
},
"config": {
Expand Down
Loading

0 comments on commit 80689d0

Please sign in to comment.