diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..66e4657 --- /dev/null +++ b/Pipfile @@ -0,0 +1,11 @@ +[[source]] +url = "https://pypi.python.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +"e1839a8" = {path = ".", extras = ["nlp", "s3driver"], editable = true} + +[dev-packages] +"pytest-flake8" = "*" +ipython = "*" diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 0000000..75fd26b --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,455 @@ +{ + "_meta": { + "hash": { + "sha256": "02bcc79cf52a20e5172c477a0efb44d8d0d455235abefd4bfbc641e8c2453af2" + }, + "pipfile-spec": 6, + "requires": {}, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.python.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "boto3": { + "hashes": [ + "sha256:95ac50b1905e0aa0344a2a733d76c44af81b2cc51304386b94b0ef669d8d19bc", + "sha256:b227764ab3dcb4b55d54dd90c7676846f153b1e29ed259081ffc34b064a6ff21" + ], + "version": "==1.8.5" + }, + "botocore": { + "hashes": [ + "sha256:4a2d4fc68fdc7113957cfc51b733a9900a9ba35e19e6d841a8b11fd6c20732f9", + "sha256:dcad4db0349dd11278d094a91434faf11500aae1991890a62d47a79923ca7ba3" + ], + "version": "==1.11.5" + }, + "certifi": { + "hashes": [ + "sha256:376690d6f16d32f9d1fe8932551d80b23e9d393a8578c5633a2ed39a64861638", + "sha256:456048c7e371c089d0a77a5212fb37a2c2dce1e24146e3b7e0261736aaeaa22a" + ], + "version": "==2018.8.24" + }, + "chardet": { + "hashes": [ + "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", + "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691" + ], + "version": "==3.0.4" + }, + "decorator": { + "hashes": [ + "sha256:2c51dff8ef3c447388fe5e4453d24a2bf128d3a4c32af3fabef1f01c6851ab82", + "sha256:c39efa13fbdeb4506c476c9b3babf6a718da943dab7811c206005a4a956c080c" + ], + "version": "==4.3.0" + }, + "docutils": { + "hashes": [ + "sha256:02aec4bd92ab067f6ff27a38a38a41173bf01bed8f89157768c1573f53e474a6", + 
"sha256:51e64ef2ebfb29cae1faa133b3710143496eca21c530f3f71424d77687764274", + "sha256:7a4bd47eaf6596e1295ecb11361139febe29b084a87bf005bf899f9a42edc3c6" + ], + "version": "==0.14" + }, + "e1839a8": { + "editable": true, + "extras": [ + "nlp", + "s3driver" + ], + "path": "." + }, + "idna": { + "hashes": [ + "sha256:156a6814fb5ac1fc6850fb002e0852d56c0c8d2531923a51032d1b70760e186e", + "sha256:684a38a6f903c1d71d6d5fac066b58d7768af4de2b832e426ec79c30daa94a16" + ], + "version": "==2.7" + }, + "jmespath": { + "hashes": [ + "sha256:6a81d4c9aa62caf061cb517b4d9ad1dd300374cd4706997aff9cd6aedd61fc64", + "sha256:f11b4461f425740a1d908e9a3f7365c3d2e569f6ca68a2ff8bc5bcd9676edd63" + ], + "version": "==0.9.3" + }, + "joblib": { + "hashes": [ + "sha256:333b9bf16ff015d6b56bf80b9831afdd243443cb84c7ff7b6e342f117e354c42", + "sha256:3e650621a6ec2b9cdda72ec3e0b0f04101f605a56ae0d0e54e3d18b16fcf29f4" + ], + "version": "==0.12.3" + }, + "nltk": { + "hashes": [ + "sha256:fe0eda251be65843be86d7de9abfbf7161732256f742e623b21243ec47bdb718" + ], + "version": "==3.3.0" + }, + "numpy": { + "hashes": [ + "sha256:1c362ad12dd09a43b348bb28dd2295dd9cdf77f41f0f45965e04ba97f525b864", + "sha256:2156a06bd407918df4ac0122df6497a9c137432118f585e5b17d543e593d1587", + "sha256:24e4149c38489b51fc774b1e1faa9103e82f73344d7a00ba66f6845ab4769f3f", + "sha256:340ec1697d9bb3a9c464028af7a54245298502e91178bddb4c37626d36e197b7", + "sha256:35db8d419345caa4eeaa65cd63f34a15208acd87530a30f0bc25fc84f55c8c80", + "sha256:361370e9b7f5e44c41eee29f2bb5cb3b755abb4b038bce6d6cbe08db7ff9cb74", + "sha256:36e8dcd1813ca92ce7e4299120cee6c03adad33d89b54862c1b1a100443ac399", + "sha256:378378973546ecc1dfaf9e24c160d683dd04df871ecd2dcc86ce658ca20f92c0", + "sha256:419e6faee16097124ee627ed31572c7e80a1070efa25260b78097cca240e219a", + "sha256:4287104c24e6a09b9b418761a1e7b1bbde65105f110690ca46a23600a3c606b8", + "sha256:549f3e9778b148a47f4fb4682955ed88057eb627c9fe5467f33507c536deda9d", + 
"sha256:5e359e9c531075220785603e5966eef20ccae9b3b6b8a06fdfb66c084361ce92", + "sha256:5ee7f3dbbdba0da75dec7e94bd7a2b10fe57a83e1b38e678200a6ad8e7b14fdc", + "sha256:62d55e96ec7b117d3d5e618c15efcf769e70a6effaee5842857b64fb4883887a", + "sha256:719b6789acb2bc86ea9b33a701d7c43dc2fc56d95107fd3c5b0a8230164d4dfb", + "sha256:7a70f2b60d48828cba94a54a8776b61a9c2657a803d47f5785f8062e3a9c7c55", + "sha256:7b9e37f194f8bcdca8e9e6af92e2cbad79e360542effc2dd6b98d63955d8d8a3", + "sha256:83b8fc18261b70f45bece2d392537c93dc81eb6c539a16c9ac994c47fc79f09a", + "sha256:9473ad28375710ab18378e72b59422399b27e957e9339c413bf00793b4b12df0", + "sha256:95b085b253080e5d09f7826f5e27dce067bae813a132023a77b739614a29de6e", + "sha256:98b86c62c08c2e5dc98a9c856d4a95329d11b1c6058cb9b5191d5ea6891acd09", + "sha256:a3bd01d6d3ed3d7c06d7f9979ba5d68281f15383fafd53b81aa44b9191047cf8", + "sha256:c81a6afc1d2531a9ada50b58f8c36197f8418ef3d0611d4c1d7af93fdcda764f", + "sha256:ce75ed495a746e3e78cfa22a77096b3bff2eda995616cb7a542047f233091268", + "sha256:dae8618c0bcbfcf6cf91350f8abcdd84158323711566a8c5892b5c7f832af76f", + "sha256:df0b02c6705c5d1c25cc35c7b5d6b6f9b3b30833f9d178843397ae55ecc2eebb", + "sha256:e3660744cda0d94b90141cdd0db9308b958a372cfeee8d7188fdf5ad9108ea82", + "sha256:f2362d0ca3e16c37782c1054d7972b8ad2729169567e3f0f4e5dd3cdf85f188e" + ], + "version": "==1.15.1" + }, + "pandas": { + "hashes": [ + "sha256:11975fad9edbdb55f1a560d96f91830e83e29bed6ad5ebf506abda09818eaf60", + "sha256:12e13d127ca1b585dd6f6840d3fe3fa6e46c36a6afe2dbc5cb0b57032c902e31", + "sha256:1c87fcb201e1e06f66e23a61a5fea9eeebfe7204a66d99df24600e3f05168051", + "sha256:242e9900de758e137304ad4b5663c2eff0d798c2c3b891250bd0bd97144579da", + "sha256:26c903d0ae1542890cb9abadb4adcb18f356b14c2df46e4ff657ae640e3ac9e7", + "sha256:2e1e88f9d3e5f107b65b59cd29f141995597b035d17cc5537e58142038942e1a", + "sha256:31b7a48b344c14691a8e92765d4023f88902ba3e96e2e4d0364d3453cdfd50db", + "sha256:4fd07a932b4352f8a8973761ab4e84f965bf81cc750fb38e04f01088ab901cb8", + 
"sha256:5b24ca47acf69222e82530e89111dd9d14f9b970ab2cd3a1c2c78f0c4fbba4f4", + "sha256:647b3b916cc8f6aeba240c8171be3ab799c3c1b2ea179a3be0bd2712c4237553", + "sha256:66b060946046ca27c0e03e9bec9bba3e0b918bafff84c425ca2cc2e157ce121e", + "sha256:6efa9fa6e1434141df8872d0fa4226fc301b17aacf37429193f9d70b426ea28f", + "sha256:be4715c9d8367e51dbe6bc6d05e205b1ae234f0dc5465931014aa1c4af44c1ba", + "sha256:bea90da782d8e945fccfc958585210d23de374fa9294a9481ed2abcef637ebfc", + "sha256:d785fc08d6f4207437e900ffead930a61e634c5e4f980ba6d3dc03c9581748c7", + "sha256:de9559287c4fe8da56e8c3878d2374abc19d1ba2b807bfa7553e912a8e5ba87c", + "sha256:f4f98b190bb918ac0bc0e3dd2ab74ff3573da9f43106f6dba6385406912ec00f", + "sha256:f71f1a7e2d03758f6e957896ed696254e2bc83110ddbc6942018f1a232dd9dad", + "sha256:fb944c8f0b0ab5c1f7846c686bc4cdf8cde7224655c12edcd59d5212cd57bec0" + ], + "version": "==0.23.4" + }, + "python-dateutil": { + "hashes": [ + "sha256:1adb80e7a782c12e52ef9a8182bebeb73f1d7e24e374397af06fb4956c8dc5c0", + "sha256:e27001de32f627c22380a688bcc43ce83504a7bc5da472209b4c70f02829f0b8" + ], + "version": "==2.7.3" + }, + "pytz": { + "hashes": [ + "sha256:a061aa0a9e06881eb8b3b2b43f05b9439d6583c206d0a6c340ff72a7b6669053", + "sha256:ffb9ef1de172603304d9d2819af6f5ece76f2e85ec10692a524dd876e72bf277" + ], + "version": "==2018.5" + }, + "requests": { + "hashes": [ + "sha256:63b52e3c866428a224f97cab011de738c36aec0185aa91cfacd418b5d58911d1", + "sha256:ec22d826a36ed72a7358ff3fe56cbd4ba69dd7a6718ffd450ff0e9df7a47ce6a" + ], + "version": "==2.19.1" + }, + "s3transfer": { + "hashes": [ + "sha256:90dc18e028989c609146e241ea153250be451e05ecc0c2832565231dacdf59c1", + "sha256:c7a9ec356982d5e9ab2d4b46391a7d6a950e2b04c472419f5fdec70cc0ada72f" + ], + "version": "==0.1.13" + }, + "scikit-learn": { + "hashes": [ + "sha256:0a718b5ffbd5053fb3f9e1a2e20b7c4f256dd8035e246b907d3117d20bac0260", + "sha256:1725540b754a9967778e9385e1ee2c8db50d5ab70ed835c9f5e36002ffabc169", + 
"sha256:3e3ce307d7c5c5811658ba8686b24b571a8244eaafe707665ad601f400d5ce98", + "sha256:42ad71502237c9fe300ecf157f5a394df717789a2dde541dd7034b539c70bdcc", + "sha256:42cba716db197e0d1670e2fc13c4cc4a86d5c5358120ccfee6ec427b154e74ff", + "sha256:47b4090b7686642e41176becb7c42ef3cc665d7ee0db5e7ea5d307ec9779327e", + "sha256:51d99a08c8bf689cf60c9d8dca6e3d3e5f6d762def85ad735dcea11fb528a89b", + "sha256:5f7577fbb2399a4712e96cf0e786638168940a876c33735a1b5d5a86ba4b1370", + "sha256:66bfc2b6b15db1725d03ea657ec9184ff09dcbf1ecd834ef85f2edc2c9cbba97", + "sha256:69a34d389d9ca4687ad00af4e11d53686771f484c37366f68617ef656bab16ab", + "sha256:75297f3dd6685f01555f1bb75846995d45650af417280b69c81bf11b6987aed5", + "sha256:9ebb38ab1d0ee143982aed561811903ac6c1abb512ae2b9019b3b65bde63ffb9", + "sha256:a402c1484fe65df42d5dbc22a58e0695fe3afe2b0b229aee2a09c6d60ba8e5c2", + "sha256:aad6b9aac1617bd7efa0450643888bbd3410679a94bc8680d9863825686ef369", + "sha256:ad4db28d3dc16c01df75ed6efb72524537de3839a5d179fcf94094359fc72ec5", + "sha256:b276739a5f863ccacb61999a3067d0895ee291c95502929b2ae56ea1f882e888", + "sha256:b3dc88c4d2bcb26ffc5afe16d053ae28317d7d1de083651defcd5453a04f1563", + "sha256:b3e4681253e95da5aa5c231889a32b084fd997962bf8beda6f796bf422f734b2", + "sha256:c3d852d49d6c1710089d4513702099fa6f8e1aebfedf222319d80c47b0a195f8", + "sha256:c6612e7e43988b8b5e1957150449493a55f9c059de641083df7a964f86f2d1e7", + "sha256:c69e5c6051366a6ac9600d730276db939b1a205e42504ec0b8371f154b0058db", + "sha256:ce121baa8e85ec27c3065281657dcd78adaab7dcb046c7fe96ad4e5a9dcb6610", + "sha256:ed2a9a9bea6ec443b7effe5695c9c168b7bf9a67df6d880729760feda871b6a3", + "sha256:efd842d70b87e3ef3429c3149840b9189d4441ca951ab0cec62c94a964e219d9", + "sha256:f1428af5c381f6eef30ffbc7e047b7c713d4efa5d7bf5e57b62b3fc8d387044b", + "sha256:f6c7bf8cd4de1640b760b47f4d28deb26dbbf9acbe0194cdff54a898e190d872", + "sha256:f8329ac2160ad8bbbac6a507374685ceca3f24ca427fa9ee61a501280e1972d9", + "sha256:fefba2a43b92f8393366093b60efbe984a72a2b41cce16b4002005e4104ef938" 
+ ], + "version": "==0.19.2" + }, + "scipy": { + "hashes": [ + "sha256:0611ee97296265af4a21164a5323f8c1b4e8e15c582d3dfa7610825900136bb7", + "sha256:08237eda23fd8e4e54838258b124f1cd141379a5f281b0a234ca99b38918c07a", + "sha256:0e645dbfc03f279e1946cf07c9c754c2a1859cb4a41c5f70b25f6b3a586b6dbd", + "sha256:0e9bb7efe5f051ea7212555b290e784b82f21ffd0f655405ac4f87e288b730b3", + "sha256:108c16640849e5827e7d51023efb3bd79244098c3f21e4897a1007720cb7ce37", + "sha256:340ef70f5b0f4e2b4b43c8c8061165911bc6b2ad16f8de85d9774545e2c47463", + "sha256:3ad73dfc6f82e494195144bd3a129c7241e761179b7cb5c07b9a0ede99c686f3", + "sha256:3b243c77a822cd034dad53058d7c2abf80062aa6f4a32e9799c95d6391558631", + "sha256:404a00314e85eca9d46b80929571b938e97a143b4f2ddc2b2b3c91a4c4ead9c5", + "sha256:423b3ff76957d29d1cce1bc0d62ebaf9a3fdfaf62344e3fdec14619bb7b5ad3a", + "sha256:42d9149a2fff7affdd352d157fa5717033767857c11bd55aa4a519a44343dfef", + "sha256:625f25a6b7d795e8830cb70439453c9f163e6870e710ec99eba5722775b318f3", + "sha256:698c6409da58686f2df3d6f815491fd5b4c2de6817a45379517c92366eea208f", + "sha256:729f8f8363d32cebcb946de278324ab43d28096f36593be6281ca1ee86ce6559", + "sha256:8190770146a4c8ed5d330d5b5ad1c76251c63349d25c96b3094875b930c44692", + "sha256:878352408424dffaa695ffedf2f9f92844e116686923ed9aa8626fc30d32cfd1", + "sha256:8b984f0821577d889f3c7ca8445564175fb4ac7c7f9659b7c60bef95b2b70e76", + "sha256:8f841bbc21d3dad2111a94c490fb0a591b8612ffea86b8e5571746ae76a3deac", + "sha256:c22b27371b3866c92796e5d7907e914f0e58a36d3222c5d436ddd3f0e354227a", + "sha256:d0cdd5658b49a722783b8b4f61a6f1f9c75042d0e29a30ccb6cacc9b25f6d9e2", + "sha256:d40dc7f494b06dcee0d303e51a00451b2da6119acbeaccf8369f2d29e28917ac", + "sha256:d8491d4784aceb1f100ddb8e31239c54e4afab8d607928a9f7ef2469ec35ae01", + "sha256:dfc5080c38dde3f43d8fbb9c0539a7839683475226cf83e4b24363b227dfe552", + "sha256:e24e22c8d98d3c704bb3410bce9b69e122a8de487ad3dbfe9985d154e5c03a40", + "sha256:e7a01e53163818d56eabddcafdc2090e9daba178aad05516b20c6591c4811020", + 
"sha256:ee677635393414930541a096fc8e61634304bb0153e4e02b75685b11eba14cae", + "sha256:f0521af1b722265d824d6ad055acfe9bd3341765735c44b5a4d0069e189a0f40", + "sha256:f25c281f12c0da726c6ed00535ca5d1622ec755c30a3f8eafef26cf43fede694" + ], + "version": "==1.1.0" + }, + "six": { + "hashes": [ + "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9", + "sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb" + ], + "version": "==1.11.0" + }, + "sqlalchemy": { + "hashes": [ + "sha256:ef6569ad403520ee13e180e1bfd6ed71a0254192a934ec1dbd3dbf48f4aa9524" + ], + "version": "==1.2.11" + }, + "textblob": { + "hashes": [ + "sha256:7c9ff21a47a382fa4f235e84ce9be10cca4b9d46b012b79af6e47ea81b478a18", + "sha256:8301812cbef9b2f288e14df904854f7457fccf2c52020b66d3f9bc1448cf042a" + ], + "version": "==0.15.1" + }, + "urllib3": { + "hashes": [ + "sha256:a68ac5e15e76e7e5dd2b8f94007233e01effe3e50e8daddf69acfd81cb686baf", + "sha256:b5725a0bd4ba422ab0e66e89e030c806576753ea3ee08554382c14e685d117b5" + ], + "version": "==1.23" + } + }, + "develop": { + "atomicwrites": { + "hashes": [ + "sha256:0312ad34fcad8fac3704d441f7b317e50af620823353ec657a53e981f92920c0", + "sha256:ec9ae8adaae229e4f8446952d204a3e4b5fdd2d099f9be3aaf556120135fb3ee" + ], + "version": "==1.2.1" + }, + "attrs": { + "hashes": [ + "sha256:4b90b09eeeb9b88c35bc642cbac057e45a5fd85367b985bd2809c62b7b939265", + "sha256:e0d0eb91441a3b53dab4d9b743eafc1ac44476296a2053b6ca3af0b139faf87b" + ], + "version": "==18.1.0" + }, + "backcall": { + "hashes": [ + "sha256:38ecd85be2c1e78f77fd91700c76e14667dc21e2713b63876c0eb901196e01e4", + "sha256:bbbf4b1e5cd2bdb08f915895b51081c041bac22394fdfcfdfbe9f14b77c08bf2" + ], + "version": "==0.1.0" + }, + "colorama": { + "hashes": [ + "sha256:463f8483208e921368c9f306094eb6f725c6ca42b0f97e313cb5d5512459feda", + "sha256:48eb22f4f8461b1df5734a074b57042430fb06e1d61bd1e11b078c0fe6d7a1f1" + ], + "markers": "sys_platform == 'win32'", + "version": "==0.3.9" + }, + "decorator": { + "hashes": 
[ + "sha256:2c51dff8ef3c447388fe5e4453d24a2bf128d3a4c32af3fabef1f01c6851ab82", + "sha256:c39efa13fbdeb4506c476c9b3babf6a718da943dab7811c206005a4a956c080c" + ], + "version": "==4.3.0" + }, + "flake8": { + "hashes": [ + "sha256:7253265f7abd8b313e3892944044a365e3f4ac3fcdcfb4298f55ee9ddf188ba0", + "sha256:c7841163e2b576d435799169b78703ad6ac1bbb0f199994fc05f700b2a90ea37" + ], + "version": "==3.5.0" + }, + "ipython": { + "hashes": [ + "sha256:007dcd929c14631f83daff35df0147ea51d1af420da303fd078343878bd5fb62", + "sha256:b0f2ef9eada4a68ef63ee10b6dde4f35c840035c50fd24265f8052c98947d5a4" + ], + "index": "pypi", + "version": "==6.5.0" + }, + "ipython-genutils": { + "hashes": [ + "sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8", + "sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8" + ], + "version": "==0.2.0" + }, + "jedi": { + "hashes": [ + "sha256:b409ed0f6913a701ed474a614a3bb46e6953639033e31f769ca7581da5bd1ec1", + "sha256:c254b135fb39ad76e78d4d8f92765ebc9bf92cbc76f49e97ade1d5f5121e1f6f" + ], + "version": "==0.12.1" + }, + "mccabe": { + "hashes": [ + "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42", + "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f" + ], + "version": "==0.6.1" + }, + "more-itertools": { + "hashes": [ + "sha256:c187a73da93e7a8acc0001572aebc7e3c69daf7bf6881a2cea10650bd4420092", + "sha256:c476b5d3a34e12d40130bc2f935028b5f636df8f372dc2c1c01dc19681b2039e", + "sha256:fcbfeaea0be121980e15bc97b3817b5202ca73d0eae185b4550cbfce2a3ebb3d" + ], + "version": "==4.3.0" + }, + "parso": { + "hashes": [ + "sha256:35704a43a3c113cce4de228ddb39aab374b8004f4f2407d070b6a2ca784ce8a2", + "sha256:895c63e93b94ac1e1690f5fdd40b65f07c8171e3e53cbd7793b5b96c0e0a7f24" + ], + "version": "==0.3.1" + }, + "pickleshare": { + "hashes": [ + "sha256:84a9257227dfdd6fe1b4be1319096c20eb85ff1e82c7932f36efccfe1b09737b", + "sha256:c9a2541f25aeabc070f12f452e1f2a8eae2abd51e1cd19e8430402bdf4c1d8b5" + ], + 
"version": "==0.7.4" + }, + "pluggy": { + "hashes": [ + "sha256:6e3836e39f4d36ae72840833db137f7b7d35105079aee6ec4a62d9f80d594dd1", + "sha256:95eb8364a4708392bae89035f45341871286a333f749c3141c20573d2b3876e1" + ], + "version": "==0.7.1" + }, + "prompt-toolkit": { + "hashes": [ + "sha256:1df952620eccb399c53ebb359cc7d9a8d3a9538cb34c5a1344bdbeb29fbcc381", + "sha256:3f473ae040ddaa52b52f97f6b4a493cfa9f5920c255a12dc56a7d34397a398a4", + "sha256:858588f1983ca497f1cf4ffde01d978a3ea02b01c8a26a8bbc5cd2e66d816917" + ], + "version": "==1.0.15" + }, + "py": { + "hashes": [ + "sha256:06a30435d058473046be836d3fc4f27167fd84c45b99704f2fb5509ef61f9af1", + "sha256:50402e9d1c9005d759426988a492e0edaadb7f4e68bcddfea586bc7432d009c6" + ], + "version": "==1.6.0" + }, + "pycodestyle": { + "hashes": [ + "sha256:682256a5b318149ca0d2a9185d365d8864a768a28db66a84a2ea946bcc426766", + "sha256:6c4245ade1edfad79c3446fadfc96b0de2759662dc29d07d80a6f27ad1ca6ba9" + ], + "version": "==2.3.1" + }, + "pyflakes": { + "hashes": [ + "sha256:08bd6a50edf8cffa9fa09a463063c425ecaaf10d1eb0335a7e8b1401aef89e6f", + "sha256:8d616a382f243dbf19b54743f280b80198be0bca3a5396f1d2e1fca6223e8805" + ], + "version": "==1.6.0" + }, + "pygments": { + "hashes": [ + "sha256:78f3f434bcc5d6ee09020f92ba487f95ba50f1e3ef83ae96b9d5ffa1bab25c5d", + "sha256:dbae1046def0efb574852fab9e90209b23f556367b5a320c0bcb871c77c3e8cc" + ], + "version": "==2.2.0" + }, + "pytest": { + "hashes": [ + "sha256:2d7c49e931316cc7d1638a3e5f54f5d7b4e5225972b3c9838f3584788d27f349", + "sha256:ad0c7db7b5d4081631e0155f5c61b80ad76ce148551aaafe3a718d65a7508b18" + ], + "version": "==3.7.4" + }, + "pytest-flake8": { + "hashes": [ + "sha256:4f30f5be3efb89755f38f11bdb2a5e22d19a6f5faa73428f703a3292a9572cd3", + "sha256:c740ad6aa19e3958947d2118f70bed218caf1d2097039fb7318573a2a72f89a1" + ], + "index": "pypi", + "version": "==1.0.2" + }, + "simplegeneric": { + "hashes": [ + "sha256:dc972e06094b9af5b855b3df4a646395e43d1c9d0d39ed345b7393560d0b9173" + ], + "version": "==0.8.1" + }, 
+ "six": { + "hashes": [ + "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9", + "sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb" + ], + "version": "==1.11.0" + }, + "traitlets": { + "hashes": [ + "sha256:9c4bd2d267b7153df9152698efb1050a5d84982d3384a37b2c1f7723ba3e7835", + "sha256:c6cb5e6f57c5a9bdaa40fa71ce7b4af30298fbab9ece9815b5d995ab6217c7d9" + ], + "version": "==4.3.2" + }, + "wcwidth": { + "hashes": [ + "sha256:3df37372226d6e63e1b1e1eda15c594bca98a22d33a23832a90998faa96bc65e", + "sha256:f4ebe71925af7b40a864553f761ed559b43544f8f71746c2d756c7fe788ade7c" + ], + "version": "==0.1.7" + } + } +} diff --git a/quantgov/__init__.py b/quantgov/__init__.py index c699782..37f7d26 100644 --- a/quantgov/__init__.py +++ b/quantgov/__init__.py @@ -1,16 +1,7 @@ from __future__ import (absolute_import, division, print_function, unicode_literals) -__all__ = [ - 'corpora', - 'corpus', - 'estimator', - 'project', - 'utils', -] - -from . import corpora # Backwards compatibility - +from . 
import corpus, nlp, ml, utils from .utils import load_driver -__version__ = '0.4.2' +__version__ = '0.5.0' diff --git a/quantgov/__main__.py b/quantgov/__main__.py index 22428fc..a57163f 100644 --- a/quantgov/__main__.py +++ b/quantgov/__main__.py @@ -11,11 +11,10 @@ import sys import zipfile -import joblib as jl import requests +import joblib as jl import quantgov -import quantgov.corpus.builtins from pathlib import Path @@ -37,11 +36,11 @@ def parse_args(): create.add_argument('path', type=Path) create.add_argument('--parent', default='master') - # Corpus command - corpus = subparsers.add_parser('corpus') - corpus_subcommands = corpus.add_subparsers(dest='subcommand') - for command, builtin in quantgov.corpus.builtins.commands.items(): - subcommand = corpus_subcommands.add_parser( + # NLP command + nlp_subparser = subparsers.add_parser('nlp') + nlp_subcommands = nlp_subparser.add_subparsers(dest='subcommand') + for command, builtin in quantgov.nlp.commands.items(): + subcommand = nlp_subcommands.add_parser( command, help=builtin.cli.help) subcommand.add_argument( 'corpus', help='Path to a QuantGov Corpus directory') @@ -56,21 +55,24 @@ def parse_args(): default=sys.stdout ) - # Estimator Command - estimator = subparsers.add_parser('estimator') - estimator_subcommands = estimator.add_subparsers(dest='subcommand') + # ML Command + ml_parser = subparsers.add_parser('ml') + ml_subcommands = ml_parser.add_subparsers(dest='subcommand') - # Estimator Evaluate - evaluate = estimator_subcommands.add_parser( + # ML Evaluate + evaluate = ml_subcommands.add_parser( 'evaluate', help='Evaluate candidate models') evaluate.add_argument( 'modeldefs', type=Path, help='python module containing candidate models' ) evaluate.add_argument( - 'trainers', type=jl.load, help='saved Trainers object') + 'trainers', + type=quantgov.ml.Trainers.load, + help='saved Trainers object' + ) evaluate.add_argument( - 'labels', type=jl.load, help='saved Labels object') + 'labels', 
type=quantgov.ml.Labels.load, help='saved Labels object') evaluate.add_argument( 'output_results', type=lambda x: open(x, 'w', encoding=ENCODE_OUT), @@ -86,31 +88,36 @@ def parse_args(): help='Number of folds for cross-validation') evaluate.add_argument('--scoring', default='f1', help='scoring method') - # Estimator Train - train = estimator_subcommands.add_parser('train', help='Train a model') + # ML Train + train = ml_subcommands.add_parser('train', help='Train a model') train.add_argument( 'modeldefs', type=Path, help='Python module containing candidate models' ) train.add_argument('configfile', help='Model configuration file') train.add_argument( - 'trainers', type=jl.load, help='saved Trainers object') + 'vectorizer', + type=jl.load, + help='saved Vectorizer object' + ) + train.add_argument( + 'trainers', + type=quantgov.ml.Trainers.load, + help='saved Trainers object' + ) train.add_argument( - 'labels', type=jl.load, help='saved Labels object') + 'labels', type=quantgov.ml.Labels.load, help='saved Labels object') train.add_argument( - '-o', '--outfile', help='location to save the trained model' + '-o', '--outfile', help='location to save the trained Estimator' ) - # Estimator Estimate - estimate = estimator_subcommands.add_parser( + # ML Estimate + estimate = ml_subcommands.add_parser( 'estimate', help='Estimate label values for a target corpus') estimate.add_argument( - 'vectorizer', type=jl.load, - help='joblib-saved scikit-learn vectorizer' - ) - estimate.add_argument( - 'model', type=jl.load, - help='saved Model object' + 'estimator', + type=quantgov.ml.Estimator.load, + help='saved Estimator object' ) estimate.add_argument( 'corpus', type=quantgov.load_driver, @@ -164,7 +171,7 @@ def start_component(args): def run_corpus_builtin(args): driver = quantgov.load_driver(args.corpus) writer = csv.writer(args.outfile) - builtin = quantgov.corpus.builtins.commands[args.subcommand] + builtin = quantgov.nlp.commands[args.subcommand] func_args = {i: j for i, j in 
vars(args).items() if i not in {'command', 'subcommand', 'outfile', 'corpus'}} writer.writerow(driver.index_labels + builtin.get_columns(func_args)) @@ -179,18 +186,43 @@ def run_corpus_builtin(args): def run_estimator(args): if args.subcommand == "evaluate": - quantgov.estimator.evaluate( + quantgov.ml.evaluate( args.modeldefs, args.trainers, args.labels, args.folds, args.scoring, args.output_results, args.output_suggestion ) elif args.subcommand == "train": - quantgov.estimator.train_and_save_model( - args.modeldefs, args.configfile, args.trainers, args.labels, - args.outfile) + quantgov.ml.train_and_save_model( + args.modeldefs, args.configfile, args.vectorizer, args.trainers, + args.labels, args.outfile) elif args.subcommand == "estimate": - quantgov.estimator.estimate( - args.vectorizer, args.model, args.corpus, args.probability, - args.precision, args.outfile + writer = csv.writer(args.outfile) + labels = args.corpus.index_labels + if args.probability: + if args.estimator.multilabel: + if args.estimator.multiclass: + writer.writerow(labels + ('label', 'class', 'probability')) + else: + writer.writerow(labels + ('label', 'probability')) + elif args.estimator.multiclass: + writer.writerow(labels + ('class', 'probability')) + else: + writer.writerow( + labels + ('{}_prob'.format(args.estimator.label_names[0]),) + ) + else: + if args.estimator.multilabel: + writer.writerow(labels + ('label', 'prediction')) + else: + writer.writerow( + labels + ('{}'.format(args.estimator.label_names[0]),) + ) + writer.writerows( + docidx + result for docidx, + result in quantgov.ml.estimate( + args.estimator, + args.corpus, + args.probability, + args.precision) ) @@ -198,8 +230,8 @@ def main(): args = parse_args() { 'start': start_component, - 'corpus': run_corpus_builtin, - 'estimator': run_estimator + 'nlp': run_corpus_builtin, + 'ml': run_estimator, }[args.command](args) diff --git a/quantgov/corpora/__init__.py b/quantgov/corpora/__init__.py deleted file mode 100644 index 
da8e2b1..0000000 --- a/quantgov/corpora/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -import warnings - -from ..corpus import ( - Document, - CorpusStreamer, - CorpusDriver, - FlatFileCorpusDriver, - RecursiveDirectoryCorpusDriver, - NamePatternCorpusDriver, - IndexDriver, - S3Driver, - S3DatabaseDriver -) - -warnings.warn( - ("quantgov.corpora has been moved to quantgov.corpus and will be removed" - " in a future version."), - DeprecationWarning) diff --git a/quantgov/corpus/structures.py b/quantgov/corpus.py similarity index 96% rename from quantgov/corpus/structures.py rename to quantgov/corpus.py index 08e325e..c43c728 100644 --- a/quantgov/corpus/structures.py +++ b/quantgov/corpus.py @@ -1,5 +1,5 @@ """ -quantgov.corpora.structures +quantgov.corpus Classes for Writing QuantGov Corpora """ @@ -13,7 +13,7 @@ from collections import namedtuple from pathlib import Path -from .. import utils as qgutils +from . import utils as qgutils try: import boto3 @@ -286,17 +286,10 @@ def __init__(self, index, bucket, encoding='utf-8', cache=True): super(IndexDriver, self).__init__( index_labels=index_labels, encoding=encoding, cache=cache) - def gen_indices_and_paths(self): - with self.index.open(encoding=self.encoding) as inf: - reader = csv.reader(inf) - next(reader) - for row in reader: - yield tuple(row[:-1]), row[-1] - def read(self, docinfo): idx, path = docinfo body = self.client.get_object(Bucket=self.bucket, - Key=str(path))['Body'] + Key=str(path).replace('\\', '/'))['Body'] return Document(idx, body.read().decode(self.encoding)) def filter(self, pattern): diff --git a/quantgov/corpus/__init__.py b/quantgov/corpus/__init__.py deleted file mode 100644 index f095957..0000000 --- a/quantgov/corpus/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -from .structures import ( - Document, - CorpusStreamer, - CorpusDriver, - FlatFileCorpusDriver, - RecursiveDirectoryCorpusDriver, - NamePatternCorpusDriver, - IndexDriver, - S3Driver, - S3DatabaseDriver -) diff --git 
a/quantgov/estimator/estimation.py b/quantgov/estimator/estimation.py deleted file mode 100644 index c77dd2d..0000000 --- a/quantgov/estimator/estimation.py +++ /dev/null @@ -1,244 +0,0 @@ -""" -quantgov.estimator.estimation - -Functionality for making predictions with an estimator -""" -import csv -import logging - -import sklearn.pipeline - -log = logging.getLogger(__name__) - - -def get_pipeline(vectorizer, model): - """ - Get the full estimation pipeline - - Arguments: - * vectorizer: a sklearn Vectorizer (or pipeline) - * model: a quantgov.estimator.Estimator - - Returns: a sklearn Pipeline - """ - return sklearn.pipeline.Pipeline(( - ('vectorizer', vectorizer), - ('model', model.model) - )) - - -def estimate_simple(vectorizer, model, streamer): - """ - Generate predictions for an estimator - - Arguments: - * vectorizer: a sklearn Vectorizer (or pipeline) - * model: a quantgov.estimator.Estimator - * streamer: a quantgov.corpora.CorpusStreamer - - Yields: - 2-tuples of docindex, prediction - - """ - pipeline = get_pipeline(vectorizer, model) - texts = (doc.text for doc in streamer) - yield from zip(streamer.index, pipeline.predict(texts)) - - -def estimate_probability(vectorizer, model, streamer, precision): - """ - Generate probabilities for a one-label estimator - - Arguments: - * vectorizer: a sklearn Vectorizer (or pipeline) - * model: a quantgov.estimator.Estimator - * streamer: a quantgov.corpora.CorpusStreamer - - Yields: - 2-tuples of docindex, probability - - """ - pipeline = get_pipeline(vectorizer, model) - texts = (doc.text for doc in streamer) - truecol = list(int(i) for i in model.model.classes_).index(1) - predicted = ( - i[truecol] for i in pipeline.predict_proba(texts).round(precision) - ) - yield from zip(streamer.index, predicted) - - -def estimate_probability_multilabel(vectorizer, model, streamer, precision): - """ - Generate probabilities for a multilabel binary estimator - - Arguments: - * vectorizer: a sklearn Vectorizer (or pipeline) - 
* model: a quantgov.estimator.Estimator - * streamer: a quantgov.corpora.CorpusStreamer - - Yields: - 2-tuples of docindex, probability - - """ - pipeline = get_pipeline(vectorizer, model) - texts = (doc.text for doc in streamer) - try: - truecols = tuple( - list(int(i) for i in label_classes).index(1) - for label_classes in model.model.classes_ - ) - except (AttributeError, TypeError): - truecols = tuple( - list(int(i) for i in label_classes).index(1) - for label_classes in ( - est.classes_ for est in model.model.steps[-1][-1].estimators_ - ) - ) - predicted = pipeline.predict_proba(texts) - try: - for i, docidx in enumerate(streamer.index): - yield docidx, tuple( - label_predictions[i, truecols[j]].round(int(precision)) - for j, label_predictions in enumerate(predicted)) - except IndexError: - yield from zip(streamer.index, predicted.round(int(precision))) - - -def estimate_probability_multiclass(vectorizer, model, streamer, precision): - """ - Generate probabilities for a one-label, multiclass estimator - - Arguments: - * vectorizer: a sklearn Vectorizer (or pipeline) - * model: a quantgov.estimator.Estimator - * streamer: a quantgov.corpora.CorpusStreamer - - Yields: - 2-tuples of docindex, probability - - """ - pipeline = get_pipeline(vectorizer, model) - texts = (doc.text for doc in streamer) - yield from zip( - streamer.index, - (i for i in pipeline.predict_proba(texts).round(precision)) - ) - - -def estimate_probability_multilabel_multiclass( - vectorizer, model, streamer, precision): - """ - Generate probabilities for a multilabel, multiclass estimator - - Arguments: - * vectorizer: a sklearn Vectorizer (or pipeline) - * model: a quantgov.estimator.Estimator - * streamer: a quantgov.corpora.CorpusStreamer - - Yields: - 2-tuples of docindex, probability - - """ - pipeline = get_pipeline(vectorizer, model) - texts = (doc.text for doc in streamer) - predicted = pipeline.predict_proba(texts) - for i, docidx in enumerate(streamer.index): - yield docidx, 
tuple(label_predictions[i] for label_predictions - in predicted.round(precision)) - - -def is_multiclass(classes): - """ - Returns True if values in classes are anything but 1, 0, True, or False, - otherwise returns False. - """ - try: - return len(set(int(i) for i in classes) - {0, 1}) != 0 - except ValueError: - return True - - -def estimate(vectorizer, model, corpus, probability, precision, outfile): - """ - Estimate label values for documents in corpus - - Arguments: - - * **vectorizer**: joblib-saved vectorizer - * **model**: saved `quantgov.estimator.Model` object - * **corpus**: path to a quantgov corpus - * **probability**: if True, predict probability - * **outfile**: open file object for writing results - """ - streamer = corpus.get_streamer() - writer = csv.writer(outfile) - if len(model.label_names) > 1: - multilabel = True - try: - multiclass = any(is_multiclass(i) for i in model.model.classes_) - except (AttributeError, TypeError): - multiclass = any( - is_multiclass(i.classes_) for i in - model.model.steps[-1][-1].estimators_ - ) - else: - multilabel = False - multiclass = is_multiclass(model.model.classes_) - - # TODO: This is very ugly and complicated and should probably be refactored - if probability: - if multilabel: - if multiclass: # Multilabel-multiclass probability - results = estimate_probability_multilabel_multiclass( - vectorizer, model, streamer, precision) - writer.writerow(corpus.index_labels + - ('label', 'class', 'probability')) - writer.writerows( - docidx + (label_name, class_name, prediction) - for docidx, predictions in results - for label_name, label_classes, label_predictions - in zip( - model.label_names, model.model.classes_, predictions) - for class_name, prediction - in zip(label_classes, label_predictions) - ) - else: # Multilabel probability - results = estimate_probability_multilabel( - vectorizer, model, streamer, precision) - writer.writerow(corpus.index_labels + ('label', 'probability')) - writer.writerows( - docidx + 
(label_name, prediction) - for docidx, predictions in results - for label_name, prediction - in zip(model.label_names, predictions) - ) - elif multiclass: # Multiclass probability - writer.writerow(corpus.index_labels + ('class', 'probability')) - results = estimate_probability_multiclass( - vectorizer, model, streamer, precision) - writer.writerows( - docidx + (class_name, prediction) - for docidx, predictions in results - for class_name, prediction in zip( - model.model.classes_, predictions) - ) - else: # Simple probability - results = estimate_probability( - vectorizer, model, streamer, precision) - writer.writerow( - corpus.index_labels + (model.label_names[0] + '_prob',)) - writer.writerows( - docidx + (prediction,) for docidx, prediction in results) - elif multilabel: # Multilabel Prediction - results = estimate_simple(vectorizer, model, streamer) - writer.writerow(corpus.index_labels + ('label', 'prediction')) - writer.writerows( - docidx + (label_name, prediction,) - for docidx, predictions in results - for label_name, prediction in zip(model.label_names, predictions) - ) - else: # Simple Prediction - results = estimate_simple(vectorizer, model, streamer) - writer.writerow(corpus.index_labels + model.label_names) - writer.writerows(docidx + (prediction,) - for docidx, prediction in results) diff --git a/quantgov/estimator/__init__.py b/quantgov/ml/__init__.py similarity index 95% rename from quantgov/estimator/__init__.py rename to quantgov/ml/__init__.py index 73ae5bf..77f8a1b 100644 --- a/quantgov/estimator/__init__.py +++ b/quantgov/ml/__init__.py @@ -9,7 +9,7 @@ from .structures import ( Labels, Trainers, - Model, + Estimator, CandidateModel ) diff --git a/quantgov/estimator/candidate_sets.py b/quantgov/ml/candidate_sets.py similarity index 89% rename from quantgov/estimator/candidate_sets.py rename to quantgov/ml/candidate_sets.py index 97978fa..3ad7817 100644 --- a/quantgov/estimator/candidate_sets.py +++ b/quantgov/ml/candidate_sets.py @@ -1,5 +1,5 
@@ """ -quantgov.estimator.candidate_sets: Starter model candidate sets +quantgov.ml.candidate_sets: Starter model candidate sets This module provides a few sample sets of models for common problems. These are @@ -18,10 +18,10 @@ import sklearn.pipeline import sklearn.feature_extraction -import quantgov.estimator +import quantgov.ml classification = [ - quantgov.estimator.CandidateModel( + quantgov.ml.CandidateModel( name="Random Forests", model=sklearn.pipeline.Pipeline(steps=( ('tfidf', sklearn.feature_extraction.text.TfidfTransformer()), @@ -31,7 +31,7 @@ 'rf__n_estimators': [5, 10, 25, 50, 100], } ), - quantgov.estimator.CandidateModel( + quantgov.ml.CandidateModel( name="Logistic Regression", model=sklearn.pipeline.Pipeline(steps=( ('tfidf', sklearn.feature_extraction.text.TfidfTransformer()), @@ -45,7 +45,7 @@ multilabel_classification = [ - quantgov.estimator.CandidateModel( + quantgov.ml.CandidateModel( name="Random Forests", model=sklearn.pipeline.Pipeline(steps=( ('tfidf', sklearn.feature_extraction.text.TfidfTransformer()), @@ -55,7 +55,7 @@ 'rf__n_estimators': [5, 10, 25, 50, 100], } ), - quantgov.estimator.CandidateModel( + quantgov.ml.CandidateModel( name="Logistic Regression", model=sklearn.pipeline.Pipeline(steps=( ('tfidf', sklearn.feature_extraction.text.TfidfTransformer()), diff --git a/quantgov/ml/estimation.py b/quantgov/ml/estimation.py new file mode 100644 index 0000000..84601f6 --- /dev/null +++ b/quantgov/ml/estimation.py @@ -0,0 +1,181 @@ +""" +quantgov.ml.estimation + +Functionality for making predictions with an estimator +""" +import logging + +log = logging.getLogger(__name__) + + +def estimate_simple(estimator, streamer): + """ + Generate predictions for a one-label estimator + + Arguments: + * estimator: a quantgov.ml.Estimator + * streamer: a quantgov.corpora.CorpusStreamer + + Yields: + 2-tuples of docindex, (prediction,) + + """ + texts = (doc.text for doc in streamer) + predicted = estimator.pipeline.predict(texts) + for docidx, 
prediction in zip(streamer.index, predicted): + yield docidx, (prediction,) + + +def estimate_multilabel(estimator, streamer): + """ + Generate predictions for a multi-label estimator + + Arguments: + * estimator: a quantgov.ml.Estimator + * streamer: a quantgov.corpora.CorpusStreamer + + Yields: + 2-tuples of docindex, (label, prediction,) + + """ + for docidx, (prediction,) in estimate_simple(estimator, streamer): + for label, label_prediction in zip(estimator.label_names, prediction): + yield docidx, (label, label_prediction) + + +def estimate_probability(estimator, streamer, precision): + """ + Generate probabilities for a one-label estimator + + Arguments: + * estimator: a quantgov.ml.Estimator + * streamer: a quantgov.corpora.CorpusStreamer + + Yields: + 2-tuples of docindex, (probability,) + + """ + texts = (doc.text for doc in streamer) + truecol = list(int(i) for i in estimator.pipeline.classes_).index(1) + predicted = ( + estimator.pipeline.predict_proba(texts)[:, truecol].round(precision)) + yield from zip(streamer.index, ((prob,) for prob in predicted)) + + +def estimate_probability_multilabel(estimator, streamer, precision): + """ + Generate probabilities for a multilabel binary estimator + + Arguments: + * estimator: a quantgov.ml.Estimator + * streamer: a quantgov.corpora.CorpusStreamer + + Yields: + 2-tuples of docindex, (label, probability) + + """ + texts = (doc.text for doc in streamer) + model = estimator.pipeline.steps[-1][1] + try: + truecols = tuple( + list(int(i) for i in label_classes).index(1) + for label_classes in model.classes_ + ) + except (AttributeError, TypeError): + truecols = tuple( + list(int(i) for i in label_classes).index(1) + for label_classes in ( + est.classes_ for est in model.steps[-1][1].estimators_ + ) + ) + predicted = estimator.pipeline.predict_proba(texts).round(int(precision)) + + try: + yield from ( + (docidx, (label, label_prediction[truecol])) + for docidx, doc_predictions in zip(streamer.index, predicted) + for 
label, label_prediction, truecol + in zip(estimator.label_names, doc_predictions, truecols) + ) + except IndexError: + yield from ( + (docidx, (label, label_prediction)) + for docidx, doc_predictions in zip(streamer.index, predicted) + for (label, label_prediction) + in zip(estimator.label_names, doc_predictions) + ) + + +def estimate_probability_multiclass(estimator, streamer, precision): + """ + Generate probabilities for a one-label, multiclass estimator + + Arguments: + * estimator: a quantgov.ml.Estimator + * streamer: a quantgov.corpora.CorpusStreamer + + Yields: + 2-tuples of docindex, (class, probability) + + """ + texts = (doc.text for doc in streamer) + probs = estimator.pipeline.predict_proba(texts).round(precision) + yield from ( + (docidx, (class_, probability)) + for docidx, doc_probs in zip(streamer.index, probs) + for class_, probability in zip(estimator.pipeline.classes_, doc_probs) + ) + + +def estimate_probability_multilabel_multiclass(estimator, streamer, precision): + """ + Generate probabilities for a multilabel, multiclass estimator + + Arguments: + * estimator: a quantgov.ml.Estimator + * streamer: a quantgov.corpora.CorpusStreamer + + Yields: + 2-tuples of docindex, (label, class, probability) + + """ + texts = (doc.text for doc in streamer) + probs = estimator.pipeline.predict_proba(texts) + yield from ( + (docidx, (label_name, class_, prob)) + for label_name, label_probs in zip(estimator.label_names, probs) + for docidx, doc_probs in zip(streamer.index, label_probs) + for class_, prob in zip(estimator.pipeline.classes_, doc_probs) + ) + + +def estimate(estimator, corpus, probability, precision=4): + """ + Estimate label values for documents in corpus + + Arguments: + + * **estimator**: a `quantgov.ml.Estimator` object + * **corpus**: a quantgov corpus + * **probability**: if True, predict probability + * **precision**: precision for probability prediction + """ + streamer = corpus.get_streamer() + if probability: + if 
estimator.multilabel: + if estimator.multiclass: # Multilabel-multiclass probability + yield from estimate_probability_multilabel_multiclass( + estimator, streamer, precision) + else: # Multilabel probability + yield from estimate_probability_multilabel( + estimator, streamer, precision) + elif estimator.multiclass: # Multiclass probability + yield from estimate_probability_multiclass( + estimator, streamer, precision) + else: # Simple probability + yield from estimate_probability( + estimator, streamer, precision) + elif estimator.multilabel: # Multilabel Prediction + yield from estimate_multilabel(estimator, streamer) + else: # Binary and Multiclass + yield from estimate_simple(estimator, streamer) diff --git a/quantgov/estimator/evaluation.py b/quantgov/ml/evaluation.py similarity index 92% rename from quantgov/estimator/evaluation.py rename to quantgov/ml/evaluation.py index 03b0914..e587053 100644 --- a/quantgov/estimator/evaluation.py +++ b/quantgov/ml/evaluation.py @@ -19,7 +19,7 @@ def evaluate_model(model, X, y, folds, scoring): Evaluate a single model Arguments: - * model: a quantgov.estimator.Model + * model: a quantgov.ml.CandidateModel * X: array-like of document vectors with shape [n_samples x n_features] * y: array-like of labels with shape [n_samples X n_labels] * folds: folds to use in cross-validation @@ -53,7 +53,7 @@ def evaluate_all_models(models, X, y, folds, scoring): Evaluate a number of models Arguments: - * models: a sequence of quantgov.estimator.Model objects + * models: a sequence of quantgov.ml.CandidateModel objects * X: array-like of document vectors with shape [n_samples x n_features] * y: array-like of labels with shape [n_samples X n_labels] * folds: folds to use in cross-validation @@ -104,10 +104,10 @@ def evaluate(modeldefs, trainers, labels, folds, scoring, results_file, Arguments: * **modeldefs**: Path to a python module containing a list of - `quantgov.estimator.CandidateModel` objects in a module-level + 
`quantgov.ml.CandidateModel` objects in a module-level variable named `models'. - * **trainers**: a `quantgov.estimator.Trainers` object - * **labels**: a `quantgov.estimator.Labels` object + * **trainers**: a `quantgov.ml.Trainers` object + * **labels**: a `quantgov.ml.Labels` object * **folds**: folds to use in cross-validation * **scoring**: scoring method to use * **results_file**: open file object to which results should be written diff --git a/quantgov/estimator/structures.py b/quantgov/ml/structures.py similarity index 53% rename from quantgov/estimator/structures.py rename to quantgov/ml/structures.py index 8ef59ea..7bd138a 100644 --- a/quantgov/estimator/structures.py +++ b/quantgov/ml/structures.py @@ -1,5 +1,5 @@ """ -quantgov.estimator.structures +quantgov.ml.structures Useful structures for evaluating and training estimators """ @@ -13,15 +13,28 @@ class _PersistanceMixin(object): object """ + @classmethod + def load(cls, path): + """ + Load a saved object at path `path` + """ + loaded = jl.load(path) + if not isinstance(loaded, cls): + raise ValueError( + 'Expected saved type {}, path {} contained saved type {}' + .format(cls, path, type(loaded)) + ) + return loaded + def save(self, path): """ - Use joblib to pickle the object. + Use joblib to save the object. Arguments: path: an open file object or string holding the path to where the object should be saved """ - jl.dump(self, path) + jl.dump(self, path, compress=True) class Labels( @@ -56,19 +69,45 @@ class Trainers( pass -class Model( - collections.namedtuple('Model', ['label_names', 'model']), +def is_multiclass(classes): + """ + Returns True if values in classes are anything but 1, 0, True, or False, + otherwise returns False. 
+ """ + try: + return len(set(int(i) for i in classes) - {0, 1}) != 0 + except ValueError: + return True + + +class Estimator( + collections.namedtuple('Estimator', ['label_names', 'pipeline']), _PersistanceMixin ): """ - A Trained model + A Trained estimator Arguments: * label_names: sequence of names for each label the model estimates - * model: a trained sklearn-like model, implementing `.fit`, - `.fit_transform`, and `.predict` methods + * pipeline: a trained sklearn-like pipeline, implementing `.fit`, + `.fit_transform`, and `.predict` methods, where the X inputs are a + sequence of strings. """ - pass + + def __init__(self, *args, **kwargs): + super().__init__() + self.multilabel = len(self.label_names) > 1 + model = self.pipeline.steps[-1][1] + if self.multilabel: + try: + self.multiclass = any(is_multiclass(i) for i in model.classes_) + except (AttributeError, TypeError): + self.multiclass = any( + is_multiclass(i.classes_) + for i in model.steps[-1][-1].estimators_ + ) + else: + self.multiclass = is_multiclass(model.classes_) class CandidateModel( diff --git a/quantgov/estimator/training.py b/quantgov/ml/training.py similarity index 58% rename from quantgov/estimator/training.py rename to quantgov/ml/training.py index 3d405b6..3ec45a3 100644 --- a/quantgov/estimator/training.py +++ b/quantgov/ml/training.py @@ -1,6 +1,8 @@ import configparser -import quantgov.estimator +import sklearn.pipeline + +import quantgov.ml def _autoconvert(value): @@ -23,29 +25,38 @@ def get_model(modeldefs, configfile): config.optionxform = str config.read(configfile) models = {i.name: i for i in - quantgov.estimator.utils.load_models(modeldefs)} + quantgov.ml.utils.load_models(modeldefs)} model = models[config['Model']['name']].model model.set_params( **{i: _autoconvert(j) for i, j in config['Parameters'].items()}) return model -def train_and_save_model(modeldefs, configfile, trainers, labels, outfile): +def train_and_save_model( + modeldefs, + configfile, + vectorizer, + 
trainers, + labels, + outfile): """ Train and save model described in config file Arguments: * **modeldefs**: Path to a python module containing a list of - `quantgov.estimator.CandidateModel` objects in a module-level + `quantgov.ml.CandidateModel` objects in a module-level variable named `models'. * **configfile**: config file as produced by - `quantgov estimator evaluate` - * **trainers**: a `quantgov.estimator.Trainers` object - * **labels**: a `quantgov.estimator.Labels` object + `quantgov ml evaluate` + * **vectorizer**: an sklearn-compatible Vectorizer object + * **trainers**: a `quantgov.ml.Trainers` object + * **labels**: a `quantgov.ml.Labels` object * **outfile**: file to which model should be saved """ - model = get_model(modeldefs, configfile) - model.fit(trainers.vectors, labels.labels) - quantgov.estimator.Model(labels.label_names, model).save(outfile) + pipeline = sklearn.pipeline.Pipeline(( + ('vectorizer', vectorizer), + ('model', get_model(modeldefs, configfile).fit(trainers.vectors, labels.labels)), + )) + quantgov.ml.Estimator(labels.label_names, pipeline).save(outfile) diff --git a/quantgov/estimator/utils.py b/quantgov/ml/utils.py similarity index 88% rename from quantgov/estimator/utils.py rename to quantgov/ml/utils.py index f275455..1d88dc5 100644 --- a/quantgov/estimator/utils.py +++ b/quantgov/ml/utils.py @@ -10,7 +10,7 @@ def load_models(path): Arguments: * **path**: Path to a python module containing a list of - `quantgov.estimator.CandidateModel` objects in a module-level + `quantgov.ml.CandidateModel` objects in a module-level """ path = Path(path).resolve() try: diff --git a/quantgov/corpus/builtins.py b/quantgov/nlp.py similarity index 91% rename from quantgov/corpus/builtins.py rename to quantgov/nlp.py index 5a5e42c..8022fd8 100644 --- a/quantgov/corpus/builtins.py +++ b/quantgov/nlp.py @@ -1,12 +1,13 @@ """ -quantgov.corpora.builtins: Functions for analyzing a single Document +quantgov.nlp: Text-based analysis of documents """ import re import collections 
import math from decorator import decorator -import quantgov + +from . import utils try: import nltk.corpus @@ -22,10 +23,7 @@ if NLTK: try: nltk.corpus.wordnet.ensure_loaded() - nltk.corpus.stopwords.ensure_loaded() except LookupError: - nltk.download('stopwords') - nltk.corpus.stopwords.ensure_loaded() nltk.download('wordnet') nltk.corpus.wordnet.ensure_loaded() @@ -48,10 +46,10 @@ def check_textblob(func, *args, **kwargs): class WordCounter(): - cli = quantgov.utils.CLISpec( + cli = utils.CLISpec( help='Word Counter', arguments=[ - quantgov.utils.CLIArg( + utils.CLIArg( flags=('--word_pattern', '-wp'), kwargs={ 'help': 'regular expression defining a "word"', @@ -76,17 +74,17 @@ def process_document(doc, word_pattern): class OccurrenceCounter(): - cli = quantgov.utils.CLISpec( + cli = utils.CLISpec( help="Term Counter for Specific Words", arguments=[ - quantgov.utils.CLIArg( + utils.CLIArg( flags=('terms'), kwargs={ 'help': 'list of terms to be counted', 'nargs': '+' } ), - quantgov.utils.CLIArg( + utils.CLIArg( flags=('--total_label'), kwargs={ 'metavar': 'LABEL', @@ -96,7 +94,7 @@ class OccurrenceCounter(): ), } ), - quantgov.utils.CLIArg( + utils.CLIArg( flags=('--pattern'), kwargs={ 'help': 'pattern to use in identifying words', @@ -134,10 +132,10 @@ def process_document(doc, terms, pattern, total_label): class ShannonEntropy(): lemmas = {} - cli = quantgov.utils.CLISpec( + cli = utils.CLISpec( help='Shannon Entropy', arguments=[ - quantgov.utils.CLIArg( + utils.CLIArg( flags=('--word_pattern', '-wp'), kwargs={ 'help': 'regular expression defining a "word"', @@ -145,7 +143,7 @@ class ShannonEntropy(): 'default': re.compile(r'\b\w+\b') } ), - quantgov.utils.CLIArg( + utils.CLIArg( flags=('--stopwords', '-sw'), kwargs={ 'help': 'stopwords to ignore', @@ -155,7 +153,7 @@ class ShannonEntropy(): ) } ), - quantgov.utils.CLIArg( + utils.CLIArg( flags=('--precision'), kwargs={ 'help': 'decimal places to round', @@ -200,7 +198,7 @@ def lemmatize(word): class 
ConditionalCounter(): - cli = quantgov.utils.CLISpec( + cli = utils.CLISpec( help=('Count conditional words and phrases. Included terms are: ' ' "if", "but", "except", "provided", "when", "where", ' '"whenever", "unless", "notwithstanding", "in the event", ' @@ -228,10 +226,10 @@ def process_document(doc): class SentenceLength(): - cli = quantgov.utils.CLISpec( + cli = utils.CLISpec( help='Sentence Length', arguments=[ - quantgov.utils.CLIArg( + utils.CLIArg( flags=('--precision'), kwargs={ 'help': 'decimal places to round', @@ -266,17 +264,17 @@ def process_document(doc, precision): class SentimentAnalysis(): - cli = quantgov.utils.CLISpec( + cli = utils.CLISpec( help='Performs sentiment analysis on the text', arguments=[ - quantgov.utils.CLIArg( + utils.CLIArg( flags=('--backend'), kwargs={ 'help': 'which program to use for the analysis', 'default': 'textblob' } ), - quantgov.utils.CLIArg( + utils.CLIArg( flags=('--precision'), kwargs={ 'help': 'decimal places to round', diff --git a/quantgov/project/__init__.py b/quantgov/project/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/quantgov/project/builtins.py b/quantgov/project/builtins.py deleted file mode 100644 index e69de29..0000000 diff --git a/setup.py b/setup.py index eb1c880..672fc3f 100644 --- a/setup.py +++ b/setup.py @@ -58,7 +58,6 @@ def find_version(*file_paths): 'requests', 'scikit-learn', 'scipy', - 'snakemake', ], extras_require={ 'testing': ['pytest-flake8'], diff --git a/tests/pseudo_corpus/driver.py b/tests/pseudo_corpus/driver.py index 45d8980..3b00174 100644 --- a/tests/pseudo_corpus/driver.py +++ b/tests/pseudo_corpus/driver.py @@ -2,7 +2,7 @@ from pathlib import Path -driver = quantgov.corpora.RecursiveDirectoryCorpusDriver( +driver = quantgov.corpus.RecursiveDirectoryCorpusDriver( directory=Path(__file__).parent.joinpath('data', 'clean'), index_labels=('file',) ) diff --git a/tests/pseudo_estimator/data/binary.qge b/tests/pseudo_estimator/data/binary.qge new file mode 
100644 index 0000000..ccf9ffa Binary files /dev/null and b/tests/pseudo_estimator/data/binary.qge differ diff --git a/tests/pseudo_estimator/data/model.pickle b/tests/pseudo_estimator/data/model.pickle deleted file mode 100644 index 2ffaac6..0000000 Binary files a/tests/pseudo_estimator/data/model.pickle and /dev/null differ diff --git a/tests/pseudo_estimator/data/modelmulticlass.pickle b/tests/pseudo_estimator/data/modelmulticlass.pickle deleted file mode 100644 index 2071d94..0000000 Binary files a/tests/pseudo_estimator/data/modelmulticlass.pickle and /dev/null differ diff --git a/tests/pseudo_estimator/data/multiclass.qge b/tests/pseudo_estimator/data/multiclass.qge new file mode 100644 index 0000000..1f47c87 Binary files /dev/null and b/tests/pseudo_estimator/data/multiclass.qge differ diff --git a/tests/pseudo_estimator/data/vectorizer.pickle b/tests/pseudo_estimator/data/vectorizer.pickle deleted file mode 100644 index 0fdaee6..0000000 Binary files a/tests/pseudo_estimator/data/vectorizer.pickle and /dev/null differ diff --git a/tests/test_estimator.py b/tests/test_ml.py similarity index 68% rename from tests/test_estimator.py rename to tests/test_ml.py index 07876d5..042ddbb 100644 --- a/tests/test_estimator.py +++ b/tests/test_ml.py @@ -1,5 +1,5 @@ import pytest -import quantgov.estimator +import quantgov.ml import subprocess from pathlib import Path @@ -21,9 +21,8 @@ def check_output(cmd): def test_simple_estimator(): output = check_output( - ['quantgov', 'estimator', 'estimate', - str(PSEUDO_ESTIMATOR_PATH.joinpath('data', 'vectorizer.pickle')), - str(PSEUDO_ESTIMATOR_PATH.joinpath('data', 'model.pickle')), + ['quantgov', 'ml', 'estimate', + str(PSEUDO_ESTIMATOR_PATH.joinpath('data', 'binary.qge')), str(PSEUDO_CORPUS_PATH)] ) assert output == 'file,is_world\ncfr,False\nmoby,False\n' @@ -31,9 +30,8 @@ def test_simple_estimator(): def test_probability_estimator(): output = check_output( - ['quantgov', 'estimator', 'estimate', - 
str(PSEUDO_ESTIMATOR_PATH.joinpath('data', 'vectorizer.pickle')), - str(PSEUDO_ESTIMATOR_PATH.joinpath('data', 'model.pickle')), + ['quantgov', 'ml', 'estimate', + str(PSEUDO_ESTIMATOR_PATH.joinpath('data', 'binary.qge')), str(PSEUDO_CORPUS_PATH), '--probability'] ) assert output == ('file,is_world_prob\ncfr,0.0899\nmoby,0.0216\n') @@ -41,9 +39,8 @@ def test_probability_estimator(): def test_probability_estimator_6decimals(): output = check_output( - ['quantgov', 'estimator', 'estimate', - str(PSEUDO_ESTIMATOR_PATH.joinpath('data', 'vectorizer.pickle')), - str(PSEUDO_ESTIMATOR_PATH.joinpath('data', 'model.pickle')), + ['quantgov', 'ml', 'estimate', + str(PSEUDO_ESTIMATOR_PATH.joinpath('data', 'binary.qge')), str(PSEUDO_CORPUS_PATH), '--probability', '--precision', '6'] ) assert output == ('file,is_world_prob\ncfr,0.089898\nmoby,0.02162\n') @@ -51,9 +48,8 @@ def test_probability_estimator_6decimals(): def test_multiclass_probability_estimator(): output = check_output( - ['quantgov', 'estimator', 'estimate', - str(PSEUDO_ESTIMATOR_PATH.joinpath('data', 'vectorizer.pickle')), - str(PSEUDO_ESTIMATOR_PATH.joinpath('data', 'modelmulticlass.pickle')), + ['quantgov', 'ml', 'estimate', + str(PSEUDO_ESTIMATOR_PATH.joinpath('data', 'multiclass.qge')), str(PSEUDO_CORPUS_PATH), '--probability'] ) assert output == ('file,class,probability\n' diff --git a/tests/test_corpora.py b/tests/test_nlp.py similarity index 82% rename from tests/test_corpora.py rename to tests/test_nlp.py index 488fca9..db14cf5 100644 --- a/tests/test_corpora.py +++ b/tests/test_nlp.py @@ -35,7 +35,7 @@ def build_index_corpus(directory): with index_path.open('w', encoding='utf-8') as outf: outf.write('letter,number,path\n') outf.write('\n'.join(','.join(row) for row in rows)) - return quantgov.corpora.IndexDriver(str(index_path)) + return quantgov.corpus.IndexDriver(str(index_path)) def build_s3_corpus(directory): @@ -49,8 +49,8 @@ def build_s3_corpus(directory): with index_path.open('w', encoding='utf-8') 
as outf: outf.write('letter,number,path\n') outf.write('\n'.join(','.join(row) for row in rows)) - return quantgov.corpora.S3Driver(str(index_path), - bucket='quantgov-databanks') + return quantgov.corpus.S3Driver(str(index_path), + bucket='quantgov-databanks') BUILDERS = { @@ -107,14 +107,14 @@ def check_output(cmd): def test_wordcount(): output = check_output( - ['quantgov', 'corpus', 'count_words', str(PSEUDO_CORPUS_PATH)], + ['quantgov', 'nlp', 'count_words', str(PSEUDO_CORPUS_PATH)], ) assert output == 'file,words\ncfr,349153\nmoby,216645\n' def test_wordcount_pattern(): output = check_output( - ['quantgov', 'corpus', 'count_words', str(PSEUDO_CORPUS_PATH), + ['quantgov', 'nlp', 'count_words', str(PSEUDO_CORPUS_PATH), '--word_pattern', '\S+'] ) assert output == 'file,words\ncfr,333237\nmoby,210130\n' @@ -122,7 +122,7 @@ def test_wordcount_pattern(): def test_termcount(): output = check_output( - ['quantgov', 'corpus', 'count_occurrences', str(PSEUDO_CORPUS_PATH), + ['quantgov', 'nlp', 'count_occurrences', str(PSEUDO_CORPUS_PATH), 'shall'], ) assert output == 'file,shall\ncfr,1946\nmoby,94\n' @@ -130,7 +130,7 @@ def test_termcount(): def test_termcount_multiple(): output = check_output( - ['quantgov', 'corpus', 'count_occurrences', str(PSEUDO_CORPUS_PATH), + ['quantgov', 'nlp', 'count_occurrences', str(PSEUDO_CORPUS_PATH), 'shall', 'must', 'may not'], ) assert output == ('file,shall,must,may not\n' @@ -139,7 +139,7 @@ def test_termcount_multiple(): def test_termcount_multiple_with_label(): output = check_output( - ['quantgov', 'corpus', 'count_occurrences', str(PSEUDO_CORPUS_PATH), + ['quantgov', 'nlp', 'count_occurrences', str(PSEUDO_CORPUS_PATH), 'shall', 'must', 'may not', '--total_label', 'allofthem'], ) assert output == ('file,shall,must,may not,allofthem\n' @@ -148,14 +148,14 @@ def test_termcount_multiple_with_label(): def test_shannon_entropy(): output = check_output( - ['quantgov', 'corpus', 'shannon_entropy', str(PSEUDO_CORPUS_PATH)], + ['quantgov', 
'nlp', 'shannon_entropy', str(PSEUDO_CORPUS_PATH)], ) assert output == 'file,shannon_entropy\ncfr,10.71\nmoby,11.81\n' def test_shannon_entropy_no_stopwords(): output = check_output( - ['quantgov', 'corpus', 'shannon_entropy', str(PSEUDO_CORPUS_PATH), + ['quantgov', 'nlp', 'shannon_entropy', str(PSEUDO_CORPUS_PATH), '--stopwords', 'None'], ) assert output == 'file,shannon_entropy\ncfr,9.52\nmoby,10.03\n' @@ -163,7 +163,7 @@ def test_shannon_entropy_no_stopwords(): def test_shannon_entropy_4decimals(): output = check_output( - ['quantgov', 'corpus', 'shannon_entropy', str(PSEUDO_CORPUS_PATH), + ['quantgov', 'nlp', 'shannon_entropy', str(PSEUDO_CORPUS_PATH), '--precision', '4'], ) assert output == 'file,shannon_entropy\ncfr,10.7127\nmoby,11.813\n' @@ -171,21 +171,21 @@ def test_shannon_entropy_4decimals(): def test_conditionalcount(): output = check_output( - ['quantgov', 'corpus', 'count_conditionals', str(PSEUDO_CORPUS_PATH)], + ['quantgov', 'nlp', 'count_conditionals', str(PSEUDO_CORPUS_PATH)], ) assert output == 'file,conditionals\ncfr,2132\nmoby,2374\n' def test_sentencelength(): output = check_output( - ['quantgov', 'corpus', 'sentence_length', str(PSEUDO_CORPUS_PATH)], + ['quantgov', 'nlp', 'sentence_length', str(PSEUDO_CORPUS_PATH)], ) assert output == 'file,sentence_length\ncfr,18.68\nmoby,25.09\n' def test_sentencelength_4decimals(): output = check_output( - ['quantgov', 'corpus', 'sentence_length', str(PSEUDO_CORPUS_PATH), + ['quantgov', 'nlp', 'sentence_length', str(PSEUDO_CORPUS_PATH), '--precision', '4'], ) assert output == 'file,sentence_length\ncfr,18.6827\nmoby,25.0936\n' @@ -193,7 +193,7 @@ def test_sentencelength_4decimals(): def test_sentiment_analysis(): output = check_output( - ['quantgov', 'corpus', 'sentiment_analysis', str(PSEUDO_CORPUS_PATH)], + ['quantgov', 'nlp', 'sentiment_analysis', str(PSEUDO_CORPUS_PATH)], ) assert output == ('file,sentiment_polarity,sentiment_subjectivity' '\ncfr,0.01,0.42\nmoby,0.08,0.48\n') @@ -201,7 +201,7 @@ def 
test_sentiment_analysis(): def test_sentiment_analysis_4decimals(): output = check_output( - ['quantgov', 'corpus', 'sentiment_analysis', str(PSEUDO_CORPUS_PATH), + ['quantgov', 'nlp', 'sentiment_analysis', str(PSEUDO_CORPUS_PATH), '--precision', '4'], ) assert output == ('file,sentiment_polarity,sentiment_subjectivity'