From 4f4e4c2e6e5c776a13ec93e1358757973aa20bf2 Mon Sep 17 00:00:00 2001 From: Kyle Wilcox Date: Mon, 3 Oct 2016 11:29:13 -0400 Subject: [PATCH 01/15] If we require lxml, remove the internal etree logic --- thredds_crawler/crawl.py | 2 +- thredds_crawler/etree.py | 20 -------------------- 2 files changed, 1 insertion(+), 21 deletions(-) delete mode 100644 thredds_crawler/etree.py diff --git a/thredds_crawler/crawl.py b/thredds_crawler/crawl.py index 7efc75e..806a5c4 100644 --- a/thredds_crawler/crawl.py +++ b/thredds_crawler/crawl.py @@ -1,4 +1,3 @@ -from thredds_crawler.etree import etree try: import urlparse from urllib import quote_plus @@ -11,6 +10,7 @@ import re from datetime import datetime import pytz +from lxml import etree from thredds_crawler.utils import construct_url from dateutil.parser import parse import multiprocessing as mp diff --git a/thredds_crawler/etree.py b/thredds_crawler/etree.py deleted file mode 100644 index 4dfea6d..0000000 --- a/thredds_crawler/etree.py +++ /dev/null @@ -1,20 +0,0 @@ -try: - from lxml import etree -except ImportError: - try: - # Python 2.5 - import xml.etree.cElementTree as etree - except ImportError: - try: - # Python 2.5 - import xml.etree.ElementTree as etree - except ImportError: - try: - # normal cElementTree install - import cElementTree as etree - except ImportError: - try: - # normal ElementTree install - import elementtree.ElementTree as etree - except ImportError: - raise RuntimeError('You need either lxml or ElementTree') From 969c374fe0c2e6f232e7dda819a0547c1bed868e Mon Sep 17 00:00:00 2001 From: Kyle Wilcox Date: Mon, 3 Oct 2016 11:29:21 -0400 Subject: [PATCH 02/15] Fix testing URL --- tests/test_crawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_crawler.py b/tests/test_crawler.py index 7ad1c08..431f65b 100644 --- a/tests/test_crawler.py +++ b/tests/test_crawler.py @@ -33,7 +33,7 @@ def test_regex_skips(self): assert len(c.datasets) == 0 def test_iso_links(self): - c = Crawl("http://thredds.axiomalaska.com/thredds/catalogs/global.html", debug=True) + c = Crawl("http://thredds.axiomdatascience.com/thredds/global.html", debug=True) isos = [s.get("url") for d in c.datasets for s in d.services if s.get("service").lower() == "iso"] assert "?dataset=" in isos[0] assert "&catalog=" in isos[0] From 8fe6d56bf7ae0dae00c29237fd7101931ab8ff2f Mon Sep 17 00:00:00 2001 From: Kyle Wilcox Date: Mon, 3 Oct 2016 11:29:35 -0400 Subject: [PATCH 03/15] Don't swallow logging during testing --- thredds_crawler/crawl.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/thredds_crawler/crawl.py b/thredds_crawler/crawl.py index 806a5c4..25be118 100644 --- a/thredds_crawler/crawl.py +++ b/thredds_crawler/crawl.py @@ -28,7 +28,6 @@ class NullHandler(logging.Handler): def emit(self, record): pass logger = logging.getLogger("thredds_crawler") -logger.addHandler(NullHandler()) def request_xml(url): @@ -66,6 +65,8 @@ def __init__(self, catalog_url, select=None, skip=None, before=None, after=None, formatter = logging.Formatter('%(asctime)s - [%(levelname)s] %(message)s') ch.setFormatter(formatter) logger.addHandler(ch) + else: + logger.addHandler(NullHandler()) # Only process these dataset IDs if select is not None: From 778b1f442fe7b35b8d155f7f8e1e9e285eee7fea Mon Sep 17 00:00:00 2001 From: Kyle Wilcox Date: Mon, 3 Oct 2016 11:30:33 -0400 Subject: [PATCH 04/15] Provide better error messages when a LeadDataset fails --- thredds_crawler/crawl.py | 85 +++++++++++++++++++++------------------- 1 file changed, 44 
insertions(+), 41 deletions(-) diff --git a/thredds_crawler/crawl.py b/thredds_crawler/crawl.py index 25be118..2e04e25 100644 --- a/thredds_crawler/crawl.py +++ b/thredds_crawler/crawl.py @@ -246,49 +246,52 @@ def __init__(self, dataset_url): except etree.XMLSyntaxError: logger.error("Error procesing %s, invalid XML" % dataset_url) else: - dataset = tree.find("{%s}dataset" % INV_NS) - self.id = dataset.get("ID") - self.name = dataset.get("name") - self.metadata = dataset.find("{%s}metadata" % INV_NS) - self.catalog_url = dataset_url.split("?")[0] - - # Data Size - http://www.unidata.ucar.edu/software/thredds/current/tds/catalog/InvCatalogSpec.html#dataSize - data_size = dataset.find("{%s}dataSize" % INV_NS) - if data_size is not None: - self.data_size = float(data_size.text) - data_units = data_size.get('units') - # Convert to MB - if data_units == "bytes": - self.data_size *= 1e-6 - elif data_units == "Kbytes": - self.data_size *= 0.001 - elif data_units == "Gbytes": - self.data_size /= 0.001 - elif data_units == "Tbytes": - self.data_size /= 1e-6 - - # Services - service_tag = dataset.find("{%s}serviceName" % INV_NS) - if service_tag is None: - service_tag = self.metadata.find("{%s}serviceName" % INV_NS) - service_name = service_tag.text - - for service in tree.findall(".//{%s}service[@name='%s']" % (INV_NS, service_name)): - if service.get("serviceType") == "Compound": - for s in service.findall("{%s}service" % INV_NS): - url = construct_url(dataset_url, s.get('base')) + dataset.get("urlPath") - if s.get("suffix") is not None: - url += s.get("suffix") + try: + dataset = tree.find("{%s}dataset" % INV_NS) + self.id = dataset.get("ID") + self.name = dataset.get("name") + self.metadata = dataset.find("{%s}metadata" % INV_NS) + self.catalog_url = dataset_url.split("?")[0] + + # Data Size - http://www.unidata.ucar.edu/software/thredds/current/tds/catalog/InvCatalogSpec.html#dataSize + data_size = dataset.find("{%s}dataSize" % INV_NS) + if data_size is not None: + self.data_size = float(data_size.text) + data_units = data_size.get('units') + # Convert to MB + if data_units == "bytes": + self.data_size *= 1e-6 + elif data_units == "Kbytes": + self.data_size *= 0.001 + elif data_units == "Gbytes": + self.data_size /= 0.001 + elif data_units == "Tbytes": + self.data_size /= 1e-6 + + # Services + service_tag = dataset.find("{%s}serviceName" % INV_NS) + if service_tag is None: + service_tag = self.metadata.find("{%s}serviceName" % INV_NS) + service_name = service_tag.text + + for service in tree.findall(".//{%s}service[@name='%s']" % (INV_NS, service_name)): + if service.get("serviceType") == "Compound": + for s in service.findall("{%s}service" % INV_NS): + url = construct_url(dataset_url, s.get('base')) + dataset.get("urlPath") + if s.get("suffix") is not None: + url += s.get("suffix") + # ISO like services need additional parameters + if s.get('name') in ["iso", "ncml", "uddc"]: + url += "?dataset=%s&catalog=%s" % (self.id, quote_plus(self.catalog_url)) + self.services.append( {'name' : s.get('name'), 'service' : s.get('serviceType'), 'url' : url } ) + else: + url = construct_url(dataset_url, service.get('base')) + dataset.get("urlPath") + service.get("suffix", "") # ISO like services need additional parameters - if s.get('name') in ["iso", "ncml", "uddc"]: + if service.get('name') in ["iso", "ncml", "uddc"]: url += "?dataset=%s&catalog=%s" % (self.id, quote_plus(self.catalog_url)) - self.services.append( {'name' : s.get('name'), 'service' : s.get('serviceType'), 'url' : url } ) - else: - url = 
construct_url(dataset_url, service.get('base')) + dataset.get("urlPath") + service.get("suffix", "") - # ISO like services need additional parameters - if service.get('name') in ["iso", "ncml", "uddc"]: - url += "?dataset=%s&catalog=%s" % (self.id, quote_plus(self.catalog_url)) - self.services.append( {'name' : service.get('name'), 'service' : service.get('serviceType'), 'url' : url } ) + self.services.append( {'name' : service.get('name'), 'service' : service.get('serviceType'), 'url' : url } ) + except BaseException as e: + logger.error('Could not process {}. {}.'.format(dataset_url, e)) @property def size(self): From 07dfb88e5dec06d6385f34b7088bc648d7f1210e Mon Sep 17 00:00:00 2001 From: Kyle Wilcox Date: Mon, 3 Oct 2016 11:30:52 -0400 Subject: [PATCH 05/15] Catch an error when there is no serviceName tag found on a LeafDataset --- thredds_crawler/crawl.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/thredds_crawler/crawl.py b/thredds_crawler/crawl.py index 2e04e25..b460c32 100644 --- a/thredds_crawler/crawl.py +++ b/thredds_crawler/crawl.py @@ -272,6 +272,8 @@ def __init__(self, dataset_url): service_tag = dataset.find("{%s}serviceName" % INV_NS) if service_tag is None: service_tag = self.metadata.find("{%s}serviceName" % INV_NS) + if service_tag is None: + raise ValueError("No serviceName definition found!") service_name = service_tag.text for service in tree.findall(".//{%s}service[@name='%s']" % (INV_NS, service_name)): From 80fd2d24ef06f2c06fd86dd6e76c885f51addf46 Mon Sep 17 00:00:00 2001 From: Kyle Wilcox Date: Mon, 3 Oct 2016 11:31:07 -0400 Subject: [PATCH 06/15] Add a test for Unidata's motherload catalog --- tests/test_crawler.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/test_crawler.py b/tests/test_crawler.py index 431f65b..5e692a9 100644 --- a/tests/test_crawler.py +++ b/tests/test_crawler.py @@ -71,3 +71,23 @@ def test_modified_time(self): def test_ssl(self): c = Crawl("https://opendap.co-ops.nos.noaa.gov/thredds/catalog/NOAA/DBOFS/MODELS/201501/catalog.xml", debug=True) assert len(c.datasets) > 0 + + def test_unidata_parse(self): + selects = [".*Best.*"] + skips = Crawl.SKIPS + [".*grib2", ".*grib1", ".*GrbF.*", ".*ncx2", + "Radar Data", "Station Data", + "Point Feature Collections", "Satellite Data", + "Unidata NEXRAD Composites \(GINI\)", + "Unidata case studies", + ".*Reflectivity-[0-9]{8}"] + c = Crawl( + 'http://thredds.ucar.edu/thredds/catalog.xml', + select=selects, + skip=skips, + debug=True + ) + + assert len(c.datasets) > 0 + + isos = [(d.id, s.get("url")) for d in c.datasets for s in d.services if s.get("service").lower() == "iso"] + assert len(isos) > 0 From 44d2a2bdb3ded6808e81aa0ed3f58ff6c3be1cc4 Mon Sep 17 00:00:00 2001 From: Kyle Wilcox Date: Mon, 3 Oct 2016 11:50:14 -0400 Subject: [PATCH 07/15] Add auto-tag-release plugin to travis --- .travis.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.travis.yml b/.travis.yml index 9fd187d..2452bfb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -30,3 +30,11 @@ install: - conda install -c conda-forge pytest script: py.test -rx -v + +deploy: + provider: releases + api_key: + secure: XAx2aeocMQWn2acXcQ5LevsO977glpvPKOnk/2yafHTMd+VROVy8jZjsVTTwOEhzag2xOYgTyDYbX5PRT2uG2Uz/RPwJA0PbB+9NIiT1gvHZ/sfFEm7AfOQ257I2IL72ZGUuSZoa0I1pZnIFaew84FZGQ/jsNtfWZzo1veXI6A0= + on: + tags: true + repo: ioos/thredds_crawler From 0bb582553976149751ce16aa47d5191b2e1a5ff6 Mon Sep 17 00:00:00 2001 From: Kyle Wilcox Date: Mon, 3 Oct 2016 11:52:05 -0400 Subject: [PATCH 08/15] Move tests folder 
under thredds_crawler to avoid a top level tests module --- {tests => thredds_crawler/tests}/__init__.py | 0 {tests => thredds_crawler/tests}/test_crawler.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename {tests => thredds_crawler/tests}/__init__.py (100%) rename {tests => thredds_crawler/tests}/test_crawler.py (100%) diff --git a/tests/__init__.py b/thredds_crawler/tests/__init__.py similarity index 100% rename from tests/__init__.py rename to thredds_crawler/tests/__init__.py diff --git a/tests/test_crawler.py b/thredds_crawler/tests/test_crawler.py similarity index 100% rename from tests/test_crawler.py rename to thredds_crawler/tests/test_crawler.py From b28b385aa6814ad529d29c9e24ce4a74ac9021e9 Mon Sep 17 00:00:00 2001 From: Kyle Wilcox Date: Mon, 3 Oct 2016 12:11:51 -0400 Subject: [PATCH 09/15] Don't duplicate logs when testing --- thredds_crawler/crawl.py | 4 ++-- thredds_crawler/tests/test_crawler.py | 23 +++++++++++++---------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/thredds_crawler/crawl.py b/thredds_crawler/crawl.py index b460c32..1bc0879 100644 --- a/thredds_crawler/crawl.py +++ b/thredds_crawler/crawl.py @@ -8,6 +8,7 @@ import os import sys import re +import logging from datetime import datetime import pytz from lxml import etree @@ -18,7 +19,6 @@ INV_NS = "http://www.unidata.ucar.edu/namespaces/thredds/InvCatalog/v1.0" XLINK_NS = "http://www.w3.org/1999/xlink" -import logging try: # Python >= 2.7 from logging import NullHandler @@ -27,7 +27,7 @@ class NullHandler(logging.Handler): def emit(self, record): pass -logger = logging.getLogger("thredds_crawler") +logger = logging.getLogger(__name__) def request_xml(url): diff --git a/thredds_crawler/tests/test_crawler.py b/thredds_crawler/tests/test_crawler.py index 5e692a9..f5e8530 100644 --- a/thredds_crawler/tests/test_crawler.py +++ b/thredds_crawler/tests/test_crawler.py @@ -2,8 +2,12 @@ from datetime import datetime, timedelta import pytz +import logging from thredds_crawler.crawl import Crawl +logger = logging.getLogger('thredds_crawler') +logger.setLevel(logging.DEBUG) +logger.handlers = [logging.StreamHandler()] class CrawlerTest(unittest.TestCase): @@ -33,43 +37,43 @@ def test_regex_skips(self): assert len(c.datasets) == 0 def test_iso_links(self): - c = Crawl("http://thredds.axiomdatascience.com/thredds/global.html", debug=True) + c = Crawl("http://thredds.axiomdatascience.com/thredds/global.html") isos = [s.get("url") for d in c.datasets for s in d.services if s.get("service").lower() == "iso"] assert "?dataset=" in isos[0] assert "&catalog=" in isos[0] def test_dataset_size_using_xml(self): - c = Crawl("http://tds.maracoos.org/thredds/catalog/MODIS-Composites-1Day/2014/catalog.xml", debug=True) + c = Crawl("http://tds.maracoos.org/thredds/catalog/MODIS-Composites-1Day/2014/catalog.xml") self.assertIsNotNone(c.datasets[0].size) def test_dataset_size_using_dap(self): - c = Crawl("http://tds.maracoos.org/thredds/MODIS.xml", select=["MODIS-One-Agg"], debug=True) + c = Crawl("http://tds.maracoos.org/thredds/MODIS.xml", select=["MODIS-One-Agg"]) self.assertIsNotNone(c.datasets[0].size) def test_modified_time(self): # after with timezone af = datetime(2015, 12, 30, 0, 0, tzinfo=pytz.utc) - c = Crawl("http://tds.maracoos.org/thredds/catalog/MODIS-Chesapeake-Salinity/raw/2015/catalog.xml", after=af, debug=True) + c = Crawl("http://tds.maracoos.org/thredds/catalog/MODIS-Chesapeake-Salinity/raw/2015/catalog.xml", after=af) assert len(c.datasets) == 3 # after without timezone af = datetime(2015, 
12, 30, 0, 0) - c = Crawl("http://tds.maracoos.org/thredds/catalog/MODIS-Chesapeake-Salinity/raw/2015/catalog.xml", after=af, debug=True) + c = Crawl("http://tds.maracoos.org/thredds/catalog/MODIS-Chesapeake-Salinity/raw/2015/catalog.xml", after=af) assert len(c.datasets) == 3 # before bf = datetime(2016, 1, 5, 0, 0) - c = Crawl("http://tds.maracoos.org/thredds/catalog/MODIS-Chesapeake-Salinity/raw/2016/catalog.xml", before=bf, debug=True) + c = Crawl("http://tds.maracoos.org/thredds/catalog/MODIS-Chesapeake-Salinity/raw/2016/catalog.xml", before=bf) assert len(c.datasets) == 3 # both af = datetime(2016, 1, 20, 0, 0) bf = datetime(2016, 2, 1, 0, 0) - c = Crawl("http://tds.maracoos.org/thredds/catalog/MODIS-Chesapeake-Salinity/raw/2016/catalog.xml", before=bf, after=af, debug=True) + c = Crawl("http://tds.maracoos.org/thredds/catalog/MODIS-Chesapeake-Salinity/raw/2016/catalog.xml", before=bf, after=af) assert len(c.datasets) == 11 def test_ssl(self): - c = Crawl("https://opendap.co-ops.nos.noaa.gov/thredds/catalog/NOAA/DBOFS/MODELS/201501/catalog.xml", debug=True) + c = Crawl("https://opendap.co-ops.nos.noaa.gov/thredds/catalog/NOAA/DBOFS/MODELS/201501/catalog.xml") assert len(c.datasets) > 0 def test_unidata_parse(self): @@ -83,8 +87,7 @@ def test_unidata_parse(self): c = Crawl( 'http://thredds.ucar.edu/thredds/catalog.xml', select=selects, - skip=skips, - debug=True + skip=skips ) assert len(c.datasets) > 0 From 5a36f00379d2fd01fe7639b6e7bef2b23a5dd505 Mon Sep 17 00:00:00 2001 From: Kyle Wilcox Date: Mon, 3 Oct 2016 12:12:09 -0400 Subject: [PATCH 10/15] Don't show SSL warnings --- thredds_crawler/crawl.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/thredds_crawler/crawl.py b/thredds_crawler/crawl.py index 1bc0879..8a8fd40 100644 --- a/thredds_crawler/crawl.py +++ b/thredds_crawler/crawl.py @@ -5,6 +5,7 @@ from urllib import parse as urlparse from urllib.parse import quote_plus import requests +from requests.packages.urllib3.exceptions import InsecureRequestWarning import os import sys import re @@ -18,6 +19,7 @@ INV_NS = "http://www.unidata.ucar.edu/namespaces/thredds/InvCatalog/v1.0" XLINK_NS = "http://www.w3.org/1999/xlink" +requests.packages.urllib3.disable_warnings(InsecureRequestWarning) try: # Python >= 2.7 From 71bb0fb656474d7d6c529b6861a4dc0e76744af9 Mon Sep 17 00:00:00 2001 From: Kyle Wilcox Date: Mon, 3 Oct 2016 12:12:54 -0400 Subject: [PATCH 11/15] Add a conda-recipe and test it in Travis --- .travis.yml | 6 +++++- conda-recipe/meta.yaml | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 conda-recipe/meta.yaml diff --git a/.travis.yml b/.travis.yml index 2452bfb..30fca7a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -29,7 +29,11 @@ install: - conda install -c conda-forge netCDF4 # For optional DAP file size calculation - conda install -c conda-forge pytest -script: py.test -rx -v +script: + - py.test -rx -v + - conda install -n root conda-build anaconda-client + - conda build conda-recipe --python $TRAVIS_PYTHON_VERSION + - conda install thredds_crawler --use-local deploy: provider: releases diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml new file mode 100644 index 0000000..36bf6e5 --- /dev/null +++ b/conda-recipe/meta.yaml @@ -0,0 +1,33 @@ +package: + name: thredds_crawler + version: "1.5.1" + +source: + path: ../ + +build: + number: 0 + script: python setup.py install --single-version-externally-managed --record=record.txt + +requirements: + build: + - python + - setuptools + - requests + - lxml + - 
pytz + run: + - python + - requests + - lxml + - netcdf4 + - pytz + +test: + imports: + - thredds_crawler + +about: + home: https://github.com/ioos/thredds_crawler + license: MIT License + summary: 'A Python library for crawling THREDDS servers' From a9813999a22f96a1e1802ecc5d106da8935704d2 Mon Sep 17 00:00:00 2001 From: Kyle Wilcox Date: Mon, 3 Oct 2016 12:15:32 -0400 Subject: [PATCH 12/15] Ignore pytest meta folders --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index c724ba7..99b69ee 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,5 @@ dist build *.sublime* +.cache +__pycache__ From 103b28978961fe7b87dfef67382b6e1c08486f3a Mon Sep 17 00:00:00 2001 From: Kyle Wilcox Date: Mon, 3 Oct 2016 12:15:46 -0400 Subject: [PATCH 13/15] Bump to 1.5.2 --- VERSION | 2 +- conda-recipe/meta.yaml | 2 +- thredds_crawler/__init__.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/VERSION b/VERSION index 26ca594..4cda8f1 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.5.1 +1.5.2 diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index 36bf6e5..90dbcd6 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -1,6 +1,6 @@ package: name: thredds_crawler - version: "1.5.1" + version: "1.5.2" source: path: ../ diff --git a/thredds_crawler/__init__.py b/thredds_crawler/__init__.py index 77f1c8e..c3b3841 100644 --- a/thredds_crawler/__init__.py +++ b/thredds_crawler/__init__.py @@ -1 +1 @@ -__version__ = '1.5.0' +__version__ = '1.5.2' From aa767b4ad2d767aa1f79c3387cddd41b5e942d3f Mon Sep 17 00:00:00 2001 From: Kyle Wilcox Date: Mon, 3 Oct 2016 16:06:16 -0400 Subject: [PATCH 14/15] Imrove the readability of the README code samples --- README.md | 41 +++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 0e47eb4..e0fdcb2 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ -thredds_crawler -=============== +# thredds_crawler [![Build Status](https://travis-ci.org/ioos/thredds_crawler.svg?branch=master)](https://travis-ci.org/ioos/thredds_crawler) @@ -26,7 +25,7 @@ You can select datasets based on their THREDDS ID using the 'select' parameter. 
```python from thredds_crawler.crawl import Crawl -c = Crawl("http://tds.maracoos.org/thredds/MODIS.xml", select=[".*-Agg"]) +c = Crawl('http://tds.maracoos.org/thredds/MODIS.xml', select=[".*-Agg"]) print c.datasets [ , @@ -74,7 +73,11 @@ If you need to remove or add a new `skip`, it is **strongly** encouraged you use ```python from thredds_crawler.crawl import Crawl skips = Crawl.SKIPS + [".*-Day-Aggregation"] -c = Crawl("http://tds.maracoos.org/thredds/MODIS.xml", select=[".*-Agg"], skip=skips) +c = Crawl( + 'http://tds.maracoos.org/thredds/MODIS.xml', + select=[".*-Agg"], + skip=skips +) print c.datasets [ @@ -128,20 +131,22 @@ You can select data by the THREDDS `modified_time` by using a the `before` and ` import pytz from thredds_crawler.crawl import Crawl -# after +bf = datetime(2016, 1, 5, 0, 0) af = datetime(2015, 12, 30, 0, 0, tzinfo=pytz.utc) -c = Crawl("http://tds.maracoos.org/thredds/catalog/MODIS-Chesapeake-Salinity/raw/2015/catalog.xml", after=af) +url = 'http://tds.maracoos.org/thredds/catalog/MODIS-Chesapeake-Salinity/raw/2016/catalog.xml' + +# after +c = Crawl(url, after=af) assert len(c.datasets) == 3 # before -bf = datetime(2016, 1, 5, 0, 0) -c = Crawl("http://tds.maracoos.org/thredds/catalog/MODIS-Chesapeake-Salinity/raw/2016/catalog.xml", before=bf) +c = Crawl(url, before=bf) assert len(c.datasets) == 3 # both af = datetime(2016, 1, 20, 0, 0) bf = datetime(2016, 2, 1, 0, 0) -c = Crawl("http://tds.maracoos.org/thredds/catalog/MODIS-Chesapeake-Salinity/raw/2016/catalog.xml", before=bf, after=af) +c = Crawl(url, before=bf, after=af) assert len(c.datasets) == 11 ``` @@ -153,7 +158,12 @@ You can pass in a `debug=True` parameter to Crawl to log to STDOUT what is actua ```python from thredds_crawler.crawl import Crawl skips = Crawl.SKIPS + [".*-Day-Aggregation"] -c = Crawl("http://tds.maracoos.org/thredds/MODIS.xml", select=[".*-Agg"], skip=skips, debug=True) +c = Crawl( + 'http://tds.maracoos.org/thredds/MODIS.xml', + select=['.*-Agg'], + skip=skips, + debug=True +) Crawling: http://tds.maracoos.org/thredds/MODIS.xml Skipping catalogRef based on 'skips'. 
Title: MODIS Individual Files @@ -189,7 +199,7 @@ You can get some basic information about a LeafDataset, including the services a ```python from thredds_crawler.crawl import Crawl -c = Crawl("http://tds.maracoos.org/thredds/MODIS.xml", select=[".*-Agg"]) +c = Crawl('http://tds.maracoos.org/thredds/MODIS.xml', select=['.*-Agg']) dataset = c.datasets[0] print dataset.id MODIS-Agg @@ -214,7 +224,7 @@ If you have a list of datasets you can easily return all endpoints of a certain ```python from thredds_crawler.crawl import Crawl -c = Crawl("http://tds.maracoos.org/thredds/MODIS.xml", select=[".*-Agg"]) +c = Crawl('http://tds.maracoos.org/thredds/MODIS.xml', select=['.*-Agg']) urls = [s.get("url") for d in c.datasets for s in d.services if s.get("service").lower() == "opendap"] print urls [ @@ -236,7 +246,10 @@ This isn't necessarialy the size on disk, because it does not account for `missi ```python from thredds_crawler.crawl import Crawl -c = Crawl("http://thredds.axiomalaska.com/thredds/catalogs/cencoos.html", select=["MB_.*"]) +c = Crawl( + 'http://thredds.axiomalaska.com/thredds/catalogs/cencoos.html', + select=['MB_.*'] +) sizes = [d.size for d in c.datasets] print sizes [29247.410283999998, 72166.289680000002] @@ -249,7 +262,7 @@ The entire THREDDS catalog metadata record is saved along with the dataset objec ```python from thredds_crawler.crawl import Crawl -c = Crawl("http://tds.maracoos.org/thredds/MODIS.xml", select=[".*-Agg"]) +c = Crawl('http://tds.maracoos.org/thredds/MODIS.xml', select=['.*-Agg']) dataset = c.datasets[0] print dataset.metadata.find("{http://www.unidata.ucar.edu/namespaces/thredds/InvCatalog/v1.0}documentation").text Ocean Color data are provided as a service to the broader community, and can be From 6a6c67a8909a20e809e07476b036f6db50b71d1d Mon Sep 17 00:00:00 2001 From: Kyle Wilcox Date: Mon, 3 Oct 2016 16:06:44 -0400 Subject: [PATCH 15/15] Adds authentication options. fixes #11. --- README.md | 16 ++++++++++++++++ thredds_crawler/crawl.py | 29 ++++++++++++++++------------- 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index e0fdcb2..7793932 100644 --- a/README.md +++ b/README.md @@ -151,6 +151,22 @@ assert len(c.datasets) == 11 ``` +### Authentication + +You can pass an auth parameter as needed. It needs to be a [requests compatible auth object](http://docs.python-requests.org/en/latest/user/authentication/). + +```python +from thredds_crawler.crawl import Crawl +auth = ('user', 'password') +c = Crawl( + 'http://tds.maracoos.org/thredds/MODIS.xml', + select=['.*-Agg'], + skip=Crawl.SKIPS, + auth=auth +) +``` + + ### Debugging You can pass in a `debug=True` parameter to Crawl to log to STDOUT what is actually happening. 
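
Editor's note on the `debug=True` flag mentioned above: PATCH 03/15 and PATCH 09/15 change the crawler to add a `StreamHandler` only when `debug=True` and to use a module-level logger, so logging can also be configured externally. A minimal sketch, mirroring the handler setup the test suite uses after these patches (the logger name comes from the package, everything else is ordinary stdlib `logging`):

```python
# Sketch: configure thredds_crawler logging yourself instead of passing debug=True.
# Mirrors the test-suite setup introduced in PATCH 09/15; not part of the patches themselves.
import logging

logger = logging.getLogger('thredds_crawler')   # parent of the module loggers used by crawl.py
logger.setLevel(logging.DEBUG)
logger.addHandler(logging.StreamHandler())      # crawl progress messages propagate up to this handler
```
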
diff --git a/thredds_crawler/crawl.py b/thredds_crawler/crawl.py index 8a8fd40..c491ca4 100644 --- a/thredds_crawler/crawl.py +++ b/thredds_crawler/crawl.py @@ -32,32 +32,34 @@ def emit(self, record): logger = logging.getLogger(__name__) -def request_xml(url): +def request_xml(url, auth=None): ''' Returns an etree.XMLRoot object loaded from the url :param str url: URL for the resource to load as an XML ''' try: - r = requests.get(url, verify=False) + r = requests.get(url, auth=auth, verify=False) return r.text.encode('utf-8') except BaseException: logger.error("Skipping %s (error parsing the XML)" % url) return -def make_leaf(url): - return LeafDataset(url) +def make_leaf(url, auth): + return LeafDataset(url, auth=auth) class Crawl(object): SKIPS = [".*files.*", ".*Individual Files.*", ".*File_Access.*", ".*Forecast Model Run.*", ".*Constant Forecast Offset.*", ".*Constant Forecast Date.*"] - def __init__(self, catalog_url, select=None, skip=None, before=None, after=None, debug=None, workers=4): + def __init__(self, catalog_url, select=None, skip=None, before=None, after=None, debug=None, workers=None, auth=None): """ - select: a list of dataset IDs. Python regex supported. - skip: list of dataset names and/or a catalogRef titles. Python regex supported. + :param select list: Dataset IDs. Python regex supported. + :param list skip: Dataset names and/or a catalogRef titles. Python regex supported. + :param requests.auth.AuthBase auth: requets auth object to use """ + workers = workers or 4 self.pool = mp.Pool(processes=workers) if debug is True: @@ -105,9 +107,9 @@ def __init__(self, catalog_url, select=None, skip=None, before=None, after=None, self.visited = [] datasets = [] - urls = list(self._run(url=catalog_url)) + urls = list(self._run(url=catalog_url, auth=auth)) - jobs = [self.pool.apply_async(make_leaf, args=(url,)) for url in urls] + jobs = [self.pool.apply_async(make_leaf, args=(url, auth)) for url in urls] datasets = [j.get() for j in jobs] self.datasets = [ x for x in datasets if x.id is not None ] @@ -185,11 +187,12 @@ def _compile_references(self, url, tree): references.append(construct_url(url, ref.get("{%s}href" % XLINK_NS))) return references - def _run(self, url): + def _run(self, url, auth): ''' Performs a multiprocess depth-first-search of the catalog references and yields a URL for each leaf dataset found :param str url: URL for the current catalog + :param requests.auth.AuthBase auth: requets auth object to use ''' if url in self.visited: logger.debug("Skipping %s (already crawled)" % url) @@ -200,7 +203,7 @@ def _run(self, url): url = self._get_catalog_url(url) # Get an etree object - xml_content = request_xml(url) + xml_content = request_xml(url, auth) for ds in self._build_catalog(url, xml_content): yield ds @@ -232,7 +235,7 @@ def _build_catalog(self, url, xml_content): class LeafDataset(object): - def __init__(self, dataset_url): + def __init__(self, dataset_url, auth=None): self.services = [] self.id = None @@ -242,7 +245,7 @@ def __init__(self, dataset_url): self.data_size = None # Get an etree object - r = requests.get(dataset_url, verify=False) + r = requests.get(dataset_url, auth=auth, verify=False) try: tree = etree.XML(r.text.encode('utf-8')) except etree.XMLSyntaxError:
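
For reference, a usage sketch of the `auth` keyword introduced in PATCH 15/15. The credentials are placeholders and the catalog URL is taken from the README examples; any requests-compatible auth object (a plain `('user', 'password')` tuple, as in the README addition, or `HTTPBasicAuth`) is simply forwarded to the `requests.get()` calls inside the crawler:

```python
# Hedged usage sketch for the auth support added in PATCH 15/15.
# Credentials below are placeholders, not real accounts.
from requests.auth import HTTPBasicAuth
from thredds_crawler.crawl import Crawl

auth = HTTPBasicAuth('user', 'password')   # any requests-compatible auth object works
c = Crawl(
    'http://tds.maracoos.org/thredds/MODIS.xml',
    select=['.*-Agg'],
    skip=Crawl.SKIPS,
    auth=auth,                              # passed through to every requests.get() the crawler makes
)
print([d.id for d in c.datasets])
```
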