-
-
Notifications
You must be signed in to change notification settings - Fork 29
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #220 from pelias/download_adapter
abstract TIGER download source using adapter pattern
- Loading branch information
Showing 7 changed files with 280 additions and 31 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
const JSFtp = require('jsftp'); | ||
|
||
/**
 * Download adapter which reads TIGER ADDRFEAT archives from the
 * Census FTP server (ftp2.census.gov).
 */
class CensusFTP {
  constructor(){
    this.client = new JSFtp({ host: 'ftp2.census.gov' });
    this.prefix = '/geo/tiger/TIGER2016/ADDRFEAT';
  }

  /**
   * List the remote files matching a glob-style pattern.
   *
   * @param {string} pattern - file name pattern, appended to the remote prefix
   * @param {function(Error=, string[]=)} cb - called with an error, or with the file names
   */
  list(pattern, cb){
    this.client.list(`${this.prefix}/${pattern}`, (err, res) => {
      if (err) { return cb(err); }

      // output of the list command looks like a typical ls command in unix:
      // one entry per line with the file name in the last column.
      // take the last whitespace-delimited token of each non-empty line instead
      // of assuming every file name has the same fixed width.
      const files = (res || '')
        .split(/\r?\n/)
        .map((line) => line.trim())
        .filter((line) => line.length > 0)
        .map((line) => line.split(/\s+/).pop());

      cb(null, files);
    });
  }

  /**
   * Download a single remote file to a local path.
   *
   * @param {string} remoteFileName - file name relative to the remote prefix
   * @param {string} localFilePath - destination path passed through to the FTP client
   * @param {function(Error=)} cb - passed through to the FTP client
   */
  get(remoteFileName, localFilePath, cb){
    this.client.get(`${this.prefix}/${remoteFileName}`, localFilePath, cb);
  }
}
|
||
module.exports = CensusFTP; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
const fs = require('fs'); | ||
const path = require('path'); | ||
const request = require('superagent'); | ||
const cheerio = require('cheerio'); | ||
const conform = /^tl_2016_(\d{5})_addrfeat\.zip$/; | ||
|
||
/**
 * Download adapter which reads TIGER ADDRFEAT archives over HTTPS from
 * the host and prefix configured in the constructor.
 */
class CensusS3Mirror {
  constructor() {
    this.host = 'https://census-backup.s3.amazonaws.com';
    this.prefix = '/tiger/2016/ADDRFEAT';
  }

  /**
   * Convert a glob-style pattern to an anchored regular expression.
   * `*` matches any run of characters and `?` matches a single character;
   * every other regex metacharacter (such as the `.` in `.zip`) is escaped
   * so that it only matches itself.
   *
   * @param {string} pattern - glob-style file name pattern
   * @returns {RegExp}
   */
  static globToRegExp(pattern) {
    const source = pattern
      .replace(/[.+^${}()|[\]\\]/g, '\\$&')
      .replace(/\*/g, '.*')
      .replace(/\?/g, '.');
    return new RegExp(`^${source}$`);
  }

  /**
   * List the remote files matching a glob-style pattern.
   * File names are scraped from the links on the index page under the prefix.
   *
   * @param {string} pattern - glob-style file name pattern
   * @param {function(Error=, string[]=)} cb - called with an error, or with the file names
   */
  list(pattern, cb) {
    const regex = CensusS3Mirror.globToRegExp(pattern);

    request
      .get(`${this.host}${this.prefix}/index.html`)
      .set('accept', 'text/html')
      .end((err, res) => {
        if (err) { return cb(err); }
        if (res.status >= 400) { return cb(new Error(`status code: ${res.status}`)); }

        // parse HTML and collect the target of every link
        const $ = cheerio.load(res.text);
        const hrefs = $('a').map(function () {
          return $(this).attr('href');
        }).get();

        const files = hrefs
          // remove path prefixes
          .map((href) => path.basename(href))
          // keep only ADDRFEAT archives (removes any other links on the page)
          .filter((name) => conform.test(name))
          // apply pattern filter
          .filter((name) => regex.test(name));

        cb(null, files);
      });
  }

  /**
   * Download a single remote file to a local path.
   * The callback is invoked exactly once: without arguments when the local
   * file has been fully written, or with the first error raised by either the
   * request or the local write stream.
   *
   * @param {string} remoteFileName - file name relative to the remote prefix
   * @param {string} localFilePath - destination path on the local filesystem
   * @param {function(Error=)} cb
   */
  get(remoteFileName, localFilePath, cb) {
    const sink = fs.createWriteStream(localFilePath);

    let settled = false;
    const done = (err) => {
      if (settled) { return; }
      settled = true;
      if (err) {
        // release the file descriptor; the source does not close the sink on failure
        sink.destroy();
        return cb(err);
      }
      cb();
    };

    sink.on('finish', () => done());
    // without this handler an unwritable path would raise an uncaught exception
    sink.on('error', done);

    // download remote file to local file path
    request
      .get(`${this.host}${this.prefix}/${remoteFileName}`)
      .on('error', done)
      .pipe(sink);
  }
}
|
||
module.exports = CensusS3Mirror; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
const fs = require('fs'); | ||
const os = require('os'); | ||
const path = require('path'); | ||
const crypto = require('crypto'); | ||
const JSFtp = require('jsftp'); | ||
const CensusFTP = require('../../../../script/js/adapter/CensusFTP'); | ||
|
||
module.exports.tests = {}; | ||
|
||
// a new adapter exposes an FTP client and a string path prefix
module.exports.tests.constructor = (test) => {
  test('constructor', (t) => {
    const ftp = new CensusFTP();
    const { client, prefix } = ftp;

    t.true(client instanceof JSFtp);
    t.equal(typeof prefix, 'string');

    // close the connection so the test process can exit
    client.socket.end();
    t.end();
  });
};
|
||
// list() returns the expected number of conforming file names for patterns
// of decreasing breadth (all files, one state, part of a state, one file).
module.exports.tests.list = function (test) {
  const conform = /^tl_2016_(\d{5})_addrfeat\.zip$/;
  const cases = [
    { name: 'list - all', pattern: 'tl_2016_*_addrfeat.zip', expected: 3220 },
    { name: 'list - whole state', pattern: 'tl_2016_72*_addrfeat.zip', expected: 78 },
    { name: 'list - subset of state', pattern: 'tl_2016_7200*_addrfeat.zip', expected: 5 },
    { name: 'list - single file', pattern: 'tl_2016_72001_addrfeat.zip', expected: 1 }
  ];

  cases.forEach(({ name, pattern, expected }) => {
    test(name, function (t) {
      const adapter = new CensusFTP();
      adapter.list(pattern, (err, files) => {
        // always release the connection, even when the listing failed
        adapter.client.socket.end();

        // report a failed listing as a test failure instead of crashing on `files`
        if (err) { return t.end(err); }

        t.equal(files.length, expected);
        t.true(files.every(f => conform.test(f)));
        t.end();
      });
    });
  });
};
|
||
// get() downloads a single archive to a temporary file of the expected size
module.exports.tests.get = function (test) {
  test('get - single file', function (t) {
    const adapter = new CensusFTP();
    const tmpFile = path.join(os.tmpdir(), crypto.randomBytes(16).toString('hex'));
    adapter.get('tl_2016_72149_addrfeat.zip', tmpFile, (err) => {
      // always release the connection, even when the download failed
      adapter.client.socket.end();

      if (err) {
        // remove any partially written file, then report the failure
        if (fs.existsSync(tmpFile)) { fs.unlinkSync(tmpFile); }
        return t.end(err);
      }

      const stats = fs.statSync(tmpFile);
      t.equal(stats.size, 42950);
      fs.unlinkSync(tmpFile); // clean up
      t.end();
    });
  });
};
|
||
// register every test case with tape, prefixing each name with the adapter name
module.exports.all = function (tape) {
  const test = (name, testFunction) => tape(`CensusFTP: ${name}`, testFunction);

  Object.keys(module.exports.tests).forEach((testCase) => {
    module.exports.tests[testCase](test);
  });
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
const os = require('os'); | ||
const fs = require('fs'); | ||
const path = require('path'); | ||
const crypto = require('crypto'); | ||
const CensusS3Mirror = require('../../../../script/js/adapter/CensusS3Mirror'); | ||
|
||
module.exports.tests = {}; | ||
|
||
// a new adapter exposes its host and path prefix as strings
module.exports.tests.constructor = (test) => {
  test('constructor', (t) => {
    const { host, prefix } = new CensusS3Mirror();

    t.equal(typeof host, 'string');
    t.equal(typeof prefix, 'string');
    t.end();
  });
};
|
||
// list() returns the expected number of conforming file names for patterns
// of decreasing breadth (all files, one state, part of a state, one file).
module.exports.tests.list = function (test) {
  const conform = /^tl_2016_(\d{5})_addrfeat\.zip$/;
  const cases = [
    { name: 'list - all', pattern: 'tl_2016_*_addrfeat.zip', expected: 3220 },
    { name: 'list - whole state', pattern: 'tl_2016_72*_addrfeat.zip', expected: 78 },
    { name: 'list - subset of state', pattern: 'tl_2016_7200*_addrfeat.zip', expected: 5 },
    { name: 'list - single file', pattern: 'tl_2016_72001_addrfeat.zip', expected: 1 }
  ];

  cases.forEach(({ name, pattern, expected }) => {
    test(name, function (t) {
      const adapter = new CensusS3Mirror();
      adapter.list(pattern, (err, files) => {
        // report a failed listing as a test failure instead of crashing on `files`
        if (err) { return t.end(err); }

        t.equal(files.length, expected);
        t.true(files.every(f => conform.test(f)));
        t.end();
      });
    });
  });
};
|
||
// get() downloads a single archive to a temporary file of the expected size
module.exports.tests.get = function (test) {
  test('get - single file', function (t) {
    const adapter = new CensusS3Mirror();
    const tmpFile = path.join(os.tmpdir(), crypto.randomBytes(16).toString('hex'));
    adapter.get('tl_2016_72149_addrfeat.zip', tmpFile, (err) => {
      if (err) {
        // remove any partially written file, then report the failure
        if (fs.existsSync(tmpFile)) { fs.unlinkSync(tmpFile); }
        return t.end(err);
      }

      const stats = fs.statSync(tmpFile);
      t.equal(stats.size, 42950);
      fs.unlinkSync(tmpFile); // clean up
      t.end();
    });
  });
};
|
||
// register every test case with tape, prefixing each name with the adapter name
module.exports.all = function (tape) {
  const test = (name, testFunction) => tape(`CensusS3Mirror: ${name}`, testFunction);

  Object.keys(module.exports.tests).forEach((testCase) => {
    module.exports.tests[testCase](test);
  });
};