/**
 * Returns an object stream that turns search terms into download URL records.
 *
 * Every term written to the stream is searched with `ncbi.search` and the
 * results are piped through `_getURLs(db)`; whatever that stream emits is
 * pushed downstream.
 *
 * @param {string} db - Database name. 'gff' is searched in the 'genome'
 *   database, every other value is searched as-is.
 * @param {*} [term] - Optional search term. When truthy it is written to the
 *   stream immediately and the stream is ended.
 * @returns {Stream} Object stream created with `through.obj`.
 */
NCBI.prototype.urls = function(db, term) {
  var stream = through.obj(transform)
  if (term) { stream.write(term); stream.end() }
  return stream

  function transform(obj, enc, next) {
    var self = this
    // GFF annotations are looked up through the genome database.
    var searchdb = db === 'gff' ? 'genome' : db
    var getURLs = _getURLs(db)
    var search = ncbi.search(searchdb, obj)

    // pipe() does not forward 'error' events, so a failing search would
    // otherwise be an unhandled error instead of surfacing on this stream.
    search.on('error', function(err) { self.emit('error', err) })
    search.pipe(getURLs)

    _attachStandardEvents(getURLs, self, next)
  }
}
/**
 * Creates an object stream that converts NCBI search results into download
 * URL records of the form `{url: String, uid: *}`.
 *
 * @param {string} db - Selects the URL builder: 'sra', 'assembly' or 'gff'.
 *   Any other value makes the stream fail with an Error on the first object.
 * @returns {Stream} Object stream created with `through.obj`.
 */
function _getURLs(db) {
  return through.obj(transform)

  function transform(obj, enc, next) {
    var self = this
    var parseURL = {
      sra: sraURL,
      assembly: assemblyURL,
      gff: gffURL
    }

    // Report an unsupported database through the stream instead of throwing
    // a bare "is not a function" TypeError out of the write call.
    if (!Object.prototype.hasOwnProperty.call(parseURL, db)) {
      next(new Error('Cannot build download URLs for database: ' + db))
      return
    }
    parseURL[db]()

    // Pushes one .sra URL per run listed in obj.runs.Run.
    function sraURL() {
      var runs = obj.runs.Run
      // Same array-or-single-object handling as FtpPath in gffURL.
      // NOTE(review): presumably a lone run arrives as a bare object - confirm
      // against the ncbi.search output.
      var runList = Array.isArray(runs) ? runs : (runs ? [ runs ] : [])
      // Each URL is built synchronously, so a plain loop is sufficient.
      runList.forEach(function(run) {
        var acc = run.acc
        var runURL = [
          'http://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/',
          acc.slice(0, 3) + '/',
          acc.slice(0, 6) + '/',
          acc + '/',
          acc + '.sra'
        ].join('')
        self.push({url: runURL, uid: obj.uid})
      })
      next()
    }

    // Pushes the unplaced-scaffolds FASTA URL below the record's FTP path.
    function assemblyURL() {
      var rootURL = obj.meta.FtpSites.FtpPath._.replace('ftp://', 'http://')
      var url = rootURL + 'Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz'
      self.push({url: url, uid: obj.uid})
      next()
    }

    // Searches the assembly database for obj.assembly_name and pushes a GFF
    // URL for every result that has a RefSeq FTP path.
    function gffURL() {
      var finished = false
      var search = ncbi.search('assembly', obj.assembly_name)
      search.on('data', createURL)
      // Signal completion once the lookup is over; without this the transform
      // never accepts another object and the stream never ends.
      search.on('end', function() { finish() })
      search.on('error', finish)

      // Calls next exactly once, whichever of 'end' / 'error' comes first.
      function finish(err) {
        if (finished) { return }
        finished = true
        next(err)
      }

      function createURL(assembly) {
        debug('gffURL result', assembly)
        var url
        var ftpPath = assembly.meta.FtpSites.FtpPath
        var ftpArray = Array.isArray(ftpPath) ? ftpPath : [ ftpPath ]
        // If several RefSeq entries exist, the last one wins.
        ftpArray.forEach(function(ftp) {
          if (ftp.type === 'RefSeq') {
            url = ftp._.replace('ftp://', 'http://') + 'GFF/ref_' + assembly.assemblyname + '_top_level.gff3.gz'
          }
        })
        // The uid pushed here is the assembly result's uid.
        if (url) { self.push({url: url, uid: assembly.uid}) }
      }
    }
  }
}
/**
 * Creates an object stream that downloads the file behind each incoming
 * `{url, uid}` record into a directory named after `uid`.
 *
 * While a download runs, one progress string per 'data' event of `dld` is
 * pushed downstream; when it ends, the local file path is pushed. If the
 * target file already exists, only its path is pushed and nothing is
 * downloaded. Download errors are re-emitted on the stream.
 *
 * @param {*} db - Not used by this function.
 * @param {*} term - Not used by this function.
 * @returns {Stream} Object stream created with `through.obj`.
 */
function _download(db, term) {
  return through.obj(function fetchFile(obj, enc, next) {
    var stream = this
    var lastTick = Date.now()
    var mbPerChunk = 1
    var bytesPerChunk = mbPerChunk * 1024 * 1024
    var dir = obj.uid + '/'
    // Local file name is everything after the last '/' of the URL.
    var target = dir + obj.url.replace(/.*\//, '')

    mkdirp.sync(obj.uid)

    // Already on disk: hand over the path without downloading again.
    if (fs.existsSync(target)) {
      stream.push(target)
      next()
      return
    }

    debug('downloading', obj.url)
    dld(obj.url, dir, bytesPerChunk)
      .on('data', reportProgress)
      .on('end', function() {
        stream.push(target)
        next()
      })
      .on('error', function(err) { stream.emit('error', err) })

    // Pushes a human-readable progress line. Speed is computed as one chunk
    // (mbPerChunk) per elapsed interval since the previous call.
    function reportProgress(position, size) {
      var now = Date.now()
      var elapsedSec = (now - lastTick) / 1000
      lastTick = now
      var percent = (position * 100 / size).toFixed(2) + ' %'
      var total = Math.round(size / 1024 / 1024) + ' MB'
      var rate = (mbPerChunk / elapsedSec).toFixed(2) + ' MB/s'
      stream.push('Downloading ' + target + ' ' + percent + ' of ' + total + ' at ' + rate)
    }
  })
}
/**
 * Wires an inner stream to an outer transform: data is pushed to `self`,
 * 'end' invokes `next` with no arguments, and errors are re-emitted on `self`.
 *
 * @param {EventEmitter} stream - Inner stream to listen on.
 * @param {Object} self - Outer stream; its `push` and `emit` are called.
 * @param {Function} next - Callback invoked when `stream` ends.
 */
function _attachStandardEvents(stream, self, next) {
  function forward(chunk) {
    self.push(chunk)
  }

  function finish() {
    next()
  }

  function relay(err) {
    self.emit('error', err)
  }

  stream.on('data', forward)
  stream.on('end', finish)
  stream.on('error', relay)
}
/**
 * Creates an object stream that passes every object through unchanged after
 * holding it for `ms` milliseconds. The next object is only accepted once the
 * previous one has been released.
 *
 * @param {number} ms - Delay handed to `setTimeout` for each object.
 * @returns {Stream} Object stream created with `through.obj`.
 */
function _wait(ms) {
  return through.obj(function delay(obj, enc, next) {
    var stream = this
    setTimeout(function release() {
      stream.push(obj)
      next()
    }, ms)
  })
}