URLs
Takes an NCBI database string and an optional search term and returns a stream of dataset/sequence file URLs.
Currently only supports sra and assembly databases.
The value of the uid property corresponds to the UID from NCBI.
ncbi.urls('assembly', 'solenopsis invicta')
.on('data', console.log)
=> {"url":"http://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/invertebrates/Solenopsis_invicta/Si_gnG/Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz",
"uid":"244018/"}
// Returns an object-mode stream of {url, uid} objects for the records that
// match a search on the given NCBI database.
//
// db   - NCBI database name; it is handed to ncbi.search and to _getURLs.
// term - optional search term. When truthy it is written to the returned
//        stream, which is then ended. When omitted, the caller can write
//        search terms to the returned stream instead.
NCBI.prototype.urls = function(db, term) {
  var stream = through.obj(transform)
  if (term) { stream.write(term); stream.end() }
  return stream

  // Runs one search per written term and forwards every URL object found.
  function transform(obj, enc, next) {
    var self = this
    var getURLs = _getURLs(db)

    // pipe() does not forward 'error' events, so failures of the search stream
    // are relayed explicitly; otherwise they would never reach the consumer of
    // the returned stream.
    ncbi.search(db, obj)
      .on('error', relayError)
      .pipe(getURLs)
    _attachStandardEvents(getURLs, self, next)

    function relayError(err) { self.emit('error', err) }
  }
}
// Returns an object-mode transform stream that turns NCBI search results into
// {url, uid} objects pointing at the downloadable files.
//
// db - database the results came from; only 'sra' and 'assembly' are handled.
//      For any other value each incoming object is answered with an
//      'Unsupported database' error passed to the transform callback.
function _getURLs(db) {
  return through.obj(transform)

  function transform(obj, enc, next) {
    var self = this
    var parseURL = {
      sra: sraURL,
      assembly: assemblyURL
    }

    // Own-property lookup: an unknown name would otherwise throw a bare
    // TypeError from inside the stream, and an inherited one such as
    // 'constructor' would be called without next() ever being invoked.
    if (!Object.prototype.hasOwnProperty.call(parseURL, db)) {
      next(new Error('Unsupported database: ' + db))
      return
    }
    parseURL[db]()

    // Pushes one URL per run of the record, in order, then calls next.
    // NOTE(review): assumes obj.runs.Run is an array of objects carrying an
    // `acc` string; confirm this also holds for records with a single run.
    function sraURL() {
      var runs = obj.runs.Run
      async.eachSeries(runs, printSRAURL, next)

      function printSRAURL(run, cb) {
        var acc = run.acc
        // Layout: <first 3 chars>/<first 6 chars>/<accession>/<accession>.sra
        var runURL = [
          'http://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/',
          acc.slice(0,3) + '/',
          acc.slice(0,6) + '/',
          acc + '/',
          acc + '.sra'
        ].join('')
        self.push({url: runURL, uid: obj.uid})
        cb()
      }
    }

    // Pushes the URL of the unplaced-scaffolds FASTA below the record's FTP
    // path, with the ftp:// scheme swapped for http://, then calls next.
    function assemblyURL() {
      var rootURL = obj.meta.FtpSites.FtpPath._.replace('ftp://', 'http://')
      var fastaURL = rootURL + 'Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz'
      self.push({url: fastaURL, uid: obj.uid})
      next()
    }
  }
}
// Returns an object-mode transform stream that saves the file behind each
// incoming {url, uid} object to '<uid>/<file name>' and pushes the local path
// once it is available.
//
// While a download is running, a progress string is pushed for every 'data'
// event reported by dld; the path follows on 'end'. When the target file
// already exists nothing is downloaded and only the path is pushed. A download
// 'error' is re-emitted on this stream and next() is not called for that
// object.
//
// db, term - accepted but not read by this function.
function _download(db, term) {
  return through.obj(transform)

  function transform(obj, enc, next) {
    var out = this
    var lastTick = Date.now()
    var chunkMB = 1
    var chunkBytes = chunkMB * 1024 * 1024
    var dir = obj.uid + '/'
    // File name is whatever follows the last '/' of the URL.
    var fileName = obj.url.replace(/.*\//, '')
    var target = dir + fileName

    mkdirp.sync(obj.uid)

    // An existing file is treated as already downloaded.
    if (fs.existsSync(target)) {
      finish()
      return
    }

    dld(obj.url, dir, chunkBytes)
      .on('data', reportProgress)
      .on('end', finish)
      .on('error', function(err) { out.emit('error', err) })

    // Hands the local path downstream and asks for the next object.
    function finish() {
      out.push(target)
      next()
    }

    // Pushes a progress line; speed is the chunk size divided by the time
    // elapsed since the previous progress event (or since the transform began).
    function reportProgress(position, size) {
      var percent = (position * 100 / size).toFixed(2) + ' %'
      var totalMB = Math.round(size / 1024 / 1024) + ' MB'
      var now = Date.now()
      var elapsedSec = (now - lastTick) / 1000
      lastTick = now
      var rate = (chunkMB / elapsedSec).toFixed(2) + ' MB/s'
      out.push(['Downloading ', target, ' ', percent, ' of ', totalMB, ' at ', rate].join(''))
    }
  }
}
// Connects an inner stream to the transform that created it:
//   'data'  -> the chunk is pushed onto `self`
//   'end'   -> `next` is called without arguments
//   'error' -> the error is re-emitted on `self`
//
// stream - emitter producing the events above
// self   - object providing push(chunk) and emit(name, err)
// next   - callback signalling that the current transform step is finished
function _attachStandardEvents(stream, self, next) {
  stream
    .on('data', forwardChunk)
    .on('end', finishStep)
    .on('error', relayError)

  function forwardChunk(chunk) { self.push(chunk) }

  // Wrapped so that arguments of the 'end' event are never passed to next.
  function finishStep() { next() }

  function relayError(err) { self.emit('error', err) }
}
// Returns an object-mode transform stream that passes every object through
// unchanged, but only after a timer of `ms` milliseconds has fired. The next
// object is requested once the delayed one has been pushed.
function _wait(ms) {
  return through.obj(function delay(chunk, enc, done) {
    var out = this
    setTimeout(function release() {
      out.push(chunk)
      done()
    }, ms)
  })
}