Search
Takes an NCBI database string and an optional search term and returns a stream of the objects found:
ncbi.search('sra', 'solenopsis').on('data', console.log)
=> { uid: '280116',
expxml: {"Summary":{"Title":"Single Solenopsis invicta male","Platform":{"_":"ILLUMINA", [...],
runs: {"Run":[{"acc":"SRR620577","total_spots":"23699662","total_bases":"4787331724", [...],
extlinks: ' ',
createdate: '2013/02/07',
updatedate: '2012/11/28' }
=> { uid: '280243',
expxml: {"Summary":{"Title":"Illumina small-insert paired end","Platform":{"_":"ILLUMINA", [...],
runs: {"Run":[{"acc":"SRR621118","total_spots":"343209818","total_bases":"34320981800", [...],
extlinks: ' ',
createdate: '2013/02/07',
updatedate: '2012/11/28' }
=> [...]
Arguments can be passed as an object instead:
ncbi.search({ db: 'sra', term: 'solenopsis' })
.on('data', console.log)
Advanced options can be passed using the previous syntax:
var options = {
db: 'assembly',
term: 'human',
limit: 500,
throughput: 100
}
The search term can also be passed with write:
var search = ncbi.search('sra').on('data', console.log)
search.write('solenopsis')
Or piped, for example, from a file:
var split = require('split')
fs.createReadStream('searchTerms.txt')
.pipe(split())
.pipe(search)
/**
 * Search an NCBI database and stream back the matching records.
 *
 * Call as `search(db, term, cb)` or `search({ db: ..., term: ... }, cb)`.
 * The pipeline is assembled in object mode: every search term written to
 * it becomes an esearch URL, is requested, is expanded into esummary URLs,
 * is requested again, and the responses are filtered and reshaped by the
 * `tool` helpers before a database-specific final stage (LASTSTREAM).
 *
 * @param {string|Object} db database name, or an options object carrying
 *   `db` and optionally `term`; the options object is forwarded unchanged
 *   to createAPIDataURL
 * @param {string|Function} [term] search term; a function in this position
 *   is used as the callback
 * @param {Function} [cb] when supplied, the pipeline is piped into
 *   `concat(cb)` instead of being returned
 * @returns {Stream|undefined} the pipeline when no callback was supplied,
 *   otherwise nothing
 */
ncbi.search = function(db, term, cb) {
  var opts = db
  if (typeof db === 'string') {
    opts = { db: db, term: term }
  }

  var callback = cb
  if (typeof term === 'function') {
    callback = term
  }

  // Per-database configuration, with pass-through defaults.
  var xmlProperties = XMLPROPERTIES[opts.db] || through.obj()
  var makeLastStream = LASTSTREAM[opts.db] || through.obj

  var pipeline = pumpify.obj(
    createAPISearchUrl(opts.db, opts.term),
    requestStream(),
    createAPIDataURL(opts),
    requestStream(),
    filterEmptyResults(),
    tool.extractProperty('result'),
    tool.deleteProperty('uids'),
    tool.arraySplit(),
    tool.XMLToJSProperties(xmlProperties),
    makeLastStream()
  )

  // A term given up front is the only input: write it and close the
  // writable side. Without one the caller writes or pipes terms in later.
  if (opts.term) {
    pipeline.write(opts.term)
    pipeline.end()
  }

  if (!callback) {
    return pipeline
  }
  pipeline.pipe(concat(callback))
}
/**
 * Create an object stream that converts every search term it receives into
 * an esearch request URL for the given database.
 *
 * @param {string} db NCBI database name, inserted into the URL as-is
 * @param {string} [term] not used here; terms arrive through the stream
 * @returns {Stream} through object stream: search term in, URL string out
 */
function createAPISearchUrl(db, term) {
  return through.obj(transform)

  function transform(obj, enc, next) {
    var query = [
      APIROOT + 'esearch.fcgi?',
      DEFAULTS,
      'db=' + db,
      // The term is a single query-string value, so it needs
      // encodeURIComponent: encodeURI leaves '&', '=', '#' and '+' intact,
      // which lets a term inject extra parameters or truncate the URL.
      'term=' + encodeURIComponent(obj),
      // usehistory=y is sent so the follow-up esummary request can refer
      // to this search via WebEnv / query_key.
      'usehistory=y'
    ].join('&')
    debug('esearch request', query)
    this.push(query)
    next()
  }
}
/**
 * Create an object stream that turns one parsed esearch response into the
 * esummary request URLs needed to page through its results.
 *
 * @param {Object} opts
 * @param {string} [opts.db] database name, used when the incoming response
 *   object does not carry its own `db`
 * @param {number} [opts.limit] maximum number of records to request in
 *   total; missing, zero or negative means "no limit"
 * @param {number} [opts.throughput] records per request (defaults to
 *   RETURNMAX, and is never larger than the limit)
 * @returns {Stream} through object stream: esearch response in, one
 *   esummary URL string out per page
 */
function createAPIDataURL(opts) {
  // Only a positive limit counts; a limit of 0 used to shrink the page
  // size to 0 and make the page loop below run forever.
  var limit = Number(opts.limit) > 0 ? Number(opts.limit) : 0
  var throughput = opts.throughput || RETURNMAX
  if (limit && limit < throughput) { throughput = limit }

  return through.obj(transform)

  function transform(obj, enc, next) {
    // esearch reports the hit count as a string.
    var found = Number(obj.esearchresult.count)
    var count = limit || found
    // Do not ask for more records than the search actually matched; those
    // requests could only come back empty.
    if (limit && found >= 0 && found < limit) { count = found }

    // Prefer the db carried by the response, fall back to the requested one
    // so the URL never contains "db=undefined".
    var db = obj.db || opts.db

    var numRequests = Math.ceil(count / throughput)
    for (var i = 0; i < numRequests; i++) {
      var retstart = i * throughput
      // The last page is trimmed so the total never exceeds `count`.
      var retmax = Math.min(throughput, count - retstart)
      var query = [
        APIROOT + 'esummary.fcgi?',
        DEFAULTS,
        'db=' + db,
        'query_key=1',
        'WebEnv=' + obj.esearchresult.webenv,
        'retmax=' + retmax,
        'retstart=' + retstart
      ].join('&')
      debug('esummary request', query)
      this.push(query)
    }
    next()
  }
}
/**
 * Create an object stream that forwards only responses carrying a `result`
 * property and drops the ones flagged as empty.
 *
 * A response is treated as empty when the first element of either its
 * `esummaryresult` or its `error` property equals the "nothing todo"
 * marker; such a response is dropped even if it also has a `result`.
 *
 * @returns {Stream} through object stream: response in, same response out
 *   (or nothing)
 */
function filterEmptyResults() {
  var EMPTY_MARKER = 'Empty result - nothing todo'

  return through.obj(function keepNonEmpty(response, enc, done) {
    var summary = response.esummaryresult
    var failure = response.error
    var flaggedEmpty =
      Boolean(summary && summary[0] === EMPTY_MARKER) ||
      Boolean(failure && failure[0] === EMPTY_MARKER)

    if (!flaggedEmpty && response.result) {
      this.push(response)
    }
    done()
  })
}