node.js - Scanning a large number of files -
i'm trying scan large number of images , i'm having trouble. sample folder has 4k smallish files , keep running emfile
errors.
i'm using async.queue
try limit number of files being processed 10 @ time. should under file descriptor limit, doesn't work. few thousand scanned correctly, i'll start getting errors other files before first ones finished.
it seems me aren't being queued, i'm seeing lots of "file done" , "the end" long before data output console , i'm not sure why.
i'm pretty new node.js if has better ideas how handle , this, i'd appreciate it. help.
var fs = require('fs'); var crypt = require('crypto'); var path = '../pictures/'; var async = require('async'); function end(err) { console.log(err); } function filedone(err) { if(err) { console.log(err); } else { console.log('file done'); } } function scandirectory(path, queue, callback, recursive) { fs.readdir(path,function(err,files) { if(err) { callback(err); } files.foreach(function(file) { fs.stat(path + file, function(err,stats) { if(err) { console.log(err); } if(recursive && stats && stats.isdirectory()) { var newpath = path + file + '/'; scandirectory(newpath, queue, callback, recursive); } else if (!stats.isdirectory()) { queue.push(path + file, filedone); } }); }); }); } var q = async.queue(function(file, callback) { var hash = crypt.createhash('md5'); var stream = fs.createreadstream(file); stream.on('data',function(data) { hash.update(data,'utf8'); }); stream.on('end',function() { var md5 = hash.digest('hex'); console.log(file); console.log(md5); }); callback(); },10); q.empty = function() { console.log('the end'); } scandirectory(path, q, end, false);
in async.queue
, move callback in stream.on('end'
function, after console.log
and add stream.on('error', callback);
handle errors :)
Comments
Post a Comment