refactor mechanism for model updates

This commit is contained in:
Alexandre Storelli 2019-03-06 17:17:43 +01:00
parent c4300806ac
commit d985a909c7
5 changed files with 102 additions and 13001 deletions

View File

@ -4,67 +4,87 @@ const axios = require('axios');
const tar = require('tar'); const tar = require('tar');
const fs = require('fs-extra'); const fs = require('fs-extra');
const { log } = require('abr-log')('checkModelUpdates'); const { log } = require('abr-log')('checkModelUpdates');
const assert = require('assert');
const MODELS_REPOSITORY = 'https://www.adblockradio.com/models/'; const MODELS_REPOSITORY = 'https://www.adblockradio.com/models/';
const METADATA_REPOSITORY = 'https://www.adblockradio.com/metadata/'; const METADATA_REPOSITORY = 'https://www.adblockradio.com/metadata/';
const CHECKSUM_SUFFIX = '.sha256sum'; const CHECKSUM_SUFFIX = '.sha256sum';
const isToUpdate = async function(localPath, remotePath, file) { const isToUpdate = async function(localFile, remoteFile) {
let localChecksum = null; let localChecksum = null;
try { try {
localChecksum = await fs.readFile(localPath + '/' + file + CHECKSUM_SUFFIX); localChecksum = await fs.readFile(localFile + CHECKSUM_SUFFIX);
} catch (e) { } catch (e) {
log.info('checksum for ' + localPath + '/' + file + ' not found. Will fetch models.'); log.info('checksum for ' + localFile + ' not found. Will fetch models.');
return true; return true;
} }
const remoteFile = remotePath + file + CHECKSUM_SUFFIX;
try { try {
const remoteChecksum = await axios.get(encodeURI(remoteFile)); const remoteChecksum = await axios.get(encodeURI(remoteFile + CHECKSUM_SUFFIX));
if ('' + localChecksum !== '' + remoteChecksum.data) { if ('' + localChecksum !== '' + remoteChecksum.data) {
//log.info('different checksums local=' + localChecksum + ' remote=' + remoteChecksum.data); //log.info('different checksums local=' + localChecksum + ' remote=' + remoteChecksum.data);
return true; return true;
} else { } else {
log.info(file + ' is up to date'); log.info(localFile + ' is up to date');
return false; return false;
} }
} catch (e) { } catch (e) {
log.warn('could not fetch ' + remoteFile + '. err=' + e); log.warn('could not fetch ' + remoteFile + CHECKSUM_SUFFIX + '. err=' + e);
return false; return false;
} }
} }
const update = async function(localPath, remotePath, file) { const update = async function(remoteFile, localFile, options) { //localPath, remotePath, file) {
log.info('update ' + localPath + '/' + file); log.info('update ' + localFile);
try { try {
const checksumData = await axios.get(encodeURI(remotePath + file + CHECKSUM_SUFFIX)); const localFileSplit = localFile.split('/');
await fs.writeFile(localPath + '/' + file + CHECKSUM_SUFFIX, checksumData.data); const localPath = localFileSplit.slice(0, localFileSplit.length - 1).join('/');
const data = await axios.get(encodeURI(remotePath + file), { responseType: 'arraybuffer' }); //log.debug("localPath=" + localPath);
await fs.writeFile(localPath + '/' + file, data.data); try {
await tar.x({ file: localPath + '/' + file, cwd: localPath, strict: true }); await fs.mkdir(localPath, { recursive: true });
await fs.unlink(localPath + '/' + file); } catch (e) {
if (!('' + e).includes('EEXIST')) {
log.error("Cannot create model directory " + localPath);
throw e;
}
}
const checksumData = await axios.get(encodeURI(remoteFile + CHECKSUM_SUFFIX));
await fs.writeFile(localFile + CHECKSUM_SUFFIX, checksumData.data);
const data = await axios.get(encodeURI(remoteFile), { responseType: 'arraybuffer' });
await fs.writeFile(localFile, data.data);
if (options && options.untar) {
await tar.x({ file: localFile, cwd: localPath, strict: true });
await fs.unlink(localFile);
}
} catch (e) { } catch (e) {
log.warn('could not update with remote ' + remotePath + file + '. err=' + e); log.warn('could not update with remote ' + remoteFile + '. err=' + e);
} }
} }
exports.checkModelUpdates = async function(country, name, modelsPath, mlUpdateCallback, hotlistUpdateCallback) { exports.checkModelUpdates = async function(params) {
const canonical = country + '_' + name; assert(params.localPath);
const modelFile = canonical + '.keras.tar.gz'; assert(params.files);
if (await isToUpdate(modelsPath, MODELS_REPOSITORY, modelFile)) {
await update(modelsPath, MODELS_REPOSITORY, modelFile); //const canonical = params.country + '_' + params.name;
if (mlUpdateCallback) mlUpdateCallback();
} for (let i=0; i<params.files.length; i++) {
const hotlistFile = canonical + '.sqlite.tar.gz'; const modelFile = params.files[i].file;
if (await isToUpdate(modelsPath, MODELS_REPOSITORY, hotlistFile)) { const tared = !!params.files[i].tar;
await update(modelsPath, MODELS_REPOSITORY, hotlistFile); const localFile = params.localPath + '/' + modelFile + (tared ? '.tar.gz' : '');
if (hotlistUpdateCallback) hotlistUpdateCallback(); const remoteFile = MODELS_REPOSITORY + modelFile + (tared ? '.tar.gz' : '');
if (await isToUpdate(localFile, remoteFile)) {
await update(remoteFile, localFile, { untar: tared });
if (params.files[i].callback) params.files[i].callback();
}
} }
} }
exports.checkMetadataUpdates = async function(updateCallback) { exports.checkMetadataUpdates = async function(updateCallback) {
log.debug("check meta updates"); log.debug("check meta updates");
if (await isToUpdate(process.cwd(), METADATA_REPOSITORY, 'webradio-metadata.js.tar.gz')) { const file = 'webradio-metadata.js.tar.gz';
await update(process.cwd(), METADATA_REPOSITORY, 'webradio-metadata.js.tar.gz') const localFile = process.cwd() + '/' + file;
const remoteFile = METADATA_REPOSITORY + '/' + file;
if (await isToUpdate(localFile, remoteFile)) {
await update(remoteFile, localFile, { untar: true });
if (updateCallback) updateCallback(); if (updateCallback) updateCallback();
} }
} }

22
package-lock.json generated
View File

@ -370,7 +370,7 @@
}, },
"buffer": { "buffer": {
"version": "4.9.1", "version": "4.9.1",
"resolved": "https://registry.npmjs.org/buffer/-/buffer-4.9.1.tgz", "resolved": "http://registry.npmjs.org/buffer/-/buffer-4.9.1.tgz",
"integrity": "sha1-bRu2AbB6TvztlwlBMgkwJ8lbwpg=", "integrity": "sha1-bRu2AbB6TvztlwlBMgkwJ8lbwpg=",
"dev": true, "dev": true,
"requires": { "requires": {
@ -1028,7 +1028,7 @@
}, },
"minimist": { "minimist": {
"version": "1.2.0", "version": "1.2.0",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz", "resolved": "http://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz",
"integrity": "sha1-o1AIsg9BOD7sH7kU9M1d95omQoQ=", "integrity": "sha1-o1AIsg9BOD7sH7kU9M1d95omQoQ=",
"dev": true "dev": true
} }
@ -1169,7 +1169,7 @@
}, },
"events": { "events": {
"version": "1.1.1", "version": "1.1.1",
"resolved": "https://registry.npmjs.org/events/-/events-1.1.1.tgz", "resolved": "http://registry.npmjs.org/events/-/events-1.1.1.tgz",
"integrity": "sha1-nr23Y1rQmccNzEwqH1AEKI6L2SQ=", "integrity": "sha1-nr23Y1rQmccNzEwqH1AEKI6L2SQ=",
"dev": true "dev": true
}, },
@ -1378,7 +1378,7 @@
}, },
"get-stream": { "get-stream": {
"version": "3.0.0", "version": "3.0.0",
"resolved": "https://registry.npmjs.org/get-stream/-/get-stream-3.0.0.tgz", "resolved": "http://registry.npmjs.org/get-stream/-/get-stream-3.0.0.tgz",
"integrity": "sha1-jpQ9E1jcN1VQVOy+LtsFqhdO3hQ=", "integrity": "sha1-jpQ9E1jcN1VQVOy+LtsFqhdO3hQ=",
"dev": true "dev": true
}, },
@ -1419,7 +1419,7 @@
}, },
"got": { "got": {
"version": "6.7.1", "version": "6.7.1",
"resolved": "https://registry.npmjs.org/got/-/got-6.7.1.tgz", "resolved": "http://registry.npmjs.org/got/-/got-6.7.1.tgz",
"integrity": "sha1-JAzQV4WpoY5WHcG0S0HHY+8ejbA=", "integrity": "sha1-JAzQV4WpoY5WHcG0S0HHY+8ejbA=",
"dev": true, "dev": true,
"requires": { "requires": {
@ -1623,7 +1623,7 @@
}, },
"is-obj": { "is-obj": {
"version": "1.0.1", "version": "1.0.1",
"resolved": "https://registry.npmjs.org/is-obj/-/is-obj-1.0.1.tgz", "resolved": "http://registry.npmjs.org/is-obj/-/is-obj-1.0.1.tgz",
"integrity": "sha1-PkcprB9f3gJc19g6iW2rn09n2w8=", "integrity": "sha1-PkcprB9f3gJc19g6iW2rn09n2w8=",
"dev": true "dev": true
}, },
@ -1733,7 +1733,7 @@
"dependencies": { "dependencies": {
"minimist": { "minimist": {
"version": "1.2.0", "version": "1.2.0",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz", "resolved": "http://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz",
"integrity": "sha1-o1AIsg9BOD7sH7kU9M1d95omQoQ=", "integrity": "sha1-o1AIsg9BOD7sH7kU9M1d95omQoQ=",
"dev": true "dev": true
} }
@ -2122,13 +2122,13 @@
"dependencies": { "dependencies": {
"semver": { "semver": {
"version": "5.3.0", "version": "5.3.0",
"resolved": "https://registry.npmjs.org/semver/-/semver-5.3.0.tgz", "resolved": "http://registry.npmjs.org/semver/-/semver-5.3.0.tgz",
"integrity": "sha1-myzl094C0XxgEq0yaqa00M9U+U8=", "integrity": "sha1-myzl094C0XxgEq0yaqa00M9U+U8=",
"dev": true "dev": true
}, },
"tar": { "tar": {
"version": "2.2.1", "version": "2.2.1",
"resolved": "https://registry.npmjs.org/tar/-/tar-2.2.1.tgz", "resolved": "http://registry.npmjs.org/tar/-/tar-2.2.1.tgz",
"integrity": "sha1-jk0qJWwOIYXGsYrWlK7JaLg8sdE=", "integrity": "sha1-jk0qJWwOIYXGsYrWlK7JaLg8sdE=",
"dev": true, "dev": true,
"requires": { "requires": {
@ -3140,7 +3140,7 @@
}, },
"strip-eof": { "strip-eof": {
"version": "1.0.0", "version": "1.0.0",
"resolved": "https://registry.npmjs.org/strip-eof/-/strip-eof-1.0.0.tgz", "resolved": "http://registry.npmjs.org/strip-eof/-/strip-eof-1.0.0.tgz",
"integrity": "sha1-u0P/VZim6wXYm1n80SnJgzE2Br8=", "integrity": "sha1-u0P/VZim6wXYm1n80SnJgzE2Br8=",
"dev": true "dev": true
}, },
@ -3645,7 +3645,7 @@
}, },
"wrap-ansi": { "wrap-ansi": {
"version": "2.1.0", "version": "2.1.0",
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-2.1.0.tgz", "resolved": "http://registry.npmjs.org/wrap-ansi/-/wrap-ansi-2.1.0.tgz",
"integrity": "sha1-2Pw9KE3QV5T+hJc8rs3Rz4JP3YU=", "integrity": "sha1-2Pw9KE3QV5T+hJc8rs3Rz4JP3YU=",
"dev": true, "dev": true,
"requires": { "requires": {

File diff suppressed because it is too large Load Diff

View File

@ -8,7 +8,7 @@
const { log } = require("abr-log")("post-processing"); const { log } = require("abr-log")("post-processing");
const PredictorFile = require("./predictor-file.js"); const PredictorFile = require("./predictor-file.js");
const { Transform, Readable } = require("stream"); const { Transform, Readable } = require("stream");
const fs = require("fs"); const fs = require("fs-extra");
const { checkModelUpdates, checkMetadataUpdates } = require("./check-updates.js"); const { checkModelUpdates, checkMetadataUpdates } = require("./check-updates.js");
@ -332,13 +332,19 @@ class Analyser extends Readable {
return log.error("Analyser needs to be constructed with: country (string) and name (string)"); return log.error("Analyser needs to be constructed with: country (string) and name (string)");
} }
const defaultModelPath = process.cwd() + '/model';
const defaultModelFile = this.country + '_' + this.name + '/model.keras';
const defaultHotlistFile = this.country + '_' + this.name + '/hotlist.sqlite';
// default module options // default module options
this.config = { this.config = {
saveMetadata: true, // save a JSON with predictions (saveDuration intervals) saveMetadata: true, // save a JSON with predictions (saveDuration intervals)
verbose: false, verbose: false,
file: null, // analyse a file instead of a HTTP stream. will not download stream file: null, // analyse a file instead of a HTTP stream. will not download stream
records: null, // analyse a series of previous records (relative paths). will not download stream records: null, // analyse a series of previous records (relative paths). will not download stream
modelPath: process.cwd() + '/model', // directory where ML models and hotlist DBs are stored modelPath: defaultModelPath, // directory where ML models and hotlist DBs are stored
modelFile: defaultModelFile, // TODO
hotlistFile: defaultHotlistFile, // TODO
modelUpdates: true, // periodically fetch ML and hotlist models and refresh predictors modelUpdates: true, // periodically fetch ML and hotlist models and refresh predictors
modelUpdateInterval: 60 // update model files every N minutes modelUpdateInterval: 60 // update model files every N minutes
} }
@ -394,33 +400,49 @@ class Analyser extends Readable {
}); });
if (this.config.file) { if (this.config.file) {
// analysis of a single recording
// suitable for e.g. podcasts.
// output a file containing time stamps of transitions.
if (fs.existsSync(process.cwd() + "/" + this.config.file + ".json")) fs.unlinkSync(process.cwd() + "/" + this.config.file + ".json"); if (fs.existsSync(process.cwd() + "/" + this.config.file + ".json")) fs.unlinkSync(process.cwd() + "/" + this.config.file + ".json");
this.predictor = new PredictorFile({ this.predictor = new PredictorFile({
country: this.country, country: this.country,
name: this.name, name: this.name,
file: this.config.file, file: this.config.file,
modelPath: this.config.modelPath, modelFile: this.config.modelPath + '/' + this.config.modelFile,
hotlistFile: this.config.modelPath + '/' + this.config.hotlistFile,
config: this.config, config: this.config,
listener: this.postProcessor listener: this.postProcessor
}); });
} else if (this.config.records) { } else if (this.config.records) {
// analysis of an array of recordings
// suitable for asynchronous analysis of chunks of live streams.
// outputs a complete analysis report for each audio chunk.
this.offlinets = +new Date(); this.offlinets = +new Date();
this.predictor = new PredictorFile({ this.predictor = new PredictorFile({
country: this.country, country: this.country,
name: this.name, name: this.name,
records: this.config.records, records: this.config.records,
modelPath: this.config.modelPath, modelFile: this.config.modelPath + '/' + this.config.modelFile,
hotlistFile: this.config.modelPath + '/' + this.config.hotlistFile,
config: this.config, config: this.config,
listener: this.postProcessor, listener: this.postProcessor,
verbose: true, verbose: true,
}); });
} else { } else {
// live stream analysis
// emits results with the Readable interface
(async function() { (async function() {
// download and/or update models at startup // download and/or update models at startup
if (self.config.modelUpdates) { if (self.config.modelUpdates) {
await checkModelUpdates(self.country, self.name, self.config.modelPath); await checkModelUpdates({
localPath: self.config.modelPath,
files: [
{ file: self.config.modelFile, tar: true },
{ file: self.config.hotlistFile, tar: true },
]
});
} else { } else {
log.info(self.country + '_' + self.name + ' model updates are disabled'); log.info(self.country + '_' + self.name + ' model updates are disabled');
} }
@ -433,15 +455,21 @@ class Analyser extends Readable {
self.predictor = new Predictor({ self.predictor = new Predictor({
country: self.country, country: self.country,
name: self.name, name: self.name,
modelPath: self.config.modelPath, modelFile: self.config.modelPath + '/' + self.config.modelFile,
hotlistFile: self.config.modelPath + '/' + self.config.hotlistFile,
config: self.config, config: self.config,
listener: self.postProcessor listener: self.postProcessor
}); });
self.modelUpdatesInterval = setInterval(function() { self.modelUpdatesInterval = setInterval(function() {
if (self.config.modelUpdates) { if (self.config.modelUpdates) {
checkModelUpdates(self.country, self.name, self.config.modelPath, checkModelUpdates({
self.predictor.refreshPredictorMl, self.predictor.refreshPredictorHotlist); localPath: self.config.localPath,
files: [
{ file: self.config.modelFile, tar: true, callback: self.predictor.refreshPredictorMl },
{ file: self.config.hotlistFile, tar: true, callback: self.predictor.refreshPredictorHotlist },
]
});
} }
checkMetadataUpdates(self.predictor.refreshMetadata); checkMetadataUpdates(self.predictor.refreshMetadata);
}, self.config.modelUpdateInterval * 60000); }, self.config.modelUpdateInterval * 60000);

View File

@ -42,8 +42,9 @@ class Predictor {
this.country = options.country; // mandatory argument this.country = options.country; // mandatory argument
this.name = options.name; // mandatory argument this.name = options.name; // mandatory argument
// directory where ML models and hotlist DBs are stored // paths for ML model and hotlist DB
this.modelPath = options.modelPath; // mandatory argument if ML or Hotlist is enabled, ignored otherwise this.modelFile = options.modelFile; // mandatory argument if ML is enabled, ignored otherwise
this.hotlistFile = options.hotlistFile; // mandatory argument if hotlist is enabled, ignored otherwise
// output of predictions // output of predictions
this.listener = options.listener; // mandatory argument, instance of a Writable Stream. this.listener = options.listener; // mandatory argument, instance of a Writable Stream.
@ -80,7 +81,7 @@ class Predictor {
} }
} }
log.info(this.canonical + " run predictor with config=" + JSON.stringify(this.config) + " modelPath=" + this.modelPath); log.info(this.canonical + " run predictor with config=" + JSON.stringify(this.config) + " modelFile=" + this.modelFile + " hotlistFile=" + this.hotlistFile);
this._onData = this._onData.bind(this); this._onData = this._onData.bind(this);
this._newAudioSegment = this._newAudioSegment.bind(this); this._newAudioSegment = this._newAudioSegment.bind(this);
@ -288,7 +289,7 @@ class Predictor {
this.hotlist = new Hotlist({ this.hotlist = new Hotlist({
country: this.country, country: this.country,
name: this.name, name: this.name,
fileDB: this.modelPath + '/' + this.country + '_' + this.name + '.sqlite' fileDB: this.hotlistFile,
}); });
this.decoder.stdout.pipe(this.hotlist); this.decoder.stdout.pipe(this.hotlist);
} else { } else {
@ -311,7 +312,7 @@ class Predictor {
// we pipe decoder into mlPredictor later, once mlPredictor is ready to process data. the flag for this is mlPredictor.ready2 // we pipe decoder into mlPredictor later, once mlPredictor is ready to process data. the flag for this is mlPredictor.ready2
const self = this; const self = this;
this.mlPredictor.ready2 = false; this.mlPredictor.ready2 = false;
this.mlPredictor.load(this.modelPath + '/' + this.country + '_' + this.name + '.keras', function(err) { this.mlPredictor.load(this.modelFile, function(err) {
if (err && ("" + err).indexOf("Lost remote after 30000ms") >= 0) { if (err && ("" + err).indexOf("Lost remote after 30000ms") >= 0) {
log.warn(self.canonical + " lost remote Python worker. will restart it"); log.warn(self.canonical + " lost remote Python worker. will restart it");
self.mlPredictor.destroy(); self.mlPredictor.destroy();