refactor mechanism for model updates

This commit is contained in:
Alexandre Storelli 2019-03-06 17:17:43 +01:00
parent c4300806ac
commit d985a909c7
5 changed files with 102 additions and 13001 deletions

View File

@ -4,67 +4,87 @@ const axios = require('axios');
const tar = require('tar');
const fs = require('fs-extra');
const { log } = require('abr-log')('checkModelUpdates');
const assert = require('assert');
const MODELS_REPOSITORY = 'https://www.adblockradio.com/models/';
const METADATA_REPOSITORY = 'https://www.adblockradio.com/metadata/';
const CHECKSUM_SUFFIX = '.sha256sum';
/**
 * Tell whether a local file is missing or outdated compared to its remote copy.
 *
 * The decision is based on checksum files: the local one is read from
 * `localFile + CHECKSUM_SUFFIX`, the remote one is fetched from
 * `remoteFile + CHECKSUM_SUFFIX`.
 *
 * @param {string} localFile - path of the local file (without the checksum suffix)
 * @param {string} remoteFile - URL of the remote file (without the checksum suffix)
 * @returns {Promise<boolean>} true when the local checksum is absent or differs
 *   from the remote one; false when both match or when the remote checksum
 *   could not be fetched.
 */
const isToUpdate = async function(localFile, remoteFile) {
	let localChecksum = null;
	try {
		localChecksum = await fs.readFile(localFile + CHECKSUM_SUFFIX);
	} catch (e) {
		// no readable local checksum: consider the file as never downloaded
		log.info('checksum for ' + localFile + ' not found. Will fetch models.');
		return true;
	}

	const remoteChecksumUrl = remoteFile + CHECKSUM_SUFFIX;
	try {
		const remoteChecksum = await axios.get(encodeURI(remoteChecksumUrl));
		// the local checksum is a Buffer, so both sides are compared as strings
		if (String(localChecksum) !== String(remoteChecksum.data)) {
			return true;
		}
		log.info(localFile + ' is up to date');
		return false;
	} catch (e) {
		// the remote is unreachable: keep the current local files
		log.warn('could not fetch ' + remoteChecksumUrl + '. err=' + e);
		return false;
	}
};
/**
 * Download a remote file (and its checksum) to a local path, optionally
 * extracting it as a tar archive.
 *
 * The checksum file is written last, only once the payload has been stored
 * (and extracted when requested). If anything fails midway, the local
 * checksum is left untouched, so the next isToUpdate() check still reports
 * the file as outdated and the download is retried.
 *
 * Errors are logged and not thrown.
 *
 * @param {string} remoteFile - URL of the file to download
 * @param {string} localFile - destination path; its parent directory is created if needed
 * @param {{untar?: boolean}} [options] - when `untar` is truthy, the downloaded
 *   file is extracted into its parent directory and then deleted
 * @returns {Promise<boolean>} true when the update completed, false otherwise
 */
const update = async function(remoteFile, localFile, options) {
	log.info('update ' + localFile);
	try {
		// parent directory of localFile (empty string when there is no '/')
		const lastSlash = localFile.lastIndexOf('/');
		const localPath = lastSlash >= 0 ? localFile.slice(0, lastSlash) : '';

		try {
			// creates missing parent directories, no error if it already exists
			await fs.ensureDir(localPath);
		} catch (e) {
			log.error("Cannot create model directory " + localPath);
			throw e;
		}

		const checksumData = await axios.get(encodeURI(remoteFile + CHECKSUM_SUFFIX));
		const data = await axios.get(encodeURI(remoteFile), { responseType: 'arraybuffer' });

		await fs.writeFile(localFile, data.data);
		if (options && options.untar) {
			await tar.x({ file: localFile, cwd: localPath, strict: true });
			await fs.unlink(localFile);
		}

		// only now record the checksum, so that a failed update is retried later
		await fs.writeFile(localFile + CHECKSUM_SUFFIX, checksumData.data);
		return true;
	} catch (e) {
		log.warn('could not update with remote ' + remoteFile + '. err=' + e);
		return false;
	}
};
exports.checkModelUpdates = async function(country, name, modelsPath, mlUpdateCallback, hotlistUpdateCallback) {
const canonical = country + '_' + name;
const modelFile = canonical + '.keras.tar.gz';
if (await isToUpdate(modelsPath, MODELS_REPOSITORY, modelFile)) {
await update(modelsPath, MODELS_REPOSITORY, modelFile);
if (mlUpdateCallback) mlUpdateCallback();
}
const hotlistFile = canonical + '.sqlite.tar.gz';
if (await isToUpdate(modelsPath, MODELS_REPOSITORY, hotlistFile)) {
await update(modelsPath, MODELS_REPOSITORY, hotlistFile);
if (hotlistUpdateCallback) hotlistUpdateCallback();
exports.checkModelUpdates = async function(params) {
assert(params.localPath);
assert(params.files);
//const canonical = params.country + '_' + params.name;
for (let i=0; i<params.files.length; i++) {
const modelFile = params.files[i].file;
const tared = !!params.files[i].tar;
const localFile = params.localPath + '/' + modelFile + (tared ? '.tar.gz' : '');
const remoteFile = MODELS_REPOSITORY + modelFile + (tared ? '.tar.gz' : '');
if (await isToUpdate(localFile, remoteFile)) {
await update(remoteFile, localFile, { untar: tared });
if (params.files[i].callback) params.files[i].callback();
}
}
}
exports.checkMetadataUpdates = async function(updateCallback) {
log.debug("check meta updates");
if (await isToUpdate(process.cwd(), METADATA_REPOSITORY, 'webradio-metadata.js.tar.gz')) {
await update(process.cwd(), METADATA_REPOSITORY, 'webradio-metadata.js.tar.gz')
const file = 'webradio-metadata.js.tar.gz';
const localFile = process.cwd() + '/' + file;
const remoteFile = METADATA_REPOSITORY + '/' + file;
if (await isToUpdate(localFile, remoteFile)) {
await update(remoteFile, localFile, { untar: true });
if (updateCallback) updateCallback();
}
}

22
package-lock.json generated
View File

@ -370,7 +370,7 @@
},
"buffer": {
"version": "4.9.1",
"resolved": "https://registry.npmjs.org/buffer/-/buffer-4.9.1.tgz",
"resolved": "http://registry.npmjs.org/buffer/-/buffer-4.9.1.tgz",
"integrity": "sha1-bRu2AbB6TvztlwlBMgkwJ8lbwpg=",
"dev": true,
"requires": {
@ -1028,7 +1028,7 @@
},
"minimist": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz",
"resolved": "http://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz",
"integrity": "sha1-o1AIsg9BOD7sH7kU9M1d95omQoQ=",
"dev": true
}
@ -1169,7 +1169,7 @@
},
"events": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/events/-/events-1.1.1.tgz",
"resolved": "http://registry.npmjs.org/events/-/events-1.1.1.tgz",
"integrity": "sha1-nr23Y1rQmccNzEwqH1AEKI6L2SQ=",
"dev": true
},
@ -1378,7 +1378,7 @@
},
"get-stream": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/get-stream/-/get-stream-3.0.0.tgz",
"resolved": "http://registry.npmjs.org/get-stream/-/get-stream-3.0.0.tgz",
"integrity": "sha1-jpQ9E1jcN1VQVOy+LtsFqhdO3hQ=",
"dev": true
},
@ -1419,7 +1419,7 @@
},
"got": {
"version": "6.7.1",
"resolved": "https://registry.npmjs.org/got/-/got-6.7.1.tgz",
"resolved": "http://registry.npmjs.org/got/-/got-6.7.1.tgz",
"integrity": "sha1-JAzQV4WpoY5WHcG0S0HHY+8ejbA=",
"dev": true,
"requires": {
@ -1623,7 +1623,7 @@
},
"is-obj": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/is-obj/-/is-obj-1.0.1.tgz",
"resolved": "http://registry.npmjs.org/is-obj/-/is-obj-1.0.1.tgz",
"integrity": "sha1-PkcprB9f3gJc19g6iW2rn09n2w8=",
"dev": true
},
@ -1733,7 +1733,7 @@
"dependencies": {
"minimist": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz",
"resolved": "http://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz",
"integrity": "sha1-o1AIsg9BOD7sH7kU9M1d95omQoQ=",
"dev": true
}
@ -2122,13 +2122,13 @@
"dependencies": {
"semver": {
"version": "5.3.0",
"resolved": "https://registry.npmjs.org/semver/-/semver-5.3.0.tgz",
"resolved": "http://registry.npmjs.org/semver/-/semver-5.3.0.tgz",
"integrity": "sha1-myzl094C0XxgEq0yaqa00M9U+U8=",
"dev": true
},
"tar": {
"version": "2.2.1",
"resolved": "https://registry.npmjs.org/tar/-/tar-2.2.1.tgz",
"resolved": "http://registry.npmjs.org/tar/-/tar-2.2.1.tgz",
"integrity": "sha1-jk0qJWwOIYXGsYrWlK7JaLg8sdE=",
"dev": true,
"requires": {
@ -3140,7 +3140,7 @@
},
"strip-eof": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/strip-eof/-/strip-eof-1.0.0.tgz",
"resolved": "http://registry.npmjs.org/strip-eof/-/strip-eof-1.0.0.tgz",
"integrity": "sha1-u0P/VZim6wXYm1n80SnJgzE2Br8=",
"dev": true
},
@ -3645,7 +3645,7 @@
},
"wrap-ansi": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-2.1.0.tgz",
"resolved": "http://registry.npmjs.org/wrap-ansi/-/wrap-ansi-2.1.0.tgz",
"integrity": "sha1-2Pw9KE3QV5T+hJc8rs3Rz4JP3YU=",
"dev": true,
"requires": {

File diff suppressed because it is too large Load Diff

View File

@ -8,7 +8,7 @@
const { log } = require("abr-log")("post-processing");
const PredictorFile = require("./predictor-file.js");
const { Transform, Readable } = require("stream");
const fs = require("fs");
const fs = require("fs-extra");
const { checkModelUpdates, checkMetadataUpdates } = require("./check-updates.js");
@ -332,13 +332,19 @@ class Analyser extends Readable {
return log.error("Analyser needs to be constructed with: country (string) and name (string)");
}
const defaultModelPath = process.cwd() + '/model';
const defaultModelFile = this.country + '_' + this.name + '/model.keras';
const defaultHotlistFile = this.country + '_' + this.name + '/hotlist.sqlite';
// default module options
this.config = {
saveMetadata: true, // save a JSON with predictions (saveDuration intervals)
verbose: false,
file: null, // analyse a file instead of a HTTP stream. will not download stream
records: null, // analyse a series of previous records (relative paths). will not download stream
modelPath: process.cwd() + '/model', // directory where ML models and hotlist DBs are stored
modelPath: defaultModelPath, // directory where ML models and hotlist DBs are stored
modelFile: defaultModelFile, // TODO
hotlistFile: defaultHotlistFile, // TODO
modelUpdates: true, // periodically fetch ML and hotlist models and refresh predictors
modelUpdateInterval: 60 // update model files every N minutes
}
@ -394,33 +400,49 @@ class Analyser extends Readable {
});
if (this.config.file) {
// analysis of a single recording
// suitable for e.g. podcasts.
// output a file containing time stamps of transitions.
if (fs.existsSync(process.cwd() + "/" + this.config.file + ".json")) fs.unlinkSync(process.cwd() + "/" + this.config.file + ".json");
this.predictor = new PredictorFile({
country: this.country,
name: this.name,
file: this.config.file,
modelPath: this.config.modelPath,
modelFile: this.config.modelPath + '/' + this.config.modelFile,
hotlistFile: this.config.modelPath + '/' + this.config.hotlistFile,
config: this.config,
listener: this.postProcessor
});
} else if (this.config.records) {
// analysis of an array of recordings
// suitable for asynchronous analysis of chunks of live streams.
// outputs a complete analysis report for each audio chunk.
this.offlinets = +new Date();
this.predictor = new PredictorFile({
country: this.country,
name: this.name,
records: this.config.records,
modelPath: this.config.modelPath,
modelFile: this.config.modelPath + '/' + this.config.modelFile,
hotlistFile: this.config.modelPath + '/' + this.config.hotlistFile,
config: this.config,
listener: this.postProcessor,
verbose: true,
});
} else {
// live stream analysis
// emits results with the Readable interface
(async function() {
// download and/or update models at startup
if (self.config.modelUpdates) {
await checkModelUpdates(self.country, self.name, self.config.modelPath);
await checkModelUpdates({
localPath: self.config.modelPath,
files: [
{ file: self.config.modelFile, tar: true },
{ file: self.config.hotlistFile, tar: true },
]
});
} else {
log.info(self.country + '_' + self.name + ' model updates are disabled');
}
@ -433,15 +455,21 @@ class Analyser extends Readable {
self.predictor = new Predictor({
country: self.country,
name: self.name,
modelPath: self.config.modelPath,
modelFile: self.config.modelPath + '/' + self.config.modelFile,
hotlistFile: self.config.modelPath + '/' + self.config.hotlistFile,
config: self.config,
listener: self.postProcessor
});
// Periodically look for new ML model / hotlist files (when enabled) and for
// new metadata, every config.modelUpdateInterval minutes.
self.modelUpdatesInterval = setInterval(function() {
	if (self.config.modelUpdates) {
		checkModelUpdates({
			// the models directory is stored in config.modelPath
			localPath: self.config.modelPath,
			files: [
				{ file: self.config.modelFile, tar: true, callback: self.predictor.refreshPredictorMl },
				{ file: self.config.hotlistFile, tar: true, callback: self.predictor.refreshPredictorHotlist },
			]
		}).catch(function(e) {
			// the promise is not awaited here: report failures instead of leaving them unhandled
			log.warn(self.country + '_' + self.name + ' could not check model updates. err=' + e);
		});
	}
	checkMetadataUpdates(self.predictor.refreshMetadata).catch(function(e) {
		log.warn(self.country + '_' + self.name + ' could not check metadata updates. err=' + e);
	});
}, self.config.modelUpdateInterval * 60000);

View File

@ -42,8 +42,9 @@ class Predictor {
this.country = options.country; // mandatory argument
this.name = options.name; // mandatory argument
// directory where ML models and hotlist DBs are stored
this.modelPath = options.modelPath; // mandatory argument if ML or Hotlist is enabled, ignored otherwise
// paths for ML model and hotlist DB
this.modelFile = options.modelFile; // mandatory argument if ML is enabled, ignored otherwise
this.hotlistFile = options.hotlistFile; // mandatory argument if ML is enabled, ignored otherwise
// output of predictions
this.listener = options.listener; // mandatory argument, instance of a Writable Stream.
@ -80,7 +81,7 @@ class Predictor {
}
}
log.info(this.canonical + " run predictor with config=" + JSON.stringify(this.config) + " modelPath=" + this.modelPath);
log.info(this.canonical + " run predictor with config=" + JSON.stringify(this.config) + " modelFile=" + this.modelFile + " hotlistFile=" + this.hotlistFile);
this._onData = this._onData.bind(this);
this._newAudioSegment = this._newAudioSegment.bind(this);
@ -288,7 +289,7 @@ class Predictor {
this.hotlist = new Hotlist({
country: this.country,
name: this.name,
fileDB: this.modelPath + '/' + this.country + '_' + this.name + '.sqlite'
fileDB: this.hotlistFile,
});
this.decoder.stdout.pipe(this.hotlist);
} else {
@ -311,7 +312,7 @@ class Predictor {
// we pipe decoder into mlPredictor later, once mlPredictor is ready to process data. the flag for this is mlPredictor.ready2
const self = this;
this.mlPredictor.ready2 = false;
this.mlPredictor.load(this.modelPath + '/' + this.country + '_' + this.name + '.keras', function(err) {
this.mlPredictor.load(this.modelFile, function(err) {
if (err && ("" + err).indexOf("Lost remote after 30000ms") >= 0) {
log.warn(self.canonical + " lost remote Python worker. will restart it");
self.mlPredictor.destroy();