refactor mechanism for model updates
This commit is contained in:
parent
c4300806ac
commit
d985a909c7
|
@ -4,67 +4,87 @@ const axios = require('axios');
|
|||
const tar = require('tar');
|
||||
const fs = require('fs-extra');
|
||||
const { log } = require('abr-log')('checkModelUpdates');
|
||||
const assert = require('assert');
|
||||
|
||||
const MODELS_REPOSITORY = 'https://www.adblockradio.com/models/';
|
||||
const METADATA_REPOSITORY = 'https://www.adblockradio.com/metadata/';
|
||||
const CHECKSUM_SUFFIX = '.sha256sum';
|
||||
|
||||
/**
 * Decide whether a local file has to be (re)downloaded, by comparing checksums.
 *
 * The local checksum is read from `localFile + CHECKSUM_SUFFIX` and the remote
 * one is fetched from `remoteFile + CHECKSUM_SUFFIX`; both are compared as strings.
 *
 * @param {string} localFile - path of the local file, without the checksum suffix
 * @param {string} remoteFile - URL of the remote file, without the checksum suffix
 * @returns {Promise<boolean>} true when the local checksum cannot be read or
 *   differs from the remote one; false when both match, or when the remote
 *   checksum could not be fetched.
 */
const isToUpdate = async function(localFile, remoteFile) {
	let localChecksum = null;
	try {
		localChecksum = await fs.readFile(localFile + CHECKSUM_SUFFIX);
	} catch (e) {
		// any read failure is treated as "never downloaded"
		log.info('checksum for ' + localFile + ' not found. Will fetch models.');
		return true;
	}

	const remoteChecksumUrl = remoteFile + CHECKSUM_SUFFIX;
	try {
		const remoteChecksum = await axios.get(encodeURI(remoteChecksumUrl));
		// readFile yields a Buffer: compare string representations
		if (String(localChecksum) !== String(remoteChecksum.data)) {
			return true;
		}
		log.info(localFile + ' is up to date');
		return false;
	} catch (e) {
		// best effort: when the repository is unreachable, keep the current local files
		log.warn('could not fetch ' + remoteChecksumUrl + '. err=' + e);
		return false;
	}
}
|
||||
|
||||
/**
 * Download a remote file (and its checksum) to disk, optionally extracting it.
 *
 * The checksum file is written last, only once the payload has been stored
 * (and extracted when requested). Otherwise a failed download would leave a
 * fresh checksum behind and isToUpdate() would report the file as up to date
 * although its content is missing or stale.
 *
 * Errors are logged and not rethrown.
 *
 * @param {string} remoteFile - URL of the file to download
 * @param {string} localFile - destination path on disk
 * @param {Object} [options]
 * @param {boolean} [options.untar] - when truthy, extract the downloaded archive
 *   in the directory of localFile, then delete the archive
 * @returns {Promise<boolean>} true when the update succeeded, false otherwise
 */
const update = async function(remoteFile, localFile, options) {
	log.info('update ' + localFile);
	try {
		// directory part of localFile
		const localPath = localFile.split('/').slice(0, -1).join('/');
		try {
			await fs.mkdir(localPath, { recursive: true });
		} catch (e) {
			// an already existing directory is fine
			if (!('' + e).includes('EEXIST')) {
				log.error("Cannot create model directory " + localPath);
				throw e;
			}
		}

		const checksumData = await axios.get(encodeURI(remoteFile + CHECKSUM_SUFFIX));
		const data = await axios.get(encodeURI(remoteFile), { responseType: 'arraybuffer' });

		await fs.writeFile(localFile, data.data);
		if (options && options.untar) {
			await tar.x({ file: localFile, cwd: localPath, strict: true });
			await fs.unlink(localFile);
		}

		// everything went fine: record the checksum of what has been installed
		await fs.writeFile(localFile + CHECKSUM_SUFFIX, checksumData.data);
		return true;
	} catch (e) {
		log.warn('could not update with remote ' + remoteFile + '. err=' + e);
		return false;
	}
}
|
||||
|
||||
exports.checkModelUpdates = async function(country, name, modelsPath, mlUpdateCallback, hotlistUpdateCallback) {
|
||||
const canonical = country + '_' + name;
|
||||
const modelFile = canonical + '.keras.tar.gz';
|
||||
if (await isToUpdate(modelsPath, MODELS_REPOSITORY, modelFile)) {
|
||||
await update(modelsPath, MODELS_REPOSITORY, modelFile);
|
||||
if (mlUpdateCallback) mlUpdateCallback();
|
||||
}
|
||||
const hotlistFile = canonical + '.sqlite.tar.gz';
|
||||
if (await isToUpdate(modelsPath, MODELS_REPOSITORY, hotlistFile)) {
|
||||
await update(modelsPath, MODELS_REPOSITORY, hotlistFile);
|
||||
if (hotlistUpdateCallback) hotlistUpdateCallback();
|
||||
exports.checkModelUpdates = async function(params) {
|
||||
assert(params.localPath);
|
||||
assert(params.files);
|
||||
|
||||
//const canonical = params.country + '_' + params.name;
|
||||
|
||||
for (let i=0; i<params.files.length; i++) {
|
||||
const modelFile = params.files[i].file;
|
||||
const tared = !!params.files[i].tar;
|
||||
const localFile = params.localPath + '/' + modelFile + (tared ? '.tar.gz' : '');
|
||||
const remoteFile = MODELS_REPOSITORY + modelFile + (tared ? '.tar.gz' : '');
|
||||
if (await isToUpdate(localFile, remoteFile)) {
|
||||
await update(remoteFile, localFile, { untar: tared });
|
||||
if (params.files[i].callback) params.files[i].callback();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
exports.checkMetadataUpdates = async function(updateCallback) {
|
||||
log.debug("check meta updates");
|
||||
if (await isToUpdate(process.cwd(), METADATA_REPOSITORY, 'webradio-metadata.js.tar.gz')) {
|
||||
await update(process.cwd(), METADATA_REPOSITORY, 'webradio-metadata.js.tar.gz')
|
||||
const file = 'webradio-metadata.js.tar.gz';
|
||||
const localFile = process.cwd() + '/' + file;
|
||||
const remoteFile = METADATA_REPOSITORY + '/' + file;
|
||||
if (await isToUpdate(localFile, remoteFile)) {
|
||||
await update(remoteFile, localFile, { untar: true });
|
||||
if (updateCallback) updateCallback();
|
||||
}
|
||||
}
|
|
@ -370,7 +370,7 @@
|
|||
},
|
||||
"buffer": {
|
||||
"version": "4.9.1",
|
||||
"resolved": "https://registry.npmjs.org/buffer/-/buffer-4.9.1.tgz",
|
||||
"resolved": "http://registry.npmjs.org/buffer/-/buffer-4.9.1.tgz",
|
||||
"integrity": "sha1-bRu2AbB6TvztlwlBMgkwJ8lbwpg=",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
|
@ -1028,7 +1028,7 @@
|
|||
},
|
||||
"minimist": {
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz",
|
||||
"resolved": "http://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz",
|
||||
"integrity": "sha1-o1AIsg9BOD7sH7kU9M1d95omQoQ=",
|
||||
"dev": true
|
||||
}
|
||||
|
@ -1169,7 +1169,7 @@
|
|||
},
|
||||
"events": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/events/-/events-1.1.1.tgz",
|
||||
"resolved": "http://registry.npmjs.org/events/-/events-1.1.1.tgz",
|
||||
"integrity": "sha1-nr23Y1rQmccNzEwqH1AEKI6L2SQ=",
|
||||
"dev": true
|
||||
},
|
||||
|
@ -1378,7 +1378,7 @@
|
|||
},
|
||||
"get-stream": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/get-stream/-/get-stream-3.0.0.tgz",
|
||||
"resolved": "http://registry.npmjs.org/get-stream/-/get-stream-3.0.0.tgz",
|
||||
"integrity": "sha1-jpQ9E1jcN1VQVOy+LtsFqhdO3hQ=",
|
||||
"dev": true
|
||||
},
|
||||
|
@ -1419,7 +1419,7 @@
|
|||
},
|
||||
"got": {
|
||||
"version": "6.7.1",
|
||||
"resolved": "https://registry.npmjs.org/got/-/got-6.7.1.tgz",
|
||||
"resolved": "http://registry.npmjs.org/got/-/got-6.7.1.tgz",
|
||||
"integrity": "sha1-JAzQV4WpoY5WHcG0S0HHY+8ejbA=",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
|
@ -1623,7 +1623,7 @@
|
|||
},
|
||||
"is-obj": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/is-obj/-/is-obj-1.0.1.tgz",
|
||||
"resolved": "http://registry.npmjs.org/is-obj/-/is-obj-1.0.1.tgz",
|
||||
"integrity": "sha1-PkcprB9f3gJc19g6iW2rn09n2w8=",
|
||||
"dev": true
|
||||
},
|
||||
|
@ -1733,7 +1733,7 @@
|
|||
"dependencies": {
|
||||
"minimist": {
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz",
|
||||
"resolved": "http://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz",
|
||||
"integrity": "sha1-o1AIsg9BOD7sH7kU9M1d95omQoQ=",
|
||||
"dev": true
|
||||
}
|
||||
|
@ -2122,13 +2122,13 @@
|
|||
"dependencies": {
|
||||
"semver": {
|
||||
"version": "5.3.0",
|
||||
"resolved": "https://registry.npmjs.org/semver/-/semver-5.3.0.tgz",
|
||||
"resolved": "http://registry.npmjs.org/semver/-/semver-5.3.0.tgz",
|
||||
"integrity": "sha1-myzl094C0XxgEq0yaqa00M9U+U8=",
|
||||
"dev": true
|
||||
},
|
||||
"tar": {
|
||||
"version": "2.2.1",
|
||||
"resolved": "https://registry.npmjs.org/tar/-/tar-2.2.1.tgz",
|
||||
"resolved": "http://registry.npmjs.org/tar/-/tar-2.2.1.tgz",
|
||||
"integrity": "sha1-jk0qJWwOIYXGsYrWlK7JaLg8sdE=",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
|
@ -3140,7 +3140,7 @@
|
|||
},
|
||||
"strip-eof": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/strip-eof/-/strip-eof-1.0.0.tgz",
|
||||
"resolved": "http://registry.npmjs.org/strip-eof/-/strip-eof-1.0.0.tgz",
|
||||
"integrity": "sha1-u0P/VZim6wXYm1n80SnJgzE2Br8=",
|
||||
"dev": true
|
||||
},
|
||||
|
@ -3645,7 +3645,7 @@
|
|||
},
|
||||
"wrap-ansi": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-2.1.0.tgz",
|
||||
"resolved": "http://registry.npmjs.org/wrap-ansi/-/wrap-ansi-2.1.0.tgz",
|
||||
"integrity": "sha1-2Pw9KE3QV5T+hJc8rs3Rz4JP3YU=",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -8,7 +8,7 @@
|
|||
const { log } = require("abr-log")("post-processing");
|
||||
const PredictorFile = require("./predictor-file.js");
|
||||
const { Transform, Readable } = require("stream");
|
||||
const fs = require("fs");
|
||||
const fs = require("fs-extra");
|
||||
const { checkModelUpdates, checkMetadataUpdates } = require("./check-updates.js");
|
||||
|
||||
|
||||
|
@ -332,13 +332,19 @@ class Analyser extends Readable {
|
|||
return log.error("Analyser needs to be constructed with: country (string) and name (string)");
|
||||
}
|
||||
|
||||
const defaultModelPath = process.cwd() + '/model';
|
||||
const defaultModelFile = this.country + '_' + this.name + '/model.keras';
|
||||
const defaultHotlistFile = this.country + '_' + this.name + '/hotlist.sqlite';
|
||||
|
||||
// default module options
|
||||
this.config = {
|
||||
saveMetadata: true, // save a JSON with predictions (saveDuration intervals)
|
||||
verbose: false,
|
||||
file: null, // analyse a file instead of a HTTP stream. will not download stream
|
||||
records: null, // analyse a series of previous records (relative paths). will not download stream
|
||||
modelPath: process.cwd() + '/model', // directory where ML models and hotlist DBs are stored
|
||||
modelPath: defaultModelPath, // directory where ML models and hotlist DBs are stored
|
||||
modelFile: defaultModelFile, // TODO
|
||||
hotlistFile: defaultHotlistFile, // TODO
|
||||
modelUpdates: true, // periodically fetch ML and hotlist models and refresh predictors
|
||||
modelUpdateInterval: 60 // update model files every N minutes
|
||||
}
|
||||
|
@ -394,33 +400,49 @@ class Analyser extends Readable {
|
|||
});
|
||||
|
||||
if (this.config.file) {
|
||||
// analysis of a single recording
|
||||
// suitable for e.g. podcasts.
|
||||
// output a file containing time stamps of transitions.
|
||||
if (fs.existsSync(process.cwd() + "/" + this.config.file + ".json")) fs.unlinkSync(process.cwd() + "/" + this.config.file + ".json");
|
||||
this.predictor = new PredictorFile({
|
||||
country: this.country,
|
||||
name: this.name,
|
||||
file: this.config.file,
|
||||
modelPath: this.config.modelPath,
|
||||
modelFile: this.config.modelPath + '/' + this.config.modelFile,
|
||||
hotlistFile: this.config.modelPath + '/' + this.config.hotlistFile,
|
||||
config: this.config,
|
||||
listener: this.postProcessor
|
||||
});
|
||||
|
||||
} else if (this.config.records) {
|
||||
// analysis of an array of recordings
|
||||
// suitable for asynchronous analysis of chunks of live streams.
|
||||
// outputs a complete analysis report for each audio chunk.
|
||||
this.offlinets = +new Date();
|
||||
this.predictor = new PredictorFile({
|
||||
country: this.country,
|
||||
name: this.name,
|
||||
records: this.config.records,
|
||||
modelPath: this.config.modelPath,
|
||||
modelFile: this.config.modelPath + '/' + this.config.modelFile,
|
||||
hotlistFile: this.config.modelPath + '/' + this.config.hotlistFile,
|
||||
config: this.config,
|
||||
listener: this.postProcessor,
|
||||
verbose: true,
|
||||
});
|
||||
|
||||
} else {
|
||||
// live stream analysis
|
||||
// emits results with the Readable interface
|
||||
(async function() {
|
||||
// download and/or update models at startup
|
||||
if (self.config.modelUpdates) {
|
||||
await checkModelUpdates(self.country, self.name, self.config.modelPath);
|
||||
await checkModelUpdates({
|
||||
localPath: self.config.modelPath,
|
||||
files: [
|
||||
{ file: self.config.modelFile, tar: true },
|
||||
{ file: self.config.hotlistFile, tar: true },
|
||||
]
|
||||
});
|
||||
} else {
|
||||
log.info(self.country + '_' + self.name + ' model updates are disabled');
|
||||
}
|
||||
|
@ -433,15 +455,21 @@ class Analyser extends Readable {
|
|||
self.predictor = new Predictor({
|
||||
country: self.country,
|
||||
name: self.name,
|
||||
modelPath: self.config.modelPath,
|
||||
modelFile: self.config.modelPath + '/' + self.config.modelFile,
|
||||
hotlistFile: self.config.modelPath + '/' + self.config.hotlistFile,
|
||||
config: self.config,
|
||||
listener: self.postProcessor
|
||||
});
|
||||
|
||||
// Periodically look for updated ML models / hotlist DBs (when enabled) and for
// updated metadata, every config.modelUpdateInterval minutes.
self.modelUpdatesInterval = setInterval(function() {
	if (self.config.modelUpdates) {
		checkModelUpdates({
			// the models directory is stored in config.modelPath, as in the
			// startup check; the default config has no localPath entry.
			localPath: self.config.modelPath,
			files: [
				// NOTE(review): callbacks are passed unbound — confirm Predictor binds them
				{ file: self.config.modelFile, tar: true, callback: self.predictor.refreshPredictorMl },
				{ file: self.config.hotlistFile, tar: true, callback: self.predictor.refreshPredictorHotlist },
			]
		}).catch(function(e) {
			// nobody awaits this promise: report failures instead of leaving them unhandled
			log.warn(self.country + '_' + self.name + ' could not check model updates. err=' + e);
		});
	}
	checkMetadataUpdates(self.predictor.refreshMetadata).catch(function(e) {
		log.warn(self.country + '_' + self.name + ' could not check metadata updates. err=' + e);
	});
}, self.config.modelUpdateInterval * 60000);
|
||||
|
|
11
predictor.js
11
predictor.js
|
@ -42,8 +42,9 @@ class Predictor {
|
|||
this.country = options.country; // mandatory argument
|
||||
this.name = options.name; // mandatory argument
|
||||
|
||||
// directory where ML models and hotlist DBs are stored
|
||||
this.modelPath = options.modelPath; // mandatory argument if ML or Hotlist is enabled, ignored otherwise
|
||||
// paths for ML model and hotlist DB
|
||||
this.modelFile = options.modelFile; // mandatory argument if ML is enabled, ignored otherwise
|
||||
this.hotlistFile = options.hotlistFile; // mandatory argument if ML is enabled, ignored otherwise
|
||||
|
||||
// output of predictions
|
||||
this.listener = options.listener; // mandatory argument, instance of a Writable Stream.
|
||||
|
@ -80,7 +81,7 @@ class Predictor {
|
|||
}
|
||||
}
|
||||
|
||||
log.info(this.canonical + " run predictor with config=" + JSON.stringify(this.config) + " modelPath=" + this.modelPath);
|
||||
log.info(this.canonical + " run predictor with config=" + JSON.stringify(this.config) + " modelFile=" + this.modelFile + " hotlistFile=" + this.hotlistFile);
|
||||
|
||||
this._onData = this._onData.bind(this);
|
||||
this._newAudioSegment = this._newAudioSegment.bind(this);
|
||||
|
@ -288,7 +289,7 @@ class Predictor {
|
|||
this.hotlist = new Hotlist({
|
||||
country: this.country,
|
||||
name: this.name,
|
||||
fileDB: this.modelPath + '/' + this.country + '_' + this.name + '.sqlite'
|
||||
fileDB: this.hotlistFile,
|
||||
});
|
||||
this.decoder.stdout.pipe(this.hotlist);
|
||||
} else {
|
||||
|
@ -311,7 +312,7 @@ class Predictor {
|
|||
// we pipe decoder into mlPredictor later, once mlPredictor is ready to process data. the flag for this is mlPredictor.ready2
|
||||
const self = this;
|
||||
this.mlPredictor.ready2 = false;
|
||||
this.mlPredictor.load(this.modelPath + '/' + this.country + '_' + this.name + '.keras', function(err) {
|
||||
this.mlPredictor.load(this.modelFile, function(err) {
|
||||
if (err && ("" + err).indexOf("Lost remote after 30000ms") >= 0) {
|
||||
log.warn(self.canonical + " lost remote Python worker. will restart it");
|
||||
self.mlPredictor.destroy();
|
||||
|
|
Loading…
Reference in New Issue