netdata/ml/Dimension.cc

136 lines
3.6 KiB
C++

// SPDX-License-Identifier: GPL-3.0-or-later
#include "Config.h"
#include "Dimension.h"
#include "Query.h"
using namespace ml;
/**
 * Collect the most recent values of this dimension for training.
 *
 * The query window ends one second before the current time, spans at most
 * Cfg.MaxTrainSamples update intervals, is aligned to multiples of
 * updateEvery() and is clamped to [oldestTime(), latestTime()].
 *
 * Slots for which the query does not yield a number are filled with the last
 * numeric value seen. Slots preceding the first numeric value (which would
 * otherwise hold NaN) are dropped from the front of the buffer.
 *
 * @return { buffer, count }. On success `buffer` is an array allocated with
 *         new[] whose first `count` entries are valid; the caller owns it and
 *         must release it with delete[]. The array is allocated with
 *         MaxTrainSamples * (LagN + 1) zero-initialised entries, i.e. it is
 *         larger than `count`.
 *         { nullptr, 0 } is returned when the time window is empty, when
 *         fewer than Cfg.MinTrainSamples numeric values were collected, or
 *         when no numeric value was collected at all.
 */
std::pair<CalculatedNumber *, size_t>
TrainableDimension::getCalculatedNumbers() {
    size_t MinN = Cfg.MinTrainSamples;
    size_t MaxN = Cfg.MaxTrainSamples;

    // Figure out what our time window should be.
    time_t BeforeT = now_realtime_sec() - 1;
    time_t AfterT = BeforeT - (MaxN * updateEvery());

    // Align both ends of the window to a multiple of the update interval.
    BeforeT -= (BeforeT % updateEvery());
    AfterT -= (AfterT % updateEvery());

    // Do not query outside of the time range reported for this dimension.
    BeforeT = std::min(BeforeT, latestTime());
    AfterT = std::max(AfterT, oldestTime());

    if (AfterT >= BeforeT)
        return { nullptr, 0 };

    CalculatedNumber *CNs = new CalculatedNumber[MaxN * (Cfg.LagN + 1)]();

    // Start the query.
    unsigned Idx = 0;
    unsigned CollectedValues = 0;
    unsigned TotalValues = 0;

    // Until the first numeric value shows up, gaps are recorded as NaN.
    CalculatedNumber LastValue = std::numeric_limits<CalculatedNumber>::quiet_NaN();

    Query Q = Query(getRD());
    Q.init(AfterT, BeforeT);

    while (!Q.isFinished()) {
        if (Idx == MaxN)
            break;

        auto P = Q.nextMetric();
        CalculatedNumber Value = P.second;

        if (netdata_double_isnumber(Value)) {
            CNs[Idx] = Value;
            LastValue = CNs[Idx];
            CollectedValues++;
        } else
            CNs[Idx] = LastValue; // carry the previous numeric value forward

        Idx++;
    }
    TotalValues = Idx;

    if (CollectedValues < MinN) {
        delete[] CNs;
        return { nullptr, 0 };
    }

    // Find first non-NaN value. The scan is bounded by the number of slots
    // that were actually filled: when Cfg.MinTrainSamples is 0 it is possible
    // to get here without a single numeric value, and an unbounded scan would
    // then run into (or past) the unused tail of the buffer.
    unsigned FirstValid = 0;
    while (FirstValid < TotalValues && std::isnan(CNs[FirstValid]))
        FirstValid++;

    // Nothing usable was collected: report it as missing data instead of
    // handing back a non-null buffer with zero samples.
    if (FirstValid == TotalValues) {
        delete[] CNs;
        return { nullptr, 0 };
    }

    TotalValues -= FirstValid;

    // Overwrite NaN values.
    if (FirstValid != 0)
        memmove(CNs, &CNs[FirstValid], sizeof(CalculatedNumber) * TotalValues);

    return { CNs, TotalValues };
}
/**
 * Train this dimension's model (KM) on the values returned by
 * getCalculatedNumbers().
 *
 * The collected values are wrapped in a SamplesBuffer together with the
 * configured DiffN / SmoothN / LagN parameters and a sampling ratio that is
 * capped at 1.0. Training runs with Cfg.MaxKMeansIters as its iteration
 * argument.
 *
 * @return MLResult::MissingData when no values could be collected,
 *         MLResult::Success otherwise.
 */
MLResult TrainableDimension::trainModel() {
    auto Collected = getCalculatedNumbers();

    CalculatedNumber *Samples = Collected.first;
    if (Samples == nullptr)
        return MLResult::MissingData;

    unsigned NumSamples = Collected.second;

    // Ratio of the configured target sample count to what was actually
    // collected; it is never allowed to exceed 1.0.
    unsigned WantedSamples = Cfg.MaxTrainSamples * Cfg.RandomSamplingRatio;
    double Ratio = static_cast<double>(WantedSamples) / NumSamples;
    if (1.0 < Ratio)
        Ratio = 1.0;

    SamplesBuffer Buffer = SamplesBuffer(Samples, NumSamples, 1,
                                         Cfg.DiffN, Cfg.SmoothN, Cfg.LagN,
                                         Ratio, Cfg.RandomNums);
    KM.train(Buffer, Cfg.MaxKMeansIters);

    // A freshly trained model starts out flagged as constant.
    Trained = true;
    ConstantModel = true;

    // The buffer returned by getCalculatedNumbers() is owned by us.
    delete[] Samples;

    return MLResult::Success;
}
/**
 * Feed one collected value into the window (CNs) used by predict().
 *
 * @param Value  The value to append.
 * @param Exists When false, the window is discarded and Value is ignored.
 *
 * The window holds up to DiffN + SmoothN + LagN values. While it is not yet
 * full the value is simply appended. Once full, the window is rotated left by
 * one position and the value is written into the last slot; if the entry that
 * was in that slot after the rotation differs from Value, ConstantModel is
 * cleared.
 */
void PredictableDimension::addValue(CalculatedNumber Value, bool Exists) {
    if (!Exists) {
        CNs.clear();
        return;
    }

    unsigned WindowSize = Cfg.DiffN + Cfg.SmoothN + Cfg.LagN;

    if (CNs.size() >= WindowSize) {
        // Window is full: shift everything one slot towards the front; the
        // last slot is then overwritten with the new value.
        std::rotate(std::begin(CNs), std::begin(CNs) + 1, std::end(CNs));

        unsigned Last = WindowSize - 1;
        if (CNs[Last] != Value)
            ConstantModel = false;
        CNs[Last] = Value;
    } else {
        // Still filling up the window.
        CNs.push_back(Value);
    }
}
/**
 * Compute the anomaly score for the current window of values and derive the
 * anomaly bit from it.
 *
 * The window (CNs) must hold exactly DiffN + SmoothN + LagN values. Its
 * contents are copied into a temporary zero-initialised buffer of
 * N * (LagN + 1) entries, wrapped in a SamplesBuffer and passed to
 * computeAnomalyScore(). The result is stored in AnomalyScore, and AnomalyBit
 * is set when the score is at least 100 * Cfg.DimensionAnomalyScoreThreshold.
 *
 * @return { MLResult::MissingData, false } when the window is not full,
 *         { MLResult::NaN, false } when the computed score is NaN,
 *         { MLResult::Success, AnomalyBit } otherwise.
 */
std::pair<MLResult, bool> PredictableDimension::predict() {
    unsigned N = Cfg.DiffN + Cfg.SmoothN + Cfg.LagN;

    if (CNs.size() != N) {
        AnomalyBit = false;
        return { MLResult::MissingData, AnomalyBit };
    }

    // Work on a copy of the window; the copy is released right after scoring.
    CalculatedNumber *TmpCNs = new CalculatedNumber[N * (Cfg.LagN + 1)]();
    std::memcpy(TmpCNs, CNs.data(), N * sizeof(CalculatedNumber));

    SamplesBuffer SB = SamplesBuffer(TmpCNs, N, 1, Cfg.DiffN, Cfg.SmoothN, Cfg.LagN,
                                     1.0, Cfg.RandomNums);
    AnomalyScore = computeAnomalyScore(SB);
    delete[] TmpCNs;

    // NaN never compares equal to anything (not even to itself), so an
    // equality test against quiet_NaN() can never be true. Use std::isnan()
    // so that a NaN score is actually reported as MLResult::NaN.
    if (std::isnan(AnomalyScore)) {
        AnomalyBit = false;
        return { MLResult::NaN, AnomalyBit };
    }

    AnomalyBit = AnomalyScore >= (100 * Cfg.DimensionAnomalyScoreThreshold);
    return { MLResult::Success, AnomalyBit };
}