Fix polling down retry (and other items) (#15483)

* Only update last_polled if polled
Because availability now always runs, we need to check whether any other modules ran successfully; if so, we can update last_polled.

* Tally results and act accordingly

* Apply fixes from StyleCI

---------

Co-authored-by: StyleCI Bot <bot@styleci.io>
This commit is contained in:
Tony Murray 2023-10-19 06:55:41 -05:00 committed by GitHub
parent 27ef398907
commit 8f91fb3877
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 123 additions and 35 deletions

View File

@ -141,13 +141,16 @@ class OS implements
$this->graphs[$name] = true;
}
public function persistGraphs(): void
public function persistGraphs(bool $cleanup = true): void
{
$device = $this->getDevice();
$graphs = collect(array_keys($this->graphs));
// delete extra graphs
$device->graphs->keyBy('graph')->collect()->except($graphs)->each->delete();
if ($cleanup) {
// delete extra graphs
$device->graphs->keyBy('graph')->collect()->except($graphs)->each->delete();
}
// create missing graphs
$device->graphs()->saveMany($graphs->diff($device->graphs->pluck('graph'))->map(function ($graph) {
return new DeviceGraph(['graph' => $graph]);

View File

@ -37,6 +37,7 @@ use Illuminate\Support\Str;
use LibreNMS\Enum\Severity;
use LibreNMS\Exceptions\PollerException;
use LibreNMS\Polling\ConnectivityHelper;
use LibreNMS\Polling\Result;
use LibreNMS\RRD\RrdDefinition;
use LibreNMS\Util\Debug;
use LibreNMS\Util\Dns;
@ -77,9 +78,9 @@ class Poller
$this->parseModules();
}
public function poll(): int
public function poll(): Result
{
$polled = 0;
$results = new Result;
$this->printHeader();
if (Debug::isEnabled() && ! defined('PHPUNIT_RUNNING')) {
@ -89,6 +90,7 @@ class Poller
$this->logger->info("Starting polling run:\n");
foreach ($this->buildDeviceQuery()->pluck('device_id') as $device_id) {
$results->markAttempted();
$this->initDevice($device_id);
PollingDevice::dispatch($this->device);
$this->os = OS::make($this->deviceArray);
@ -104,33 +106,26 @@ class Poller
$measurement->end();
// if modules are not overridden, record performance
if (empty($this->module_override)) {
// record performance
$measurement->manager()->record('device', $measurement);
$this->device->last_polled = Carbon::now();
$this->device->last_polled_timetaken = $measurement->getDuration();
app('Datastore')->put($this->deviceArray, 'poller-perf', [
'rrd_def' => RrdDefinition::make()->addDataset('poller', 'GAUGE', 0),
'module' => 'ALL',
], [
'poller' => $measurement->getDuration(),
]);
$this->os->enableGraph('poller_perf');
if ($this->device->status) {
$this->recordPerformance($measurement);
}
if ($helper->canPing()) {
$this->os->enableGraph('ping_perf');
}
$this->os->persistGraphs();
$this->os->persistGraphs($this->device->status); // save graphs but don't delete any if device is down
$this->logger->info(sprintf("Enabled graphs (%s): %s\n\n",
$this->device->graphs->count(),
$this->device->graphs->pluck('graph')->implode(' ')
));
}
// finalize the device poll
$this->device->save();
$polled++;
$results->markCompleted($this->device->status);
DevicePolled::dispatch($this->device);
$this->logger->info(sprintf("\n>>> Polled %s (%s) in %0.3f seconds <<<",
@ -144,20 +139,12 @@ class Poller
// check if the poll took too long and log an event
if ($measurement->getDuration() > Config::get('rrd.step')) {
\App\Models\Eventlog::log('Polling took longer than ' . round(Config::get('rrd.step') / 60, 2) .
Eventlog::log('Polling took longer than ' . round(Config::get('rrd.step') / 60, 2) .
' minutes! This will cause gaps in graphs.', $this->device, 'system', Severity::Error);
}
}
return $polled;
}
/**
* Get the total number of devices to poll.
*/
public function totalDevices(): int
{
return $this->buildDeviceQuery()->count();
return $results;
}
private function pollModules(): void
@ -193,7 +180,7 @@ class Poller
} catch (Throwable $e) {
// isolate module exceptions so they don't disrupt the polling process
$this->logger->error("%rError polling $module module for {$this->device->hostname}.%n $e", ['color' => true]);
\App\Models\Eventlog::log("Error polling $module module. Check log file for more details.", $this->device, 'poller', Severity::Error);
Eventlog::log("Error polling $module module. Check log file for more details.", $this->device, 'poller', Severity::Error);
report($e);
}
@ -339,4 +326,20 @@ EOH, $this->device->hostname, $group ? " ($group)" : '', $this->device->device_i
$this->logger->info(Version::get()->header());
}
}
/**
 * Record the performance data of a device poll.
 *
 * Registers the measurement with its manager under the 'device' category,
 * sets last_polled to the current time, stores the poll duration in
 * last_polled_timetaken, writes the same duration to the 'poller-perf'
 * datastore entry and enables the poller_perf graph.
 *
 * @param  Measurement  $measurement  timing of the device poll being recorded
 */
private function recordPerformance(Measurement $measurement): void
{
    $measurement->manager()->record('device', $measurement);

    $duration = $measurement->getDuration();

    $this->device->last_polled = Carbon::now();
    // The overall poll duration belongs in last_polled_timetaken.
    // last_ping_timetaken is a different attribute (presumably maintained by
    // the availability/ping check - confirm) and must not be overwritten here.
    $this->device->last_polled_timetaken = $duration;

    app('Datastore')->put($this->deviceArray, 'poller-perf', [
        'rrd_def' => RrdDefinition::make()->addDataset('poller', 'GAUGE', 0),
        'module' => 'ALL',
    ], [
        'poller' => $duration,
    ]);

    $this->os->enableGraph('poller_perf');
}
}

View File

@ -0,0 +1,74 @@
<?php
/*
* Result.php
*
* Tally attempts and completions
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* @package LibreNMS
* @link http://librenms.org
* @copyright 2023 Tony Murray
* @author Tony Murray <murraytony@gmail.com>
*/
namespace LibreNMS\Polling;
/**
 * Tally of attempts and successful completions.
 *
 * Call markAttempted() once per item that is tried and markCompleted()
 * once it has finished; the query methods then report on the counts.
 */
class Result
{
    /** Number of times markAttempted() has been called. */
    private int $attempted = 0;

    /** Number of times markCompleted() has been called with a true $success. */
    private int $completed = 0;

    /**
     * Count one more attempt.
     */
    public function markAttempted(): void
    {
        $this->attempted += 1;
    }

    /**
     * Count one more completion, but only when it was successful.
     *
     * @param  bool  $success  false leaves the completed count untouched
     */
    public function markCompleted(bool $success = true): void
    {
        if (! $success) {
            return;
        }

        $this->completed += 1;
    }

    /**
     * True when nothing has been attempted yet.
     */
    public function hasNoAttempts(): bool
    {
        return $this->attempted === 0;
    }

    /**
     * True when no successful completion has been counted.
     */
    public function hasNoCompleted(): bool
    {
        return ! $this->hasAnyCompleted();
    }

    /**
     * True when at least one successful completion has been counted.
     */
    public function hasAnyCompleted(): bool
    {
        return $this->completed > 0;
    }

    /**
     * True when two or more successful completions have been counted.
     */
    public function hasMultipleCompleted(): bool
    {
        return $this->completed >= 2;
    }

    /**
     * Number of successful completions counted so far.
     */
    public function getCompleted(): int
    {
        return $this->completed;
    }

    /**
     * Number of attempts counted so far.
     */
    public function getAttempted(): int
    {
        return $this->attempted;
    }
}

View File

@ -41,14 +41,14 @@ class DevicePoll extends LnmsCommand
try {
/** @var \LibreNMS\Poller $poller */
$poller = app(Poller::class, ['device_spec' => $this->argument('device spec'), 'module_override' => explode(',', $this->option('modules') ?? '')]);
$polled = $poller->poll();
$result = $poller->poll();
if ($polled > 0) {
if ($result->hasAnyCompleted()) {
if (! $this->output->isQuiet()) {
if ($polled > 1) {
if ($result->hasMultipleCompleted()) {
$this->output->newLine();
$time_spent = sprintf('%0.3fs', $measurements->getCategory('device')->getSummary('poll')->getDuration());
$this->line(trans('commands.device:poll.polled', ['count' => $polled, 'time' => $time_spent]));
$this->line(trans('commands.device:poll.polled', ['count' => $result->getCompleted(), 'time' => $time_spent]));
}
$this->output->newLine();
$measurements->printStats();
@ -58,11 +58,18 @@ class DevicePoll extends LnmsCommand
}
// polled 0 devices, maybe there were none to poll
if ($poller->totalDevices() == 0) {
if ($result->hasNoAttempts()) {
$this->error(trans('commands.device:poll.errors.no_devices'));
return 1;
}
// attempted some devices, but none were up.
if ($result->hasNoCompleted()) {
$this->line('<fg=red>' . trans_choice('commands.device:poll.errors.none_up', $result->getAttempted()) . '</>');
return 6;
}
} catch (QueryException $e) {
if ($e->getCode() == 2002) {
$this->error(trans('commands.device:poll.errors.db_connect'));

View File

@ -120,6 +120,7 @@ return [
'db_connect' => 'Failed to connect to database. Verify database service is running and connection settings.',
'db_auth' => 'Failed to connect to database. Verify credentials: :error',
'no_devices' => 'No devices found matching your given device specification.',
'none_up' => 'Device was down, unable to poll.|All devices were down, unable to poll.',
'none_polled' => 'No devices were polled.',
],
'polled' => 'Polled :count devices in :time',