New python service for poller, discovery + more (#8455)

This currently has a file handle leak related to the self-update process, so the service will eventually run out of handles.

We either need to fix that, or rip out self-update and leave updating to cron or something similar.


DO NOT DELETE THIS TEXT

#### Please note

> Please read this information carefully. You can run `./scripts/pre-commit.php` to check your code before submitting.

- [x] Have you followed our [code guidelines?](http://docs.librenms.org/Developing/Code-Guidelines/)

#### Testers

If you would like to test this pull request then please run: `./scripts/github-apply <pr_id>`, e.g. `./scripts/github-apply 5926`
This commit is contained in:
Tony Murray 2018-06-30 06:19:49 -05:00 committed by Neil Lathwood
parent 461e7067f3
commit 0ba76e6d62
27 changed files with 1903 additions and 227 deletions

2
.gitignore vendored
View File

@ -13,6 +13,8 @@
# Others #
##########
__pycache__
*.pyc
*.swp
Thumbs.db
config.php

View File

@ -42,8 +42,9 @@ class ComposerHelper
{
if (!file_exists('.env')) {
self::setPermissions();
self::populateEnv();
}
self::populateEnv();
}
public static function preUpdate(Event $event)
@ -80,34 +81,34 @@ class ComposerHelper
if (!file_exists('.env')) {
copy('.env.example', '.env');
self::exec('php artisan key:generate');
$config = [
'db_host' => '',
'db_port' => '',
'db_name' => '',
'db_user' => '',
'db_pass' => '',
'db_socket' => '',
'base_url' => '',
'user' => '',
'group' => '',
];
@include 'config.php';
self::setEnv([
'NODE_ID' => uniqid(),
'DB_HOST' => $config['db_host'],
'DB_PORT' => $config['db_port'],
'DB_USERNAME' => $config['db_user'],
'DB_PASSWORD' => $config['db_pass'],
'DB_DATABASE' => $config['db_name'],
'DB_SOCKET' => $config['db_socket'],
'APP_URL' => $config['base_url'],
'LIBRENMS_USER' => $config['user'],
'LIBRENMS_GROUP' => $config['group'],
]);
}
$config = [
'db_host' => '',
'db_port' => '',
'db_name' => '',
'db_user' => '',
'db_pass' => '',
'db_socket' => '',
'base_url' => '',
'user' => '',
'group' => '',
];
@include 'config.php';
self::setEnv([
'NODE_ID' => uniqid(),
'DB_HOST' => $config['db_host'],
'DB_PORT' => $config['db_port'],
'DB_USERNAME' => $config['db_user'],
'DB_PASSWORD' => $config['db_pass'],
'DB_DATABASE' => $config['db_name'],
'DB_SOCKET' => $config['db_socket'],
'APP_URL' => $config['base_url'],
'LIBRENMS_USER' => $config['user'],
'LIBRENMS_GROUP' => $config['group'],
]);
}
/**
@ -118,8 +119,10 @@ class ComposerHelper
*/
private static function setEnv($settings, $file = '.env')
{
$content = file_get_contents($file);
if (substr($content, -1) !== "\n") {
$original_content = $content = file_get_contents($file);
// ensure trailing line return
if (substr($content, -1) !== PHP_EOL) {
$content .= PHP_EOL;
}
@ -143,7 +146,10 @@ class ComposerHelper
}
}
file_put_contents($file, $content);
// only write if the content has changed
if ($content !== $original_content) {
file_put_contents($file, $content);
}
}
private static function setPermissions()

View File

@ -314,6 +314,18 @@ class Config
return is_array($curr) && isset($curr[$last]);
}
/**
* Serialise the whole configuration to json for use in external processes.
*
* Encodes the global $config array as it is at the time of the call.
* The unqualified json_encode() call in the body is PHP's built-in function,
* not a recursive call to this static method.
*
* @return string the JSON document (PHP's json_encode() yields false if encoding fails)
*/
public static function json_encode()
{
global $config;
return json_encode($config);
}
/**
* merge the database config with the global config
* Global config overrides db

View File

@ -91,6 +91,15 @@ class Poller extends BaseValidation
$validator->fail("The poller ($poller) has not completed within the last 5 minutes, check the cron job.");
}
}
} elseif (dbFetchCell('SELECT COUNT(*) FROM `poller_cluster`')) {
$sql = "SELECT `node_id` FROM `poller_cluster` WHERE `last_report` <= DATE_ADD(NOW(), INTERVAL - 5 MINUTE)";
$pollers = dbFetchColumn($sql);
if (count($pollers) > 0) {
foreach ($pollers as $poller) {
$validator->fail("The poller cluster member ($poller) has not checked in within the last 5 minutes, check that it is running and healthy.");
}
}
} else {
$validator->fail('The poller has never run or you are not using poller-wrapper.py, check the cron job.');
}

278
LibreNMS/__init__.py Normal file
View File

@ -0,0 +1,278 @@
import threading
from logging import critical, info, debug, exception
from math import ceil
from time import time
from .service import Service, ServiceConfig
from .queuemanager import QueueManager, TimedQueueManager, BillingQueueManager
def normalize_wait(seconds):
    """
    Time left until the next multiple of ``seconds`` on the epoch clock.

    :param seconds: interval length in seconds (must be non-zero)
    :return: remaining time in the current interval, rounded up
    """
    into_interval = time() % seconds
    return ceil(seconds - into_interval)
class DB:
    """Keeps one MySQL connection per thread and runs simple queries on it."""

    def __init__(self, config, auto_connect=True):
        """
        Simple DB wrapper

        :param config: The poller config object
        :param auto_connect: when true, connect for the calling thread immediately
        """
        self.config = config
        self._db = {}  # connections keyed by thread identifier
        if auto_connect:
            self.connect()

    def connect(self):
        """
        Open a connection for the calling thread and store it by thread identifier.

        PyMySQL is tried first and installed under the MySQLdb name; if it is
        missing, whichever module provides ``MySQLdb`` is used instead.
        A missing driver or a failed connection is logged as critical and re-raised.
        """
        try:
            import pymysql
            pymysql.install_as_MySQLdb()
            info("Using pure python SQL client")
        except ImportError:
            info("Using other SQL client")

        try:
            import MySQLdb
        except ImportError:
            critical("ERROR: missing a mysql python module")
            critical("Install either 'PyMySQL' or 'mysqlclient' from your OS software repository or from PyPI")
            raise

        try:
            cfg = self.config
            params = dict(
                host=cfg.db_host,
                port=cfg.db_port,
                user=cfg.db_user,
                passwd=cfg.db_pass,
                db=cfg.db_name,
            )
            # the socket is only passed when one is configured
            if cfg.db_socket:
                params['unix_socket'] = cfg.db_socket

            connection = MySQLdb.connect(**params)
            connection.autocommit(True)
            connection.ping(True)
            self._db[threading.get_ident()] = connection
        except Exception as e:
            critical("ERROR: Could not connect to MySQL database! {}".format(e))
            raise

    def db_conn(self):
        """
        Refers to a database connection via thread identifier

        :return: database connection handle
        """
        thread_id = threading.get_ident()
        # connect lazily the first time a thread asks for a connection
        if thread_id not in self._db:
            self.connect()
        return self._db[thread_id]

    def query(self, query, args=None):
        """
        Open a cursor, fetch the query with args, close the cursor and return it.

        :rtype: MySQLdb.Cursor
        :param query: SQL statement to execute
        :param args: parameters handed to cursor.execute() together with the statement
        :return: the cursor with results
        """
        result = self.db_conn().cursor()
        result.execute(query, args)
        # NOTE(review): the cursor is returned after close(); callers in this project
        # iterate it afterwards, which presumably relies on the driver buffering rows.
        result.close()
        return result
class RecurringTimer:
    """Runs ``target`` on a background thread each time a ``duration`` interval elapses."""

    def __init__(self, duration, target, thread_name=None):
        """
        :param duration: interval length in seconds, passed to normalize_wait()
        :param target: callable invoked without arguments after each wait
        :param thread_name: optional name for the timer thread
        """
        self.duration = duration
        self.target = target
        self._thread_name = thread_name
        self._timer_thread = None
        self._event = threading.Event()  # set by stop() to end the loop

    def _loop(self):
        """Wait, then call the target, until the stop event is set."""
        stop_requested = self._event
        while not stop_requested.is_set():
            stop_requested.wait(normalize_wait(self.duration))
            if stop_requested.is_set():
                continue  # stopped while waiting; the loop condition ends the thread
            self.target()

    def start(self):
        """Clear the stop event and start a new timer thread."""
        worker = threading.Thread(target=self._loop)
        self._timer_thread = worker
        if self._thread_name:
            worker.name = self._thread_name
        self._event.clear()
        worker.start()

    def stop(self):
        """Ask the timer thread to stop; does not wait for it."""
        self._event.set()
class Lock:
    """ Base lock class this is not thread safe"""

    def __init__(self):
        self._locks = {}  # store a tuple (owner, expiration)

    def lock(self, name, owner, expiration, allow_owner_relock=False):
        """
        Obtain the named lock.

        The lock is granted when it does not exist, when it has expired, or when
        the current owner asks again with allow_owner_relock set.

        :param allow_owner_relock: let the current owner take the lock again (renewing the expiration)
        :param name: str the name of the lock
        :param owner: str a unique name for the locking node
        :param expiration: int in seconds
        :return: True when the lock was obtained, False otherwise
        """
        held = self._locks.get(name)
        if held is not None:
            relock_by_owner = allow_owner_relock and held[0] == owner
            if not relock_by_owner and not time() > held[1]:
                return False  # held by someone and not yet expired

        self._locks[name] = (owner, expiration + time())
        return self._locks[name][0] == owner

    def unlock(self, name, owner):
        """
        Release the named lock.

        :param name: str the name of the lock
        :param owner: str a unique name for the locking node
        :return: True when the lock existed and belonged to owner, False otherwise
        """
        held = self._locks.get(name)
        if held is not None and held[0] == owner:
            del self._locks[name]
            return True
        return False

    def check_lock(self, name):
        """
        :param name: str the name of the lock
        :return: True when the lock exists and has not expired yet
        """
        held = self._locks.get(name, None)
        if not held:
            return False
        return held[1] > time()

    def print_locks(self):
        """Write the current lock table to the debug log."""
        debug(self._locks)
class ThreadingLock(Lock):
    """A subclass of Lock that uses thread-safe locking"""

    def __init__(self):
        Lock.__init__(self)
        self._lock = threading.Lock()  # serialises lock() and unlock()

    def lock(self, name, owner, expiration, allow_owner_relock=False):
        """
        Obtain the named lock.

        :param allow_owner_relock: let the current owner take the lock again
        :param name: str the name of the lock
        :param owner: str a unique name for the locking node
        :param expiration: int in seconds
        :return: True when the lock was obtained, False otherwise
        """
        with self._lock:
            obtained = Lock.lock(self, name, owner, expiration, allow_owner_relock)
        return obtained

    def unlock(self, name, owner):
        """
        Release the named lock.

        :param name: str the name of the lock
        :param owner: str a unique name for the locking node
        :return: True when the lock was released, False otherwise
        """
        with self._lock:
            released = Lock.unlock(self, name, owner)
        return released

    def check_lock(self, name):
        """Delegate to Lock.check_lock(); the internal mutex is not taken here."""
        return Lock.check_lock(self, name)

    def print_locks(self):
        """Delegate to Lock.print_locks()."""
        Lock.print_locks(self)
class RedisLock(Lock):
    """Lock implementation that stores each lock as a Redis key holding the owner's name."""

    def __init__(self, namespace='lock', **redis_kwargs):
        """
        :param namespace: prefix for every lock key ("<namespace>:<name>")
        :param redis_kwargs: keyword arguments passed to redis.Redis()
        """
        import redis  # imported here so redis-py is only needed when this class is used
        # decode responses so get() returns text that can be compared with the owner name
        redis_kwargs['decode_responses'] = True
        self._redis = redis.Redis(**redis_kwargs)
        self._redis.ping()
        self._namespace = namespace

    def __key(self, name):
        """Build the namespaced Redis key for a lock name."""
        return "{}:{}".format(self._namespace, name)

    def lock(self, name, owner, expiration=1, allow_owner_relock=False):
        """
        Obtain the named lock.

        :param allow_owner_relock: bool, let the current owner overwrite (renew) its own lock
        :param name: str the name of the lock
        :param owner: str a unique name for the locking node
        :param expiration: int in seconds; values below 1 are raised to 1, so a lock always expires
        :return: truthy when the lock was obtained, falsy otherwise (False on a Redis response error)
        """
        import redis
        try:
            if int(expiration) < 1:
                expiration = 1

            key = self.__key(name)
            # only demand that the key is absent (NX) when this is not a relock by the owner
            non_existing = not (allow_owner_relock and self._redis.get(key) == owner)
            return self._redis.set(key, owner, ex=int(expiration), nx=non_existing)
        except redis.exceptions.ResponseError:
            exception("Unable to obtain lock, local state: name: %s, owner: %s, expiration: %s, allow_owner_relock: %s",
                      name, owner, expiration, allow_owner_relock)
            return False

    def unlock(self, name, owner):
        """
        Release the named lock.

        :param name: str the name of the lock
        :param owner: str a unique name for the locking node
        :return: True when the key was held by owner and has been deleted, False otherwise
        """
        key = self.__key(name)
        # NOTE(review): get followed by delete is two separate commands, not one atomic step
        if self._redis.get(key) == owner:
            self._redis.delete(key)
            return True
        return False

    def check_lock(self, name):
        """
        :param name: str the name of the lock
        :return: True when a key exists for this lock name
        """
        return self._redis.get(self.__key(name)) is not None

    def print_locks(self):
        """Print every key in this namespace with its owner and remaining time to live."""
        keys = self._redis.keys(self.__key('*'))
        for key in keys:
            print("{} locked by {}, expires in {} seconds".format(key, self._redis.get(key), self._redis.ttl(key)))
class RedisQueue(object):
    """FIFO queue stored in a Redis list, with a subset of the queue.Queue methods."""

    def __init__(self, name, namespace='queue', **redis_kwargs):
        """
        :param name: queue name, combined with namespace into the key "<namespace>:<name>"
        :param namespace: key prefix
        :param redis_kwargs: keyword arguments passed to redis.Redis()
        """
        import redis  # imported here so redis-py is only needed when this class is used
        redis_kwargs['decode_responses'] = True
        self._redis = redis.Redis(**redis_kwargs)
        self._redis.ping()
        self.key = "{}:{}".format(namespace, name)

    def qsize(self):
        """Return the number of items currently in the list."""
        return self._redis.llen(self.key)

    def empty(self):
        """Return True when the queue holds no items."""
        return self.qsize() == 0

    def put(self, item):
        """
        Append an item to the tail of the queue.

        Duplicates are allowed: removing an existing entry first (LREM + LPUSH in a
        pipeline) would make entries unique, but shuffles the queue order.
        """
        self._redis.rpush(self.key, item)

    def get(self, block=True, timeout=None):
        """
        Remove and return the item at the head of the queue.

        :param block: wait for an item (BLPOP) instead of returning immediately (LPOP)
        :param timeout: seconds to wait when blocking, passed to blpop()
        :return: the item, or None when nothing was available
        """
        if block:
            # BLPOP answers with a (key, value) pair, or None once the timeout expires
            popped = self._redis.blpop(self.key, timeout=timeout)
            return popped[1] if popped else None

        # LPOP answers with the bare value; indexing it would slice into the item itself
        return self._redis.lpop(self.key)

    def get_nowait(self):
        """Non-blocking get()."""
        return self.get(False)

240
LibreNMS/queuemanager.py Normal file
View File

@ -0,0 +1,240 @@
import random
import threading
import traceback
from logging import debug, info, error, critical
from multiprocessing import Queue
from subprocess import CalledProcessError
import sys
import LibreNMS
if sys.version_info[0] < 3:
from Queue import Empty
else:
from queue import Empty
class QueueManager:
    def __init__(self, config, type_desc, work_function, auto_start=True):
        """
        This class manages a queue of jobs and can be used to submit jobs to the queue with post_work()
        and process jobs in that queue in worker threads using the work_function

        This will attempt to use redis to create a queue, but fall back to an internal queue.
        If you are using redis, you can have multiple QueueManagers working on the same queue

        You can start or stop the worker threads with start(), stop(), and stop_and_wait()

        :param config: LibreNMS.ServiceConfig reference to the service config object
        :param type_desc: description for this queue manager type
        :param work_function: function that will be called to perform the task
        :param auto_start: automatically start worker threads
        """
        self.type = type_desc
        self.config = config

        self._threads = []
        self._queues = {}  # queue name -> queue object, filled lazily by get_queue()
        self._queue_create_lock = threading.Lock()

        self._work_function = work_function
        self._stop_event = threading.Event()  # set to ask the workers to finish

        info("Groups: {}".format(self.config.group))
        info("{} QueueManager created: {} workers, {}s frequency"
             .format(self.type.title(), self.get_poller_config().workers, self.get_poller_config().frequency))

        if auto_start:
            self.start()

    def _service_worker(self, work_func, queue_id):
        """
        Worker thread body: take payloads from the queue and pass them to work_func
        until the stop event is set. Errors are logged and the loop carries on.

        :param work_func: function called with each payload
        :param queue_id: which queue (group) this worker reads from
        """
        while not self._stop_event.is_set():
            try:
                # cannot break blocking request with redis-py, so timeout :(
                device_id = self.get_queue(queue_id).get(True, 3)

                if device_id:  # None returned by redis after timeout when empty
                    debug("Queues: {}".format(self._queues))
                    work_func(device_id)
            except Empty:
                pass  # ignore empty queue exception from multiprocessing.Queue
            except CalledProcessError as e:
                error('{} poller script error! {} returned {}: {}'
                      .format(self.type.title(), e.cmd, e.returncode, e.output))
            except Exception as e:
                error('{} poller exception! {}'.format(self.type.title(), e))
                traceback.print_exc()

    def post_work(self, payload, queue_id):
        """
        Post work to the queue group.

        :param payload: string payload to deliver to the worker
        :param queue_id: which queue to post to, 0 is the default
        """
        self.get_queue(queue_id).put(payload)
        debug("Posted work for {} to {}:{} queue size: {}"
              .format(payload, self.type, queue_id, self.get_queue(queue_id).qsize()))

    def start(self):
        """
        Start worker threads

        "discovery" and "poller" managers split the configured workers evenly over
        the poller groups (at least one per group); every other type gets a single
        worker on queue 0.
        """
        # stop() leaves the event set; without clearing it, workers started after a
        # stop (e.g. by restart()) would see it and exit straight away
        self._stop_event.clear()

        workers = self.get_poller_config().workers
        groups = self.config.group if hasattr(self.config.group, "__iter__") else [self.config.group]

        if self.type == "discovery" or self.type == "poller":
            for group in groups:
                group_workers = max(int(workers / len(groups)), 1)
                for i in range(group_workers):
                    thread_name = "{}_{}-{}".format(self.type.title(), group, i + 1)
                    self.spawn_worker(thread_name, group)

                debug("Started {} {} threads for group {}".format(group_workers, self.type, group))
        else:
            self.spawn_worker(self.type.title(), 0)

    def spawn_worker(self, thread_name, group):
        """
        Create, register and start one daemon worker thread.

        :param thread_name: name given to the thread
        :param group: queue (group) the worker reads from
        """
        pt = threading.Thread(target=self._service_worker, name=thread_name,
                              args=(self._work_function, group))
        pt.daemon = True
        self._threads.append(pt)
        pt.start()

    def restart(self):
        """
        Stop the worker threads and wait for them to finish. Then start them again.
        """
        self.stop_and_wait()
        self.start()

    def stop(self):
        """
        Stop the worker threads, does not wait for them to finish.
        """
        self._stop_event.set()

    def stop_and_wait(self):
        """
        Stop the worker threads and wait for them to finish.
        """
        self.stop()  # make sure this has been called so we don't block forever
        for t in self._threads:
            t.join()
        del self._threads[:]

    def get_poller_config(self):
        """
        Returns the LibreNMS.PollerConfig for this QueueManager

        :return: LibreNMS.PollerConfig
        """
        return getattr(self.config, self.type)

    def get_queue(self, group):
        """
        Return the queue for a group, creating it on first use.

        :param group: poller group the queue belongs to
        :return: the queue object
        """
        name = self.queue_name(self.type, group)

        if name not in self._queues:
            with self._queue_create_lock:
                # check again: another thread may have created it while we waited
                if name not in self._queues:
                    self._queues[name] = self._create_queue(self.type, group)

        return self._queues[name]

    def _create_queue(self, queue_type, group):
        """
        Create a queue (not thread safe)

        A Redis queue is tried first. If that fails the process exits with status 2
        when distributed polling is enabled, otherwise an in-process queue is used.

        :param queue_type: type of the queue manager
        :param group: poller group the queue belongs to
        :return: LibreNMS.RedisQueue or multiprocessing.Queue
        """
        info("Creating queue {}".format(self.queue_name(queue_type, group)))
        try:
            return LibreNMS.RedisQueue(self.queue_name(queue_type, group),
                                       namespace='librenms.queue',
                                       host=self.config.redis_host,
                                       port=self.config.redis_port,
                                       db=self.config.redis_db,
                                       password=self.config.redis_pass,
                                       unix_socket_path=self.config.redis_socket
                                       )
        except ImportError:
            if self.config.distributed:
                critical("ERROR: Redis connection required for distributed polling")
                critical("Please install redis-py, either through your os software repository or from PyPI")
                sys.exit(2)
        except Exception as e:
            if self.config.distributed:
                critical("ERROR: Redis connection required for distributed polling")
                critical("Could not connect to Redis. {}".format(e))
                sys.exit(2)

        return Queue()

    @staticmethod
    def queue_name(queue_type, group):
        """
        Build the "<type>:<group>" name of a queue.

        :param queue_type: non-empty queue type
        :param group: poller group, must be an int
        :raises ValueError: when queue_type is empty or group is not an int
        """
        if queue_type and type(group) == int:
            return "{}:{}".format(queue_type, group)
        else:
            raise ValueError("Refusing to create improperly scoped queue - parameters were invalid or not set")
class TimedQueueManager(QueueManager):
    def __init__(self, config, type_desc, work_function, dispatch_function, auto_start=True):
        """
        A queue manager that periodically dispatches work to the queue

        The times are normalized like they started at 0:00

        :param config: LibreNMS.ServiceConfig reference to the service config object
        :param type_desc: description for this queue manager type
        :param work_function: function that will be called to perform the task
        :param dispatch_function: function that will be called when the timer is up, should call post_work()
        :param auto_start: automatically start worker threads
        """
        QueueManager.__init__(self, config, type_desc, work_function, auto_start)
        # the dispatch interval is the frequency configured for this manager type
        interval = self.get_poller_config().frequency
        self.timer = LibreNMS.RecurringTimer(interval, dispatch_function)

    def start_dispatch(self):
        """
        Start the dispatch timer, this is not called automatically on init
        """
        self.timer.start()

    def stop_dispatch(self):
        """
        Stop the dispatch timer
        """
        self.timer.stop()

    def stop(self):
        """
        Stop the worker threads and dispatcher thread, does not wait for them to finish.
        """
        # timer first, then the workers
        self.stop_dispatch()
        QueueManager.stop(self)
class BillingQueueManager(TimedQueueManager):
    def __init__(self, config, work_function, poll_dispatch_function, calculate_dispatch_function,
                 auto_start=True):
        """
        A TimedQueueManager with two timers dispatching poll billing and calculate billing to the same work queue

        :param config: LibreNMS.ServiceConfig reference to the service config object
        :param work_function: function that will be called to perform the task
        :param poll_dispatch_function: function that will be called when the timer is up, should call post_work()
        :param calculate_dispatch_function: function that will be called when the timer is up, should call post_work()
        :param auto_start: automatically start worker threads
        """
        TimedQueueManager.__init__(self, config, 'billing', work_function, poll_dispatch_function, auto_start)
        # second timer: runs at the billing "calculate" interval
        calculate_interval = self.get_poller_config().calculate
        self.calculate_timer = LibreNMS.RecurringTimer(
            calculate_interval, calculate_dispatch_function, 'calculate_billing_timer')

    def start_dispatch(self):
        """
        Start both dispatch timers, this is not called automatically on init
        """
        self.calculate_timer.start()
        TimedQueueManager.start_dispatch(self)

    def stop_dispatch(self):
        """
        Stop both dispatch timers
        """
        self.calculate_timer.stop()
        TimedQueueManager.stop_dispatch(self)

728
LibreNMS/service.py Normal file
View File

@ -0,0 +1,728 @@
import LibreNMS
import json
import logging
import os
import subprocess
import threading
import sys
import time
import timeit
from datetime import timedelta
from logging import debug, info, warning, error, critical, exception
from platform import python_version
from time import sleep
from socket import gethostname
from signal import signal, SIGTERM
from uuid import uuid1
class PerformanceCounter(object):
    """
    This is a simple counter to record execution time and number of jobs. It's unique to each
    poller instance, so does not need to be globally synchronised, just locally.
    """

    def __init__(self):
        self._count = 0  # accumulated value
        self._jobs = 0   # number of add() calls
        self._lock = threading.Lock()

    def add(self, n):
        """
        Add n to the counter and increment the number of jobs by 1

        :param n: Number to increment by
        """
        with self._lock:
            self._count += n
            self._jobs += 1

    def split(self, precise=False):
        """
        Return the current counter value and keep going

        :param precise: Whether floating point precision is desired
        :return: ((INT or FLOAT), INT)
        """
        total = self._count if precise else int(self._count)
        return total, self._jobs

    def reset(self, precise=False):
        """
        Return the current counter value and then zero it.

        :param precise: Whether floating point precision is desired
        :return: ((INT or FLOAT), INT)
        """
        with self._lock:
            total, jobs = self._count, self._jobs
            self._count = 0
            self._jobs = 0

        if not precise:
            total = int(total)
        return total, jobs
class TimeitContext(object):
    """
    Wrapper around timeit to allow the timing of larger blocks of code by wrapping them in "with"

    The timer starts when the object is created and is discarded when the "with"
    block exits, so delta() has to be called inside the block.
    """

    def __init__(self):
        self._t = timeit.default_timer()

    def __enter__(self):
        return self

    def __exit__(self, *args):
        # mark the timer as gone (instead of deleting the attribute) so that a late
        # delta() call raises the documented ArithmeticError, not AttributeError
        self._t = None

    def delta(self):
        """
        Calculate the elapsed time since the context was initialised

        :return: FLOAT
        :raises ArithmeticError: when called after the context has exited
        """
        if self._t is None:
            raise ArithmeticError("Timer has not been started, cannot return delta")
        return timeit.default_timer() - self._t

    @classmethod
    def start(cls):
        """
        Factory method for TimeitContext

        :param cls:
        :return: TimeitContext
        """
        return cls()
class ServiceConfig:
    def __init__(self):
        """
        Stores all of the configuration variables for the LibreNMS service in a common object
        Starts with defaults, but can be populated with variables from config.php by calling populate()
        """
        self._uuid = str(uuid1())
        self.set_name(gethostname())

    def set_name(self, name):
        """
        Set the node name and derive unique_name ("<name>-<uuid>") from it.
        Empty or None names are ignored, leaving the current name in place.

        :param name: new node name, surrounding whitespace is stripped
        """
        if name:
            self.name = name.strip()
            self.unique_name = "{}-{}".format(self.name, self._uuid)

    class PollerConfig:
        """Worker count and dispatch frequency for one type of work."""

        def __init__(self, workers, frequency, calculate=None):
            self.workers = workers
            self.frequency = frequency
            self.calculate = calculate

    # config variables with defaults
    BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))

    node_id = None
    name = None
    unique_name = None
    single_instance = True
    distributed = False
    group = 0

    debug = False
    log_level = 20

    # NOTE: these PollerConfig objects live on the class, so populate() updates them in place
    alerting = PollerConfig(1, 60)
    poller = PollerConfig(24, 300)
    services = PollerConfig(8, 300)
    discovery = PollerConfig(16, 21600)
    billing = PollerConfig(2, 300, 60)
    down_retry = 60
    update_frequency = 86400

    master_resolution = 1
    master_timeout = 10

    redis_host = 'localhost'
    redis_port = 6379
    redis_db = 0
    redis_pass = None
    redis_socket = None

    db_host = 'localhost'
    db_port = 0
    db_socket = None
    db_user = 'librenms'
    db_pass = ''
    db_name = 'librenms'

    def populate(self):
        """
        Fill this object from the data returned by _get_config_data() and from the environment.

        Legacy "poller_service_*" keys are read first; the newer "service_*" keys
        take precedence when present. Redis and database settings prefer environment
        variables over the config data. Finally the root logger level is applied.
        """
        config = self._get_config_data()

        # populate config variables
        self.node_id = os.getenv('NODE_ID')
        self.set_name(config.get('distributed_poller_name', None))
        self.distributed = config.get('distributed_poller', ServiceConfig.distributed)
        self.group = ServiceConfig.parse_group(config.get('distributed_poller_group', ServiceConfig.group))

        # backward compatible options
        self.poller.workers = config.get('poller_service_workers', ServiceConfig.poller.workers)
        self.poller.frequency = config.get('poller_service_poll_frequency', ServiceConfig.poller.frequency)
        self.discovery.frequency = config.get('poller_service_discover_frequency', ServiceConfig.discovery.frequency)
        self.down_retry = config.get('poller_service_down_retry', ServiceConfig.down_retry)
        self.log_level = config.get('poller_service_loglevel', ServiceConfig.log_level)

        # new options
        self.poller.workers = config.get('service_poller_workers', ServiceConfig.poller.workers)
        self.poller.frequency = config.get('service_poller_frequency', ServiceConfig.poller.frequency)
        self.services.workers = config.get('service_services_workers', ServiceConfig.services.workers)
        self.services.frequency = config.get('service_services_frequency', ServiceConfig.services.frequency)
        self.discovery.workers = config.get('service_discovery_workers', ServiceConfig.discovery.workers)
        self.discovery.frequency = config.get('service_discovery_frequency', ServiceConfig.discovery.frequency)
        self.billing.frequency = config.get('service_billing_frequency', ServiceConfig.billing.frequency)
        self.billing.calculate = config.get('service_billing_calculate_frequency', ServiceConfig.billing.calculate)
        # fall back to the value read from the legacy key above, not to the class
        # default, otherwise a legacy-only setting would be thrown away here
        self.down_retry = config.get('service_poller_down_retry', self.down_retry)
        self.log_level = config.get('service_loglevel', self.log_level)
        self.update_frequency = config.get('service_update_frequency', ServiceConfig.update_frequency)

        self.redis_host = os.getenv('REDIS_HOST', config.get('redis_host', ServiceConfig.redis_host))
        self.redis_db = os.getenv('REDIS_DB', config.get('redis_db', ServiceConfig.redis_db))
        self.redis_pass = os.getenv('REDIS_PASSWORD', config.get('redis_pass', ServiceConfig.redis_pass))
        self.redis_port = int(os.getenv('REDIS_PORT', config.get('redis_port', ServiceConfig.redis_port)))
        self.redis_socket = os.getenv('REDIS_SOCKET', config.get('redis_socket', ServiceConfig.redis_socket))

        self.db_host = os.getenv('DB_HOST', config.get('db_host', ServiceConfig.db_host))
        self.db_name = os.getenv('DB_DATABASE', config.get('db_name', ServiceConfig.db_name))
        self.db_pass = os.getenv('DB_PASSWORD', config.get('db_pass', ServiceConfig.db_pass))
        self.db_port = int(os.getenv('DB_PORT', config.get('db_port', ServiceConfig.db_port)))
        self.db_socket = os.getenv('DB_SOCKET', config.get('db_socket', ServiceConfig.db_socket))
        self.db_user = os.getenv('DB_USERNAME', config.get('db_user', ServiceConfig.db_user))

        # set convenient debug variable
        self.debug = logging.getLogger().isEnabledFor(logging.DEBUG)

        # the configured level is only applied when debug logging is not already active
        if not self.debug and self.log_level:
            try:
                logging.getLogger().setLevel(self.log_level)
            except ValueError:
                error("Unknown log level {}, must be one of 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'".format(self.log_level))
                logging.getLogger().setLevel(logging.INFO)

    def _get_config_data(self):
        """
        Load .env into the environment and return the configuration produced by config_to_json.php.

        Exits with status 3 when python-dotenv is missing or .env provides no NODE_ID.

        :return: dict decoded from the script's JSON output
        """
        try:
            import dotenv
            env_path = "{}/.env".format(self.BASE_DIR)
            info("Attempting to load .env from '%s'", env_path)
            dotenv.load_dotenv(dotenv_path=env_path, verbose=True)

            if not os.getenv('NODE_ID'):
                raise ImportError(".env does not contain a valid NODE_ID setting.")

        except ImportError:
            exception("Could not import .env - check that the poller user can read the file, and that composer install has been run recently")
            sys.exit(3)

        # NOTE(review): no shell is involved, so '2>&1' reaches the script as a plain
        # argument and redirects nothing — confirm whether stderr capture was intended
        config_cmd = ['/usr/bin/env', 'php', '{}/config_to_json.php'.format(self.BASE_DIR), '2>&1']
        try:
            return json.loads(subprocess.check_output(config_cmd).decode())
        except subprocess.CalledProcessError as e:
            # NOTE(review): after logging, this path returns None, which populate()
            # cannot use (it calls .get() on the result)
            error("ERROR: Could not load or parse configuration! {}: {}"
                  .format(subprocess.list2cmdline(e.cmd), e.output.decode()))

    @staticmethod
    def parse_group(g):
        """
        Normalise the poller group setting to a list of ints.

        :param g: None, an int, or a comma separated string of ints
        :return: list of group ids; [0] for None and, after logging an error, for anything unparsable
        """
        if g is None:
            return [0]
        elif type(g) is int:
            return [g]
        elif type(g) is str:
            try:
                return [int(x) for x in set(g.split(','))]
            except ValueError:
                pass

        error("Could not parse group string, defaulting to 0")
        return [0]
class Service:
config = ServiceConfig()
_fp = False
_started = False
alerting_manager = None
poller_manager = None
discovery_manager = None
services_manager = None
billing_manager = None
last_poll = {}
terminate_flag = False
def __init__(self):
    """
    Load the configuration, attach signal handlers, open the database wrappers
    and create the lock manager, timers and performance counters.
    """
    self.config.populate()
    threading.current_thread().name = self.config.name  # rename main thread
    self.attach_signals()

    # init database connections different ones for different threads
    self._db = LibreNMS.DB(self.config)  # main
    self._services_db = LibreNMS.DB(self.config)  # services dispatch
    self._discovery_db = LibreNMS.DB(self.config)  # discovery dispatch

    self._lm = self.create_lock_manager()

    # recurring background jobs: maintenance and performance logging
    maintenance_interval = self.config.update_frequency
    self.daily_timer = LibreNMS.RecurringTimer(maintenance_interval, self.run_maintenance, 'maintenance')
    stats_interval = self.config.poller.frequency
    self.stats_timer = LibreNMS.RecurringTimer(stats_interval, self.log_performance_stats, 'performance')

    self.is_master = False
    self.performance_stats = {
        kind: PerformanceCounter() for kind in ('poller', 'discovery', 'services')
    }
def attach_signals(self):
    """Register self.terminate as the SIGTERM handler."""
    current = threading.current_thread()
    info("Attaching signal handlers on thread %s", current.name)
    signal(SIGTERM, self.terminate)  # capture sigterm and exit gracefully
def start(self):
# Run the service: perform the startup checks, create the queue managers, start the
# maintenance and statistics timers, then loop as dispatcher until terminate_flag is
# set (or a KeyboardInterrupt arrives) and finally call shutdown().
# Raises RuntimeWarning when this instance has already been started.
debug("Performing startup checks...")
if self.config.single_instance:
self.check_single_instance() # don't allow more than one service at a time
if self._started:
raise RuntimeWarning("Not allowed to start Poller twice")
self._started = True
debug("Starting up queue managers...")
# initialize and start the worker pools
# (the poller manager is a plain QueueManager: its work is only posted from the loop below)
self.poller_manager = LibreNMS.QueueManager(self.config, 'poller', self.poll_device)
self.alerting_manager = LibreNMS.TimedQueueManager(self.config, 'alerting', self.poll_alerting,
self.dispatch_alerting)
self.services_manager = LibreNMS.TimedQueueManager(self.config, 'services', self.poll_services,
self.dispatch_services)
self.discovery_manager = LibreNMS.TimedQueueManager(self.config, 'discovery', self.discover_device,
self.dispatch_discovery)
self.billing_manager = LibreNMS.BillingQueueManager(self.config, self.poll_billing,
self.dispatch_poll_billing, self.dispatch_calculate_billing)
self.daily_timer.start()
self.stats_timer.start()
info("LibreNMS Service: {} started!".format(self.config.unique_name))
info("Poller group {}. Using Python {} and {} locks and queues"
.format('0 (default)' if self.config.group == [0] else self.config.group, python_version(),
'redis' if isinstance(self._lm, LibreNMS.RedisLock) else 'internal'))
info("Maintenance tasks will be run every {}".format(timedelta(seconds=self.config.update_frequency)))
# Main dispatcher loop
try:
while not self.terminate_flag:
# take or renew the 'dispatch.master' lock for master_timeout seconds; the final True
# is passed as the allow_owner_relock argument
master_lock = self._lm.lock('dispatch.master', self.config.unique_name, self.config.master_timeout, True)
if master_lock:
if not self.is_master:
info("{} is now the master dispatcher".format(self.config.name))
self.is_master = True
self.start_dispatch_timers()
# each row is (device_id, poller_group, poll flag, discover flag)
devices = self.fetch_immediate_device_list()
for device in devices:
device_id = device[0]
group = device[1]
if device[2]: # polling
self.dispatch_immediate_polling(device_id, group)
if device[3]: # discovery
self.dispatch_immediate_discovery(device_id, group)
else:
if self.is_master:
info("{} is no longer the master dispatcher".format(self.config.name))
self.stop_dispatch_timers()
self.is_master = False # no longer master
# NOTE(review): indentation is lost in this view, so it cannot be told here whether this
# sleep runs on every loop iteration or only in the non-master branch — confirm in the file
sleep(self.config.master_resolution)
except KeyboardInterrupt:
pass
info("Dispatch loop terminated")
self.shutdown()
# ------------ Discovery ------------
def dispatch_immediate_discovery(self, device_id, group):
    """
    Queue discovery of a device, unless the group's discovery queue still holds
    work or discovery of this device is locked.

    :param device_id: device to discover
    :param group: poller group whose queue receives the work
    """
    if not self.discovery_manager.get_queue(group).empty():
        return
    if self.discovery_is_locked(device_id):
        return
    self.discovery_manager.post_work(device_id, group)
def dispatch_discovery(self):
    """Post discovery work for every row returned by fetch_device_list()."""
    for row in self.fetch_device_list():
        device_id, group = row[0], row[1]
        self.discovery_manager.post_work(device_id, group)
def discover_device(self, device_id):
    """
    Run discovery.php for a device while holding its discovery lock.

    The lock is released afterwards, except when the script exits with status 5
    (device down): then the lock is taken again with the relock flag set.

    :param device_id: device to discover
    """
    if not self.lock_discovery(device_id):
        return

    try:
        with TimeitContext.start() as timer:
            info("Discovering device {}".format(device_id))
            self.call_script('discovery.php', ('-h', device_id))
            info('Discovery complete {}'.format(device_id))
            self.report_execution_time(timer.delta(), 'discovery')
    except subprocess.CalledProcessError as e:
        if e.returncode != 5:
            self.unlock_discovery(device_id)
            return
        info("Device {} is down, cannot discover, waiting {}s for retry"
             .format(device_id, self.config.down_retry))
        self.lock_discovery(device_id, True)
    else:
        self.unlock_discovery(device_id)
# ------------ Alerting ------------
def dispatch_alerting(self):
    """Post an 'alerts' job to queue 0 of the alerting manager."""
    payload, queue_id = 'alerts', 0
    self.alerting_manager.post_work(payload, queue_id)
def poll_alerting(self, _=None):
    """
    Run alerts.php. An exit status of 1 is logged as a warning; any other
    failure is re-raised.

    :param _: queue payload, not used
    """
    try:
        info("Checking alerts")
        self.call_script('alerts.php')
    except subprocess.CalledProcessError as e:
        if e.returncode != 1:
            raise
        warning("There was an error issuing alerts: {}".format(e.output))
# ------------ Services ------------
def dispatch_services(self):
    """Post service-check work for every row returned by fetch_services_device_list()."""
    for row in self.fetch_services_device_list():
        device_id, group = row[0], row[1]
        self.services_manager.post_work(device_id, group)
def poll_services(self, device_id):
    """
    Run check-services.php for a device while holding its services lock.

    The lock is released afterwards, except when the script exits with status 5
    (device down): then the lock is taken again with the relock flag set.

    :param device_id: device whose services are checked
    """
    if not self.lock_services(device_id):
        return

    try:
        with TimeitContext.start() as timer:
            info("Checking services on device {}".format(device_id))
            self.call_script('check-services.php', ('-h', device_id))
            info('Services complete {}'.format(device_id))
            self.report_execution_time(timer.delta(), 'services')
    except subprocess.CalledProcessError as e:
        if e.returncode != 5:
            self.unlock_services(device_id)
            return
        info("Device {} is down, cannot poll service, waiting {}s for retry"
             .format(device_id, self.config.down_retry))
        self.lock_services(device_id, True)
    else:
        self.unlock_services(device_id)
# ------------ Billing ------------
def dispatch_calculate_billing(self):
    """Post a 'calculate' job to queue 0 of the billing manager."""
    payload, queue_id = 'calculate', 0
    self.billing_manager.post_work(payload, queue_id)
def dispatch_poll_billing(self):
    """Post a 'poll' job to queue 0 of the billing manager."""
    payload, queue_id = 'poll', 0
    self.billing_manager.post_work(payload, queue_id)
def poll_billing(self, run_type):
    """
    Run the billing script that matches the job type.

    :param run_type: 'poll' runs poll-billing.php, anything else runs billing-calculate.php
    """
    polling = run_type == 'poll'  # otherwise run_type == 'calculate'

    info("Polling billing" if polling else "Calculating billing")
    self.call_script('poll-billing.php' if polling else 'billing-calculate.php')
    info("Polling billing complete" if polling else "Calculating billing complete")
# ------------ Polling ------------
def dispatch_immediate_polling(self, device_id, group):
    """
    Post a polling job for a device, but only when the group's poller queue is
    empty and the device holds no polling lock.
    In debug mode the time since the previous dispatch of this device is tracked
    in self.last_poll and logged when it exceeds the threshold.
    :param device_id: id of the device to poll
    :param group: poller group whose queue receives the job
    """
    queue_idle = self.poller_manager.get_queue(group).empty()
    if not queue_idle or self.polling_is_locked(device_id):
        return

    self.poller_manager.post_work(device_id, group)
    if not self.config.debug:
        return

    now = time.time()
    since_last = now - self.last_poll.get(device_id, now)
    self.last_poll[device_id] = time.time()
    # arbitrary limit to reduce spam
    threshold = self.config.poller.frequency - self.config.master_resolution
    if since_last > threshold:
        debug("Dispatching polling for device {}, time since last poll {:.2f}s"
              .format(device_id, since_last))
def poll_device(self, device_id):
    """
    Run poller.php for a single device under its polling lock.
    When the lock cannot be obtained, only a debug message is logged.
    Exit status 6 (device unreachable) re-locks the device with the retry timeout;
    any other failure is logged as an error and the lock is released.
    :param device_id: id of the device to poll
    """
    if not self.lock_polling(device_id):
        debug('Tried to poll {}, but it is locked'.format(device_id))
        return

    info('Polling device {}'.format(device_id))
    try:
        with TimeitContext.start() as timer:
            self.call_script('poller.php', ('-h', device_id))
            self.report_execution_time(timer.delta(), 'poller')
    except subprocess.CalledProcessError as failure:
        if failure.returncode != 6:
            error('Polling device {} failed! {}'.format(device_id, failure))
            self.unlock_polling(device_id)
        else:
            warning('Polling device {} unreachable, waiting {}s for retry'.format(device_id, self.config.down_retry))
            # re-lock to set retry timer
            self.lock_polling(device_id, True)
    else:
        info('Polling complete {}'.format(device_id))
        # NOTE(review): the polling lock is not released on success; presumably it is
        # left to expire after its timeout so the device is not re-polled early - confirm
def fetch_services_device_list(self):
    """
    Query the services database connection for the distinct devices that have
    services defined and are not disabled.
    :return: the query result; each row is (device_id, poller_group)
    """
    sql = ("SELECT DISTINCT(`device_id`), `poller_group` FROM `services`"
           " LEFT JOIN `devices` USING (`device_id`) WHERE `disabled`=0")
    return self._services_db.query(sql)
def fetch_device_list(self):
    """
    Query the discovery database connection for every device that is not disabled.
    :return: the query result; each row is (device_id, poller_group)
    """
    sql = "SELECT `device_id`, `poller_group` FROM `devices` WHERE `disabled`=0"
    return self._discovery_db.query(sql)
def fetch_immediate_device_list(self):
    """
    Find the enabled devices that are due for polling and/or discovery.
    A device is selected when `last_polled` or `last_discovered` is NULL, or when
    either timestamp is at least (find time - time the last run took) seconds old.
    Results are ordered with the devices that took longest to poll first.
    :return: the query result; each row is (device_id, poller_group, poll, discover)
             where poll/discover are flags saying which of the two actions is due
             (a NULL timestamp counts as due)
    """
    # the look-back windows are one second shorter than the configured frequencies
    poller_find_time = self.config.poller.frequency - 1
    discovery_find_time = self.config.discovery.frequency - 1
    # placeholders are filled in order: poll flag, discover flag, then the two WHERE conditions
    return self._db.query('''SELECT `device_id`,
`poller_group`,
COALESCE(`last_polled` <= DATE_ADD(DATE_ADD(NOW(), INTERVAL -%s SECOND), INTERVAL `last_polled_timetaken` SECOND), 1) AS `poll`,
COALESCE(`last_discovered` <= DATE_ADD(DATE_ADD(NOW(), INTERVAL -%s SECOND), INTERVAL `last_discovered_timetaken` SECOND), 1) AS `discover`
FROM `devices`
WHERE `disabled` = 0 AND (
`last_polled` IS NULL OR
`last_discovered` IS NULL OR
`last_polled` <= DATE_ADD(DATE_ADD(NOW(), INTERVAL -%s SECOND), INTERVAL `last_polled_timetaken` SECOND) OR
`last_discovered` <= DATE_ADD(DATE_ADD(NOW(), INTERVAL -%s SECOND), INTERVAL `last_discovered_timetaken` SECOND)
)
ORDER BY `last_polled_timetaken` DESC''', (poller_find_time, discovery_find_time, poller_find_time, discovery_find_time))
def run_maintenance(self):
    """
    Runs update and cleanup tasks by calling daily.sh. Reloads the python script after the update.
    Sets a schema-update lock so no distributed pollers will update until the schema has been updated.
    The lock is released again as soon as daily.sh has finished (successfully or not), so other
    pollers do not have to wait for the lock to time out.
    :raises subprocess.CalledProcessError: if daily.sh exits with a non-zero status; the
            service is not restarted in that case
    """
    attempt = 0
    wait = 5
    max_runtime = 86100
    max_tries = int(max_runtime / wait)
    info("Waiting for schema lock")
    have_lock = self._lm.lock('schema-update', self.config.unique_name, max_runtime)
    while not have_lock:
        attempt += 1
        if attempt >= max_tries:  # don't get stuck indefinitely
            warning('Reached max wait for other pollers to update, updating now')
            break
        sleep(wait)
        have_lock = self._lm.lock('schema-update', self.config.unique_name, max_runtime)

    try:
        info("Running maintenance tasks")
        output = self.call_script('daily.sh')
        info("Maintenance tasks complete\n{}".format(output))
    except subprocess.CalledProcessError as e:
        failure_output = e.output
        if isinstance(failure_output, bytes):
            failure_output = failure_output.decode()
        error("Error in daily.sh:\n{}".format(failure_output if failure_output is not None else 'No output'))
        raise
    finally:
        # only release a lock we actually obtained; if we gave up waiting it belongs to another poller
        if have_lock:
            self._lm.unlock('schema-update', self.config.unique_name)

    self.restart()
# Lock Helpers #
def lock_discovery(self, device_id, retry=False):
    """
    Take the discovery lock for a device.
    :param device_id: id of the device to lock
    :param retry: when True the lock uses the down_retry timeout and the flag is passed
                  on to the lock manager; otherwise the normalized discovery frequency is used
    :return: the result of the lock manager's lock() call
    """
    if retry:
        timeout = self.config.down_retry
    else:
        timeout = LibreNMS.normalize_wait(self.config.discovery.frequency)
    return self._lm.lock(self.gen_lock_name('discovery', device_id), self.gen_lock_owner(), timeout, retry)
def unlock_discovery(self, device_id):
    """
    Release the discovery lock for a device on behalf of the current lock owner.
    :param device_id: id of the device to unlock
    :return: the result of the lock manager's unlock() call
    """
    return self._lm.unlock(self.gen_lock_name('discovery', device_id), self.gen_lock_owner())
def discovery_is_locked(self, device_id):
    """
    Ask the lock manager about the discovery lock of a device.
    :param device_id: id of the device to check
    :return: the result of the lock manager's check_lock() call
    """
    return self._lm.check_lock(self.gen_lock_name('discovery', device_id))
def lock_polling(self, device_id, retry=False):
    """
    Take the polling lock for a device.
    :param device_id: id of the device to lock
    :param retry: when True the lock uses the down_retry timeout and the flag is passed
                  on to the lock manager; otherwise the poller frequency is used
    :return: the result of the lock manager's lock() call
    """
    if retry:
        timeout = self.config.down_retry
    else:
        timeout = self.config.poller.frequency
    return self._lm.lock(self.gen_lock_name('polling', device_id), self.gen_lock_owner(), timeout, retry)
def unlock_polling(self, device_id):
    """
    Release the polling lock for a device on behalf of the current lock owner.
    :param device_id: id of the device to unlock
    :return: the result of the lock manager's unlock() call
    """
    return self._lm.unlock(self.gen_lock_name('polling', device_id), self.gen_lock_owner())
def polling_is_locked(self, device_id):
    """
    Ask the lock manager about the polling lock of a device.
    :param device_id: id of the device to check
    :return: the result of the lock manager's check_lock() call
    """
    return self._lm.check_lock(self.gen_lock_name('polling', device_id))
def lock_services(self, device_id, retry=False):
    """
    Take the services lock for a device.
    :param device_id: id of the device to lock
    :param retry: when True the lock uses the down_retry timeout and the flag is passed
                  on to the lock manager; otherwise the services frequency is used
    :return: the result of the lock manager's lock() call
    """
    if retry:
        timeout = self.config.down_retry
    else:
        timeout = self.config.services.frequency
    return self._lm.lock(self.gen_lock_name('services', device_id), self.gen_lock_owner(), timeout, retry)
def unlock_services(self, device_id):
    """
    Release the services lock for a device on behalf of the current lock owner.
    :param device_id: id of the device to unlock
    :return: the result of the lock manager's unlock() call
    """
    return self._lm.unlock(self.gen_lock_name('services', device_id), self.gen_lock_owner())
def services_is_locked(self, device_id):
    """
    Ask the lock manager about the services lock of a device.
    :param device_id: id of the device to check
    :return: the result of the lock manager's check_lock() call
    """
    return self._lm.check_lock(self.gen_lock_name('services', device_id))
@staticmethod
def gen_lock_name(lock_class, device_id):
    """
    Build the name of a per-device lock.
    :param lock_class: kind of lock, e.g. 'polling', 'discovery' or 'services'
    :param device_id: id of the device the lock belongs to
    :return: '<lock_class>.device.<device_id>'
    """
    template = '{}.device.{}'
    return template.format(lock_class, device_id)
def gen_lock_owner(self):
    """
    Build the lock owner string for the calling thread.
    :return: '<config.unique_name>-<current thread name>'
    """
    thread_name = threading.current_thread().name
    return "{}-{}".format(self.config.unique_name, thread_name)
def call_script(self, script, args=()):
    """
    Run a LibreNMS script. Captures all output and throws an exception if a non-zero
    status is returned. Blocks parent signals (like SIGINT and SIGTERM).
    :param script: the name of the executable relative to the base directory
    :param args: a tuple of arguments to send to the command
    :returns the output of the command
    :raises subprocess.CalledProcessError: if the command exits with a non-zero status
    """
    if script.endswith('.php'):
        # save calling the sh process
        base = ('/usr/bin/env', 'php')
    else:
        base = ()

    cmd = base + ("{}/{}".format(self.config.BASE_DIR, script),) + tuple(map(str, args))
    # start_new_session=True makes the child call setsid(), which keeps process signals
    # from propagating. It replaces preexec_fn=os.setsid, which the subprocess
    # documentation marks as unsafe when the calling application uses threads.
    return subprocess.check_output(cmd, stderr=subprocess.STDOUT, start_new_session=True, close_fds=True).decode()
def create_lock_manager(self):
    """
    Build the LockManager used by the service.
    A Redis backed LockManager is preferred; when it cannot be created the service
    falls back to python's internal threading lock implementation.
    If distributed polling is enabled the fallback is not allowed: the problem is
    logged as critical and the process exits with status 2.
    :return: Instance of LockManager
    """
    try:
        return LibreNMS.RedisLock(namespace='librenms.lock',
                                  host=self.config.redis_host,
                                  port=self.config.redis_port,
                                  db=self.config.redis_db,
                                  password=self.config.redis_pass,
                                  unix_socket_path=self.config.redis_socket)
    except Exception as failure:
        if self.config.distributed:
            critical("ERROR: Redis connection required for distributed polling")
            if isinstance(failure, ImportError):
                # the redis module itself is missing
                critical("Please install redis-py, either through your os software repository or from PyPI")
            else:
                critical("Could not connect to Redis. {}".format(failure))
            sys.exit(2)

    return LibreNMS.ThreadingLock()
def restart(self):
    """
    Stop then recreate this entire process by re-calling the original script.
    Has the effect of reloading the python files from disk.
    On interpreters older than Python 3.4 nothing is restarted; two warnings are
    logged and the method returns.
    """
    if sys.version_info < (3, 4, 0):
        warning("Skipping restart as running under an incompatible interpreter")
        warning("Please restart manually")
        return

    info('Restarting service... ')
    # stop the queue managers and wait for their threads before replacing the process
    self._stop_managers_and_wait()
    # give up the 'dispatch.master' lock held under this node's unique name
    self._lm.unlock('dispatch.master', self.config.unique_name)

    # replace the current process image: same interpreter, same command line
    python = sys.executable
    os.execl(python, python, *sys.argv)
def terminate(self, _unused=None, _=None):
    """
    Signal handler: set the terminate flag to begin a clean shutdown.
    Only the flag is set here; nothing is stopped by this method itself.
    :param _unused: unused (a signal handler receives the signal number here)
    :param _: unused (a signal handler receives the current stack frame here)
    """
    info("Received SIGTERM on thread %s, handling", threading.current_thread().name)
    self.terminate_flag = True
def shutdown(self, _unused=None, _=None):
    """
    Stop the service and exit the process with status 0.
    Dispatching is stopped first, then the master lock is released, the daily and
    stats timers are stopped and finally all queue managers are stopped and waited for.
    :param _unused: unused
    :param _: unused
    """
    info('Shutting down, waiting for running jobs to complete...')

    self.stop_dispatch_timers()
    # try to release master lock
    self._lm.unlock('dispatch.master', self.config.unique_name)

    for timer_name in ('daily_timer', 'stats_timer'):
        getattr(self, timer_name).stop()

    self._stop_managers_and_wait()

    pid = os.getpid()
    thread_name = threading.current_thread().name
    info('Shutdown of %s/%s complete', pid, thread_name)
    sys.exit(0)
def start_dispatch_timers(self):
    """
    Begin pushing events into the queues by starting dispatch on the alerting,
    billing, services and discovery managers (in that order).
    This should only be started when we are the master dispatcher.
    """
    for manager_name in ('alerting_manager', 'billing_manager', 'services_manager', 'discovery_manager'):
        getattr(self, manager_name).start_dispatch()
def stop_dispatch_timers(self):
    """
    Stop dispatch on the alerting, billing, services and discovery managers (in that order).
    This should be called when we are no longer the master dispatcher.
    """
    for manager_name in ('alerting_manager', 'billing_manager', 'services_manager', 'discovery_manager'):
        getattr(self, manager_name).stop_dispatch()
def _stop_managers_and_wait(self):
    """
    Stop all QueueManagers, and wait for their processing threads to complete.
    We send the stop signal to all QueueManagers first, then wait for them to finish.
    The alerting manager is included so that it is not left running during a
    restart or shutdown.
    """
    managers = (
        self.discovery_manager,
        self.poller_manager,
        self.services_manager,
        self.billing_manager,
        self.alerting_manager,
    )

    # signal everything first so the managers wind down in parallel
    for manager in managers:
        manager.stop()

    for manager in managers:
        manager.stop_and_wait()
def check_single_instance(self):
    """
    Check that there is only one instance of the service running on this computer.
    We do this by creating a file in the base directory (.lock.service) if it doesn't exist and
    obtaining an exclusive lock on that file.
    Exits the process with status 2 when the lock cannot be obtained.
    """
    lock_file = "{}/{}".format(self.config.BASE_DIR, '.lock.service')

    import fcntl
    self._fp = open(lock_file, 'w')  # keep a reference so the file handle isn't garbage collected
    try:
        # non-blocking: fail immediately instead of waiting for the other instance
        fcntl.lockf(self._fp, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
        warning("Another instance is already running, quitting.")
        self._fp.close()  # don't leave the handle open on the failure path
        # sys.exit rather than the site-provided exit(), which is not always available
        sys.exit(2)
def report_execution_time(self, time, activity):
    """
    Add a measured run time to the performance counter of an activity.
    :param time: the measured duration to add
    :param activity: key into self.performance_stats, e.g. 'poller', 'services' or 'discovery'
    """
    counter = self.performance_stats[activity]
    counter.add(time)
def log_performance_stats(self):
    """
    Write this node's status and per-worker-type statistics to the database.
    One row is upserted into poller_cluster for the node, then one row per entry in
    self.performance_stats is upserted into poller_cluster_stats. Each counter is reset
    as it is read. All values are passed to the database layer as query parameters
    instead of being formatted into the SQL text.
    Any exception is logged and swallowed so that a database problem does not
    propagate to the caller.
    """
    info("Counting up time spent polling")

    try:
        poller_version = "librenms-service"
        groups = ','.join(str(g) for g in self.config.group)
        master = 1 if self.is_master else 0

        # Report on the poller instance as a whole
        self._db.query('INSERT INTO poller_cluster(node_id, poller_name, poller_version, poller_groups, last_report, master) '
                       'values(%s, %s, %s, %s, NOW(), %s) '
                       'ON DUPLICATE KEY UPDATE poller_version=%s, poller_groups=%s, last_report=NOW(), master=%s; ',
                       (self.config.node_id, self.config.name, poller_version, groups, master,
                        poller_version, groups, master))

        # Find our ID
        self._db.query('SELECT id INTO @parent_poller_id FROM poller_cluster WHERE node_id=%s; ',
                       (self.config.node_id,))

        for worker_type, counter in self.performance_stats.items():
            worker_seconds, devices = counter.reset()
            manager = getattr(self, ''.join([worker_type, '_manager']))
            settings = getattr(self.config, worker_type)
            # total number of queued jobs across all groups served by this node
            depth = sum(manager.get_queue(group).qsize() for group in self.config.group)
            stats = (depth, devices, worker_seconds, settings.workers, settings.frequency)

            # Record the queue state
            self._db.query('INSERT INTO poller_cluster_stats(parent_poller, poller_type, depth, devices, worker_seconds, workers, frequency) '
                           'values(@parent_poller_id, %s, %s, %s, %s, %s, %s) '
                           'ON DUPLICATE KEY UPDATE depth=%s, devices=%s, worker_seconds=%s, workers=%s, frequency=%s; ',
                           (worker_type,) + stats + stats)
    except Exception:
        exception("Unable to log performance statistics - is the database still online?")

View File

@ -7,10 +7,11 @@
*
*/
use LibreNMS\Config;
$init_modules = array();
require __DIR__ . '/includes/init.php';
if (isCli()) {
global $config;
echo json_encode($config);
echo Config::json_encode();
}

View File

@ -125,7 +125,7 @@ if (!empty($config['distributed_poller_group'])) {
global $device;
foreach (dbFetch("SELECT * FROM `devices` WHERE disabled = 0 AND snmp_disable = 0 $where ORDER BY device_id DESC", $sqlparams) as $device) {
discover_device($device, $options);
$discovered_devices += discover_device($device, $options);
}
$end = microtime(true);
@ -152,3 +152,8 @@ if (!isset($options['q'])) {
}
logfile($string);
if ($discovered_devices == 0) {
# No discoverable devices, either down or disabled
exit(5);
}

View File

@ -1,41 +1,136 @@
source: Extensions/Poller-Service.md
# Poller Service
# WARNING: THIS IS HIGHLY EXPERIMENTAL AND MAY NOT WORK
> Status: BETA
The Poller service is an alternative to polling and discovery cron jobs and provides support for distributed polling without memcache. It is multi-threaded and runs continuously discovering and polling devices with the oldest data attempting to honor the polling frequency configured in `config.php`. This service replaces all the required cron jobs except for `/opt/librenms/daily.sh` and `/opt/librenms/alerts.php`.
The new poller service (`librenms-service.py`) replaces the old poller service (`poller-service.py`), improving its reliability. It's mostly compatible with the old service, but testing is recommended before switching over.
Configure the maximum number of threads for the service in `$config['poller_service_workers']`. Configure the minimum desired polling frequency in `$config['poller_service_poll_frequency']` and the minimum desired discovery frequency in `$config['poller_service_discover_frequency']`. The service will not poll or discover devices which have data newer than this this configured age in seconds. Configure how frequently the service will attempt to poll devices which are down in `$config['poller_service_down_retry']`. If you have enough pollers that the worker threads run out of work, the service will query looking for devices every `$config['poller_service_retry_query']` seconds.
If you are currently using the old poller service, it's strongly recommended that you migrate away - it has a serious defect under certain versions of mysql/mariadb, and may be inadvertently DoS'ing your devices. The new service does not have this issue.
The poller service is designed to gracefully degrade. If not all devices can be polled within the configured frequency, the service will continuously poll devices refreshing as frequently as possible using the configured number of threads.
Make sure you uninstall the old poller service before deploying the new one.
The service logs to syslog. A loglevel of INFO will print status updates every 5 minutes. Loglevel of DEBUG will print updates on every device as it is scanned.
## External Requirements
#### A recent version of Python
The poller service won't work under Python 2.7; some features require behaviour only found in Python 3.4+.
## Configuration
```php
// Poller-Service settings
$config['poller_service_loglevel'] = "INFO";
$config['poller_service_workers'] = 16;
$config['poller_service_poll_frequency'] = 300;
$config['poller_service_discover_frequency'] = 21600;
$config['poller_service_down_retry'] = 60;
$config['poller_service_retry_query'] = 1;
$config['poller_service_single_connection'] = false;
#### Python modules
- PyMySQL is recommended as it requires no C compiler to install. MySQLclient can also be used, but does require compilation.
- python-dotenv .env loader
- redis-py (if using distributed polling)
These can be obtained from your OS package manager, or from PyPI with the command below.
```bash
pip3 install -r requirements.txt
```
## Distributed Polling
Distributed polling is possible, and uses the same configuration options as are described for traditional distributed polling, except that the memcached options are not necessary. The database must be accessible from the distributed pollers, and properly configured. Remote access to the RRD directory must also be configured as described in the Distributed Poller documentation. Memcache is not required. Concurrency is managed using mysql GET_LOCK to ensure that devices are only being polled by one device at at time. The poller service is compatible with poller groups.
#### Redis (distributed polling only)
If you want to use distributed polling, you'll need a redis instance to coordinate the nodes. It's recommended that you do not share the redis database with any other system - by default, redis supports up to 16 databases (numbered 0-15).
## Multi-Master MySQL considerations
Because locks are not replicated in Multi-Master MySQL configurations, if you are using such a configuration, you will need to make sure that all pollers are using the same MySQL server.
It's strongly recommended that you deploy a resilient cluster of redis systems, and use redis-sentinel.
## Single Connection
If you are running MariaDB 10.2 or newer, you can tell poller-service to use a single mysql connection for managing locks by setting `$config['poller_service_single_connection']` to `true`. *DO NOT* configure this for any version of MariaDB less than 10.2 or any version of MySQL.
#### MySQL
You should already have this, but the pollers do need access to the SQL database. The poller service runs much faster and more aggressively than the standard poller, so keep an eye on the number of open connections and other important health metrics.
## Configuration
Connection settings are required in `.env`. The `.env` file is generated after composer install and APP_KEY and NODE_ID are set.
```dotenv
#APP_KEY= #Required, generated by composer install
#NODE_ID= #Required, generated by composer install
DB_HOST=localhost
DB_DATABASE=librenms
DB_USERNAME=librenms
DB_PASSWORD=
```
### Distributed Polling Configuration
Once you have your redis database set up, configure it in the .env file on each node.
```dotenv
REDIS_HOST=127.0.0.1
#REDIS_DB=0
#REDIS_PASSWORD=
#REDIS_PORT=6379
```
### Basic Configuration
Additional configuration settings can be set in `config.php` or directly into the database.
The defaults are shown here - it's recommended that you at least tune the number of workers.
```php
$config['service_poller_workers'] = 24; # Processes spawned for polling
$config['service_services_workers'] = 8; # Processes spawned for service polling
$config['service_discovery_workers'] = 16; # Processes spawned for discovery
//Optional Settings
$config['service_poller_frequency'] = 300; # Seconds between polling attempts
$config['service_services_frequency'] = 300; # Seconds between service polling attempts
$config['service_discovery_frequency'] = 21600; # Seconds between discovery runs
$config['service_billing_frequency'] = 300; # Seconds between billing calculations
$config['service_billing_calculate_frequency'] = 60; # Billing interval
$config['service_poller_down_retry'] = 60; # Seconds between failed polling attempts
$config['service_loglevel'] = 'INFO'; # Must be one of 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'
$config['service_update_frequency'] = 86400; # Seconds between LibreNMS update checks
```
There are also some SQL options, but these should be inherited from your LibreNMS web UI configuration.
Logs are sent to the system logging service (usually `journald` or `rsyslog`) - see https://docs.python.org/3/library/logging.html#logging-levels for the options available.
You should not rely on the password for the security of your system. See https://redis.io/topics/security
```php
$config['distributed_poller'] = true; # Set to true to enable distributed polling
$config['distributed_poller_name'] = null; # Uniquely identifies the poller instance
$config['distributed_poller_group'] = 0; # Which group to poll
```
## Cron Scripts
Once the poller service is installed, the cron scripts used by LibreNMS are no longer required and must be removed.
## Service Installation
### Upstart
An upstart configuration file can be found in `scripts/librenms-poller-service.conf`. To install run `cp /opt/librenms/scripts/librenms-poller-service.conf /etc/init/librenms-poller-service.conf`. The service will start on boot and can be started manually by running `start librenms-poller-service`. If you receive an error that the service does not exist, run `initctl reload-configuration`. The service is configured to run as the user `librenms` and will fail if that user does not exist.
### LSB
An LSB init script can be found in `scripts/librenms-poller-service.init`. To install run `cp /opt/librenms/scripts/librenms-poller-service.init /etc/init.d/librenms-poller-service && update-rc.d librenms-poller-service defaults`.
A systemd unit file is provided - the sysv and upstart init scripts could also be used with a little modification.
### systemd
A systemd unit file can be found in `scripts/librenms-poller-service.service`. To install run `cp /opt/librenms/scripts/librenms-poller-service.service /etc/systemd/system/librenms-poller-service.service && systemctl enable --now librenms-poller-service.service`.
A systemd unit file can be found in `misc/librenms.service`. To install run `cp /opt/librenms/misc/librenms.service /etc/systemd/system/librenms.service && systemctl enable --now librenms.service`
## OS-Specific Instructions
### RHEL/CentOS
To get the poller service running under python3.4+ on RHEL-derivatives with minimal fuss, you can use the software collections build:
First, enable SCL's on your system:
#### CentOS 7
```
# yum install centos-release-scl
```
#### RHEL 7
```
# subscription-manager repos --enable rhel-server-rhscl-7-rpms
```
Then install and configure the runtime and service:
```
# yum install rh-python36 epel-release
# yum install redis
# vi /opt/librenms/config.php
# vi /etc/redis.conf
# systemctl enable --now redis.service
# scl enable rh-python36 bash
# pip install pymysql redis
# cp /opt/librenms/misc/librenms.service.scl /etc/systemd/system/librenms.service
# systemctl enable --now librenms.service
```
If you want to use another version of python 3, change `rh-python36` in the unit file and the commands above to match the name of the replacement scl.

View File

@ -0,0 +1,44 @@
<?php
/**
 * delete-cluster-poller.inc.php
 *
 * Handle poller delete request: removes a poller_cluster entry and its
 * poller_cluster_stats rows, then answers with a JSON status object.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * @package LibreNMS
 * @link http://librenms.org
 * @copyright 2017 Tony Murray
 * @author Tony Murray <murraytony@gmail.com>
 */
use LibreNMS\Authentication\Auth;

if (!Auth::user()->hasGlobalAdmin()) {
    $status = array('status' =>1, 'message' => 'ERROR: You need to be admin to delete poller entries');
} else {
    $id = $vars['id'];
    if (!is_numeric($id)) {
        $status = array('status' =>1, 'message' => 'No poller has been selected');
    } else {
        // look the name up in poller_cluster, the table the entry is deleted from;
        // ids in the legacy `pollers` table belong to unrelated rows
        $poller_name = dbFetchCell('SELECT `poller_name` FROM `poller_cluster` WHERE `id`=?', array($id));
        if (dbDelete('poller_cluster', 'id=?', array($id)) && dbDelete('poller_cluster_stats', 'parent_poller=?', array($id))) {
            $status = array('status' => 0, 'message' => "Poller: <i>$poller_name ($id), has been deleted.</i>");
        } else {
            $status = array('status' => 1, 'message' => "Poller: <i>$poller_name ($id), has NOT been deleted.</i>");
        }
    }
}

header('Content-Type: application/json');
echo _json_encode($status);

View File

@ -45,6 +45,7 @@ if (Auth::user()->hasGlobalAdmin()) {
data-target="poller-removal">Delete
</button>
<input type="hidden" name="id" id="id" value="">
<input type="hidden" name="pollertype" id="pollertype" value="">
<input type="hidden" name="confirm" id="confirm" value="yes">
</form>
</div>
@ -55,16 +56,19 @@ if (Auth::user()->hasGlobalAdmin()) {
<script>
$('#confirm-delete').on('show.bs.modal', function (e) {
id = $(e.relatedTarget).data('id');
pollertype = $(e.relatedTarget).data('pollertype');
$("#id").val(id);
$("#pollertype").val(pollertype);
});
$('#poller-removal').click('', function (e) {
e.preventDefault();
var id = $("#id").val();
var pollertype = $("#pollertype").val();
$.ajax({
type: 'POST',
url: 'ajax_form.php',
data: {type: "delete-poller", id: id},
data: {type: pollertype, id: id},
success: function (result) {
if (result.status == 0) {
toastr.success(result.message);

View File

@ -19,49 +19,153 @@ require_once 'includes/modal/delete_poller.inc.php';
?>
<br />
<div class="table-responsive">
<table class="table table-striped table-bordered table-hover table-condensed">
<tr>
<th>Poller Name</th>
<th>Devices Polled</th>
<th>Total Poll Time</th>
<th>Last Ran</th>
<th>Actions</th>
</tr>
<?php
$query = 'SELECT *,UNIX_TIMESTAMP(NOW()) AS `now`, UNIX_TIMESTAMP(`last_polled`) AS `then` FROM `pollers` ORDER BY poller_name';
$rows = dbFetchRows($query);
foreach (dbFetchRows($query) as $poller) {
$old = ($poller['now'] - $poller['then']);
$step = Config::get('rrd.step', 300);
if (count($rows) !== 0) {
echo '
<h2>Standard Distributed Pollers</h2>
if ($old >= $step) {
$row_class = 'danger';
} elseif ($old >= ($step * 0.95)) {
$row_class = 'warning';
} else {
$row_class = 'success';
}
<div class="table-responsive">
<table class="table table-striped table-bordered table-hover table-condensed">
<tr>
<th>Poller Name</th>
<th>Devices Polled</th>
<th>Total Poll Time</th>
<th>Last Ran</th>
<th>Actions</th>
</tr>';
$actions = "";
if (Auth::user()->hasGlobalAdmin() && $old > ($step * 2)) {
// missed 2 polls show delete button
$actions .= "<button type='button' class='btn btn-danger btn-sm' aria-label='Delete' data-toggle='modal' data-target='#confirm-delete' data-id='{$poller['id']}' name='delete-poller'><i class='fa fa-trash' aria-hidden='true'></i></button>";
foreach ($rows as $poller) {
$old = ($poller['now'] - $poller['then']);
$step = Config::get('rrd.step', 300);
if ($old >= $step) {
$row_class = 'danger';
} elseif ($old >= ($step * 0.95)) {
$row_class = 'warning';
} else {
$row_class = 'success';
}
$actions = "";
if (Auth::user()->hasGlobalAdmin() && $old > ($step * 2)) {
// missed 2 polls show delete button
$actions .= "<button type='button' class='btn btn-danger btn-sm' aria-label='Delete' data-toggle='modal' data-target='#confirm-delete' data-id='{$poller['id']}' data-pollertype='delete-poller' name='delete-poller'><i class='fa fa-trash' aria-hidden='true'></i></button>";
}
echo '
<tr class="'.$row_class.'" id="row_' . $poller['id'] . '">
<td>'.$poller['poller_name'].'</td>
<td>'.$poller['devices'].'</td>
<td>'.$poller['time_taken'].' Seconds</td>
<td>'.$poller['last_polled'].'</td>
<td>'.$actions.'</td>
</tr>
';
}
echo '
<tr class="'.$row_class.'" id="row_' . $poller['id'] . '">
<td>'.$poller['poller_name'].'</td>
<td>'.$poller['devices'].'</td>
<td>'.$poller['time_taken'].' Seconds</td>
<td>'.$poller['last_polled'].'</td>
<td>'.$actions.'</td>
</tr>
';
</table>
</div>';
}
?>
$query = 'SELECT *,UNIX_TIMESTAMP(NOW()) AS `now`, UNIX_TIMESTAMP(`last_report`) AS `then` FROM `poller_cluster` ORDER BY poller_name';
$rows = dbFetchRows($query);
</table>
</div>
if (count($rows) !== 0) {
echo '
<h2>Poller Cluster Health</h2>
<div class="table-responsive">
<table class="table table-striped table-bordered table-condensed">
<tr>
<th>Name</th>
<th>Node ID</th>
<th>Version</th>
<th>Groups Served</th>
<th>Last Checkin</th>
<th>Cluster Master</th>
<th>Job</th>
<th>Workers</th>
<th>Devices Actioned<br><small>Last Interval</small></th>
<th>Devices Pending</th>
<th>Worker Seconds<br><small>Consumed/Maximum</small></th>
<th>Actions</th>
</tr>';
foreach ($rows as $poller) {
$old = ($poller['now'] - $poller['then']);
$step = Config::get('rrd.step', 300);
if ($old >= $step) {
$row_class = 'danger';
} elseif ($old >= ($step * 0.95)) {
$row_class = 'warning';
} else {
$row_class = 'success';
}
$actions = "";
if (Auth::user()->hasGlobalAdmin() && $old > ($step * 2)) {
// missed 2 polls show delete button
$actions .= "<button type='button' class='btn btn-danger btn-sm' aria-label='Delete' data-toggle='modal' data-target='#confirm-delete' data-id='{$poller['id']}' data-pollertype='delete-cluster-poller' name='delete-cluster-poller'><i class='fa fa-trash' aria-hidden='true'></i></button>";
}
$stat_query = 'SELECT * FROM `poller_cluster_stats` WHERE `parent_poller`=' . $poller['id'] . ';';
$stat_row = dbFetchRows($stat_query);
$stat_count = count($stat_row);
$first_row = true;
foreach ($stat_row as $stats) {
$health_query = 'SELECT * FROM `poller_cluster_queue_health` WHERE `parent_poller`=' . $poller['id'] . ' AND `poller_type`="' . $stats['poller_type'] . '";';
$pending = "";
foreach (dbFetchRows($health_query) as $health) {
$pending .= '<small>Group ' . $health['poller_group'] . ': ' . $health['depth'] . '</small><br>';
}
// Emit the row container
echo '<tr class="'.$row_class.'" id="row_' . $poller['id'] . '">';
if ($first_row) {
// On the first iteration, print some rowspanned columns
echo '
<td rowspan="'.$stat_count.'">'.$poller['poller_name'].'</td>
<td rowspan="'.$stat_count.'"' . (empty($poller['node_id']) ? ' class="danger"' : '') . '>'.$poller['node_id'].'</td>
<td rowspan="'.$stat_count.'">'.$poller['poller_version'].'</td>
<td rowspan="'.$stat_count.'">'.$poller['poller_groups'].'</td>
<td rowspan="'.$stat_count.'">'.$poller['last_report'].'</td>
<td rowspan="'.$stat_count.'">'. ($poller['master'] ? "Yes" : "No") .'</td>';
}
// Emit the job stats
echo '
<td>'.$stats['poller_type'].'</td>
<td>'.$stats['workers'].'</td>
<td>'.$stats['devices'].'</td>
<td>'.$pending.'</td>
<td>'.$stats['worker_seconds'].' / '.$stats['frequency']*$stats['workers'].'</td>';
if ($first_row) {
// On the first iteration, print some rowspanned columns
echo '<td rowspan="'.$stat_count.'">'.$actions.'</td>';
}
// End the row
echo '</tr>';
$first_row = false;
}
}
echo '
</table>
<small>
Worker seconds indicates the maximum polling throughput a node can achieve in perfect conditions. If the consumed is close to the maximum, consider adding more threads, or better tuning your groups.<br>
If there are devices pending but consumed worker seconds is low, your hardware is not sufficient for the number of devices and the poller cannot reach maximum throughput.
</small>
</div>';
}
?>

View File

@ -912,8 +912,8 @@ function ExtTransports($obj)
echo "ERROR: $tmp\r\n";
log_event('Could not issue ' . $prefix[$obj['state']] . " for rule '" . $obj['name'] . "' to transport '" . $transport . "' Error: " . $tmp, $obj['device_id'], 'error', 5);
}
echo '; ';
}
echo '; ';
}
}//end ExtTransports()

View File

@ -113,7 +113,7 @@ function load_discovery(&$device)
function discover_device(&$device, $options = null)
{
if ($device['snmp_disable'] == '1') {
return;
return 0;
}
global $valid;
@ -131,7 +131,7 @@ function discover_device(&$device, $options = null)
$response = device_is_up($device, true);
if ($response['status'] !== '1') {
return;
return 0;
}
if ($device['os'] == 'generic') {
@ -207,10 +207,8 @@ function discover_device(&$device, $options = null)
echo "Discovered in $device_time seconds\n";
global $discovered_devices;
echo "\n";
$discovered_devices++;
echo PHP_EOL;
return 1;
}
//end discover_device()

View File

@ -218,10 +218,17 @@ function record_sensor_data($device, $all_sensors)
}
}
/**
* @param $device
* @param $options
* @return bool
*/
function poll_device($device, $options)
{
global $config, $device;
$device_start = microtime(true);
$attribs = get_dev_attribs($device['device_id']);
$device['attribs'] = $attribs;
@ -231,7 +238,7 @@ function poll_device($device, $options)
$device['snmp_max_oid'] = $attribs['snmp_max_oid'];
unset($array);
$device_start = microtime(true);
// Start counting device poll time
echo 'Hostname: ' . $device['hostname'] . PHP_EOL;
echo 'Device ID: ' . $device['device_id'] . PHP_EOL;
@ -336,7 +343,8 @@ function poll_device($device, $options)
// Update device_groups
UpdateGroupsForDevice($device['device_id']);
if (!isset($options['m'])) {
if (!$force_module && !empty($graphs)) {
echo "Enabling graphs: ";
// FIXME EVENTLOGGING -- MAKE IT SO WE DO THIS PER-MODULE?
// This code cycles through the graphs already known in the database and the ones we've defined as being polled here
// If there any don't match, they're added/deleted from the database.
@ -390,11 +398,18 @@ function poll_device($device, $options)
data_update($device, 'poller-perf', $tags, $fields);
}
$update_array['last_polled'] = array('NOW()');
$update_array['last_polled_timetaken'] = $device_time;
if (!$force_module) {
// don't update last_polled time if we are forcing a specific module to be polled
$update_array['last_polled'] = array('NOW()');
$update_array['last_polled_timetaken'] = $device_time;
}
// echo("$device_end - $device_start; $device_time $device_run");
echo "Polled in $device_time seconds\n";
$updated = dbUpdate($update_array, 'devices', '`device_id` = ?', array($device['device_id']));
if ($updated) {
d_echo('Updating ' . $device['hostname'] . PHP_EOL);
}
echo "\nPolled in $device_time seconds\n";
// check if the poll took too long and log an event
if ($device_time > $config['rrd']['step']) {
@ -402,18 +417,15 @@ function poll_device($device, $options)
' minutes! This will cause gaps in graphs.', $device, 'system', 5);
}
d_echo('Updating '.$device['hostname']."\n");
$updated = dbUpdate($update_array, 'devices', '`device_id` = ?', array($device['device_id']));
if ($updated) {
echo "UPDATED!\n";
}
unset($storage_cache);
// Clear cache of hrStorage ** MAYBE FIXME? **
unset($cache);
// Clear cache (unify all things here?)
}//end if
return true; // device was polled
}
return false; // device not polled
}//end poll_device()
/**

48
librenms-service.py Executable file
View File

@ -0,0 +1,48 @@
#!/usr/bin/env python
import argparse
import logging
import os
import sys
import threading
import LibreNMS
from logging import info
if __name__ == '__main__':
    # Command-line entry point: parse the options, configure logging, then
    # construct the LibreNMS service and hand control to its main loop.
    parser = argparse.ArgumentParser(description='LibreNMS Service - manages polling and other periodic processes')
    parser.add_argument('-g', '--group', type=int, help="Set the poller group for this poller")
    parser.add_argument('-v', '--verbose', action='count', help="Show verbose output.")
    parser.add_argument('-d', '--debug', action="store_true", help="Show debug output.")
    parser.add_argument('-m', '--multiple', action="store_true", help="Allow multiple instances of the service.")
    parser.add_argument('-t', '--timestamps', action="store_true", help="Include timestamps in the logs (not normally needed for syslog/journald)")

    args = parser.parse_args()

    # Timestamps are opt-in; the thread name and level are always included.
    if args.timestamps:
        logging.basicConfig(format='%(asctime)s %(threadName)s(%(levelname)s):%(message)s')
    else:
        logging.basicConfig(format='%(threadName)s(%(levelname)s):%(message)s')

    if args.verbose:
        logging.getLogger().setLevel(logging.INFO)

    # Applied after --verbose so that --debug wins when both flags are given.
    if args.debug:
        logging.getLogger().setLevel(logging.DEBUG)

    info("Configuring LibreNMS service")
    try:
        service = LibreNMS.Service()
    except Exception as e:
        # catch any initialization errors and print the message instead of a stack trace
        print(e)
        sys.exit(2)

    # --multiple *allows* several instances, so single-instance mode is its negation.
    # NOTE(review): assumes config.single_instance=True means "enforce one running
    # instance" -- confirm against LibreNMS.Service.
    service.config.single_instance = not args.multiple

    # Compare against None rather than truthiness so that "-g 0" is honoured.
    if args.group is not None:
        service.config.group = [args.group]

    info('Entering main LibreNMS service loop on {}/{}...'.format(os.getpid(), threading.current_thread().name))
    service.start()

View File

@ -1062,6 +1062,31 @@ pollers:
Indexes:
PRIMARY: { Name: PRIMARY, Columns: [poller_name], Unique: true, Type: BTREE }
id: { Name: id, Columns: [id], Unique: false, Type: BTREE }
poller_cluster:
Columns:
- { Field: id, Type: int(11), 'Null': false, Extra: auto_increment }
- { Field: node_id, Type: varchar(255), 'Null': false, Extra: '' }
- { Field: poller_name, Type: varchar(255), 'Null': false, Extra: '' }
- { Field: poller_version, Type: varchar(255), 'Null': false, Extra: '', Default: '' }
- { Field: poller_groups, Type: varchar(255), 'Null': false, Extra: '', Default: '' }
- { Field: last_report, Type: datetime, 'Null': false, Extra: '' }
- { Field: master, Type: tinyint(1), 'Null': false, Extra: '' }
Indexes:
PRIMARY: { Name: PRIMARY, Columns: [node_id], Unique: true, Type: BTREE }
id: { Name: id, Columns: [id], Unique: true, Type: BTREE }
poller_cluster_stats:
Columns:
- { Field: id, Type: int(11), 'Null': false, Extra: auto_increment }
- { Field: parent_poller, Type: varchar(255), 'Null': false, Extra: '', Default: '' }
- { Field: poller_type, Type: varchar(255), 'Null': false, Extra: '', Default: '' }
- { Field: depth, Type: 'int(11) unsigned', 'Null': false, Extra: '' }
- { Field: devices, Type: 'int(11) unsigned', 'Null': false, Extra: '' }
- { Field: worker_seconds, Type: 'double unsigned', 'Null': false, Extra: '' }
- { Field: workers, Type: 'int(11) unsigned', 'Null': false, Extra: '' }
- { Field: frequency, Type: 'int(11) unsigned', 'Null': false, Extra: '' }
Indexes:
PRIMARY: { Name: PRIMARY, Columns: [parent_poller, poller_type], Unique: true, Type: BTREE }
id: { Name: id, Columns: [id], Unique: true, Type: BTREE }
poller_groups:
Columns:
- { Field: id, Type: int(11), 'Null': false, Extra: auto_increment }

View File

@ -1,9 +1,9 @@
[Unit]
[Unit]
Description=LibreNMS SNMP Poller Service
After=network.target
After=network.target
[Service]
ExecStart=/opt/librenms/poller-service.py
[Service]
ExecStart=/opt/librenms/librenms-service.py -v
WorkingDirectory=/opt/librenms
User=librenms
Group=librenms

14
misc/librenms.service.scl Normal file
View File

@ -0,0 +1,14 @@
[Unit]
Description=LibreNMS SNMP Poller Service
After=network.target
[Service]
ExecStart=/usr/bin/scl enable rh-python36 -- /opt/librenms/librenms-service.py -v
WorkingDirectory=/opt/librenms
User=librenms
Group=librenms
RestartSec=2
Restart=always
[Install]
WantedBy=multi-user.target

View File

@ -146,6 +146,7 @@ rrdtool_initialize();
echo "Starting polling run:\n\n";
$polled_devices = 0;
$unreachable_devices = 0;
if (!isset($query)) {
$query = "SELECT * FROM `devices` WHERE `disabled` = 0 $where ORDER BY `device_id` ASC";
}
@ -156,7 +157,11 @@ foreach (dbFetch($query) as $device) {
} else {
$device['vrf_lite_cisco'] = '';
}
poll_device($device, $options);
if (!poll_device($device, $options)) {
$unreachable_devices++;
}
echo "#### Start Alerts ####\n";
RunRules($device['device_id']);
echo "#### End Alerts ####\r\n";
@ -172,7 +177,14 @@ if ($graphite !== false) {
}
if ($polled_devices) {
dbInsert(array('type' => 'poll', 'doing' => $doing, 'start' => $poller_start, 'duration' => $poller_time, 'devices' => $polled_devices, 'poller' => $config['distributed_poller_name'] ), 'perf_times');
dbInsert(array(
'type' => 'poll',
'doing' => $doing,
'start' => $poller_start,
'duration' => $poller_time,
'devices' => $polled_devices,
'poller' => $config['distributed_poller_name']
), 'perf_times');
}
$string = $argv[0]." $doing ".date($config['dateformat']['compact'])." - $polled_devices devices polled in $poller_time secs";
@ -187,3 +199,9 @@ rrdtool_close();
unset($config);
// Remove this for testing
// print_r(get_defined_vars());
if ($polled_devices === $unreachable_devices) {
exit(6);
}
exit(0);

3
requirements.txt Normal file
View File

@ -0,0 +1,3 @@
PyMySQL
python-dotenv
redis

View File

@ -1,26 +0,0 @@
# poller-service - SNMP polling service for LibreNMS
description "SNMP polling service for LibreNMS"
author "Clint Armstrong <clint@clintarmstrong.net>"
# When to start the service
start on runlevel [2345]
# When to stop the service
stop on runlevel [016]
# Automatically restart process if crashed
respawn
# Restart an unlimited amount of times
respawn limit unlimited
chdir /opt/librenms
setuid librenms
setgid librenms
# Start the process
exec /opt/librenms/poller-service.py
# Wait 60 seconds before restart
post-stop exec sleep 60

View File

@ -1,79 +0,0 @@
### BEGIN INIT INFO
# Provides: librenms-poller-service
# Required-Start: networking
# Required-Stop: networking
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Short-Description: The LibreNMS poller-service daemon
# Description: The LibreNMS poller-service daemon
# This polls devices monitored by LibreNMS
### END INIT INFO
. /lib/lsb/init-functions
NAME=librenms-poller-service
DAEMON=/opt/librenms/poller-service.py
USER=librenms
PIDFILE=/var/run/librenms-poller-service.pid
test -x $DAEMON || exit 5
case $1 in
start)
# Checked the PID file exists and check the actual status of process
if [ -e $PIDFILE ]; then
status_of_proc -p $PIDFILE $DAEMON "$NAME process" && status="0" || status="$?"
# If the status is SUCCESS then don't need to start again.
if [ $status = "0" ]; then
exit # Exit
fi
fi
# Start the daemon.
log_daemon_msg "Starting the process" "$NAME"
# Start the daemon with the help of start-stop-daemon
# Log the message appropriately
if start-stop-daemon --start --quiet --oknodo --make-pidfile --pidfile $PIDFILE --exec $DAEMON --chuid $USER --background; then
log_end_msg 0
else
log_end_msg 1
fi
;;
stop)
# Stop the daemon.
if [ -e $PIDFILE ]; then
status_of_proc -p $PIDFILE $DAEMON "Stoppping the $NAME process" && status="0" || status="$?"
if [ "$status" = 0 ]; then
start-stop-daemon --stop --quiet --oknodo --pidfile $PIDFILE
/bin/rm -rf $PIDFILE
fi
else
log_daemon_msg "$NAME process is not running"
log_end_msg 0
fi
;;
restart)
# Restart the daemon.
$0 stop && sleep 2 && $0 start
;;
status)
# Check the status of the process.
if [ -e $PIDFILE ]; then
status_of_proc -p $PIDFILE $DAEMON "$NAME process" && exit 0 || exit $?
else
log_daemon_msg "$NAME Process is not running"
log_end_msg 0
fi
;;
*)
# For invalid arguments, print the usage message.
echo "Usage: $0 {start|stop|restart|reload|status}"
exit 2
;;
esac

3
sql-schema/253.sql Normal file
View File

@ -0,0 +1,3 @@
CREATE TABLE `poller_cluster` (`id` int(11) NOT NULL AUTO_INCREMENT, `node_id` varchar(255) NOT NULL, `poller_name` varchar(255) NOT NULL, `poller_version` varchar(255) NOT NULL DEFAULT '', `poller_groups` varchar(255) NOT NULL DEFAULT '', `last_report` datetime NOT NULL, `master` tinyint(1) NOT NULL, PRIMARY KEY (`node_id`), UNIQUE KEY `id` (`id`));
CREATE TABLE `poller_cluster_stats` (`id` int(11) NOT NULL AUTO_INCREMENT, `parent_poller` varchar(255) NOT NULL DEFAULT '', `poller_type` varchar(255) NOT NULL DEFAULT '', `depth` int(11) unsigned NOT NULL, `devices` int(11) unsigned NOT NULL, `worker_seconds` double unsigned NOT NULL, `workers` int(11) unsigned NOT NULL, `frequency` int(11) unsigned NOT NULL, PRIMARY KEY (`parent_poller`,`poller_type`), UNIQUE KEY `id` (`id`));

0
tests/__init__.py Normal file
View File

132
tests/tests.py Normal file
View File

@ -0,0 +1,132 @@
import threading
import unittest
from os import path
import sys
from time import sleep
try:
import redis
except ImportError:
print("Redis tests won't be run")
pass
sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
import LibreNMS
class TestLocks(unittest.TestCase):
    """Tests for the LibreNMS lock managers and the Redis-backed queue."""

    def setUp(self):
        # Exceptions raised in helper threads do not fail a test by themselves,
        # so worker threads record them here for the main thread to check.
        self.thread_errors = []

    @staticmethod
    def lock_thread(manager, lock_name, expiration, unlock_sleep=0):
        """Thread body: take lock_name as owner 'lock_thread', hold it, release it.

        :param manager: lock manager under test (must provide lock() and unlock())
        :param lock_name: name of the lock to take
        :param expiration: expiration value passed through to manager.lock()
        :param unlock_sleep: seconds to hold the lock before unlocking it
        """
        manager.lock(lock_name, 'lock_thread', expiration)

        if unlock_sleep:
            sleep(unlock_sleep)
            manager.unlock(lock_name, 'lock_thread')

    def test_threading_lock(self):
        """A lock held by another thread blocks main until that thread releases it."""
        lm = LibreNMS.ThreadingLock()

        thread = threading.Thread(target=self.lock_thread, args=(lm, 'first.lock', 2, 1))
        thread.daemon = True
        thread.start()

        sleep(0.05)  # give the helper thread time to take the lock
        self.assertFalse(lm.lock('first.lock', 'main_thread', 0), "Acquired lock when it is held by thread")
        self.assertFalse(lm.unlock('first.lock', 'main_thread'), "Unlocked lock main doesn't own")

        sleep(1.1)  # the helper thread unlocks after 1 second
        self.assertTrue(lm.lock('first.lock', 'main_thread', 1),
                        "Could not acquire lock previously held by thread")
        self.assertFalse(lm.lock('first.lock', 'main_thread', 1, False), "Was able to re-lock a lock main owns")
        self.assertTrue(lm.lock('first.lock', 'main_thread', 1, True), "Could not re-lock a lock main owns")
        self.assertTrue(lm.check_lock('first.lock'))
        self.assertTrue(lm.unlock('first.lock', 'main_thread'), "Could not unlock lock main holds")
        self.assertFalse(lm.unlock('first.lock', 'main_thread'), "Unlocked an unlocked lock?")
        self.assertFalse(lm.check_lock('first.lock'))

    @unittest.skipUnless('redis' in sys.modules, 'Skipped Redis tests')
    def test_redis_lock(self):
        """Same scenario as test_threading_lock, against the Redis lock manager."""
        rc = redis.Redis()
        rc.delete('lock:redis.lock')  # make sure no previous data exists

        lm = LibreNMS.RedisLock(namespace='lock')
        thread = threading.Thread(target=self.lock_thread, args=(lm, 'redis.lock', 2, 1))
        thread.daemon = True
        thread.start()

        sleep(0.05)  # give the helper thread time to take the lock
        self.assertFalse(lm.lock('redis.lock', 'main_thread', 1), "Acquired lock when it is held by thread")
        self.assertFalse(lm.unlock('redis.lock', 'main_thread'), "Unlocked lock main doesn't own")

        sleep(1.1)  # the helper thread unlocks after 1 second
        self.assertTrue(lm.lock('redis.lock', 'main_thread', 1),
                        "Could not acquire lock previously held by thread")
        self.assertFalse(lm.lock('redis.lock', 'main_thread', 1), "Relocked an existing lock")
        self.assertTrue(lm.lock('redis.lock', 'main_thread', 1, True), "Could not re-lock a lock main owns")
        self.assertTrue(lm.unlock('redis.lock', 'main_thread'), "Could not unlock lock main holds")
        self.assertFalse(lm.unlock('redis.lock', 'main_thread'), "Unlocked an unlocked lock?")

    def queue_thread(self, manager, expect, wait=True):
        """Thread body: fetch one item from manager and compare it with expect.

        Any failure is stored in self.thread_errors instead of being raised,
        because an exception raised in this thread would not fail the test.
        """
        try:
            self.assertEqual(expect, manager.get(wait), 'Got unexpected data in thread')
        except Exception as e:  # includes AssertionError; re-checked by the main thread
            self.thread_errors.append(e)

    @unittest.skipUnless('redis' in sys.modules, 'Skipped Redis tests')
    def test_redis_queue(self):
        """Items put on the Redis queue come back in order, as strings."""
        rc = redis.Redis()
        rc.delete('queue:testing')  # make sure no previous data exists
        qm = LibreNMS.RedisQueue('testing', namespace='queue')

        workers = []

        # non-blocking consumer started while the queue is still empty
        thread = threading.Thread(target=self.queue_thread, args=(qm, None, False))
        thread.daemon = True
        thread.start()
        workers.append(thread)

        # blocking consumer that should receive the first item put below
        thread = threading.Thread(target=self.queue_thread, args=(qm, '2'))
        thread.daemon = True
        thread.start()
        workers.append(thread)

        qm.put(2)
        qm.put(3)
        qm.put(4)
        sleep(0.05)
        self.assertEqual(2, qm.qsize())
        self.assertEqual('3', qm.get())
        self.assertEqual('4', qm.get(), "Did not get second item in queue")
        self.assertEqual(None, qm.get_nowait(), "Did not get None when queue should be empty")
        self.assertTrue(qm.empty(), "Queue should be empty")

        # Surface assertion failures that happened inside the consumer threads.
        for worker in workers:
            worker.join(1)
        self.assertEqual([], self.thread_errors, 'Queue consumer threads reported failures')
class TestTimer(unittest.TestCase):
    """Tests for LibreNMS.RecurringTimer."""

    def setUp(self):
        self.counter = 0

    def count(self):
        """Timer callback: record one more invocation."""
        self.counter += 1

    def test_recurring_timer(self):
        """The callback fires once per interval, stops on stop(), resumes on start()."""
        self.assertEqual(0, self.counter)
        timer = LibreNMS.RecurringTimer(0.5, self.count)

        # try/finally guarantees the timer is stopped even when an assertion
        # fails, so a failing test does not leave the timer running.
        timer.start()
        try:
            self.assertEqual(0, self.counter)
            sleep(0.5)
            self.assertEqual(1, self.counter)
            sleep(0.5)
            self.assertEqual(2, self.counter)
        finally:
            timer.stop()

        # once stopped, the counter must no longer advance
        self.assertTrue(timer._event.is_set())
        sleep(0.5)
        self.assertEqual(2, self.counter)

        # a stopped timer can be started again
        timer.start()
        try:
            sleep(0.5)
            self.assertEqual(3, self.counter)
        finally:
            timer.stop()
# Run the test suite when this file is executed directly.
if __name__ == '__main__':
    unittest.main()