# LibreNMS/LibreNMS/service.py
import LibreNMS
import json
import logging
import os
import pymysql
import subprocess
import threading
import sys
import time
try:
import psutil
except ImportError:
pass
from datetime import timedelta
from datetime import datetime
from logging import debug, info, warning, error, critical, exception
from platform import python_version
from time import sleep
from socket import gethostname
from signal import signal, SIGTERM, SIGQUIT, SIGINT, SIGHUP, SIGCHLD, SIG_DFL
from uuid import uuid1
class ServiceConfig:
def __init__(self):
"""
Stores all of the configuration variables for the LibreNMS service in a common object
Starts with defaults, but can be populated with variables from config.php by calling populate()
"""
self._uuid = str(uuid1())
self.set_name(gethostname())
def set_name(self, name):
if name:
self.name = name.strip()
self.unique_name = "{}-{}".format(self.name, self._uuid)
class PollerConfig:
def __init__(self, workers, frequency, calculate=None):
self.enabled = True
self.workers = workers
self.frequency = frequency
self.calculate = calculate
# config variables with defaults
BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
node_id = None
name = None
unique_name = None
single_instance = True
distributed = False
group = 0
debug = False
log_level = 20
max_db_failures = 5
alerting = PollerConfig(1, 60)
poller = PollerConfig(24, 300)
services = PollerConfig(8, 300)
discovery = PollerConfig(16, 21600)
billing = PollerConfig(2, 300, 60)
ping = PollerConfig(1, 60)
down_retry = 60
update_enabled = True
update_frequency = 86400
master_resolution = 1
master_timeout = 10
redis_host = 'localhost'
redis_port = 6379
redis_db = 0
redis_pass = None
redis_socket = None
redis_sentinel = None
redis_sentinel_service = None
redis_timeout = 60
db_host = 'localhost'
db_port = 0
db_socket = None
db_user = 'librenms'
db_pass = ''
db_name = 'librenms'
watchdog_enabled = False
watchdog_logfile = 'logs/librenms.log'
def populate(self):
config = self._get_config_data()
# populate config variables
self.node_id = os.getenv('NODE_ID')
self.set_name(config.get('distributed_poller_name', None))
self.distributed = config.get('distributed_poller', ServiceConfig.distributed)
self.group = ServiceConfig.parse_group(config.get('distributed_poller_group', ServiceConfig.group))
# backward compatible options
self.poller.workers = config.get('poller_service_workers', ServiceConfig.poller.workers)
self.poller.frequency = config.get('poller_service_poll_frequency', ServiceConfig.poller.frequency)
self.discovery.frequency = config.get('poller_service_discover_frequency', ServiceConfig.discovery.frequency)
self.down_retry = config.get('poller_service_down_retry', ServiceConfig.down_retry)
self.log_level = config.get('poller_service_loglevel', ServiceConfig.log_level)
# new options
self.poller.enabled = config.get('service_poller_enabled', True) # unused
self.poller.workers = config.get('service_poller_workers', ServiceConfig.poller.workers)
self.poller.frequency = config.get('service_poller_frequency', ServiceConfig.poller.frequency)
self.discovery.enabled = config.get('service_discovery_enabled', True) # unused
self.discovery.workers = config.get('service_discovery_workers', ServiceConfig.discovery.workers)
self.discovery.frequency = config.get('service_discovery_frequency', ServiceConfig.discovery.frequency)
self.services.enabled = config.get('service_services_enabled', True)
self.services.workers = config.get('service_services_workers', ServiceConfig.services.workers)
self.services.frequency = config.get('service_services_frequency', ServiceConfig.services.frequency)
self.billing.enabled = config.get('service_billing_enabled', True)
self.billing.frequency = config.get('service_billing_frequency', ServiceConfig.billing.frequency)
self.billing.calculate = config.get('service_billing_calculate_frequency', ServiceConfig.billing.calculate)
self.alerting.enabled = config.get('service_alerting_enabled', True)
self.alerting.frequency = config.get('service_alerting_frequency', ServiceConfig.alerting.frequency)
self.ping.enabled = config.get('service_ping_enabled', False)
self.ping.frequency = config.get('ping_rrd_step', ServiceConfig.ping.frequency)
self.down_retry = config.get('service_poller_down_retry', ServiceConfig.down_retry)
self.log_level = config.get('service_loglevel', ServiceConfig.log_level)
self.update_enabled = config.get('service_update_enabled', ServiceConfig.update_enabled)
self.update_frequency = config.get('service_update_frequency', ServiceConfig.update_frequency)
self.redis_host = os.getenv('REDIS_HOST', config.get('redis_host', ServiceConfig.redis_host))
self.redis_db = os.getenv('REDIS_DB', config.get('redis_db', ServiceConfig.redis_db))
self.redis_pass = os.getenv('REDIS_PASSWORD', config.get('redis_pass', ServiceConfig.redis_pass))
self.redis_port = int(os.getenv('REDIS_PORT', config.get('redis_port', ServiceConfig.redis_port)))
self.redis_socket = os.getenv('REDIS_SOCKET', config.get('redis_socket', ServiceConfig.redis_socket))
self.redis_sentinel = os.getenv('REDIS_SENTINEL', config.get('redis_sentinel', ServiceConfig.redis_sentinel))
self.redis_sentinel_service = os.getenv('REDIS_SENTINEL_SERVICE',
config.get('redis_sentinel_service',
ServiceConfig.redis_sentinel_service))
self.redis_timeout = os.getenv('REDIS_TIMEOUT', self.alerting.frequency if self.alerting.frequency != 0 else self.redis_timeout)
self.db_host = os.getenv('DB_HOST', config.get('db_host', ServiceConfig.db_host))
self.db_name = os.getenv('DB_DATABASE', config.get('db_name', ServiceConfig.db_name))
self.db_pass = os.getenv('DB_PASSWORD', config.get('db_pass', ServiceConfig.db_pass))
self.db_port = int(os.getenv('DB_PORT', config.get('db_port', ServiceConfig.db_port)))
self.db_socket = os.getenv('DB_SOCKET', config.get('db_socket', ServiceConfig.db_socket))
self.db_user = os.getenv('DB_USERNAME', config.get('db_user', ServiceConfig.db_user))
self.watchdog_enabled = config.get('service_watchdog_enabled', ServiceConfig.watchdog_enabled)
self.watchdog_logfile = config.get('log_file', ServiceConfig.watchdog_logfile)
# set convenient debug variable
self.debug = logging.getLogger().isEnabledFor(logging.DEBUG)
if not self.debug and self.log_level:
try:
logging.getLogger().setLevel(self.log_level)
except ValueError:
error("Unknown log level {}, must be one of 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'".format(self.log_level))
logging.getLogger().setLevel(logging.INFO)
def load_poller_config(self, db):
try:
settings = {}
cursor = db.query('SELECT * FROM `poller_cluster` WHERE `node_id`=%s', self.node_id)
if cursor.rowcount == 0:
return
for index, setting in enumerate(cursor.fetchone()):
name = cursor.description[index][0]
settings[name] = setting
if settings['poller_name'] is not None:
self.set_name(settings['poller_name'])
if settings['poller_groups'] is not None:
self.group = ServiceConfig.parse_group(settings['poller_groups'])
if settings['poller_enabled'] is not None:
self.poller.enabled = settings['poller_enabled']
if settings['poller_frequency'] is not None:
self.poller.frequency = settings['poller_frequency']
if settings['poller_workers'] is not None:
self.poller.workers = settings['poller_workers']
if settings['poller_down_retry'] is not None:
self.down_retry = settings['poller_down_retry']
if settings['discovery_enabled'] is not None:
self.discovery.enabled = settings['discovery_enabled']
if settings['discovery_frequency'] is not None:
self.discovery.frequency = settings['discovery_frequency']
if settings['discovery_workers'] is not None:
self.discovery.workers = settings['discovery_workers']
if settings['services_enabled'] is not None:
self.services.enabled = settings['services_enabled']
if settings['services_frequency'] is not None:
self.services.frequency = settings['services_frequency']
if settings['services_workers'] is not None:
self.services.workers = settings['services_workers']
if settings['billing_enabled'] is not None:
self.billing.enabled = settings['billing_enabled']
if settings['billing_frequency'] is not None:
self.billing.frequency = settings['billing_frequency']
if settings['billing_calculate_frequency'] is not None:
self.billing.calculate = settings['billing_calculate_frequency']
if settings['alerting_enabled'] is not None:
self.alerting.enabled = settings['alerting_enabled']
if settings['alerting_frequency'] is not None:
self.alerting.frequency = settings['alerting_frequency']
if settings['ping_enabled'] is not None:
self.ping.enabled = settings['ping_enabled']
if settings['ping_frequency'] is not None:
self.ping.frequency = settings['ping_frequency']
if settings['update_enabled'] is not None:
self.update_enabled = settings['update_enabled']
if settings['update_frequency'] is not None:
self.update_frequency = settings['update_frequency']
if settings['loglevel'] is not None:
self.log_level = settings['loglevel']
if settings['watchdog_enabled'] is not None:
self.watchdog_enabled = settings['watchdog_enabled']
if settings['watchdog_log'] is not None:
self.watchdog_logfile = settings['watchdog_log']
except pymysql.err.Error:
warning('Unable to load poller (%s) config', self.node_id)
def _get_config_data(self):
try:
import dotenv
env_path = "{}/.env".format(self.BASE_DIR)
info("Attempting to load .env from '%s'", env_path)
dotenv.load_dotenv(dotenv_path=env_path, verbose=True)
if not os.getenv('NODE_ID'):
raise ImportError(".env does not contain a valid NODE_ID setting.")
except ImportError as e:
exception("Could not import .env - check that the poller user can read the file, and that composer install has been run recently")
sys.exit(3)
config_cmd = ['/usr/bin/env', 'php', '{}/config_to_json.php'.format(self.BASE_DIR), '2>&1']
try:
return json.loads(subprocess.check_output(config_cmd).decode())
except subprocess.CalledProcessError as e:
error("ERROR: Could not load or parse configuration! {}: {}"
.format(subprocess.list2cmdline(e.cmd), e.output.decode()))
@staticmethod
def parse_group(g):
if g is None:
return [0]
elif type(g) is int:
return [g]
elif type(g) is str:
try:
return [int(x) for x in set(g.split(','))]
except ValueError:
pass
error("Could not parse group string, defaulting to 0")
return [0]
class Service:
config = ServiceConfig()
_fp = False
_started = False
start_time = 0
queue_managers = {}
poller_manager = None
discovery_manager = None
last_poll = {}
reap_flag = False
terminate_flag = False
reload_flag = False
db_failures = 0
def __init__(self):
self.start_time = time.time()
self.config.populate()
self._db = LibreNMS.DB(self.config)
self.config.load_poller_config(self._db)
threading.current_thread().name = self.config.name # rename main thread
self.attach_signals()
self._lm = self.create_lock_manager()
self.daily_timer = LibreNMS.RecurringTimer(self.config.update_frequency, self.run_maintenance, 'maintenance')
self.stats_timer = LibreNMS.RecurringTimer(self.config.poller.frequency, self.log_performance_stats, 'performance')
if self.config.watchdog_enabled:
info("Starting watchdog timer for log file: {}".format(self.config.watchdog_logfile))
self.watchdog_timer = LibreNMS.RecurringTimer(self.config.poller.frequency, self.logfile_watchdog, 'watchdog')
else:
info("Watchdog is disabled.")
self.is_master = False
def service_age(self):
return time.time() - self.start_time
def attach_signals(self):
info("Attaching signal handlers on thread %s", threading.current_thread().name)
signal(SIGTERM, self.terminate) # capture sigterm and exit gracefully
signal(SIGQUIT, self.terminate) # capture sigquit and exit gracefully
signal(SIGINT, self.terminate) # capture sigint and exit gracefully
signal(SIGHUP, self.reload) # capture sighup and restart gracefully
if 'psutil' not in sys.modules:
warning("psutil is not available, polling gap possible")
else:
signal(SIGCHLD, self.reap) # capture sigchld and reap the process
def reap_psutil(self):
"""
A process from a previous invocation is trying to report its status
"""
# Speed things up by only looking at direct zombie children
for p in psutil.Process().children(recursive=False):
try:
cmd = p.cmdline() # cmdline is uncached, so needs to go here to avoid NoSuchProcess
status = p.status()
if status == psutil.STATUS_ZOMBIE:
pid = p.pid
r = os.waitpid(p.pid, os.WNOHANG)
warning('Reaped long running job "%s" in state %s with PID %d - job returned %d', cmd, status, r[0], r[1])
except (OSError, psutil.NoSuchProcess):
# process was already reaped
continue
def start(self):
debug("Performing startup checks...")
if self.config.single_instance:
self.check_single_instance() # don't allow more than one service at a time
if self._started:
raise RuntimeWarning("Not allowed to start Poller twice")
self._started = True
debug("Starting up queue managers...")
# initialize and start the worker pools
self.poller_manager = LibreNMS.PollerQueueManager(self.config, self._lm)
self.queue_managers['poller'] = self.poller_manager
self.discovery_manager = LibreNMS.DiscoveryQueueManager(self.config, self._lm)
self.queue_managers['discovery'] = self.discovery_manager
if self.config.alerting.enabled:
self.queue_managers['alerting'] = LibreNMS.AlertQueueManager(self.config, self._lm)
if self.config.services.enabled:
self.queue_managers['services'] = LibreNMS.ServicesQueueManager(self.config, self._lm)
if self.config.billing.enabled:
self.queue_managers['billing'] = LibreNMS.BillingQueueManager(self.config, self._lm)
if self.config.ping.enabled:
self.queue_managers['ping'] = LibreNMS.PingQueueManager(self.config, self._lm)
if self.config.update_enabled:
self.daily_timer.start()
self.stats_timer.start()
if self.config.watchdog_enabled:
self.watchdog_timer.start()
info("LibreNMS Service: {} started!".format(self.config.unique_name))
info("Poller group {}. Using Python {} and {} locks and queues"
.format('0 (default)' if self.config.group == [0] else self.config.group, python_version(),
'redis' if isinstance(self._lm, LibreNMS.RedisLock) else 'internal'))
if self.config.update_enabled:
info("Maintenance tasks will be run every {}".format(timedelta(seconds=self.config.update_frequency)))
else:
warning("Maintenance tasks are disabled.")
# Main dispatcher loop
try:
while not self.terminate_flag:
if self.reload_flag:
info("Picked up reload flag, calling the reload process")
self.restart()
if self.reap_flag:
self.reap_psutil()
# Re-arm the signal handler
signal(SIGCHLD, self.reap)
self.reap_flag = False
master_lock = self._acquire_master()
if master_lock:
if not self.is_master:
info("{} is now the master dispatcher".format(self.config.name))
self.is_master = True
self.start_dispatch_timers()
devices = self.fetch_immediate_device_list()
for device in devices:
device_id = device[0]
group = device[1]
if device[2]: # polling
self.dispatch_immediate_polling(device_id, group)
if device[3]: # discovery
self.dispatch_immediate_discovery(device_id, group)
else:
if self.is_master:
info("{} is no longer the master dispatcher".format(self.config.name))
self.stop_dispatch_timers()
self.is_master = False # no longer master
sleep(self.config.master_resolution)
except KeyboardInterrupt:
pass
info("Dispatch loop terminated")
self.shutdown()
def _acquire_master(self):
return self._lm.lock('dispatch.master', self.config.unique_name, self.config.master_timeout, True)
def _release_master(self):
self._lm.unlock('dispatch.master', self.config.unique_name)
# ------------ Discovery ------------
def dispatch_immediate_discovery(self, device_id, group):
if not self.discovery_manager.is_locked(device_id):
self.discovery_manager.post_work(device_id, group)
# ------------ Polling ------------
def dispatch_immediate_polling(self, device_id, group):
if not self.poller_manager.is_locked(device_id):
self.poller_manager.post_work(device_id, group)
if self.config.debug:
cur_time = time.time()
elapsed = cur_time - self.last_poll.get(device_id, cur_time)
self.last_poll[device_id] = cur_time
# arbitrary limit to reduce spam
if elapsed > (self.config.poller.frequency - self.config.master_resolution):
debug("Dispatching polling for device {}, time since last poll {:.2f}s"
.format(device_id, elapsed))
def fetch_immediate_device_list(self):
try:
poller_find_time = self.config.poller.frequency - 1
discovery_find_time = self.config.discovery.frequency - 1
result = self._db.query('''SELECT `device_id`,
`poller_group`,
COALESCE(`last_polled` <= DATE_ADD(DATE_ADD(NOW(), INTERVAL -%s SECOND), INTERVAL `last_polled_timetaken` SECOND), 1) AS `poll`,
IF(snmp_disable=1 OR status=0, 0, IF (%s < `last_discovered_timetaken` * 1.25, 0, COALESCE(`last_discovered` <= DATE_ADD(DATE_ADD(NOW(), INTERVAL -%s SECOND), INTERVAL `last_discovered_timetaken` SECOND), 1))) AS `discover`
FROM `devices`
WHERE `disabled` = 0 AND (
`last_polled` IS NULL OR
`last_discovered` IS NULL OR
`last_polled` <= DATE_ADD(DATE_ADD(NOW(), INTERVAL -%s SECOND), INTERVAL `last_polled_timetaken` SECOND) OR
`last_discovered` <= DATE_ADD(DATE_ADD(NOW(), INTERVAL -%s SECOND), INTERVAL `last_discovered_timetaken` SECOND)
)
ORDER BY `last_polled_timetaken` DESC''', (poller_find_time, self.service_age(), discovery_find_time, poller_find_time, discovery_find_time))
self.db_failures = 0
return result
except pymysql.err.Error:
self.db_failures += 1
if self.db_failures > self.config.max_db_failures:
warning("Too many DB failures ({}), attempting to release master".format(self.db_failures))
self._release_master()
sleep(self.config.master_resolution) # sleep to give another node a chance to acquire
return []
def run_maintenance(self):
"""
Runs update and cleanup tasks by calling daily.sh. Reloads the python script after the update.
Sets a schema-update lock so no distributed pollers will update until the schema has been updated.
"""
attempt = 0
wait = 5
max_runtime = 86100
max_tries = int(max_runtime / wait)
info("Waiting for schema lock")
while not self._lm.lock('schema-update', self.config.unique_name, max_runtime):
attempt += 1
if attempt >= max_tries: # don't get stuck indefinitely
warning('Reached max wait for other pollers to update, updating now')
break
sleep(wait)
info("Running maintenance tasks")
try:
output = LibreNMS.call_script('daily.sh')
info("Maintenance tasks complete\n{}".format(output))
except subprocess.CalledProcessError as e:
error("Error in daily.sh:\n" + (e.output.decode() if e.output is not None else 'No output'))
self._lm.unlock('schema-update', self.config.unique_name)
self.restart()
def create_lock_manager(self):
"""
Create a new LockManager. Tries to create a Redis LockManager, but falls
back to python's internal threading lock implementation.
Exits if distributing poller is enabled and a Redis LockManager cannot be created.
:return: Instance of LockManager
"""
try:
return LibreNMS.RedisLock(namespace='librenms.lock',
host=self.config.redis_host,
port=self.config.redis_port,
db=self.config.redis_db,
password=self.config.redis_pass,
unix_socket_path=self.config.redis_socket,
sentinel=self.config.redis_sentinel,
sentinel_service=self.config.redis_sentinel_service,
socket_timeout=self.config.redis_timeout)
except ImportError:
if self.config.distributed:
critical("ERROR: Redis connection required for distributed polling")
critical("Please install redis-py, either through your os software repository or from PyPI")
self.exit(2)
except Exception as e:
if self.config.distributed:
critical("ERROR: Redis connection required for distributed polling")
critical("Could not connect to Redis. {}".format(e))
self.exit(2)
return LibreNMS.ThreadingLock()
def restart(self):
"""
Stop then recreate this entire process by re-calling the original script.
Has the effect of reloading the python files from disk.
"""
if sys.version_info < (3, 4, 0):
warning("Skipping restart as running under an incompatible interpreter")
warning("Please restart manually")
return
info('Restarting service... ')
if 'psutil' not in sys.modules:
warning("psutil is not available, polling gap possible")
self._stop_managers_and_wait()
else:
self._stop_managers()
self._release_master()
python = sys.executable
sys.stdout.flush()
os.execl(python, python, *sys.argv)
def reap(self, signalnum=None, flag=None):
"""
Handle a set the reload flag to begin a clean restart
:param signalnum: UNIX signal number
:param flag: Flags accompanying signal
"""
if (signal(SIGCHLD, SIG_DFL) == SIG_DFL):
# signal is already being handled, bail out as this handler is not reentrant - the kernel will re-raise the signal later
return
self.reap_flag = True
def reload(self, signalnum=None, flag=None):
"""
Handle a set the reload flag to begin a clean restart
:param signalnum: UNIX signal number
:param flag: Flags accompanying signal
"""
info("Received signal on thread %s, handling", threading.current_thread().name)
self.reload_flag = True
def terminate(self, signalnum=None, flag=None):
"""
Handle a set the terminate flag to begin a clean shutdown
:param signalnum: UNIX signal number
:param flag: Flags accompanying signal
"""
info("Received signal on thread %s, handling", threading.current_thread().name)
self.terminate_flag = True
def shutdown(self, signalnum=None, flag=None):
"""
Stop and exit, waiting for all child processes to exit.
:param signalnum: UNIX signal number
:param flag: Flags accompanying signal
"""
info('Shutting down, waiting for running jobs to complete...')
self.stop_dispatch_timers()
self._release_master()
self.daily_timer.stop()
self.stats_timer.stop()
if self.config.watchdog_enabled:
self.watchdog_timer.stop()
self._stop_managers_and_wait()
# try to release master lock
info('Shutdown of %s/%s complete', os.getpid(), threading.current_thread().name)
self.exit(0)
def start_dispatch_timers(self):
"""
Start all dispatch timers and begin pushing events into queues.
This should only be started when we are the master dispatcher.
"""
for manager in self.queue_managers.values():
try:
manager.start_dispatch()
except AttributeError:
pass
def stop_dispatch_timers(self):
"""
Stop all dispatch timers, this should be called when we are no longer the master dispatcher.
"""
for manager in self.queue_managers.values():
try:
manager.stop_dispatch()
except AttributeError:
pass
def _stop_managers(self):
for manager in self.queue_managers.values():
manager.stop()
def _stop_managers_and_wait(self):
"""
Stop all QueueManagers, and wait for their processing threads to complete.
We send the stop signal to all QueueManagers first, then wait for them to finish.
"""
self._stop_managers()
for manager in self.queue_managers.values():
manager.stop_and_wait()
def check_single_instance(self):
"""
Check that there is only one instance of the service running on this computer.
We do this be creating a file in the base directory (.lock.service) if it doesn't exist and
obtaining an exclusive lock on that file.
"""
lock_file = "{}/{}".format(self.config.BASE_DIR, '.lock.service')
import fcntl
self._fp = open(lock_file, 'w') # keep a reference so the file handle isn't garbage collected
self._fp.flush()
try:
fcntl.lockf(self._fp, fcntl.LOCK_EX | fcntl.LOCK_NB)
except IOError:
warning("Another instance is already running, quitting.")
self.exit(2)
def log_performance_stats(self):
info("Counting up time spent polling")
try:
# Report on the poller instance as a whole
self._db.query('INSERT INTO poller_cluster(node_id, poller_name, poller_version, poller_groups, last_report, master) '
'values("{0}", "{1}", "{2}", "{3}", NOW(), {4}) '
'ON DUPLICATE KEY UPDATE poller_version="{2}", poller_groups="{3}", last_report=NOW(), master={4}; '
.format(self.config.node_id, self.config.name, "librenms-service", ','.join(str(g) for g in self.config.group), 1 if self.is_master else 0))
# Find our ID
self._db.query('SELECT id INTO @parent_poller_id FROM poller_cluster WHERE node_id="{0}"; '.format(self.config.node_id))
for worker_type, manager in self.queue_managers.items():
worker_seconds, devices = manager.performance.reset()
# Record the queue state
self._db.query('INSERT INTO poller_cluster_stats(parent_poller, poller_type, depth, devices, worker_seconds, workers, frequency) '
'values(@parent_poller_id, "{0}", {1}, {2}, {3}, {4}, {5}) '
'ON DUPLICATE KEY UPDATE depth={1}, devices={2}, worker_seconds={3}, workers={4}, frequency={5}; '
.format(worker_type,
sum([manager.get_queue(group).qsize() for group in self.config.group]),
devices,
worker_seconds,
getattr(self.config, worker_type).workers,
getattr(self.config, worker_type).frequency)
)
except pymysql.err.Error:
exception("Unable to log performance statistics - is the database still online?")
def logfile_watchdog(self):
try:
# check that lofgile has been written to within last poll period
logfile_mdiff = datetime.now().timestamp() - os.path.getmtime(self.config.watchdog_logfile)
except FileNotFoundError as e:
error("Log file not found! {}".format(e))
return
if logfile_mdiff > self.config.poller.frequency:
critical("BARK! Log file older than {}s, restarting service!".format(self.config.poller.frequency))
self.restart()
else:
info("Log file updated {}s ago".format(int(logfile_mdiff)))
def exit(self, code=0):
sys.stdout.flush()
sys.exit(code)