Source code for watchme.watchers

'''

Copyright (C) 2019 Vanessa Sochat.

This Source Code Form is subject to the terms of the
Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed
with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

'''

from watchme.logger import ( bot, RobotNamer )
from watchme.version import __version__
from watchme.defaults import (
    WATCHME_BASE_DIR,
    WATCHME_TASK_TYPES,
    WATCHME_NOTALLOWED_PARAMS
)

from watchme.command import (
    create_watcher,
    write_timestamp,
    get_watchers,
    git_commit,
    git_add
)

from configparser import NoOptionError

from .data import (
    export_dict
)

from .settings import (
    get_setting,
    set_setting,
    get_section,
    print_section,
    print_add_task,
    remove_setting,
    remove_section
)

from .schedule import (
    remove_schedule,
    get_crontab,
    get_job,
    has_schedule,
    update_schedule,
    clear_schedule,
    schedule
)

from watchme.config import (
    read_config,
    write_config
)

from watchme.utils import (
    mkdir_p,
    write_file,
    write_json
)

import os
import re
import shutil
import json
import sys


[docs]class Watcher(object): repo=None configfile=None def __init__(self, name=None, base=None, create=False, **kwargs): '''the watcher base loads configuration files for the user (in $HOME) and module, and then stores any arguments given from the caller Parameters ========== name: the watcher name, defaults to github base: the watcher base, will default to $HOME/.watchme create: boolean to create the watcher if doesn't exist (default False) kwargs: should include command line arguments from the client. ''' # Set the watcher base self._set_base(base, create) self._version = __version__ # Load the configuration self.load_config() def _set_base(self, base=None, create=False): ''' set the base for the watcher, ensuring that it exists. Parameters ========== base: the base folder of watcher repos. Uses $HOME/.watchme default create: create the watcher if it doesn't exist (default is False) ''' if base == None: base = WATCHME_BASE_DIR # Does the watcher exist? self.base = base self.repo = os.path.join(self.base, self.name) self.configfile = os.path.join(self.repo, 'watchme.cfg') # If the watcher doesn't exist and we need to create: if not os.path.exists(self.repo) or not os.path.exists(self.configfile): if create is True: create_watcher(self.name) else: bot.exit('Watcher %s does not exist. Use watchme create.' % self.name) # Config
[docs] def save(self): '''save the configuration to file.''' write_config(self.configfile, self.config)
[docs] def edit_task(self, name, action, key, value=None): '''edit a task, meaning doing an addition (add), update (update), or "remove", All actions require a value other than remove. Parameters ========== name: the name of the task to update action: the action to take (update, add, remove) a parameter key: the key to update value: the value to update ''' if not self.has_task(name): bot.exit('%s is not a task defined by %s' %(name, self.name)) if action not in ['update', 'add', 'remove']: bot.exit('Action must be update, add, or remove') if action in ['update', 'add'] and value == None: bot.exit('A value must be provided for the %s action' % action) # Add, and it doesn't exist so it's okay if action == "add" and key not in self.config[name]: bot.info('Adding %s:%s to %s' %(key, value, name)) self.set_setting(name, key, value) # Already exists, encourage user to update elif action == "add" and key in self.config[name]: bot.exit('%s already exists. Use "update" action to change.' % key) # Update, and it's a valid choice elif action == 'update' and key in self.config[name]: bot.info('Updating %s to %s in %s' %(key, value, name)) self.set_setting(name, key, value) # Update, and it's not a valid choice elif action == 'update' and key not in self.config[name]: bot.exit('%s is not found in config, cannot be updated.' % key) # Remove, and it's a valid choice elif action == "remove" and key in self.config[name]: bot.info('Removing %s' % key ) del self.config[name][key] # Remove, and it's not a valid choice elif action == "remove" and key not in self.config[name]: bot.exit('%s is not found in config, cannot be removed.' % key) self.save()
[docs] def has_section(self, name): '''returns True or False to indicate if the watcher has a specified section. To get a task, use self.has_task. Parameters ========== name: the name of the section to check for. ''' self.load_config() if name in self.config._sections: return True bot.warning('%s not found for watcher %s' %(name, self.name)) return False
[docs] def load_config(self): '''load a configuration file, and set the active setting for the watcher if the file doesn't exist, the function will exit and prompt the user to create the watcher first. If the watcher section isn't yet defined, it will be written with a default active status set to false. ''' if not hasattr(self, 'config'): # Load the configuration file if it exists (will exit if not found) if self.configfile != None: self.config = read_config(self.configfile) # The watcher section is added by default, args for the watcher if 'watcher' not in self.config.sections(): self.config.add_section('watcher') self.set_setting('watcher', 'active', 'false') # Only update the config if we've changed it self.save()
def _get_params_dict(self, pairs): '''iterate through parameters, make keys lowercase, and ensure valid format. Parameters ========== pairs: a list of key@value pairs to set. ''' params = {} for pair in pairs: if "@" not in pair: bot.exit('incorrectly formatted param, must be key@value') key,value = pair.split('@', 1) key = key.lower() # All tasks are not allowed to have default params if key in WATCHME_NOTALLOWED_PARAMS: bot.error('%s is a default, not allowed setting by task.' % key) self.valid = False params[key] = value return params # Add Tasks
[docs] def add_task(self, task, task_type, params, force=False, active="true"): '''add a task, meaning ensuring that the type is valid, and that the parameters are valid for the task. Parameters ========== task: the Task object to add, should have a name and params and be child of watchme.tasks.TaskBase task_type: must be in WATCHME_TASK_TYPES, meaning a client exists params: list of parameters to be validated (key@value) force: if task already exists, overwrite active: add the task as active (default "true") ''' # Check again, in case user calling from client if not task.startswith('task'): bot.exit('Task name must start with "task" (e.g., task-reddit)') # Ensure it's a valid type if task_type not in WATCHME_TASK_TYPES: bot.exit('%s is not a valid type: %s' % WATCHME_TASK_TYPES) # Validate variables provided for task if task_type.startswith('url'): from .urls import Task # Validate variables provided for task elif task_type == 'psutils': from .psutils import Task else: bot.exit('task_type %s not properly added to Watcher' % task_type) # Convert list to dictionary params = self._get_params_dict(params) # Creating the task will validate parameters newtask = Task(task, params=params) # Exit if the new task is not valid if not newtask.valid: bot.exit('%s is not valid, will not be added.' % task) # Write to file (all tasks get active = True added, and type) self._add_task(newtask, force, active)
def _add_task(self, task, force=False, active='true'): '''add a new task to the watcher, meaning we: 1. Check first that the task doesn't already exist (if the task exists, we only add if force is set to true) 2. Validate the task (depends on the task) 3. write the task to the helper config file, if valid. Parameters ========== task: the Task object to add, should have a name and params and be child of watchme.tasks.TaskBase force: if task already exists, overwrite active: add the task as active (default "true") ''' self.load_config() if active not in ["true", "false"]: bot.exit('Active must be "true" or "false"') # Don't overwrite a section that already exists if task.name in self.config.sections(): if not force: bot.exit('%s exists, use --force to overwrite.' % task.name) self.remove_section(task.name, save=False) # Add the new section self.config[task.name] = task.export_params(active=active) self.print_section(task.name) self.save() # If the task folder doesn't exist, recreate it. task_folder = os.path.join(self.repo, task.name) if not os.path.exists(task_folder): mkdir_p(task_folder) git_add(self.repo, task.name) # Commit changes git_commit(repo=self.repo, task=self.name, message="ADD task %s" % task.name) # Delete
[docs] def delete(self): '''delete the entire watcher, only if not protected. Cannot be undone. ''' self.load_config() # Check for protection if self.is_frozen(): bot.exit('watcher %s is frozen, unfreeze to delete.' % self.name) elif self.is_protected(): bot.exit('watcher %s is protected, turn off protection to delete.' % self.name) repo = os.path.dirname(self.configfile) # Ensure repository exists before delete if os.path.exists(repo): bot.info('Removing watcher %s' % self.name) shutil.rmtree(repo) else: bot.exit("%s:%s doesn't exist" %(self.name, repo))
[docs] def remove_task(self, task): '''remove a task from the watcher repo, if it exists, and the watcher is not frozen. Parameters ========== task: the name of the task to remove ''' if self.get_section(task) != None: if self.is_frozen(): bot.exit('watcher is frozen, unfreeze first.') self.remove_section(task) # If the task has a folder, remove the entire thing repo = os.path.join(self.repo, task) if os.path.exists(repo): shutil.rmtree(repo) bot.info('%s removed successfully.' % task) git_commit(self.repo, self.name, "REMOVE task %s" % task) else: bot.warning('Task %s does not exist.' % task)
# Inspect
[docs] def inspect(self, tasks=None, create_command=False): '''inspect a watcher, or one or more tasks belonging to it. This means printing the configuration for the entire watcher (if tasks is None) or just for one or more tasks. Parameters ========== tasks: one or more tasks to inspect (None will show entire file) create_command: if True, given one or more tasks, print the command to create them. ''' self.load_config() if tasks == None: tasks = self.config.sections() # If the user supplied one task: if not isinstance(tasks, list): tasks = [tasks] # Show all sections for task in tasks: # If the user doesn't want to see the create command: if create_command is False: self.print_section(task) bot.newline() else: self.print_add_task(task)
[docs] def list(self, quiet=False): '''list the watchers. If quiet is True, don't print to the screen.''' watchers = get_watchers(base=self.base, quiet=quiet) return watchers
# Protection
[docs] def protect(self, status="on"): '''protect a watcher, meaning that it cannot be deleted. This does not influence removing a task. To freeze the entire watcher, use the freeze() function. ''' self._set_status('watcher', 'protected', status) git_commit(self.repo, self.name, "PROTECT %s" % status) self.print_section('watcher')
[docs] def freeze(self): '''freeze a watcher, meaning that it along with its tasks cannot be deleted. This does not prevent the user from manual editing. ''' self._set_status('watcher', 'frozen', 'on') git_commit(self.repo, self.name, "FREEZE") self.print_section('watcher')
[docs] def unfreeze(self): '''freeze a watcher, meaning that it along with its tasks cannot be deleted. This does not prevent the user from manual editing. ''' self._set_status('watcher', 'frozen', 'off') git_commit(self.repo, self.name, "UNFREEZE") self.print_section('watcher')
def _set_status(self, section, setting, value): '''a helper function to set a status, ensuring that status value is in "on" or "off" Parameters ========== status: one of "on" or "off" name: a value to set status for. ''' if value not in ['on', 'off']: bot.exit('Status must be "on" or "off"') self.set_setting(section, setting, value) self.save()
[docs] def is_protected(self): '''return a boolean to indicate if the watcher is protected or frozen. protected indicates no delete to the watcher, but allowed delete to tasks, frozen indicates no change of anything. ''' protected = False for status in ['protected', 'frozen']: if self.get_setting('watcher', status) == "on": protected = True return protected
[docs] def is_frozen(self): '''return a boolean to indicate if the watcher is frozen. protected indicates no delete to the watcher, but allowed delete to tasks, frozen indicates no change of anything. ''' if self.get_setting('watcher', 'frozen') == "on": return True return False
# Status def _active_status(self, status='true', name=None): '''a general function to change the status, used by activate and deactivate. Parameters ========== status: must be one of true, false name: if not None, we are deactivating a task (not the watcher) ''' # Load the configuration, if not loaded self.load_config() if name == None: name = 'watcher' # Cut out early if section not in config if name not in self.config._sections: bot.exit('%s is not a valid task or section' % name) if status not in ['true', 'false']: bot.exit('status must be true or false.') # Update the status and alert the user self.set_setting(name, 'active', status) self.save() # Return the message for the commit message = "ACTIVE" if status == "false": message = "DEACTIVATE" # Add the task name if name != None: message = "%s task %s" %(message, name) bot.info('[%s|%s] active: %s' % (name, self.name, status)) return message
[docs] def activate(self, task=None): '''turn the active status of a watcher to True ''' message = self._active_status('true', task) git_commit(self.repo, self.name, message)
[docs] def deactivate(self, task=None): '''turn the active status of a watcher to false. If a task is provided, update the config value for the task to be false. ''' # If no task defined, user wants to deactiate watcher message = self._active_status('false', task) git_commit(self.repo, self.name, message)
[docs] def is_active(self, task=None): '''determine if the watcher is active by reading from the config directly if a task name is provided, check the active status of the task ''' if task == None: task = 'watcher' if self.get_setting(task, 'active', default='true') == "true": return True return False
# Get and Prepare Tasks
[docs] def has_task(self, name): '''returns True or False to indicate if the watcher has a specified task. ''' self.load_config() if self.has_section(name) and name.startswith('task'): return True return False
[docs] def get_task(self, name, save=False): '''get a particular task, based on the name. This is where each type of class should check the "type" parameter from the config, and import the correct Task class. Parameters ========== name: the name of the task to load save: if saving, will be True ''' self.load_config() task = None # Only sections that start with task- are considered tasks if name in self.config._sections and name.startswith('task'): # Task is an ordered dict, key value pairs are entries params = self.config._sections[name] # Get the task type (if removed, consider disabled) task_type = params.get('type', '') # If we get here, validate and prepare the task if task_type.startswith("url"): from .urls import Task elif task_type == 'psutils': from .psutils import Task else: bot.exit('Type %s not properly set up in get_task' % task_type) # if not valid, will return None task = Task(name, params, _save=save) return task
def _task_selected(self, task, regexp=None): '''check if a task is active and (if defined) passes user provided task names or regular expressions. Parameters ========== task: the task object to check regexp: an optional regular expression (or name) to check ''' selected = True # A task can be None if it wasn't found if task == None: selected = False # Is the task not active (undefined is active)? active = task.params.get('active', 'true') if active == "false": bot.info('Task %s is not active.' % task) selected = False # The user wants to search for a custom task name if regexp != None: if not re.search(regexp, task): bot.info('Task %s is selected to run.' % task) selected = False return selected
[docs] def get_tasks(self, regexp=None): '''get the tasks for a watcher, possibly matching a regular expression. A list of dictionaries is returned, each holding the parameters for a task. "uri" will hold the task (folder) name, active Parameters ========== regexp: if supplied, the user wants to run only tasks that match a particular pattern ''' self.load_config() tasks = [] for section in self.config._sections: # Get the task based on the section name task = self.get_task(section) # Check that the task should be run, and is valid if task != None: if self._task_selected(task, regexp) and task.valid: tasks.append(task) bot.info('Found %s contender tasks.' % len(tasks)) return tasks
# Running Tasks
[docs] def run_tasks(self, queue, parallel=True, show_progress=True): '''this run_tasks function takes a list of Task objects, each potentially a different kind of task, and extracts the parameters with task.export_params(), and the running function with task.export_func(), and hands these over to the multiprocessing worker. It's up to the Task to return some correct function from it's set of task functions that correspond with the variables. Examples ======== funcs {'task-reddit-hpc': <function watchme.watchers.urls.tasks.get_task>} tasks {'task-reddit-hpc': [('url', 'https://www.reddit.com/r/hpc'), ('active', 'true'), ('type', 'urls')]} ''' if parallel is True: return self._run_parallel(queue, show_progress) # Otherwise, run in serial results = {} # Progressbar total = len(queue) progress = 1 for task in queue: prefix = "[%s:%s/%s]" % (task.name, progress, total) if show_progress is True: bot.show_progress(progress, total, length=35, prefix=prefix) else: bot.info('Running %s' % prefix) results[task.name] = task.run() progress+=1 return results
def _run_parallel(self, queue, show_progress=True): ''' run tasks in parallel using the Workers class. Returns a dictionary (lookup) wit results, with the key being the task name Parameters ========== queue: the list of task objects to run ''' from watchme.tasks.worker import Workers # Run with multiprocessing funcs = {} tasks = {} for task in queue: # Export parameters and functions funcs[task.name] = task.export_func() tasks[task.name] = task.export_params() workers = Workers(show_progress=show_progress) return workers.run(funcs, tasks)
[docs] def run(self, regexp=None, parallel=True, test=False, show_progress=True): '''run the watcher, which should be done via the crontab, including: - checks: the instantiation of the client already ensures that the watcher folder exists, and has a configuration, and it loads. - parse: parse the tasks to be run - start: run the tasks that are defined for the watcher. - finish: after completion, commit to the repository changed files Parameters ========== regexp: if supplied, the user wants to run only tasks that match a particular pattern parallel: if True, use multiprocessing to run tasks (True) each watcher should have this setup ready to go. test: run in test mode (no saving of results) show_progress: if True, show progress bar instead of task information (defaults to True) ''' # Step 0: Each run session is given a fun name run_id = RobotNamer().generate() # Step 1: determine if the watcher is active. if self.is_active() == False and test is False: bot.exit('Watcher %s is not active.' % self.name) # Step 2: get the tasks associated with the run, a list of param dicts tasks = self.get_tasks() # Step 3: Run the tasks. This means preparing a list of funcs/params, # and then submitting with multiprocessing results = self.run_tasks(tasks, parallel, show_progress) # Finally, finish the runs. if test is False: self.finish_runs(results) else: # or print results to the screen print(json.dumps(results, indent=4))
[docs] def finish_runs(self, results): '''finish runs should take a dictionary of results, with keys as the folder name, and for each, depending on the result type, write the result to file (or update file) and then commit to git. Parameters ========== results: a dictionary of tasks, with keys as the task name, and values as the result. ''' for name, result in results.items(): task_folder = os.path.join(self.repo, name) task = self.get_task(name, save=True) # Files to be added via Git after files = [] # Ensure that the task folder exists if not os.path.exists(task_folder): mkdir_p(task_folder) git_add(self.repo, task_folder) # Case 1. The result is a list if isinstance(result, list): # Get rid of Nones, if the user accidentally added result = [r for r in result if r] if len(result) == 0: bot.error('%s returned empty list of results.' % name) # json output is specified elif task.params.get('save_as') == 'json': bot.debug('Saving single list as one json...') files.append(task._save_json(result, self.repo)) elif task.params.get('save_as') == 'json': bot.debug('Saving single list as multiple json...') files += task._save_json_list(result, self.repo) # Otherwise, sniff for list of paths elif os.path.exists(result[0]): bot.debug('Found list of paths...') files += task._save_files_list(result, self.repo) # Finally, assume just writing text to file else: bot.debug('Saving content from list to file...') files += task._save_text_list(result, self.repo) # Case 2. The result is a string elif isinstance(result, str): # if it's a path to a file, just save to repository if os.path.exists(result): files.append(task._save_file(result, self.repo)) # Otherwise, it's a string that needs to be saved to file else: files.append(task._save_text(result, self.repo)) # Case 3. The result is a dictionary elif isinstance(result, dict): files.append(task._save_json(result, self.repo)) elif result == None: bot.error('Result for task %s is None' % name) else: bot.error('Unsupported result format %s' % type(result)) # Get rid of None results (don't check excessively for None above) files = [f for f in files if f] # Add files to git, and commit files.append(write_timestamp(repo=self.repo, task=name)) git_add(repo=self.repo, files=files) git_commit(repo=self.repo, task=self.name, message="ADD results %s" % name)
# Identification def __repr__(self): return "[watcher|%s]" %self.name def __str__(self): return "[watcher|%s]" %self.name
# Settings Watcher.remove_setting = remove_setting Watcher.get_setting = get_setting Watcher.get_section = get_section Watcher.set_setting = set_setting Watcher.remove_section = remove_section Watcher.print_section = print_section Watcher.print_add_task = print_add_task # Schedule Watcher.remove_schedule = remove_schedule Watcher.get_crontab = get_crontab Watcher.update_schedule = update_schedule Watcher.has_schedule = has_schedule Watcher.get_job = get_job Watcher.clear_schedule = clear_schedule Watcher.schedule = schedule # Data Watcher.export_dict = export_dict