# Source code for samsifter.util.validation
# -*- coding: utf-8 -*-
"""Validation of workflows.
.. moduleauthor:: Florian Aldehoff <samsifter@biohazardous.de>
"""
import os
from os.path import isfile, dirname, exists
# Qt4
from PyQt4.QtCore import QObject
# custom libraries
from samsifter.models.parameter import FilterFilepath
[docs]class WorkflowValidator(QObject):
"""Validator for SamSifter workflows.
Starts off with empty error lists for input, model and output that can
be retrieved individually after running ``validate()``. The individual
steps of validation should always be run in the same order.
Lists of error messages can be retrieved individually for each of the three
validation steps
* input validation
* filter model validation
* output validation
by calling the corresponding getter methods. They will be reported as one
concatenated list when running the full validation.
"""
def __init__(self, workflow, parent=None):
    """Create a validator bound to a single workflow.

    Parameters
    ----------
    workflow : Workflow
        The workflow whose input, model and output get validated.
    parent : QObject
        Qt parent object handed on to the base class, None by default.
    """
    super(WorkflowValidator, self).__init__(parent)
    self.workflow = workflow

    # one (initially empty) list of messages per validation step
    self.input_errors, self.model_errors, self.output_errors = [], [], []

    # state of the data stream, starting out as default MALT output
    self.stream_format = 'SAM'
    self.stream_sorting = 'queryname'
    self.stream_compr = 'uncompressed'
def validate(self):
    """Run the input, model and output checks one after another.

    Afterwards the validity flag of the workflow is updated and its
    ``validity_changed`` signal is emitted with all messages joined by
    newlines.

    Returns
    -------
    list of str
        Error messages occurring in input, model and output validation,
        concatenated in this order.
    """
    # the stream parameters only make sense if the steps run in exactly
    # this sequence
    for step in (self.validate_input,
                 self.validate_model,
                 self.validate_output):
        step()

    messages = []
    messages.extend(self.input_errors)
    messages.extend(self.model_errors)
    messages.extend(self.output_errors)

    # a workflow counts as valid only if no step reported an error
    is_valid = not messages
    self.workflow.set_valid(is_valid)

    # notify listeners about the validity together with the messages
    self.workflow.validity_changed.emit("\n".join(messages))
    return messages
def validate_output(self):
    """Validate output file of workflow.

    Checks that an output filename is set and that its directory exists
    and is writable, derives the expected format and compression from the
    filename extension and compares these expectations with the current
    stream state (``stream_format``, ``stream_sorting``, ``stream_compr``).
    All messages are collected in ``self.output_errors`` and the result is
    reported to the workflow via ``set_outfile_valid()``.
    """
    self.output_errors = []

    # assuming output as expected by MEGAN and SAM2RMA
    out_format = 'SAM'
    out_sorting = 'queryname'
    out_compr = 'any'

    outfile = self.workflow.get_out_filename()
    if outfile is None:
        self.output_errors.append("output: no filename given")
    else:
        # a bare filename has an empty dirname and refers to the current
        # working directory, which must not be reported as missing
        outdir = dirname(outfile) or os.curdir
        if not (exists(outdir) and os.access(outdir, os.W_OK)):
            self.output_errors.append(
                "output: directory missing or not writable"
            )

        if outfile.endswith(('.sam', '.SAM')):
            out_format = 'SAM'
            out_compr = 'uncompressed'
        elif outfile.endswith(('.sam.gz', '.SAM.GZ')):
            out_format = 'SAM'
            out_compr = 'gzip'
        elif outfile.endswith(('.bam', '.BAM')):
            out_format = 'BAM'
            out_compr = 'uncompressed'
        else:
            # unknown extension: the stream is taken over unchanged
            out_format = 'as_input'
            out_compr = 'as_input'
            out_sorting = 'as_input'

    # 'any' and 'as_input' are placeholders that impose no requirement on
    # the stream; comparing them literally would always report a mismatch
    unconstrained = ('any', 'as_input')

    # check compatibility with post-processing and MEGAN
    if out_format not in unconstrained and self.stream_format != out_format:
        self.output_errors.append(
            "output: expecting %s format but receiving %s format"
            % (out_format, self.stream_format)
        )
    if (out_sorting not in unconstrained
            and self.stream_sorting != out_sorting):
        self.output_errors.append(
            "output: expecting %s sorting but receiving %s sorting"
            % (out_sorting, self.stream_sorting)
        )
    if out_compr not in unconstrained and self.stream_compr != out_compr:
        self.output_errors.append(
            "output: expecting %s format but receiving %s format"
            % (out_compr, self.stream_compr)
        )

    # only an error-free output file is considered valid
    self.workflow.set_outfile_valid(not self.output_errors)
def validate_model(self):
    """Validate workflow model.

    Walks through all items of the workflow model in order. For each item
    the expected input format, sorting and compression are compared with
    the current stream state, and every required ``FilterFilepath``
    parameter is checked for an existing, readable file. Afterwards the
    stream state is updated with the output properties of the item unless
    these are ``'as_input'``. Messages are collected in
    ``self.model_errors`` and each item is flagged via ``set_valid()``.
    """
    self.model_errors = []

    # validate model
    model = self.workflow.get_model()
    # steps are numbered from 1 in the messages
    for idx, item in enumerate(model.iterate_items(), 1):
        item.set_valid(True)

        exp_format = item.get_input_format()
        exp_sorting = item.get_input_sorting()
        exp_compr = item.get_input_compression()

        # an expectation of 'any' accepts every stream state
        if self.stream_format != exp_format and exp_format != 'any':
            self.model_errors.append(
                "step %i: expecting %s format but receiving %s format"
                % (idx, exp_format, self.stream_format)
            )
            item.set_valid(False)
        if self.stream_sorting != exp_sorting and exp_sorting != 'any':
            self.model_errors.append(
                "step %i: expecting %s sorting but receiving %s sorting"
                % (idx, exp_sorting, self.stream_sorting)
            )
            item.set_valid(False)
        if self.stream_compr != exp_compr and exp_compr != 'any':
            self.model_errors.append(
                "step %i: expecting %s format but receiving %s format"
                % (idx, exp_compr, self.stream_compr)
            )
            item.set_valid(False)

        # validate parameters
        for param in item.get_parameters():
            if not (isinstance(param, FilterFilepath)
                    and param.is_required()):
                continue
            path = param.get_value()
            # an unset path (None or empty) counts as missing; handing
            # None to isfile() would raise TypeError and abort validation
            if not (path and isfile(path) and os.access(path, os.R_OK)):
                self.model_errors.append(
                    "step %i: required parameter %s either not existing "
                    "or not readable" % (idx, param)
                )
                item.set_valid(False)

        # the output of this item is the input of the next one
        if item.get_output_format() != 'as_input':
            self.stream_format = item.get_output_format()
        if item.get_output_sorting() != 'as_input':
            self.stream_sorting = item.get_output_sorting()
        if item.get_output_compression() != 'as_input':
            self.stream_compr = item.get_output_compression()

    # initiate repaint of view to reflect validity changes in model
    # NOTE(review): rows 1..rowCount() look off by one compared to the
    # usual 0..rowCount()-1 range of Qt models -- confirm against the model
    model.dataChanged.emit(model.index(1), model.index(model.rowCount()))
# Getters & Setters
def get_output_errors(self):
    """Return the output validation messages as one string.

    Returns
    -------
    str
        Entries of ``output_errors`` joined by newlines.
    """
    separator = "\n"
    return separator.join(self.output_errors)
def get_model_errors(self):
    """Return the model validation messages as one string.

    Returns
    -------
    str
        Entries of ``model_errors`` joined by newlines.
    """
    separator = "\n"
    return separator.join(self.model_errors)