Source code for samsifter.util.serialize

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""(De-)Serialization of SamSifter workflows.

.. moduleauthor:: Florian Aldehoff (f.aldehoff@student.uni-tuebingen.de)
"""
import sys
import xml.etree.ElementTree as ET
import xml.dom.minidom as minidom


from samsifter.models.filter import FilterItem
from samsifter.models.parameter import (
    FilterParameter, FilterThreshold, FilterFilepath, FilterSwitch
)


class WorkflowSerializer():
    """(De-)Serializing workflow data for permanent storage.

    Uses ElementTree and minidom libraries to write to and read from XML
    files. All methods are classmethods; the class holds no state.
    """

    @classmethod
    def serialize(cls, workflow, filename):
        """Serializes workflow to XML file.

        Parameters
        ----------
        workflow : Workflow
            SamSifter workflow object to be serialized.
        filename : str
            Writable path to new XML file.

        Returns
        -------
        bool
            True on success, False if the workflow is uninitialized (None).
        """
        return cls.tree_to_file(cls.workflow_to_xml(workflow), filename)

    @classmethod
    def deserialize(cls, workflow, filename):
        """De-serializes workflow from XML file.

        Parameters
        ----------
        workflow : Workflow
            SamSifter workflow object to be deserialized into. It is cleared
            and then populated in place.
        filename : str
            Readable path to existing XML file.

        Returns
        -------
        Workflow
            The populated workflow object that was passed in (this is what
            :py:meth:`workflow_from_xml` returns).
        """
        return cls.workflow_from_xml(cls.tree_from_file(filename), workflow)

    @classmethod
    def tree_from_file(cls, filename):
        """Initializes element tree from XML file.

        Parameters
        ----------
        filename : str
            Readable path to existing XML file.

        Returns
        -------
        ElementTree
            Tree representation of XML file contents.
        """
        return ET.ElementTree(file=filename)

    @classmethod
    def tree_to_file(cls, tree, filename):
        """Write element tree to XML file.

        Parameters
        ----------
        tree : ElementTree
            Tree representation of workflow.
        filename : str
            Writable path to new XML file.

        Returns
        -------
        bool
            True on success, False if there is no tree to write. I/O errors
            raised by the underlying write are propagated to the caller.
        """
        if tree is None:
            # workflow_to_xml() yields None for an uninitialized workflow
            return False
        # ElementTree.write() returns None, so report success explicitly
        tree.write(filename, xml_declaration=True, encoding='UTF-8',
                   method='xml')
        return True

    @classmethod
    def workflow_to_xml(cls, workflow):
        """Generate XML tree from workflow container.

        Parameters
        ----------
        workflow : Workflow
            SamSifter workflow container object to be serialized.

        Returns
        -------
        ElementTree
            Tree representation of workflow
        None
            If workflow is uninitialized.
        """
        if workflow is None:
            return None

        root = ET.Element("workflow")

        in_file = ET.SubElement(root, 'filepath')
        in_file.set('type', 'input')
        in_file.text = workflow.get_in_filename()

        out_file = ET.SubElement(root, 'filepath')
        out_file.set('type', 'output')
        out_file.text = workflow.get_out_filename()

        filters = ET.SubElement(root, 'pipeline')
        # NOTE(review): this method calls workflow.getModel() while
        # workflow_from_xml() calls workflow.get_model() -- confirm which
        # accessor the Workflow class actually provides.
        for position, fltr in enumerate(workflow.getModel().iterate_items()):
            elem = ET.SubElement(filters, 'tool')
            elem.set('index', str(position))
            elem.set('type', 'filter')
            elem.set('name', str(fltr.text()))
            elem.set('desc', str(fltr.desc))
            elem.set('command', str(fltr.command))
            elem.set('icon_path', str(fltr.icon_path))

            # store input/output restrictions
            elem.set('input_format', str(fltr.get_input_format()))
            elem.set('input_sorting', str(fltr.get_input_sorting()))
            elem.set('input_compression', str(fltr.get_input_compression()))
            elem.set('output_format', str(fltr.get_output_format()))
            elem.set('output_sorting', str(fltr.get_output_sorting()))
            elem.set('output_compression',
                     str(fltr.get_output_compression()))

            for param in fltr.parameters:
                sub = ET.SubElement(elem, 'parameter')

                # subclass-specific attributes; the base class is tested
                # last so that it does not shadow the other branches
                if isinstance(param, FilterThreshold):
                    sub.set('type', 'threshold')
                    sub.set('minimum', str(param.get_minimum()))
                    sub.set('maximum', str(param.get_maximum()))
                    sub.set('precision', str(param.get_precision()))
                    sub.set('unit', str(param.get_unit()))
                elif isinstance(param, FilterFilepath):
                    sub.set('type', 'filepath')
                    sub.set('readable', str(param.readable))
                    sub.set('writable', str(param.writable))
                    for idx, ext in enumerate(param.get_extensions()):
                        suffix = ET.SubElement(sub, 'suffix')
                        suffix.set('extension', ext)
                        suffix.set('index', str(idx))
                elif isinstance(param, FilterSwitch):
                    sub.set('type', 'switch')
                    for idx, opt in enumerate(param.get_options()):
                        option = ET.SubElement(sub, 'option')
                        option.set('value', str(opt))
                        option.set('index', str(idx))
                        # always tag the option so that None options can be
                        # told apart from strings when reading the file back
                        option.set('type', cls.determine_type(opt))
                elif isinstance(param, FilterParameter):
                    sub.set('type', 'parameter')

                # attributes shared by all parameter types
                sub.set('name', param.text)
                sub.set('desc', param.get_description())
                sub.set('cli_name', str(param.get_cli_name()))
                sub.set('required', str(param.is_required()))
                sub.set('active', str(param.is_active()))

                # multi-type attributes (can be None, str, int, float, bool)
                value = ET.SubElement(sub, 'value')
                typed_value = param.get_value()
                value.set('type', cls.determine_type(typed_value))
                value.text = str(typed_value)

                default = ET.SubElement(sub, 'default')
                typed_default = param.get_default()
                default.set('type', cls.determine_type(typed_default))
                default.text = str(typed_default)

        return ET.ElementTree(root)

    @classmethod
    def _cast_typed_text(cls, type_name, text):
        """Cast a string from the XML file to the type named next to it.

        Parameters
        ----------
        type_name : str or None
            Content of the ``type`` attribute ('None', 'bool', 'int',
            'float' or 'str'); None if the attribute is missing.
        text : str or None
            Raw element text or attribute value; None if it is missing.

        Returns
        -------
        None, bool, int, float or str
            Typed value. Unknown or missing type names fall back to
            :py:meth:`singletons`.
        """
        if type_name == 'int':
            return int(cls.singletons(text))
        if type_name == 'float':
            return float(cls.singletons(text))
        if type_name == 'str':
            # an empty string is written as an empty element and parsed
            # back as None; it must not turn into the literal 'None'
            return '' if text is None else str(text)
        # 'None', 'bool' and anything unrecognized
        return cls.singletons(text)

    @classmethod
    def workflow_from_xml(cls, tree, workflow):
        """Generate workflow from XML element tree.

        Parameters
        ----------
        tree : ElementTree
            Tree representation of workflow.
        workflow : Workflow
            SamSifter workflow container object to be de-serialized into.
            It is cleared before being populated.

        Returns
        -------
        workflow : Workflow
            SamSifter workflow container object.
        None
            If tree is uninitialized.
        """
        if tree is None:
            return None

        root = tree.getroot()
        workflow.clear()

        # retrieve input and output filepaths
        for filepath in root.iter(tag='filepath'):
            path_type = filepath.get('type')
            if path_type == 'input':
                workflow.set_in_filename(filepath.text)
            if path_type == 'output':
                workflow.set_out_filename(filepath.text)

        # retrieve filter pipeline
        for pipeline in root.iter(tag='pipeline'):
            for tool in pipeline.iter(tag='tool'):
                text = cls.singletons(tool.get('name'))
                desc = cls.singletons(tool.get('desc'))
                command = cls.singletons(tool.get('command'))
                icon_path = cls.singletons(tool.get('icon_path'))

                # retrieve input and output restrictions
                input_format = cls.singletons(tool.get('input_format'))
                input_sorting = cls.singletons(tool.get('input_sorting'))
                input_compression = cls.singletons(
                    tool.get('input_compression'))
                output_format = cls.singletons(tool.get('output_format'))
                output_sorting = cls.singletons(tool.get('output_sorting'))
                output_compression = cls.singletons(
                    tool.get('output_compression'))

                item = FilterItem(text, desc, icon_path)
                item.set_command(command)
                item.set_input_format(input_format)
                item.set_input_sorting(input_sorting)
                item.set_input_compression(input_compression)
                item.set_output_format(output_format)
                item.set_output_sorting(output_sorting)
                item.set_output_compression(output_compression)

                for param in tool.iter(tag='parameter'):
                    param_name = param.get('name')
                    param_desc = param.get('desc')
                    cli_name = param.get('cli_name')
                    required = cls.singletons(param.get('required'))
                    active = cls.singletons(param.get('active'))

                    # reset for every parameter so that a missing <value>
                    # or <default> element can neither raise a NameError
                    # nor leak the previous parameter's setting
                    value = None
                    default = None

                    # retrieve value and default, cast according to type
                    for val in param.iter(tag='value'):
                        value = cls._cast_typed_text(val.get('type'),
                                                     val.text)
                    for dflt in param.iter(tag='default'):
                        default = cls._cast_typed_text(dflt.get('type'),
                                                       dflt.text)

                    # check for parameter type, initialise subclasses
                    param_type = param.get('type')
                    parameter = None
                    if param_type == 'threshold':
                        minimum = float(cls.singletons(param.get('minimum')))
                        maximum = float(cls.singletons(param.get('maximum')))
                        precision = int(
                            cls.singletons(param.get('precision')))
                        unit = cls.singletons(param.get('unit'))
                        parameter = FilterThreshold(
                            param_name, param_desc, cli_name, default,
                            minimum, maximum, precision, unit, value,
                            required, active
                        )
                    elif param_type == 'filepath':
                        readable = cls.singletons(param.get('readable'))
                        writable = cls.singletons(param.get('writable'))
                        extensions = [
                            cls.singletons(sffx.get('extension'))
                            for sffx in param.iter('suffix')
                        ]
                        if not extensions:
                            extensions = None
                        parameter = FilterFilepath(
                            param_name, param_desc, cli_name, default,
                            extensions, readable, writable, value,
                            required, active
                        )
                    elif param_type == 'switch':
                        options = [
                            cls._cast_typed_text(choice.get('type'),
                                                 choice.get('value'))
                            for choice in param.iter(tag='option')
                        ]
                        if not options:
                            options = None
                        parameter = FilterSwitch(
                            param_name, param_desc, cli_name, default,
                            options, value, required, active
                        )
                    elif param_type == 'parameter':
                        # no special parameters for this base class
                        parameter = FilterParameter(
                            param_name, param_desc, cli_name, default,
                            value, required, active
                        )
                    else:
                        print("failed to determine parameter type",
                              file=sys.stderr)
                        continue

                    item.add_parameter(parameter)

                workflow.get_model().insertItem(item)

        return workflow

    @classmethod
    def singletons(cls, value):
        """Replaces 'None', 'False' and 'True' strings with actual singletons.

        Helper method for deserialization from strings to actual Python
        singleton entities.

        Parameters
        ----------
        value : str
            Value string from XML file.

        Returns
        -------
        None
            If string represents None.
        bool
            If string represents True or False.
        str
            If string does not represent a singleton.
        """
        if value == 'None':
            return None
        if value == 'False':
            return False
        if value == 'True':
            return True
        return value

    @classmethod
    def determine_type(cls, value):
        """Determines type of a typed value and return it as string.

        Helper method for serialization of typed Python values to string
        representations in XML.

        Parameters
        ----------
        value : bool, str, int, float
            Typed value.

        Returns
        -------
        str
            String representation of value type; 'str' for any type that is
            not explicitly supported.
        """
        if value is None:
            return 'None'
        if isinstance(value, str):
            return 'str'
        # bool has to be tested before int because bool subclasses int
        if isinstance(value, bool):
            return 'bool'
        if isinstance(value, int):
            return 'int'
        if isinstance(value, float):
            return 'float'
        return 'str'

    @classmethod
    def prettify(cls, elem, indent="  "):
        """Create a pretty-printed XML string for an individual tree element.

        Parameters
        ----------
        elem : Element
            Tree element.
        indent : str, optional
            Indentation character(s) to distinguish tree levels, defaults to
            doublespace.

        Returns
        -------
        str
            Pretty XML string representing the element.
        """
        rough_string = ET.tostring(elem, encoding='UTF-8', method='xml')
        reparsed = minidom.parseString(rough_string)
        return reparsed.toprettyxml(indent)

    @classmethod
    def tree_to_str(cls, tree, pretty=False):
        """Converts XML element tree to optionally indented text string.

        Parameters
        ----------
        tree : ElementTree
            Tree to be printed.
        pretty : bool, optional
            Enable indentation and multiline string representation for better
            legibility of the XML, default to False.

        Returns
        -------
        str
            XML string representing the tree.
        """
        root = tree.getroot()
        if pretty:
            return cls.prettify(root)
        # ET.tostring() yields bytes for a named encoding; decode so that
        # both branches return str as documented
        return ET.tostring(root, encoding='UTF-8', method='xml').decode(
            'UTF-8')