Source code for samsifter.tools.filter_taxon_list

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Filter SAM files for a list of taxon IDs.

.. moduleauthor:: Florian Aldehoff <samsifter@biohazardous.de>
"""

import sys
# Abort early when run under Python 2. sys.exit() with a string argument
# writes the message to STDERR and terminates with exit status 1, so STDOUT
# stays clean. Unlike the exit() helper injected by the site module it is
# always available, and this call form still parses under Python 2.
if sys.version_info[0] < 3:
    sys.exit("Error, I need python 3.x or newer")

import argparse
import fileinput
import csv
from os.path import basename, splitext

# custom libraries
from samsifter.models.filter import FilterItem
from samsifter.models.parameter import FilterSwitch, FilterFilepath
from samsifter.util.arg_sanitation import check_sam, check_csv
from samsifter.util.filters import pattern_filter

# Module-level constants: TEXT is the short label and DESC the longer
# description handed to FilterItem in item(); DESC is also used as the
# argparse description in main().
TEXT = "Filter taxa by list of taxon IDs"
DESC = (
    "filtering references by a list of NCBI taxon IDs "
    "given in a tab-separated CSV file"
)


def item():
    """Build the item that represents this tool in list and tree views.

    The item is labelled with the module-level ``TEXT`` and ``DESC``, uses
    this file's base name (without extension) as its command, and carries
    two parameters: the taxon list file (``--list``) and the filter
    direction (``--discard``).

    Returns
    -------
    FilterItem
        Item for use in item-based list and tree views.
    """
    tool_item = FilterItem(text=TEXT, desc=DESC)

    # the command is the name of this script without its file extension
    script_name, _extension = splitext(basename(__file__))
    tool_item.set_command(script_name)

    taxon_list = FilterFilepath(
        text="taxon list file",
        desc="tab-separated CSV file with NCBI taxon IDs in first column",
        cli_name="--list",
        default="taxa.csv",
        extensions=['csv'],
        required=True
    )
    tool_item.add_parameter(taxon_list)

    direction = FilterSwitch(
        text="filter direction",
        desc="Keep or discard entries passing the filter criteria?",
        cli_name="--discard",
        default=0,
        options=["discard", "keep"]
    )
    tool_item.add_parameter(direction)

    return tool_item
def main():
    """Executable to filter SAM files for a list of taxon IDs.

    See ``--help`` for details on expected arguments. Takes input from
    either STDIN, or optional, or positional arguments. Logs messages to
    STDERR and writes processed SAM files to STDOUT.

    Each non-empty row of the tab-separated list file contributes one
    pattern of the form ``tax|<first column>|``. The patterns and the input
    handle are passed to ``pattern_filter``; a ``--discard`` value of 0 (the
    default) is forwarded as ``discard=True``, any other value as
    ``discard=False``.

    Raises
    ------
    SystemExit
        Always, once filtering has finished (or earlier if argument
        parsing fails).
    """
    # parse arguments
    parser = argparse.ArgumentParser(description=DESC)
    parser.add_argument('-i', '--input',
                        type=check_sam,
                        help="specify SAM file to be analysed (default: "
                             "STDIN)",
                        required=False)
    parser.add_argument('-l', '--list',
                        type=check_csv,
                        help="tab-separated CSV file with NCBI taxon IDs "
                             "in first column",
                        required=True)
    parser.add_argument('--discard',
                        type=int,
                        help="keep or discard entries passing the filter "
                             "criteria?",
                        required=False,
                        default=0)
    # unknown arguments are kept and treated as input files further below
    (args, remain_args) = parser.parse_known_args()

    # generate pattern list from CSV file; csv.reader yields an empty list
    # for blank lines, which are skipped instead of failing on row[0]
    with open(args.list, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter='\t')
        patterns = ["tax|" + row[0] + "|" for row in reader if row]

    # open SAM file from either command line argument or STDIN
    if args.input:
        handle = open(args.input, 'r')
    else:
        handle = fileinput.input(remain_args)

    # close the input even if filtering fails half-way
    try:
        pattern_filter(patterns, handle, discard=(args.discard == 0))
    finally:
        handle.close()

    # sys.exit() rather than the site-provided exit() helper, which is not
    # guaranteed to exist (e.g. when running with ``python -S``)
    sys.exit()
# Run the filter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()