# Source code for samsifter.tools.filter_taxon_list
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Filter SAM files for a list of taxon IDs.
.. moduleauthor:: Florian Aldehoff <samsifter@biohazardous.de>
"""
import sys
# Abort early on interpreters older than Python 3. The message goes to
# STDERR because STDOUT is reserved for the processed SAM output, and only
# syntax that Python 2 can still parse is used so the check can actually run.
if sys.version_info[0] < 3:
    sys.stderr.write("Error, I need python 3.x or newer\n")
    sys.exit(1)
import argparse
import fileinput
import csv
from os.path import basename, splitext
# custom libraries
from samsifter.models.filter import FilterItem
from samsifter.models.parameter import FilterSwitch, FilterFilepath
from samsifter.util.arg_sanitation import check_sam, check_csv
from samsifter.util.filters import pattern_filter
# module-level constants
# TEXT: short label handed to the FilterItem created in item()
TEXT = "Filter taxa by list of taxon IDs"
# DESC: longer description, used for the FilterItem and as the argparse
# description in main()
DESC = (
    "filtering references by a list of NCBI taxon IDs given in a "
    "tab-separated CSV file"
)
def item():
    """Build the list/tree view entry that represents this filter tool.

    Returns
    -------
    FilterItem
        Item carrying this tool's command name and its two parameters
        (taxon list file and filter direction).
    """
    tool_item = FilterItem(text=TEXT, desc=DESC)

    # the command is this module's file name without directory and extension
    script_name, _ext = splitext(basename(__file__))
    tool_item.set_command(script_name)

    # required path to the taxon list file
    taxon_list = FilterFilepath(
        text="taxon list file",
        desc="tab-separated CSV file with NCBI taxon IDs in first column",
        cli_name="--list",
        default="taxa.csv",
        extensions=['csv'],
        required=True
    )
    tool_item.add_parameter(taxon_list)

    # switch between discarding and keeping entries that match the list
    direction = FilterSwitch(
        text="filter direction",
        desc="Keep or discard entries passing the filter criteria?",
        cli_name="--discard",
        default=0,
        options=["discard", "keep"]
    )
    tool_item.add_parameter(direction)

    return tool_item
def main():
    """Executable to filter SAM files for a list of taxon IDs.

    See ``--help`` for details on expected arguments. Takes input from
    either STDIN, or optional, or positional arguments. Logs messages to
    STDERR and writes processed SAM files to STDOUT.

    The taxon list is read as a tab-separated file. The first column of
    every non-empty row is wrapped as ``tax|<id>|`` and the resulting
    patterns are handed to ``pattern_filter`` together with the SAM input
    handle. The process exits via ``sys.exit()`` once filtering is done.
    """
    # parse arguments
    parser = argparse.ArgumentParser(description=DESC)
    parser.add_argument('-i', '--input',
                        type=check_sam,
                        help="specify SAM file to be analysed (default: "
                             "STDIN)",
                        required=False)
    parser.add_argument('-l', '--list',
                        type=check_csv,
                        help="tab-separated CSV file with NCBI taxon IDs "
                             "in first column",
                        required=True)
    parser.add_argument('--discard',
                        type=int,
                        help="keep or discard entries passing the filter "
                             "criteria?",
                        required=False,
                        default=0)
    # unrecognised arguments are kept and used as input files further down
    (args, remain_args) = parser.parse_known_args()

    # generate pattern list from CSV file; blank lines produce empty rows,
    # which are skipped instead of failing with IndexError on row[0]
    with open(args.list, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter='\t')
        patterns = ["tax|" + row[0] + "|" for row in reader if row]

    # open SAM file from either command line argument or STDIN
    # (fileinput falls back to STDIN when the list of files is empty)
    if args.input:
        handle = open(args.input, 'r')
    else:
        handle = fileinput.input(remain_args)

    # make sure the input is closed even if filtering raises;
    # --discard 0 (the default) passes discard=True to pattern_filter
    try:
        pattern_filter(patterns, handle, discard=(args.discard == 0))
    finally:
        handle.close()
    sys.exit()
# run the command line tool only when executed as a script, not on import
if __name__ == "__main__":
    main()