Module hocort.pipelines.bowtie2
Expand source code
import time
import os
import logging
from hocort.pipelines.utils import debug_log_args
from hocort.aligners.bowtie2 import Bowtie2 as bt2
from hocort.parse.sam import SAM
from hocort.parse.parser import ArgParser
import hocort.execute as exe
# Module-level logger shared by the pipeline class below.
# NOTE(review): the logger is named after the file path (__file__) rather than the
# conventional dotted module name (__name__) — confirm this is intentional.
logger = logging.getLogger(__file__)
class Bowtie2():
    """
    Bowtie2 pipeline which maps reads to a genome and includes/excludes matching reads from the output FastQ file/-s.

    """
    def run(self, idx, seq1, out1, seq2=None, out2=None, mfilter=True, preset='end-to-end', threads=1, options=''):
        """
        Run function which starts the pipeline.

        Parameters
        ----------
        idx : string
            Path where the index is located.
        seq1 : string
            Path where the first input FastQ file is located.
        out1 : string
            Path where the first output FastQ file will be written.
        seq2 : string
            Path where the second input FastQ file is located.
        out2 : string
            Path where the second output FastQ file will be written.
        mfilter : bool
            Whether to output mapped/unmapped sequences.
            True: output unmapped sequences
            False: output mapped sequences
        preset : string
            Bowtie2 execution mode. Can either be 'local' or 'end-to-end'.
            Any other value only triggers a warning and adds no mode flag.
        threads : int
            Number of threads to use.
        options : string
            An options string where arguments may be defined.
            Overrides "preset" argument.

        Returns
        -------
        returncode : int
            0 if every executed process returned 0, otherwise 1.
            1 is also returned when the aligner does not produce a command.

        Raises
        ------
        ValueError
            If input FastQ_2 file is given without output FastQ_2.
            The called helpers may also raise it if disallowed characters are
            found in input (not visible in this class — confirm).

        """
        # Must stay the first statement: locals() then holds only the call arguments.
        debug_log_args(logger,
                       self.run.__name__,
                       locals())

        if seq2 and not out2:
            raise ValueError('Input FastQ_2 was given, but no output FastQ_2.')

        if preset == 'local':
            final_options = ['--local']
        elif preset == 'end-to-end':
            final_options = ['--end-to-end']
        else:
            final_options = []
            logger.warning(f'Invalid preset: {preset}')
        # A non-empty options string replaces the preset flag entirely.
        # NOTE(review): the whole string is passed on as ONE list element; whether it is
        # split into separate arguments depends on the aligner/executor — confirm.
        if options:
            final_options = [options]

        logger.info(f'Running pipeline: {self.__class__.__name__}')
        start_time = time.time()

        bowtie2_cmd = bt2().align(idx,
                                  seq1,
                                  seq2=seq2,
                                  threads=threads,
                                  options=final_options)
        # The aligner signals that no command could be built by returning None.
        if bowtie2_cmd is None:
            return 1

        fastq_cmd = SAM.sam_to_fastq(out1=out1,
                                     out2=out2,
                                     threads=threads,
                                     mfilter=mfilter)

        # Both command sequences are concatenated and executed together with pipe=True.
        returncodes = exe.execute(bowtie2_cmd + fastq_cmd,
                                  pipe=True)
        logger.debug(returncodes)
        if any(returncode != 0 for returncode in returncodes):
            return 1

        end_time = time.time()
        logger.info(f'Pipeline {self.__class__.__name__} run time: {end_time - start_time} seconds')
        return 0

    def interface(self, args):
        """
        Main function for the user interface. Parses arguments and starts the pipeline.

        Parameters
        ----------
        args : list
            This list is parsed by ArgumentParser.

        Returns
        -------
        returncode : int
            The value returned by run(): 0 on success, 1 on failure.

        """
        parser = ArgParser(
            description=f'{self.__class__.__name__} pipeline',
            usage=f'hocort map {self.__class__.__name__} [-h] [--threads <int>] [--filter <bool>] [--preset <str>] [-c=<str>] -x <idx> -i <fastq_1> [<fastq_2>] -o <fastq_1> [<fastq_2>]'
        )
        parser.add_argument(
            '-x',
            '--index',
            required=True,
            type=str,
            metavar='<idx>',
            help='str: path to Bowtie2 index (required)'
        )
        parser.add_argument(
            '-i',
            '--input',
            required=True,
            type=str,
            nargs='+',
            metavar=('<fastq_1>', '<fastq_2>'),
            help='str: path to sequence files, max 2 (required)'
        )
        parser.add_argument(
            '-o',
            '--output',
            required=True,
            type=str,
            nargs='+',
            metavar=('<fastq_1>', '<fastq_2>'),
            help='str: path to output files, max 2 (required)'
        )
        parser.add_argument(
            '-t',
            '--threads',
            required=False,
            type=int,
            metavar='<int>',
            default=os.cpu_count(),
            help='int: number of threads (default: max available on machine)'
        )
        parser.add_argument(
            '-p',
            '--preset',
            required=False,
            choices=['local', 'end-to-end'],
            default='end-to-end',
            help='str: operation mode (default: end-to-end)'
        )
        parser.add_argument(
            '-f',
            '--filter',
            required=False,
            choices=['true', 'false'],
            default='true',
            help='str: set to false to output mapped sequences, true to output unmapped sequences (default: true)'
        )
        parser.add_argument(
            '-c',
            '--config',
            required=False,
            type=str,
            metavar='<str>',
            help='str: used to pass along arguments to the aligner, use with caution, usage: -c="list arguments here"'
        )
        parsed = parser.parse_args(args=args)

        seq = parsed.input
        out = parsed.output
        # nargs='+' accepts any number of paths, but only the first two are used below.
        # Warn instead of dropping the extra files silently.
        if len(seq) > 2 or len(out) > 2:
            logger.warning('More than 2 input/output files were given; only the first 2 of each are used.')

        # os.cpu_count() may return None and 0 is not a usable thread count: fall back to 1.
        threads = parsed.threads or 1
        mfilter = parsed.filter == 'true'
        config = parsed.config or ''

        seq1 = seq[0]
        seq2 = seq[1] if len(seq) >= 2 else None
        out1 = out[0]
        out2 = out[1] if len(out) >= 2 else None

        return self.run(parsed.index,
                        seq1,
                        out1,
                        out2=out2,
                        seq2=seq2,
                        mfilter=mfilter,
                        threads=threads,
                        preset=parsed.preset,
                        options=config)
Classes
class Bowtie2
-
Bowtie2 pipeline which maps reads to a genome and includes or excludes the matching reads from the output FastQ file(s).
Expand source code
class Bowtie2():
    """
    Bowtie2 pipeline which maps reads to a genome and includes/excludes matching reads from the output FastQ file/-s.

    """
    def run(self, idx, seq1, out1, seq2=None, out2=None, mfilter=True, preset='end-to-end', threads=1, options=''):
        """
        Run function which starts the pipeline.

        Parameters
        ----------
        idx : string
            Path where the index is located.
        seq1 : string
            Path where the first input FastQ file is located.
        out1 : string
            Path where the first output FastQ file will be written.
        seq2 : string
            Path where the second input FastQ file is located.
        out2 : string
            Path where the second output FastQ file will be written.
        mfilter : bool
            Whether to output mapped/unmapped sequences.
            True: output unmapped sequences
            False: output mapped sequences
        preset : string
            Bowtie2 execution mode. Can either be 'local' or 'end-to-end'.
            Any other value only triggers a warning and adds no mode flag.
        threads : int
            Number of threads to use.
        options : string
            An options string where arguments may be defined.
            Overrides "preset" argument.

        Returns
        -------
        returncode : int
            0 if every executed process returned 0, otherwise 1.
            1 is also returned when the aligner does not produce a command.

        Raises
        ------
        ValueError
            If input FastQ_2 file is given without output FastQ_2.
            The called helpers may also raise it if disallowed characters are
            found in input (not visible in this class — confirm).

        """
        # Must stay the first statement: locals() then holds only the call arguments.
        debug_log_args(logger,
                       self.run.__name__,
                       locals())

        if seq2 and not out2:
            raise ValueError('Input FastQ_2 was given, but no output FastQ_2.')

        if preset == 'local':
            final_options = ['--local']
        elif preset == 'end-to-end':
            final_options = ['--end-to-end']
        else:
            final_options = []
            logger.warning(f'Invalid preset: {preset}')
        # A non-empty options string replaces the preset flag entirely.
        # NOTE(review): the whole string is passed on as ONE list element; whether it is
        # split into separate arguments depends on the aligner/executor — confirm.
        if options:
            final_options = [options]

        logger.info(f'Running pipeline: {self.__class__.__name__}')
        start_time = time.time()

        bowtie2_cmd = bt2().align(idx,
                                  seq1,
                                  seq2=seq2,
                                  threads=threads,
                                  options=final_options)
        # The aligner signals that no command could be built by returning None.
        if bowtie2_cmd is None:
            return 1

        fastq_cmd = SAM.sam_to_fastq(out1=out1,
                                     out2=out2,
                                     threads=threads,
                                     mfilter=mfilter)

        # Both command sequences are concatenated and executed together with pipe=True.
        returncodes = exe.execute(bowtie2_cmd + fastq_cmd,
                                  pipe=True)
        logger.debug(returncodes)
        if any(returncode != 0 for returncode in returncodes):
            return 1

        end_time = time.time()
        logger.info(f'Pipeline {self.__class__.__name__} run time: {end_time - start_time} seconds')
        return 0

    def interface(self, args):
        """
        Main function for the user interface. Parses arguments and starts the pipeline.

        Parameters
        ----------
        args : list
            This list is parsed by ArgumentParser.

        Returns
        -------
        returncode : int
            The value returned by run(): 0 on success, 1 on failure.

        """
        parser = ArgParser(
            description=f'{self.__class__.__name__} pipeline',
            usage=f'hocort map {self.__class__.__name__} [-h] [--threads <int>] [--filter <bool>] [--preset <str>] [-c=<str>] -x <idx> -i <fastq_1> [<fastq_2>] -o <fastq_1> [<fastq_2>]'
        )
        parser.add_argument(
            '-x',
            '--index',
            required=True,
            type=str,
            metavar='<idx>',
            help='str: path to Bowtie2 index (required)'
        )
        parser.add_argument(
            '-i',
            '--input',
            required=True,
            type=str,
            nargs='+',
            metavar=('<fastq_1>', '<fastq_2>'),
            help='str: path to sequence files, max 2 (required)'
        )
        parser.add_argument(
            '-o',
            '--output',
            required=True,
            type=str,
            nargs='+',
            metavar=('<fastq_1>', '<fastq_2>'),
            help='str: path to output files, max 2 (required)'
        )
        parser.add_argument(
            '-t',
            '--threads',
            required=False,
            type=int,
            metavar='<int>',
            default=os.cpu_count(),
            help='int: number of threads (default: max available on machine)'
        )
        parser.add_argument(
            '-p',
            '--preset',
            required=False,
            choices=['local', 'end-to-end'],
            default='end-to-end',
            help='str: operation mode (default: end-to-end)'
        )
        parser.add_argument(
            '-f',
            '--filter',
            required=False,
            choices=['true', 'false'],
            default='true',
            help='str: set to false to output mapped sequences, true to output unmapped sequences (default: true)'
        )
        parser.add_argument(
            '-c',
            '--config',
            required=False,
            type=str,
            metavar='<str>',
            help='str: used to pass along arguments to the aligner, use with caution, usage: -c="list arguments here"'
        )
        parsed = parser.parse_args(args=args)

        seq = parsed.input
        out = parsed.output
        # nargs='+' accepts any number of paths, but only the first two are used below.
        # Warn instead of dropping the extra files silently.
        if len(seq) > 2 or len(out) > 2:
            logger.warning('More than 2 input/output files were given; only the first 2 of each are used.')

        # os.cpu_count() may return None and 0 is not a usable thread count: fall back to 1.
        threads = parsed.threads or 1
        mfilter = parsed.filter == 'true'
        config = parsed.config or ''

        seq1 = seq[0]
        seq2 = seq[1] if len(seq) >= 2 else None
        out1 = out[0]
        out2 = out[1] if len(out) >= 2 else None

        return self.run(parsed.index,
                        seq1,
                        out1,
                        out2=out2,
                        seq2=seq2,
                        mfilter=mfilter,
                        threads=threads,
                        preset=parsed.preset,
                        options=config)
Methods
def interface(self, args)
-
Main function for the user interface. Parses arguments and starts the pipeline.
Parameters
args
:list
- This list is parsed by ArgumentParser.
Returns
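returncode
:int
- Return code of the pipeline run started by this function: 0 if every stage succeeded, 1 otherwise.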
Expand source code
def interface(self, args):
    """
    Main function for the user interface. Parses arguments and starts the pipeline.

    Parameters
    ----------
    args : list
        This list is parsed by ArgumentParser.

    Returns
    -------
    returncode : int
        The value returned by run(), which this function passes through unchanged.

    """
    parser = ArgParser(
        description=f'{self.__class__.__name__} pipeline',
        usage=f'hocort map {self.__class__.__name__} [-h] [--threads <int>] [--filter <bool>] [--preset <str>] [-c=<str>] -x <idx> -i <fastq_1> [<fastq_2>] -o <fastq_1> [<fastq_2>]'
    )
    parser.add_argument(
        '-x',
        '--index',
        required=True,
        type=str,
        metavar='<idx>',
        help='str: path to Bowtie2 index (required)'
    )
    parser.add_argument(
        '-i',
        '--input',
        required=True,
        type=str,
        nargs='+',
        metavar=('<fastq_1>', '<fastq_2>'),
        help='str: path to sequence files, max 2 (required)'
    )
    parser.add_argument(
        '-o',
        '--output',
        required=True,
        type=str,
        nargs='+',
        metavar=('<fastq_1>', '<fastq_2>'),
        help='str: path to output files, max 2 (required)'
    )
    parser.add_argument(
        '-t',
        '--threads',
        required=False,
        type=int,
        metavar='<int>',
        default=os.cpu_count(),
        help='int: number of threads (default: max available on machine)'
    )
    parser.add_argument(
        '-p',
        '--preset',
        required=False,
        choices=['local', 'end-to-end'],
        default='end-to-end',
        help='str: operation mode (default: end-to-end)'
    )
    parser.add_argument(
        '-f',
        '--filter',
        required=False,
        choices=['true', 'false'],
        default='true',
        help='str: set to false to output mapped sequences, true to output unmapped sequences (default: true)'
    )
    parser.add_argument(
        '-c',
        '--config',
        required=False,
        type=str,
        metavar='<str>',
        help='str: used to pass along arguments to the aligner, use with caution, usage: -c="list arguments here"'
    )
    parsed = parser.parse_args(args=args)

    seq = parsed.input
    out = parsed.output
    # nargs='+' accepts any number of paths, but only the first two are used below.
    # Warn instead of dropping the extra files silently.
    if len(seq) > 2 or len(out) > 2:
        logger.warning('More than 2 input/output files were given; only the first 2 of each are used.')

    # os.cpu_count() may return None and 0 is not a usable thread count: fall back to 1.
    threads = parsed.threads or 1
    mfilter = parsed.filter == 'true'
    config = parsed.config or ''

    seq1 = seq[0]
    seq2 = seq[1] if len(seq) >= 2 else None
    out1 = out[0]
    out2 = out[1] if len(out) >= 2 else None

    return self.run(parsed.index,
                    seq1,
                    out1,
                    out2=out2,
                    seq2=seq2,
                    mfilter=mfilter,
                    threads=threads,
                    preset=parsed.preset,
                    options=config)
def run(self, idx, seq1, out1, seq2=None, out2=None, mfilter=True, preset='end-to-end', threads=1, options='')
-
Run function which starts the pipeline.
Parameters
idx
:string
- Path where the index is located.
seq1
:string
- Path where the first input FastQ file is located.
out1
:string
- Path where the first output FastQ file will be written.
seq2
:string
- Path where the second input FastQ file is located.
out2
:string
- Path where the second output FastQ file will be written.
mfilter
:bool
- Whether to output mapped or unmapped sequences. True: output unmapped sequences. False: output mapped sequences.
preset
:string
- Bowtie2 execution mode. Can either be 'local' or 'end-to-end'.
threads
:int
- Number of threads to use.
options
:string
- An options string where arguments may be defined. Overrides "preset" argument.
Returns
returncode
:int
- Resulting returncode after the process is finished.
Raises
ValueError
- If input FastQ_2 file is given without output FastQ_2. If disallowed characters are found in input.
Expand source code
def run(self, idx, seq1, out1, seq2=None, out2=None, mfilter=True, preset='end-to-end', threads=1, options=''):
    """
    Run function which starts the pipeline.

    Parameters
    ----------
    idx : string
        Path where the index is located.
    seq1 : string
        Path where the first input FastQ file is located.
    out1 : string
        Path where the first output FastQ file will be written.
    seq2 : string
        Path where the second input FastQ file is located.
    out2 : string
        Path where the second output FastQ file will be written.
    mfilter : bool
        Whether to output mapped/unmapped sequences.
        True: output unmapped sequences
        False: output mapped sequences
    preset : string
        Bowtie2 execution mode. Can either be 'local' or 'end-to-end'.
        Any other value only triggers a warning and adds no mode flag.
    threads : int
        Number of threads to use.
    options : string
        An options string where arguments may be defined.
        Overrides "preset" argument.

    Returns
    -------
    returncode : int
        0 if every executed process returned 0, otherwise 1.
        1 is also returned when the aligner does not produce a command.

    Raises
    ------
    ValueError
        If input FastQ_2 file is given without output FastQ_2.
        The called helpers may also raise it if disallowed characters are
        found in input (not visible in this function — confirm).

    """
    # Must stay the first statement: locals() then holds only the call arguments.
    debug_log_args(logger,
                   self.run.__name__,
                   locals())

    if seq2 and not out2:
        raise ValueError('Input FastQ_2 was given, but no output FastQ_2.')

    if preset == 'local':
        final_options = ['--local']
    elif preset == 'end-to-end':
        final_options = ['--end-to-end']
    else:
        final_options = []
        logger.warning(f'Invalid preset: {preset}')
    # A non-empty options string replaces the preset flag entirely.
    # NOTE(review): the whole string is passed on as ONE list element; whether it is
    # split into separate arguments depends on the aligner/executor — confirm.
    if options:
        final_options = [options]

    logger.info(f'Running pipeline: {self.__class__.__name__}')
    start_time = time.time()

    bowtie2_cmd = bt2().align(idx,
                              seq1,
                              seq2=seq2,
                              threads=threads,
                              options=final_options)
    # The aligner signals that no command could be built by returning None.
    if bowtie2_cmd is None:
        return 1

    fastq_cmd = SAM.sam_to_fastq(out1=out1,
                                 out2=out2,
                                 threads=threads,
                                 mfilter=mfilter)

    # Both command sequences are concatenated and executed together with pipe=True.
    returncodes = exe.execute(bowtie2_cmd + fastq_cmd,
                              pipe=True)
    logger.debug(returncodes)
    if any(returncode != 0 for returncode in returncodes):
        return 1

    end_time = time.time()
    logger.info(f'Pipeline {self.__class__.__name__} run time: {end_time - start_time} seconds')
    return 0