Module hocort.parse.sam

Expand source code
class SAM:
    """
    SAM parsing and processing class.

    """
    def select(input_path=None, output_path=None, paired=True, threads=1, mfilter=True):
        """
        Takes SAM input, selects mapped/unmapped reads, and outputs SAM.

        Parameters
        ----------
        input_path : string
            Input SAM file path.
        output_path : string
            Output SAM file path.
        paired : bool
            Process reads as paired or unpaired.
        threads : int
            Number of threads to use.
        mfilter : bool
            Whether to output mapped/unmapped sequences.
            True: output unmapped sequences
            False: output mapped sequences

        Returns
        -------
        [cmd] : list
            List of commands to be executed.

        """
        cmd = ['samtools', 'view', '--threads', f'{threads}', '-h']
        if paired:
            if mfilter:
                cmd += ['-f', '13']
            else:
                cmd += ['-F', '12', '-f', '1']
        else:
            if mfilter:
                cmd += ['-f', '4']
            else:
                cmd += ['-F', '4']
        if output_path:
            cmd += ['-o', output_path]
        if input_path:
            cmd += [input_path]
        else:
            cmd += ['-']

        return [cmd]

    def sam_to_fastq(input_path=None, out1=None, out2=None, threads=1, mfilter=False):
        """
        Takes SAM input, selects mapped/unmapped reads, and outputs FastQ.

        Parameters
        ----------
        input_path : string
            Input SAM file path.
        out1 : string
            FastQ READ1 (if paired), READ_OTHER (if unpaired) output path.
        out2 : string
            FastQ READ2 output path.
        threads : int
            Number of threads to use.
        mfilter : bool
            Whether to output mapped/unmapped sequences.
            True: output unmapped sequences
            False: output mapped sequences

        Returns
        -------
        [cmd] : list
            List of commands to be executed.

        """
        cmd = ['samtools', 'fastq', '--threads', f'{threads}', '-N']
        if out1 and out2:
            if mfilter:
                cmd += ['-f', '13']
            else:
                cmd += ['-F', '12', '-f', '1']
            cmd += ['-1', out1, '-2', out2]
        if out1 and not out2:
            if mfilter:
                cmd += ['-f', '4']
            else:
                cmd += ['-F', '4']
            cmd += ['-0', out1]
        if input_path:
            cmd += [input_path]
        else:
            cmd += ['-']

        return [cmd]

Classes

class SAM

SAM parsing and processing class.

Expand source code
class SAM:
    """
    SAM parsing and processing class.

    """
    def select(input_path=None, output_path=None, paired=True, threads=1, mfilter=True):
        """
        Takes SAM input, selects mapped/unmapped reads, and outputs SAM.

        Parameters
        ----------
        input_path : string
            Input SAM file path.
        output_path : string
            Output SAM file path.
        paired : bool
            Process reads as paired or unpaired.
        threads : int
            Number of threads to use.
        mfilter : bool
            Whether to output mapped/unmapped sequences.
            True: output unmapped sequences
            False: output mapped sequences

        Returns
        -------
        [cmd] : list
            List of commands to be executed.

        """
        cmd = ['samtools', 'view', '--threads', f'{threads}', '-h']
        if paired:
            if mfilter:
                cmd += ['-f', '13']
            else:
                cmd += ['-F', '12', '-f', '1']
        else:
            if mfilter:
                cmd += ['-f', '4']
            else:
                cmd += ['-F', '4']
        if output_path:
            cmd += ['-o', output_path]
        if input_path:
            cmd += [input_path]
        else:
            cmd += ['-']

        return [cmd]

    def sam_to_fastq(input_path=None, out1=None, out2=None, threads=1, mfilter=False):
        """
        Takes SAM input, selects mapped/unmapped reads, and outputs FastQ.

        Parameters
        ----------
        input_path : string
            Input SAM file path.
        out1 : string
            FastQ READ1 (if paired), READ_OTHER (if unpaired) output path.
        out2 : string
            FastQ READ2 output path.
        threads : int
            Number of threads to use.
        mfilter : bool
            Whether to output mapped/unmapped sequences.
            True: output unmapped sequences
            False: output mapped sequences

        Returns
        -------
        [cmd] : list
            List of commands to be executed.

        """
        cmd = ['samtools', 'fastq', '--threads', f'{threads}', '-N']
        if out1 and out2:
            if mfilter:
                cmd += ['-f', '13']
            else:
                cmd += ['-F', '12', '-f', '1']
            cmd += ['-1', out1, '-2', out2]
        if out1 and not out2:
            if mfilter:
                cmd += ['-f', '4']
            else:
                cmd += ['-F', '4']
            cmd += ['-0', out1]
        if input_path:
            cmd += [input_path]
        else:
            cmd += ['-']

        return [cmd]

Methods

def sam_to_fastq(input_path=None, out1=None, out2=None, threads=1, mfilter=False)

Takes SAM input, selects mapped/unmapped reads, and outputs FastQ.

Parameters

input_path : string
Input SAM file path.
out1 : string
FastQ READ1 (if paired), READ_OTHER (if unpaired) output path.
out2 : string
FastQ READ2 output path.
threads : int
Number of threads to use.
mfilter : bool
Whether to output mapped/unmapped sequences. True: output unmapped sequences False: output mapped sequences

Returns

[cmd] : list List of commands to be executed.

Expand source code
def sam_to_fastq(input_path=None, out1=None, out2=None, threads=1, mfilter=False):
    """
    Takes SAM input, selects mapped/unmapped reads, and outputs FastQ.

    Parameters
    ----------
    input_path : string
        Input SAM file path.
    out1 : string
        FastQ READ1 (if paired), READ_OTHER (if unpaired) output path.
    out2 : string
        FastQ READ2 output path.
    threads : int
        Number of threads to use.
    mfilter : bool
        Whether to output mapped/unmapped sequences.
        True: output unmapped sequences
        False: output mapped sequences

    Returns
    -------
    [cmd] : list
        List of commands to be executed.

    """
    cmd = ['samtools', 'fastq', '--threads', f'{threads}', '-N']
    if out1 and out2:
        if mfilter:
            cmd += ['-f', '13']
        else:
            cmd += ['-F', '12', '-f', '1']
        cmd += ['-1', out1, '-2', out2]
    if out1 and not out2:
        if mfilter:
            cmd += ['-f', '4']
        else:
            cmd += ['-F', '4']
        cmd += ['-0', out1]
    if input_path:
        cmd += [input_path]
    else:
        cmd += ['-']

    return [cmd]
def select(input_path=None, output_path=None, paired=True, threads=1, mfilter=True)

Takes SAM input, selects mapped/unmapped reads, and outputs SAM.

Parameters

input_path : string
Input SAM file path.
output_path : string
Output SAM file path.
paired : bool
Process reads as paired or unpaired.
threads : int
Number of threads to use.
mfilter : bool
Whether to output mapped/unmapped sequences. True: output unmapped sequences False: output mapped sequences

Returns

[cmd] : list List of commands to be executed.

Expand source code
def select(input_path=None, output_path=None, paired=True, threads=1, mfilter=True):
    """
    Takes SAM input, selects mapped/unmapped reads, and outputs SAM.

    Parameters
    ----------
    input_path : string
        Input SAM file path.
    output_path : string
        Output SAM file path.
    paired : bool
        Process reads as paired or unpaired.
    threads : int
        Number of threads to use.
    mfilter : bool
        Whether to output mapped/unmapped sequences.
        True: output unmapped sequences
        False: output mapped sequences

    Returns
    -------
    [cmd] : list
        List of commands to be executed.

    """
    cmd = ['samtools', 'view', '--threads', f'{threads}', '-h']
    if paired:
        if mfilter:
            cmd += ['-f', '13']
        else:
            cmd += ['-F', '12', '-f', '1']
    else:
        if mfilter:
            cmd += ['-f', '4']
        else:
            cmd += ['-F', '4']
    if output_path:
        cmd += ['-o', output_path]
    if input_path:
        cmd += [input_path]
    else:
        cmd += ['-']

    return [cmd]