Module read5.Slow5Reader

Expand source code
#!/usr/bin/env python
# author: Jannes Spangenberg
# e-mail: jannes.spangenberg@uni-jena.de
# github: https://github.com/JannesSP
# website: https://jannessp.github.io

from read5.AbstractFileReader import AbstractFileReader
import pyslow5
import numpy as np

class Slow5Reader(AbstractFileReader):
    '''
    File reader for slow5 or blow5 files

    Per-read getters look the read up with ``get_read`` of the opened pyslow5
    file; run-level getters index the dictionary returned by ``get_all_headers``.

    Attributes
    ----------
    threads : int
        number of threads to use in C backend
    batchsize : int
        number of reads to fetch at a time. Higher numbers use more RAM, but are more efficient with more threads.
    filepath : str
        Path to the ONT raw data file

    Raises
    ------
    FileNotFoundError
        If the given file does not exist.
    '''
    def __init__(self, threads : int = 1, batchsize : int = 1, *args, **kwargs):
        # Store the backend settings before delegating to the base class:
        # open() reads them, and the base initialiser may be what calls open()
        # (TODO confirm against AbstractFileReader).
        self._threads = threads
        self._batchsize = batchsize
        super().__init__(*args, **kwargs)

    def open(self) -> None:
        # Read-only handle; every getter below goes through self._file.
        self._file = pyslow5.Open(self._filepath, 'r')
        if self._threads > 1:
            # NOTE(review): this branch does not assign self._nreads, unlike
            # the single-threaded branch below - confirm the base class copes.
            self._reads = self._file.seq_reads_multi(threads=self._threads, batchsize=self._batchsize)
        else:
            self._reads, self._nreads = self._file.get_read_ids()
        self._open = True

    ### get_read

    def __getitem__(self, readid : str):
        # Record of a single read as returned by get_read() with default options.
        return self._file.get_read(readid)

    def getSignal(self, readid : str) -> np.ndarray:
        # 'signal' entry of the default (non-pA) read record.
        return self[readid]['signal']

    def getOffset(self, readid : str) -> float:
        return self[readid]['offset']

    def getRange(self, readid : str) -> float:
        return self[readid]['range']

    def getDigitisation(self, readid : str) -> int:
        return self[readid]['digitisation']

    def getCalibrationScale(self, readid : str) -> float:
        # Fetch the record once: 'range' and 'digitisation' live in the same
        # record, so a second lookup would only repeat the read access.
        record = self[readid]
        return record['range'] / record['digitisation']

    def getpASignal(self, readid :str) -> np.ndarray:
        # Same lookup as getSignal(), but asks the backend for pA=True.
        return self._file.get_read(readid, pA=True)['signal']

    def getChannelNumber(self, readid : str) -> int:
        # The stored value is passed through int() before being returned.
        return int(self._file.get_read(readid, aux='channel_number')['channel_number'])

    def getStartTime(self, readid : str) -> int:
        return self._file.get_read(readid, aux='start_time')['start_time']

    def getReadNumber(self, readid : str) -> int:
        return self._file.get_read(readid, aux='read_number')['read_number']

    def getMedianBefore(self, readid : str) -> int:
        # NOTE(review): annotated as int, the value is returned unconverted -
        # confirm against the type of the 'median_before' field in the file.
        return self._file.get_read(readid, aux='median_before')['median_before']

    def getStartMux(self, readid : str) -> int:
        '''
        Returns
        -------
        start_mux : int
            'start_mux' auxiliary field of the given read
        '''
        return self._file.get_read(readid, aux='start_mux')['start_mux']

    def getEndReason(self, readid : str) -> int:
        return self._file.get_read(readid, aux='end_reason')['end_reason']

    def getDuration(self, readid : str) -> int:
        # Taken from the 'len_raw_signal' entry of the read record.
        return self[readid]['len_raw_signal']

    def getSamplingRate(self, readid : str) -> int:
        return self[readid]['sampling_rate']

    ### get_all_headers
    # The getters below index the header dictionary by a fixed key. Where a
    # `readid` parameter is accepted it is not used by the lookup.

    def getAllHeaders(self) -> dict:
        '''
        Returns
        -------
        metadata : dict
            all metadata of the sequencing run stored in file.get_all_headers()
        '''
        return self._file.get_all_headers()

    def getAsicID(self, readid : str = None) -> str:
        return self.getAllHeaders()['asic_id']

    def getAsicIDEeprom(self, readid : str = None) -> str:
        return self.getAllHeaders()['asic_id_eeprom']

    def getAsicTemp(self, readid : str = None) -> float:
        return float(self.getAllHeaders()['asic_temp'])

    def getAsicVersion(self, readid : str = None) -> str:
        return self.getAllHeaders()['asic_version']

    def isAutoUpdated(self, readid : str = None) -> bool:
        # Header value is converted with int() first, then to bool.
        return bool(int(self.getAllHeaders()['auto_update']))

    def getAutoUpdateSource(self, readid : str = None) -> str:
        return self.getAllHeaders()['auto_update_source']

    def isBarcodingEnabled(self, readid : str = None) -> bool:
        return bool(int(self.getAllHeaders()['barcoding_enabled']))

    def isBreamStandard(self, readid : str = None) -> bool:
        return bool(int(self.getAllHeaders()['bream_is_standard']))

    def getConfigurationVersion(self, readid : str = None) -> str:
        return self.getAllHeaders()['configuration_version']

    def getDeviceID(self, readid : str = None) -> str:
        return self.getAllHeaders()['device_id']

    def getDeviceType(self, readid : str = None) -> str:
        return self.getAllHeaders()['device_type']

    def getDistributionStatus(self, readid : str = None) -> str:
        return self.getAllHeaders()['distribution_status']

    def getDistributionVersion(self, readid : str = None) -> str:
        return self.getAllHeaders()['distribution_version']

    def getExpScriptName(self, readid : str = None) -> str:
        return self.getAllHeaders()['exp_script_name']

    def getExpScriptPurpose(self, readid : str = None) -> str:
        return self.getAllHeaders()['exp_script_purpose']

    def getExpStartTime(self, readid : str = None) -> str:
        return self.getAllHeaders()['exp_start_time']

    def getExperimentDurationSet(self, readid : str = None) -> int:
        return int(self.getAllHeaders()['experiment_duration_set'])

    def getExperimentType(self, readid : str = None) -> str:
        return self.getAllHeaders()['experiment_type']

    def getFileType(self) -> str:
        '''
        Returns
        -------
        file_type : str
            'file_type' entry of the file headers
        '''
        return self.getAllHeaders()['file_type']

    def getFileVersion(self) -> str:
        return self.getAllHeaders()['file_version']

    def getFlowCellID(self, readid : str = None) -> str:
        return self.getAllHeaders()['flow_cell_id']

    def getFlowCellProductCode(self, readid :  str = None) -> str:
        return self.getAllHeaders()['flow_cell_product_code']

    def getGuppyVersion(self, readid : str = None) -> str:
        return self.getAllHeaders()['guppy_version']

    def getHeatSinkTemp(self, readid : str = None) -> float:
        return float(self.getAllHeaders()['heatsink_temp'])

    def getHostProductCode(self, readid : str = None) -> str:
        return self.getAllHeaders()['host_product_code']

    def getHostProductSerialNumber(self, readid : str = None) -> str:
        # Look the headers up once; a None entry is reported as an empty string.
        serial = self.getAllHeaders()['host_product_serial_number']
        return '' if serial is None else serial

    def getHostname(self, readid : str = None) -> str:
        return self.getAllHeaders()['hostname']

    def getInstallationType(self, readid : str = None) -> str:
        return self.getAllHeaders()['installation_type']

    def isLocalBasecalled(self, readid : str = None) -> bool:
        return bool(int(self.getAllHeaders()['local_basecalling']))

    def getLocalFirmwareFile(self, readid : str = None) -> int:
        return int(self.getAllHeaders()['local_firmware_file'])

    def getOperatingSystem(self, readid : str = None) -> str:
        return self.getAllHeaders()['operating_system']

    def getPackage(self, readid : str = None) -> str:
        return self.getAllHeaders()['package']

    def getPackageVersion(self, readid : str = None) -> str:
        return self.getAllHeaders()['package_version']

    def getPoreType(self, readid : str = None) -> str:
        return self.getAllHeaders()['pore_type']

    def getProtocolGroupID(self, readid : str = None) -> str:
        return self.getAllHeaders()['protocol_group_id']

    def getProtocolRunID(self, readid : str = None) -> str:
        return self.getAllHeaders()['protocol_run_id']

    def getProtocolStartTime(self, readid : str = None) -> str:
        return self.getAllHeaders()['protocol_start_time']

    def getProtocolVersion(self, readid : str = None) -> str:
        # Note the header key is the plural 'protocols_version'.
        return self.getAllHeaders()['protocols_version']

    def getRunID(self, readid : str = None) -> str:
        return self.getAllHeaders()['run_id']

    def getSampleID(self, readid : str = None) -> str:
        return self.getAllHeaders()['sample_id']

    def getSequencingKit(self, readid : str = None) -> str:
        return self.getAllHeaders()['sequencing_kit']

    def getUSBConfig(self, readid : str = None) -> str:
        return self.getAllHeaders()['usb_config']

    def getVersion(self, readid : str = None) -> str:
        return self.getAllHeaders()['version']

Classes

class Slow5Reader (threads: int = 1, batchsize: int = 1, *args, **kwargs)

File reader for slow5 or blow5 files

Attributes

threads : int
number of threads to use in C backend
batchsize : int
number of reads to fetch at a time. Higher numbers use more RAM, but are more efficient with more threads.
filepath : str
Path to the ONT raw data file

Raises

FileNotFoundError
If the given file does not exist.
Expand source code
class Slow5Reader(AbstractFileReader):
    '''
    File reader for slow5 or blow5 files

    Per-read getters look the read up with ``get_read`` of the opened pyslow5
    file; run-level getters index the dictionary returned by ``get_all_headers``.

    Attributes
    ----------
    threads : int
        number of threads to use in C backend
    batchsize : int
        number of reads to fetch at a time. Higher numbers use more RAM, but are more efficient with more threads.
    filepath : str
        Path to the ONT raw data file

    Raises
    ------
    FileNotFoundError
        If the given file does not exist.
    '''
    def __init__(self, threads : int = 1, batchsize : int = 1, *args, **kwargs):
        # Store the backend settings before delegating to the base class:
        # open() reads them, and the base initialiser may be what calls open()
        # (TODO confirm against AbstractFileReader).
        self._threads = threads
        self._batchsize = batchsize
        super().__init__(*args, **kwargs)

    def open(self) -> None:
        # Read-only handle; every getter below goes through self._file.
        self._file = pyslow5.Open(self._filepath, 'r')
        if self._threads > 1:
            # NOTE(review): this branch does not assign self._nreads, unlike
            # the single-threaded branch below - confirm the base class copes.
            self._reads = self._file.seq_reads_multi(threads=self._threads, batchsize=self._batchsize)
        else:
            self._reads, self._nreads = self._file.get_read_ids()
        self._open = True

    ### get_read

    def __getitem__(self, readid : str):
        # Record of a single read as returned by get_read() with default options.
        return self._file.get_read(readid)

    def getSignal(self, readid : str) -> np.ndarray:
        # 'signal' entry of the default (non-pA) read record.
        return self[readid]['signal']

    def getOffset(self, readid : str) -> float:
        return self[readid]['offset']

    def getRange(self, readid : str) -> float:
        return self[readid]['range']

    def getDigitisation(self, readid : str) -> int:
        return self[readid]['digitisation']

    def getCalibrationScale(self, readid : str) -> float:
        # Fetch the record once: 'range' and 'digitisation' live in the same
        # record, so a second lookup would only repeat the read access.
        record = self[readid]
        return record['range'] / record['digitisation']

    def getpASignal(self, readid :str) -> np.ndarray:
        # Same lookup as getSignal(), but asks the backend for pA=True.
        return self._file.get_read(readid, pA=True)['signal']

    def getChannelNumber(self, readid : str) -> int:
        # The stored value is passed through int() before being returned.
        return int(self._file.get_read(readid, aux='channel_number')['channel_number'])

    def getStartTime(self, readid : str) -> int:
        return self._file.get_read(readid, aux='start_time')['start_time']

    def getReadNumber(self, readid : str) -> int:
        return self._file.get_read(readid, aux='read_number')['read_number']

    def getMedianBefore(self, readid : str) -> int:
        # NOTE(review): annotated as int, the value is returned unconverted -
        # confirm against the type of the 'median_before' field in the file.
        return self._file.get_read(readid, aux='median_before')['median_before']

    def getStartMux(self, readid : str) -> int:
        '''
        Returns
        -------
        start_mux : int
            'start_mux' auxiliary field of the given read
        '''
        return self._file.get_read(readid, aux='start_mux')['start_mux']

    def getEndReason(self, readid : str) -> int:
        return self._file.get_read(readid, aux='end_reason')['end_reason']

    def getDuration(self, readid : str) -> int:
        # Taken from the 'len_raw_signal' entry of the read record.
        return self[readid]['len_raw_signal']

    def getSamplingRate(self, readid : str) -> int:
        return self[readid]['sampling_rate']

    ### get_all_headers
    # The getters below index the header dictionary by a fixed key. Where a
    # `readid` parameter is accepted it is not used by the lookup.

    def getAllHeaders(self) -> dict:
        '''
        Returns
        -------
        metadata : dict
            all metadata of the sequencing run stored in file.get_all_headers()
        '''
        return self._file.get_all_headers()

    def getAsicID(self, readid : str = None) -> str:
        return self.getAllHeaders()['asic_id']

    def getAsicIDEeprom(self, readid : str = None) -> str:
        return self.getAllHeaders()['asic_id_eeprom']

    def getAsicTemp(self, readid : str = None) -> float:
        return float(self.getAllHeaders()['asic_temp'])

    def getAsicVersion(self, readid : str = None) -> str:
        return self.getAllHeaders()['asic_version']

    def isAutoUpdated(self, readid : str = None) -> bool:
        # Header value is converted with int() first, then to bool.
        return bool(int(self.getAllHeaders()['auto_update']))

    def getAutoUpdateSource(self, readid : str = None) -> str:
        return self.getAllHeaders()['auto_update_source']

    def isBarcodingEnabled(self, readid : str = None) -> bool:
        return bool(int(self.getAllHeaders()['barcoding_enabled']))

    def isBreamStandard(self, readid : str = None) -> bool:
        return bool(int(self.getAllHeaders()['bream_is_standard']))

    def getConfigurationVersion(self, readid : str = None) -> str:
        return self.getAllHeaders()['configuration_version']

    def getDeviceID(self, readid : str = None) -> str:
        return self.getAllHeaders()['device_id']

    def getDeviceType(self, readid : str = None) -> str:
        return self.getAllHeaders()['device_type']

    def getDistributionStatus(self, readid : str = None) -> str:
        return self.getAllHeaders()['distribution_status']

    def getDistributionVersion(self, readid : str = None) -> str:
        return self.getAllHeaders()['distribution_version']

    def getExpScriptName(self, readid : str = None) -> str:
        return self.getAllHeaders()['exp_script_name']

    def getExpScriptPurpose(self, readid : str = None) -> str:
        return self.getAllHeaders()['exp_script_purpose']

    def getExpStartTime(self, readid : str = None) -> str:
        return self.getAllHeaders()['exp_start_time']

    def getExperimentDurationSet(self, readid : str = None) -> int:
        return int(self.getAllHeaders()['experiment_duration_set'])

    def getExperimentType(self, readid : str = None) -> str:
        return self.getAllHeaders()['experiment_type']

    def getFileType(self) -> str:
        '''
        Returns
        -------
        file_type : str
            'file_type' entry of the file headers
        '''
        return self.getAllHeaders()['file_type']

    def getFileVersion(self) -> str:
        return self.getAllHeaders()['file_version']

    def getFlowCellID(self, readid : str = None) -> str:
        return self.getAllHeaders()['flow_cell_id']

    def getFlowCellProductCode(self, readid :  str = None) -> str:
        return self.getAllHeaders()['flow_cell_product_code']

    def getGuppyVersion(self, readid : str = None) -> str:
        return self.getAllHeaders()['guppy_version']

    def getHeatSinkTemp(self, readid : str = None) -> float:
        return float(self.getAllHeaders()['heatsink_temp'])

    def getHostProductCode(self, readid : str = None) -> str:
        return self.getAllHeaders()['host_product_code']

    def getHostProductSerialNumber(self, readid : str = None) -> str:
        # Look the headers up once; a None entry is reported as an empty string.
        serial = self.getAllHeaders()['host_product_serial_number']
        return '' if serial is None else serial

    def getHostname(self, readid : str = None) -> str:
        return self.getAllHeaders()['hostname']

    def getInstallationType(self, readid : str = None) -> str:
        return self.getAllHeaders()['installation_type']

    def isLocalBasecalled(self, readid : str = None) -> bool:
        return bool(int(self.getAllHeaders()['local_basecalling']))

    def getLocalFirmwareFile(self, readid : str = None) -> int:
        return int(self.getAllHeaders()['local_firmware_file'])

    def getOperatingSystem(self, readid : str = None) -> str:
        return self.getAllHeaders()['operating_system']

    def getPackage(self, readid : str = None) -> str:
        return self.getAllHeaders()['package']

    def getPackageVersion(self, readid : str = None) -> str:
        return self.getAllHeaders()['package_version']

    def getPoreType(self, readid : str = None) -> str:
        return self.getAllHeaders()['pore_type']

    def getProtocolGroupID(self, readid : str = None) -> str:
        return self.getAllHeaders()['protocol_group_id']

    def getProtocolRunID(self, readid : str = None) -> str:
        return self.getAllHeaders()['protocol_run_id']

    def getProtocolStartTime(self, readid : str = None) -> str:
        return self.getAllHeaders()['protocol_start_time']

    def getProtocolVersion(self, readid : str = None) -> str:
        # Note the header key is the plural 'protocols_version'.
        return self.getAllHeaders()['protocols_version']

    def getRunID(self, readid : str = None) -> str:
        return self.getAllHeaders()['run_id']

    def getSampleID(self, readid : str = None) -> str:
        return self.getAllHeaders()['sample_id']

    def getSequencingKit(self, readid : str = None) -> str:
        return self.getAllHeaders()['sequencing_kit']

    def getUSBConfig(self, readid : str = None) -> str:
        return self.getAllHeaders()['usb_config']

    def getVersion(self, readid : str = None) -> str:
        return self.getAllHeaders()['version']

Ancestors

Methods

def getAllHeaders(self) ‑> dict

Returns

metadata : dict
all metadata of the sequencing run stored in file.get_all_headers()
Expand source code
def getAllHeaders(self) -> dict:
    '''
    Fetch the header dictionary of the opened file.

    Returns
    -------
    metadata : dict
        whatever self._file.get_all_headers() returns
    '''
    headers = self._file.get_all_headers()
    return headers
def getFileType(self) ‑> str

Returns

file_type : str
 
Expand source code
def getFileType(self) -> str:
    '''
    Look up the 'file_type' entry of the file headers.

    Returns
    -------
    file_type : str
    '''
    headers = self.getAllHeaders()
    return headers['file_type']
def getStartMux(self, readid: str) ‑> int

Returns

start_mux : int
 
Expand source code
def getStartMux(self, readid : str) -> int:
    '''
    Fetch the 'start_mux' auxiliary field of a single read.

    Parameters
    ----------
    readid : str
        id of the read to look up

    Returns
    -------
    start_mux : int
    '''
    record = self._file.get_read(readid, aux='start_mux')
    return record['start_mux']

Inherited members