Source code for yasa.hypno

"""
This file contains several helper functions to manipulate sleep staging
(hypnogram) data. The default hypnogram format in YASA is a one dimensional
integer array where:

* -2  = Unscored
* -1  = Artefact / Movement
* 0   = Wake
* 1   = N1 sleep
* 2   = N2 sleep
* 3   = N3 sleep
* 4   = REM sleep

For more details, please refer to the following references:

- Iber, C. (2007). The AASM manual for the scoring of sleep and
associated events: rules, terminology and technical specifications.
American Academy of Sleep Medicine.

- Silber, M. H., Ancoli-Israel, S., Bonnet, M. H., Chokroverty, S.,
Grigg-Damberger, M. M., Hirshkowitz, M., … Iber, C. (2007). The visual scoring
of sleep in adults. Journal of Clinical Sleep Medicine: JCSM: Official
Publication of the American Academy of Sleep Medicine, 3(2), 121–131.

- Combrisson, E., Vallat, R., Eichenlaub, J.-B., O’Reilly, C., Lajnef, T.,
Guillot, A., … Jerbi, K. (2017). Sleep: An Open-Source Python Software for
Visualization, Analysis, and Staging of Sleep Data. Frontiers in
Neuroinformatics, 11, 60. https://doi.org/10.3389/fninf.2017.00060
"""
import mne
import logging
import numpy as np
import pandas as pd

# Public names exported by ``from yasa.hypno import *``. The helper
# ``hypno_fit_to_data`` defined in this module is not part of this list.
__all__ = ['hypno_str_to_int', 'hypno_int_to_str', 'hypno_upsample_to_sf',
           'hypno_upsample_to_data', 'load_profusion_hypno']


# Logger registered under the 'yasa' name; used in this module to report
# hypnogram / data length mismatches.
logger = logging.getLogger('yasa')


#############################################################################
# STR <--> INT CONVERSION
#############################################################################

def hypno_str_to_int(hypno, mapping_dict={'w': 0, 'wake': 0, 'n1': 1, 's1': 1,
                                          'n2': 2, 's2': 2, 'n3': 3, 's3': 3,
                                          's4': 3, 'r': 4, 'rem': 4,
                                          'art': -1, 'mt': -1, 'uns': -2,
                                          'nd': -2}):
    """Translate a hypnogram of stage labels into integer stage codes.

    ['W', 'N2', 'N2', 'N3', 'R'] ==> [0, 2, 2, 3, 4]

    .. versionadded:: 0.1.5

    Parameters
    ----------
    hypno : array_like
        The sleep staging (hypnogram) 1D array, given as a list,
        :py:class:`numpy.ndarray` or :py:class:`pandas.Series` of labels.
    mapping_dict : dict
        The mapping dictionary. Keys must be lowercase because every label
        is lowercased before the lookup. The lookup itself is delegated to
        :py:meth:`pandas.Series.map`.

    Returns
    -------
    hypno : array_like
        The corresponding integer hypnogram, as returned by the ``values``
        attribute of the mapped :py:class:`pandas.Series`.

    Raises
    ------
    AssertionError
        If ``hypno`` is not a list / array / Series, or if any of its
        entries is a purely numeric string.
    """
    accepted_types = (list, np.ndarray, pd.Series)
    assert isinstance(hypno, accepted_types), 'Not an array.'
    # Cast every entry to str so that the pandas string accessor is usable.
    labels = pd.Series(np.asarray(hypno, dtype=str))
    contains_numeric = labels.str.isnumeric().any()
    assert not contains_numeric, 'Hypno contains numeric values.'
    lowered = labels.str.lower()
    return lowered.map(mapping_dict).values
def hypno_int_to_str(hypno, mapping_dict={0: 'W', 1: 'N1', 2: 'N2', 3: 'N3',
                                          4: 'R', -1: 'Art', -2: 'Uns'}):
    """Translate a hypnogram of integer stage codes into stage labels.

    [0, 2, 2, 3, 4] ==> ['W', 'N2', 'N2', 'N3', 'R']

    .. versionadded:: 0.1.5

    Parameters
    ----------
    hypno : array_like
        The sleep staging (hypnogram) 1D array, given as a list,
        :py:class:`numpy.ndarray` or :py:class:`pandas.Series`. Values are
        cast to ``int`` before the lookup.
    mapping_dict : dict
        The mapping dictionary (integer code -> label). The lookup itself is
        delegated to :py:meth:`pandas.Series.map`.

    Returns
    -------
    hypno : array_like
        The corresponding string hypnogram, as returned by the ``values``
        attribute of the mapped :py:class:`pandas.Series`.

    Raises
    ------
    AssertionError
        If ``hypno`` is not a list / array / Series.
    """
    accepted_types = (list, np.ndarray, pd.Series)
    assert isinstance(hypno, accepted_types), 'Not an array.'
    codes = np.asarray(hypno, dtype=int)
    return pd.Series(codes).map(mapping_dict).values
#############################################################################
# UPSAMPLING
#############################################################################

def hypno_upsample_to_sf(hypno, sf_hypno, sf_data):
    """Upsample the hypnogram to a given sampling frequency.

    Each value of ``hypno`` is repeated ``sf_data / sf_hypno`` times.

    .. versionadded:: 0.1.5

    Parameters
    ----------
    hypno : array_like
        The sleep staging (hypnogram) 1D array, given as a list,
        :py:class:`numpy.ndarray` or :py:class:`pandas.Series`.
    sf_hypno : float
        The current sampling frequency of the hypnogram, in Hz, e.g.

        * 1/30 = 1 value per each 30 seconds of EEG data,
        * 1 = 1 value per second of EEG data
    sf_data : float
        The desired sampling frequency of the hypnogram, in Hz (e.g. 100 Hz,
        256 Hz, ...)

    Returns
    -------
    hypno : array_like
        The hypnogram, upsampled to ``sf_data``.

    Raises
    ------
    AssertionError
        If ``sf_hypno`` is greater than ``sf_data``, if
        ``sf_data / sf_hypno`` is not a whole number, or if ``hypno`` is not
        a list / array / Series.
    """
    repeats = sf_data / sf_hypno
    assert sf_hypno <= sf_data, 'sf_hypno must be less than sf_data.'
    # float() makes the whole-number check independent of the scalar type
    # produced by the division (Python float, numpy floating, ...).
    assert float(repeats).is_integer(), ('sf_data / sf_hypno must be a '
                                         'whole number.')
    assert isinstance(hypno, (list, np.ndarray, pd.Series))
    # True division always yields a float, whereas np.repeat documents its
    # repeat count as an integer: cast explicitly now that the value is
    # known to be whole.
    return np.repeat(np.asarray(hypno), int(repeats))
def hypno_fit_to_data(hypno, data, sf=None):
    """Make the hypnogram exactly as long as the data by padding or cropping.

    Hypnogram and data MUST have the SAME sampling frequency.

    This is an internal function.

    Parameters
    ----------
    hypno : array_like
        The sleep staging (hypnogram) 1D array.
    data : np.array_like or mne.io.Raw
        1D or 2D EEG data. Can also be a MNE Raw object, in which case data
        and sf will be automatically extracted.
    sf : float, optional
        The sampling frequency of data AND the hypnogram. Only used to
        express the length mismatch in seconds in the logged warning; when
        omitted the mismatch is expressed in samples.

    Returns
    -------
    hypno : array_like
        Hypnogram, with the same number of samples as data. A hypnogram that
        is too short is padded by repeating its last value; one that is too
        long is truncated at the end.

    Raises
    ------
    AssertionError
        If ``hypno`` is not one-dimensional.
    """
    if isinstance(data, mne.io.BaseRaw):
        # Take the sampling rate from the recording itself. The time vector
        # is a 1D array and does not require the data to be preloaded.
        sf = data.info['sfreq']
        data = data.times
    data = np.asarray(data)
    hypno = np.asarray(hypno)
    assert hypno.ndim == 1, 'Hypno must be 1D.'

    n_hypno = hypno.size
    n_data = max(data.shape)  # Longest axis, so 2D data is supported
    # Positive: hypnogram too short. Negative: hypnogram too long.
    n_missing = n_data - n_hypno

    if n_missing > 0:
        if sf is None:
            logger.warning('Hypnogram is SHORTER than data by %i samples. '
                           'Padding hypnogram with last value to match '
                           'data.size.' % n_missing)
        else:
            missing_sec = n_missing / sf
            logger.warning('Hypnogram is SHORTER than data by %.2f seconds. '
                           'Padding hypnogram with last value to match '
                           'data.size.' % missing_sec)
        return np.pad(hypno, (0, n_missing), mode='edge')

    if n_missing < 0:
        n_extra = n_hypno - n_data
        if sf is None:
            logger.warning('Hypnogram is LONGER than data by %i samples. '
                           'Cropping hypnogram to match data.size.' % n_extra)
        else:
            extra_sec = n_extra / sf
            logger.warning('Hypnogram is LONGER than data by %.2f seconds. '
                           'Cropping hypnogram to match data.size.'
                           % extra_sec)
        return hypno[:n_data]

    # Lengths already agree: nothing to do.
    return hypno
def hypno_upsample_to_data(hypno, sf_hypno, data, sf_data=None):
    """Upsample a hypnogram and fit it to the length of the EEG data.

    The hypnogram is first upsampled from ``sf_hypno`` to ``sf_data`` with
    :py:func:`hypno_upsample_to_sf`, then cropped or padded with
    ``hypno_fit_to_data`` so that the hypnogram and the EEG data have the
    exact same number of samples.

    .. versionadded:: 0.1.5

    Parameters
    ----------
    hypno : array_like
        The sleep staging (hypnogram) 1D array.
    sf_hypno : float
        The current sampling frequency of the hypnogram, in Hz, e.g.

        * 1/30 = 1 value per each 30 seconds of EEG data,
        * 1 = 1 value per second of EEG data
    data : array_like or :py:class:`mne.io.BaseRaw`
        1D or 2D EEG data. Can also be a :py:class:`mne.io.BaseRaw`, in which
        case ``data`` and ``sf_data`` will be automatically extracted.
    sf_data : float
        The sampling frequency of ``data``, in Hz (e.g. 100 Hz, 256 Hz, ...).
        Can be omitted if ``data`` is a :py:class:`mne.io.BaseRaw`.

    Returns
    -------
    hypno : array_like
        The hypnogram, upsampled to ``sf_data`` and cropped/padded to
        ``max(data.shape)``.

    Notes
    -----
    If the upsampled ``hypno`` is shorter / longer than ``max(data.shape)``
    and therefore needs to be padded / cropped, ``hypno_fit_to_data`` reports
    it as a warning through the module logger.
    """
    if isinstance(data, mne.io.BaseRaw):
        # The recording provides both its sampling rate and its length.
        sf_data, data = data.info['sfreq'], data.times
    upsampled = hypno_upsample_to_sf(hypno, sf_hypno, sf_data)
    return hypno_fit_to_data(upsampled, data, sf=sf_data)
#############################################################################
# HYPNO LOADING
#############################################################################

def load_profusion_hypno(fname, replace=True):  # pragma: no cover
    """
    Read a hypnogram from a Compumedics Profusion annotation file (.xml).

    The Compumedics Profusion hypnogram format is one of the two hypnogram
    formats found in the `National Sleep Research Resource (NSRR)
    <https://sleepdata.org/>`_ website. For more details on the format,
    please refer to
    https://github.com/nsrr/edf-editor-translator/wiki/Compumedics-Annotation-Format

    Parameters
    ----------
    fname : str
        Filename with full path.
    replace : bool
        If True, the integer values will be mapped to YASA default, i.e.
        0 for Wake, 1 for N1, 2 for N2, 3 for N3 / S4 and 4 for REM. Note that
        the native profusion format is identical except for REM sleep which
        is marked as 5.

    Returns
    -------
    hypno : 1D array (n_epochs, )
        Hypnogram, with one value per epoch.
    sf_hyp : float
        Sampling frequency of the hypnogram, computed as one over the epoch
        length stored in the file (1 / 30 Hz for 30-second epochs).
    """
    # Note that an alternative is to use the `xmltodict` library:
    # >>> with open(fname) as in_file:
    # >>>     xml = in_file.read()
    # >>> xml = xmltodict.parse(xml, process_namespaces=True)['CMPStudyConfig']
    # >>> epoch_length = xml['EpochLength']
    # >>> hypno = np.array(xml['SleepStages']['SleepStage'], dtype='int')
    # >>> annotations = pd.DataFrame(xml['ScoredEvents']['ScoredEvent'])
    # >>> annotations["Start"] = annotations["Start"].astype(float)
    # >>> annotations["Duration"] = annotations["Duration"].astype(float)
    import xml.etree.ElementTree as ET

    root = ET.parse(fname).getroot()
    # Elements are addressed by position: the first child of the root holds
    # the epoch length and the fifth child holds one element per epoch.
    sf_hyp = 1 / float(root[0].text)
    stages = [stage.text for stage in root[4]]
    hypno = np.array(stages).astype(int)

    if replace:
        # Stage 4 --> 3 and REM --> 4
        remap = {4: 3, 5: 4}
        hypno = pd.Series(hypno).replace(remap).to_numpy()
    return hypno, sf_hyp