Source code for sm.engine.isocalc_wrapper

from cpyMSpec import IsotopePattern
import numpy as np
import logging

logger = logging.getLogger('sm-engine')

ISOTOPIC_PEAK_N = 4
SIGMA_TO_FWHM = 2.3548200450309493  # 2 \sqrt{2 \log 2}

[docs]def trim_centroids(mzs, intensities, k): int_order = np.argsort(intensities)[::-1] mzs = mzs[int_order][:k] intensities = intensities[int_order][:k] mz_order = np.argsort(mzs) return mzs[mz_order], intensities[mz_order]
[docs]class Centroids(object): def __init__(self, isotope_pattern, resolving_power, pts_per_mz=None): self._isotope_pattern = isotope_pattern self._resolving_power = resolving_power self._pts_per_mz = pts_per_mz if isotope_pattern is not None: centroids = isotope_pattern.centroids(resolving_power) order = np.argsort(centroids.masses) self.mzs = np.array(centroids.masses)[order] self.ints = 100.0 * np.array(centroids.abundances)[order] if pts_per_mz is None: fwhm = self.mzs[0] / resolving_power sigma = fwhm / SIGMA_TO_FWHM self._pts_per_mz = 5.0 / sigma else: self.mzs = self.ints = [] @property def _envelope(self): return self._isotope_pattern.envelope(self._resolving_power)
[docs] def spectrum_chart(self, n_peaks=ISOTOPIC_PEAK_N): centr_mzs, _ = trim_centroids(self.mzs, self.ints, n_peaks) min_mz = min(centr_mzs) - 0.25 max_mz = max(centr_mzs) + 0.25 prof_mzs = np.arange(min_mz, max_mz, 1.0 / self._pts_per_mz) prof_ints = self._envelope(prof_mzs) nnz_idx = prof_ints > 1e-9 prof_mzs = prof_mzs[nnz_idx] prof_ints = prof_ints[nnz_idx] return { 'mz_grid': { 'min_mz': min_mz, 'max_mz': max_mz }, 'theor': { 'centroid_mzs': centr_mzs.tolist(), 'mzs': prof_mzs.tolist(), 'ints': (prof_ints * 100.0).tolist() } }
@property def empty(self): return (not self.mzs) and (not self.ints)
[docs]def list_of_floats_to_str(l): return ','.join(map(lambda x: '{:.6f}'.format(x), l))
[docs]class IsocalcWrapper(object): """ Wrapper around pyMSpec.pyisocalc.pyisocalc used for getting theoretical isotope peaks' centroids and profiles for a sum formula. Args ---------- isocalc_config : dict Dictionary representing isotope_generation section of a dataset config file """ def __init__(self, isocalc_config): self.charge = 0 if 'polarity' in isocalc_config['charge']: polarity = isocalc_config['charge']['polarity'] self.charge = (-1 if polarity == '-' else 1) * isocalc_config['charge']['n_charges'] self.sigma = float(isocalc_config['isocalc_sigma']) self.pts_per_mz = int(isocalc_config['isocalc_pts_per_mz'])
[docs] def isotope_peaks(self, sf, adduct): """ Args ---- sf : str Sum formula adduct : str Molecule adduct. One of isotope_generation.adducts from a dataset config file Returns ------- : Centroids In case of any errors returns object with empty 'mzs' and 'ints' fields """ try: isotopes = IsotopePattern(str(sf + adduct)).charged(int(self.charge)) fwhm = self.sigma * SIGMA_TO_FWHM resolving_power = isotopes.masses[0] / fwhm return Centroids(isotopes, resolving_power, self.pts_per_mz) except Exception as e: logger.warning('(%s, %s) - %s', sf, adduct, e) return Centroids(None, None)
@staticmethod
[docs] def slice_array(mzs, lower, upper): return np.hstack(map(lambda (l, u): mzs[l:u], zip(lower, upper)))
def _format_peak_str(self, sf, adduct, centroids): # store only top 4 peaks in the database mzs, ints = trim_centroids(centroids.mzs, centroids.ints, ISOTOPIC_PEAK_N) return '%s\t%s\t%.6f\t%d\t%d\t{%s}\t{%s}' % ( sf, adduct, round(self.sigma, 6), self.charge, self.pts_per_mz, list_of_floats_to_str(mzs), list_of_floats_to_str(ints) )
[docs] def formatted_iso_peaks(self, sf, adduct): """ Args ---- sf : str Sum formula adduct : str Sum formula adduct Returns ------- : str A one line string with tab separated lists. Every list is a comma separated string. """ centroids = self.isotope_peaks(sf, adduct) if len(centroids.mzs) > 0: yield self._format_peak_str(sf, adduct, centroids)