libpappsomspp
Library for mass spectrometry
msrunxicextractordisk.cpp
Go to the documentation of this file.
1 /**
2  * \file pappsomspp/xicextractor/private/msrunxicextractordisk.cpp
3  * \date 12/05/2018
4  * \author Olivier Langella
5  * \brief proteowizard based XIC extractor featuring disk cache
6  */
7 
8 /*******************************************************************************
9  * Copyright (c) 2018 Olivier Langella <Olivier.Langella@u-psud.fr>.
10  *
11  * This file is part of the PAPPSOms++ library.
12  *
13  * PAPPSOms++ is free software: you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by
15  * the Free Software Foundation, either version 3 of the License, or
16  * (at your option) any later version.
17  *
18  * PAPPSOms++ is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25  *
26  * Contributors:
27  * Olivier Langella <Olivier.Langella@u-psud.fr> - initial API and
28  *implementation
29  ******************************************************************************/
30 
31 #include "msrunxicextractordisk.h"
32 #include <QDebug>
33 #include "../../pappsoexception.h"
34 #include "../../massspectrum/massspectrum.h"
35 
36 namespace pappso
37 {
38 
40  const QDir &temporary_dir)
41  : pappso::MsRunXicExtractor(msrun_reader)
42 {
43  mpa_temporaryDirectory = nullptr;
44  m_temporaryDirectory = temporary_dir.absolutePath();
45 }
46 
48  : pappso::MsRunXicExtractor(other)
49 {
50 
52  mpa_temporaryDirectory = new QTemporaryDir(
53  QString("%1/msrun_%2_")
55  .arg(msp_msrun_reader.get()->getMsRunId().get()->getXmlId()));
56 }
57 
59 {
60  if(mpa_temporaryDirectory != nullptr)
61  {
63  }
64 }
65 
66 void
68 {
69  qDebug();
70  try
71  {
73  // msp_msrun_reader = nullptr;
74  }
75  catch(pappso::PappsoException &errora)
76  {
77  qDebug();
79  QObject::tr("Error reading file (%1) : %2")
80  .arg(msp_msrun_reader.get()->getMsRunId().get()->toString())
81  .arg(errora.qwhat()));
82  }
83  catch(std::exception &error)
84  {
85  qDebug();
87  QObject::tr("Error reading file (%1) using : %2")
88  .arg(msp_msrun_reader.get()->getMsRunId().get()->toString())
89  .arg(error.what()));
90  }
91 }
92 
95  pappso::pappso_double rt_begin,
96  pappso::pappso_double rt_end)
97 {
98  std::shared_ptr<Xic> msrunxic_sp = std::make_shared<Xic>(Xic());
99 
100  std::vector<MsRunSliceSPtr> slice_list;
101  slice_list = acquireSlices(mz_range);
102 
103  if(slice_list.size() == 0)
104  {
106  QObject::tr("Error getMsRunXicSp slice_list.size() == 0"));
107  }
108 
109  for(std::size_t i = 0; i < m_retentionTimeList.size(); i++)
110  {
111 
112  DataPoint xic_element;
113  xic_element.x = m_retentionTimeList[i];
114  xic_element.y = 0;
115  if((xic_element.x < rt_begin) || (xic_element.x > rt_end))
116  continue;
117 
118  for(auto &&msrun_slice : slice_list)
119  {
120  const MassSpectrum &spectrum = msrun_slice.get()->getSpectrum(i);
121  for(auto &&peak : spectrum)
122  {
123  if(mz_range.contains(peak.x))
124  {
126  {
127  xic_element.y += peak.y;
128  }
129  else
130  {
131  if(xic_element.y < peak.y)
132  {
133  xic_element.y = peak.y;
134  }
135  }
136  }
137  }
138  }
139  msrunxic_sp.get()->push_back(xic_element);
140  }
141 
142  return (msrunxic_sp);
143 }
144 
145 std::vector<XicCstSPtr>
147  const std::vector<MzRange> &mz_range_list)
148 {
149 
150  std::vector<XicCstSPtr> xic_list_return;
151  for(auto &range : mz_range_list)
152  {
153  xic_list_return.push_back(getXicCstSPtr(range, 0, 40000000));
154  }
155  return xic_list_return;
156 }
157 
158 void
160 {
161  qDebug();
162  m_minMz = 5000;
163  m_maxMz = 0;
164 
165  unsigned int slice_number;
166  std::map<unsigned int, MassSpectrum> spectrum_map;
167 
168  /*
169  const pwiz::msdata::SpectrumList *p_spectrum_list =
170  p_msdatafile->run.spectrumListPtr.get();
171 
172  std::size_t spectrum_list_size = p_spectrum_list->size();
173  pwiz::msdata::SpectrumPtr pwiz_spectrum;
174  */
175 
176  m_rtSize = m_msrun_points.size();
177 
178 
179  MassSpectrumCstSPtr spectrum;
180  for(auto &&msrun_point : m_msrun_points)
181  {
182 
183  spectrum_map.clear();
184 
185  m_retentionTimeList.push_back(msrun_point.rt);
186 
187  spectrum =
188  msp_msrun_reader.get()->massSpectrumCstSPtr(msrun_point.spectrum_index);
189 
190  const MassSpectrum *p_spectrum = spectrum.get();
191  if(p_spectrum->size() > 0)
192  {
193  if(p_spectrum->begin()->x < m_minMz)
194  {
195  m_minMz = p_spectrum->begin()->x;
196  }
197  // iterate through the m/z-intensity pairs
198 
199  if(p_spectrum->back().x > m_maxMz)
200  {
201  m_maxMz = p_spectrum->back().x;
202  }
203 
204  for(auto &peak : *p_spectrum)
205  {
206 
207  slice_number = peak.x;
208 
209  std::pair<std::map<unsigned int, MassSpectrum>::iterator, bool>
210  ret = spectrum_map.insert(std::pair<unsigned int, MassSpectrum>(
211  slice_number, MassSpectrum()));
212 
213  ret.first->second.push_back(peak);
214  // auto ret = spectrum_map.insert(std::pair<unsigned int,
215  // MassSpectrum>(slice_number,MassSpectrum()));
216  // ret.first->second.push_back(peak);
217  }
218 
219  // slices are ready for this retention time
220  storeSlices(spectrum_map, m_retentionTimeList.size() - 1);
221  }
222  }
223 
224  endPwizRead();
225  qDebug();
226 }
227 
228 
229 void
231  std::map<unsigned int, MassSpectrum> &spectrum_map, std::size_t ipos)
232 {
233  // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
234 
235  for(auto &&spectrum_pair : spectrum_map)
236  {
237  appendSliceOnDisk(spectrum_pair.first, spectrum_pair.second, ipos);
238  }
239 
240  // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
241 }
242 
243 void
245  MassSpectrum &spectrum,
246  std::size_t ipos)
247 {
248  // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
249  QFile slice_file(
250  QString("%1/%2").arg(mpa_temporaryDirectory->path()).arg(slice_number));
251  bool new_file = false;
252  if(!slice_file.exists())
253  {
254  new_file = true;
255  }
256  if(!slice_file.open(QIODevice::WriteOnly | QIODevice::Append))
257  {
259  QObject::tr("unable to open file %1").arg(slice_file.fileName()));
260  }
261  QDataStream stream(&slice_file);
262 
263  if(new_file)
264  {
265  stream << (quint32)slice_number;
266  stream << (quint32)m_rtSize;
267  }
268 
269  stream << (quint32)ipos;
270  stream << spectrum;
271 
272  slice_file.flush();
273  slice_file.close();
274  // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
275 }
276 
278 MsRunXicExtractorDisk::unserializeSlice(unsigned int slice_number)
279 {
280  qDebug();
281  try
282  {
283  std::shared_ptr<MsRunSlice> msrun_slice_sp =
284  std::make_shared<MsRunSlice>(MsRunSlice());
285 
286  QFile slice_file(
287  QString("%1/%2").arg(mpa_temporaryDirectory->path()).arg(slice_number));
288  if(!slice_file.exists())
289  {
290  msrun_slice_sp.get()->setSize(m_rtSize);
291  msrun_slice_sp.get()->setSliceNumber(slice_number);
292  return msrun_slice_sp;
293  }
294  if(!slice_file.open(QIODevice::ReadOnly))
295  {
297  QObject::tr("unable to open file %1 in readonly")
298  .arg(slice_file.fileName()));
299  }
300  QDataStream stream(&slice_file);
301 
302  stream >> *(msrun_slice_sp.get());
303 
304  slice_file.close();
305 
306  return msrun_slice_sp;
307  }
308  catch(pappso::PappsoException &error)
309  {
311  QObject::tr("error unserializing slice %1:\n%2")
312  .arg(slice_number)
313  .arg(error.qwhat()));
314  }
315  qDebug();
316 }
317 
318 std::vector<MsRunSliceSPtr>
320 {
321  QMutexLocker lock(&m_mutex);
322  std::vector<MsRunSliceSPtr> slice_list;
323  for(unsigned int i = mz_range.lower(); i <= mz_range.upper(); i++)
324  {
325  auto it = std::find_if(m_msRunSliceListCache.begin(),
326  m_msRunSliceListCache.end(),
327  [i](const MsRunSliceSPtr &slice_sp) {
328  return slice_sp.get()->getSliceNumber() == i;
329  });
330  if(it != m_msRunSliceListCache.end())
331  {
332  slice_list.push_back(*it);
333  m_msRunSliceListCache.push_back(*it);
334  }
335  else
336  {
337  MsRunSliceSPtr slice_sp = unserializeSlice(i);
338  slice_list.push_back(slice_sp);
339  m_msRunSliceListCache.push_back(slice_sp);
340  }
341  }
342 
343  if(m_msRunSliceListCache.size() > 20)
344  {
345  m_msRunSliceListCache.pop_front();
346  }
347  return slice_list;
348 }
349 
350 
351 void
353 {
354  msp_msrun_reader.get()->releaseDevice();
355 }
356 } // namespace pappso
Class to represent a mass spectrum.
Definition: massspectrum.h:71
std::vector< MsRunSliceSPtr > acquireSlices(const MzRange &mz_range)
retrieve all the slices corresponding to a given mz_range
std::vector< pappso::pappso_double > m_retentionTimeList
MsRunXicExtractorDisk(MsRunReaderSPtr &msrun_reader)
std::deque< MsRunSliceSPtr > m_msRunSliceListCache
virtual void storeSlices(std::map< unsigned int, MassSpectrum > &slice_vector, std::size_t ipos)
store MassSpectrum slices (by daltons) for a given retention time
void appendSliceOnDisk(unsigned int slice_number, MassSpectrum &spectrum, std::size_t ipos)
append a slice on disk (in a file)
virtual std::vector< XicCstSPtr > getXicCstSPtrList(const std::vector< MzRange > &mz_range_list) override
extract a list of XIC given a list of mass to extract
virtual XicCstSPtr getXicCstSPtr(const MzRange &mz_range, pappso::pappso_double rt_begin, pappso::pappso_double rt_end) override
get a XIC on this MsRun at the given mass range
MsRunSliceSPtr unserializeSlice(unsigned int slice_number)
get one slice from disk by her slice number (dalton)
std::vector< MsRunXicExtractorPoints > m_msrun_points
pappso_double lower() const
Definition: mzrange.h:72
pappso_double upper() const
Definition: mzrange.h:78
bool contains(pappso_double) const
Definition: mzrange.cpp:102
virtual const QString & qwhat() const
MsRunReader based XIC extractor featuring disk cache.
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
std::shared_ptr< MsRunReader > MsRunReaderSPtr
Definition: msrunreader.h:151
std::shared_ptr< const Xic > XicCstSPtr
Definition: xic.h:37
std::shared_ptr< const MsRunSlice > MsRunSliceSPtr
Definition: msrunslice.h:39
double pappso_double
A type definition for doubles.
Definition: types.h:48
std::shared_ptr< const MassSpectrum > MassSpectrumCstSPtr
Definition: massspectrum.h:55
@ sum
sum of intensities
pappso_double x
Definition: datapoint.h:22
pappso_double y
Definition: datapoint.h:23