libStatGen Software  1
SamValidation.h
1 /*
2  * Copyright (C) 2010 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #ifndef __SAM_VALIDATION_H__
19 #define __SAM_VALIDATION_H__
20 
21 #include "SamFile.h"
22 #include <list>
23 
24 // On windows, ERROR and WARNING are pre-defined macros, so undefine them.
25 #ifdef WARNING
26 #undef WARNING
27 #endif
28 #ifdef ERROR
29 #undef ERROR
30 #endif
31 
32 /// The SamValidationError class describes a validation error that occured,
33 /// containing the error type, severity, and textual error message.
35 {
36 public:
37  /// Severity of the error.
38  enum Severity
39  {
40  WARNING, ///< Warning is used if it is just an invalid value.
41  ERROR ///< Error is used if parsing could not succeed.
42  };
43 
44  /// Type of the error.
45  /// TODO: NOT ALL INVALID TYPES HAVE BEEN ADDED SINCE NOT ALL VALIDATION
46  /// IS COMPLETE YET
47  enum Type
48  {
49  INVALID_QNAME, ///< Invalid read/query name
50  INVALID_REF_ID, ///< Invalid reference id
51  INVALID_RNAME, ///< Invalid reference name
52  INVALID_POS, ///< Invalid position
53  INVALID_MAPQ, ///< Invalid mapping quality
54  INVALID_CIGAR, ///< Invalid CIGAR
55  INVALID_MRNM, ///< Invalid mate/next fragment reference name
56  INVALID_QUAL, ///< Invalid base quality
57  INVALID_TAG ///< Invalid tag
58  };
59 
60  /// Get the string representing the specified type of validation error.
61  static const char* getTypeString(Type type);
62 
63  /// Constructor that sets the type, severity, and message for the
64  /// validation error.
65  SamValidationError(Type type, Severity severity, std::string Message);
66 
67  /// Return the type enum of this validation error object.
68  Type getType() const;
69 
70  /// Return the severity enum of this validation error object.
71  Severity getSeverity() const;
72 
73  /// Return the error message of this validation error object.
74  const char* getMessage() const;
75 
76  /// Return the string representing this object's type of validation error.
77  const char* getTypeString() const;
78 
79  /// Return the string representing this object's severity of validation
80  /// error.
81  const char* getSeverityString() const;
82 
83  /// Get the error string representing this object's error.
84  void getErrorString(std::string& errorString) const;
85 
86  /// Print a formatted output of the error to cerr.
87  void printError() const;
88 
89 private:
91 
92  static const char* enumTypeString[];
93  static const char* enumSeverityString[];
94 
95  Type myType;
96  Severity mySeverity;
97  std::string myMessage;
98 
99 };
100 
101 
102 /// stream output for validation failure information
103 inline std::ostream &operator << (std::ostream &stream,
104  const SamValidationError &error)
105 {
106  std::string errorMessage;
107  error.getErrorString(errorMessage);
108  stream << errorMessage;
109  return stream;
110 }
111 
112 
113 /// The SamValidationErrors class is a container class that holds
114 /// SamValidationError Objects, allowing a validation method to return all
115 /// of the invalid errors rather than just one.
116 class SamValidationErrors
117 {
118 public:
119  /// Constructor.
120  SamValidationErrors();
121  /// Destructor
122  ~SamValidationErrors();
123 
124  /// Remove all the errors from the container.
125  void clear();
126 
127  /// Add the specified error to this container.
128  void addError(SamValidationError::Type newType,
129  SamValidationError::Severity newSeverity,
130  const char* newMessage);
131 
132  /// Return the number of validation errors contained in this object.
133  unsigned int numErrors();
134 
135  /// Return a pointer to the next error without removing it from the
136  /// container, and returning null once all errors have been retrieved
137  /// until resetErrorIter is called.
138  const SamValidationError* getNextError();
139 
140  /// Reset the iterator to the beginning of the errors.
141  void resetErrorIter();
142 
143  /// Append the error messages contained in this container to the passed
144  /// in string.
145  void getErrorString(std::string& errorString) const;
146 
147 private:
148  std::list<const SamValidationError*> myValidationErrors; ///< Stored errors. NOTE(review): raw pointers - presumably owned by this container, so copying it may be unsafe; confirm in SamValidation.cpp.
149  std::list<const SamValidationError*>::const_iterator myErrorIter; ///< Iteration position used by getNextError/resetErrorIter.
150 };
151 
152 
153 /// Write the messages of all contained validation errors to the stream.
154 inline std::ostream& operator<<(std::ostream& stream,
155  const SamValidationErrors& errors)
156 {
157  std::string combined = "";
158  errors.getErrorString(combined);
159  // Inserting a std::string yields the same stream, so return it directly.
160  return stream << combined;
161 }
162 
163 
164 /// The SamValidator class contains static methods for validating the SAM/BAM
165 /// Record and each of its fields. The generic isValid method performs all of
166 /// the other validations. The SamValidator methods return whether or not what
167 /// is being validated is valid. True means it is valid, false means it is not.
168 /// The specifics of the invalid value(s) are contained in the
169 /// SamValidationErrors object that is passed in (by reference) to the method.
170 /// The specific errors can be pulled out of that object.
171 /// TODO: VALIDATION METHODS STILL NEED TO BE ADDED, and isValid does not yet
172 /// validate all fields!!!
174 {
175 public:
176 
177  /// Validates whether or not the specified SamRecord is valid, calling
178  /// all of the other validations.
179  /// TODO: more validation needs to be added.
180  /// \param samHeader header associated with the record to be validated.
181  /// \param samRecord record to be validated.
182  /// \param validationErrors status to append any errors too.
183  /// \return true if it is valid, false and appends to SamValidationErrors
184  /// if it is not
185  static bool isValid(SamFileHeader& samHeader, SamRecord& samRecord,
186  SamValidationErrors& validationErrors);
187 
188  /// Determines whether or not the specified qname is valid.
189  /// Validation for QNAME is:
190  /// a) length of the qname string is the same as the read name length
191  /// b) length is between 1 and 254.
192  /// c) [ \t\n\r] are not allowed in the name.
193  /// \param qname the read/query name.
194  /// \param qnameLen length of the read including the null (result of
195  /// SamRecord::getReadNameLength().
196  /// \param validationErrors status to append any errors too.
197  /// \return true if it is valid, false and appends to SamValidationErrors
198  /// if it is not
199  static bool isValidQname(const char* qname, uint8_t qnameLen,
200  SamValidationErrors& validationErrors);
201 
202  /// Determines whether or not the flag is valid.
203  /// TODO: currently no validation is done on the flag.
204  /// \param flag flag to be validated.
205  /// \param validationErrors status to append any errors too.
206  /// \return true if it is valid, false and appends to SamValidationErrors
207  /// if it is not
208  static bool isValidFlag(uint16_t flag,
209  SamValidationErrors& validationErrors);
210 
211  /// Validate the reference name including validating against the header.
212  /// 1) Cross validate the rname and the header.
213  /// 2) perform the validation in the method that doesn't take the header.
214  /// \param samHeader header associated with the rname to be validated.
215  /// \param rname reference name to be validated.
216  /// \param validationErrors status to append any errors too.
217  /// \return true if it is valid, false and appends to SamValidationErrors
218  /// if it is not
219  static bool isValidRname(SamFileHeader& samHeader,
220  const char* rname,
221  SamValidationErrors& validationErrors);
222  /// Validate the rname without validating against the header.
223  /// Validation for RNAME is:
224  /// a) cannot be 0 length.
225  /// b) [ \t\n\r@=] are not allowed in the name.
226  /// \param rname reference name to be validated.
227  /// \param validationErrors status to append any errors too.
228  /// \return true if it is valid, false and appends to SamValidationErrors
229  /// if it is not
230  static bool isValidRname(const char* rname,
231  SamValidationErrors& validationErrors);
232 
233  /// Validate whether or not the specified reference id is valid.
234  /// Validation for rID is:
235  /// a) must be between -1 and the number of refInfo.
236  /// -1 is allowed, and otherwise it must properly index into the array.
237  /// \param refID reference id to be validated.
238  /// \param refInfo sam reference information containing the mapping
239  /// from reference id to reference name for this refID.
240  /// \param validationErrors status to append any errors too.
241  /// \return true if it is valid, false and appends to SamValidationErrors
242  /// if it is not
243  static bool isValidRefID(int32_t refID, const SamReferenceInfo& refInfo,
244  SamValidationErrors& validationErrors);
245 
246  /// Validate the refeference position.
247  /// Validation for pos is:
248  /// a) must be between 0 and (2^29)-1.
249  /// \param pos position to be validated.
250  /// \param validationErrors status to append any errors too.
251  /// \return true if it is valid, false and appends to SamValidationErrors
252  /// if it is not
253  static bool isValid1BasedPos(int32_t pos,
254  SamValidationErrors& validationErrors);
255 
256  /// Validate the mapping quality.
257  /// TODO: currently no validation is done on the mapping quality.
258  /// \param mapQuality mapping quality to be validated.
259  /// \param validationErrors status to append any errors too.
260  /// \return true if it is valid, false and appends to SamValidationErrors
261  /// if it is not
262  static bool isValidMapQuality(uint8_t mapQuality,
263  SamValidationErrors& validationErrors);
264 
265  /// Validate the sequence, but not against the cigar or quality string.
266  /// Validation against cigar is done in isValidCigar.
267  /// Validation against the quality string is done in isValidQuality.
268  /// TODO: currently no validation is done in this method.
269  /// \param samRecord record whose sequence should be validated.
270  /// \param validationErrors status to append any errors too.
271  /// \return true if it is valid, false and appends to SamValidationErrors
272  /// if it is not
273  static bool isValidSequence(SamRecord& samRecord,
274  SamValidationErrors& validationErrors);
275 
276  /// Validate the cigar. Cigar validation depends on sequence.
277  /// Validation for CIGAR is:
278  /// a) cannot be 0 length.
279  /// if not "*", validate the following:
280  /// b) must have an integer length for each operator (if not "*"). TODO
281  /// c) all operators must be valid (if not "*"). TODO
282  /// d) evaluates to the same read length as the sequence string.
283  /// \param samRecord record whose cigar should be validated.
284  /// \param validationErrors status to append any errors too.
285  /// \return true if it is valid, false and appends to SamValidationErrors
286  /// if it is not
287  static bool isValidCigar(SamRecord& samRecord,
288  SamValidationErrors& validationErrors);
289 
290  /// Validate the cigar. Cigar validation depends on sequence.
291  /// Validation for CIGAR is:
292  /// a) cannot be 0 length.
293  /// if not "*", validate the following:
294  /// b) must have an integer length for each operator (if not "*"). TODO
295  /// c) all operators must be valid (if not "*"). TODO
296  /// d) evaluates to the same read length as the sequence string.
297  /// \param cigar cigar string to be validated.
298  /// \param sequence sequence to check the cigar against.
299  /// \param validationErrors status to append any errors too.
300  /// \return true if it is valid, false and appends to SamValidationErrors
301  /// if it is not
302  static bool isValidCigar(const char* cigar, const char* sequence,
303  SamValidationErrors& validationErrors);
304 
305  /// Validate the cigar. Cigar validation depends on sequence.
306  /// Validation for CIGAR is:
307  /// a) cannot be 0 length.
308  /// if not "*", validate the following:
309  /// b) TODO: must have an integer length for each operator (if not "*").
310  /// c) TODO: all operators must be valid (if not "*").
311  /// d) evaluates to the same read length as the sequence string.
312  /// \param cigar cigar string to be validated.
313  /// \param seqLen sequence length to check the cigar against.
314  /// \param validationErrors status to append any errors too.
315  /// \return true if it is valid, false and appends to SamValidationErrors
316  /// if it is not
317  static bool isValidCigar(const char* cigar,
318  int seqLen,
319  SamValidationErrors& validationErrors);
320 
321  /// TODO: validate the mate/next fragment's reference name.
322  /// \return true if it is valid, false and appends to SamValidationErrors
323  /// if it is not
324  static bool isValidMrnm();
325 
326  /// TODO: validate the mate/next fragment's position.
327  /// \return true if it is valid, false and appends to SamValidationErrors
328  /// if it is not
329  static bool isValidMpos();
330 
331  /// TODO: validate the insertion size/observed template length.
332  /// \return true if it is valid, false and appends to SamValidationErrors
333  /// if it is not
334  static bool isValidIsize();
335 
336  /// TODO, validate the sequence.
337  /// \return true if it is valid, false and appends to SamValidationErrors
338  /// if it is not
339  static bool isValidSeq();
340 
341  /// Validate the base quality.
342  /// Quality validation depends on sequence.
343  /// Validation for quality is:
344  /// a) quality & sequence are the same length if both are specified.
345  /// TODO: more validation.
346  /// \param samRecord record whose quality should be validated.
347  /// \param validationErrors status to append any errors too.
348  /// \return true if it is valid, false and appends to SamValidationErrors
349  /// if it is not
350  static bool isValidQuality(SamRecord& samRecord,
351  SamValidationErrors& validationErrors);
352 
353  /// Validate the base quality.
354  /// Quality validation depends on sequence.
355  /// Validation for quality is:
356  /// a) quality & sequence are the same length if both are specified.
357  /// TODO: more validation.
358  /// \param quality quality string to be validated.
359  /// \param seqLen sequence length to check the quality against.
360  /// \param validationErrors status to append any errors too.
361  /// \return true if it is valid, false and appends to SamValidationErrors
362  /// if it is not
363  static bool isValidQuality(const char* quality, const char* sequence,
364  SamValidationErrors& validationErrors);
365 
366  /// Validate the base quality.
367  /// Quality validation depends on sequence.
368  /// Validation for quality is:
369  /// a) quality & sequence are the same length if both are specified.
370  /// TODO: more validation.
371  /// \param quality quality string to be validated.
372  /// \param seqLen sequence length to check the quality against.
373  /// \param validationErrors status to append any errors too.
374  /// \return true if it is valid, false and appends to SamValidationErrors
375  /// if it is not
376  bool static isValidQuality(const char* quality,
377  int seqLength,
378  SamValidationErrors& validationErrors);
379 
380  /// Validate the tags.
381  /// Validation for tags is:
382  /// a) check that the "MD" tag is correct if it is present.
383  /// TODO: more validation.
384  /// \param samRecord record whose tags should be validated.
385  /// \param validationErrors status to append any errors too.
386  /// \return true if it is valid, false and appends to SamValidationErrors
387  /// if it is not
388  static bool isValidTags(SamRecord& samRecord,
389  SamValidationErrors& validationErrors);
390 
391  /// TODO validate the tag vtype
392  /// \return true if it is valid, false and appends to SamValidationErrors
393  /// if it is not
394  static bool isValidVtype();
395 
396  /// TODO validate the tag vtype
397  /// \return true if it is valid, false and appends to SamValidationErrors
398  /// if it is not
399  static bool isValidValue();
400 };
401 
402 
403 #endif
SamValidationError::getType
Type getType() const
Return the type enum of this validation error object.
Definition: SamValidation.cpp:54
SamValidator::isValidValue
static bool isValidValue()
TODO validate the tag value.
SamValidationError::WARNING
@ WARNING
Warning is used if it is just an invalid value.
Definition: SamValidation.h:40
SamValidationError::INVALID_CIGAR
@ INVALID_CIGAR
Invalid CIGAR.
Definition: SamValidation.h:54
SamValidationError::INVALID_QNAME
@ INVALID_QNAME
Invalid read/query name.
Definition: SamValidation.h:49
SamValidationError::INVALID_QUAL
@ INVALID_QUAL
Invalid base quality.
Definition: SamValidation.h:56
SamValidationError::Severity
Severity
Severity of the error.
Definition: SamValidation.h:38
SamValidator::isValidCigar
static bool isValidCigar(SamRecord &samRecord, SamValidationErrors &validationErrors)
Validate the cigar.
Definition: SamValidation.cpp:543
SamValidationError::INVALID_POS
@ INVALID_POS
Invalid position.
Definition: SamValidation.h:52
SamValidationError::getErrorString
void getErrorString(std::string &errorString) const
Get the error string representing this object's error.
Definition: SamValidation.cpp:84
SamValidator::isValidVtype
static bool isValidVtype()
TODO validate the tag vtype.
SamReferenceInfo
Class for tracking the reference information mapping between the reference ids and the reference name...
Definition: SamReferenceInfo.h:27
SamValidator::isValidSeq
static bool isValidSeq()
TODO, validate the sequence.
SamValidator::isValidMrnm
static bool isValidMrnm()
TODO: validate the mate/next fragment's reference name.
SamValidationErrors
The SamValidationErrors class is a container class that holds SamValidationError Objects,...
Definition: SamValidation.h:116
SamValidationErrors::resetErrorIter
void resetErrorIter()
Reset the iterator to the beginning of the errors.
Definition: SamValidation.cpp:173
SamValidationErrors::~SamValidationErrors
~SamValidationErrors()
Destructor.
Definition: SamValidation.cpp:111
SamValidationError::printError
void printError() const
Print a formatted output of the error to cerr.
Definition: SamValidation.cpp:95
SamValidator::isValidFlag
static bool isValidFlag(uint16_t flag, SamValidationErrors &validationErrors)
Determines whether or not the flag is valid.
Definition: SamValidation.cpp:340
SamValidationErrors::SamValidationErrors
SamValidationErrors()
Constructor.
Definition: SamValidation.cpp:103
SamValidationError::getMessage
const char * getMessage() const
Return the error message of this validation error object.
Definition: SamValidation.cpp:66
SamValidationErrors::getNextError
const SamValidationError * getNextError()
Return a pointer to the next error without removing it from the container, and returning null once al...
Definition: SamValidation.cpp:160
SamValidationError::getSeverityString
const char * getSeverityString() const
Return the string representing this object's severity of validation error.
Definition: SamValidation.cpp:78
SamValidationErrors::addError
void addError(SamValidationError::Type newType, SamValidationError::Severity newSeverity, const char *newMessage)
Add the specified error to this container.
Definition: SamValidation.cpp:132
SamValidationError::Type
Type
Type of the error.
Definition: SamValidation.h:47
SamValidationError::ERROR
@ ERROR
Error is used if parsing could not succeed.
Definition: SamValidation.h:41
SamValidator::isValidRefID
static bool isValidRefID(int32_t refID, const SamReferenceInfo &refInfo, SamValidationErrors &validationErrors)
Validate whether or not the specified reference id is valid.
Definition: SamValidation.cpp:476
SamValidator::isValidRname
static bool isValidRname(SamFileHeader &samHeader, const char *rname, SamValidationErrors &validationErrors)
Validate the reference name including validating against the header.
Definition: SamValidation.cpp:348
SamValidationError::INVALID_RNAME
@ INVALID_RNAME
Invalid reference name.
Definition: SamValidation.h:51
SamValidationError::INVALID_MAPQ
@ INVALID_MAPQ
Invalid mapping quality.
Definition: SamValidation.h:53
SamValidator::isValid
static bool isValid(SamFileHeader &samHeader, SamRecord &samRecord, SamValidationErrors &validationErrors)
Validates whether or not the specified SamRecord is valid, calling all of the other validations.
Definition: SamValidation.cpp:195
SamValidator
The SamValidator class contains static methods for validating the SAM/BAM Record and each of its fiel...
Definition: SamValidation.h:173
SamValidationError::getSeverity
Severity getSeverity() const
Return the severity enum of this validation error object.
Definition: SamValidation.cpp:60
SamValidationError::INVALID_MRNM
@ INVALID_MRNM
Invalid mate/next fragment reference name.
Definition: SamValidation.h:55
SamValidationError
The SamValidationError class describes a validation error that occurred, containing the error type,...
Definition: SamValidation.h:34
SamValidator::isValidMapQuality
static bool isValidMapQuality(uint8_t mapQuality, SamValidationErrors &validationErrors)
Validate the mapping quality.
Definition: SamValidation.cpp:528
SamValidator::isValidIsize
static bool isValidIsize()
TODO: validate the insertion size/observed template length.
SamFileHeader
This class allows a user to get/set the fields in a SAM/BAM Header.
Definition: SamFileHeader.h:34
SamRecord
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.
Definition: SamRecord.h:51
SamValidationError::getTypeString
const char * getTypeString() const
Return the string representing this object's type of validation error.
Definition: SamValidation.cpp:72
SamValidator::isValidQuality
static bool isValidQuality(SamRecord &samRecord, SamValidationErrors &validationErrors)
Validate the base quality.
Definition: SamValidation.cpp:611
SamValidator::isValidQname
static bool isValidQname(const char *qname, uint8_t qnameLen, SamValidationErrors &validationErrors)
Determines whether or not the specified qname is valid.
Definition: SamValidation.cpp:238
SamValidationErrors::clear
void clear()
Remove all the errors from the container.
Definition: SamValidation.cpp:117
SamValidationError::INVALID_REF_ID
@ INVALID_REF_ID
Invalid reference id.
Definition: SamValidation.h:50
SamValidator::isValidMpos
static bool isValidMpos()
TODO: validate the mate/next fragment's position.
SamValidator::isValidSequence
static bool isValidSequence(SamRecord &samRecord, SamValidationErrors &validationErrors)
Validate the sequence, but not against the cigar or quality string.
Definition: SamValidation.cpp:536
SamValidationErrors::getErrorString
void getErrorString(std::string &errorString) const
Append the error messages contained in this container to the passed in string.
Definition: SamValidation.cpp:180
SamValidationError::INVALID_TAG
@ INVALID_TAG
Invalid tag.
Definition: SamValidation.h:57
SamValidator::isValidTags
static bool isValidTags(SamRecord &samRecord, SamValidationErrors &validationErrors)
Validate the tags.
Definition: SamValidation.cpp:669
operator<<
InputFile & operator<<(InputFile &stream, const std::string &str)
Write to a file using streaming.
Definition: InputFile.h:736
SamValidator::isValid1BasedPos
static bool isValid1BasedPos(int32_t pos, SamValidationErrors &validationErrors)
Validate the reference position.
Definition: SamValidation.cpp:504
SamValidationErrors::numErrors
unsigned int numErrors()
Return the number of validation errors contained in this object.
Definition: SamValidation.cpp:151