libStatGen Software  1
CigarRoller.h
1 /*
2  * Copyright (C) 2010-2011 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #if !defined(_CIGAR_ROLLER_H)
19 #define _CIGAR_ROLLER_H
20 
21 #include "Cigar.h"
22 
23 /// The purpose of this class is to provide accessors for setting, updating, modifying the CIGAR object. It is a child class of Cigar.
24 
25 ///
26 /// Docs from Sam1.pdf:
27 ///
28 /// Clipped alignment. In Smith-Waterman alignment, a sequence may not be aligned from the first residue to the last one.
29 /// Subsequences at the ends may be clipped off. We introduce operation ʻSʼ to describe (softly) clipped alignment. Here is
30 /// an example. Suppose the clipped alignment is:
31 /// REF: AGCTAGCATCGTGTCGCCCGTCTAGCATACGCATGATCGACTGTCAGCTAGTCAGACTAGTCGATCGATGTG
32 /// READ: gggGTGTAACC-GACTAGgggg
33 /// where on the read sequence, bases in uppercase are matches and bases in lowercase are clipped off. The CIGAR for
34 /// this alignment is: 3S8M1D6M4S.
35 ///
36 ///
37 /// If the mapping position of the query is not available, RNAME and
38 /// CIGAR are set as “*”
39 ///
40 /// A CIGAR string is comprised of a series of operation lengths plus the operations. The conventional CIGAR format allows
41 /// for three types of operations: M for match or mismatch, I for insertion and D for deletion. The extended CIGAR format
42 /// further allows four more operations, as is shown in the following table, to describe clipping, padding and splicing:
43 ///
44 /// op Description
45 /// -- -----------
46 /// M Match or mismatch
47 /// I Insertion to the reference
48 /// D Deletion from the reference
49 /// N Skipped region from the reference
50 /// S Soft clip on the read (clipped sequence present in <seq>)
51 /// H Hard clip on the read (clipped sequence NOT present in <seq>)
52 /// P Padding (silent deletion from the padded reference sequence)
53 ///
54 
55 
56 
57 ////////////////////////////////////////////////////////////////////////
58 ///
59 /// CigarRoller is an aid to correctly generating the CIGAR strings
60 /// necessary to represent how a read maps to the reference.
61 ///
62 /// It is called once a particular match candidate is being written
63 /// out, so it is far less performance sensitive than the Smith Waterman
64 /// code below.
65 ///
66 class CigarRoller : public Cigar
67 {
68 public:
69 
70  ////////////////////////////////////////////////////////////////////////
71  //
72  // Cigar Roller Class
73  //
74  /// Writes all of the cigar operations contained in this roller to the
75  /// passed in stream.
76  friend std::ostream &operator << (std::ostream &stream, const CigarRoller& roller);
77 
78  /// Default constructor initializes as a CIGAR with no operations.
79  CigarRoller()
80  {
81  clearQueryAndReferenceIndexes();
82  }
83 
84  /// Constructor that initializes the object with the specified cigarString.
85  CigarRoller(const char *cigarString)
86  {
87  Set(cigarString);
88  }
89 
90  /// Add the contents of the specified CigarRoller to this object.
91  CigarRoller & operator += (CigarRoller &rhs);
92 
93  /// Append the specified operator to this object.
94  CigarRoller & operator += (const CigarOperator &rhs);
95 
96  /// Set this object to be equal to the specified CigarRoller.
97  CigarRoller & operator = (CigarRoller &rhs);
98 
99  /// Append the specified operation with the specified count to this object.
100  void Add(Operation operation, int count);
101 
102  /// Append the specified operation with the specified count to this object.
103  void Add(char operation, int count);
104 
105  /// Append the specified cigarString to this object.
106  void Add(const char *cigarString);
107 
108  /// Append the specified Cigar object to this object.
109  void Add(CigarRoller &rhs)
110  {
111  (*this) += rhs;
112  }
113 
114  /// Remove the operation at the specified index.
115  /// \return true if successfully removed, false if not.
116  bool Remove(int index);
117 
118  /// Increments the count for the operation at the specified index
119  /// by the specified value, specify a negative value to decrement.
120  /// \return true if it is successfully incremented, false if not.
121  bool IncrementCount(int index, int increment);
122 
123  /// Updates the operation at the specified index to be the specified
124  /// operation and have the specified count.
125  /// \return true if it is successfully updated, false if not.
126  bool Update(int index, Operation op, int count);
127 
128  /// Sets this object to the specified cigarString.
129  void Set(const char *cigarString);
130 
131  /// Sets this object to the BAM formatted cigar found at the beginning
132  /// of the specified buffer which is bufferLen long.
133  void Set(const uint32_t* cigarBuffer, uint16_t bufferLen);
134 
135  //
136  // when we examine CIGAR strings, we need to know how
137  // many cumulative insert and delete positions there are
138  // so that we can adjust the read location appropriately.
139  //
140  // Here, we iterate over the vector of CIGAR operations,
141  // summing the count for each insert or delete (insert
142  // increases the offset, delete decreases it).
143  //
144  // The use case for this is when we have a genome match
145  // position based on an index word other than the first one,
146  // and there is also an insert or delete between the beginning
147  // of the read and the index word. We can't simply report
148  // the match position without taking into account the indels,
149  // otherwise we'll be off by N where N is the sum of this
150  // indel count.
151  //
152  /// DEPRECATED - do not use, there are better ways to accomplish that by
153  /// using read lengths, reference lengths, span of the read, etc.
154  int getMatchPositionOffset();
155 
156  /// Get the string representation of the Cigar operations in this object,
157  /// caller must delete the returned value.
158  const char *getString();
159 
160  /// Clear this object so that it has no Cigar Operations.
161  void clear();
162 
163 private:
164 };
165 
166 
167 inline std::ostream &operator << (std::ostream &stream, const CigarRoller& roller)
168 {
169  stream << roller.cigarOperations;
170  return stream;
171 }
172 
173 #endif
CigarRoller::Add
void Add(Operation operation, int count)
Append the specified operation with the specified count to this object.
Definition: CigarRoller.cpp:77
Cigar
This class represents the CIGAR without any methods to set the cigar (see CigarRoller for that).
Definition: Cigar.h:83
CigarRoller::CigarRoller
CigarRoller()
Default constructor initializes as a CIGAR with no operations.
Definition: CigarRoller.h:79
Cigar::Operation
Operation
Enum for the cigar operations.
Definition: Cigar.h:87
CigarRoller::clear
void clear()
Clear this object so that it has no Cigar Operations.
Definition: CigarRoller.cpp:325
CigarRoller::Set
void Set(const char *cigarString)
Sets this object to the specified cigarString.
Definition: CigarRoller.cpp:204
CigarRoller::CigarRoller
CigarRoller(const char *cigarString)
Constructor that initializes the object with the specified cigarString.
Definition: CigarRoller.h:85
CigarRoller::Remove
bool Remove(int index)
Remove the operation at the specified index.
Definition: CigarRoller.cpp:156
CigarRoller::Update
bool Update(int index, Operation op, int count)
Updates the operation at the specified index to be the specified operation and have the specified cou...
Definition: CigarRoller.cpp:187
CigarRoller::operator<<
friend std::ostream & operator<<(std::ostream &stream, const CigarRoller &roller)
Writes all of the cigar operations contained in this roller to the passed in stream.
Definition: CigarRoller.h:167
CigarRoller::Add
void Add(CigarRoller &rhs)
Append the specified Cigar object to this object.
Definition: CigarRoller.h:109
CigarRoller::getMatchPositionOffset
int getMatchPositionOffset()
DEPRECATED - do not use, there are better ways to accomplish that by using read lengths,...
Definition: CigarRoller.cpp:244
CigarRoller::getString
const char * getString()
Get the string representation of the Cigar operations in this object, caller must delete the returned v...
Definition: CigarRoller.cpp:272
CigarRoller::operator=
CigarRoller & operator=(CigarRoller &rhs)
Set this object to be equal to the specified CigarRoller.
Definition: CigarRoller.cpp:66
CigarRoller
The purpose of this class is to provide accessors for setting, updating, modifying the CIGAR object....
Definition: CigarRoller.h:66
CigarRoller::operator+=
CigarRoller & operator+=(CigarRoller &rhs)
Add the contents of the specified CigarRoller to this object.
Definition: CigarRoller.cpp:29
CigarRoller::IncrementCount
bool IncrementCount(int index, int increment)
Increments the count for the operation at the specified index by the specified value,...
Definition: CigarRoller.cpp:171
operator<<
InputFile & operator<<(InputFile &stream, const std::string &str)
Write to a file using streaming.
Definition: InputFile.h:736