libStatGen Software  1
SamQuerySeqWithRefHelper.h
1 /*
2  * Copyright (C) 2010 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #ifndef __SAM_QUERY_SEQ_WITH_REF_HELPER_H__
19 #define __SAM_QUERY_SEQ_WITH_REF_HELPER_H__
20 
21 #include <stdint.h>
22 
23 #include "SamRecord.h"
24 #include "GenomeSequence.h"
25 
26 /// This class contains the match/mismatch information
27 /// between the reference and a read for a single base.
/// It pairs a comparison result (Type) with a query index.
/// NOTE(review): listing line 28 (the class declaration itself) is missing
/// from this extract. The symbol index at the end of the listing places
/// SamSingleBaseMatchInfo at SamQuerySeqWithRefHelper.h:28; restore the
/// declaration from the original header.
29 {
30 public:
    /// Result of comparing one query base against the reference.
31  /// More types can be added later as needed.
32  enum Type {UNKNOWN, MATCH, MISMATCH};
33 
    // NOTE(review): listing lines 34-35 are absent from this extract; their
    // contents cannot be determined from here -- check the original header.
36 
37 
38  /// Get the type (match/mismatch/unknown) for this object.
    /// \return one of the Type enumerators.
39  Type getType();
40 
41  /// Get the query index for this object.
    /// \return the query index as a signed 32-bit integer.
42  int32_t getQueryIndex();
43 
44  /// Set the type (match/mismatch/unknown) for this object.
    /// \param newType the Type value to record.
45  void setType(Type newType);
46 
47  /// Set the query index for this object.
    /// \param queryIndex the query index to record.
48  void setQueryIndex(int32_t queryIndex);
49 
50 private:
    /// Match/mismatch classification; presumably the value behind
    /// getType()/setType() -- confirm in SamQuerySeqWithRefHelper.cpp.
51  Type myType;
    /// Query index; presumably the value behind
    /// getQueryIndex()/setQueryIndex() -- confirm in SamQuerySeqWithRefHelper.cpp.
52  int32_t myQueryIndex;
53 };
54 
55 /// Iterates through the query and compares it with the reference.
56 /// NOTE: References to the GenomeSequence and SamRecord are stored, the objects
57 /// are not copied, so they must remain valid as long as this class is used.
/// NOTE(review): listing line 58 (the class declaration itself) is missing
/// from this extract. The symbol index at the end of the listing places
/// SamQuerySeqWithRefIter at SamQuerySeqWithRefHelper.h:58; restore the
/// declaration from the original header.
59 {
60 public:
    /// Construct an iterator over the given record and reference.
    /// \param record record whose query sequence is iterated; held by reference.
    /// \param refSequence reference to compare against; held by reference.
    /// \param forward iteration direction, defaulting to true; presumably the
    /// same meaning as the parameter of reset() -- confirm in the .cpp.
61  SamQuerySeqWithRefIter(SamRecord& record, GenomeSequence& refSequence,
62  bool forward = true);
    /// Destructor (declared virtual).
63  virtual ~SamQuerySeqWithRefIter();
64 
65  /// Reset to start at the beginning of the record.
66  /// This will re-read values from SamRecord, so can be used if it has
67  /// changed to contain information for a new record.
68  /// \param forward true means to start from the beginning and go to the end;
69  /// false means to start from the end and go to the beginning.
70  /// \return true if successfully reset; false if failed to read the Cigar.
71  bool reset(bool forward = true);
72 
73  /// Returns information for the next position where the query and the
74  /// reference match or mismatch. To be a match or mismatch, both the query
75  /// and reference must have a base that is not 'N'.
76  /// This means:
77  /// insertions and deletions are not mismatches or matches.
78  /// 'N' bases are not matches or mismatches
79  /// \param matchMismatchInfo return parameter with the information about
80  /// the matching/mismatching base.
81  /// \return true if there was another match/mismatch
82  /// (matchMismatchInfo was set); false if not.
83  bool getNextMatchMismatch(SamSingleBaseMatchInfo& matchMismatchInfo);
84 
85 private:
86 
    // NOTE(review): listing line 87 is absent from this extract; its contents
    // cannot be determined from here -- check the original header.
88 
    /// NOTE(review): undocumented private helper; presumably moves
    /// myQueryIndex one step in the direction given by myForward -- confirm
    /// in SamQuerySeqWithRefHelper.cpp.
89  void nextIndex();
90 
    /// Record being iterated; a reference, so the caller keeps it alive.
91  SamRecord& myRecord;
    /// Reference sequence compared against; a reference, so the caller keeps it alive.
92  GenomeSequence& myRefSequence;
    /// Pointer to the Cigar for the record. Where it is set and who owns it
    /// are not visible in this header -- confirm in the .cpp.
93  Cigar* myCigar;
    /// NOTE(review): by its name, the reference index where the read starts;
    /// confirm how it is computed in the .cpp.
94  uint32_t myStartOfReadOnRefIndex;
    /// Current index into the query (signed 32-bit).
95  int32_t myQueryIndex;
    /// Iteration direction flag; see the forward parameter of reset().
96  bool myForward;
97 };
98 
99 
100 /// Contains methods for converting between the query sequence and reference.
/// Both public methods are static and write their result to an output string.
/// NOTE(review): listing line 101 (the class declaration itself) is missing
/// from this extract. The symbol index at the end of the listing places
/// SamQuerySeqWithRef at SamQuerySeqWithRefHelper.h:101; restore the
/// declaration from the original header.
/// NOTE(review): std::string is used below, but this header does not include
/// <string> directly; it presumably arrives through SamRecord.h or
/// GenomeSequence.h -- consider including it explicitly.
102 {
103 public:
104  /// Gets the sequence with '=' in any position where the sequence matches
105  /// the reference.
106  /// NOTE: 'N' in both the sequence and the reference is not considered a
107  /// match.
108  /// \param currentSeq sequence that should be converted
109  /// \param seq0BasedPos 0-based start position of currentSeq on the reference.
110  /// \param cigar cigar for currentSeq (used for determining how the sequence aligns to the reference)
111  /// \param referenceName reference name associated with this sequence
112  /// \param refSequence reference sequence object
113  /// \param updatedSeq return parameter that this method sets to the
114  /// current sequence, replacing any matches to the reference with '='.
115  static void seqWithEquals(const char* currentSeq,
116  int32_t seq0BasedPos,
117  Cigar& cigar,
118  const char* referenceName,
119  const GenomeSequence& refSequence,
120  std::string& updatedSeq);
121 
122  /// Gets the sequence converting '=' to the appropriate base using the
123  /// reference.
124  /// \param currentSeq sequence that should be converted
125  /// \param seq0BasedPos 0-based start position of currentSeq on the reference.
126  /// \param cigar cigar for currentSeq (used for determining how the sequence aligns to the reference)
127  /// \param referenceName reference name associated with this sequence
128  /// \param refSequence reference sequence object
129  /// \param updatedSeq return parameter that this method sets to the
130  /// current sequence, replacing any '=' with the base from the reference.
131  static void seqWithoutEquals(const char* currentSeq,
132  int32_t seq0BasedPos,
133  Cigar& cigar,
134  const char* referenceName,
135  const GenomeSequence& refSequence,
136  std::string& updatedSeq);
137 
138 private:
    // NOTE(review): listing line 139 is absent from this extract; its contents
    // cannot be determined from here -- check the original header.
140 };
141 #endif
Cigar
This class represents the CIGAR without any methods to set the cigar (see CigarRoller for that).
Definition: Cigar.h:83
SamSingleBaseMatchInfo::setType
void setType(Type newType)
Set the type (match/mismatch/unknown) for this object.
Definition: SamQuerySeqWithRefHelper.cpp:230
SamQuerySeqWithRefIter::reset
bool reset(bool forward=true)
Reset to start at the beginning of the record.
Definition: SamQuerySeqWithRefHelper.cpp:58
SamSingleBaseMatchInfo::setQueryIndex
void setQueryIndex(int32_t queryIndex)
Set the query index for this object.
Definition: SamQuerySeqWithRefHelper.cpp:236
SamQuerySeqWithRefIter::getNextMatchMismatch
bool getNextMatchMismatch(SamSingleBaseMatchInfo &matchMismatchInfo)
Returns information for the next position where the query and the reference match or mismatch.
Definition: SamQuerySeqWithRefHelper.cpp:102
GenomeSequence
Create/Access/Modify/Load Genome Sequences stored as binary mapped files.
Definition: GenomeSequence.h:99
SamSingleBaseMatchInfo
This class contains the match/mismatch information between the reference and a read for a single base...
Definition: SamQuerySeqWithRefHelper.h:28
SamSingleBaseMatchInfo::getQueryIndex
int32_t getQueryIndex()
Get the query index for this object.
Definition: SamQuerySeqWithRefHelper.cpp:224
SamQuerySeqWithRef
Contains methods for converting between the query sequence and reference.
Definition: SamQuerySeqWithRefHelper.h:101
SamSingleBaseMatchInfo::getType
Type getType()
Get the type (match/mismatch/unknown) for this object.
Definition: SamQuerySeqWithRefHelper.cpp:218
SamRecord
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.
Definition: SamRecord.h:51
SamQuerySeqWithRefIter
Iterates through the query and compares it with the reference.
Definition: SamQuerySeqWithRefHelper.h:58
SamSingleBaseMatchInfo::Type
Type
More types can be added later as needed.
Definition: SamQuerySeqWithRefHelper.h:32
SamQuerySeqWithRef::seqWithEquals
static void seqWithEquals(const char *currentSeq, int32_t seq0BasedPos, Cigar &cigar, const char *referenceName, const GenomeSequence &refSequence, std::string &updatedSeq)
Gets the sequence with '=' in any position where the sequence matches the reference.
Definition: SamQuerySeqWithRefHelper.cpp:243
SamQuerySeqWithRef::seqWithoutEquals
static void seqWithoutEquals(const char *currentSeq, int32_t seq0BasedPos, Cigar &cigar, const char *referenceName, const GenomeSequence &refSequence, std::string &updatedSeq)
Gets the sequence converting '=' to the appropriate base using the reference.
Definition: SamQuerySeqWithRefHelper.cpp:296