21 #include "SamFilter.h"
23 #include "SamQuerySeqWithRefHelper.h"
24 #include "BaseUtilities.h"
29 double mismatchThreshold)
39 const int32_t initialLastFrontClipPos = -1;
40 int32_t lastFrontClipPos = initialLastFrontClipPos;
42 int32_t firstBackClipPos = readLength;
44 bool fromFrontComplete =
false;
45 bool fromBackComplete =
false;
46 int32_t numBasesFromFront = 0;
47 int32_t numBasesFromBack = 0;
48 int32_t numMismatchFromFront = 0;
49 int32_t numMismatchFromBack = 0;
53 while(!fromFrontComplete || !fromBackComplete)
57 while(!fromFrontComplete &&
58 ((numBasesFromFront <= numBasesFromBack) ||
64 fromFrontComplete =
true;
72 fromFrontComplete =
true;
79 if(baseMatchInfo.
getType() == SamSingleBaseMatchInfo::MISMATCH)
82 ++numMismatchFromFront;
84 double mismatchPercent =
85 (double)numMismatchFromFront / numBasesFromFront;
86 if(mismatchPercent > mismatchThreshold)
91 numBasesFromFront = 0;
92 numMismatchFromFront = 0;
99 while(!fromBackComplete &&
100 ((numBasesFromBack <= numBasesFromFront) ||
101 (fromFrontComplete)))
106 fromBackComplete =
true;
114 fromBackComplete =
true;
121 if(baseMatchInfo.
getType() == SamSingleBaseMatchInfo::MISMATCH)
124 ++numMismatchFromBack;
126 double mismatchPercent =
127 (double)numMismatchFromBack / numBasesFromBack;
128 if(mismatchPercent > mismatchThreshold)
133 numBasesFromBack = 0;
134 numMismatchFromBack = 0;
150 return(
softClip(record, lastFrontClipPos + 1, readLength - firstBackClipPos));
156 int32_t numFrontClips,
157 int32_t numBackClips)
165 status =
softClip(*cigar, numFrontClips, numBackClips,
166 startPos, updatedCigar);
192 int32_t numFrontClips,
193 int32_t numBackClips,
198 int32_t endClipPos = readLength - numBackClips;
201 if((numFrontClips != 0) || (numBackClips != 0))
206 int32_t totalClips = numFrontClips + numBackClips;
207 if(totalClips >= readLength)
219 int origCigarOpIndex = 0;
224 int32_t numPositions = 0;
227 bool onlyClips =
true;
233 while((origCigarOpIndex < oldCigar.
size()) &&
234 (numPositions < numFrontClips))
237 switch(op->operation)
256 numPositions += op->count;
268 if(numFrontClips != 0)
275 int32_t newCount = numPositions - numFrontClips;
283 if(numPositions > endClipPos)
285 newCount -= (numPositions - endClipPos);
289 updatedCigar.
Add(op->operation, newCount);
307 while((origCigarOpIndex < oldCigar.
size()) &&
308 (numPositions <= endClipPos))
337 uint32_t numPosTilClip = endClipPos - numPositions;
339 if(numPosTilClip < op->count)
343 if(numPosTilClip != 0)
345 updatedCigar.
Add(op->operation,
364 numPositions += op->count;
373 if(numBackClips != 0)
381 while(origCigarOpIndex < oldCigar.
size())
407 if(numFrontClips > 0)
415 int32_t lastFrontClipPos = numFrontClips - 1;
421 startPos = newStartPos + 1;
432 uint32_t qualityThreshold,
433 uint8_t defaultQualityInt)
435 uint32_t totalMismatchQuality =
440 if(totalMismatchQuality > qualityThreshold)
453 uint8_t defaultQualityInt)
456 int mismatchQual = 0;
464 if(baseMatchInfo.
getType() == SamSingleBaseMatchInfo::MISMATCH)
467 char readQualityChar =
469 uint8_t readQualityInt =
475 readQualityInt = defaultQualityInt;
477 mismatchQual += readQualityInt;
482 return(mismatchQual);
489 uint16_t flag = record.
getFlag();
493 flag &= ~
SamFlag::SECONDARY_ALIGNMENT;
494 flag &= ~
SamFlag::SUPPLEMENTARY_ALIGNMENT;
static void filterRead(SamRecord &record)
Filter the read by marking it as unmapped.
bool setMapQuality(uint8_t mapQuality)
Set the mapping quality (MAPQ).
void Add(Operation operation, int count)
Append the specified operation with the specified count to this object.
This class represents the CIGAR without any methods to set the cigar (see CigarRoller for that).
int32_t get0BasedPosition()
Get the 0-based(BAM) leftmost position of the record.
static bool isClip(Operation op)
Return true if the specified operation is a clipping operation, false if not.
static uint8_t getPhredBaseQuality(char charQuality)
Get phred base quality from the specified ascii quality.
@ none
no operation has been set.
@ CLIPPED
Filtering clipped the read.
bool getNextMatchMismatch(SamSingleBaseMatchInfo &matchMismatchInfo)
Returns information for the next position where the query and the reference match or mismatch.
static const uint8_t UNKNOWN_QUALITY_INT
Int value used when the quality is unknown.
@ insert
insertion to the reference (the query sequence contains bases that have no corresponding base in the ...
Create/Access/Modify/Load Genome Sequences stored as binary mapped files.
@ hardClip
Hard clip on the read (clipped sequence not present in the query sequence or reference)....
This class contains the match/mismatch information between the reference and a read for a single base...
static void setUnmapped(uint16_t &flag)
Mark the passed in flag as unmapped.
static const int32_t INDEX_NA
Value associated with an index that is not applicable/does not exist, used for converting between que...
uint16_t getFlag()
Get the flag (FLAG).
bool set0BasedPosition(int32_t position)
Set the leftmost position using the specified 0-based (BAM format) value.
@ NONE
The filter did not affect the read.
@ match
match/mismatch operation. Associated with CIGAR Operation "M"
@ FILTERED
Filtering caused the read to be modified to unmapped.
@ skip
skipped region from the reference (the reference contains bases that have no corresponding base in th...
@ softClip
Soft clip on the read (clipped sequence present in the query sequence, but not in reference)....
static bool foundInQuery(Operation op)
Return true if the specified operation is found in the query sequence, false if not.
int size() const
Return the number of cigar operations.
const char * getQuality()
Returns the SAM formatted quality string (QUAL).
FilterStatus
Enum describing what sort of filtering was done.
static uint32_t sumMismatchQuality(SamRecord &record, GenomeSequence &refSequence, uint8_t defaultQualityInt)
Get the sum of the qualities of all mismatches in the record.
int32_t getQueryIndex()
Get the query index for this object.
Type getType()
Get the type (match/mismatch/unknown) for this object.
bool setFlag(uint16_t flag)
Set the bitwise FLAG to the specified value.
static FilterStatus clipOnMismatchThreshold(SamRecord &record, GenomeSequence &refSequence, double mismatchThreshold)
Clip the read based on the specified mismatch threshold.
Class for extracting information from a SAM Flag.
@ mismatch
mismatch operation. Associated with CIGAR Operation "M"
int32_t getReadLength()
Get the length of the read.
bool setCigar(const char *cigar)
Set the CIGAR to the specified SAM formatted cigar string.
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.
Cigar * getCigarInfo()
Returns a pointer to the Cigar object associated with this record.
@ del
deletion from the reference (the reference contains bases that have no corresponding base in the quer...
The purpose of this class is to provide accessors for setting, updating, modifying the CIGAR object....
Iterates through the query and compare with reference.
int getExpectedQueryBaseCount() const
Return the length of the read that corresponds to the current CIGAR string.
int32_t getRefPosition(int32_t queryIndex, int32_t queryStartPos)
Return the reference position associated with the specified query index or INDEX_NA based on this cig...
static FilterStatus filterOnMismatchQuality(SamRecord &record, GenomeSequence &refSequence, uint32_t qualityThreshold, uint8_t defaultQualityInt)
Filter the read based on the specified quality threshold.
const CigarOperator & getOperator(int i) const
Return the Cigar Operation at the specified index (starting at 0).
static FilterStatus softClip(SamRecord &record, int32_t numFrontClips, int32_t numBackClips)
Soft clip the record from the front and/or the back.