libStatGen Software  1
BaseAsciiMap.h
1 /*
2  * Copyright (C) 2010 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #ifndef _BASE_ASCII_MAP_H
19 #define _BASE_ASCII_MAP_H
20 
21 #include "StringBasics.h"
22 
23 /// Map between characters and the associated base type.
25 {
26 public:
27  /// Value associated with 'N' in the ascii to base map (bad read).
28  static const int baseNIndex = 004;
29  /// Value associated with any non-base character in the ascii to base
30  /// map (unknown, bad data).
31  static const int baseXIndex = 005;
32 
33  // Arrays for converting back and forth between a base pair character
34  // value (ASCII) and a base integer in the range 0..3. There are also
35  // values 4 and 5: 4 for 'N' (bad read) and 5 for any other, non-base character.
36  //
37  /// Convert from int representation to the base.
38  static const char int2base[];
39  /// Convert from int representation to colorspace representation.
40  static const char int2colorSpace[];
41  static unsigned char base2complement[];
42 
43  /// The type of space (color or base) to use in the mapping.
44  enum SPACE_TYPE {
45  /// Base decision on the first raw seq character/type has yet
46  /// to be determined.
48  BASE_SPACE, ///< Bases only (A,C,G,T,N).
49  COLOR_SPACE ///< Color space only (0,1,2,3,.).
50  };
51 
52  /// Map ASCII values to a 2 (or 3) bit encoding for the base pair value for
53  /// both base and color space.
54  /// 'A'/'a'/'0' -> 0; 'C'/'c'/'1' -> 1; 'G'/'g'/'2' -> 2; 'T'/'t'/'3' -> 3;
55  /// 'N'/'n'/'4' -> 4; anything else -> 5.
56  static unsigned char baseColor2int[256+1]; // base or color space read (ACGTN / 01234)
57  /// Map ASCII values to a 2 (or 3) bit encoding for the base pair value for
58  /// just base space (ACTGNactgn).
59  /// 'A'/'a' -> 0; 'C'/'c' -> 1; 'G'/'g' -> 2; 'T'/'t' -> 3;
60  /// 'N'/'n' -> 4; anything else -> 5.
61  static unsigned char base2int[256+1]; // base space read (ACGTN)
62  /// Map ASCII values to a 2 (or 3) bit encoding for the base pair value for
63  /// just color space (01234).
64  /// '0' -> 0; '1' -> 1; '2' -> 2; '3' -> 3; '4' -> 4; anything else -> 5.
65  static unsigned char color2int[256+1]; // color space read (01234)
66 
67 public:
68  BaseAsciiMap();
69  ~BaseAsciiMap();
70 
71  /// Set the lookup table used by getBaseIndex from the passed in space type (BASE_SPACE -> base2int, COLOR_SPACE -> color2int, anything else -> NULL).
72  inline void setBaseMapType(SPACE_TYPE spaceType)
73  {
75  // Pick the table for the requested space type. NOTE(review): this listing jumps from line 73 to 75 - confirm against the original header whether a statement is missing here.
76  switch (spaceType)
77  {
78  case BASE_SPACE:
79  // Base space: translate with the base-only table.
80  myBase2IntMapPtr = base2int;
81  break;
82  case COLOR_SPACE:
83  // Color space: translate with the color-only table.
84  myBase2IntMapPtr = color2int;
85  break;
86  default:
87  // Any other value: zero the pointer, which getBaseIndex/getSpaceType treat as "type not yet determined".
88  myBase2IntMapPtr = NULL;
89  break;
90  }
91  };
92 
93  /// Returns the baseIndex value for the character passed in.
94  inline int getBaseIndex(const char& letter)
95  {
96  if (myBase2IntMapPtr == NULL)
97  {
98  // Check to see if we have hit the number of primer bases.
99  if (myPrimerCount < myNumPrimerBases)
100  {
101  // Still expecting primer bases, so lookup
102  // the letter in the base map.
103  ++myPrimerCount;
104  return(base2int[(int)letter]);
105  }
106 
107  // Have already processed all the primers, so determine
108  // whether this is base or color space.
109 
110  // Need to determime the base type.
111  setBaseMapType(letter);
112 
113  // If it is still null, return invalid. Will be set when the first
114  // letter is either color or base.
115  if (myBase2IntMapPtr == NULL)
116  {
117  return(baseXIndex);
118  }
119  }
120 
121  // Also check if configured as color space that the primers are correct.
122  if ((myBase2IntMapPtr == color2int) && (myPrimerCount < myNumPrimerBases))
123  {
124  // Still expecting primer bases, so lookup
125  // the letter in the base map.
126  ++myPrimerCount;
127  return(base2int[(int)letter]);
128  }
129 
130  return myBase2IntMapPtr[(int)letter];
131  }
132 
133  /// Return the space type that is currently set.
135  {
136  if (myBase2IntMapPtr == base2int)
137  {
138  return(BASE_SPACE);
139  }
140  else if (myBase2IntMapPtr == color2int)
141  {
142  return(COLOR_SPACE);
143  }
144  else
145  {
146  return(UNKNOWN);
147  }
148  }
149 
150  /// Set the number of primer bases expected before the actual
151  /// base/color space type occurs for the rest of the entries.
152  void setNumPrimerBases(int numPrimerBases)
153  {
154  myNumPrimerBases = numPrimerBases;
155  }
156 
157  /// Reset the number of primers to 0.
159  {
160  myPrimerCount = 0;
161  };
162 
163  /// Reset the base mapping type to UNKNOWN.
165  {
166  myBase2IntMapPtr = NULL;
168  };
169 
170 private:
171  // Set the base type based on the passed in letter.
172  // If the letter is in neither the color space or the base space, both
173  // will be allowed.
174  inline void setBaseMapType(const char& letter)
175  {
176  //First check to see if it is in base space.
177  if (base2int[(int)letter] != baseXIndex)
178  {
179  // This is a valid base space index, so it is base space.
180  myBase2IntMapPtr = base2int;
181  }
182  else if (color2int[(int)letter] != baseXIndex)
183  {
184  // This is a valid color space index, so it is base space.
185  myBase2IntMapPtr = color2int;
186  }
187  else
188  {
189  // Unknown map type, zero the pointer.
190  myBase2IntMapPtr = NULL;
191  }
192  };
193 
194 
195  // The number of primer bases to expect for a color-space file.
196  unsigned int myNumPrimerBases;
197 
198  // This is the number of primer bases that have been seen since
199  // the map type was set/reset.
200  unsigned int myPrimerCount;
201 
202  unsigned char* myBase2IntMapPtr;
203 };
204 
205 #endif
BaseAsciiMap::setNumPrimerBases
void setNumPrimerBases(int numPrimerBases)
Set the number of primer bases expected before the actual base/color space type occurs for the rest of the entries.
Definition: BaseAsciiMap.h:152
BaseAsciiMap::baseXIndex
static const int baseXIndex
Value associated with any non-base character in the ascii to base map (unknown, bad data).
Definition: BaseAsciiMap.h:31
BaseAsciiMap::baseNIndex
static const int baseNIndex
Value associated with 'N' in the ascii to base map (bad read).
Definition: BaseAsciiMap.h:28
BaseAsciiMap::UNKNOWN
@ UNKNOWN
Base decision on the first raw seq character/type has yet to be determined.
Definition: BaseAsciiMap.h:47
BaseAsciiMap::resetPrimerCount
void resetPrimerCount()
Reset the number of primers to 0.
Definition: BaseAsciiMap.h:158
BaseAsciiMap
Map between characters and the associated base type.
Definition: BaseAsciiMap.h:24
BaseAsciiMap::baseColor2int
static unsigned char baseColor2int[256+1]
Map ASCII values to a 2 (or 3) bit encoding for the base pair value for both base and color space.
Definition: BaseAsciiMap.h:56
BaseAsciiMap::BASE_SPACE
@ BASE_SPACE
Bases only (A,C,G,T,N).
Definition: BaseAsciiMap.h:48
BaseAsciiMap::color2int
static unsigned char color2int[256+1]
Map ASCII values to a 2 (or 3) bit encoding for the base pair value for just color space (0123).
Definition: BaseAsciiMap.h:65
BaseAsciiMap::SPACE_TYPE
SPACE_TYPE
The type of space (color or base) to use in the mapping.
Definition: BaseAsciiMap.h:44
BaseAsciiMap::resetBaseMapType
void resetBaseMapType()
Reset the base mapping type to UNKNOWN.
Definition: BaseAsciiMap.h:164
BaseAsciiMap::base2int
static unsigned char base2int[256+1]
Map ASCII values to a 2 (or 3) bit encoding for the base pair value for just base space (ACTGNactgn).
Definition: BaseAsciiMap.h:61
BaseAsciiMap::getBaseIndex
int getBaseIndex(const char &letter)
Returns the baseIndex value for the character passed in.
Definition: BaseAsciiMap.h:94
BaseAsciiMap::setBaseMapType
void setBaseMapType(SPACE_TYPE spaceType)
Set the base type based on the passed in option.
Definition: BaseAsciiMap.h:72
BaseAsciiMap::int2colorSpace
static const char int2colorSpace[]
Convert from int representation to colorspace representation.
Definition: BaseAsciiMap.h:40
BaseAsciiMap::getSpaceType
SPACE_TYPE getSpaceType()
Return the space type that is currently set.
Definition: BaseAsciiMap.h:134
BaseAsciiMap::int2base
static const char int2base[]
Convert from int representation to the base.
Definition: BaseAsciiMap.h:38
BaseAsciiMap::base2complement
static unsigned char base2complement[]
This table maps 5' base space to the 3' complement base space values, as well as 5' color space value...
Definition: BaseAsciiMap.h:41
BaseAsciiMap::COLOR_SPACE
@ COLOR_SPACE
Color space only (0,1,2,3,.).
Definition: BaseAsciiMap.h:49