libStatGen Software  1
glfHandler.h
1 /*
2  * Copyright (C) 2010 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #ifndef __GLF_HANDLER_H__
19 #define __GLF_HANDLER_H__
20 
21 #include "InputFile.h"
22 #include "StringBasics.h"
23 
24 #if defined(__APPLE__)
25 // #pragma warn "Caution, glfHandler.h is non-portable"
26 #else
27 #pragma pack(push)
28 #pragma pack(1)
29 #endif
30 
31 struct glfIndel // Indel payload of a GLF record; shares storage with glfEntry::lk through the union in glfEntry
32 {
33  // Likelihoods for the genotypes 1/1, 2/2 and 1/2
34  unsigned char lk[3];
35 
36  // Allele lengths, one entry per allele
37  short length[2];
38 
39  unsigned char padding[3]; // filler bytes; under 1-byte packing the struct is 3 + 2 * sizeof(short) + 3 bytes, i.e. 10 (the size of glfEntry::lk) if short is 2 bytes -- TODO confirm
40 };
41 
42 struct glfEntry // One GLF record: fixed fields followed by a union holding either base likelihoods or an indel payload
43 {
44  /** "XACMGRSVTWYHKDBN"[refBase] gives the reference base; recordType is the other 4-bit field declared with it */
45  unsigned char refBase:4, recordType:4;
46 
47  /** offset of this record from the previous one, in bases */
48  unsigned int offset;
49 
50  /** number of mapped reads (depth, 24 bits) and log10 minimum likelihood * 10 (minLLK, 8 bits) */
51  unsigned depth:24, minLLK:8;
52 
53  /** root mean squared maximum mapping quality for overlapping reads */
54  unsigned char mapQuality;
55 
56  union // anonymous union: lk and indel occupy the same storage
57  {
58  /** log10 likelihood ratio * 10 for genotypes AA, AC, AG, AT, CC, CG, CT, GG, GT, TT */
59  unsigned char lk[10];
60  glfIndel indel;
61  };
62 
63  glfEntry & operator = (glfEntry & rhs); // NOTE(review): rhs is a non-const reference, so const entries and temporaries cannot be assigned from -- confirm this is intended
64 };
65 
66 #if defined(__APPLE__)
67 // #pragma warn "Caution, glfHandler.h is non-portable"
68 #else
69 #pragma pack(pop)
70 #endif
71 
73 {
74 public:
75  // Global information about the current GLF file
76  bool isStub;
77  IFILE handle;
78  String header;
79 
80  // Information about the current section
81  String label;
82  int sections;
83  int currentSection;
84  int maxPosition;
85 
86  // Information on whether the end of the current section has been reached
87  bool endOfSection;
88 
89  // Currently active GLF record
90  glfEntry data;
91  int position;
92  double likelihoods[10];
93  String indelSequence[2];
94 
95  // Error message in case previous command fails
96  const char * errorMsg;
97 
98  glfHandler();
99  ~glfHandler();
100 
101  bool Open(const String & filename);
102  void OpenStub();
103  bool Create(const String & filename);
104  bool isOpen();
105  void Close();
106  void Rewind();
107 
108  bool NextSection();
109  bool NextEntry();
110  bool NextBaseEntry();
111 
112  void BeginSection(const String & sectionLabel, int sectionLength);
113  void EndSection();
114 
115  void WriteEntry(int outputPosition);
116 
117  char GetReference(int position, char defaultBase);
118  int GetDepth(int position);
119  const double * GetLikelihoods(int position);
120  const unsigned char * GetLogLikelihoods(int position);
121  int GetMapQuality(int position);
122 
123  static const double * GetDefaultLikelihoods()
124  {
125  return nullLikelihoods;
126  }
127  static const unsigned char * GetDefaultLogLikelihoods()
128  {
129  return nullLogLikelihoods;
130  }
131 
132  static int GenotypeIndex(int base1, int base2)
133  {
134  return base1 < base2 ? (base1 - 1) *(10 - base1) / 2 + (base2 - base1) :
135  (base2 - 1) *(10 - base2) / 2 + (base1 - base2);
136  }
137 
138 private:
139  static char translateBase[16];
140  static char backTranslateBase[5];
141  static double nullLikelihoods[10];
142  static unsigned char nullLogLikelihoods[10];
143 
144  bool ReadHeader();
145  void WriteHeader(const String & headerText = "");
146 };
147 
148 #endif
149 
glfEntry::offset
unsigned int offset
offset of this record from the previous one, in bases
Definition: glfHandler.h:48
String
Definition: StringBasics.h:38
glfEntry::depth
unsigned depth
number of mapped reads (declared together with minLLK, the log10 minimum likelihood * 10)
Definition: glfHandler.h:51
glfEntry::lk
unsigned char lk[10]
log10 likelihood ratio * 10 for genotypes AA, AC, AG, AT, CC, CG, CT, GG, GT, TT
Definition: glfHandler.h:59
InputFile.h
glfHandler
Definition: glfHandler.h:72
glfIndel
Definition: glfHandler.h:31
glfEntry::refBase
unsigned char refBase
"XACMGRSVTWYHKDBN"[refBase] gives the reference base
Definition: glfHandler.h:45
InputFile
Class for easily reading/writing files without having to worry about file type (uncompressed,...
Definition: InputFile.h:36
glfEntry::mapQuality
unsigned char mapQuality
root mean squared maximum mapping quality for overlapping reads
Definition: glfHandler.h:54
glfEntry
Definition: glfHandler.h:42