libStatGen Software  1
MemoryMapArray.h
1 /*
2  * Copyright (C) 2010 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #ifndef __MEMORYMAPARRAY_H
19 #define __MEMORYMAPARRAY_H
20 
21 #ifndef __STDC_LIMIT_MACROS
22 #define __STDC_LIMIT_MACROS
23 #endif
24 #include <errno.h>
25 #include <stdint.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 
30 #ifndef _WIN32
31 #include <unistd.h> // for gethostname()
32 #endif
33 
34 #include <string>
35 #include <sys/types.h>
36 #include <time.h>
37 
38 // STL:
39 #include <ostream>
40 #include <sstream>
41 
42 #include "Generic.h"
43 #include "MemoryMap.h"
44 
45 
46 //
47 // This file defines a template for generating memory map backed arrays
48 // of different types of values.
49 //
50 // The template assumes that the mapped files are broken into two parts,
51 // first, a header (MemoryMapArrayHeader), then followed by the data
52 // in the array.
53 //
54 // typedefs are used to declare various types of arrays beforehand,
55 // since there will be only a few.
56 //
57 // They are:
58 // mmapArrayUint32_t;
59 // mmapArrayBool_t;
60 // mmapArray4Bit_t;
61 //
62 // XXX consider adding env("USER"), argv[0], date/time creation, etc.
63 //
65 {
66 public:
67  void constructorClear()
68  {
69  memset(this, 0, sizeof(*this));
70  }
71  uint32_t typeCookie; // set to the cookieVal template argument by MemoryMapArray::create(), compared against it in open()
72  uint32_t typeVersion; // set to the versionVal template argument by create(), compared against it in open()
73  uint32_t contentCookie; // written only through MemoryMapArray::setContentCookie()
74  uint32_t contentVersion; // written only through MemoryMapArray::setContentVersion()
75  size_t headerSize; // byte offset from the start of the mapping to the first array element
76 
77  // file generation info
78  time_t creationDate; // time(NULL) at the moment create() ran
79  char creationUser[32]; // value of the USER environment variable at create() time, NUL terminated
80  char creationHost[32]; // host name recorded by create(), NUL terminated
81  char application[32]; // free-form name stored by setApplication(), NUL terminated
82  // now describe the data:
83  size_t elementCount; // number of elements requested in create()
84  void debugPrint(FILE *); // declared here only; the definition is not in this header
85  size_t getHeaderSize(int i)
86  {
87  return sizeof(*this);
88  }
89 
90  void setApplication(const char *s)
91  {
92  strncpy(application, s, sizeof(application)-1);
93  application[sizeof(application)-1] = '\0';
94  }
95  void setCreationUser(const char *s)
96  {
97  strncpy(creationUser, s, sizeof(creationUser)-1);
98  creationUser[sizeof(creationUser)-1] = '\0';
99  }
100  void setCreationHost(const char *s)
101  {
102  strncpy(creationHost, s, sizeof(creationHost)-1);
103  creationHost[sizeof(creationHost)-1] = '\0';
104  }
105 };
106 
107 //
108 // stream output for header information
109 //
110 std::ostream &operator << (std::ostream &stream, MemoryMapArrayHeader &h);
111 
112 //
113 // This class object represents the application specific information that doesn't
114 // fit in the general header above. Since it is only allocated via an mmap operation,
115 // as part of the mapped file, the destructor must never be called. The virtual
116 // destructor is declared to eliminate gcc warnings.
117 //
118 // For many arrays, this will be empty.
119 //
121 {
122 protected:
123  size_t headerSize; // set in ::create and ::open only
124 public:
125  size_t getHeaderSize()
126  {
127  return headerSize;
128  }
129  // other stuff follows...
130 };
131 
132 template <
133 class elementT,
134 typename indexT,
135 unsigned int cookieVal,
136 unsigned int versionVal,
137 elementT accessorFunc(char *base, indexT),
138 void setterFunc(char *base, indexT, elementT),
139 size_t elementCount2BytesFunc(indexT),
140 class arrayHeaderClass>
141 class MemoryMapArray : public MemoryMap
142 {
143 protected:
144  arrayHeaderClass *header;
145  char *data;
146  std::string errorStr;
147 public:
148  void constructorClear()
149  {
150  header = NULL;
151  data = NULL;
152 // errorStr = "";
153  }
155  {
156  constructorClear();
157  }
158  ~MemoryMapArray()
159  {
160  if (data) close();
161  }
162 
163  const std::string &getErrorString()
164  {
165  return errorStr;
166  }
167 
168  arrayHeaderClass &getHeader()
169  {
170  return *header;
171  }
172 
173  void setContentCookie(uint32_t c)
174  {
175  header->contentCookie = c;
176  }
177  void setContentVersion(uint32_t v)
178  {
179  header->contentVersion = v;
180  }
181 
182  // accessing
183  inline elementT operator[](indexT i)
184  {
185  return accessorFunc(data, i);
186  }
187  inline void set(indexT i, elementT v)
188  {
189  setterFunc(data, i, v);
190  }
191 
192 
193 
194  /// Create a vector with elementCount memebers.
195  //
196  /// Does administrative setup of the header and populating this
197  /// class members. User will need to finish populating the
198  /// contents of the metaData and data sections.
199  ///
200  /// If file==NULL, the underlying allocation is done via malloc(),
201  /// so that the results of write access to this vecor are not
202  /// saved in a file.
203  ///
204  /// If file!=NULL, a file will be created on disk, and all
205  /// write accesses done via the method ::set will be persistent
206  /// in that file.
207  ///
208  int create(const char *file, indexT elementCount, int optionalHeaderCount = 0)
209  {
210  size_t len = elementCount2BytesFunc(elementCount) +
211  header->getHeaderSize(optionalHeaderCount);
212  int rc;
213  rc = MemoryMap::create(file, len);
214  if (rc)
215  {
216  std::ostringstream buf;
217  buf << file << ": failed to create file";
218  errorStr = buf.str();
219  close();
220  return rc;
221  }
222  header = (arrayHeaderClass *) MemoryMap::data;
223  header->constructorClear();
224  header->typeCookie = cookieVal;
225  header->typeVersion = versionVal;
226  header->headerSize = header->getHeaderSize(optionalHeaderCount);
227  header->elementCount = elementCount;
228  data = (char *)((char *) MemoryMap::data + header->headerSize);
229 
230  const char *env;
231  char hostname[256];
232  env = getenv("USER");
233  if (env) header->setCreationUser(env);
234  header->creationDate = time(NULL);
235 #if defined(_WIN32)
236  hostname[0] = '\0';
237 #else
238  gethostname(hostname, sizeof(hostname));
239 #endif
240  header->setCreationHost(hostname);
241  return 0;
242  }
243 
244  /// allow anonymous (malloc) create.
245  ///
246  /// we do this when we don't expect to save the results.
247  ///
248  /// The single use case so far is in GenomeSequence::populateDBSNP.
249  ///
250  int create(indexT elementCount, int optionalHeaderCount = 0)
251  {
252  return create(NULL, elementCount, optionalHeaderCount);
253  }
254 
255  //
256  // Open the given filename. flags may be set to
257  // O_RDONLY or O_RDWR, and allows the file to be
258  // condtionally written to.
259  //
260  // Several sanity checks are done:
261  // compare the expected cookie value to the actual one
262  // compare the expected version value to the actual one
263  //
264  // if either condition is not met, the member errorStr is
265  // set to explain why, and true is returned.
266  //
267  // If there were no errors, false is returned.
268  //
269  bool open(const char *file, int flags = O_RDONLY)
270  {
271  int rc = MemoryMap::open(file, flags);
272  if (rc)
273  {
274  std::ostringstream buf;
275  buf << file << ": open() failed (error=" << strerror(errno) << ").";
276  errorStr = buf.str();
277  return true;
278  }
279  header = (arrayHeaderClass *) MemoryMap::data;
280  data = (char *)((char *) MemoryMap::data + header->headerSize);
281  if (header->typeCookie!=cookieVal)
282  {
283  std::ostringstream buf;
284  buf << file << ": wrong type of file (expected type "
285  << cookieVal << " but got " << header->typeCookie << ")";
286  errorStr = buf.str();
287  // XXX insert better error handling
288  close();
289  return true;
290  }
291  if (header->typeVersion!=versionVal)
292  {
293  std::ostringstream buf;
294  buf << file << ": wrong version of file (expected version "
295  << versionVal << " but got " << header->typeVersion << ")";
296  errorStr = buf.str();
297  // XXX insert better error handling
298  close();
299  return true;
300  }
301  return false;
302  }
303 
304  bool close()
305  {
306  constructorClear();
307  return MemoryMap::close();
308  }
309  void debugPrint(FILE *f)
310  {
311  if (header) header->debugPrint(f);
312  }
313 
314  size_t getElementCount() const
315  {
316  return header->elementCount;
317  }
318 
319 };
320 
322 {
323 public:
324  size_t getHeaderSize()
325  {
326  return sizeof(*this);
327  }
328 };
329 
330 //
331 // define the uint32 array type:
332 //
// Read the uint32_t at position index, treating base as the start of an
// array of uint32_t values.
inline uint32_t mmapUint32Access(char *base, uint32_t index)
{
    const uint32_t *const words = reinterpret_cast<const uint32_t *>(base);
    return words[index];
}
// Store v at position index, treating base as the start of an array of
// uint32_t values.
inline void mmapUint32Set(char *base, uint32_t index, uint32_t v)
{
    uint32_t *const words = reinterpret_cast<uint32_t *>(base);
    words[index] = v;
}
// Number of bytes needed to hold i uint32_t elements.
inline size_t mmapUint32elementCount2Bytes(uint32_t i)
{
    const size_t elementCount = i;
    return elementCount * sizeof(uint32_t);
}
345 
346 typedef MemoryMapArray<
347 uint32_t,
348 uint32_t,
349 0x16b3816c,
350 20090109,
351 mmapUint32Access,
352 mmapUint32Set,
353 mmapUint32elementCount2Bytes,
356 
357 //
358 // define the boolean memory mapped array type.
359 // NB: it is limited to 2**32 elements
360 //
361 
362 typedef MemoryMapArray<
363 uint32_t,
364 uint32_t,
365 0xac6c1dc7,
366 20090109,
367 PackedAccess_1Bit,
368 PackedAssign_1Bit,
369 Packed1BitElementCount2Bytes,
372 
373 //
374 // define the two bit memory mapped array type:
375 //
376 
377 typedef MemoryMapArray<
378 uint32_t,
379 uint32_t,
380 0x25b3ea5f,
381 20090109,
382 PackedAccess_2Bit,
383 PackedAssign_2Bit,
384 Packed2BitElementCount2Bytes,
387 
388 typedef MemoryMapArray<
389 uint32_t,
390 uint32_t,
391 0x418e1874,
392 20090109,
393 PackedAccess_4Bit,
394 PackedAssign_4Bit,
395 Packed4BitElementCount2Bytes,
398 
399 #if 0
400 // XXX this is example code I want to use to define arrays of genome wide match values
401 class baseRecord
402 {
403  unsigned int base:4;
404  unsigned int qScore:7;
405  unsigned int conflicts:5; // how many cases of poorer matches that disagree
406 };
407 
408 //
409 // define the baseRecord array type:
410 //
411 inline baseRecord &mmapBaseRecordAccess(void *base, uint32_t index)
412 {
413  return *((baseRecord *)((char *)base + index*sizeof(baseRecord)));
414 }
415 inline void mmapBaseRecordSet(void *base, uint32_t index, baseRecord &v)
416 {
417  mmapBaseRecordAccess(base, index) = v;
418 }
419 inline size_t mmapBaseRecordElementCount2Bytes(uint32_t i)
420 {
421  return sizeof(baseRecord) * i;
422 }
423 
424 typedef MemoryMapArray<
425 baseRecord &,
426 uint32_t,
427 0x12341234,
428 0xdeadbeef,
429 &mmapBaseRecordAccess,
430 mmapBaseRecordSet,
431 mmapBaseRecordElementCount2Bytes,
433 > mmapArrayBaseRecord_t;
434 #endif
435 
436 #endif
MemoryMap::create
virtual bool create(const char *file, size_t size)
create the memory mapped file on disk
Definition: MemoryMap.cpp:243
MemoryMapGenericHeader
Definition: MemoryMapArray.h:120
MemoryMapArray::create
int create(indexT elementCount, int optionalHeaderCount=0)
allow anonymous (malloc) create.
Definition: MemoryMapArray.h:250
MemoryMap::open
virtual bool open(const char *file, int flags=O_RDONLY)
open a previously created mapped vector
Definition: MemoryMap.cpp:156
MemoryMapArrayHeader
Definition: MemoryMapArray.h:64
MemoryMap
There are a pair of related data structures in the operating system, and also a few simple algorithms...
Definition: MemoryMap.h:155
MemoryMapArray::open
bool open(const char *file, int flags=O_RDONLY)
open a previously created mapped vector
Definition: MemoryMapArray.h:269
MemoryMapArray
Definition: MemoryMapArray.h:141
emptyGenericHeader
Definition: MemoryMapArray.h:321
MemoryMapArray::create
int create(const char *file, indexT elementCount, int optionalHeaderCount=0)
Create a vector with elementCount memebers.
Definition: MemoryMapArray.h:208
operator<<
InputFile & operator<<(InputFile &stream, const std::string &str)
Write to a file using streaming.
Definition: InputFile.h:736