libStatGen Software  1
FortranFormat.cpp
1 /*
2  * Copyright (C) 2010 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "FortranFormat.h"
19 #include "Error.h"
20 
21 FortranFormat::FortranFormat()
22 {
23  inputPos = -1;
24  endOfPattern = false;
25 }
26 
27 void FortranFormat::SetInputFile(IFILE & file)
28 {
29  input = file;
30  inputPos = -1;
31  endOfPattern = false;
32 }
33 
34 void FortranFormat::SetFormat(const String & formatString)
35 {
36  format = formatString;
37 
38  inputPos = -1;
39  endOfPattern = false;
40 
41  repeatCount = 0;
42 
43  format.Clear();
44 
45  // Remove blank spaces from format statement and extract
46  // the first bracketed expression
47  int level = 0;
48  for (int i = 0; i < formatString.Length(); i++)
49  {
50  if (formatString[i] == ' ' || formatString[i] == '\t' ||
51  formatString[i] == '\n' || formatString[i] == '\r')
52  continue;
53 
54  if (formatString[i] == '(')
55  level++;
56 
57  if (formatString[i] == ')')
58  level--;
59 
60  format += formatString[i];
61 
62  if (level == 0) break;
63  }
64 
65  if (format[0] != '(' || format[format.Length() - 1] != ')')
66  error("Invalid FORTRAN format statement\n\n"
67  "The statement \"%s\" is not bracketed correctly.\n",
68  (const char *) formatString);
69 
70  lastBracket = 1;
71  lastCount = 0;
72 
73  formatPos = 1;
74  repeatCount = 0;
75 
76  bracketStack.Clear();
77  bracketCounter.Clear();
78  bracketCount.Clear();
79 }
80 
81 int FortranFormat::GetNextInteger()
82 {
83  GetNextField(buffer);
84 
85  return buffer.AsInteger();
86 }
87 
88 char FortranFormat::GetNextCharacter()
89 {
90  GetNextField(buffer);
91 
92  return buffer[0];
93 }
94 
95 void FortranFormat::GetNextField(String & field)
96 {
97  while (!ProcessToken(field))
98  ;
99 }
100 
101 bool FortranFormat::ProcessToken(String & field)
102 {
103  // This flag only gets set if we encounter the final bracket or a ':'
104  endOfPattern = false;
105 
106  // Read input from file, if appropriate
107  if (inputPos == -1)
108  {
109  inputLine.ReadLine(input);
110  inputPos = 0;
111  }
112 
113  // First read repeat count specifier
114  if (repeatCount == 0)
115  repeatCount = GetIntegerFromFormat();
116 
117  // By default, the repeat count should be 1
118  if (repeatCount == 0)
119  repeatCount = 1;
120 
121  int repeatPos = formatPos;
122 
123  // Check if this is a new bracketed grouping
124  if (format[formatPos] == '(')
125  {
126  formatPos++;
127 
128  bracketStack.Push(formatPos);
129  bracketCounter.Push(repeatCount);
130  bracketCount.Push(repeatCount);
131 
132  repeatCount = 0;
133 
134  return false;
135  }
136 
137  // Check if this an 'X' field
138  if (format[formatPos] == 'X')
139  {
140  formatPos++;
141 
142  // No width specifier allowed for these fields
143  RejectWidth('X');
144 
145  // Skip appropriate number of characters
146  inputPos += repeatCount;
147 
148  // Reset repeat count
149  repeatCount = 0;
150 
151  FinishField();
152 
153  return false;
154  }
155 
156  // Check if this is a '/' (vertical tab field)
157  if (format[formatPos] == '/')
158  {
159  formatPos++;
160 
161  // No width specifier allowed for these fields
162  RejectWidth('/');
163 
164  // Skip the appropriate number of lines
165  while (repeatCount--)
166  inputLine.ReadLine(input);
167 
168  inputPos = 0;
169 
170  // Separators are optional, so we might already be at the next field
171  if (format[formatPos] == ',' || format[formatPos] || ')')
172  FinishField();
173 
174  return false;
175  }
176 
177  // Check that we haven't encountered a rare, but unsupported input type
178  if (format[formatPos] == 'Q' || format[formatPos] == 'P' || format[formatPos] == 'B')
179  {
180  formatPos++;
181 
182  int problemStart = formatPos;
183 
184  while (format[formatPos] != ',' && format[formatPos] != ')' && format[formatPos] != '/')
185  formatPos++;
186 
187  error("Unsupported pattern in FORMAT statement\n\n"
188  "Statement \"%s\" includes unsupporterd pattern '%s'\n",
189  (const char *) format,
190  (const char *) format.SubStr(problemStart, formatPos - problemStart));
191  }
192 
193  if (format[formatPos] == ':')
194  {
195  formatPos++;
196 
197  if (format[formatPos] == ',' || format[formatPos] || ')')
198  FinishField();
199 
200  repeatCount = 0;
201 
202  endOfPattern = true;
203 
204  return false;
205  }
206 
207  // All the other types we recognize include a width specifier
208 
209  // Identify the location of the type specifier
210  int typeStart = formatPos;
211 
212  while (CharacterFollows())
213  formatPos++;
214 
215  int typeLen = formatPos - typeStart;
216 
217  // Retrieve the field width
218  int width = GetIntegerFromFormat();
219 
220  if (width == 0)
221  error("Unrecognized FORMAT statement\n\n"
222  "Statement \"%s\" is missing a width specifier for a field of type '%s'\n",
223  (const char *) format, (const char *) format.SubStr(typeStart, typeLen));
224 
225  // Check for horizontal tab character
226  if (format[typeStart] == 'T')
227  {
228  // Move left by a specified number of characters
229  if (format[typeStart + 1] == 'L')
230  inputPos = width > inputPos ? 0 : inputPos - width;
231  // Move right by a specified number of characters
232  else if (format[typeStart + 1] == 'R')
233  inputPos += width;
234  // Or simply set the appropriate horizontal position
235  else
236  inputPos = width;
237 
238  repeatCount--;
239 
240  if (repeatCount)
241  formatPos = repeatPos;
242  else
243  FinishField();
244 
245  return false;
246  }
247 
248  // Assume that if we got here, we are looking at a data field!
249  field.Copy(inputLine, inputPos, width);
250  field.Trim();
251 
252  inputPos += width;
253 
254  repeatCount--;
255 
256  if (repeatCount)
257  formatPos = repeatPos;
258  else
259  FinishField();
260 
261  return true;
262 }
263 
264 int FortranFormat::GetIntegerFromFormat()
265 {
266  int result = 0;
267 
268  while (DigitFollows())
269  result = result * 10 + (int)(format[formatPos++] - '0');
270 
271  return result;
272 }
273 
274 bool FortranFormat::DigitFollows()
275 {
276  return (format[formatPos] >= '0') && (format[formatPos] <= '9');
277 }
278 
279 bool FortranFormat::CharacterFollows()
280 {
281  return (format[formatPos] >= 'A') && (format[formatPos] <= 'Z');
282 }
283 
284 void FortranFormat::RejectWidth(char ch)
285 {
286  // No width allowed for field types 'X' and '\'
287  if (DigitFollows())
288  error("Unrecognized FORTRAN format statement\n\n"
289  "The statement \"%s\" includes width specifier for field of type '%c'.\n",
290  (const char *) format, ch);
291 }
292 
293 void FortranFormat::FinishField(bool)
294 {
295  // Find the next field separator
296  while (format[formatPos] != ',' && format[formatPos] != ')')
297  {
298  if (format[formatPos] == '/')
299  return;
300 
301  formatPos++;
302  }
303 
304  // Skip commas
305  if (format[formatPos] == ',')
306  {
307  formatPos++;
308  return;
309  }
310 
311  // If we found a bracket, then it is either the end of the statement
312  // (if bracketStack is empty) or we finish an internal grouping
313  if (bracketStack.Length())
314  {
315  // Retrieve information about this grouping
316  lastBracket = bracketStack.Pop();
317  lastCount = bracketCount.Pop();
318  int lastCounter = bracketCounter.Pop() - 1;
319 
320  // Loop if required
321  if (lastCounter)
322  {
323  bracketStack.Push(lastBracket);
324  bracketCount.Push(lastCount);
325  bracketCounter.Push(lastCounter);
326 
327  formatPos = lastBracket;
328  }
329  else
330  // Otherwise find the next separator
331  {
332  formatPos++;
333  FinishField();
334  return;
335  }
336  }
337  else
338  {
339  // If we finished the input line, then activate reset input counter
340  inputPos = -1;
341  endOfPattern = true;
342 
343  // And re-use input tokens starting at the last bracket
344  formatPos = lastBracket;
345 
346  if (lastBracket == 1)
347  return;
348 
349  // With appropriate repeat counts
350  bracketStack.Push(lastBracket);
351  bracketCounter.Push(lastCount);
352  bracketCount.Push(lastCount);
353  }
354 }
355 
356 void FortranFormat::Flush()
357 {
358  while (!endOfPattern)
359  ProcessToken(buffer);
360 
361  inputPos = -1;
362 
363  lastBracket = 1;
364  lastCount = 0;
365 
366  formatPos = 1;
367  repeatCount = 0;
368 
369  bracketStack.Clear();
370  bracketCounter.Clear();
371  bracketCount.Clear();
372 }
String
Definition: StringBasics.h:38
InputFile
Class for easily reading/writing files without having to worry about file type (uncompressed,...
Definition: InputFile.h:36