Horizon
parser.hpp
1 #pragma once
2 
3 #include <cmath> // isfinite
4 #include <cstdint> // uint8_t
5 #include <functional> // function
6 #include <string> // string
7 #include <utility> // move
8 #include <vector> // vector
9 
10 #include <nlohmann/detail/exceptions.hpp>
11 #include <nlohmann/detail/input/input_adapters.hpp>
12 #include <nlohmann/detail/input/json_sax.hpp>
13 #include <nlohmann/detail/input/lexer.hpp>
14 #include <nlohmann/detail/macro_scope.hpp>
15 #include <nlohmann/detail/meta/is_sax.hpp>
16 #include <nlohmann/detail/value_t.hpp>
17 
18 namespace nlohmann
19 {
20 namespace detail
21 {
23 // parser //
25 
/// Events reported to a parser_callback_t while a document is parsed.
/// The header line (26) and the two *_start enumerators (29, 33) had been
/// dropped from this listing; they are restored at their original numbers.
26 enum class parse_event_t : uint8_t
27 {
    /// the parser read `{` and started to process a JSON object
29  object_start,
    /// the parser read `}` and finished processing a JSON object
31  object_end,
    /// the parser read `[` and started to process a JSON array
33  array_start,
    /// the parser read `]` and finished processing a JSON array
35  array_end,
    /// the parser read a key of a value in an object
37  key,
    /// the parser finished reading a JSON value
39  value
40 };
41 
/// Signature of the optional user callback accepted by parser's constructor.
/// It receives the current nesting depth, the parse_event_t that occurred and
/// a mutable reference to the value parsed so far, and returns a bool.
/// NOTE(review): parse() treats a discarded top-level result as "removed by
/// the callback", so a false return presumably means "discard this value" -
/// confirm against json_sax_dom_callback_parser in json_sax.hpp.
42 template<typename BasicJsonType>
43 using parser_callback_t =
44  std::function<bool(int /*depth*/, parse_event_t /*event*/, BasicJsonType& /*parsed*/)>;
45 
51 template<typename BasicJsonType, typename InputAdapterType>
52 class parser
53 {
54  using number_integer_t = typename BasicJsonType::number_integer_t;
55  using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
56  using number_float_t = typename BasicJsonType::number_float_t;
57  using string_t = typename BasicJsonType::string_t;
59  using token_type = typename lexer_t::token_type;
60 
61  public:
/// Construct a parser reading from an input adapter.
///
/// @param adapter            input source; moved into the lexer
/// @param cb                 optional callback; when non-null, parse() builds
///                           the result through json_sax_dom_callback_parser
/// @param allow_exceptions_  stored and handed on to the SAX DOM builders
///                           created in parse()
/// @param skip_comments      forwarded unchanged to the lexer's constructor
///
/// The first token is scanned right away, so `last_token` is valid before
/// sax_parse_internal() runs.
63  explicit parser(InputAdapterType&& adapter,
64  const parser_callback_t<BasicJsonType> cb = nullptr,
65  const bool allow_exceptions_ = true,
66  const bool skip_comments = false)
67  : callback(cb)
68  , m_lexer(std::move(adapter), skip_comments)
69  , allow_exceptions(allow_exceptions_)
70  {
71  // read first token
72  get_token();
73  }
74 
/// Public parser interface: build a JSON value from the token stream.
///
/// @param[in]  strict  when true, any token other than end_of_input after the
///                     top-level value is reported as parse error 101
/// @param[out] result  receives the parsed value; it is set to
///                     value_t::discarded if the SAX handler recorded an error
///
/// The return value of sax_parse_internal() is deliberately not inspected
/// here; failures are picked up afterwards through sdp.is_errored().
85  void parse(const bool strict, BasicJsonType& result)
86  {
    // a user callback is installed: use the callback-aware DOM builder
87  if (callback)
88  {
89  json_sax_dom_callback_parser<BasicJsonType> sdp(result, callback, allow_exceptions);
90  sax_parse_internal(&sdp);
91 
92  // in strict mode, input must be completely read
93  if (strict && (get_token() != token_type::end_of_input))
94  {
95  sdp.parse_error(m_lexer.get_position(),
96  m_lexer.get_token_string(),
97  parse_error::create(101, m_lexer.get_position(),
98  exception_message(token_type::end_of_input, "value"), BasicJsonType()));
99  }
100 
101  // in case of an error, return discarded value
102  if (sdp.is_errored())
103  {
104  result = value_t::discarded;
105  return;
106  }
107 
108  // set top-level value to null if it was discarded by the callback
109  // function
110  if (result.is_discarded())
111  {
112  result = nullptr;
113  }
114  }
    // no callback: plain DOM builder, otherwise the same sequence as above
115  else
116  {
117  json_sax_dom_parser<BasicJsonType> sdp(result, allow_exceptions);
118  sax_parse_internal(&sdp);
119 
120  // in strict mode, input must be completely read
121  if (strict && (get_token() != token_type::end_of_input))
122  {
123  sdp.parse_error(m_lexer.get_position(),
124  m_lexer.get_token_string(),
125  parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input, "value"), BasicJsonType()));
126  }
127 
128  // in case of an error, return discarded value
129  if (sdp.is_errored())
130  {
131  result = value_t::discarded;
132  return;
133  }
134  }
135 
    // only reached on success; both error paths return before this check
136  result.assert_invariant();
137  }
138 
/// Public accept interface: check whether the input is valid JSON without
/// building a value.
///
/// @param strict  forwarded to sax_parse(); when true, trailing tokens after
///                the top-level value make the input unacceptable
/// @return the result of sax_parse() on a json_sax_acceptor
145  bool accept(const bool strict = true)
146  {
    // the acceptor's declaration (line 147) had been dropped from the listing,
    // leaving `sax_acceptor` undeclared; it is restored here
147  json_sax_acceptor<BasicJsonType> sax_acceptor;
148  return sax_parse(&sax_acceptor, strict);
149  }
150 
/// Parse the input and report it as SAX events to a caller-supplied handler.
///
/// @tparam SAX    handler type; must offer the member functions called by
///                sax_parse_internal() (null, boolean, number_*, string,
///                start_/end_object, start_/end_array, key, parse_error)
/// @param sax     event receiver (annotated JSON_HEDLEY_NON_NULL(2))
/// @param strict  when true, a token other than end_of_input after the
///                top-level value is reported as parse error 101
/// @return false if parsing failed; in the strict trailing-token case the
///         return value of sax->parse_error(); true otherwise
151  template<typename SAX>
152  JSON_HEDLEY_NON_NULL(2)
153  bool sax_parse(SAX* sax, const bool strict = true)
154  {
    // NOTE(review): embedded line 155 is absent from this listing. is_sax.hpp
    // is included above, so a compile-time SAX interface check may have stood
    // here - confirm against upstream before relying on this copy.
156  const bool result = sax_parse_internal(sax);
157 
158  // strict mode: the next token must be end_of_input
159  if (result && strict && (get_token() != token_type::end_of_input))
160  {
161  return sax->parse_error(m_lexer.get_position(),
162  m_lexer.get_token_string(),
163  parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input, "value"), BasicJsonType()));
164  }
165 
166  return result;
167  }
168 
169  private:
/// Core iterative parser: turns the lexer's token stream into SAX events.
///
/// Nesting is tracked on an explicit stack (`states`) instead of recursion.
/// On entry, and at the top of every loop iteration that runs the token
/// switch, `last_token` must already hold the token to be processed.
///
/// @param sax  event receiver (annotated JSON_HEDLEY_NON_NULL(2))
/// @return true once the top-level value is complete; false as soon as any
///         SAX callback returns false; for syntax errors and non-finite
///         floats, whatever sax->parse_error() returns
170  template<typename SAX>
171  JSON_HEDLEY_NON_NULL(2)
172  bool sax_parse_internal(SAX* sax)
173  {
174  // stack to remember the hierarchy of structured values we are parsing
175  // true = array; false = object
176  std::vector<bool> states;
177  // value to avoid a goto (see comment where set to true)
178  bool skip_to_state_evaluation = false;
179 
180  while (true)
181  {
182  if (!skip_to_state_evaluation)
183  {
184  // invariant: get_token() was called before each iteration
185  switch (last_token)
186  {
187  case token_type::begin_object:
188  {
    // NOTE(review): std::size_t(-1) presumably signals "element count
    // unknown" to the handler - confirm in json_sax.hpp
189  if (JSON_HEDLEY_UNLIKELY(!sax->start_object(std::size_t(-1))))
190  {
191  return false;
192  }
193 
194  // closing } -> we are done
195  if (get_token() == token_type::end_object)
196  {
197  if (JSON_HEDLEY_UNLIKELY(!sax->end_object()))
198  {
199  return false;
200  }
    // empty object is a complete value: fall through to state evaluation
201  break;
202  }
203 
204  // parse key
205  if (JSON_HEDLEY_UNLIKELY(last_token != token_type::value_string))
206  {
207  return sax->parse_error(m_lexer.get_position(),
208  m_lexer.get_token_string(),
209  parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string, "object key"), BasicJsonType()));
210  }
211  if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string())))
212  {
213  return false;
214  }
215 
216  // parse separator (:)
217  if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
218  {
219  return sax->parse_error(m_lexer.get_position(),
220  m_lexer.get_token_string(),
221  parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), BasicJsonType()));
222  }
223 
224  // remember we are now inside an object
225  states.push_back(false);
226 
227  // parse values
228  get_token();
229  continue;
230  }
231 
232  case token_type::begin_array:
233  {
234  if (JSON_HEDLEY_UNLIKELY(!sax->start_array(std::size_t(-1))))
235  {
236  return false;
237  }
238 
239  // closing ] -> we are done
240  if (get_token() == token_type::end_array)
241  {
242  if (JSON_HEDLEY_UNLIKELY(!sax->end_array()))
243  {
244  return false;
245  }
246  break;
247  }
248 
249  // remember we are now inside an array
250  states.push_back(true);
251 
252  // parse values (no need to call get_token)
    // the token read by the end_array check above is the first element
253  continue;
254  }
255 
256  case token_type::value_float:
257  {
258  const auto res = m_lexer.get_number_float();
259 
    // a non-finite result is reported as out_of_range 406, not as a
    // syntax error 101
260  if (JSON_HEDLEY_UNLIKELY(!std::isfinite(res)))
261  {
262  return sax->parse_error(m_lexer.get_position(),
263  m_lexer.get_token_string(),
264  out_of_range::create(406, "number overflow parsing '" + m_lexer.get_token_string() + "'", BasicJsonType()));
265  }
266 
    // the lexer's current string value is handed over next to the number
267  if (JSON_HEDLEY_UNLIKELY(!sax->number_float(res, m_lexer.get_string())))
268  {
269  return false;
270  }
271 
272  break;
273  }
274 
275  case token_type::literal_false:
276  {
277  if (JSON_HEDLEY_UNLIKELY(!sax->boolean(false)))
278  {
279  return false;
280  }
281  break;
282  }
283 
284  case token_type::literal_null:
285  {
286  if (JSON_HEDLEY_UNLIKELY(!sax->null()))
287  {
288  return false;
289  }
290  break;
291  }
292 
293  case token_type::literal_true:
294  {
295  if (JSON_HEDLEY_UNLIKELY(!sax->boolean(true)))
296  {
297  return false;
298  }
299  break;
300  }
301 
302  case token_type::value_integer:
303  {
304  if (JSON_HEDLEY_UNLIKELY(!sax->number_integer(m_lexer.get_number_integer())))
305  {
306  return false;
307  }
308  break;
309  }
310 
311  case token_type::value_string:
312  {
313  if (JSON_HEDLEY_UNLIKELY(!sax->string(m_lexer.get_string())))
314  {
315  return false;
316  }
317  break;
318  }
319 
320  case token_type::value_unsigned:
321  {
322  if (JSON_HEDLEY_UNLIKELY(!sax->number_unsigned(m_lexer.get_number_unsigned())))
323  {
324  return false;
325  }
326  break;
327  }
328 
329  case token_type::parse_error:
330  {
331  // using "uninitialized" to avoid "expected" message
332  return sax->parse_error(m_lexer.get_position(),
333  m_lexer.get_token_string(),
334  parse_error::create(101, m_lexer.get_position(), exception_message(token_type::uninitialized, "value"), BasicJsonType()));
335  }
336 
    // every remaining token kind cannot start a value
337  case token_type::uninitialized:
338  case token_type::end_array:
339  case token_type::end_object:
340  case token_type::name_separator:
341  case token_type::value_separator:
342  case token_type::end_of_input:
343  case token_type::literal_or_value:
344  default: // the last token was unexpected
345  {
346  return sax->parse_error(m_lexer.get_position(),
347  m_lexer.get_token_string(),
348  parse_error::create(101, m_lexer.get_position(), exception_message(token_type::literal_or_value, "value"), BasicJsonType()));
349  }
350  }
351  }
352  else
353  {
    // one-shot flag: the switch is skipped for this iteration only
354  skip_to_state_evaluation = false;
355  }
356 
357  // we reached this line after we successfully parsed a value
358  if (states.empty())
359  {
360  // empty stack: we reached the end of the hierarchy: done
361  return true;
362  }
363 
364  if (states.back()) // array
365  {
366  // comma -> next value
367  if (get_token() == token_type::value_separator)
368  {
369  // parse a new value
370  get_token();
371  continue;
372  }
373 
374  // closing ]
375  if (JSON_HEDLEY_LIKELY(last_token == token_type::end_array))
376  {
377  if (JSON_HEDLEY_UNLIKELY(!sax->end_array()))
378  {
379  return false;
380  }
381 
382  // We are done with this array. Before we can parse a
383  // new value, we need to evaluate the new state first.
384  // By setting skip_to_state_evaluation to true, we skip the
385  // token switch and go straight to the state evaluation.
386  JSON_ASSERT(!states.empty());
387  states.pop_back();
388  skip_to_state_evaluation = true;
389  continue;
390  }
391 
392  return sax->parse_error(m_lexer.get_position(),
393  m_lexer.get_token_string(),
394  parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_array, "array"), BasicJsonType()));
395  }
396 
397  // states.back() is false -> object
398 
399  // comma -> next value
400  if (get_token() == token_type::value_separator)
401  {
402  // parse key
403  if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::value_string))
404  {
405  return sax->parse_error(m_lexer.get_position(),
406  m_lexer.get_token_string(),
407  parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string, "object key"), BasicJsonType()));
408  }
409 
410  if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string())))
411  {
412  return false;
413  }
414 
415  // parse separator (:)
416  if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
417  {
418  return sax->parse_error(m_lexer.get_position(),
419  m_lexer.get_token_string(),
420  parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), BasicJsonType()));
421  }
422 
423  // parse values
424  get_token();
425  continue;
426  }
427 
428  // closing }
429  if (JSON_HEDLEY_LIKELY(last_token == token_type::end_object))
430  {
431  if (JSON_HEDLEY_UNLIKELY(!sax->end_object()))
432  {
433  return false;
434  }
435 
436  // We are done with this object. Before we can parse a
437  // new value, we need to evaluate the new state first.
438  // By setting skip_to_state_evaluation to true, we skip the
439  // token switch and go straight to the state evaluation.
440  JSON_ASSERT(!states.empty());
441  states.pop_back();
442  skip_to_state_evaluation = true;
443  continue;
444  }
445 
446  return sax->parse_error(m_lexer.get_position(),
447  m_lexer.get_token_string(),
448  parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_object, "object"), BasicJsonType()));
449  }
450  }
451 
/// Get the next token from the lexer.
/// The scanned token is also stored in `last_token`, which the rest of the
/// parser reads instead of rescanning.
/// @return the token just scanned
453  token_type get_token()
454  {
455  return last_token = m_lexer.scan();
456  }
457 
/// Build the human-readable text for a syntax error at the current token.
///
/// @param expected  token the parser was looking for; pass
///                  token_type::uninitialized to omit the "; expected ..." part
/// @param context   what was being parsed (e.g. "value", "object key");
///                  an empty string omits the "while parsing ..." part
/// @return message of the form
///         "syntax error [while parsing <context> ]- <detail>[; expected <token>]"
458  std::string exception_message(const token_type expected, const std::string& context)
459  {
460  std::string error_msg = "syntax error ";
461 
462  if (!context.empty())
463  {
464  error_msg += "while parsing " + context + " ";
465  }
466 
467  error_msg += "- ";
468 
    // lexer-level failure: relay the lexer's own message and the raw token text
469  if (last_token == token_type::parse_error)
470  {
471  error_msg += std::string(m_lexer.get_error_message()) + "; last read: '" +
472  m_lexer.get_token_string() + "'";
473  }
    // well-formed token in the wrong place: name the token kind instead
474  else
475  {
476  error_msg += "unexpected " + std::string(lexer_t::token_type_name(last_token));
477  }
478 
479  if (expected != token_type::uninitialized)
480  {
481  error_msg += "; expected " + std::string(lexer_t::token_type_name(expected));
482  }
483 
484  return error_msg;
485  }
486 
487  private:
    // user callback; when non-null, parse() takes the callback-aware path
489  const parser_callback_t<BasicJsonType> callback = nullptr;
    // most recent token returned by get_token()
491  token_type last_token = token_type::uninitialized;
    // the lexer that owns the input adapter and produces the tokens
493  lexer_t m_lexer;
    // handed to the SAX DOM builders created in parse()
    // NOTE(review): presumably selects throwing vs. non-throwing error
    // reporting - confirm in json_sax.hpp
495  const bool allow_exceptions = true;
496 };
497 
498 } // namespace detail
499 } // namespace nlohmann
Definition: json_sax.hpp:636
SAX implementation to create a JSON value from SAX events.
Definition: json_sax.hpp:155
JSON_HEDLEY_RETURNS_NON_NULL static JSON_HEDLEY_CONST const char * token_type_name(const token_type t) noexcept
return name of values of type token_type (only used for errors)
Definition: lexer.hpp:54
lexical analysis
Definition: lexer.hpp:104
string_t & get_string()
return current string value (implicitly resets the token; useful only once)
Definition: lexer.hpp:1422
constexpr position_t get_position() const noexcept
return position of last read token
Definition: lexer.hpp:1432
constexpr number_integer_t get_number_integer() const noexcept
return integer value
Definition: lexer.hpp:1404
constexpr JSON_HEDLEY_RETURNS_NON_NULL const char * get_error_message() const noexcept
return syntax error message
Definition: lexer.hpp:1465
constexpr number_unsigned_t get_number_unsigned() const noexcept
return unsigned integer value
Definition: lexer.hpp:1410
constexpr number_float_t get_number_float() const noexcept
return floating-point value
Definition: lexer.hpp:1416
std::string get_token_string() const
return the last read token (for errors only).
Definition: lexer.hpp:1440
static parse_error create(int id_, const position_t &pos, const std::string &what_arg, const BasicJsonType &context)
create a parse error exception
Definition: exceptions.hpp:197
syntax analysis
Definition: parser.hpp:53
parser(InputAdapterType &&adapter, const parser_callback_t< BasicJsonType > cb=nullptr, const bool allow_exceptions_=true, const bool skip_comments=false)
a parser reading from an input adapter
Definition: parser.hpp:63
bool accept(const bool strict=true)
public accept interface
Definition: parser.hpp:145
void parse(const bool strict, BasicJsonType &result)
public parser interface
Definition: parser.hpp:85
zip_uint8_t uint8_t
zip_uint8_t typedef.
Definition: zip.hpp:78
@ discarded
discarded by the parser callback function
parse_event_t
Definition: parser.hpp:27
@ value
the parser finished reading a JSON value
@ key
the parser read a key of a value in an object
@ array_end
the parser read ] and finished processing a JSON array
@ array_start
the parser read [ and started to process a JSON array
@ object_start
the parser read { and started to process a JSON object
@ object_end
the parser read } and finished processing a JSON object
@ strict
throw a type_error exception in case of invalid UTF-8
namespace for Niels Lohmann
Definition: adl_serializer.hpp:12