Horizon
binary_reader.hpp
1 #pragma once
2 
3 #include <algorithm> // generate_n
4 #include <array> // array
5 #include <cmath> // ldexp
6 #include <cstddef> // size_t
7 #include <cstdint> // uint8_t, uint16_t, uint32_t, uint64_t
8 #include <cstdio> // snprintf
9 #include <cstring> // memcpy
10 #include <iterator> // back_inserter
11 #include <limits> // numeric_limits
12 #include <string> // char_traits, string
13 #include <utility> // make_pair, move
14 #include <vector> // vector
15 
16 #include <nlohmann/detail/exceptions.hpp>
17 #include <nlohmann/detail/input/input_adapters.hpp>
18 #include <nlohmann/detail/input/json_sax.hpp>
19 #include <nlohmann/detail/input/lexer.hpp>
20 #include <nlohmann/detail/macro_scope.hpp>
21 #include <nlohmann/detail/meta/is_sax.hpp>
22 #include <nlohmann/detail/meta/type_traits.hpp>
23 #include <nlohmann/detail/value_t.hpp>
24 
25 namespace nlohmann
26 {
27 namespace detail
28 {
29 
/// how CBOR tagged items are treated by the binary reader
enum class cbor_tag_handler_t
{
    error,   ///< a tagged item is reported as a parse error
    ignore,  ///< the tag is skipped and the value following it is parsed
    store    ///< the tag is kept as the subtype of a binary value following it
};
37 
/*!
@brief determine the byte order of the system

@param[in] num  probe value whose first byte in memory is inspected
                (defaults to 1)

@return true if the byte of @a num stored at the lowest address equals 1,
        which for the default argument means the system is little endian
*/
static inline bool little_endianess(int num = 1) noexcept
{
    // copy out the byte stored at the lowest address of the probe value
    unsigned char lowest_byte = 0;
    std::memcpy(&lowest_byte, &num, sizeof(lowest_byte));
    return lowest_byte == 1;
}
49 
50 
52 // binary reader //
54 
58 template<typename BasicJsonType, typename InputAdapterType, typename SAX = json_sax_dom_parser<BasicJsonType>>
60 {
61  using number_integer_t = typename BasicJsonType::number_integer_t;
62  using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
63  using number_float_t = typename BasicJsonType::number_float_t;
64  using string_t = typename BasicJsonType::string_t;
65  using binary_t = typename BasicJsonType::binary_t;
66  using json_sax_t = SAX;
67  using char_type = typename InputAdapterType::char_type;
68  using char_int_type = typename std::char_traits<char_type>::int_type;
69 
70  public:
76  explicit binary_reader(InputAdapterType&& adapter) noexcept : ia(std::move(adapter))
77  {
79  }
80 
81  // make class move-only
82  binary_reader(const binary_reader&) = delete;
83  binary_reader(binary_reader&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor)
84  binary_reader& operator=(const binary_reader&) = delete;
85  binary_reader& operator=(binary_reader&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor)
86  ~binary_reader() = default;
87 
96  JSON_HEDLEY_NON_NULL(3)
97  bool sax_parse(const input_format_t format,
98  json_sax_t* sax_,
99  const bool strict = true,
100  const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error)
101  {
102  sax = sax_;
103  bool result = false;
104 
105  switch (format)
106  {
107  case input_format_t::bson:
108  result = parse_bson_internal();
109  break;
110 
111  case input_format_t::cbor:
112  result = parse_cbor_internal(true, tag_handler);
113  break;
114 
115  case input_format_t::msgpack:
116  result = parse_msgpack_internal();
117  break;
118 
119  case input_format_t::ubjson:
120  result = parse_ubjson_internal();
121  break;
122 
123  case input_format_t::json: // LCOV_EXCL_LINE
124  default: // LCOV_EXCL_LINE
125  JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE
126  }
127 
128  // strict mode: next byte must be EOF
129  if (result && strict)
130  {
131  if (format == input_format_t::ubjson)
132  {
133  get_ignore_noop();
134  }
135  else
136  {
137  get();
138  }
139 
140  if (JSON_HEDLEY_UNLIKELY(current != std::char_traits<char_type>::eof()))
141  {
142  return sax->parse_error(chars_read, get_token_string(),
143  parse_error::create(110, chars_read, exception_message(format, "expected end of input; last byte: 0x" + get_token_string(), "value"), BasicJsonType()));
144  }
145  }
146 
147  return result;
148  }
149 
150  private:
152  // BSON //
154 
159  bool parse_bson_internal()
160  {
161  std::int32_t document_size{};
162  get_number<std::int32_t, true>(input_format_t::bson, document_size);
163 
164  if (JSON_HEDLEY_UNLIKELY(!sax->start_object(std::size_t(-1))))
165  {
166  return false;
167  }
168 
169  if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_list(/*is_array*/false)))
170  {
171  return false;
172  }
173 
174  return sax->end_object();
175  }
176 
184  bool get_bson_cstr(string_t& result)
185  {
186  auto out = std::back_inserter(result);
187  while (true)
188  {
189  get();
190  if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bson, "cstring")))
191  {
192  return false;
193  }
194  if (current == 0x00)
195  {
196  return true;
197  }
198  *out++ = static_cast<typename string_t::value_type>(current);
199  }
200  }
201 
213  template<typename NumberType>
214  bool get_bson_string(const NumberType len, string_t& result)
215  {
216  if (JSON_HEDLEY_UNLIKELY(len < 1))
217  {
218  auto last_token = get_token_string();
219  return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bson, "string length must be at least 1, is " + std::to_string(len), "string"), BasicJsonType()));
220  }
221 
222  return get_string(input_format_t::bson, len - static_cast<NumberType>(1), result) && get() != std::char_traits<char_type>::eof();
223  }
224 
234  template<typename NumberType>
235  bool get_bson_binary(const NumberType len, binary_t& result)
236  {
237  if (JSON_HEDLEY_UNLIKELY(len < 0))
238  {
239  auto last_token = get_token_string();
240  return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bson, "byte array length cannot be negative, is " + std::to_string(len), "binary"), BasicJsonType()));
241  }
242 
243  // All BSON binary values have a subtype
244  std::uint8_t subtype{};
245  get_number<std::uint8_t>(input_format_t::bson, subtype);
246  result.set_subtype(subtype);
247 
248  return get_binary(input_format_t::bson, len, result);
249  }
250 
261  bool parse_bson_element_internal(const char_int_type element_type,
262  const std::size_t element_type_parse_position)
263  {
264  switch (element_type)
265  {
266  case 0x01: // double
267  {
268  double number{};
269  return get_number<double, true>(input_format_t::bson, number) && sax->number_float(static_cast<number_float_t>(number), "");
270  }
271 
272  case 0x02: // string
273  {
274  std::int32_t len{};
275  string_t value;
276  return get_number<std::int32_t, true>(input_format_t::bson, len) && get_bson_string(len, value) && sax->string(value);
277  }
278 
279  case 0x03: // object
280  {
281  return parse_bson_internal();
282  }
283 
284  case 0x04: // array
285  {
286  return parse_bson_array();
287  }
288 
289  case 0x05: // binary
290  {
291  std::int32_t len{};
292  binary_t value;
293  return get_number<std::int32_t, true>(input_format_t::bson, len) && get_bson_binary(len, value) && sax->binary(value);
294  }
295 
296  case 0x08: // boolean
297  {
298  return sax->boolean(get() != 0);
299  }
300 
301  case 0x0A: // null
302  {
303  return sax->null();
304  }
305 
306  case 0x10: // int32
307  {
309  return get_number<std::int32_t, true>(input_format_t::bson, value) && sax->number_integer(value);
310  }
311 
312  case 0x12: // int64
313  {
315  return get_number<std::int64_t, true>(input_format_t::bson, value) && sax->number_integer(value);
316  }
317 
318  default: // anything else not supported (yet)
319  {
320  std::array<char, 3> cr{{}};
321  (std::snprintf)(cr.data(), cr.size(), "%.2hhX", static_cast<unsigned char>(element_type)); // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg)
322  return sax->parse_error(element_type_parse_position, std::string(cr.data()), parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + std::string(cr.data()), BasicJsonType()));
323  }
324  }
325  }
326 
339  bool parse_bson_element_list(const bool is_array)
340  {
341  string_t key;
342 
343  while (auto element_type = get())
344  {
345  if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bson, "element list")))
346  {
347  return false;
348  }
349 
350  const std::size_t element_type_parse_position = chars_read;
351  if (JSON_HEDLEY_UNLIKELY(!get_bson_cstr(key)))
352  {
353  return false;
354  }
355 
356  if (!is_array && !sax->key(key))
357  {
358  return false;
359  }
360 
361  if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_internal(element_type, element_type_parse_position)))
362  {
363  return false;
364  }
365 
366  // get_bson_cstr only appends
367  key.clear();
368  }
369 
370  return true;
371  }
372 
377  bool parse_bson_array()
378  {
379  std::int32_t document_size{};
380  get_number<std::int32_t, true>(input_format_t::bson, document_size);
381 
382  if (JSON_HEDLEY_UNLIKELY(!sax->start_array(std::size_t(-1))))
383  {
384  return false;
385  }
386 
387  if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_list(/*is_array*/true)))
388  {
389  return false;
390  }
391 
392  return sax->end_array();
393  }
394 
396  // CBOR //
398 
407  bool parse_cbor_internal(const bool get_char,
408  const cbor_tag_handler_t tag_handler)
409  {
410  switch (get_char ? get() : current)
411  {
412  // EOF
413  case std::char_traits<char_type>::eof():
414  return unexpect_eof(input_format_t::cbor, "value");
415 
416  // Integer 0x00..0x17 (0..23)
417  case 0x00:
418  case 0x01:
419  case 0x02:
420  case 0x03:
421  case 0x04:
422  case 0x05:
423  case 0x06:
424  case 0x07:
425  case 0x08:
426  case 0x09:
427  case 0x0A:
428  case 0x0B:
429  case 0x0C:
430  case 0x0D:
431  case 0x0E:
432  case 0x0F:
433  case 0x10:
434  case 0x11:
435  case 0x12:
436  case 0x13:
437  case 0x14:
438  case 0x15:
439  case 0x16:
440  case 0x17:
441  return sax->number_unsigned(static_cast<number_unsigned_t>(current));
442 
443  case 0x18: // Unsigned integer (one-byte uint8_t follows)
444  {
445  std::uint8_t number{};
446  return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
447  }
448 
449  case 0x19: // Unsigned integer (two-byte uint16_t follows)
450  {
451  std::uint16_t number{};
452  return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
453  }
454 
455  case 0x1A: // Unsigned integer (four-byte uint32_t follows)
456  {
457  std::uint32_t number{};
458  return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
459  }
460 
461  case 0x1B: // Unsigned integer (eight-byte uint64_t follows)
462  {
463  std::uint64_t number{};
464  return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
465  }
466 
467  // Negative integer -1-0x00..-1-0x17 (-1..-24)
468  case 0x20:
469  case 0x21:
470  case 0x22:
471  case 0x23:
472  case 0x24:
473  case 0x25:
474  case 0x26:
475  case 0x27:
476  case 0x28:
477  case 0x29:
478  case 0x2A:
479  case 0x2B:
480  case 0x2C:
481  case 0x2D:
482  case 0x2E:
483  case 0x2F:
484  case 0x30:
485  case 0x31:
486  case 0x32:
487  case 0x33:
488  case 0x34:
489  case 0x35:
490  case 0x36:
491  case 0x37:
492  return sax->number_integer(static_cast<std::int8_t>(0x20 - 1 - current));
493 
494  case 0x38: // Negative integer (one-byte uint8_t follows)
495  {
496  std::uint8_t number{};
497  return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number);
498  }
499 
500  case 0x39: // Negative integer -1-n (two-byte uint16_t follows)
501  {
502  std::uint16_t number{};
503  return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number);
504  }
505 
506  case 0x3A: // Negative integer -1-n (four-byte uint32_t follows)
507  {
508  std::uint32_t number{};
509  return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number);
510  }
511 
512  case 0x3B: // Negative integer -1-n (eight-byte uint64_t follows)
513  {
514  std::uint64_t number{};
515  return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1)
516  - static_cast<number_integer_t>(number));
517  }
518 
519  // Binary data (0x00..0x17 bytes follow)
520  case 0x40:
521  case 0x41:
522  case 0x42:
523  case 0x43:
524  case 0x44:
525  case 0x45:
526  case 0x46:
527  case 0x47:
528  case 0x48:
529  case 0x49:
530  case 0x4A:
531  case 0x4B:
532  case 0x4C:
533  case 0x4D:
534  case 0x4E:
535  case 0x4F:
536  case 0x50:
537  case 0x51:
538  case 0x52:
539  case 0x53:
540  case 0x54:
541  case 0x55:
542  case 0x56:
543  case 0x57:
544  case 0x58: // Binary data (one-byte uint8_t for n follows)
545  case 0x59: // Binary data (two-byte uint16_t for n follow)
546  case 0x5A: // Binary data (four-byte uint32_t for n follow)
547  case 0x5B: // Binary data (eight-byte uint64_t for n follow)
548  case 0x5F: // Binary data (indefinite length)
549  {
550  binary_t b;
551  return get_cbor_binary(b) && sax->binary(b);
552  }
553 
554  // UTF-8 string (0x00..0x17 bytes follow)
555  case 0x60:
556  case 0x61:
557  case 0x62:
558  case 0x63:
559  case 0x64:
560  case 0x65:
561  case 0x66:
562  case 0x67:
563  case 0x68:
564  case 0x69:
565  case 0x6A:
566  case 0x6B:
567  case 0x6C:
568  case 0x6D:
569  case 0x6E:
570  case 0x6F:
571  case 0x70:
572  case 0x71:
573  case 0x72:
574  case 0x73:
575  case 0x74:
576  case 0x75:
577  case 0x76:
578  case 0x77:
579  case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
580  case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
581  case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
582  case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
583  case 0x7F: // UTF-8 string (indefinite length)
584  {
585  string_t s;
586  return get_cbor_string(s) && sax->string(s);
587  }
588 
589  // array (0x00..0x17 data items follow)
590  case 0x80:
591  case 0x81:
592  case 0x82:
593  case 0x83:
594  case 0x84:
595  case 0x85:
596  case 0x86:
597  case 0x87:
598  case 0x88:
599  case 0x89:
600  case 0x8A:
601  case 0x8B:
602  case 0x8C:
603  case 0x8D:
604  case 0x8E:
605  case 0x8F:
606  case 0x90:
607  case 0x91:
608  case 0x92:
609  case 0x93:
610  case 0x94:
611  case 0x95:
612  case 0x96:
613  case 0x97:
614  return get_cbor_array(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu), tag_handler);
615 
616  case 0x98: // array (one-byte uint8_t for n follows)
617  {
618  std::uint8_t len{};
619  return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
620  }
621 
622  case 0x99: // array (two-byte uint16_t for n follow)
623  {
624  std::uint16_t len{};
625  return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
626  }
627 
628  case 0x9A: // array (four-byte uint32_t for n follow)
629  {
630  std::uint32_t len{};
631  return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
632  }
633 
634  case 0x9B: // array (eight-byte uint64_t for n follow)
635  {
636  std::uint64_t len{};
637  return get_number(input_format_t::cbor, len) && get_cbor_array(detail::conditional_static_cast<std::size_t>(len), tag_handler);
638  }
639 
640  case 0x9F: // array (indefinite length)
641  return get_cbor_array(std::size_t(-1), tag_handler);
642 
643  // map (0x00..0x17 pairs of data items follow)
644  case 0xA0:
645  case 0xA1:
646  case 0xA2:
647  case 0xA3:
648  case 0xA4:
649  case 0xA5:
650  case 0xA6:
651  case 0xA7:
652  case 0xA8:
653  case 0xA9:
654  case 0xAA:
655  case 0xAB:
656  case 0xAC:
657  case 0xAD:
658  case 0xAE:
659  case 0xAF:
660  case 0xB0:
661  case 0xB1:
662  case 0xB2:
663  case 0xB3:
664  case 0xB4:
665  case 0xB5:
666  case 0xB6:
667  case 0xB7:
668  return get_cbor_object(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu), tag_handler);
669 
670  case 0xB8: // map (one-byte uint8_t for n follows)
671  {
672  std::uint8_t len{};
673  return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
674  }
675 
676  case 0xB9: // map (two-byte uint16_t for n follow)
677  {
678  std::uint16_t len{};
679  return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
680  }
681 
682  case 0xBA: // map (four-byte uint32_t for n follow)
683  {
684  std::uint32_t len{};
685  return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
686  }
687 
688  case 0xBB: // map (eight-byte uint64_t for n follow)
689  {
690  std::uint64_t len{};
691  return get_number(input_format_t::cbor, len) && get_cbor_object(detail::conditional_static_cast<std::size_t>(len), tag_handler);
692  }
693 
694  case 0xBF: // map (indefinite length)
695  return get_cbor_object(std::size_t(-1), tag_handler);
696 
697  case 0xC6: // tagged item
698  case 0xC7:
699  case 0xC8:
700  case 0xC9:
701  case 0xCA:
702  case 0xCB:
703  case 0xCC:
704  case 0xCD:
705  case 0xCE:
706  case 0xCF:
707  case 0xD0:
708  case 0xD1:
709  case 0xD2:
710  case 0xD3:
711  case 0xD4:
712  case 0xD8: // tagged item (1 bytes follow)
713  case 0xD9: // tagged item (2 bytes follow)
714  case 0xDA: // tagged item (4 bytes follow)
715  case 0xDB: // tagged item (8 bytes follow)
716  {
717  switch (tag_handler)
718  {
720  {
721  auto last_token = get_token_string();
722  return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::cbor, "invalid byte: 0x" + last_token, "value"), BasicJsonType()));
723  }
724 
726  {
727  // ignore binary subtype
728  switch (current)
729  {
730  case 0xD8:
731  {
732  std::uint8_t subtype_to_ignore{};
733  get_number(input_format_t::cbor, subtype_to_ignore);
734  break;
735  }
736  case 0xD9:
737  {
738  std::uint16_t subtype_to_ignore{};
739  get_number(input_format_t::cbor, subtype_to_ignore);
740  break;
741  }
742  case 0xDA:
743  {
744  std::uint32_t subtype_to_ignore{};
745  get_number(input_format_t::cbor, subtype_to_ignore);
746  break;
747  }
748  case 0xDB:
749  {
750  std::uint64_t subtype_to_ignore{};
751  get_number(input_format_t::cbor, subtype_to_ignore);
752  break;
753  }
754  default:
755  break;
756  }
757  return parse_cbor_internal(true, tag_handler);
758  }
759 
761  {
762  binary_t b;
763  // use binary subtype and store in binary container
764  switch (current)
765  {
766  case 0xD8:
767  {
768  std::uint8_t subtype{};
769  get_number(input_format_t::cbor, subtype);
770  b.set_subtype(detail::conditional_static_cast<typename binary_t::subtype_type>(subtype));
771  break;
772  }
773  case 0xD9:
774  {
775  std::uint16_t subtype{};
776  get_number(input_format_t::cbor, subtype);
777  b.set_subtype(detail::conditional_static_cast<typename binary_t::subtype_type>(subtype));
778  break;
779  }
780  case 0xDA:
781  {
782  std::uint32_t subtype{};
783  get_number(input_format_t::cbor, subtype);
784  b.set_subtype(detail::conditional_static_cast<typename binary_t::subtype_type>(subtype));
785  break;
786  }
787  case 0xDB:
788  {
789  std::uint64_t subtype{};
790  get_number(input_format_t::cbor, subtype);
791  b.set_subtype(detail::conditional_static_cast<typename binary_t::subtype_type>(subtype));
792  break;
793  }
794  default:
795  return parse_cbor_internal(true, tag_handler);
796  }
797  get();
798  return get_cbor_binary(b) && sax->binary(b);
799  }
800 
801  default: // LCOV_EXCL_LINE
802  JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE
803  return false; // LCOV_EXCL_LINE
804  }
805  }
806 
807  case 0xF4: // false
808  return sax->boolean(false);
809 
810  case 0xF5: // true
811  return sax->boolean(true);
812 
813  case 0xF6: // null
814  return sax->null();
815 
816  case 0xF9: // Half-Precision Float (two-byte IEEE 754)
817  {
818  const auto byte1_raw = get();
819  if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "number")))
820  {
821  return false;
822  }
823  const auto byte2_raw = get();
824  if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "number")))
825  {
826  return false;
827  }
828 
829  const auto byte1 = static_cast<unsigned char>(byte1_raw);
830  const auto byte2 = static_cast<unsigned char>(byte2_raw);
831 
832  // code from RFC 7049, Appendix D, Figure 3:
833  // As half-precision floating-point numbers were only added
834  // to IEEE 754 in 2008, today's programming platforms often
835  // still only have limited support for them. It is very
836  // easy to include at least decoding support for them even
837  // without such support. An example of a small decoder for
838  // half-precision floating-point numbers in the C language
839  // is shown in Fig. 3.
840  const auto half = static_cast<unsigned int>((byte1 << 8u) + byte2);
841  const double val = [&half]
842  {
843  const int exp = (half >> 10u) & 0x1Fu;
844  const unsigned int mant = half & 0x3FFu;
845  JSON_ASSERT(0 <= exp&& exp <= 32);
846  JSON_ASSERT(mant <= 1024);
847  switch (exp)
848  {
849  case 0:
850  return std::ldexp(mant, -24);
851  case 31:
852  return (mant == 0)
853  ? std::numeric_limits<double>::infinity()
854  : std::numeric_limits<double>::quiet_NaN();
855  default:
856  return std::ldexp(mant + 1024, exp - 25);
857  }
858  }();
859  return sax->number_float((half & 0x8000u) != 0
860  ? static_cast<number_float_t>(-val)
861  : static_cast<number_float_t>(val), "");
862  }
863 
864  case 0xFA: // Single-Precision Float (four-byte IEEE 754)
865  {
866  float number{};
867  return get_number(input_format_t::cbor, number) && sax->number_float(static_cast<number_float_t>(number), "");
868  }
869 
870  case 0xFB: // Double-Precision Float (eight-byte IEEE 754)
871  {
872  double number{};
873  return get_number(input_format_t::cbor, number) && sax->number_float(static_cast<number_float_t>(number), "");
874  }
875 
876  default: // anything else (0xFF is handled inside the other types)
877  {
878  auto last_token = get_token_string();
879  return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::cbor, "invalid byte: 0x" + last_token, "value"), BasicJsonType()));
880  }
881  }
882  }
883 
895  bool get_cbor_string(string_t& result)
896  {
897  if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "string")))
898  {
899  return false;
900  }
901 
902  switch (current)
903  {
904  // UTF-8 string (0x00..0x17 bytes follow)
905  case 0x60:
906  case 0x61:
907  case 0x62:
908  case 0x63:
909  case 0x64:
910  case 0x65:
911  case 0x66:
912  case 0x67:
913  case 0x68:
914  case 0x69:
915  case 0x6A:
916  case 0x6B:
917  case 0x6C:
918  case 0x6D:
919  case 0x6E:
920  case 0x6F:
921  case 0x70:
922  case 0x71:
923  case 0x72:
924  case 0x73:
925  case 0x74:
926  case 0x75:
927  case 0x76:
928  case 0x77:
929  {
930  return get_string(input_format_t::cbor, static_cast<unsigned int>(current) & 0x1Fu, result);
931  }
932 
933  case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
934  {
935  std::uint8_t len{};
936  return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
937  }
938 
939  case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
940  {
941  std::uint16_t len{};
942  return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
943  }
944 
945  case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
946  {
947  std::uint32_t len{};
948  return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
949  }
950 
951  case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
952  {
953  std::uint64_t len{};
954  return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
955  }
956 
957  case 0x7F: // UTF-8 string (indefinite length)
958  {
959  while (get() != 0xFF)
960  {
961  string_t chunk;
962  if (!get_cbor_string(chunk))
963  {
964  return false;
965  }
966  result.append(chunk);
967  }
968  return true;
969  }
970 
971  default:
972  {
973  auto last_token = get_token_string();
974  return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string"), BasicJsonType()));
975  }
976  }
977  }
978 
990  bool get_cbor_binary(binary_t& result)
991  {
992  if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "binary")))
993  {
994  return false;
995  }
996 
997  switch (current)
998  {
999  // Binary data (0x00..0x17 bytes follow)
1000  case 0x40:
1001  case 0x41:
1002  case 0x42:
1003  case 0x43:
1004  case 0x44:
1005  case 0x45:
1006  case 0x46:
1007  case 0x47:
1008  case 0x48:
1009  case 0x49:
1010  case 0x4A:
1011  case 0x4B:
1012  case 0x4C:
1013  case 0x4D:
1014  case 0x4E:
1015  case 0x4F:
1016  case 0x50:
1017  case 0x51:
1018  case 0x52:
1019  case 0x53:
1020  case 0x54:
1021  case 0x55:
1022  case 0x56:
1023  case 0x57:
1024  {
1025  return get_binary(input_format_t::cbor, static_cast<unsigned int>(current) & 0x1Fu, result);
1026  }
1027 
1028  case 0x58: // Binary data (one-byte uint8_t for n follows)
1029  {
1030  std::uint8_t len{};
1031  return get_number(input_format_t::cbor, len) &&
1032  get_binary(input_format_t::cbor, len, result);
1033  }
1034 
1035  case 0x59: // Binary data (two-byte uint16_t for n follow)
1036  {
1037  std::uint16_t len{};
1038  return get_number(input_format_t::cbor, len) &&
1039  get_binary(input_format_t::cbor, len, result);
1040  }
1041 
1042  case 0x5A: // Binary data (four-byte uint32_t for n follow)
1043  {
1044  std::uint32_t len{};
1045  return get_number(input_format_t::cbor, len) &&
1046  get_binary(input_format_t::cbor, len, result);
1047  }
1048 
1049  case 0x5B: // Binary data (eight-byte uint64_t for n follow)
1050  {
1051  std::uint64_t len{};
1052  return get_number(input_format_t::cbor, len) &&
1053  get_binary(input_format_t::cbor, len, result);
1054  }
1055 
1056  case 0x5F: // Binary data (indefinite length)
1057  {
1058  while (get() != 0xFF)
1059  {
1060  binary_t chunk;
1061  if (!get_cbor_binary(chunk))
1062  {
1063  return false;
1064  }
1065  result.insert(result.end(), chunk.begin(), chunk.end());
1066  }
1067  return true;
1068  }
1069 
1070  default:
1071  {
1072  auto last_token = get_token_string();
1073  return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x40-0x5B) or indefinite binary array type (0x5F); last byte: 0x" + last_token, "binary"), BasicJsonType()));
1074  }
1075  }
1076  }
1077 
1084  bool get_cbor_array(const std::size_t len,
1085  const cbor_tag_handler_t tag_handler)
1086  {
1087  if (JSON_HEDLEY_UNLIKELY(!sax->start_array(len)))
1088  {
1089  return false;
1090  }
1091 
1092  if (len != std::size_t(-1))
1093  {
1094  for (std::size_t i = 0; i < len; ++i)
1095  {
1096  if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler)))
1097  {
1098  return false;
1099  }
1100  }
1101  }
1102  else
1103  {
1104  while (get() != 0xFF)
1105  {
1106  if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(false, tag_handler)))
1107  {
1108  return false;
1109  }
1110  }
1111  }
1112 
1113  return sax->end_array();
1114  }
1115 
1122  bool get_cbor_object(const std::size_t len,
1123  const cbor_tag_handler_t tag_handler)
1124  {
1125  if (JSON_HEDLEY_UNLIKELY(!sax->start_object(len)))
1126  {
1127  return false;
1128  }
1129 
1130  if (len != 0)
1131  {
1132  string_t key;
1133  if (len != std::size_t(-1))
1134  {
1135  for (std::size_t i = 0; i < len; ++i)
1136  {
1137  get();
1138  if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key)))
1139  {
1140  return false;
1141  }
1142 
1143  if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler)))
1144  {
1145  return false;
1146  }
1147  key.clear();
1148  }
1149  }
1150  else
1151  {
1152  while (get() != 0xFF)
1153  {
1154  if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key)))
1155  {
1156  return false;
1157  }
1158 
1159  if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler)))
1160  {
1161  return false;
1162  }
1163  key.clear();
1164  }
1165  }
1166  }
1167 
1168  return sax->end_object();
1169  }
1170 
1172  // MsgPack //
1174 
1178  bool parse_msgpack_internal()
1179  {
1180  switch (get())
1181  {
1182  // EOF
1183  case std::char_traits<char_type>::eof():
1184  return unexpect_eof(input_format_t::msgpack, "value");
1185 
1186  // positive fixint
1187  case 0x00:
1188  case 0x01:
1189  case 0x02:
1190  case 0x03:
1191  case 0x04:
1192  case 0x05:
1193  case 0x06:
1194  case 0x07:
1195  case 0x08:
1196  case 0x09:
1197  case 0x0A:
1198  case 0x0B:
1199  case 0x0C:
1200  case 0x0D:
1201  case 0x0E:
1202  case 0x0F:
1203  case 0x10:
1204  case 0x11:
1205  case 0x12:
1206  case 0x13:
1207  case 0x14:
1208  case 0x15:
1209  case 0x16:
1210  case 0x17:
1211  case 0x18:
1212  case 0x19:
1213  case 0x1A:
1214  case 0x1B:
1215  case 0x1C:
1216  case 0x1D:
1217  case 0x1E:
1218  case 0x1F:
1219  case 0x20:
1220  case 0x21:
1221  case 0x22:
1222  case 0x23:
1223  case 0x24:
1224  case 0x25:
1225  case 0x26:
1226  case 0x27:
1227  case 0x28:
1228  case 0x29:
1229  case 0x2A:
1230  case 0x2B:
1231  case 0x2C:
1232  case 0x2D:
1233  case 0x2E:
1234  case 0x2F:
1235  case 0x30:
1236  case 0x31:
1237  case 0x32:
1238  case 0x33:
1239  case 0x34:
1240  case 0x35:
1241  case 0x36:
1242  case 0x37:
1243  case 0x38:
1244  case 0x39:
1245  case 0x3A:
1246  case 0x3B:
1247  case 0x3C:
1248  case 0x3D:
1249  case 0x3E:
1250  case 0x3F:
1251  case 0x40:
1252  case 0x41:
1253  case 0x42:
1254  case 0x43:
1255  case 0x44:
1256  case 0x45:
1257  case 0x46:
1258  case 0x47:
1259  case 0x48:
1260  case 0x49:
1261  case 0x4A:
1262  case 0x4B:
1263  case 0x4C:
1264  case 0x4D:
1265  case 0x4E:
1266  case 0x4F:
1267  case 0x50:
1268  case 0x51:
1269  case 0x52:
1270  case 0x53:
1271  case 0x54:
1272  case 0x55:
1273  case 0x56:
1274  case 0x57:
1275  case 0x58:
1276  case 0x59:
1277  case 0x5A:
1278  case 0x5B:
1279  case 0x5C:
1280  case 0x5D:
1281  case 0x5E:
1282  case 0x5F:
1283  case 0x60:
1284  case 0x61:
1285  case 0x62:
1286  case 0x63:
1287  case 0x64:
1288  case 0x65:
1289  case 0x66:
1290  case 0x67:
1291  case 0x68:
1292  case 0x69:
1293  case 0x6A:
1294  case 0x6B:
1295  case 0x6C:
1296  case 0x6D:
1297  case 0x6E:
1298  case 0x6F:
1299  case 0x70:
1300  case 0x71:
1301  case 0x72:
1302  case 0x73:
1303  case 0x74:
1304  case 0x75:
1305  case 0x76:
1306  case 0x77:
1307  case 0x78:
1308  case 0x79:
1309  case 0x7A:
1310  case 0x7B:
1311  case 0x7C:
1312  case 0x7D:
1313  case 0x7E:
1314  case 0x7F:
1315  return sax->number_unsigned(static_cast<number_unsigned_t>(current));
1316 
1317  // fixmap
1318  case 0x80:
1319  case 0x81:
1320  case 0x82:
1321  case 0x83:
1322  case 0x84:
1323  case 0x85:
1324  case 0x86:
1325  case 0x87:
1326  case 0x88:
1327  case 0x89:
1328  case 0x8A:
1329  case 0x8B:
1330  case 0x8C:
1331  case 0x8D:
1332  case 0x8E:
1333  case 0x8F:
1334  return get_msgpack_object(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu));
1335 
1336  // fixarray
1337  case 0x90:
1338  case 0x91:
1339  case 0x92:
1340  case 0x93:
1341  case 0x94:
1342  case 0x95:
1343  case 0x96:
1344  case 0x97:
1345  case 0x98:
1346  case 0x99:
1347  case 0x9A:
1348  case 0x9B:
1349  case 0x9C:
1350  case 0x9D:
1351  case 0x9E:
1352  case 0x9F:
1353  return get_msgpack_array(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu));
1354 
1355  // fixstr
1356  case 0xA0:
1357  case 0xA1:
1358  case 0xA2:
1359  case 0xA3:
1360  case 0xA4:
1361  case 0xA5:
1362  case 0xA6:
1363  case 0xA7:
1364  case 0xA8:
1365  case 0xA9:
1366  case 0xAA:
1367  case 0xAB:
1368  case 0xAC:
1369  case 0xAD:
1370  case 0xAE:
1371  case 0xAF:
1372  case 0xB0:
1373  case 0xB1:
1374  case 0xB2:
1375  case 0xB3:
1376  case 0xB4:
1377  case 0xB5:
1378  case 0xB6:
1379  case 0xB7:
1380  case 0xB8:
1381  case 0xB9:
1382  case 0xBA:
1383  case 0xBB:
1384  case 0xBC:
1385  case 0xBD:
1386  case 0xBE:
1387  case 0xBF:
1388  case 0xD9: // str 8
1389  case 0xDA: // str 16
1390  case 0xDB: // str 32
1391  {
1392  string_t s;
1393  return get_msgpack_string(s) && sax->string(s);
1394  }
1395 
1396  case 0xC0: // nil
1397  return sax->null();
1398 
1399  case 0xC2: // false
1400  return sax->boolean(false);
1401 
1402  case 0xC3: // true
1403  return sax->boolean(true);
1404 
1405  case 0xC4: // bin 8
1406  case 0xC5: // bin 16
1407  case 0xC6: // bin 32
1408  case 0xC7: // ext 8
1409  case 0xC8: // ext 16
1410  case 0xC9: // ext 32
1411  case 0xD4: // fixext 1
1412  case 0xD5: // fixext 2
1413  case 0xD6: // fixext 4
1414  case 0xD7: // fixext 8
1415  case 0xD8: // fixext 16
1416  {
1417  binary_t b;
1418  return get_msgpack_binary(b) && sax->binary(b);
1419  }
1420 
1421  case 0xCA: // float 32
1422  {
1423  float number{};
1424  return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast<number_float_t>(number), "");
1425  }
1426 
1427  case 0xCB: // float 64
1428  {
1429  double number{};
1430  return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast<number_float_t>(number), "");
1431  }
1432 
1433  case 0xCC: // uint 8
1434  {
1435  std::uint8_t number{};
1436  return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
1437  }
1438 
1439  case 0xCD: // uint 16
1440  {
1441  std::uint16_t number{};
1442  return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
1443  }
1444 
1445  case 0xCE: // uint 32
1446  {
1447  std::uint32_t number{};
1448  return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
1449  }
1450 
1451  case 0xCF: // uint 64
1452  {
1453  std::uint64_t number{};
1454  return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
1455  }
1456 
1457  case 0xD0: // int 8
1458  {
1459  std::int8_t number{};
1460  return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
1461  }
1462 
1463  case 0xD1: // int 16
1464  {
1465  std::int16_t number{};
1466  return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
1467  }
1468 
1469  case 0xD2: // int 32
1470  {
1471  std::int32_t number{};
1472  return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
1473  }
1474 
1475  case 0xD3: // int 64
1476  {
1477  std::int64_t number{};
1478  return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
1479  }
1480 
1481  case 0xDC: // array 16
1482  {
1483  std::uint16_t len{};
1484  return get_number(input_format_t::msgpack, len) && get_msgpack_array(static_cast<std::size_t>(len));
1485  }
1486 
1487  case 0xDD: // array 32
1488  {
1489  std::uint32_t len{};
1490  return get_number(input_format_t::msgpack, len) && get_msgpack_array(static_cast<std::size_t>(len));
1491  }
1492 
1493  case 0xDE: // map 16
1494  {
1495  std::uint16_t len{};
1496  return get_number(input_format_t::msgpack, len) && get_msgpack_object(static_cast<std::size_t>(len));
1497  }
1498 
1499  case 0xDF: // map 32
1500  {
1501  std::uint32_t len{};
1502  return get_number(input_format_t::msgpack, len) && get_msgpack_object(static_cast<std::size_t>(len));
1503  }
1504 
1505  // negative fixint
1506  case 0xE0:
1507  case 0xE1:
1508  case 0xE2:
1509  case 0xE3:
1510  case 0xE4:
1511  case 0xE5:
1512  case 0xE6:
1513  case 0xE7:
1514  case 0xE8:
1515  case 0xE9:
1516  case 0xEA:
1517  case 0xEB:
1518  case 0xEC:
1519  case 0xED:
1520  case 0xEE:
1521  case 0xEF:
1522  case 0xF0:
1523  case 0xF1:
1524  case 0xF2:
1525  case 0xF3:
1526  case 0xF4:
1527  case 0xF5:
1528  case 0xF6:
1529  case 0xF7:
1530  case 0xF8:
1531  case 0xF9:
1532  case 0xFA:
1533  case 0xFB:
1534  case 0xFC:
1535  case 0xFD:
1536  case 0xFE:
1537  case 0xFF:
1538  return sax->number_integer(static_cast<std::int8_t>(current));
1539 
1540  default: // anything else
1541  {
1542  auto last_token = get_token_string();
1543  return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::msgpack, "invalid byte: 0x" + last_token, "value"), BasicJsonType()));
1544  }
1545  }
1546  }
1547 
1558  bool get_msgpack_string(string_t& result)
1559  {
1560  if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::msgpack, "string")))
1561  {
1562  return false;
1563  }
1564 
1565  switch (current)
1566  {
1567  // fixstr
1568  case 0xA0:
1569  case 0xA1:
1570  case 0xA2:
1571  case 0xA3:
1572  case 0xA4:
1573  case 0xA5:
1574  case 0xA6:
1575  case 0xA7:
1576  case 0xA8:
1577  case 0xA9:
1578  case 0xAA:
1579  case 0xAB:
1580  case 0xAC:
1581  case 0xAD:
1582  case 0xAE:
1583  case 0xAF:
1584  case 0xB0:
1585  case 0xB1:
1586  case 0xB2:
1587  case 0xB3:
1588  case 0xB4:
1589  case 0xB5:
1590  case 0xB6:
1591  case 0xB7:
1592  case 0xB8:
1593  case 0xB9:
1594  case 0xBA:
1595  case 0xBB:
1596  case 0xBC:
1597  case 0xBD:
1598  case 0xBE:
1599  case 0xBF:
1600  {
1601  return get_string(input_format_t::msgpack, static_cast<unsigned int>(current) & 0x1Fu, result);
1602  }
1603 
1604  case 0xD9: // str 8
1605  {
1606  std::uint8_t len{};
1607  return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result);
1608  }
1609 
1610  case 0xDA: // str 16
1611  {
1612  std::uint16_t len{};
1613  return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result);
1614  }
1615 
1616  case 0xDB: // str 32
1617  {
1618  std::uint32_t len{};
1619  return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result);
1620  }
1621 
1622  default:
1623  {
1624  auto last_token = get_token_string();
1625  return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::msgpack, "expected length specification (0xA0-0xBF, 0xD9-0xDB); last byte: 0x" + last_token, "string"), BasicJsonType()));
1626  }
1627  }
1628  }
1629 
1640  bool get_msgpack_binary(binary_t& result)
1641  {
1642  // helper function to set the subtype
1643  auto assign_and_return_true = [&result](std::int8_t subtype)
1644  {
1645  result.set_subtype(static_cast<std::uint8_t>(subtype));
1646  return true;
1647  };
1648 
1649  switch (current)
1650  {
1651  case 0xC4: // bin 8
1652  {
1653  std::uint8_t len{};
1654  return get_number(input_format_t::msgpack, len) &&
1655  get_binary(input_format_t::msgpack, len, result);
1656  }
1657 
1658  case 0xC5: // bin 16
1659  {
1660  std::uint16_t len{};
1661  return get_number(input_format_t::msgpack, len) &&
1662  get_binary(input_format_t::msgpack, len, result);
1663  }
1664 
1665  case 0xC6: // bin 32
1666  {
1667  std::uint32_t len{};
1668  return get_number(input_format_t::msgpack, len) &&
1669  get_binary(input_format_t::msgpack, len, result);
1670  }
1671 
1672  case 0xC7: // ext 8
1673  {
1674  std::uint8_t len{};
1675  std::int8_t subtype{};
1676  return get_number(input_format_t::msgpack, len) &&
1677  get_number(input_format_t::msgpack, subtype) &&
1678  get_binary(input_format_t::msgpack, len, result) &&
1679  assign_and_return_true(subtype);
1680  }
1681 
1682  case 0xC8: // ext 16
1683  {
1684  std::uint16_t len{};
1685  std::int8_t subtype{};
1686  return get_number(input_format_t::msgpack, len) &&
1687  get_number(input_format_t::msgpack, subtype) &&
1688  get_binary(input_format_t::msgpack, len, result) &&
1689  assign_and_return_true(subtype);
1690  }
1691 
1692  case 0xC9: // ext 32
1693  {
1694  std::uint32_t len{};
1695  std::int8_t subtype{};
1696  return get_number(input_format_t::msgpack, len) &&
1697  get_number(input_format_t::msgpack, subtype) &&
1698  get_binary(input_format_t::msgpack, len, result) &&
1699  assign_and_return_true(subtype);
1700  }
1701 
1702  case 0xD4: // fixext 1
1703  {
1704  std::int8_t subtype{};
1705  return get_number(input_format_t::msgpack, subtype) &&
1706  get_binary(input_format_t::msgpack, 1, result) &&
1707  assign_and_return_true(subtype);
1708  }
1709 
1710  case 0xD5: // fixext 2
1711  {
1712  std::int8_t subtype{};
1713  return get_number(input_format_t::msgpack, subtype) &&
1714  get_binary(input_format_t::msgpack, 2, result) &&
1715  assign_and_return_true(subtype);
1716  }
1717 
1718  case 0xD6: // fixext 4
1719  {
1720  std::int8_t subtype{};
1721  return get_number(input_format_t::msgpack, subtype) &&
1722  get_binary(input_format_t::msgpack, 4, result) &&
1723  assign_and_return_true(subtype);
1724  }
1725 
1726  case 0xD7: // fixext 8
1727  {
1728  std::int8_t subtype{};
1729  return get_number(input_format_t::msgpack, subtype) &&
1730  get_binary(input_format_t::msgpack, 8, result) &&
1731  assign_and_return_true(subtype);
1732  }
1733 
1734  case 0xD8: // fixext 16
1735  {
1736  std::int8_t subtype{};
1737  return get_number(input_format_t::msgpack, subtype) &&
1738  get_binary(input_format_t::msgpack, 16, result) &&
1739  assign_and_return_true(subtype);
1740  }
1741 
1742  default: // LCOV_EXCL_LINE
1743  return false; // LCOV_EXCL_LINE
1744  }
1745  }
1746 
1751  bool get_msgpack_array(const std::size_t len)
1752  {
1753  if (JSON_HEDLEY_UNLIKELY(!sax->start_array(len)))
1754  {
1755  return false;
1756  }
1757 
1758  for (std::size_t i = 0; i < len; ++i)
1759  {
1760  if (JSON_HEDLEY_UNLIKELY(!parse_msgpack_internal()))
1761  {
1762  return false;
1763  }
1764  }
1765 
1766  return sax->end_array();
1767  }
1768 
1773  bool get_msgpack_object(const std::size_t len)
1774  {
1775  if (JSON_HEDLEY_UNLIKELY(!sax->start_object(len)))
1776  {
1777  return false;
1778  }
1779 
1780  string_t key;
1781  for (std::size_t i = 0; i < len; ++i)
1782  {
1783  get();
1784  if (JSON_HEDLEY_UNLIKELY(!get_msgpack_string(key) || !sax->key(key)))
1785  {
1786  return false;
1787  }
1788 
1789  if (JSON_HEDLEY_UNLIKELY(!parse_msgpack_internal()))
1790  {
1791  return false;
1792  }
1793  key.clear();
1794  }
1795 
1796  return sax->end_object();
1797  }
1798 
1800  // UBJSON //
1802 
1810  bool parse_ubjson_internal(const bool get_char = true)
1811  {
1812  return get_ubjson_value(get_char ? get_ignore_noop() : current);
1813  }
1814 
1829  bool get_ubjson_string(string_t& result, const bool get_char = true)
1830  {
1831  if (get_char)
1832  {
1833  get(); // TODO(niels): may we ignore N here?
1834  }
1835 
1836  if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "value")))
1837  {
1838  return false;
1839  }
1840 
1841  switch (current)
1842  {
1843  case 'U':
1844  {
1845  std::uint8_t len{};
1846  return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result);
1847  }
1848 
1849  case 'i':
1850  {
1851  std::int8_t len{};
1852  return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result);
1853  }
1854 
1855  case 'I':
1856  {
1857  std::int16_t len{};
1858  return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result);
1859  }
1860 
1861  case 'l':
1862  {
1863  std::int32_t len{};
1864  return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result);
1865  }
1866 
1867  case 'L':
1868  {
1869  std::int64_t len{};
1870  return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result);
1871  }
1872 
1873  default:
1874  auto last_token = get_token_string();
1875  return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L); last byte: 0x" + last_token, "string"), BasicJsonType()));
1876  }
1877  }
1878 
1883  bool get_ubjson_size_value(std::size_t& result)
1884  {
1885  switch (get_ignore_noop())
1886  {
1887  case 'U':
1888  {
1889  std::uint8_t number{};
1890  if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number)))
1891  {
1892  return false;
1893  }
1894  result = static_cast<std::size_t>(number);
1895  return true;
1896  }
1897 
1898  case 'i':
1899  {
1900  std::int8_t number{};
1901  if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number)))
1902  {
1903  return false;
1904  }
1905  result = static_cast<std::size_t>(number); // NOLINT(bugprone-signed-char-misuse,cert-str34-c): number is not a char
1906  return true;
1907  }
1908 
1909  case 'I':
1910  {
1911  std::int16_t number{};
1912  if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number)))
1913  {
1914  return false;
1915  }
1916  result = static_cast<std::size_t>(number);
1917  return true;
1918  }
1919 
1920  case 'l':
1921  {
1922  std::int32_t number{};
1923  if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number)))
1924  {
1925  return false;
1926  }
1927  result = static_cast<std::size_t>(number);
1928  return true;
1929  }
1930 
1931  case 'L':
1932  {
1933  std::int64_t number{};
1934  if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number)))
1935  {
1936  return false;
1937  }
1938  result = static_cast<std::size_t>(number);
1939  return true;
1940  }
1941 
1942  default:
1943  {
1944  auto last_token = get_token_string();
1945  return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L) after '#'; last byte: 0x" + last_token, "size"), BasicJsonType()));
1946  }
1947  }
1948  }
1949 
1960  bool get_ubjson_size_type(std::pair<std::size_t, char_int_type>& result)
1961  {
1962  result.first = string_t::npos; // size
1963  result.second = 0; // type
1964 
1965  get_ignore_noop();
1966 
1967  if (current == '$')
1968  {
1969  result.second = get(); // must not ignore 'N', because 'N' maybe the type
1970  if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "type")))
1971  {
1972  return false;
1973  }
1974 
1975  get_ignore_noop();
1976  if (JSON_HEDLEY_UNLIKELY(current != '#'))
1977  {
1978  if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "value")))
1979  {
1980  return false;
1981  }
1982  auto last_token = get_token_string();
1983  return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "expected '#' after type information; last byte: 0x" + last_token, "size"), BasicJsonType()));
1984  }
1985 
1986  return get_ubjson_size_value(result.first);
1987  }
1988 
1989  if (current == '#')
1990  {
1991  return get_ubjson_size_value(result.first);
1992  }
1993 
1994  return true;
1995  }
1996 
2001  bool get_ubjson_value(const char_int_type prefix)
2002  {
2003  switch (prefix)
2004  {
2005  case std::char_traits<char_type>::eof(): // EOF
2006  return unexpect_eof(input_format_t::ubjson, "value");
2007 
2008  case 'T': // true
2009  return sax->boolean(true);
2010  case 'F': // false
2011  return sax->boolean(false);
2012 
2013  case 'Z': // null
2014  return sax->null();
2015 
2016  case 'U':
2017  {
2018  std::uint8_t number{};
2019  return get_number(input_format_t::ubjson, number) && sax->number_unsigned(number);
2020  }
2021 
2022  case 'i':
2023  {
2024  std::int8_t number{};
2025  return get_number(input_format_t::ubjson, number) && sax->number_integer(number);
2026  }
2027 
2028  case 'I':
2029  {
2030  std::int16_t number{};
2031  return get_number(input_format_t::ubjson, number) && sax->number_integer(number);
2032  }
2033 
2034  case 'l':
2035  {
2036  std::int32_t number{};
2037  return get_number(input_format_t::ubjson, number) && sax->number_integer(number);
2038  }
2039 
2040  case 'L':
2041  {
2042  std::int64_t number{};
2043  return get_number(input_format_t::ubjson, number) && sax->number_integer(number);
2044  }
2045 
2046  case 'd':
2047  {
2048  float number{};
2049  return get_number(input_format_t::ubjson, number) && sax->number_float(static_cast<number_float_t>(number), "");
2050  }
2051 
2052  case 'D':
2053  {
2054  double number{};
2055  return get_number(input_format_t::ubjson, number) && sax->number_float(static_cast<number_float_t>(number), "");
2056  }
2057 
2058  case 'H':
2059  {
2060  return get_ubjson_high_precision_number();
2061  }
2062 
2063  case 'C': // char
2064  {
2065  get();
2066  if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "char")))
2067  {
2068  return false;
2069  }
2070  if (JSON_HEDLEY_UNLIKELY(current > 127))
2071  {
2072  auto last_token = get_token_string();
2073  return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + last_token, "char"), BasicJsonType()));
2074  }
2075  string_t s(1, static_cast<typename string_t::value_type>(current));
2076  return sax->string(s);
2077  }
2078 
2079  case 'S': // string
2080  {
2081  string_t s;
2082  return get_ubjson_string(s) && sax->string(s);
2083  }
2084 
2085  case '[': // array
2086  return get_ubjson_array();
2087 
2088  case '{': // object
2089  return get_ubjson_object();
2090 
2091  default: // anything else
2092  {
2093  auto last_token = get_token_string();
2094  return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "invalid byte: 0x" + last_token, "value"), BasicJsonType()));
2095  }
2096  }
2097  }
2098 
2102  bool get_ubjson_array()
2103  {
2104  std::pair<std::size_t, char_int_type> size_and_type;
2105  if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type)))
2106  {
2107  return false;
2108  }
2109 
2110  if (size_and_type.first != string_t::npos)
2111  {
2112  if (JSON_HEDLEY_UNLIKELY(!sax->start_array(size_and_type.first)))
2113  {
2114  return false;
2115  }
2116 
2117  if (size_and_type.second != 0)
2118  {
2119  if (size_and_type.second != 'N')
2120  {
2121  for (std::size_t i = 0; i < size_and_type.first; ++i)
2122  {
2123  if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second)))
2124  {
2125  return false;
2126  }
2127  }
2128  }
2129  }
2130  else
2131  {
2132  for (std::size_t i = 0; i < size_and_type.first; ++i)
2133  {
2134  if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal()))
2135  {
2136  return false;
2137  }
2138  }
2139  }
2140  }
2141  else
2142  {
2143  if (JSON_HEDLEY_UNLIKELY(!sax->start_array(std::size_t(-1))))
2144  {
2145  return false;
2146  }
2147 
2148  while (current != ']')
2149  {
2150  if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal(false)))
2151  {
2152  return false;
2153  }
2154  get_ignore_noop();
2155  }
2156  }
2157 
2158  return sax->end_array();
2159  }
2160 
2164  bool get_ubjson_object()
2165  {
2166  std::pair<std::size_t, char_int_type> size_and_type;
2167  if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type)))
2168  {
2169  return false;
2170  }
2171 
2172  string_t key;
2173  if (size_and_type.first != string_t::npos)
2174  {
2175  if (JSON_HEDLEY_UNLIKELY(!sax->start_object(size_and_type.first)))
2176  {
2177  return false;
2178  }
2179 
2180  if (size_and_type.second != 0)
2181  {
2182  for (std::size_t i = 0; i < size_and_type.first; ++i)
2183  {
2184  if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key) || !sax->key(key)))
2185  {
2186  return false;
2187  }
2188  if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second)))
2189  {
2190  return false;
2191  }
2192  key.clear();
2193  }
2194  }
2195  else
2196  {
2197  for (std::size_t i = 0; i < size_and_type.first; ++i)
2198  {
2199  if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key) || !sax->key(key)))
2200  {
2201  return false;
2202  }
2203  if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal()))
2204  {
2205  return false;
2206  }
2207  key.clear();
2208  }
2209  }
2210  }
2211  else
2212  {
2213  if (JSON_HEDLEY_UNLIKELY(!sax->start_object(std::size_t(-1))))
2214  {
2215  return false;
2216  }
2217 
2218  while (current != '}')
2219  {
2220  if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key, false) || !sax->key(key)))
2221  {
2222  return false;
2223  }
2224  if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal()))
2225  {
2226  return false;
2227  }
2228  get_ignore_noop();
2229  key.clear();
2230  }
2231  }
2232 
2233  return sax->end_object();
2234  }
2235 
2236  // Note, no reader for UBJSON binary types is implemented because they do
2237  // not exist
2238 
2239  bool get_ubjson_high_precision_number()
2240  {
2241  // get size of following number string
2242  std::size_t size{};
2243  auto res = get_ubjson_size_value(size);
2244  if (JSON_HEDLEY_UNLIKELY(!res))
2245  {
2246  return res;
2247  }
2248 
2249  // get number string
2250  std::vector<char> number_vector;
2251  for (std::size_t i = 0; i < size; ++i)
2252  {
2253  get();
2254  if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "number")))
2255  {
2256  return false;
2257  }
2258  number_vector.push_back(static_cast<char>(current));
2259  }
2260 
2261  // parse number string
2262  using ia_type = decltype(detail::input_adapter(number_vector));
2263  auto number_lexer = detail::lexer<BasicJsonType, ia_type>(detail::input_adapter(number_vector), false);
2264  const auto result_number = number_lexer.scan();
2265  const auto number_string = number_lexer.get_token_string();
2266  const auto result_remainder = number_lexer.scan();
2267 
2268  using token_type = typename detail::lexer_base<BasicJsonType>::token_type;
2269 
2270  if (JSON_HEDLEY_UNLIKELY(result_remainder != token_type::end_of_input))
2271  {
2272  return sax->parse_error(chars_read, number_string, parse_error::create(115, chars_read, exception_message(input_format_t::ubjson, "invalid number text: " + number_lexer.get_token_string(), "high-precision number"), BasicJsonType()));
2273  }
2274 
2275  switch (result_number)
2276  {
2277  case token_type::value_integer:
2278  return sax->number_integer(number_lexer.get_number_integer());
2279  case token_type::value_unsigned:
2280  return sax->number_unsigned(number_lexer.get_number_unsigned());
2281  case token_type::value_float:
2282  return sax->number_float(number_lexer.get_number_float(), std::move(number_string));
2283  case token_type::uninitialized:
2284  case token_type::literal_true:
2285  case token_type::literal_false:
2286  case token_type::literal_null:
2287  case token_type::value_string:
2288  case token_type::begin_array:
2289  case token_type::begin_object:
2290  case token_type::end_array:
2291  case token_type::end_object:
2292  case token_type::name_separator:
2293  case token_type::value_separator:
2294  case token_type::parse_error:
2295  case token_type::end_of_input:
2296  case token_type::literal_or_value:
2297  default:
2298  return sax->parse_error(chars_read, number_string, parse_error::create(115, chars_read, exception_message(input_format_t::ubjson, "invalid number text: " + number_lexer.get_token_string(), "high-precision number"), BasicJsonType()));
2299  }
2300  }
2301 
2303  // Utility functions //
2305 
2315  char_int_type get()
2316  {
2317  ++chars_read;
2318  return current = ia.get_character();
2319  }
2320 
2324  char_int_type get_ignore_noop()
2325  {
2326  do
2327  {
2328  get();
2329  }
2330  while (current == 'N');
2331 
2332  return current;
2333  }
2334 
2335  /*
2336  @brief read a number from the input
2337 
2338  @tparam NumberType the type of the number
2339  @param[in] format the current format (for diagnostics)
2340  @param[out] result number of type @a NumberType
2341 
2342  @return whether conversion completed
2343 
2344  @note This function needs to respect the system's endianess, because
2345  bytes in CBOR, MessagePack, and UBJSON are stored in network order
2346  (big endian) and therefore need reordering on little endian systems.
2347  */
2348  template<typename NumberType, bool InputIsLittleEndian = false>
2349  bool get_number(const input_format_t format, NumberType& result)
2350  {
2351  // step 1: read input into array with system's byte order
2352  std::array<std::uint8_t, sizeof(NumberType)> vec{};
2353  for (std::size_t i = 0; i < sizeof(NumberType); ++i)
2354  {
2355  get();
2356  if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "number")))
2357  {
2358  return false;
2359  }
2360 
2361  // reverse byte order prior to conversion if necessary
2362  if (is_little_endian != InputIsLittleEndian)
2363  {
2364  vec[sizeof(NumberType) - i - 1] = static_cast<std::uint8_t>(current);
2365  }
2366  else
2367  {
2368  vec[i] = static_cast<std::uint8_t>(current); // LCOV_EXCL_LINE
2369  }
2370  }
2371 
2372  // step 2: convert array into number of type T and return
2373  std::memcpy(&result, vec.data(), sizeof(NumberType));
2374  return true;
2375  }
2376 
2391  template<typename NumberType>
2392  bool get_string(const input_format_t format,
2393  const NumberType len,
2394  string_t& result)
2395  {
2396  bool success = true;
2397  for (NumberType i = 0; i < len; i++)
2398  {
2399  get();
2400  if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "string")))
2401  {
2402  success = false;
2403  break;
2404  }
2405  result.push_back(static_cast<typename string_t::value_type>(current));
2406  }
2407  return success;
2408  }
2409 
2424  template<typename NumberType>
2425  bool get_binary(const input_format_t format,
2426  const NumberType len,
2427  binary_t& result)
2428  {
2429  bool success = true;
2430  for (NumberType i = 0; i < len; i++)
2431  {
2432  get();
2433  if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "binary")))
2434  {
2435  success = false;
2436  break;
2437  }
2438  result.push_back(static_cast<std::uint8_t>(current));
2439  }
2440  return success;
2441  }
2442 
2448  JSON_HEDLEY_NON_NULL(3)
2449  bool unexpect_eof(const input_format_t format, const char* context) const
2450  {
2451  if (JSON_HEDLEY_UNLIKELY(current == std::char_traits<char_type>::eof()))
2452  {
2453  return sax->parse_error(chars_read, "<end of file>",
2454  parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context), BasicJsonType()));
2455  }
2456  return true;
2457  }
2458 
2462  std::string get_token_string() const
2463  {
2464  std::array<char, 3> cr{{}};
2465  (std::snprintf)(cr.data(), cr.size(), "%.2hhX", static_cast<unsigned char>(current)); // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg)
2466  return std::string{cr.data()};
2467  }
2468 
2475  std::string exception_message(const input_format_t format,
2476  const std::string& detail,
2477  const std::string& context) const
2478  {
2479  std::string error_msg = "syntax error while parsing ";
2480 
2481  switch (format)
2482  {
2483  case input_format_t::cbor:
2484  error_msg += "CBOR";
2485  break;
2486 
2487  case input_format_t::msgpack:
2488  error_msg += "MessagePack";
2489  break;
2490 
2491  case input_format_t::ubjson:
2492  error_msg += "UBJSON";
2493  break;
2494 
2495  case input_format_t::bson:
2496  error_msg += "BSON";
2497  break;
2498 
2499  case input_format_t::json: // LCOV_EXCL_LINE
2500  default: // LCOV_EXCL_LINE
2501  JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE
2502  }
2503 
2504  return error_msg + " " + context + ": " + detail;
2505  }
2506 
2507  private:
2509  InputAdapterType ia; ///< input adapter the constructor moved in; get() reads from it via get_character()
2510 
2512  char_int_type current = std::char_traits<char_type>::eof(); ///< character most recently fetched by get(); EOF until the first read
2513 
2515  std::size_t chars_read = 0; ///< incremented by get() on every call; passed as the position when reporting parse errors
2516 
2518  const bool is_little_endian = little_endianess(); ///< byte order of the host; get_number() uses it to decide whether to reverse bytes
2519 
2521  json_sax_t* sax = nullptr; ///< SAX handler that receives all events and parse errors; presumably assigned by sax_parse() - TODO confirm
2522 };
2523 } // namespace detail
2524 } // namespace nlohmann
deserialization of CBOR, MessagePack, and UBJSON values
Definition: binary_reader.hpp:60
binary_reader(InputAdapterType &&adapter) noexcept
create a binary reader
Definition: binary_reader.hpp:76
bool sax_parse(const input_format_t format, json_sax_t *sax_, const bool strict=true, const cbor_tag_handler_t tag_handler=cbor_tag_handler_t::error)
Definition: binary_reader.hpp:97
token_type
token types for the parser
Definition: lexer.hpp:31
static parse_error create(int id_, const position_t &pos, const std::string &what_arg, const BasicJsonType &context)
create a parse error exception
Definition: exceptions.hpp:197
zip_uint64_t uint64_t
zip_uint64_t typedef.
Definition: zip.hpp:108
zip_int64_t int64_t
zip_int64_t typedef.
Definition: zip.hpp:103
zip_uint32_t uint32_t
zip_uint32_t typedef.
Definition: zip.hpp:98
zip_int32_t int32_t
zip_int32_t typedef.
Definition: zip.hpp:93
zip_uint8_t uint8_t
zip_uint8_t typedef.
Definition: zip.hpp:78
zip_uint16_t uint16_t
zip_uint16_t typedef.
Definition: zip.hpp:88
zip_int16_t int16_t
zip_int16_t typedef.
Definition: zip.hpp:83
zip_int8_t int8_t
zip_int8_t typedef.
Definition: zip.hpp:73
cbor_tag_handler_t
how to treat CBOR tags
Definition: binary_reader.hpp:32
@ store
store tags as binary type
@ error
throw a parse_error exception in case of a tag
@ value
the parser finished reading a JSON value
@ key
the parser read a key of a value in an object
@ strict
throw a type_error exception in case of invalid UTF-8
input_format_t
the supported input formats
Definition: input_adapters.hpp:26
namespace for Niels Lohmann
Definition: adl_serializer.hpp:12