Horizon
input_adapters.hpp
1 #pragma once
2 
3 #include <array> // array
4 #include <cstddef> // size_t
5 #include <cstring> // strlen
6 #include <iterator> // begin, end, iterator_traits, random_access_iterator_tag, distance, next
7 #include <memory> // shared_ptr, make_shared, addressof
8 #include <numeric> // accumulate
9 #include <string> // string, char_traits
10 #include <type_traits> // enable_if, is_base_of, is_pointer, is_integral, remove_pointer
11 #include <utility> // pair, declval
12 
13 #ifndef JSON_NO_IO
14  #include <cstdio> // FILE *
15  #include <istream> // istream
16 #endif // JSON_NO_IO
17 
18 #include <nlohmann/detail/iterators/iterator_traits.hpp>
19 #include <nlohmann/detail/macro_scope.hpp>
20 
21 namespace nlohmann
22 {
23 namespace detail
24 {
/// The input formats a reader can be asked to decode.
enum class input_format_t
{
    json,     ///< JSON text
    cbor,     ///< CBOR
    msgpack,  ///< MessagePack
    ubjson,   ///< UBJSON
    bson      ///< BSON
};
27 
29 // input adapters //
31 
32 #ifndef JSON_NO_IO
// NOTE(review): the class-head line for this body is absent from this listing;
// the constructor names below show that the class is file_input_adapter.
//
// Input adapter that hands out the contents of a C FILE stream one character
// at a time. The adapter only stores the FILE pointer: the destructor is
// defaulted, so nothing here closes the stream.
38 {
39  public:
40  using char_type = char;
41 
42  JSON_HEDLEY_NON_NULL(2)
// Stores the stream pointer f as-is (presumably already opened by the caller).
43  explicit file_input_adapter(std::FILE* f) noexcept
44  : m_file(f)
45  {}
46 
47  // make class move-only
// Copying is deleted and move construction is defaulted; note that move
// assignment is deleted as well, so an adapter can be move-constructed only.
48  file_input_adapter(const file_input_adapter&) = delete;
49  file_input_adapter(file_input_adapter&&) noexcept = default;
50  file_input_adapter& operator=(const file_input_adapter&) = delete;
51  file_input_adapter& operator=(file_input_adapter&&) = delete;
52  ~file_input_adapter() = default;
53 
// Returns whatever std::fgetc yields for the stored stream: the next character
// as an int, or EOF on end-of-file or read error.
54  std::char_traits<char>::int_type get_character() noexcept
55  {
56  return std::fgetc(m_file);
57  }
58 
59  private:
// The stream to read from; not owned by this class.
61  std::FILE* m_file;
62 };
63 
64 
// NOTE(review): the class-head line for this body is absent from this listing;
// the constructor names below show that the class is input_stream_adapter.
//
// Input adapter over a std::istream. Characters are pulled directly from the
// stream's streambuf (sb); the istream pointer (is) is kept only to adjust the
// stream's state flags.
75 {
76  public:
77  using char_type = char;
78 
// NOTE(review): the signature line of this member is missing from the listing;
// the comment inside the body and the null check suggest it is the destructor.
// If the adapter still refers to a stream, its state is reset so that only
// eofbit (when currently set) remains.
80  {
81  // clear stream flags; we use underlying streambuf I/O, do not
82  // maintain ifstream flags, except eof
83  if (is != nullptr)
84  {
85  is->clear(is->rdstate() & std::ios::eofbit);
86  }
87  }
88 
// Remembers the address of stream i and of its stream buffer. The stream is
// not owned and must outlive the adapter.
89  explicit input_stream_adapter(std::istream& i)
90  : is(&i), sb(i.rdbuf())
91  {}
92 
93  // delete because of pointer members
// NOTE(review): listing line 94 is missing here (presumably the deleted copy
// constructor - confirm against the real header).
95  input_stream_adapter& operator=(input_stream_adapter&) = delete;
96  input_stream_adapter& operator=(input_stream_adapter&&) = delete;
97 
// NOTE(review): the signature line of this constructor is missing from the
// listing; the use of rhs indicates the move constructor. It takes over both
// pointers and nulls them in rhs, so the null check above skips the flag reset
// for a moved-from adapter.
99  : is(rhs.is), sb(rhs.sb)
100  {
101  rhs.is = nullptr;
102  rhs.sb = nullptr;
103  }
104 
105  // std::istream/std::streambuf use std::char_traits<char>::to_int_type, to
106  // ensure that std::char_traits<char>::eof() and the character 0xFF do not
107  // end up as the same value, eg. 0xFFFFFFFF.
// Extracts the next character from the stream buffer and returns it; on
// end-of-input, eof() is returned and eofbit is additionally set on the stream.
// Both sb and is are dereferenced without a null check, so this must not be
// called on a moved-from adapter.
108  std::char_traits<char>::int_type get_character()
109  {
110  auto res = sb->sbumpc();
111  // set eof manually, as we don't use the istream interface.
112  if (JSON_HEDLEY_UNLIKELY(res == std::char_traits<char>::eof()))
113  {
114  is->clear(is->rdstate() | std::ios::eofbit);
115  }
116  return res;
117  }
118 
119  private:
// The associated input stream (used for state flags) and its stream buffer
// (used for reading); both are null after the adapter has been moved from.
121  std::istream* is = nullptr;
122  std::streambuf* sb = nullptr;
123 };
124 #endif // JSON_NO_IO
125 
126 // General-purpose iterator-based adapter. It might not be as fast as
127 // theoretically possible for some containers, but it is extremely versatile.
// NOTE(review): the class-head line following the template header is absent
// from this listing; the constructor name shows the class is
// iterator_input_adapter.
//
// Input adapter over the iterator range [first, last). Each call to
// get_character() yields the element at the current position and advances by
// one element.
128 template<typename IteratorType>
130 {
131  public:
// The character type is the iterator's value type.
132  using char_type = typename std::iterator_traits<IteratorType>::value_type;
133 
// Takes both iterators by value and moves them into the adapter.
134  iterator_input_adapter(IteratorType first, IteratorType last)
135  : current(std::move(first)), end(std::move(last))
136  {}
137 
// Returns the current element converted with char_traits::to_int_type and
// steps to the next one; returns char_traits<char_type>::eof() once the end of
// the range has been reached.
138  typename std::char_traits<char_type>::int_type get_character()
139  {
140  if (JSON_HEDLEY_LIKELY(current != end))
141  {
142  auto result = std::char_traits<char_type>::to_int_type(*current);
143  std::advance(current, 1);
144  return result;
145  }
146 
147  return std::char_traits<char_type>::eof();
148  }
149 
150  private:
// Read position and end of the range.
151  IteratorType current;
152  IteratorType end;
153 
// wide_string_input_helper is a friend so that it can call the private
// empty() below.
154  template<typename BaseInputAdapter, size_t T>
155  friend struct wide_string_input_helper;
156 
// True when no elements are left in the range.
157  bool empty() const
158  {
159  return current == end;
160  }
161 };
162 
163 
164 template<typename BaseInputAdapter, size_t T>
166 
167 template<typename BaseInputAdapter>
168 struct wide_string_input_helper<BaseInputAdapter, 4>
169 {
170  // UTF-32
171  static void fill_buffer(BaseInputAdapter& input,
172  std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
173  size_t& utf8_bytes_index,
174  size_t& utf8_bytes_filled)
175  {
176  utf8_bytes_index = 0;
177 
178  if (JSON_HEDLEY_UNLIKELY(input.empty()))
179  {
180  utf8_bytes[0] = std::char_traits<char>::eof();
181  utf8_bytes_filled = 1;
182  }
183  else
184  {
185  // get the current character
186  const auto wc = input.get_character();
187 
188  // UTF-32 to UTF-8 encoding
189  if (wc < 0x80)
190  {
191  utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
192  utf8_bytes_filled = 1;
193  }
194  else if (wc <= 0x7FF)
195  {
196  utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xC0u | ((static_cast<unsigned int>(wc) >> 6u) & 0x1Fu));
197  utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu));
198  utf8_bytes_filled = 2;
199  }
200  else if (wc <= 0xFFFF)
201  {
202  utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xE0u | ((static_cast<unsigned int>(wc) >> 12u) & 0x0Fu));
203  utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((static_cast<unsigned int>(wc) >> 6u) & 0x3Fu));
204  utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu));
205  utf8_bytes_filled = 3;
206  }
207  else if (wc <= 0x10FFFF)
208  {
209  utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xF0u | ((static_cast<unsigned int>(wc) >> 18u) & 0x07u));
210  utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((static_cast<unsigned int>(wc) >> 12u) & 0x3Fu));
211  utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | ((static_cast<unsigned int>(wc) >> 6u) & 0x3Fu));
212  utf8_bytes[3] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu));
213  utf8_bytes_filled = 4;
214  }
215  else
216  {
217  // unknown character
218  utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
219  utf8_bytes_filled = 1;
220  }
221  }
222  }
223 };
224 
225 template<typename BaseInputAdapter>
226 struct wide_string_input_helper<BaseInputAdapter, 2>
227 {
228  // UTF-16
229  static void fill_buffer(BaseInputAdapter& input,
230  std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
231  size_t& utf8_bytes_index,
232  size_t& utf8_bytes_filled)
233  {
234  utf8_bytes_index = 0;
235 
236  if (JSON_HEDLEY_UNLIKELY(input.empty()))
237  {
238  utf8_bytes[0] = std::char_traits<char>::eof();
239  utf8_bytes_filled = 1;
240  }
241  else
242  {
243  // get the current character
244  const auto wc = input.get_character();
245 
246  // UTF-16 to UTF-8 encoding
247  if (wc < 0x80)
248  {
249  utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
250  utf8_bytes_filled = 1;
251  }
252  else if (wc <= 0x7FF)
253  {
254  utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xC0u | ((static_cast<unsigned int>(wc) >> 6u)));
255  utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu));
256  utf8_bytes_filled = 2;
257  }
258  else if (0xD800 > wc || wc >= 0xE000)
259  {
260  utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xE0u | ((static_cast<unsigned int>(wc) >> 12u)));
261  utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((static_cast<unsigned int>(wc) >> 6u) & 0x3Fu));
262  utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu));
263  utf8_bytes_filled = 3;
264  }
265  else
266  {
267  if (JSON_HEDLEY_UNLIKELY(!input.empty()))
268  {
269  const auto wc2 = static_cast<unsigned int>(input.get_character());
270  const auto charcode = 0x10000u + (((static_cast<unsigned int>(wc) & 0x3FFu) << 10u) | (wc2 & 0x3FFu));
271  utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xF0u | (charcode >> 18u));
272  utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 12u) & 0x3Fu));
273  utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 6u) & 0x3Fu));
274  utf8_bytes[3] = static_cast<std::char_traits<char>::int_type>(0x80u | (charcode & 0x3Fu));
275  utf8_bytes_filled = 4;
276  }
277  else
278  {
279  utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
280  utf8_bytes_filled = 1;
281  }
282  }
283  }
284  }
285 };
286 
287 // Wraps another input adapter to convert wide character types into individual bytes.
// NOTE(review): the class-head line following the template header is absent
// from this listing; the constructor name shows the class is
// wide_string_input_adapter.
//
// Adapter that reads code units of type WideCharType from a wrapped adapter
// and hands them out as a sequence of single UTF-8 bytes. Up to four bytes are
// buffered per refill.
288 template<typename BaseInputAdapter, typename WideCharType>
290 {
291  public:
292  using char_type = char;
293 
// Stores a copy of the wrapped adapter.
// NOTE(review): base is taken by value and then copied again into the member
// (not moved), and the constructor is not explicit.
294  wide_string_input_adapter(BaseInputAdapter base)
295  : base_adapter(base) {}
296 
// Returns the next buffered byte. When all buffered bytes have been handed out
// (index == filled, which also holds initially with both at 0) the buffer is
// refilled first, using the helper selected by sizeof(WideCharType).
// NOTE(review): declared noexcept although fill_buffer() and the wrapped
// adapter's get_character() are not.
297  typename std::char_traits<char>::int_type get_character() noexcept
298  {
299  // check if buffer needs to be filled
300  if (utf8_bytes_index == utf8_bytes_filled)
301  {
302  fill_buffer<sizeof(WideCharType)>();
303 
304  JSON_ASSERT(utf8_bytes_filled > 0);
305  JSON_ASSERT(utf8_bytes_index == 0);
306  }
307 
308  // use buffer
309  JSON_ASSERT(utf8_bytes_filled > 0);
310  JSON_ASSERT(utf8_bytes_index < utf8_bytes_filled);
311  return utf8_bytes[utf8_bytes_index++];
312  }
313 
314  private:
// The wrapped adapter that delivers the wide code units.
315  BaseInputAdapter base_adapter;
316 
// Forwards to wide_string_input_helper<BaseInputAdapter, T>::fill_buffer,
// where T is the code-unit size in bytes; the helper rewrites the buffer and
// both counters below.
317  template<size_t T>
318  void fill_buffer()
319  {
320  wide_string_input_helper<BaseInputAdapter, T>::fill_buffer(base_adapter, utf8_bytes, utf8_bytes_index, utf8_bytes_filled);
321  }
322 
// Buffer holding the bytes produced by the last refill.
324  std::array<std::char_traits<char>::int_type, 4> utf8_bytes = {{0, 0, 0, 0}};
325 
// Position of the next byte to hand out.
327  std::size_t utf8_bytes_index = 0;
// Number of valid bytes currently in the buffer.
329  std::size_t utf8_bytes_filled = 0;
330 };
331 
332 
// NOTE(review): the struct-head line (listing line 334) and the declaration of
// adapter_type (listing line 338) are absent from this listing. The partial
// specialization further down shows the name is iterator_input_adapter_factory;
// the definition of adapter_type cannot be seen here.
//
// Primary factory template: builds adapter_type directly from an iterator pair.
// The Enable parameter lets a partial specialization take over for selected
// iterator types.
333 template<typename IteratorType, typename Enable = void>
335 {
336  using iterator_type = IteratorType;
337  using char_type = typename std::iterator_traits<iterator_type>::value_type;
339 
// Constructs the adapter from the two iterators, moving them into it.
340  static adapter_type create(IteratorType first, IteratorType last)
341  {
342  return adapter_type(std::move(first), std::move(last));
343  }
344 };
345 
// NOTE(review): the struct-head line (listing line 347) is absent from this
// listing; the use in the factory specialization below shows the name is
// is_iterator_of_multibyte.
//
// Trait whose `value` is nonzero when the value type of iterator T is larger
// than one byte.
346 template<typename T>
348 {
349  using value_type = typename std::iterator_traits<T>::value_type;
350  enum
351  {
352  value = sizeof(value_type) > 1
353  };
354 };
355 
// Factory specialization chosen when is_iterator_of_multibyte<IteratorType>::value
// is true, i.e. when the iterator's value type is wider than one byte.
// NOTE(review): listing lines 361-362 are absent here; they presumably declare
// base_adapter_type and adapter_type, which create() uses - confirm against
// the real header.
356 template<typename IteratorType>
357 struct iterator_input_adapter_factory<IteratorType, enable_if_t<is_iterator_of_multibyte<IteratorType>::value>>
358 {
359  using iterator_type = IteratorType;
360  using char_type = typename std::iterator_traits<iterator_type>::value_type;
363 
// Builds a base_adapter_type from the iterator pair and wraps it in an
// adapter_type.
364  static adapter_type create(IteratorType first, IteratorType last)
365  {
366  return adapter_type(base_adapter_type(std::move(first), std::move(last)));
367  }
368 };
369 
370 // General purpose iterator-based input
// Creates the input adapter for the iterator range [first, last) by delegating
// to factory_type::create. The return type is the adapter_type of
// iterator_input_adapter_factory<IteratorType>.
// NOTE(review): listing line 374 is absent here; it presumably declares
// factory_type as iterator_input_adapter_factory<IteratorType> - confirm.
371 template<typename IteratorType>
372 typename iterator_input_adapter_factory<IteratorType>::adapter_type input_adapter(IteratorType first, IteratorType last)
373 {
375  return factory_type::create(first, last);
376 }
377 
378 // Convenience shorthand from container to iterator
379 // Enables ADL on begin(container) and end(container)
380 // Encloses the using declarations in a namespace so that they do not leak into the outer scope
381 
382 namespace container_input_adapter_factory_impl
383 {
384 
385 using std::begin;
386 using std::end;
387 
388 template<typename ContainerType, typename Enable = void>
390 
391 template<typename ContainerType>
392 struct container_input_adapter_factory< ContainerType,
393  void_t<decltype(begin(std::declval<ContainerType>()), end(std::declval<ContainerType>()))>>
394  {
395  using adapter_type = decltype(input_adapter(begin(std::declval<ContainerType>()), end(std::declval<ContainerType>())));
396 
397  static adapter_type create(const ContainerType& container)
398 {
399  return input_adapter(begin(container), end(container));
400 }
401  };
402 
403 } // namespace container_input_adapter_factory_impl
404 
405 template<typename ContainerType>
407 {
409 }
410 
411 #ifndef JSON_NO_IO
412 // Special cases with fast paths
413 inline file_input_adapter input_adapter(std::FILE* file)
414 {
415  return file_input_adapter(file);
416 }
417 
418 inline input_stream_adapter input_adapter(std::istream& stream)
419 {
420  return input_stream_adapter(stream);
421 }
422 
423 inline input_stream_adapter input_adapter(std::istream&& stream)
424 {
425  return input_stream_adapter(stream);
426 }
427 #endif // JSON_NO_IO
428 
// The adapter type that input_adapter() yields for a pair of `const char*`
// iterators; it is the return type of the pointer overload below and the
// member type held by span_input_adapter.
429 using contiguous_bytes_input_adapter = decltype(input_adapter(std::declval<const char*>(), std::declval<const char*>()));
430 
431 // Null-delimited strings, and the like.
432 template < typename CharT,
433  typename std::enable_if <
434  std::is_pointer<CharT>::value&&
435  !std::is_array<CharT>::value&&
436  std::is_integral<typename std::remove_pointer<CharT>::type>::value&&
437  sizeof(typename std::remove_pointer<CharT>::type) == 1,
438  int >::type = 0 >
439 contiguous_bytes_input_adapter input_adapter(CharT b)
440 {
441  auto length = std::strlen(reinterpret_cast<const char*>(b));
442  const auto* ptr = reinterpret_cast<const char*>(b);
443  return input_adapter(ptr, ptr + length);
444 }
445 
/// Adapter for built-in arrays. All N elements are part of the input, which
/// includes a trailing zero element if the array holds one.
template<typename T, std::size_t N>
auto input_adapter(T (&array)[N]) -> decltype(input_adapter(array, array + N)) // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays)
{
    T* const first = array;
    return input_adapter(first, first + N);
}
451 
452 // This class only handles inputs of input_buffer_adapter type.
453 // It's required so that expressions like {ptr, len} can be implicitly converted
454 // to the correct adapter.
// NOTE(review): the class-head line for this body is absent from this listing;
// the constructor names below show that the class is span_input_adapter.
//
// Holder for a contiguous_bytes_input_adapter that can be constructed from a
// (pointer, length) pair or from a pair of random-access iterators.
456 {
457  public:
// (pointer, length) form: accepts a pointer to any one-byte integral type and
// spans the l bytes starting at b. The pointer is reinterpreted as
// const char*; no copy of the data is made.
458  template < typename CharT,
459  typename std::enable_if <
460  std::is_pointer<CharT>::value&&
461  std::is_integral<typename std::remove_pointer<CharT>::type>::value&&
462  sizeof(typename std::remove_pointer<CharT>::type) == 1,
463  int >::type = 0 >
464  span_input_adapter(CharT b, std::size_t l)
465  : ia(reinterpret_cast<const char*>(b), reinterpret_cast<const char*>(b) + l) {}
466 
// Iterator form: enabled only when the iterator category is exactly
// std::random_access_iterator_tag. The member is initialized from
// input_adapter(first, last), so that result must be convertible to
// contiguous_bytes_input_adapter.
467  template<class IteratorType,
468  typename std::enable_if<
469  std::is_same<typename iterator_traits<IteratorType>::iterator_category, std::random_access_iterator_tag>::value,
470  int>::type = 0>
471  span_input_adapter(IteratorType first, IteratorType last)
472  : ia(input_adapter(first, last)) {}
473 
// Hands the stored adapter out as an rvalue reference so the caller can move
// from it; after the caller has done so, the member is in a moved-from state.
474  contiguous_bytes_input_adapter&& get()
475  {
476  return std::move(ia); // NOLINT(hicpp-move-const-arg,performance-move-const-arg)
477  }
478 
479  private:
// The wrapped adapter.
480  contiguous_bytes_input_adapter ia;
481 };
482 } // namespace detail
483 } // namespace nlohmann
a class to store JSON values
Definition: json.hpp:177
Definition: input_adapters.hpp:38
Definition: input_adapters.hpp:75
Definition: input_adapters.hpp:130
Definition: input_adapters.hpp:456
Definition: input_adapters.hpp:290
@ value
the parser finished reading a JSON value
input_format_t
the supported input formats
Definition: input_adapters.hpp:26
namespace for Niels Lohmann
Definition: adl_serializer.hpp:12
Definition: input_adapters.hpp:348
Definition: input_adapters.hpp:335
Definition: input_adapters.hpp:165