From 0ca0e1da36a1a4d29dad2657f40cd362d08546fc Mon Sep 17 00:00:00 2001 From: Adrian Conlon Date: Wed, 13 Jul 2022 07:44:47 +0100 Subject: [PATCH 1/4] Optimisation: the NOP is meaningless Signed-off-by: Adrian Conlon --- LR35902/src/LR35902.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LR35902/src/LR35902.cpp b/LR35902/src/LR35902.cpp index 5149742..b8859de 100644 --- a/LR35902/src/LR35902.cpp +++ b/LR35902/src/LR35902.cpp @@ -460,7 +460,7 @@ int EightBit::GameBoy::LR35902::step() noexcept { handleINT(); } else if (UNLIKELY(lowered(HALT()))) { IntelProcessor::memoryRead(PC()); - Processor::execute(0); // NOP + //Processor::execute(0); // NOP } else { Processor::execute(fetchByte()); } From dc7b47a221c25d908adea5cef25a8bdd8d9aef94 Mon Sep 17 00:00:00 2001 From: Adrian Conlon <98398945+AdrianConlon@users.noreply.github.com> Date: Fri, 15 Jul 2022 16:11:23 +0100 Subject: [PATCH 2/4] Update catch2 library Signed-off-by: Adrian Conlon <98398945+AdrianConlon@users.noreply.github.com> --- modules/catch2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/catch2 b/modules/catch2 index c4e3767..20d413b 160000 --- a/modules/catch2 +++ b/modules/catch2 @@ -1 +1 @@ -Subproject commit c4e3767e265808590986d5db6ca1b5532a7f3d13 +Subproject commit 20d413b8b6803c6d456874935d7014fe684e9426 From b883cfb67372f1b27dd91e2742eb1ed0edc0fe95 Mon Sep 17 00:00:00 2001 From: Adrian Conlon <98398945+AdrianConlon@users.noreply.github.com> Date: Fri, 15 Jul 2022 16:17:10 +0100 Subject: [PATCH 3/4] Remove ProcessorTests (temporarily!) 
Signed-off-by: Adrian Conlon <98398945+AdrianConlon@users.noreply.github.com> --- .gitmodules | 3 --- modules/ProcessorTests | 1 - 2 files changed, 4 deletions(-) delete mode 160000 modules/ProcessorTests diff --git a/.gitmodules b/.gitmodules index ee2516b..40f0bdd 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ [submodule "modules/catch2"] path = modules/catch2 url = https://github.com/catchorg/Catch2.git -[submodule "modules/ProcessorTests"] - path = modules/ProcessorTests - url = https://github.com/TomHarte/ProcessorTests.git diff --git a/modules/ProcessorTests b/modules/ProcessorTests deleted file mode 160000 index ab0c825..0000000 --- a/modules/ProcessorTests +++ /dev/null @@ -1 +0,0 @@ -Subproject commit ab0c825e677466e5fc67bb7a00b22d64cf59a691 From 14af272d75eca2666b758ec059cefc17f6a41690 Mon Sep 17 00:00:00 2001 From: Adrian Conlon <98398945+AdrianConlon@users.noreply.github.com> Date: Sat, 1 Apr 2023 07:52:02 +0100 Subject: [PATCH 4/4] Update simdjson --- M6502/HarteTest_6502/simdjson/simdjson.cpp | 3643 +++++++----- M6502/HarteTest_6502/simdjson/simdjson.h | 6033 +++++++++++--------- 2 files changed, 5426 insertions(+), 4250 deletions(-) diff --git a/M6502/HarteTest_6502/simdjson/simdjson.cpp b/M6502/HarteTest_6502/simdjson/simdjson.cpp index ef2ab24..76e7e37 100644 --- a/M6502/HarteTest_6502/simdjson/simdjson.cpp +++ b/M6502/HarteTest_6502/simdjson/simdjson.cpp @@ -1,4 +1,4 @@ -/* auto-generated on 2022-06-30 10:29:50 -0400. Do not edit! */ +/* auto-generated on 2023-03-13 21:26:32 -0400. Do not edit! */ /* begin file src/simdjson.cpp */ #include "simdjson.h" @@ -220,7 +220,7 @@ namespace simdjson { ? diyfp(4 * v.f - 1, v.e - 2) // (B) : diyfp(2 * v.f - 1, v.e - 1); // (A) -// Determine the normalized w+ = m+. + // Determine the normalized w+ = m+. const diyfp w_plus = diyfp::normalize(m_plus); // Determine w- = m- such that e_(w-) = e_(w+). 
@@ -520,12 +520,12 @@ namespace simdjson { diyfp::sub(M_plus, w) .f; // (significand of (M+ - w ), implicit exponent is e) - // Split M+ = f * 2^e into two parts p1 and p2 (note: e < 0): - // - // M+ = f * 2^e - // = ((f div 2^-e) * 2^-e + (f mod 2^-e)) * 2^e - // = ((p1 ) * 2^-e + (p2 )) * 2^e - // = p1 + p2 * 2^e + // Split M+ = f * 2^e into two parts p1 and p2 (note: e < 0): + // + // M+ = f * 2^e + // = ((f div 2^-e) * 2^-e + (f mod 2^-e)) * 2^e + // = ((p1 ) * 2^-e + (p2 )) * 2^e + // = p1 + p2 * 2^e const diyfp one(std::uint64_t{ 1 } << -M_plus.e, M_plus.e); @@ -879,9 +879,9 @@ namespace simdjson { std::memset(buf + k, '0', static_cast(n) - static_cast(k)); // Make it look like a floating-point number (#362, #378) - // buf[n + 0] = '.'; - // buf[n + 1] = '0'; - return buf + (static_cast(n)); + buf[n + 0] = '.'; + buf[n + 1] = '0'; + return buf + (static_cast(n)) + 2; } if (0 < n && n <= max_exp) { @@ -945,10 +945,8 @@ namespace simdjson { { *first++ = '0'; // Make it look like a floating-point number (#362, #378) - if (negative) { - *first++ = '.'; - *first++ = '0'; - } + *first++ = '.'; + *first++ = '0'; return first; } // Compute v = buffer * 10^decimal_exponent. @@ -1614,7 +1612,8 @@ namespace simdjson { { INSUFFICIENT_PADDING, "simdjson requires the input JSON string to have at least SIMDJSON_PADDING extra bytes allocated, beyond the string's length. Consider using the simdjson::padded_string class if needed." }, { INCOMPLETE_ARRAY_OR_OBJECT, "JSON document ended early in the middle of an object or array." }, { SCALAR_DOCUMENT_AS_VALUE, "A JSON document made of a scalar (number, Boolean, null or string) is treated as a value. Use get_bool(), get_double(), etc. on the document instead. 
"}, - { OUT_OF_BOUNDS, "Attempted to access location outside of document."} + { OUT_OF_BOUNDS, "Attempted to access location outside of document."}, + { TRAILING_CONTENT, "Unexpected trailing content in the JSON input."} }; // error_messages[] } // namespace internal @@ -2702,7 +2701,7 @@ namespace simdjson { simdjson_warn_unused bool validate_utf8(const char* buf, size_t len) const noexcept final override { return set_best()->validate_utf8(buf, len); } - simdjson_really_inline detect_best_supported_implementation_on_first_use() noexcept : implementation("best_supported_detector", "Detects the best supported implementation and sets it", 0) {} + simdjson_inline detect_best_supported_implementation_on_first_use() noexcept : implementation("best_supported_detector", "Detects the best supported implementation and sets it", 0) {} private: const implementation* set_best() const noexcept; }; @@ -2875,17 +2874,17 @@ namespace simdjson { using namespace simd; struct json_character_block { - static simdjson_really_inline json_character_block classify(const simd::simd8x64& in); + static simdjson_inline json_character_block classify(const simd::simd8x64& in); - simdjson_really_inline uint64_t whitespace() const noexcept { return _whitespace; } - simdjson_really_inline uint64_t op() const noexcept { return _op; } - simdjson_really_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } + simdjson_inline uint64_t whitespace() const noexcept { return _whitespace; } + simdjson_inline uint64_t op() const noexcept { return _op; } + simdjson_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } uint64_t _whitespace; uint64_t _op; }; - simdjson_really_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { + simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { // Functional programming causes trouble with Visual Studio. 
// Keeping this version in comments since it is much nicer: // auto v = in.map([&](simd8 chunk) { @@ -2927,27 +2926,27 @@ namespace simdjson { v.chunks[1].any_bits_set(0x7), v.chunks[2].any_bits_set(0x7), v.chunks[3].any_bits_set(0x7) - ).to_bitmask(); + ).to_bitmask(); uint64_t whitespace = simd8x64( v.chunks[0].any_bits_set(0x18), v.chunks[1].any_bits_set(0x18), v.chunks[2].any_bits_set(0x18), v.chunks[3].any_bits_set(0x18) - ).to_bitmask(); + ).to_bitmask(); return { whitespace, op }; } - simdjson_really_inline bool is_ascii(const simd8x64& input) { + simdjson_inline bool is_ascii(const simd8x64& input) { simd8 bits = input.reduce_or(); - return bits.max_val() < 0b10000000u; + return bits.max_val() < 0x80u; } - simdjson_unused simdjson_really_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { - simd8 is_second_byte = prev1 >= uint8_t(0b11000000u); - simd8 is_third_byte = prev2 >= uint8_t(0b11100000u); - simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u); + simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1 >= uint8_t(0xc0u); + simd8 is_third_byte = prev2 >= uint8_t(0xe0u); + simd8 is_fourth_byte = prev3 >= uint8_t(0xf0u); // Use ^ instead of | for is_*_byte, because ^ is commutative, and the caller is using ^ as well. // This will work fine because we only have to report errors for cases with 0-1 lead bytes. 
// Multiple lead bytes implies 2 overlapping multibyte characters, and if that happens, there is @@ -2956,9 +2955,9 @@ namespace simdjson { return is_second_byte ^ is_third_byte ^ is_fourth_byte; } - simdjson_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { - simd8 is_third_byte = prev2 >= uint8_t(0b11100000u); - simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u); + simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2 >= uint8_t(0xe0u); + simd8 is_fourth_byte = prev3 >= uint8_t(0xf0u); return is_third_byte ^ is_fourth_byte; } @@ -2974,7 +2973,7 @@ namespace simdjson { using namespace simd; - simdjson_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) // Bit 1 = Too Long (ASCII followed by continuation) // Bit 2 = Overlong 3-byte @@ -2982,20 +2981,20 @@ namespace simdjson { // Bit 5 = Overlong 2-byte // Bit 7 = Two Continuations constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ + // 11______ 11______ constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; // 
11110101 1000____ // 1111011_ 1000____ @@ -3016,7 +3015,7 @@ namespace simdjson { TOO_SHORT | OVERLONG_3 | SURROGATE, // 1111____ ________ TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 - ); + ); constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( // ____0000 ________ @@ -3045,7 +3044,7 @@ namespace simdjson { CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, CARRY | TOO_LARGE | TOO_LARGE_1000, CARRY | TOO_LARGE | TOO_LARGE_1000 - ); + ); const simd8 byte_2_high = input.shr<4>().lookup_16( // ________ 0_______ TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, @@ -3061,10 +3060,10 @@ namespace simdjson { // ________ 11______ TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT - ); + ); return (byte_1_high & byte_1_low & byte_2_high); } - simdjson_really_inline simd8 check_multibyte_lengths(const simd8 input, + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, const simd8 prev_input, const simd8 sc) { simd8 prev2 = input.prev<2>(prev_input); simd8 prev3 = input.prev<3>(prev_input); @@ -3077,7 +3076,7 @@ namespace simdjson { // Return nonzero if there are incomplete multibyte characters at the end of the block: // e.g. if there is a 4-byte character, but it's 3 bytes from the end. // - simdjson_really_inline simd8 is_incomplete(const simd8 input) { + simdjson_inline simd8 is_incomplete(const simd8 input) { // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): // ... 
1111____ 111_____ 11______ #if SIMDJSON_IMPLEMENTATION_ICELAKE @@ -3089,14 +3088,14 @@ namespace simdjson { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u - 1, 0b11100000u - 1, 0b11000000u - 1 + 255, 255, 255, 255, 255, 0xf0u - 1, 0xe0u - 1, 0xc0u - 1 }; #else static const uint8_t max_array[32] = { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u - 1, 0b11100000u - 1, 0b11000000u - 1 + 255, 255, 255, 255, 255, 0xf0u - 1, 0xe0u - 1, 0xc0u - 1 }; #endif const simd8 max_value(&max_array[sizeof(max_array) - sizeof(simd8)]); @@ -3114,7 +3113,7 @@ namespace simdjson { // // Check whether the current bytes are valid UTF-8. // - simdjson_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) simd8 prev1 = input.prev<1>(prev_input); @@ -3125,13 +3124,21 @@ namespace simdjson { // The only problem that can happen at EOF is that a multibyte character is too short // or a byte value too large in the last bytes: check_special_cases only checks for bytes // too large in the first of two bytes. - simdjson_really_inline void check_eof() { + simdjson_inline void check_eof() { // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't // possibly finish them. 
this->error |= this->prev_incomplete; } - simdjson_really_inline void check_next_input(const simd8x64& input) { +#ifndef SIMDJSON_IF_CONSTEXPR +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_IF_CONSTEXPR if constexpr +#else +#define SIMDJSON_IF_CONSTEXPR if +#endif +#endif + + simdjson_inline void check_next_input(const simd8x64& input) { if (simdjson_likely(is_ascii(input))) { this->error |= this->prev_incomplete; } @@ -3141,24 +3148,25 @@ namespace simdjson { || (simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4), "We support one, two or four chunks per 64-byte block."); - if (simd8x64::NUM_CHUNKS == 1) { + SIMDJSON_IF_CONSTEXPR(simd8x64::NUM_CHUNKS == 1) { this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - } if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); } - else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS - 1]); - this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS - 1]; +else SIMDJSON_IF_CONSTEXPR(simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } + else SIMDJSON_IF_CONSTEXPR(simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS - 1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS - 1]; } } // do not 
forget to call check_eof! - simdjson_really_inline error_code errors() { + simdjson_inline error_code errors() { return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS; } @@ -3186,10 +3194,10 @@ namespace simdjson { template struct buf_block_reader { public: - simdjson_really_inline buf_block_reader(const uint8_t* _buf, size_t _len); - simdjson_really_inline size_t block_index(); - simdjson_really_inline bool has_full_block() const; - simdjson_really_inline const uint8_t* full_block() const; + simdjson_inline buf_block_reader(const uint8_t* _buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t* full_block() const; /** * Get the last block, padded with spaces. * @@ -3199,8 +3207,8 @@ namespace simdjson { * * @return the number of effective characters in the last block. */ - simdjson_really_inline size_t get_remainder(uint8_t* dst) const; - simdjson_really_inline void advance(); + simdjson_inline size_t get_remainder(uint8_t* dst) const; + simdjson_inline void advance(); private: const uint8_t* buf; const size_t len; @@ -3239,23 +3247,23 @@ namespace simdjson { } template - simdjson_really_inline buf_block_reader::buf_block_reader(const uint8_t* _buf, size_t _len) : buf{ _buf }, len{ _len }, lenminusstep{ len < STEP_SIZE ? 0 : len - STEP_SIZE }, idx{ 0 } {} + simdjson_inline buf_block_reader::buf_block_reader(const uint8_t* _buf, size_t _len) : buf{ _buf }, len{ _len }, lenminusstep{ len < STEP_SIZE ? 
0 : len - STEP_SIZE }, idx{ 0 } {} template - simdjson_really_inline size_t buf_block_reader::block_index() { return idx; } + simdjson_inline size_t buf_block_reader::block_index() { return idx; } template - simdjson_really_inline bool buf_block_reader::has_full_block() const { + simdjson_inline bool buf_block_reader::has_full_block() const { return idx < lenminusstep; } template - simdjson_really_inline const uint8_t* buf_block_reader::full_block() const { + simdjson_inline const uint8_t* buf_block_reader::full_block() const { return &buf[idx]; } template - simdjson_really_inline size_t buf_block_reader::get_remainder(uint8_t* dst) const { + simdjson_inline size_t buf_block_reader::get_remainder(uint8_t* dst) const { if (len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. std::memcpy(dst, buf + idx, len - idx); @@ -3263,7 +3271,7 @@ namespace simdjson { } template - simdjson_really_inline void buf_block_reader::advance() { + simdjson_inline void buf_block_reader::advance() { idx += STEP_SIZE; } @@ -3279,27 +3287,27 @@ namespace simdjson { struct json_string_block { // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : + simdjson_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : _backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {} // Escaped characters (characters following an escape() character) - simdjson_really_inline uint64_t escaped() const { return _escaped; } + simdjson_inline uint64_t escaped() const { return _escaped; } // Escape characters (backslashes that are not escaped--i.e. 
in \\, includes only the first \) - simdjson_really_inline uint64_t escape() const { return _backslash & ~_escaped; } + simdjson_inline uint64_t escape() const { return _backslash & ~_escaped; } // Real (non-backslashed) quotes - simdjson_really_inline uint64_t quote() const { return _quote; } + simdjson_inline uint64_t quote() const { return _quote; } // Start quotes of strings - simdjson_really_inline uint64_t string_start() const { return _quote & _in_string; } + simdjson_inline uint64_t string_start() const { return _quote & _in_string; } // End quotes of strings - simdjson_really_inline uint64_t string_end() const { return _quote & ~_in_string; } + simdjson_inline uint64_t string_end() const { return _quote & ~_in_string; } // Only characters inside the string (not including the quotes) - simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + simdjson_inline uint64_t string_content() const { return _in_string & ~_quote; } // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } // Tail of string (everything except the start quote) - simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } + simdjson_inline uint64_t string_tail() const { return _in_string ^ _quote; } // backslash characters uint64_t _backslash; @@ -3314,14 +3322,14 @@ namespace simdjson { // Scans blocks for string characters, storing the state necessary to do so class 
json_string_scanner { public: - simdjson_really_inline json_string_block next(const simd::simd8x64& in); + simdjson_inline json_string_block next(const simd::simd8x64& in); // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); + simdjson_inline error_code finish(); private: // Intended to be defined by the implementation - simdjson_really_inline uint64_t find_escaped(uint64_t escape); - simdjson_really_inline uint64_t find_escaped_branchless(uint64_t escape); + simdjson_inline uint64_t find_escaped(uint64_t escape); + simdjson_inline uint64_t find_escaped_branchless(uint64_t escape); // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). uint64_t prev_in_string = 0ULL; @@ -3356,7 +3364,7 @@ namespace simdjson { // desired | x | x x x x x x x x | // text | \\\ | \\\"\\\" \\\" \\"\\" | // - simdjson_really_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { + simdjson_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { // If there was overflow, pretend the first character isn't a backslash backslash &= ~prev_escaped; uint64_t follows_escape = backslash << 1 | prev_escaped; @@ -3381,7 +3389,7 @@ namespace simdjson { // // Backslash sequences outside of quotes will be detected in stage 2. 
// - simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + simdjson_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { const uint64_t backslash = in.eq('\\'); const uint64_t escaped = find_escaped(backslash); const uint64_t quote = in.eq('"') & ~escaped; @@ -3414,7 +3422,7 @@ namespace simdjson { ); } - simdjson_really_inline error_code json_string_scanner::finish() { + simdjson_inline error_code json_string_scanner::finish() { if (prev_in_string) { return UNCLOSED_STRING; } @@ -3452,25 +3460,25 @@ namespace simdjson { struct json_block { public: // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} - simdjson_really_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} /** * The start of structurals. * In simdjson prior to v0.3, these were called the pseudo-structural characters. **/ - simdjson_really_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } /** All JSON whitespace (i.e. 
not in a string) */ - simdjson_really_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } // Helpers /** Whether the given characters are inside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } /** Whether the given characters are outside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } // string and escape characters json_string_block _string; @@ -3485,12 +3493,12 @@ namespace simdjson { * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". * They may reside inside a string. **/ - simdjson_really_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } /** * The start of non-operator runs, like 123, true and "abc". * It main reside inside a string. **/ - simdjson_really_inline uint64_t potential_scalar_start() const noexcept { + simdjson_inline uint64_t potential_scalar_start() const noexcept { // The term "scalar" refers to anything except structural characters and white space // (so letters, numbers, quotes). 
// Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space @@ -3501,7 +3509,7 @@ namespace simdjson { * Whether the given character is immediately after a non-operator like 123, true. * The characters following a quote are not included. */ - simdjson_really_inline uint64_t follows_potential_scalar() const noexcept { + simdjson_inline uint64_t follows_potential_scalar() const noexcept { // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a // white space. @@ -3525,10 +3533,10 @@ namespace simdjson { */ class json_scanner { public: - json_scanner() {} - simdjson_really_inline json_block next(const simd::simd8x64& in); + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); + simdjson_inline error_code finish(); private: // Whether the last character of the previous iteration is part of a scalar token @@ -3545,13 +3553,13 @@ namespace simdjson { // // const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); // - simdjson_really_inline uint64_t follows(const uint64_t match, uint64_t& overflow) { + simdjson_inline uint64_t follows(const uint64_t match, uint64_t& overflow) { const uint64_t result = match << 1 | overflow; overflow = match >> 63; return result; } - simdjson_really_inline json_block json_scanner::next(const simd::simd8x64& in) { + simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { json_string_block strings = string_scanner.next(in); // identifies the white-space and the structural characters json_character_block characters = json_character_block::classify(in); @@ -3576,7 +3584,7 @@ namespace simdjson { ); } - simdjson_really_inline error_code json_scanner::finish() { + simdjson_inline error_code 
json_scanner::finish() { return string_scanner.finish(); } @@ -3602,23 +3610,23 @@ namespace simdjson { static error_code minify(const uint8_t* buf, size_t len, uint8_t* dst, size_t& dst_len) noexcept; private: - simdjson_really_inline json_minifier(uint8_t* _dst) + simdjson_inline json_minifier(uint8_t* _dst) : dst{ _dst } {} template - simdjson_really_inline void step(const uint8_t* block_buf, buf_block_reader& reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block); - simdjson_really_inline error_code finish(uint8_t* dst_start, size_t& dst_len); + simdjson_inline void step(const uint8_t* block_buf, buf_block_reader& reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t* dst_start, size_t& dst_len); json_scanner scanner{}; uint8_t* dst; }; - simdjson_really_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { uint64_t mask = block.whitespace(); dst += in.compress(mask, dst); } - simdjson_really_inline error_code json_minifier::finish(uint8_t* dst_start, size_t& dst_len) { + simdjson_inline error_code json_minifier::finish(uint8_t* dst_start, size_t& dst_len) { error_code error = scanner.finish(); if (error) { dst_len = 0; return error; } dst_len = dst - dst_start; @@ -3626,7 +3634,7 @@ namespace simdjson { } template<> - simdjson_really_inline void json_minifier::step<128>(const uint8_t* block_buf, buf_block_reader<128>& reader) noexcept { + simdjson_inline void json_minifier::step<128>(const uint8_t* block_buf, buf_block_reader<128>& reader) noexcept { simd::simd8x64 in_1(block_buf); simd::simd8x64 in_2(block_buf + 64); json_block block_1 = scanner.next(in_1); @@ -3637,7 +3645,7 @@ namespace simdjson { } template<> - simdjson_really_inline void json_minifier::step<64>(const uint8_t* block_buf, 
buf_block_reader<64>& reader) noexcept { + simdjson_inline void json_minifier::step<64>(const uint8_t* block_buf, buf_block_reader<64>& reader) noexcept { simd::simd8x64 in_1(block_buf); json_block block_1 = scanner.next(in_1); this->next(block_buf, block_1); @@ -3710,7 +3718,7 @@ namespace simdjson { * complete document, therefore the last json buffer location is the end of the * batch. */ - simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation& parser) { + simdjson_inline uint32_t find_next_document_index(dom_parser_implementation& parser) { // Variant: do not count separately, just figure out depth if (parser.n_structural_indexes == 0) { return 0; } auto arr_cnt = 0; @@ -3785,7 +3793,7 @@ namespace simdjson { public: uint32_t* tail; - simdjson_really_inline bit_indexer(uint32_t* index_buf) : tail(index_buf) {} + simdjson_inline bit_indexer(uint32_t* index_buf) : tail(index_buf) {} // flatten out values in 'bits' assuming that they are are to have values of idx // plus their position in the bitvector, and store these indexes at @@ -3796,15 +3804,15 @@ namespace simdjson { // If the kernel sets SIMDJSON_CUSTOM_BIT_INDEXER, then it will provide its own // version of the code. #ifdef SIMDJSON_CUSTOM_BIT_INDEXER - simdjson_really_inline void write(uint32_t idx, uint64_t bits); + simdjson_inline void write(uint32_t idx, uint64_t bits); #else - simdjson_really_inline void write(uint32_t idx, uint64_t bits) { + simdjson_inline void write(uint32_t idx, uint64_t bits) { // In some instances, the next branch is expensive because it is mispredicted. // Unfortunately, in other cases, // it helps tremendously. if (bits == 0) return; -#if defined(SIMDJSON_PREFER_REVERSE_BITS) +#if SIMDJSON_PREFER_REVERSE_BITS /** * ARM lacks a fast trailing zero instruction, but it has a fast * bit reversal instruction and a fast leading zero instruction. 
@@ -3907,11 +3915,11 @@ namespace simdjson { static error_code index(const uint8_t* buf, size_t len, dom_parser_implementation& parser, stage1_mode partial) noexcept; private: - simdjson_really_inline json_structural_indexer(uint32_t* structural_indexes); + simdjson_inline json_structural_indexer(uint32_t* structural_indexes); template - simdjson_really_inline void step(const uint8_t* block, buf_block_reader& reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); - simdjson_really_inline error_code finish(dom_parser_implementation& parser, size_t idx, size_t len, stage1_mode partial); + simdjson_inline void step(const uint8_t* block, buf_block_reader& reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation& parser, size_t idx, size_t len, stage1_mode partial); json_scanner scanner{}; utf8_checker checker{}; @@ -3920,26 +3928,26 @@ namespace simdjson { uint64_t unescaped_chars_error = 0; }; - simdjson_really_inline json_structural_indexer::json_structural_indexer(uint32_t* structural_indexes) : indexer{ structural_indexes } {} + simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t* structural_indexes) : indexer{ structural_indexes } {} // Skip the last character if it is partial - simdjson_really_inline size_t trim_partial_utf8(const uint8_t* buf, size_t len) { + simdjson_inline size_t trim_partial_utf8(const uint8_t* buf, size_t len) { if (simdjson_unlikely(len < 3)) { switch (len) { case 2: - if (buf[len - 1] >= 0b11000000) { return len - 1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len - 2] >= 0b11100000) { return len - 2; } // 3- and 4-byte characters with only 2 bytes left + if (buf[len - 1] >= 0xc0) { return len - 1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len - 2] >= 0xe0) { return len - 2; } // 3- and 4-byte 
characters with only 2 bytes left return len; case 1: - if (buf[len - 1] >= 0b11000000) { return len - 1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len - 1] >= 0xc0) { return len - 1; } // 2-, 3- and 4-byte characters with only 1 byte left return len; case 0: return len; } } - if (buf[len - 1] >= 0b11000000) { return len - 1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len - 2] >= 0b11100000) { return len - 2; } // 3- and 4-byte characters with only 1 byte left - if (buf[len - 3] >= 0b11110000) { return len - 3; } // 4-byte characters with only 3 bytes left + if (buf[len - 1] >= 0xc0) { return len - 1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len - 2] >= 0xe0) { return len - 2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len - 3] >= 0xf0) { return len - 3; } // 4-byte characters with only 3 bytes left return len; } @@ -3988,7 +3996,7 @@ namespace simdjson { } template<> - simdjson_really_inline void json_structural_indexer::step<128>(const uint8_t* block, buf_block_reader<128>& reader) noexcept { + simdjson_inline void json_structural_indexer::step<128>(const uint8_t* block, buf_block_reader<128>& reader) noexcept { simd::simd8x64 in_1(block); simd::simd8x64 in_2(block + 64); json_block block_1 = scanner.next(in_1); @@ -3999,22 +4007,24 @@ namespace simdjson { } template<> - simdjson_really_inline void json_structural_indexer::step<64>(const uint8_t* block, buf_block_reader<64>& reader) noexcept { + simdjson_inline void json_structural_indexer::step<64>(const uint8_t* block, buf_block_reader<64>& reader) noexcept { simd::simd8x64 in_1(block); json_block block_1 = scanner.next(in_1); this->next(in_1, block_1, reader.block_index()); reader.advance(); } - simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, 
size_t idx) { uint64_t unescaped = in.lteq(0x1F); +#if SIMDJSON_UTF8VALIDATION checker.check_next_input(in); +#endif indexer.write(uint32_t(idx - 64), prev_structurals); // Output *last* iteration's structurals to the parser prev_structurals = block.structural_start(); unescaped_chars_error |= block.non_quote_inside_string(unescaped); } - simdjson_really_inline error_code json_structural_indexer::finish(dom_parser_implementation& parser, size_t idx, size_t len, stage1_mode partial) { + simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation& parser, size_t idx, size_t len, stage1_mode partial) { // Write out the final iteration's structurals indexer.write(uint32_t(idx - 64), prev_structurals); error_code error = scanner.finish(); @@ -4206,39 +4216,100 @@ namespace simdjson { // return true if the unicode codepoint was valid // We work in little-endian then swap at write time simdjson_warn_unused - simdjson_really_inline bool handle_unicode_codepoint(const uint8_t** src_ptr, - uint8_t** dst_ptr) { + simdjson_inline bool handle_unicode_codepoint(const uint8_t** src_ptr, + uint8_t** dst_ptr, bool allow_replacement) { + // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) + constexpr uint32_t substitution_code_point = 0xfffd; // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the // conversion isn't valid; we defer the check for this to inside the // multilingual plane check uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); *src_ptr += 6; - // check for low surrogate for characters outside the Basic + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic // Multilingual Plane. 
if (code_point >= 0xd800 && code_point < 0xdc00) { - if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { - return false; + const uint8_t* src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + if (!allow_replacement) { return false; } + code_point = substitution_code_point; } - uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + else { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + if (!allow_replacement) { return false; } + code_point = substitution_code_point; + } + else { + code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } - // if the first code point is invalid we will get here, as we will go past - // the check for being outside the Basic Multilingual plane. If we don't - // find a \u immediately afterwards we fail out anyhow, but if we do, - // this check catches both the case of the first code point being invalid - // or the second code point being invalid. - if ((code_point | code_point_2) >> 16) { - return false; } - - code_point = - (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; - *src_ptr += 6; + } + else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. 
+ if (!allow_replacement) { return false; } + code_point = substitution_code_point; } size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); *dst_ptr += offset; return offset > 0; } + + // handle a unicode codepoint using the wobbly convention + // https://simonsapin.github.io/wtf-8/ + // write appropriate values into dest + // src will advance 6 bytes or 12 bytes + // dest will advance a variable amount (return via pointer) + // return true if the unicode codepoint was valid + // We work in little-endian then swap at write time + simdjson_warn_unused + simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t** src_ptr, + uint8_t** dst_ptr) { + // It is not ideal that this function is nearly identical to handle_unicode_codepoint. + // + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t* src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + uint32_t low_bit = code_point_2 - 0xdc00; + if ((low_bit >> 10) == 0) { + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + } + } + + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; + } + + /** * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. 
It returns the final output @@ -4247,7 +4318,7 @@ namespace simdjson { * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. */ - simdjson_warn_unused simdjson_really_inline uint8_t* parse_string(const uint8_t* src, uint8_t* dst) { + simdjson_warn_unused simdjson_inline uint8_t* parse_string(const uint8_t* src, uint8_t* dst, bool allow_replacement) { while (1) { // Copy the next n bytes, and find the backslash and quote in them. auto bs_quote = backslash_and_quote::copy_and_find(src, dst); @@ -4266,7 +4337,56 @@ namespace simdjson { within the unicode codepoint handling code. */ src += bs_dist; dst += bs_dist; - if (!handle_unicode_codepoint(&src, &dst)) { + if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { + return nullptr; + } + } + else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } + else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; + } + + simdjson_warn_unused simdjson_inline uint8_t* parse_wobbly_string(const uint8_t* src, uint8_t* dst) { + // It is not ideal that this function is nearly identical to parse_string. + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. 
Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint_wobbly(&src, &dst)) { return nullptr; } } @@ -4325,7 +4445,7 @@ namespace simdjson { static int log_depth; // Not threadsafe. Log only. // Helper to turn unprintable or newline characters into spaces - static simdjson_really_inline char printable_char(char c) { + static simdjson_inline char printable_char(char c) { if (c >= 0x20) { return c; } @@ -4335,7 +4455,7 @@ namespace simdjson { } // Print the header and set up log_start - static simdjson_really_inline void log_start() { + static simdjson_inline void log_start() { if (LOG_ENABLED) { log_depth = 0; printf("\n"); @@ -4344,7 +4464,7 @@ namespace simdjson { } } - simdjson_unused static simdjson_really_inline void log_string(const char* message) { + simdjson_unused static simdjson_inline void log_string(const char* message) { if (LOG_ENABLED) { printf("%s\n", message); } @@ -4352,7 +4472,7 @@ namespace simdjson { // Logs a single line from the stage 2 DOM parser template - static simdjson_really_inline void log_line(S& structurals, const char* title_prefix, const char* title, const char* detail) { + static simdjson_inline void log_line(S& structurals, const char* title_prefix, const char* title, const char* detail) { if (LOG_ENABLED) { printf("| %*s%s%-*s ", log_depth * 2, "", title_prefix, LOG_EVENT_LEN - log_depth * 2 - int(strlen(title_prefix)), title); auto current_index = structurals.at_beginning() ? 
nullptr : structurals.next_structural - 1; @@ -4431,14 +4551,14 @@ namespace simdjson { * - increment_count(iter) - each time a value is found in an array or object. */ template - simdjson_warn_unused simdjson_really_inline error_code walk_document(V& visitor) noexcept; + simdjson_warn_unused simdjson_inline error_code walk_document(V& visitor) noexcept; /** * Create an iterator capable of walking a JSON document. * * The document must have already passed through stage 1. */ - simdjson_really_inline json_iterator(dom_parser_implementation& _dom_parser, size_t start_structural_index); + simdjson_inline json_iterator(dom_parser_implementation& _dom_parser, size_t start_structural_index); /** * Look at the next token. @@ -4447,7 +4567,7 @@ namespace simdjson { * * They may include invalid JSON as well (such as `1.2.3` or `ture`). */ - simdjson_really_inline const uint8_t* peek() const noexcept; + simdjson_inline const uint8_t* peek() const noexcept; /** * Advance to the next token. * @@ -4455,56 +4575,56 @@ namespace simdjson { * * They may include invalid JSON as well (such as `1.2.3` or `ture`). */ - simdjson_really_inline const uint8_t* advance() noexcept; + simdjson_inline const uint8_t* advance() noexcept; /** * Get the remaining length of the document, from the start of the current token. */ - simdjson_really_inline size_t remaining_len() const noexcept; + simdjson_inline size_t remaining_len() const noexcept; /** * Check if we are at the end of the document. * * If this is true, there are no more tokens. */ - simdjson_really_inline bool at_eof() const noexcept; + simdjson_inline bool at_eof() const noexcept; /** * Check if we are at the beginning of the document. */ - simdjson_really_inline bool at_beginning() const noexcept; - simdjson_really_inline uint8_t last_structural() const noexcept; + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; /** * Log that a value has been found. 
* * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_value(const char* type) const noexcept; + simdjson_inline void log_value(const char* type) const noexcept; /** * Log the start of a multipart value. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_start_value(const char* type) const noexcept; + simdjson_inline void log_start_value(const char* type) const noexcept; /** * Log the end of a multipart value. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_end_value(const char* type) const noexcept; + simdjson_inline void log_end_value(const char* type) const noexcept; /** * Log an error. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_error(const char* error) const noexcept; + simdjson_inline void log_error(const char* error) const noexcept; template - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(V& visitor, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V& visitor, const uint8_t* value) noexcept; template - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(V& visitor, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_primitive(V& visitor, const uint8_t* value) noexcept; }; template - simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_document(V& visitor) noexcept { + simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V& visitor) noexcept { logger::log_start(); // @@ -4629,52 +4749,52 @@ namespace simdjson { } // walk_document() - simdjson_really_inline json_iterator::json_iterator(dom_parser_implementation& _dom_parser, size_t start_structural_index) + simdjson_inline json_iterator::json_iterator(dom_parser_implementation& _dom_parser, size_t start_structural_index) : buf{ _dom_parser.buf }, next_structural{ 
&_dom_parser.structural_indexes[start_structural_index] }, dom_parser{ _dom_parser } { } - simdjson_really_inline const uint8_t* json_iterator::peek() const noexcept { + simdjson_inline const uint8_t* json_iterator::peek() const noexcept { return &buf[*(next_structural)]; } - simdjson_really_inline const uint8_t* json_iterator::advance() noexcept { + simdjson_inline const uint8_t* json_iterator::advance() noexcept { return &buf[*(next_structural++)]; } - simdjson_really_inline size_t json_iterator::remaining_len() const noexcept { + simdjson_inline size_t json_iterator::remaining_len() const noexcept { return dom_parser.len - *(next_structural - 1); } - simdjson_really_inline bool json_iterator::at_eof() const noexcept { + simdjson_inline bool json_iterator::at_eof() const noexcept { return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; } - simdjson_really_inline bool json_iterator::at_beginning() const noexcept { + simdjson_inline bool json_iterator::at_beginning() const noexcept { return next_structural == dom_parser.structural_indexes.get(); } - simdjson_really_inline uint8_t json_iterator::last_structural() const noexcept { + simdjson_inline uint8_t json_iterator::last_structural() const noexcept { return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; } - simdjson_really_inline void json_iterator::log_value(const char* type) const noexcept { + simdjson_inline void json_iterator::log_value(const char* type) const noexcept { logger::log_line(*this, "", type, ""); } - simdjson_really_inline void json_iterator::log_start_value(const char* type) const noexcept { + simdjson_inline void json_iterator::log_start_value(const char* type) const noexcept { logger::log_line(*this, "+", type, ""); if (logger::LOG_ENABLED) { logger::log_depth++; } } - simdjson_really_inline void json_iterator::log_end_value(const char* type) const noexcept { + simdjson_inline void json_iterator::log_end_value(const char* type) 
const noexcept { if (logger::LOG_ENABLED) { logger::log_depth--; } logger::log_line(*this, "-", type, ""); } - simdjson_really_inline void json_iterator::log_error(const char* error) const noexcept { + simdjson_inline void json_iterator::log_error(const char* error) const noexcept { logger::log_line(*this, "", "ERROR", error); } template - simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root_primitive(V& visitor, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V& visitor, const uint8_t* value) noexcept { switch (*value) { case '"': return visitor.visit_root_string(*this, value); case 't': return visitor.visit_root_true_atom(*this, value); @@ -4690,7 +4810,7 @@ namespace simdjson { } } template - simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_primitive(V& visitor, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V& visitor, const uint8_t* value) noexcept { switch (*value) { case '"': return visitor.visit_string(*this, value); case 't': return visitor.visit_true_atom(*this, value); @@ -4722,18 +4842,18 @@ namespace simdjson { uint64_t* next_tape_loc; /** Write a signed 64-bit value to tape. */ - simdjson_really_inline void append_s64(int64_t value) noexcept; + simdjson_inline void append_s64(int64_t value) noexcept; /** Write an unsigned 64-bit value to tape. */ - simdjson_really_inline void append_u64(uint64_t value) noexcept; + simdjson_inline void append_u64(uint64_t value) noexcept; /** Write a double value to tape. */ - simdjson_really_inline void append_double(double value) noexcept; + simdjson_inline void append_double(double value) noexcept; /** * Append a tape entry (an 8-bit type,and 56 bits worth of value). 
*/ - simdjson_really_inline void append(uint64_t val, internal::tape_type t) noexcept; + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; /** * Skip the current tape entry without writing. @@ -4741,24 +4861,24 @@ namespace simdjson { * Used to skip the start of the container, since we'll come back later to fill it in when the * container ends. */ - simdjson_really_inline void skip() noexcept; + simdjson_inline void skip() noexcept; /** * Skip the number of tape entries necessary to write a large u64 or i64. */ - simdjson_really_inline void skip_large_integer() noexcept; + simdjson_inline void skip_large_integer() noexcept; /** * Skip the number of tape entries necessary to write a double. */ - simdjson_really_inline void skip_double() noexcept; + simdjson_inline void skip_double() noexcept; /** * Write a value to a known location on tape. * * Used to go back and write out the start of a container after the container ends. */ - simdjson_really_inline static void write(uint64_t& tape_loc, uint64_t val, internal::tape_type t) noexcept; + simdjson_inline static void write(uint64_t& tape_loc, uint64_t val, internal::tape_type t) noexcept; private: /** @@ -4766,50 +4886,50 @@ namespace simdjson { * all 64 bits, such as double and uint64_t. */ template - simdjson_really_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; }; // struct number_writer - simdjson_really_inline void tape_writer::append_s64(int64_t value) noexcept { + simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { append2(0, value, internal::tape_type::INT64); } - simdjson_really_inline void tape_writer::append_u64(uint64_t value) noexcept { + simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { append(0, internal::tape_type::UINT64); *next_tape_loc = value; next_tape_loc++; } /** Write a double value to tape. 
*/ - simdjson_really_inline void tape_writer::append_double(double value) noexcept { + simdjson_inline void tape_writer::append_double(double value) noexcept { append2(0, value, internal::tape_type::DOUBLE); } - simdjson_really_inline void tape_writer::skip() noexcept { + simdjson_inline void tape_writer::skip() noexcept { next_tape_loc++; } - simdjson_really_inline void tape_writer::skip_large_integer() noexcept { + simdjson_inline void tape_writer::skip_large_integer() noexcept { next_tape_loc += 2; } - simdjson_really_inline void tape_writer::skip_double() noexcept { + simdjson_inline void tape_writer::skip_double() noexcept { next_tape_loc += 2; } - simdjson_really_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { *next_tape_loc = val | ((uint64_t(char(t))) << 56); next_tape_loc++; } template - simdjson_really_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { append(val, t); static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); memcpy(next_tape_loc, &val2, sizeof(val2)); next_tape_loc++; } - simdjson_really_inline void tape_writer::write(uint64_t& tape_loc, uint64_t val, internal::tape_type t) noexcept { + simdjson_inline void tape_writer::write(uint64_t& tape_loc, uint64_t val, internal::tape_type t) noexcept { tape_loc = val | ((uint64_t(char(t))) << 56); } @@ -4826,40 +4946,40 @@ namespace simdjson { struct tape_builder { template - simdjson_warn_unused static simdjson_really_inline error_code parse_document( + simdjson_warn_unused static simdjson_inline error_code parse_document( dom_parser_implementation& dom_parser, dom::document& doc) noexcept; /** Called when a non-empty document starts. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_document_start(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator& iter) noexcept; /** Called when a non-empty document ends without error. */ - simdjson_warn_unused simdjson_really_inline error_code visit_document_end(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator& iter) noexcept; /** Called when a non-empty array starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_start(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator& iter) noexcept; /** Called when a non-empty array ends. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_end(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator& iter) noexcept; /** Called when an empty array is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_array(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator& iter) noexcept; /** Called when a non-empty object starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_object_start(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator& iter) noexcept; /** * Called when a key in a field is encountered. * * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array * will be called after this with the field value. */ - simdjson_warn_unused simdjson_really_inline error_code visit_key(json_iterator& iter, const uint8_t* key) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator& iter, const uint8_t* key) noexcept; /** Called when a non-empty object ends. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_object_end(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator& iter) noexcept; /** Called when an empty object is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_object(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator& iter) noexcept; /** * Called when a string, number, boolean or null is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator& iter, const uint8_t* value) noexcept; /** * Called when a string, number, boolean or null is found at the top level of a document (i.e. * when there is no array or object and the entire document is a single string, number, boolean or @@ -4868,22 +4988,22 @@ namespace simdjson { * This is separate from primitive() because simdjson's normal primitive parsing routines assume * there is at least one more token after the value, which is only true in an array or object. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_string(json_iterator& iter, const uint8_t* value, bool key = false) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_number(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_true_atom(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_false_atom(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_null_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator& iter, const uint8_t* value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_string(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_number(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_true_atom(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_false_atom(json_iterator& iter, 
const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_null_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator& iter, const uint8_t* value) noexcept; /** Called each time a new field or element in an array or object is found. */ - simdjson_warn_unused simdjson_really_inline error_code increment_count(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator& iter) noexcept; /** Next location to write to tape */ tape_writer tape; @@ -4891,18 +5011,18 @@ namespace simdjson { /** Next write location in the string buf for stage 2 parsing */ uint8_t* current_string_buf_loc; - simdjson_really_inline tape_builder(dom::document& doc) noexcept; + simdjson_inline tape_builder(dom::document& doc) noexcept; - simdjson_really_inline uint32_t next_tape_index(json_iterator& iter) const noexcept; - simdjson_really_inline void start_container(json_iterator& iter) noexcept; - simdjson_warn_unused simdjson_really_inline error_code end_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_warn_unused simdjson_really_inline error_code empty_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_really_inline uint8_t* on_start_string(json_iterator& iter) noexcept; - simdjson_really_inline void on_end_string(uint8_t* dst) 
noexcept; + simdjson_inline uint32_t next_tape_index(json_iterator& iter) const noexcept; + simdjson_inline void start_container(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t* on_start_string(json_iterator& iter) noexcept; + simdjson_inline void on_end_string(uint8_t* dst) noexcept; }; // class tape_builder template - simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_document( + simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( dom_parser_implementation& dom_parser, dom::document& doc) noexcept { dom_parser.doc = &doc; @@ -4911,59 +5031,59 @@ namespace simdjson { return iter.walk_document(builder); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_primitive(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator& iter, const uint8_t* value) noexcept { return iter.visit_root_primitive(*this, value); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_primitive(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator& iter, const uint8_t* value) noexcept { return iter.visit_primitive(*this, value); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_object(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator& iter) noexcept { return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); } - simdjson_warn_unused 
simdjson_really_inline error_code tape_builder::visit_empty_array(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator& iter) noexcept { return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_start(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator& iter) noexcept { start_container(iter); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_start(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator& iter) noexcept { start_container(iter); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_start(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator& iter) noexcept { start_container(iter); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_end(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator& iter) noexcept { return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_end(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator& iter) noexcept { return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_end(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code 
tape_builder::visit_document_end(json_iterator& iter) noexcept { constexpr uint32_t start_tape_index = 0; tape.append(start_tape_index, internal::tape_type::ROOT); tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_key(json_iterator& iter, const uint8_t* key) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator& iter, const uint8_t* key) noexcept { return visit_string(iter, key, true); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::increment_count(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator& iter) noexcept { iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 return SUCCESS; } - simdjson_really_inline tape_builder::tape_builder(dom::document& doc) noexcept : tape{ doc.tape.get() }, current_string_buf_loc{ doc.string_buf.get() } {} + simdjson_inline tape_builder::tape_builder(dom::document& doc) noexcept : tape{ doc.tape.get() }, current_string_buf_loc{ doc.string_buf.get() } {} - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_string(json_iterator& iter, const uint8_t* value, bool key) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator& iter, const uint8_t* value, bool key) noexcept { iter.log_value(key ? "key" : "string"); uint8_t* dst = on_start_string(iter); - dst = stringparsing::parse_string(value + 1, dst); + dst = stringparsing::parse_string(value + 1, dst, false); // We do not allow replacement when the escape characters are invalid. 
if (dst == nullptr) { iter.log_error("Invalid escape in string"); return STRING_ERROR; @@ -4972,16 +5092,16 @@ namespace simdjson { return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_string(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator& iter, const uint8_t* value) noexcept { return visit_string(iter, value); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_number(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("number"); return numberparsing::parse_number(value, tape); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_number(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator& iter, const uint8_t* value) noexcept { // // We need to make a copy to make sure that the string is space terminated. 
// This is not about padding the input, which should already padded up @@ -5003,42 +5123,42 @@ namespace simdjson { return error; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_true_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("true"); if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } tape.append(0, internal::tape_type::TRUE_VALUE); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_true_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("true"); if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } tape.append(0, internal::tape_type::TRUE_VALUE); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_false_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("false"); if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } tape.append(0, internal::tape_type::FALSE_VALUE); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_false_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("false"); if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } tape.append(0, internal::tape_type::FALSE_VALUE); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline 
error_code tape_builder::visit_null_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("null"); if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } tape.append(0, internal::tape_type::NULL_VALUE); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_null_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("null"); if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } tape.append(0, internal::tape_type::NULL_VALUE); @@ -5047,24 +5167,24 @@ namespace simdjson { // private: - simdjson_really_inline uint32_t tape_builder::next_tape_index(json_iterator& iter) const noexcept { + simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator& iter) const noexcept { return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::empty_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept { auto start_index = next_tape_index(iter); tape.append(start_index + 2, start); tape.append(start_index, end); return SUCCESS; } - simdjson_really_inline void tape_builder::start_container(json_iterator& iter) noexcept { + simdjson_inline void tape_builder::start_container(json_iterator& iter) noexcept { iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); iter.dom_parser.open_containers[iter.depth].count = 0; tape.skip(); // We don't actually *write* the start 
element until the end. } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept { // Write the ending tape element, pointing at the start location const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; tape.append(start_tape_index, end); @@ -5077,13 +5197,13 @@ namespace simdjson { return SUCCESS; } - simdjson_really_inline uint8_t* tape_builder::on_start_string(json_iterator& iter) noexcept { + simdjson_inline uint8_t* tape_builder::on_start_string(json_iterator& iter) noexcept { // we advance the point, accounting for the fact that we have a NULL termination tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); return current_string_buf_loc + sizeof(uint32_t); } - simdjson_really_inline void tape_builder::on_end_string(uint8_t* dst) noexcept { + simdjson_inline void tape_builder::on_end_string(uint8_t* dst) noexcept { uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); // TODO check for overflow in case someone has a crazy string (>=4GB?) // But only add the overflow check when the document itself exceeds 4GB @@ -5109,7 +5229,7 @@ namespace simdjson { namespace { namespace stage1 { - simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { + simdjson_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { // On ARM, we don't short-circuit this if there are no backslashes, because the branch gives us no // benefit and therefore makes things worse. 
// if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; } @@ -5141,8 +5261,12 @@ namespace simdjson { return stage2::tape_builder::parse_document(*this, _doc); } - simdjson_warn_unused uint8_t* dom_parser_implementation::parse_string(const uint8_t* src, uint8_t* dst) const noexcept { - return arm64::stringparsing::parse_string(src, dst); + simdjson_warn_unused uint8_t* dom_parser_implementation::parse_string(const uint8_t* src, uint8_t* dst, bool allow_replacement) const noexcept { + return arm64::stringparsing::parse_string(src, dst, allow_replacement); + } + + simdjson_warn_unused uint8_t* dom_parser_implementation::parse_wobbly_string(const uint8_t* src, uint8_t* dst) const noexcept { + return arm64::stringparsing::parse_wobbly_string(src, dst); } simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t* _buf, size_t _len, dom::document& _doc) noexcept { @@ -5164,7 +5288,6 @@ namespace simdjson { // redefining SIMDJSON_IMPLEMENTATION to "fallback" // #define SIMDJSON_IMPLEMENTATION fallback /* end file include/simdjson/fallback/begin.h */ - namespace simdjson { namespace fallback { @@ -5227,7 +5350,7 @@ namespace simdjson { * complete document, therefore the last json buffer location is the end of the * batch. 
*/ - simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation& parser) { + simdjson_inline uint32_t find_next_document_index(dom_parser_implementation& parser) { // Variant: do not count separately, just figure out depth if (parser.n_structural_indexes == 0) { return 0; } auto arr_cnt = 0; @@ -5301,7 +5424,7 @@ namespace simdjson { class structural_scanner { public: - simdjson_really_inline structural_scanner(dom_parser_implementation& _parser, stage1_mode _partial) + simdjson_inline structural_scanner(dom_parser_implementation& _parser, stage1_mode _partial) : buf{ _parser.buf }, next_structural_index{ _parser.structural_indexes.get() }, parser{ _parser }, @@ -5309,18 +5432,18 @@ namespace simdjson { partial{ _partial } { } - simdjson_really_inline void add_structural() { + simdjson_inline void add_structural() { *next_structural_index = idx; next_structural_index++; } - simdjson_really_inline bool is_continuation(uint8_t c) { - return (c & 0b11000000) == 0b10000000; + simdjson_inline bool is_continuation(uint8_t c) { + return (c & 0xc0) == 0x80; } - simdjson_really_inline void validate_utf8_character() { + simdjson_inline void validate_utf8_character() { // Continuation - if (simdjson_unlikely((buf[idx] & 0b01000000) == 0)) { + if (simdjson_unlikely((buf[idx] & 0x40) == 0)) { // extra continuation error = UTF8_ERROR; idx++; @@ -5328,7 +5451,7 @@ namespace simdjson { } // 2-byte - if ((buf[idx] & 0b00100000) == 0) { + if ((buf[idx] & 0x20) == 0) { // missing continuation if (simdjson_unlikely(idx + 1 > len || !is_continuation(buf[idx + 1]))) { if (idx + 1 > len && is_streaming(partial)) { idx = len; return; } @@ -5337,13 +5460,13 @@ namespace simdjson { return; } // overlong: 1100000_ 10______ - if (buf[idx] <= 0b11000001) { error = UTF8_ERROR; } + if (buf[idx] <= 0xc1) { error = UTF8_ERROR; } idx += 2; return; } // 3-byte - if ((buf[idx] & 0b00010000) == 0) { + if ((buf[idx] & 0x10) == 0) { // missing continuation if 
(simdjson_unlikely(idx + 2 > len || !is_continuation(buf[idx + 1]) || !is_continuation(buf[idx + 2]))) { if (idx + 2 > len && is_streaming(partial)) { idx = len; return; } @@ -5352,9 +5475,9 @@ namespace simdjson { return; } // overlong: 11100000 100_____ ________ - if (buf[idx] == 0b11100000 && buf[idx + 1] <= 0b10011111) { error = UTF8_ERROR; } + if (buf[idx] == 0xe0 && buf[idx + 1] <= 0x9f) { error = UTF8_ERROR; } // surrogates: U+D800-U+DFFF 11101101 101_____ - if (buf[idx] == 0b11101101 && buf[idx + 1] >= 0b10100000) { error = UTF8_ERROR; } + if (buf[idx] == 0xed && buf[idx + 1] >= 0xa0) { error = UTF8_ERROR; } idx += 3; return; } @@ -5368,25 +5491,25 @@ namespace simdjson { return; } // overlong: 11110000 1000____ ________ ________ - if (buf[idx] == 0b11110000 && buf[idx + 1] <= 0b10001111) { error = UTF8_ERROR; } + if (buf[idx] == 0xf0 && buf[idx + 1] <= 0x8f) { error = UTF8_ERROR; } // too large: > U+10FFFF: // 11110100 (1001|101_)____ // 1111(1___|011_|0101) 10______ // also includes 5, 6, 7 and 8 byte characters: // 11111___ - if (buf[idx] == 0b11110100 && buf[idx + 1] >= 0b10010000) { error = UTF8_ERROR; } - if (buf[idx] >= 0b11110101) { error = UTF8_ERROR; } + if (buf[idx] == 0xf4 && buf[idx + 1] >= 0x90) { error = UTF8_ERROR; } + if (buf[idx] >= 0xf5) { error = UTF8_ERROR; } idx += 4; } // Returns true if the string is unclosed. 
- simdjson_really_inline bool validate_string() { + simdjson_inline bool validate_string() { idx++; // skip first quote while (idx < len && buf[idx] != '"') { if (buf[idx] == '\\') { idx += 2; } - else if (simdjson_unlikely(buf[idx] & 0b10000000)) { + else if (simdjson_unlikely(buf[idx] & 0x80)) { validate_utf8_character(); } else { @@ -5398,7 +5521,7 @@ namespace simdjson { return false; } - simdjson_really_inline bool is_whitespace_or_operator(uint8_t c) { + simdjson_inline bool is_whitespace_or_operator(uint8_t c) { switch (c) { case '{': case '}': case '[': case ']': case ',': case ':': case ' ': case '\r': case '\n': case '\t': @@ -5411,7 +5534,7 @@ namespace simdjson { // // Parse the entire input in STEP_SIZE-byte chunks. // - simdjson_really_inline error_code scan() { + simdjson_inline error_code scan() { bool unclosed_string = false; for (; idx < len; idx++) { switch (buf[idx]) { @@ -5590,42 +5713,42 @@ namespace simdjson { } } unsigned char byte = data[pos]; - if (byte < 0b10000000) { + if (byte < 0x80) { pos++; continue; } - else if ((byte & 0b11100000) == 0b11000000) { + else if ((byte & 0xe0) == 0xc0) { next_pos = pos + 2; if (next_pos > len) { return false; } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; } + if ((data[pos + 1] & 0xc0) != 0x80) { return false; } // range check - code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); + code_point = (byte & 0x1f) << 6 | (data[pos + 1] & 0x3f); if (code_point < 0x80 || 0x7ff < code_point) { return false; } } - else if ((byte & 0b11110000) == 0b11100000) { + else if ((byte & 0xf0) == 0xe0) { next_pos = pos + 3; if (next_pos > len) { return false; } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { return false; } + if ((data[pos + 1] & 0xc0) != 0x80) { return false; } + if ((data[pos + 2] & 0xc0) != 0x80) { return false; } // range check - code_point = (byte & 0b00001111) << 12 | - (data[pos + 1] & 
0b00111111) << 6 | - (data[pos + 2] & 0b00111111); + code_point = (byte & 0x0f) << 12 | + (data[pos + 1] & 0x3f) << 6 | + (data[pos + 2] & 0x3f); if (code_point < 0x800 || 0xffff < code_point || (0xd7ff < code_point && code_point < 0xe000)) { return false; } } - else if ((byte & 0b11111000) == 0b11110000) { // 0b11110000 + else if ((byte & 0xf8) == 0xf0) { // 0b11110000 next_pos = pos + 4; if (next_pos > len) { return false; } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { return false; } - if ((data[pos + 3] & 0b11000000) != 0b10000000) { return false; } + if ((data[pos + 1] & 0xc0) != 0x80) { return false; } + if ((data[pos + 2] & 0xc0) != 0x80) { return false; } + if ((data[pos + 3] & 0xc0) != 0x80) { return false; } // range check code_point = - (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 | - (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111); + (byte & 0x07) << 18 | (data[pos + 1] & 0x3f) << 12 | + (data[pos + 2] & 0x3f) << 6 | (data[pos + 3] & 0x3f); if (code_point <= 0xffff || 0x10ffff < code_point) { return false; } } else { @@ -5686,39 +5809,100 @@ namespace simdjson { // return true if the unicode codepoint was valid // We work in little-endian then swap at write time simdjson_warn_unused - simdjson_really_inline bool handle_unicode_codepoint(const uint8_t** src_ptr, - uint8_t** dst_ptr) { + simdjson_inline bool handle_unicode_codepoint(const uint8_t** src_ptr, + uint8_t** dst_ptr, bool allow_replacement) { + // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) + constexpr uint32_t substitution_code_point = 0xfffd; // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the // conversion isn't valid; we defer the check for this to inside the // multilingual plane check uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); *src_ptr += 6; - // check for low surrogate for characters outside 
the Basic + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic // Multilingual Plane. if (code_point >= 0xd800 && code_point < 0xdc00) { - if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { - return false; + const uint8_t* src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + if (!allow_replacement) { return false; } + code_point = substitution_code_point; } - uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + else { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + if (!allow_replacement) { return false; } + code_point = substitution_code_point; + } + else { + code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } - // if the first code point is invalid we will get here, as we will go past - // the check for being outside the Basic Multilingual plane. If we don't - // find a \u immediately afterwards we fail out anyhow, but if we do, - // this check catches both the case of the first code point being invalid - // or the second code point being invalid. - if ((code_point | code_point_2) >> 16) { - return false; } - - code_point = - (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; - *src_ptr += 6; + } + else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. 
+ if (!allow_replacement) { return false; } + code_point = substitution_code_point; } size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); *dst_ptr += offset; return offset > 0; } + + // handle a unicode codepoint using the wobbly convention + // https://simonsapin.github.io/wtf-8/ + // write appropriate values into dest + // src will advance 6 bytes or 12 bytes + // dest will advance a variable amount (return via pointer) + // return true if the unicode codepoint was valid + // We work in little-endian then swap at write time + simdjson_warn_unused + simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t** src_ptr, + uint8_t** dst_ptr) { + // It is not ideal that this function is nearly identical to handle_unicode_codepoint. + // + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t* src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + uint32_t low_bit = code_point_2 - 0xdc00; + if ((low_bit >> 10) == 0) { + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + } + } + + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; + } + + /** * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. 
It returns the final output @@ -5727,7 +5911,7 @@ namespace simdjson { * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. */ - simdjson_warn_unused simdjson_really_inline uint8_t* parse_string(const uint8_t* src, uint8_t* dst) { + simdjson_warn_unused simdjson_inline uint8_t* parse_string(const uint8_t* src, uint8_t* dst, bool allow_replacement) { while (1) { // Copy the next n bytes, and find the backslash and quote in them. auto bs_quote = backslash_and_quote::copy_and_find(src, dst); @@ -5746,7 +5930,56 @@ namespace simdjson { within the unicode codepoint handling code. */ src += bs_dist; dst += bs_dist; - if (!handle_unicode_codepoint(&src, &dst)) { + if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { + return nullptr; + } + } + else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } + else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; + } + + simdjson_warn_unused simdjson_inline uint8_t* parse_wobbly_string(const uint8_t* src, uint8_t* dst) { + // It is not ideal that this function is nearly identical to parse_string. + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. 
Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint_wobbly(&src, &dst)) { return nullptr; } } @@ -5805,7 +6038,7 @@ namespace simdjson { static int log_depth; // Not threadsafe. Log only. // Helper to turn unprintable or newline characters into spaces - static simdjson_really_inline char printable_char(char c) { + static simdjson_inline char printable_char(char c) { if (c >= 0x20) { return c; } @@ -5815,7 +6048,7 @@ namespace simdjson { } // Print the header and set up log_start - static simdjson_really_inline void log_start() { + static simdjson_inline void log_start() { if (LOG_ENABLED) { log_depth = 0; printf("\n"); @@ -5824,7 +6057,7 @@ namespace simdjson { } } - simdjson_unused static simdjson_really_inline void log_string(const char* message) { + simdjson_unused static simdjson_inline void log_string(const char* message) { if (LOG_ENABLED) { printf("%s\n", message); } @@ -5832,7 +6065,7 @@ namespace simdjson { // Logs a single line from the stage 2 DOM parser template - static simdjson_really_inline void log_line(S& structurals, const char* title_prefix, const char* title, const char* detail) { + static simdjson_inline void log_line(S& structurals, const char* title_prefix, const char* title, const char* detail) { if (LOG_ENABLED) { printf("| %*s%s%-*s ", log_depth * 2, "", title_prefix, LOG_EVENT_LEN - log_depth * 2 - int(strlen(title_prefix)), title); auto current_index = structurals.at_beginning() ? 
nullptr : structurals.next_structural - 1; @@ -5911,14 +6144,14 @@ namespace simdjson { * - increment_count(iter) - each time a value is found in an array or object. */ template - simdjson_warn_unused simdjson_really_inline error_code walk_document(V& visitor) noexcept; + simdjson_warn_unused simdjson_inline error_code walk_document(V& visitor) noexcept; /** * Create an iterator capable of walking a JSON document. * * The document must have already passed through stage 1. */ - simdjson_really_inline json_iterator(dom_parser_implementation& _dom_parser, size_t start_structural_index); + simdjson_inline json_iterator(dom_parser_implementation& _dom_parser, size_t start_structural_index); /** * Look at the next token. @@ -5927,7 +6160,7 @@ namespace simdjson { * * They may include invalid JSON as well (such as `1.2.3` or `ture`). */ - simdjson_really_inline const uint8_t* peek() const noexcept; + simdjson_inline const uint8_t* peek() const noexcept; /** * Advance to the next token. * @@ -5935,56 +6168,56 @@ namespace simdjson { * * They may include invalid JSON as well (such as `1.2.3` or `ture`). */ - simdjson_really_inline const uint8_t* advance() noexcept; + simdjson_inline const uint8_t* advance() noexcept; /** * Get the remaining length of the document, from the start of the current token. */ - simdjson_really_inline size_t remaining_len() const noexcept; + simdjson_inline size_t remaining_len() const noexcept; /** * Check if we are at the end of the document. * * If this is true, there are no more tokens. */ - simdjson_really_inline bool at_eof() const noexcept; + simdjson_inline bool at_eof() const noexcept; /** * Check if we are at the beginning of the document. */ - simdjson_really_inline bool at_beginning() const noexcept; - simdjson_really_inline uint8_t last_structural() const noexcept; + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; /** * Log that a value has been found. 
* * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_value(const char* type) const noexcept; + simdjson_inline void log_value(const char* type) const noexcept; /** * Log the start of a multipart value. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_start_value(const char* type) const noexcept; + simdjson_inline void log_start_value(const char* type) const noexcept; /** * Log the end of a multipart value. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_end_value(const char* type) const noexcept; + simdjson_inline void log_end_value(const char* type) const noexcept; /** * Log an error. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_error(const char* error) const noexcept; + simdjson_inline void log_error(const char* error) const noexcept; template - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(V& visitor, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V& visitor, const uint8_t* value) noexcept; template - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(V& visitor, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_primitive(V& visitor, const uint8_t* value) noexcept; }; template - simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_document(V& visitor) noexcept { + simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V& visitor) noexcept { logger::log_start(); // @@ -6109,52 +6342,52 @@ namespace simdjson { } // walk_document() - simdjson_really_inline json_iterator::json_iterator(dom_parser_implementation& _dom_parser, size_t start_structural_index) + simdjson_inline json_iterator::json_iterator(dom_parser_implementation& _dom_parser, size_t start_structural_index) : buf{ _dom_parser.buf }, next_structural{ 
&_dom_parser.structural_indexes[start_structural_index] }, dom_parser{ _dom_parser } { } - simdjson_really_inline const uint8_t* json_iterator::peek() const noexcept { + simdjson_inline const uint8_t* json_iterator::peek() const noexcept { return &buf[*(next_structural)]; } - simdjson_really_inline const uint8_t* json_iterator::advance() noexcept { + simdjson_inline const uint8_t* json_iterator::advance() noexcept { return &buf[*(next_structural++)]; } - simdjson_really_inline size_t json_iterator::remaining_len() const noexcept { + simdjson_inline size_t json_iterator::remaining_len() const noexcept { return dom_parser.len - *(next_structural - 1); } - simdjson_really_inline bool json_iterator::at_eof() const noexcept { + simdjson_inline bool json_iterator::at_eof() const noexcept { return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; } - simdjson_really_inline bool json_iterator::at_beginning() const noexcept { + simdjson_inline bool json_iterator::at_beginning() const noexcept { return next_structural == dom_parser.structural_indexes.get(); } - simdjson_really_inline uint8_t json_iterator::last_structural() const noexcept { + simdjson_inline uint8_t json_iterator::last_structural() const noexcept { return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; } - simdjson_really_inline void json_iterator::log_value(const char* type) const noexcept { + simdjson_inline void json_iterator::log_value(const char* type) const noexcept { logger::log_line(*this, "", type, ""); } - simdjson_really_inline void json_iterator::log_start_value(const char* type) const noexcept { + simdjson_inline void json_iterator::log_start_value(const char* type) const noexcept { logger::log_line(*this, "+", type, ""); if (logger::LOG_ENABLED) { logger::log_depth++; } } - simdjson_really_inline void json_iterator::log_end_value(const char* type) const noexcept { + simdjson_inline void json_iterator::log_end_value(const char* type) 
const noexcept { if (logger::LOG_ENABLED) { logger::log_depth--; } logger::log_line(*this, "-", type, ""); } - simdjson_really_inline void json_iterator::log_error(const char* error) const noexcept { + simdjson_inline void json_iterator::log_error(const char* error) const noexcept { logger::log_line(*this, "", "ERROR", error); } template - simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root_primitive(V& visitor, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V& visitor, const uint8_t* value) noexcept { switch (*value) { case '"': return visitor.visit_root_string(*this, value); case 't': return visitor.visit_root_true_atom(*this, value); @@ -6170,7 +6403,7 @@ namespace simdjson { } } template - simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_primitive(V& visitor, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V& visitor, const uint8_t* value) noexcept { switch (*value) { case '"': return visitor.visit_string(*this, value); case 't': return visitor.visit_true_atom(*this, value); @@ -6202,18 +6435,18 @@ namespace simdjson { uint64_t* next_tape_loc; /** Write a signed 64-bit value to tape. */ - simdjson_really_inline void append_s64(int64_t value) noexcept; + simdjson_inline void append_s64(int64_t value) noexcept; /** Write an unsigned 64-bit value to tape. */ - simdjson_really_inline void append_u64(uint64_t value) noexcept; + simdjson_inline void append_u64(uint64_t value) noexcept; /** Write a double value to tape. */ - simdjson_really_inline void append_double(double value) noexcept; + simdjson_inline void append_double(double value) noexcept; /** * Append a tape entry (an 8-bit type,and 56 bits worth of value). 
*/ - simdjson_really_inline void append(uint64_t val, internal::tape_type t) noexcept; + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; /** * Skip the current tape entry without writing. @@ -6221,24 +6454,24 @@ namespace simdjson { * Used to skip the start of the container, since we'll come back later to fill it in when the * container ends. */ - simdjson_really_inline void skip() noexcept; + simdjson_inline void skip() noexcept; /** * Skip the number of tape entries necessary to write a large u64 or i64. */ - simdjson_really_inline void skip_large_integer() noexcept; + simdjson_inline void skip_large_integer() noexcept; /** * Skip the number of tape entries necessary to write a double. */ - simdjson_really_inline void skip_double() noexcept; + simdjson_inline void skip_double() noexcept; /** * Write a value to a known location on tape. * * Used to go back and write out the start of a container after the container ends. */ - simdjson_really_inline static void write(uint64_t& tape_loc, uint64_t val, internal::tape_type t) noexcept; + simdjson_inline static void write(uint64_t& tape_loc, uint64_t val, internal::tape_type t) noexcept; private: /** @@ -6246,50 +6479,50 @@ namespace simdjson { * all 64 bits, such as double and uint64_t. */ template - simdjson_really_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; }; // struct number_writer - simdjson_really_inline void tape_writer::append_s64(int64_t value) noexcept { + simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { append2(0, value, internal::tape_type::INT64); } - simdjson_really_inline void tape_writer::append_u64(uint64_t value) noexcept { + simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { append(0, internal::tape_type::UINT64); *next_tape_loc = value; next_tape_loc++; } /** Write a double value to tape. 
*/ - simdjson_really_inline void tape_writer::append_double(double value) noexcept { + simdjson_inline void tape_writer::append_double(double value) noexcept { append2(0, value, internal::tape_type::DOUBLE); } - simdjson_really_inline void tape_writer::skip() noexcept { + simdjson_inline void tape_writer::skip() noexcept { next_tape_loc++; } - simdjson_really_inline void tape_writer::skip_large_integer() noexcept { + simdjson_inline void tape_writer::skip_large_integer() noexcept { next_tape_loc += 2; } - simdjson_really_inline void tape_writer::skip_double() noexcept { + simdjson_inline void tape_writer::skip_double() noexcept { next_tape_loc += 2; } - simdjson_really_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { *next_tape_loc = val | ((uint64_t(char(t))) << 56); next_tape_loc++; } template - simdjson_really_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { append(val, t); static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); memcpy(next_tape_loc, &val2, sizeof(val2)); next_tape_loc++; } - simdjson_really_inline void tape_writer::write(uint64_t& tape_loc, uint64_t val, internal::tape_type t) noexcept { + simdjson_inline void tape_writer::write(uint64_t& tape_loc, uint64_t val, internal::tape_type t) noexcept { tape_loc = val | ((uint64_t(char(t))) << 56); } @@ -6306,40 +6539,40 @@ namespace simdjson { struct tape_builder { template - simdjson_warn_unused static simdjson_really_inline error_code parse_document( + simdjson_warn_unused static simdjson_inline error_code parse_document( dom_parser_implementation& dom_parser, dom::document& doc) noexcept; /** Called when a non-empty document starts. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_document_start(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator& iter) noexcept; /** Called when a non-empty document ends without error. */ - simdjson_warn_unused simdjson_really_inline error_code visit_document_end(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator& iter) noexcept; /** Called when a non-empty array starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_start(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator& iter) noexcept; /** Called when a non-empty array ends. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_end(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator& iter) noexcept; /** Called when an empty array is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_array(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator& iter) noexcept; /** Called when a non-empty object starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_object_start(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator& iter) noexcept; /** * Called when a key in a field is encountered. * * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array * will be called after this with the field value. */ - simdjson_warn_unused simdjson_really_inline error_code visit_key(json_iterator& iter, const uint8_t* key) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator& iter, const uint8_t* key) noexcept; /** Called when a non-empty object ends. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_object_end(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator& iter) noexcept; /** Called when an empty object is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_object(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator& iter) noexcept; /** * Called when a string, number, boolean or null is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator& iter, const uint8_t* value) noexcept; /** * Called when a string, number, boolean or null is found at the top level of a document (i.e. * when there is no array or object and the entire document is a single string, number, boolean or @@ -6348,22 +6581,22 @@ namespace simdjson { * This is separate from primitive() because simdjson's normal primitive parsing routines assume * there is at least one more token after the value, which is only true in an array or object. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_string(json_iterator& iter, const uint8_t* value, bool key = false) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_number(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_true_atom(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_false_atom(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_null_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator& iter, const uint8_t* value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_string(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_number(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_true_atom(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_false_atom(json_iterator& iter, 
const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_null_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator& iter, const uint8_t* value) noexcept; /** Called each time a new field or element in an array or object is found. */ - simdjson_warn_unused simdjson_really_inline error_code increment_count(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator& iter) noexcept; /** Next location to write to tape */ tape_writer tape; @@ -6371,18 +6604,18 @@ namespace simdjson { /** Next write location in the string buf for stage 2 parsing */ uint8_t* current_string_buf_loc; - simdjson_really_inline tape_builder(dom::document& doc) noexcept; + simdjson_inline tape_builder(dom::document& doc) noexcept; - simdjson_really_inline uint32_t next_tape_index(json_iterator& iter) const noexcept; - simdjson_really_inline void start_container(json_iterator& iter) noexcept; - simdjson_warn_unused simdjson_really_inline error_code end_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_warn_unused simdjson_really_inline error_code empty_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_really_inline uint8_t* on_start_string(json_iterator& iter) noexcept; - simdjson_really_inline void on_end_string(uint8_t* dst) 
noexcept; + simdjson_inline uint32_t next_tape_index(json_iterator& iter) const noexcept; + simdjson_inline void start_container(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t* on_start_string(json_iterator& iter) noexcept; + simdjson_inline void on_end_string(uint8_t* dst) noexcept; }; // class tape_builder template - simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_document( + simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( dom_parser_implementation& dom_parser, dom::document& doc) noexcept { dom_parser.doc = &doc; @@ -6391,59 +6624,59 @@ namespace simdjson { return iter.walk_document(builder); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_primitive(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator& iter, const uint8_t* value) noexcept { return iter.visit_root_primitive(*this, value); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_primitive(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator& iter, const uint8_t* value) noexcept { return iter.visit_primitive(*this, value); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_object(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator& iter) noexcept { return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); } - simdjson_warn_unused 
simdjson_really_inline error_code tape_builder::visit_empty_array(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator& iter) noexcept { return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_start(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator& iter) noexcept { start_container(iter); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_start(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator& iter) noexcept { start_container(iter); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_start(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator& iter) noexcept { start_container(iter); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_end(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator& iter) noexcept { return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_end(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator& iter) noexcept { return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_end(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code 
tape_builder::visit_document_end(json_iterator& iter) noexcept { constexpr uint32_t start_tape_index = 0; tape.append(start_tape_index, internal::tape_type::ROOT); tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_key(json_iterator& iter, const uint8_t* key) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator& iter, const uint8_t* key) noexcept { return visit_string(iter, key, true); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::increment_count(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator& iter) noexcept { iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 return SUCCESS; } - simdjson_really_inline tape_builder::tape_builder(dom::document& doc) noexcept : tape{ doc.tape.get() }, current_string_buf_loc{ doc.string_buf.get() } {} + simdjson_inline tape_builder::tape_builder(dom::document& doc) noexcept : tape{ doc.tape.get() }, current_string_buf_loc{ doc.string_buf.get() } {} - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_string(json_iterator& iter, const uint8_t* value, bool key) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator& iter, const uint8_t* value, bool key) noexcept { iter.log_value(key ? "key" : "string"); uint8_t* dst = on_start_string(iter); - dst = stringparsing::parse_string(value + 1, dst); + dst = stringparsing::parse_string(value + 1, dst, false); // We do not allow replacement when the escape characters are invalid. 
if (dst == nullptr) { iter.log_error("Invalid escape in string"); return STRING_ERROR; @@ -6452,16 +6685,16 @@ namespace simdjson { return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_string(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator& iter, const uint8_t* value) noexcept { return visit_string(iter, value); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_number(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("number"); return numberparsing::parse_number(value, tape); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_number(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator& iter, const uint8_t* value) noexcept { // // We need to make a copy to make sure that the string is space terminated. 
// This is not about padding the input, which should already padded up @@ -6483,42 +6716,42 @@ namespace simdjson { return error; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_true_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("true"); if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } tape.append(0, internal::tape_type::TRUE_VALUE); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_true_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("true"); if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } tape.append(0, internal::tape_type::TRUE_VALUE); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_false_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("false"); if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } tape.append(0, internal::tape_type::FALSE_VALUE); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_false_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("false"); if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } tape.append(0, internal::tape_type::FALSE_VALUE); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline 
error_code tape_builder::visit_null_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("null"); if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } tape.append(0, internal::tape_type::NULL_VALUE); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_null_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("null"); if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } tape.append(0, internal::tape_type::NULL_VALUE); @@ -6527,24 +6760,24 @@ namespace simdjson { // private: - simdjson_really_inline uint32_t tape_builder::next_tape_index(json_iterator& iter) const noexcept { + simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator& iter) const noexcept { return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::empty_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept { auto start_index = next_tape_index(iter); tape.append(start_index + 2, start); tape.append(start_index, end); return SUCCESS; } - simdjson_really_inline void tape_builder::start_container(json_iterator& iter) noexcept { + simdjson_inline void tape_builder::start_container(json_iterator& iter) noexcept { iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); iter.dom_parser.open_containers[iter.depth].count = 0; tape.skip(); // We don't actually *write* the start 
element until the end. } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept { // Write the ending tape element, pointing at the start location const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; tape.append(start_tape_index, end); @@ -6557,13 +6790,13 @@ namespace simdjson { return SUCCESS; } - simdjson_really_inline uint8_t* tape_builder::on_start_string(json_iterator& iter) noexcept { + simdjson_inline uint8_t* tape_builder::on_start_string(json_iterator& iter) noexcept { // we advance the point, accounting for the fact that we have a NULL termination tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); return current_string_buf_loc + sizeof(uint32_t); } - simdjson_really_inline void tape_builder::on_end_string(uint8_t* dst) noexcept { + simdjson_inline void tape_builder::on_end_string(uint8_t* dst) noexcept { uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); // TODO check for overflow in case someone has a crazy string (>=4GB?) 
// But only add the overflow check when the document itself exceeds 4GB @@ -6592,8 +6825,12 @@ namespace simdjson { return stage2::tape_builder::parse_document(*this, _doc); } - simdjson_warn_unused uint8_t* dom_parser_implementation::parse_string(const uint8_t* src, uint8_t* dst) const noexcept { - return fallback::stringparsing::parse_string(src, dst); + simdjson_warn_unused uint8_t* dom_parser_implementation::parse_string(const uint8_t* src, uint8_t* dst, bool replacement_char) const noexcept { + return fallback::stringparsing::parse_string(src, dst, replacement_char); + } + + simdjson_warn_unused uint8_t* dom_parser_implementation::parse_wobbly_string(const uint8_t* src, uint8_t* dst) const noexcept { + return fallback::stringparsing::parse_wobbly_string(src, dst); } simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t* _buf, size_t _len, dom::document& _doc) noexcept { @@ -6660,25 +6897,25 @@ namespace simdjson { using namespace simd; struct json_character_block { - static simdjson_really_inline json_character_block classify(const simd::simd8x64& in); + static simdjson_inline json_character_block classify(const simd::simd8x64& in); // ASCII white-space ('\r','\n','\t',' ') - simdjson_really_inline uint64_t whitespace() const noexcept; + simdjson_inline uint64_t whitespace() const noexcept; // non-quote structural characters (comma, colon, braces, brackets) - simdjson_really_inline uint64_t op() const noexcept; + simdjson_inline uint64_t op() const noexcept; // neither a structural character nor a white-space, so letters, numbers and quotes - simdjson_really_inline uint64_t scalar() const noexcept; + simdjson_inline uint64_t scalar() const noexcept; uint64_t _whitespace; // ASCII white-space ('\r','\n','\t',' ') uint64_t _op; // structural characters (comma, colon, braces, brackets but not quotes) }; - simdjson_really_inline uint64_t json_character_block::whitespace() const noexcept { return _whitespace; } - simdjson_really_inline 
uint64_t json_character_block::op() const noexcept { return _op; } - simdjson_really_inline uint64_t json_character_block::scalar() const noexcept { return ~(op() | whitespace()); } + simdjson_inline uint64_t json_character_block::whitespace() const noexcept { return _whitespace; } + simdjson_inline uint64_t json_character_block::op() const noexcept { return _op; } + simdjson_inline uint64_t json_character_block::scalar() const noexcept { return ~(op() | whitespace()); } // This identifies structural characters (comma, colon, braces, brackets), // and ASCII white-space ('\r','\n','\t',' '). - simdjson_really_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { + simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why // we can't use the generic lookup_16. const auto whitespace_table = simd8::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); @@ -6728,21 +6965,21 @@ namespace simdjson { return { whitespace, op }; } - simdjson_really_inline bool is_ascii(const simd8x64& input) { + simdjson_inline bool is_ascii(const simd8x64& input) { return input.reduce_or().is_ascii(); } - simdjson_unused simdjson_really_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { - simd8 is_second_byte = prev1.saturating_sub(0b11000000u - 1); // Only 11______ will be > 0 - simd8 is_third_byte = prev2.saturating_sub(0b11100000u - 1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u - 1); // Only 1111____ will be > 0 + simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1.saturating_sub(0xc0u - 1); // Only 11______ will be > 0 + simd8 is_third_byte = prev2.saturating_sub(0xe0u - 1); // Only 111_____ will be > 0 + 
simd8 is_fourth_byte = prev3.saturating_sub(0xf0u - 1); // Only 1111____ will be > 0 // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); } - simdjson_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { - simd8 is_third_byte = prev2.saturating_sub(0b11100000u - 1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u - 1); // Only 1111____ will be > 0 + simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0xe0u - 1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u - 1); // Only 1111____ will be > 0 // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. return simd8(is_third_byte | is_fourth_byte) > int8_t(0); } @@ -6759,7 +6996,7 @@ namespace simdjson { using namespace simd; - simdjson_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) // Bit 1 = Too Long (ASCII followed by continuation) // Bit 2 = Overlong 3-byte @@ -6767,20 +7004,20 @@ namespace simdjson { // Bit 5 = Overlong 2-byte // Bit 7 = Two Continuations constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ + // 11______ 11______ constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 
1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; // 11110101 1000____ // 1111011_ 1000____ @@ -6801,7 +7038,7 @@ namespace simdjson { TOO_SHORT | OVERLONG_3 | SURROGATE, // 1111____ ________ TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 - ); + ); constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( // ____0000 ________ @@ -6830,7 +7067,7 @@ namespace simdjson { CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, CARRY | TOO_LARGE | TOO_LARGE_1000, CARRY | TOO_LARGE | TOO_LARGE_1000 - ); + ); const simd8 byte_2_high = input.shr<4>().lookup_16( // ________ 0_______ TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, @@ -6846,10 +7083,10 @@ namespace simdjson { // ________ 11______ TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT - ); + ); return (byte_1_high & byte_1_low & byte_2_high); } - simdjson_really_inline simd8 check_multibyte_lengths(const simd8 input, + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, const simd8 prev_input, const simd8 sc) { simd8 prev2 = input.prev<2>(prev_input); simd8 prev3 = input.prev<3>(prev_input); @@ -6862,7 +7099,7 @@ namespace simdjson { // Return nonzero if there are incomplete multibyte characters at the end of the block: // e.g. if there is a 4-byte character, but it's 3 bytes from the end. // - simdjson_really_inline simd8 is_incomplete(const simd8 input) { + simdjson_inline simd8 is_incomplete(const simd8 input) { // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): // ... 
1111____ 111_____ 11______ #if SIMDJSON_IMPLEMENTATION_ICELAKE @@ -6874,14 +7111,14 @@ namespace simdjson { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u - 1, 0b11100000u - 1, 0b11000000u - 1 + 255, 255, 255, 255, 255, 0xf0u - 1, 0xe0u - 1, 0xc0u - 1 }; #else static const uint8_t max_array[32] = { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u - 1, 0b11100000u - 1, 0b11000000u - 1 + 255, 255, 255, 255, 255, 0xf0u - 1, 0xe0u - 1, 0xc0u - 1 }; #endif const simd8 max_value(&max_array[sizeof(max_array) - sizeof(simd8)]); @@ -6899,7 +7136,7 @@ namespace simdjson { // // Check whether the current bytes are valid UTF-8. // - simdjson_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) simd8 prev1 = input.prev<1>(prev_input); @@ -6910,13 +7147,21 @@ namespace simdjson { // The only problem that can happen at EOF is that a multibyte character is too short // or a byte value too large in the last bytes: check_special_cases only checks for bytes // too large in the first of two bytes. - simdjson_really_inline void check_eof() { + simdjson_inline void check_eof() { // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't // possibly finish them. 
this->error |= this->prev_incomplete; } - simdjson_really_inline void check_next_input(const simd8x64& input) { +#ifndef SIMDJSON_IF_CONSTEXPR +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_IF_CONSTEXPR if constexpr +#else +#define SIMDJSON_IF_CONSTEXPR if +#endif +#endif + + simdjson_inline void check_next_input(const simd8x64& input) { if (simdjson_likely(is_ascii(input))) { this->error |= this->prev_incomplete; } @@ -6926,24 +7171,25 @@ namespace simdjson { || (simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4), "We support one, two or four chunks per 64-byte block."); - if (simd8x64::NUM_CHUNKS == 1) { + SIMDJSON_IF_CONSTEXPR(simd8x64::NUM_CHUNKS == 1) { this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - } if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); } - else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS - 1]); - this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS - 1]; +else SIMDJSON_IF_CONSTEXPR(simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } + else SIMDJSON_IF_CONSTEXPR(simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS - 1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS - 1]; } } // do not 
forget to call check_eof! - simdjson_really_inline error_code errors() { + simdjson_inline error_code errors() { return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS; } @@ -6973,10 +7219,10 @@ namespace simdjson { template struct buf_block_reader { public: - simdjson_really_inline buf_block_reader(const uint8_t* _buf, size_t _len); - simdjson_really_inline size_t block_index(); - simdjson_really_inline bool has_full_block() const; - simdjson_really_inline const uint8_t* full_block() const; + simdjson_inline buf_block_reader(const uint8_t* _buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t* full_block() const; /** * Get the last block, padded with spaces. * @@ -6986,8 +7232,8 @@ namespace simdjson { * * @return the number of effective characters in the last block. */ - simdjson_really_inline size_t get_remainder(uint8_t* dst) const; - simdjson_really_inline void advance(); + simdjson_inline size_t get_remainder(uint8_t* dst) const; + simdjson_inline void advance(); private: const uint8_t* buf; const size_t len; @@ -7026,23 +7272,23 @@ namespace simdjson { } template - simdjson_really_inline buf_block_reader::buf_block_reader(const uint8_t* _buf, size_t _len) : buf{ _buf }, len{ _len }, lenminusstep{ len < STEP_SIZE ? 0 : len - STEP_SIZE }, idx{ 0 } {} + simdjson_inline buf_block_reader::buf_block_reader(const uint8_t* _buf, size_t _len) : buf{ _buf }, len{ _len }, lenminusstep{ len < STEP_SIZE ? 
0 : len - STEP_SIZE }, idx{ 0 } {} template - simdjson_really_inline size_t buf_block_reader::block_index() { return idx; } + simdjson_inline size_t buf_block_reader::block_index() { return idx; } template - simdjson_really_inline bool buf_block_reader::has_full_block() const { + simdjson_inline bool buf_block_reader::has_full_block() const { return idx < lenminusstep; } template - simdjson_really_inline const uint8_t* buf_block_reader::full_block() const { + simdjson_inline const uint8_t* buf_block_reader::full_block() const { return &buf[idx]; } template - simdjson_really_inline size_t buf_block_reader::get_remainder(uint8_t* dst) const { + simdjson_inline size_t buf_block_reader::get_remainder(uint8_t* dst) const { if (len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. std::memcpy(dst, buf + idx, len - idx); @@ -7050,7 +7296,7 @@ namespace simdjson { } template - simdjson_really_inline void buf_block_reader::advance() { + simdjson_inline void buf_block_reader::advance() { idx += STEP_SIZE; } @@ -7066,27 +7312,27 @@ namespace simdjson { struct json_string_block { // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : + simdjson_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : _backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {} // Escaped characters (characters following an escape() character) - simdjson_really_inline uint64_t escaped() const { return _escaped; } + simdjson_inline uint64_t escaped() const { return _escaped; } // Escape characters (backslashes that are not escaped--i.e. 
in \\, includes only the first \) - simdjson_really_inline uint64_t escape() const { return _backslash & ~_escaped; } + simdjson_inline uint64_t escape() const { return _backslash & ~_escaped; } // Real (non-backslashed) quotes - simdjson_really_inline uint64_t quote() const { return _quote; } + simdjson_inline uint64_t quote() const { return _quote; } // Start quotes of strings - simdjson_really_inline uint64_t string_start() const { return _quote & _in_string; } + simdjson_inline uint64_t string_start() const { return _quote & _in_string; } // End quotes of strings - simdjson_really_inline uint64_t string_end() const { return _quote & ~_in_string; } + simdjson_inline uint64_t string_end() const { return _quote & ~_in_string; } // Only characters inside the string (not including the quotes) - simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + simdjson_inline uint64_t string_content() const { return _in_string & ~_quote; } // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } // Tail of string (everything except the start quote) - simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } + simdjson_inline uint64_t string_tail() const { return _in_string ^ _quote; } // backslash characters uint64_t _backslash; @@ -7101,14 +7347,14 @@ namespace simdjson { // Scans blocks for string characters, storing the state necessary to do so class 
json_string_scanner { public: - simdjson_really_inline json_string_block next(const simd::simd8x64& in); + simdjson_inline json_string_block next(const simd::simd8x64& in); // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); + simdjson_inline error_code finish(); private: // Intended to be defined by the implementation - simdjson_really_inline uint64_t find_escaped(uint64_t escape); - simdjson_really_inline uint64_t find_escaped_branchless(uint64_t escape); + simdjson_inline uint64_t find_escaped(uint64_t escape); + simdjson_inline uint64_t find_escaped_branchless(uint64_t escape); // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). uint64_t prev_in_string = 0ULL; @@ -7143,7 +7389,7 @@ namespace simdjson { // desired | x | x x x x x x x x | // text | \\\ | \\\"\\\" \\\" \\"\\" | // - simdjson_really_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { + simdjson_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { // If there was overflow, pretend the first character isn't a backslash backslash &= ~prev_escaped; uint64_t follows_escape = backslash << 1 | prev_escaped; @@ -7168,7 +7414,7 @@ namespace simdjson { // // Backslash sequences outside of quotes will be detected in stage 2. 
// - simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + simdjson_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { const uint64_t backslash = in.eq('\\'); const uint64_t escaped = find_escaped(backslash); const uint64_t quote = in.eq('"') & ~escaped; @@ -7201,7 +7447,7 @@ namespace simdjson { ); } - simdjson_really_inline error_code json_string_scanner::finish() { + simdjson_inline error_code json_string_scanner::finish() { if (prev_in_string) { return UNCLOSED_STRING; } @@ -7239,25 +7485,25 @@ namespace simdjson { struct json_block { public: // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} - simdjson_really_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} /** * The start of structurals. * In simdjson prior to v0.3, these were called the pseudo-structural characters. **/ - simdjson_really_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } /** All JSON whitespace (i.e. 
not in a string) */ - simdjson_really_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } // Helpers /** Whether the given characters are inside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } /** Whether the given characters are outside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } // string and escape characters json_string_block _string; @@ -7272,12 +7518,12 @@ namespace simdjson { * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". * They may reside inside a string. **/ - simdjson_really_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } /** * The start of non-operator runs, like 123, true and "abc". * It main reside inside a string. **/ - simdjson_really_inline uint64_t potential_scalar_start() const noexcept { + simdjson_inline uint64_t potential_scalar_start() const noexcept { // The term "scalar" refers to anything except structural characters and white space // (so letters, numbers, quotes). 
// Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space @@ -7288,7 +7534,7 @@ namespace simdjson { * Whether the given character is immediately after a non-operator like 123, true. * The characters following a quote are not included. */ - simdjson_really_inline uint64_t follows_potential_scalar() const noexcept { + simdjson_inline uint64_t follows_potential_scalar() const noexcept { // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a // white space. @@ -7312,10 +7558,10 @@ namespace simdjson { */ class json_scanner { public: - json_scanner() {} - simdjson_really_inline json_block next(const simd::simd8x64& in); + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); + simdjson_inline error_code finish(); private: // Whether the last character of the previous iteration is part of a scalar token @@ -7332,13 +7578,13 @@ namespace simdjson { // // const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); // - simdjson_really_inline uint64_t follows(const uint64_t match, uint64_t& overflow) { + simdjson_inline uint64_t follows(const uint64_t match, uint64_t& overflow) { const uint64_t result = match << 1 | overflow; overflow = match >> 63; return result; } - simdjson_really_inline json_block json_scanner::next(const simd::simd8x64& in) { + simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { json_string_block strings = string_scanner.next(in); // identifies the white-space and the structural characters json_character_block characters = json_character_block::classify(in); @@ -7363,7 +7609,7 @@ namespace simdjson { ); } - simdjson_really_inline error_code json_scanner::finish() { + simdjson_inline error_code 
json_scanner::finish() { return string_scanner.finish(); } @@ -7389,23 +7635,23 @@ namespace simdjson { static error_code minify(const uint8_t* buf, size_t len, uint8_t* dst, size_t& dst_len) noexcept; private: - simdjson_really_inline json_minifier(uint8_t* _dst) + simdjson_inline json_minifier(uint8_t* _dst) : dst{ _dst } {} template - simdjson_really_inline void step(const uint8_t* block_buf, buf_block_reader& reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block); - simdjson_really_inline error_code finish(uint8_t* dst_start, size_t& dst_len); + simdjson_inline void step(const uint8_t* block_buf, buf_block_reader& reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t* dst_start, size_t& dst_len); json_scanner scanner{}; uint8_t* dst; }; - simdjson_really_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { uint64_t mask = block.whitespace(); dst += in.compress(mask, dst); } - simdjson_really_inline error_code json_minifier::finish(uint8_t* dst_start, size_t& dst_len) { + simdjson_inline error_code json_minifier::finish(uint8_t* dst_start, size_t& dst_len) { error_code error = scanner.finish(); if (error) { dst_len = 0; return error; } dst_len = dst - dst_start; @@ -7413,7 +7659,7 @@ namespace simdjson { } template<> - simdjson_really_inline void json_minifier::step<128>(const uint8_t* block_buf, buf_block_reader<128>& reader) noexcept { + simdjson_inline void json_minifier::step<128>(const uint8_t* block_buf, buf_block_reader<128>& reader) noexcept { simd::simd8x64 in_1(block_buf); simd::simd8x64 in_2(block_buf + 64); json_block block_1 = scanner.next(in_1); @@ -7424,7 +7670,7 @@ namespace simdjson { } template<> - simdjson_really_inline void json_minifier::step<64>(const uint8_t* block_buf, 
buf_block_reader<64>& reader) noexcept { + simdjson_inline void json_minifier::step<64>(const uint8_t* block_buf, buf_block_reader<64>& reader) noexcept { simd::simd8x64 in_1(block_buf); json_block block_1 = scanner.next(in_1); this->next(block_buf, block_1); @@ -7497,7 +7743,7 @@ namespace simdjson { * complete document, therefore the last json buffer location is the end of the * batch. */ - simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation& parser) { + simdjson_inline uint32_t find_next_document_index(dom_parser_implementation& parser) { // Variant: do not count separately, just figure out depth if (parser.n_structural_indexes == 0) { return 0; } auto arr_cnt = 0; @@ -7572,7 +7818,7 @@ namespace simdjson { public: uint32_t* tail; - simdjson_really_inline bit_indexer(uint32_t* index_buf) : tail(index_buf) {} + simdjson_inline bit_indexer(uint32_t* index_buf) : tail(index_buf) {} // flatten out values in 'bits' assuming that they are are to have values of idx // plus their position in the bitvector, and store these indexes at @@ -7583,15 +7829,15 @@ namespace simdjson { // If the kernel sets SIMDJSON_CUSTOM_BIT_INDEXER, then it will provide its own // version of the code. #ifdef SIMDJSON_CUSTOM_BIT_INDEXER - simdjson_really_inline void write(uint32_t idx, uint64_t bits); + simdjson_inline void write(uint32_t idx, uint64_t bits); #else - simdjson_really_inline void write(uint32_t idx, uint64_t bits) { + simdjson_inline void write(uint32_t idx, uint64_t bits) { // In some instances, the next branch is expensive because it is mispredicted. // Unfortunately, in other cases, // it helps tremendously. if (bits == 0) return; -#if defined(SIMDJSON_PREFER_REVERSE_BITS) +#if SIMDJSON_PREFER_REVERSE_BITS /** * ARM lacks a fast trailing zero instruction, but it has a fast * bit reversal instruction and a fast leading zero instruction. 
@@ -7694,11 +7940,11 @@ namespace simdjson { static error_code index(const uint8_t* buf, size_t len, dom_parser_implementation& parser, stage1_mode partial) noexcept; private: - simdjson_really_inline json_structural_indexer(uint32_t* structural_indexes); + simdjson_inline json_structural_indexer(uint32_t* structural_indexes); template - simdjson_really_inline void step(const uint8_t* block, buf_block_reader& reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); - simdjson_really_inline error_code finish(dom_parser_implementation& parser, size_t idx, size_t len, stage1_mode partial); + simdjson_inline void step(const uint8_t* block, buf_block_reader& reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation& parser, size_t idx, size_t len, stage1_mode partial); json_scanner scanner{}; utf8_checker checker{}; @@ -7707,26 +7953,26 @@ namespace simdjson { uint64_t unescaped_chars_error = 0; }; - simdjson_really_inline json_structural_indexer::json_structural_indexer(uint32_t* structural_indexes) : indexer{ structural_indexes } {} + simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t* structural_indexes) : indexer{ structural_indexes } {} // Skip the last character if it is partial - simdjson_really_inline size_t trim_partial_utf8(const uint8_t* buf, size_t len) { + simdjson_inline size_t trim_partial_utf8(const uint8_t* buf, size_t len) { if (simdjson_unlikely(len < 3)) { switch (len) { case 2: - if (buf[len - 1] >= 0b11000000) { return len - 1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len - 2] >= 0b11100000) { return len - 2; } // 3- and 4-byte characters with only 2 bytes left + if (buf[len - 1] >= 0xc0) { return len - 1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len - 2] >= 0xe0) { return len - 2; } // 3- and 4-byte 
characters with only 2 bytes left return len; case 1: - if (buf[len - 1] >= 0b11000000) { return len - 1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len - 1] >= 0xc0) { return len - 1; } // 2-, 3- and 4-byte characters with only 1 byte left return len; case 0: return len; } } - if (buf[len - 1] >= 0b11000000) { return len - 1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len - 2] >= 0b11100000) { return len - 2; } // 3- and 4-byte characters with only 1 byte left - if (buf[len - 3] >= 0b11110000) { return len - 3; } // 4-byte characters with only 3 bytes left + if (buf[len - 1] >= 0xc0) { return len - 1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len - 2] >= 0xe0) { return len - 2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len - 3] >= 0xf0) { return len - 3; } // 4-byte characters with only 3 bytes left return len; } @@ -7775,7 +8021,7 @@ namespace simdjson { } template<> - simdjson_really_inline void json_structural_indexer::step<128>(const uint8_t* block, buf_block_reader<128>& reader) noexcept { + simdjson_inline void json_structural_indexer::step<128>(const uint8_t* block, buf_block_reader<128>& reader) noexcept { simd::simd8x64 in_1(block); simd::simd8x64 in_2(block + 64); json_block block_1 = scanner.next(in_1); @@ -7786,22 +8032,24 @@ namespace simdjson { } template<> - simdjson_really_inline void json_structural_indexer::step<64>(const uint8_t* block, buf_block_reader<64>& reader) noexcept { + simdjson_inline void json_structural_indexer::step<64>(const uint8_t* block, buf_block_reader<64>& reader) noexcept { simd::simd8x64 in_1(block); json_block block_1 = scanner.next(in_1); this->next(in_1, block_1, reader.block_index()); reader.advance(); } - simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, 
size_t idx) { uint64_t unescaped = in.lteq(0x1F); +#if SIMDJSON_UTF8VALIDATION checker.check_next_input(in); +#endif indexer.write(uint32_t(idx - 64), prev_structurals); // Output *last* iteration's structurals to the parser prev_structurals = block.structural_start(); unescaped_chars_error |= block.non_quote_inside_string(unescaped); } - simdjson_really_inline error_code json_structural_indexer::finish(dom_parser_implementation& parser, size_t idx, size_t len, stage1_mode partial) { + simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation& parser, size_t idx, size_t len, stage1_mode partial) { // Write out the final iteration's structurals indexer.write(uint32_t(idx - 64), prev_structurals); error_code error = scanner.finish(); @@ -7924,7 +8172,7 @@ namespace simdjson { namespace icelake { namespace { namespace stage1 { - simdjson_really_inline void bit_indexer::write(uint32_t idx, uint64_t bits) { + simdjson_inline void bit_indexer::write(uint32_t idx, uint64_t bits) { // In some instances, the next branch is expensive because it is mispredicted. // Unfortunately, in other cases, // it helps tremendously. 
@@ -8045,39 +8293,100 @@ namespace simdjson { // return true if the unicode codepoint was valid // We work in little-endian then swap at write time simdjson_warn_unused - simdjson_really_inline bool handle_unicode_codepoint(const uint8_t** src_ptr, - uint8_t** dst_ptr) { + simdjson_inline bool handle_unicode_codepoint(const uint8_t** src_ptr, + uint8_t** dst_ptr, bool allow_replacement) { + // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) + constexpr uint32_t substitution_code_point = 0xfffd; // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the // conversion isn't valid; we defer the check for this to inside the // multilingual plane check uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); *src_ptr += 6; - // check for low surrogate for characters outside the Basic + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic // Multilingual Plane. if (code_point >= 0xd800 && code_point < 0xdc00) { - if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { - return false; + const uint8_t* src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + if (!allow_replacement) { return false; } + code_point = substitution_code_point; } - uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + else { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. 
+ uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + if (!allow_replacement) { return false; } + code_point = substitution_code_point; + } + else { + code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } - // if the first code point is invalid we will get here, as we will go past - // the check for being outside the Basic Multilingual plane. If we don't - // find a \u immediately afterwards we fail out anyhow, but if we do, - // this check catches both the case of the first code point being invalid - // or the second code point being invalid. - if ((code_point | code_point_2) >> 16) { - return false; } - - code_point = - (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; - *src_ptr += 6; + } + else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. + if (!allow_replacement) { return false; } + code_point = substitution_code_point; } size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); *dst_ptr += offset; return offset > 0; } + + // handle a unicode codepoint using the wobbly convention + // https://simonsapin.github.io/wtf-8/ + // write appropriate values into dest + // src will advance 6 bytes or 12 bytes + // dest will advance a variable amount (return via pointer) + // return true if the unicode codepoint was valid + // We work in little-endian then swap at write time + simdjson_warn_unused + simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t** src_ptr, + uint8_t** dst_ptr) { + // It is not ideal that this function is nearly identical to handle_unicode_codepoint. 
+ // + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t* src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + uint32_t low_bit = code_point_2 - 0xdc00; + if ((low_bit >> 10) == 0) { + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + } + } + + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; + } + + /** * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. It returns the final output @@ -8086,7 +8395,7 @@ namespace simdjson { * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. */ - simdjson_warn_unused simdjson_really_inline uint8_t* parse_string(const uint8_t* src, uint8_t* dst) { + simdjson_warn_unused simdjson_inline uint8_t* parse_string(const uint8_t* src, uint8_t* dst, bool allow_replacement) { while (1) { // Copy the next n bytes, and find the backslash and quote in them. auto bs_quote = backslash_and_quote::copy_and_find(src, dst); @@ -8105,7 +8414,56 @@ namespace simdjson { within the unicode codepoint handling code. 
*/ src += bs_dist; dst += bs_dist; - if (!handle_unicode_codepoint(&src, &dst)) { + if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { + return nullptr; + } + } + else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } + else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; + } + + simdjson_warn_unused simdjson_inline uint8_t* parse_wobbly_string(const uint8_t* src, uint8_t* dst) { + // It is not ideal that this function is nearly identical to parse_string. + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint_wobbly(&src, &dst)) { return nullptr; } } @@ -8164,7 +8522,7 @@ namespace simdjson { static int log_depth; // Not threadsafe. Log only. 
// Helper to turn unprintable or newline characters into spaces - static simdjson_really_inline char printable_char(char c) { + static simdjson_inline char printable_char(char c) { if (c >= 0x20) { return c; } @@ -8174,7 +8532,7 @@ namespace simdjson { } // Print the header and set up log_start - static simdjson_really_inline void log_start() { + static simdjson_inline void log_start() { if (LOG_ENABLED) { log_depth = 0; printf("\n"); @@ -8183,7 +8541,7 @@ namespace simdjson { } } - simdjson_unused static simdjson_really_inline void log_string(const char* message) { + simdjson_unused static simdjson_inline void log_string(const char* message) { if (LOG_ENABLED) { printf("%s\n", message); } @@ -8191,7 +8549,7 @@ namespace simdjson { // Logs a single line from the stage 2 DOM parser template - static simdjson_really_inline void log_line(S& structurals, const char* title_prefix, const char* title, const char* detail) { + static simdjson_inline void log_line(S& structurals, const char* title_prefix, const char* title, const char* detail) { if (LOG_ENABLED) { printf("| %*s%s%-*s ", log_depth * 2, "", title_prefix, LOG_EVENT_LEN - log_depth * 2 - int(strlen(title_prefix)), title); auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural - 1; @@ -8270,14 +8628,14 @@ namespace simdjson { * - increment_count(iter) - each time a value is found in an array or object. */ template - simdjson_warn_unused simdjson_really_inline error_code walk_document(V& visitor) noexcept; + simdjson_warn_unused simdjson_inline error_code walk_document(V& visitor) noexcept; /** * Create an iterator capable of walking a JSON document. * * The document must have already passed through stage 1. */ - simdjson_really_inline json_iterator(dom_parser_implementation& _dom_parser, size_t start_structural_index); + simdjson_inline json_iterator(dom_parser_implementation& _dom_parser, size_t start_structural_index); /** * Look at the next token. 
@@ -8286,7 +8644,7 @@ namespace simdjson { * * They may include invalid JSON as well (such as `1.2.3` or `ture`). */ - simdjson_really_inline const uint8_t* peek() const noexcept; + simdjson_inline const uint8_t* peek() const noexcept; /** * Advance to the next token. * @@ -8294,56 +8652,56 @@ namespace simdjson { * * They may include invalid JSON as well (such as `1.2.3` or `ture`). */ - simdjson_really_inline const uint8_t* advance() noexcept; + simdjson_inline const uint8_t* advance() noexcept; /** * Get the remaining length of the document, from the start of the current token. */ - simdjson_really_inline size_t remaining_len() const noexcept; + simdjson_inline size_t remaining_len() const noexcept; /** * Check if we are at the end of the document. * * If this is true, there are no more tokens. */ - simdjson_really_inline bool at_eof() const noexcept; + simdjson_inline bool at_eof() const noexcept; /** * Check if we are at the beginning of the document. */ - simdjson_really_inline bool at_beginning() const noexcept; - simdjson_really_inline uint8_t last_structural() const noexcept; + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; /** * Log that a value has been found. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_value(const char* type) const noexcept; + simdjson_inline void log_value(const char* type) const noexcept; /** * Log the start of a multipart value. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_start_value(const char* type) const noexcept; + simdjson_inline void log_start_value(const char* type) const noexcept; /** * Log the end of a multipart value. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_end_value(const char* type) const noexcept; + simdjson_inline void log_end_value(const char* type) const noexcept; /** * Log an error. 
* * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_error(const char* error) const noexcept; + simdjson_inline void log_error(const char* error) const noexcept; template - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(V& visitor, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V& visitor, const uint8_t* value) noexcept; template - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(V& visitor, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_primitive(V& visitor, const uint8_t* value) noexcept; }; template - simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_document(V& visitor) noexcept { + simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V& visitor) noexcept { logger::log_start(); // @@ -8468,52 +8826,52 @@ namespace simdjson { } // walk_document() - simdjson_really_inline json_iterator::json_iterator(dom_parser_implementation& _dom_parser, size_t start_structural_index) + simdjson_inline json_iterator::json_iterator(dom_parser_implementation& _dom_parser, size_t start_structural_index) : buf{ _dom_parser.buf }, next_structural{ &_dom_parser.structural_indexes[start_structural_index] }, dom_parser{ _dom_parser } { } - simdjson_really_inline const uint8_t* json_iterator::peek() const noexcept { + simdjson_inline const uint8_t* json_iterator::peek() const noexcept { return &buf[*(next_structural)]; } - simdjson_really_inline const uint8_t* json_iterator::advance() noexcept { + simdjson_inline const uint8_t* json_iterator::advance() noexcept { return &buf[*(next_structural++)]; } - simdjson_really_inline size_t json_iterator::remaining_len() const noexcept { + simdjson_inline size_t json_iterator::remaining_len() const noexcept { return dom_parser.len - *(next_structural - 1); } - simdjson_really_inline bool json_iterator::at_eof() const 
noexcept { + simdjson_inline bool json_iterator::at_eof() const noexcept { return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; } - simdjson_really_inline bool json_iterator::at_beginning() const noexcept { + simdjson_inline bool json_iterator::at_beginning() const noexcept { return next_structural == dom_parser.structural_indexes.get(); } - simdjson_really_inline uint8_t json_iterator::last_structural() const noexcept { + simdjson_inline uint8_t json_iterator::last_structural() const noexcept { return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; } - simdjson_really_inline void json_iterator::log_value(const char* type) const noexcept { + simdjson_inline void json_iterator::log_value(const char* type) const noexcept { logger::log_line(*this, "", type, ""); } - simdjson_really_inline void json_iterator::log_start_value(const char* type) const noexcept { + simdjson_inline void json_iterator::log_start_value(const char* type) const noexcept { logger::log_line(*this, "+", type, ""); if (logger::LOG_ENABLED) { logger::log_depth++; } } - simdjson_really_inline void json_iterator::log_end_value(const char* type) const noexcept { + simdjson_inline void json_iterator::log_end_value(const char* type) const noexcept { if (logger::LOG_ENABLED) { logger::log_depth--; } logger::log_line(*this, "-", type, ""); } - simdjson_really_inline void json_iterator::log_error(const char* error) const noexcept { + simdjson_inline void json_iterator::log_error(const char* error) const noexcept { logger::log_line(*this, "", "ERROR", error); } template - simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root_primitive(V& visitor, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V& visitor, const uint8_t* value) noexcept { switch (*value) { case '"': return visitor.visit_root_string(*this, value); case 't': return 
visitor.visit_root_true_atom(*this, value); @@ -8529,7 +8887,7 @@ namespace simdjson { } } template - simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_primitive(V& visitor, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V& visitor, const uint8_t* value) noexcept { switch (*value) { case '"': return visitor.visit_string(*this, value); case 't': return visitor.visit_true_atom(*this, value); @@ -8561,18 +8919,18 @@ namespace simdjson { uint64_t* next_tape_loc; /** Write a signed 64-bit value to tape. */ - simdjson_really_inline void append_s64(int64_t value) noexcept; + simdjson_inline void append_s64(int64_t value) noexcept; /** Write an unsigned 64-bit value to tape. */ - simdjson_really_inline void append_u64(uint64_t value) noexcept; + simdjson_inline void append_u64(uint64_t value) noexcept; /** Write a double value to tape. */ - simdjson_really_inline void append_double(double value) noexcept; + simdjson_inline void append_double(double value) noexcept; /** * Append a tape entry (an 8-bit type,and 56 bits worth of value). */ - simdjson_really_inline void append(uint64_t val, internal::tape_type t) noexcept; + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; /** * Skip the current tape entry without writing. @@ -8580,24 +8938,24 @@ namespace simdjson { * Used to skip the start of the container, since we'll come back later to fill it in when the * container ends. */ - simdjson_really_inline void skip() noexcept; + simdjson_inline void skip() noexcept; /** * Skip the number of tape entries necessary to write a large u64 or i64. */ - simdjson_really_inline void skip_large_integer() noexcept; + simdjson_inline void skip_large_integer() noexcept; /** * Skip the number of tape entries necessary to write a double. 
*/ - simdjson_really_inline void skip_double() noexcept; + simdjson_inline void skip_double() noexcept; /** * Write a value to a known location on tape. * * Used to go back and write out the start of a container after the container ends. */ - simdjson_really_inline static void write(uint64_t& tape_loc, uint64_t val, internal::tape_type t) noexcept; + simdjson_inline static void write(uint64_t& tape_loc, uint64_t val, internal::tape_type t) noexcept; private: /** @@ -8605,50 +8963,50 @@ namespace simdjson { * all 64 bits, such as double and uint64_t. */ template - simdjson_really_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; }; // struct number_writer - simdjson_really_inline void tape_writer::append_s64(int64_t value) noexcept { + simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { append2(0, value, internal::tape_type::INT64); } - simdjson_really_inline void tape_writer::append_u64(uint64_t value) noexcept { + simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { append(0, internal::tape_type::UINT64); *next_tape_loc = value; next_tape_loc++; } /** Write a double value to tape. 
*/ - simdjson_really_inline void tape_writer::append_double(double value) noexcept { + simdjson_inline void tape_writer::append_double(double value) noexcept { append2(0, value, internal::tape_type::DOUBLE); } - simdjson_really_inline void tape_writer::skip() noexcept { + simdjson_inline void tape_writer::skip() noexcept { next_tape_loc++; } - simdjson_really_inline void tape_writer::skip_large_integer() noexcept { + simdjson_inline void tape_writer::skip_large_integer() noexcept { next_tape_loc += 2; } - simdjson_really_inline void tape_writer::skip_double() noexcept { + simdjson_inline void tape_writer::skip_double() noexcept { next_tape_loc += 2; } - simdjson_really_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { *next_tape_loc = val | ((uint64_t(char(t))) << 56); next_tape_loc++; } template - simdjson_really_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { append(val, t); static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); memcpy(next_tape_loc, &val2, sizeof(val2)); next_tape_loc++; } - simdjson_really_inline void tape_writer::write(uint64_t& tape_loc, uint64_t val, internal::tape_type t) noexcept { + simdjson_inline void tape_writer::write(uint64_t& tape_loc, uint64_t val, internal::tape_type t) noexcept { tape_loc = val | ((uint64_t(char(t))) << 56); } @@ -8665,40 +9023,40 @@ namespace simdjson { struct tape_builder { template - simdjson_warn_unused static simdjson_really_inline error_code parse_document( + simdjson_warn_unused static simdjson_inline error_code parse_document( dom_parser_implementation& dom_parser, dom::document& doc) noexcept; /** Called when a non-empty document starts. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_document_start(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator& iter) noexcept; /** Called when a non-empty document ends without error. */ - simdjson_warn_unused simdjson_really_inline error_code visit_document_end(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator& iter) noexcept; /** Called when a non-empty array starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_start(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator& iter) noexcept; /** Called when a non-empty array ends. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_end(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator& iter) noexcept; /** Called when an empty array is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_array(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator& iter) noexcept; /** Called when a non-empty object starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_object_start(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator& iter) noexcept; /** * Called when a key in a field is encountered. * * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array * will be called after this with the field value. */ - simdjson_warn_unused simdjson_really_inline error_code visit_key(json_iterator& iter, const uint8_t* key) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator& iter, const uint8_t* key) noexcept; /** Called when a non-empty object ends. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_object_end(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator& iter) noexcept; /** Called when an empty object is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_object(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator& iter) noexcept; /** * Called when a string, number, boolean or null is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator& iter, const uint8_t* value) noexcept; /** * Called when a string, number, boolean or null is found at the top level of a document (i.e. * when there is no array or object and the entire document is a single string, number, boolean or @@ -8707,22 +9065,22 @@ namespace simdjson { * This is separate from primitive() because simdjson's normal primitive parsing routines assume * there is at least one more token after the value, which is only true in an array or object. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_string(json_iterator& iter, const uint8_t* value, bool key = false) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_number(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_true_atom(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_false_atom(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_null_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator& iter, const uint8_t* value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_string(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_number(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_true_atom(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_false_atom(json_iterator& iter, 
const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_null_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator& iter, const uint8_t* value) noexcept; /** Called each time a new field or element in an array or object is found. */ - simdjson_warn_unused simdjson_really_inline error_code increment_count(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator& iter) noexcept; /** Next location to write to tape */ tape_writer tape; @@ -8730,18 +9088,18 @@ namespace simdjson { /** Next write location in the string buf for stage 2 parsing */ uint8_t* current_string_buf_loc; - simdjson_really_inline tape_builder(dom::document& doc) noexcept; + simdjson_inline tape_builder(dom::document& doc) noexcept; - simdjson_really_inline uint32_t next_tape_index(json_iterator& iter) const noexcept; - simdjson_really_inline void start_container(json_iterator& iter) noexcept; - simdjson_warn_unused simdjson_really_inline error_code end_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_warn_unused simdjson_really_inline error_code empty_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_really_inline uint8_t* on_start_string(json_iterator& iter) noexcept; - simdjson_really_inline void on_end_string(uint8_t* dst) 
noexcept; + simdjson_inline uint32_t next_tape_index(json_iterator& iter) const noexcept; + simdjson_inline void start_container(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t* on_start_string(json_iterator& iter) noexcept; + simdjson_inline void on_end_string(uint8_t* dst) noexcept; }; // class tape_builder template - simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_document( + simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( dom_parser_implementation& dom_parser, dom::document& doc) noexcept { dom_parser.doc = &doc; @@ -8750,59 +9108,59 @@ namespace simdjson { return iter.walk_document(builder); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_primitive(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator& iter, const uint8_t* value) noexcept { return iter.visit_root_primitive(*this, value); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_primitive(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator& iter, const uint8_t* value) noexcept { return iter.visit_primitive(*this, value); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_object(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator& iter) noexcept { return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); } - simdjson_warn_unused 
simdjson_really_inline error_code tape_builder::visit_empty_array(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator& iter) noexcept { return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_start(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator& iter) noexcept { start_container(iter); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_start(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator& iter) noexcept { start_container(iter); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_start(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator& iter) noexcept { start_container(iter); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_end(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator& iter) noexcept { return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_end(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator& iter) noexcept { return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_end(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code 
tape_builder::visit_document_end(json_iterator& iter) noexcept { constexpr uint32_t start_tape_index = 0; tape.append(start_tape_index, internal::tape_type::ROOT); tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_key(json_iterator& iter, const uint8_t* key) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator& iter, const uint8_t* key) noexcept { return visit_string(iter, key, true); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::increment_count(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator& iter) noexcept { iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 return SUCCESS; } - simdjson_really_inline tape_builder::tape_builder(dom::document& doc) noexcept : tape{ doc.tape.get() }, current_string_buf_loc{ doc.string_buf.get() } {} + simdjson_inline tape_builder::tape_builder(dom::document& doc) noexcept : tape{ doc.tape.get() }, current_string_buf_loc{ doc.string_buf.get() } {} - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_string(json_iterator& iter, const uint8_t* value, bool key) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator& iter, const uint8_t* value, bool key) noexcept { iter.log_value(key ? "key" : "string"); uint8_t* dst = on_start_string(iter); - dst = stringparsing::parse_string(value + 1, dst); + dst = stringparsing::parse_string(value + 1, dst, false); // We do not allow replacement when the escape characters are invalid. 
if (dst == nullptr) { iter.log_error("Invalid escape in string"); return STRING_ERROR; @@ -8811,16 +9169,16 @@ namespace simdjson { return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_string(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator& iter, const uint8_t* value) noexcept { return visit_string(iter, value); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_number(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("number"); return numberparsing::parse_number(value, tape); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_number(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator& iter, const uint8_t* value) noexcept { // // We need to make a copy to make sure that the string is space terminated. 
// This is not about padding the input, which should already padded up @@ -8842,42 +9200,42 @@ namespace simdjson { return error; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_true_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("true"); if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } tape.append(0, internal::tape_type::TRUE_VALUE); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_true_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("true"); if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } tape.append(0, internal::tape_type::TRUE_VALUE); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_false_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("false"); if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } tape.append(0, internal::tape_type::FALSE_VALUE); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_false_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("false"); if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } tape.append(0, internal::tape_type::FALSE_VALUE); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline 
error_code tape_builder::visit_null_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("null"); if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } tape.append(0, internal::tape_type::NULL_VALUE); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_null_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("null"); if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } tape.append(0, internal::tape_type::NULL_VALUE); @@ -8886,24 +9244,24 @@ namespace simdjson { // private: - simdjson_really_inline uint32_t tape_builder::next_tape_index(json_iterator& iter) const noexcept { + simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator& iter) const noexcept { return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::empty_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept { auto start_index = next_tape_index(iter); tape.append(start_index + 2, start); tape.append(start_index, end); return SUCCESS; } - simdjson_really_inline void tape_builder::start_container(json_iterator& iter) noexcept { + simdjson_inline void tape_builder::start_container(json_iterator& iter) noexcept { iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); iter.dom_parser.open_containers[iter.depth].count = 0; tape.skip(); // We don't actually *write* the start 
element until the end. } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept { // Write the ending tape element, pointing at the start location const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; tape.append(start_tape_index, end); @@ -8916,13 +9274,13 @@ namespace simdjson { return SUCCESS; } - simdjson_really_inline uint8_t* tape_builder::on_start_string(json_iterator& iter) noexcept { + simdjson_inline uint8_t* tape_builder::on_start_string(json_iterator& iter) noexcept { // we advance the point, accounting for the fact that we have a NULL termination tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); return current_string_buf_loc + sizeof(uint32_t); } - simdjson_really_inline void tape_builder::on_end_string(uint8_t* dst) noexcept { + simdjson_inline void tape_builder::on_end_string(uint8_t* dst) noexcept { uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); // TODO check for overflow in case someone has a crazy string (>=4GB?) 
// But only add the overflow check when the document itself exceeds 4GB @@ -8948,7 +9306,7 @@ namespace simdjson { namespace { namespace stage1 { - simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { + simdjson_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; } return find_escaped_branchless(backslash); } @@ -8978,8 +9336,12 @@ namespace simdjson { return stage2::tape_builder::parse_document(*this, _doc); } - simdjson_warn_unused uint8_t* dom_parser_implementation::parse_string(const uint8_t* src, uint8_t* dst) const noexcept { - return icelake::stringparsing::parse_string(src, dst); + simdjson_warn_unused uint8_t* dom_parser_implementation::parse_string(const uint8_t* src, uint8_t* dst, bool replacement_char) const noexcept { + return icelake::stringparsing::parse_string(src, dst, replacement_char); + } + + simdjson_warn_unused uint8_t* dom_parser_implementation::parse_wobbly_string(const uint8_t* src, uint8_t* dst) const noexcept { + return icelake::stringparsing::parse_wobbly_string(src, dst); } simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t* _buf, size_t _len, dom::document& _doc) noexcept { @@ -9047,25 +9409,25 @@ namespace simdjson { using namespace simd; struct json_character_block { - static simdjson_really_inline json_character_block classify(const simd::simd8x64& in); + static simdjson_inline json_character_block classify(const simd::simd8x64& in); // ASCII white-space ('\r','\n','\t',' ') - simdjson_really_inline uint64_t whitespace() const noexcept; + simdjson_inline uint64_t whitespace() const noexcept; // non-quote structural characters (comma, colon, braces, brackets) - simdjson_really_inline uint64_t op() const noexcept; + simdjson_inline uint64_t op() const noexcept; // neither a structural character nor a white-space, so letters, numbers and quotes - simdjson_really_inline 
uint64_t scalar() const noexcept; + simdjson_inline uint64_t scalar() const noexcept; uint64_t _whitespace; // ASCII white-space ('\r','\n','\t',' ') uint64_t _op; // structural characters (comma, colon, braces, brackets but not quotes) }; - simdjson_really_inline uint64_t json_character_block::whitespace() const noexcept { return _whitespace; } - simdjson_really_inline uint64_t json_character_block::op() const noexcept { return _op; } - simdjson_really_inline uint64_t json_character_block::scalar() const noexcept { return ~(op() | whitespace()); } + simdjson_inline uint64_t json_character_block::whitespace() const noexcept { return _whitespace; } + simdjson_inline uint64_t json_character_block::op() const noexcept { return _op; } + simdjson_inline uint64_t json_character_block::scalar() const noexcept { return ~(op() | whitespace()); } // This identifies structural characters (comma, colon, braces, brackets), // and ASCII white-space ('\r','\n','\t',' '). - simdjson_really_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { + simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why // we can't use the generic lookup_16. 
const auto whitespace_table = simd8::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); @@ -9118,21 +9480,21 @@ namespace simdjson { return { whitespace, op }; } - simdjson_really_inline bool is_ascii(const simd8x64& input) { + simdjson_inline bool is_ascii(const simd8x64& input) { return input.reduce_or().is_ascii(); } - simdjson_unused simdjson_really_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { - simd8 is_second_byte = prev1.saturating_sub(0b11000000u - 1); // Only 11______ will be > 0 - simd8 is_third_byte = prev2.saturating_sub(0b11100000u - 1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u - 1); // Only 1111____ will be > 0 + simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1.saturating_sub(0xc0u - 1); // Only 11______ will be > 0 + simd8 is_third_byte = prev2.saturating_sub(0xe0u - 1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u - 1); // Only 1111____ will be > 0 // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); } - simdjson_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { - simd8 is_third_byte = prev2.saturating_sub(0b11100000u - 1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u - 1); // Only 1111____ will be > 0 + simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0xe0u - 1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u - 1); // Only 1111____ will be > 0 // Caller requires a bool (all 1's). 
All values resulting from the subtraction will be <= 64, so signed comparison is fine. return simd8(is_third_byte | is_fourth_byte) > int8_t(0); } @@ -9149,7 +9511,7 @@ namespace simdjson { using namespace simd; - simdjson_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) // Bit 1 = Too Long (ASCII followed by continuation) // Bit 2 = Overlong 3-byte @@ -9157,20 +9519,20 @@ namespace simdjson { // Bit 5 = Overlong 2-byte // Bit 7 = Two Continuations constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ + // 11______ 11______ constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; // 11110101 1000____ // 1111011_ 1000____ @@ -9191,7 +9553,7 @@ namespace simdjson { TOO_SHORT | OVERLONG_3 | SURROGATE, // 1111____ ________ TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 - ); + ); constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . 
const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( // ____0000 ________ @@ -9220,7 +9582,7 @@ namespace simdjson { CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, CARRY | TOO_LARGE | TOO_LARGE_1000, CARRY | TOO_LARGE | TOO_LARGE_1000 - ); + ); const simd8 byte_2_high = input.shr<4>().lookup_16( // ________ 0_______ TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, @@ -9236,10 +9598,10 @@ namespace simdjson { // ________ 11______ TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT - ); + ); return (byte_1_high & byte_1_low & byte_2_high); } - simdjson_really_inline simd8 check_multibyte_lengths(const simd8 input, + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, const simd8 prev_input, const simd8 sc) { simd8 prev2 = input.prev<2>(prev_input); simd8 prev3 = input.prev<3>(prev_input); @@ -9252,7 +9614,7 @@ namespace simdjson { // Return nonzero if there are incomplete multibyte characters at the end of the block: // e.g. if there is a 4-byte character, but it's 3 bytes from the end. // - simdjson_really_inline simd8 is_incomplete(const simd8 input) { + simdjson_inline simd8 is_incomplete(const simd8 input) { // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): // ... 
1111____ 111_____ 11______ #if SIMDJSON_IMPLEMENTATION_ICELAKE @@ -9264,14 +9626,14 @@ namespace simdjson { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u - 1, 0b11100000u - 1, 0b11000000u - 1 + 255, 255, 255, 255, 255, 0xf0u - 1, 0xe0u - 1, 0xc0u - 1 }; #else static const uint8_t max_array[32] = { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u - 1, 0b11100000u - 1, 0b11000000u - 1 + 255, 255, 255, 255, 255, 0xf0u - 1, 0xe0u - 1, 0xc0u - 1 }; #endif const simd8 max_value(&max_array[sizeof(max_array) - sizeof(simd8)]); @@ -9289,7 +9651,7 @@ namespace simdjson { // // Check whether the current bytes are valid UTF-8. // - simdjson_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) simd8 prev1 = input.prev<1>(prev_input); @@ -9300,13 +9662,21 @@ namespace simdjson { // The only problem that can happen at EOF is that a multibyte character is too short // or a byte value too large in the last bytes: check_special_cases only checks for bytes // too large in the first of two bytes. - simdjson_really_inline void check_eof() { + simdjson_inline void check_eof() { // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't // possibly finish them. 
this->error |= this->prev_incomplete; } - simdjson_really_inline void check_next_input(const simd8x64& input) { +#ifndef SIMDJSON_IF_CONSTEXPR +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_IF_CONSTEXPR if constexpr +#else +#define SIMDJSON_IF_CONSTEXPR if +#endif +#endif + + simdjson_inline void check_next_input(const simd8x64& input) { if (simdjson_likely(is_ascii(input))) { this->error |= this->prev_incomplete; } @@ -9316,24 +9686,25 @@ namespace simdjson { || (simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4), "We support one, two or four chunks per 64-byte block."); - if (simd8x64::NUM_CHUNKS == 1) { + SIMDJSON_IF_CONSTEXPR(simd8x64::NUM_CHUNKS == 1) { this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - } if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); } - else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS - 1]); - this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS - 1]; +else SIMDJSON_IF_CONSTEXPR(simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } + else SIMDJSON_IF_CONSTEXPR(simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS - 1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS - 1]; } } // do not 
forget to call check_eof! - simdjson_really_inline error_code errors() { + simdjson_inline error_code errors() { return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS; } @@ -9361,10 +9732,10 @@ namespace simdjson { template struct buf_block_reader { public: - simdjson_really_inline buf_block_reader(const uint8_t* _buf, size_t _len); - simdjson_really_inline size_t block_index(); - simdjson_really_inline bool has_full_block() const; - simdjson_really_inline const uint8_t* full_block() const; + simdjson_inline buf_block_reader(const uint8_t* _buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t* full_block() const; /** * Get the last block, padded with spaces. * @@ -9374,8 +9745,8 @@ namespace simdjson { * * @return the number of effective characters in the last block. */ - simdjson_really_inline size_t get_remainder(uint8_t* dst) const; - simdjson_really_inline void advance(); + simdjson_inline size_t get_remainder(uint8_t* dst) const; + simdjson_inline void advance(); private: const uint8_t* buf; const size_t len; @@ -9414,23 +9785,23 @@ namespace simdjson { } template - simdjson_really_inline buf_block_reader::buf_block_reader(const uint8_t* _buf, size_t _len) : buf{ _buf }, len{ _len }, lenminusstep{ len < STEP_SIZE ? 0 : len - STEP_SIZE }, idx{ 0 } {} + simdjson_inline buf_block_reader::buf_block_reader(const uint8_t* _buf, size_t _len) : buf{ _buf }, len{ _len }, lenminusstep{ len < STEP_SIZE ? 
0 : len - STEP_SIZE }, idx{ 0 } {} template - simdjson_really_inline size_t buf_block_reader::block_index() { return idx; } + simdjson_inline size_t buf_block_reader::block_index() { return idx; } template - simdjson_really_inline bool buf_block_reader::has_full_block() const { + simdjson_inline bool buf_block_reader::has_full_block() const { return idx < lenminusstep; } template - simdjson_really_inline const uint8_t* buf_block_reader::full_block() const { + simdjson_inline const uint8_t* buf_block_reader::full_block() const { return &buf[idx]; } template - simdjson_really_inline size_t buf_block_reader::get_remainder(uint8_t* dst) const { + simdjson_inline size_t buf_block_reader::get_remainder(uint8_t* dst) const { if (len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. std::memcpy(dst, buf + idx, len - idx); @@ -9438,7 +9809,7 @@ namespace simdjson { } template - simdjson_really_inline void buf_block_reader::advance() { + simdjson_inline void buf_block_reader::advance() { idx += STEP_SIZE; } @@ -9454,27 +9825,27 @@ namespace simdjson { struct json_string_block { // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : + simdjson_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : _backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {} // Escaped characters (characters following an escape() character) - simdjson_really_inline uint64_t escaped() const { return _escaped; } + simdjson_inline uint64_t escaped() const { return _escaped; } // Escape characters (backslashes that are not escaped--i.e. 
in \\, includes only the first \) - simdjson_really_inline uint64_t escape() const { return _backslash & ~_escaped; } + simdjson_inline uint64_t escape() const { return _backslash & ~_escaped; } // Real (non-backslashed) quotes - simdjson_really_inline uint64_t quote() const { return _quote; } + simdjson_inline uint64_t quote() const { return _quote; } // Start quotes of strings - simdjson_really_inline uint64_t string_start() const { return _quote & _in_string; } + simdjson_inline uint64_t string_start() const { return _quote & _in_string; } // End quotes of strings - simdjson_really_inline uint64_t string_end() const { return _quote & ~_in_string; } + simdjson_inline uint64_t string_end() const { return _quote & ~_in_string; } // Only characters inside the string (not including the quotes) - simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + simdjson_inline uint64_t string_content() const { return _in_string & ~_quote; } // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } // Tail of string (everything except the start quote) - simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } + simdjson_inline uint64_t string_tail() const { return _in_string ^ _quote; } // backslash characters uint64_t _backslash; @@ -9489,14 +9860,14 @@ namespace simdjson { // Scans blocks for string characters, storing the state necessary to do so class 
json_string_scanner { public: - simdjson_really_inline json_string_block next(const simd::simd8x64& in); + simdjson_inline json_string_block next(const simd::simd8x64& in); // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); + simdjson_inline error_code finish(); private: // Intended to be defined by the implementation - simdjson_really_inline uint64_t find_escaped(uint64_t escape); - simdjson_really_inline uint64_t find_escaped_branchless(uint64_t escape); + simdjson_inline uint64_t find_escaped(uint64_t escape); + simdjson_inline uint64_t find_escaped_branchless(uint64_t escape); // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). uint64_t prev_in_string = 0ULL; @@ -9531,7 +9902,7 @@ namespace simdjson { // desired | x | x x x x x x x x | // text | \\\ | \\\"\\\" \\\" \\"\\" | // - simdjson_really_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { + simdjson_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { // If there was overflow, pretend the first character isn't a backslash backslash &= ~prev_escaped; uint64_t follows_escape = backslash << 1 | prev_escaped; @@ -9556,7 +9927,7 @@ namespace simdjson { // // Backslash sequences outside of quotes will be detected in stage 2. 
// - simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + simdjson_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { const uint64_t backslash = in.eq('\\'); const uint64_t escaped = find_escaped(backslash); const uint64_t quote = in.eq('"') & ~escaped; @@ -9589,7 +9960,7 @@ namespace simdjson { ); } - simdjson_really_inline error_code json_string_scanner::finish() { + simdjson_inline error_code json_string_scanner::finish() { if (prev_in_string) { return UNCLOSED_STRING; } @@ -9627,25 +9998,25 @@ namespace simdjson { struct json_block { public: // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} - simdjson_really_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} /** * The start of structurals. * In simdjson prior to v0.3, these were called the pseudo-structural characters. **/ - simdjson_really_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } /** All JSON whitespace (i.e. 
not in a string) */ - simdjson_really_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } // Helpers /** Whether the given characters are inside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } /** Whether the given characters are outside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } // string and escape characters json_string_block _string; @@ -9660,12 +10031,12 @@ namespace simdjson { * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". * They may reside inside a string. **/ - simdjson_really_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } /** * The start of non-operator runs, like 123, true and "abc". * It main reside inside a string. **/ - simdjson_really_inline uint64_t potential_scalar_start() const noexcept { + simdjson_inline uint64_t potential_scalar_start() const noexcept { // The term "scalar" refers to anything except structural characters and white space // (so letters, numbers, quotes). 
// Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space @@ -9676,7 +10047,7 @@ namespace simdjson { * Whether the given character is immediately after a non-operator like 123, true. * The characters following a quote are not included. */ - simdjson_really_inline uint64_t follows_potential_scalar() const noexcept { + simdjson_inline uint64_t follows_potential_scalar() const noexcept { // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a // white space. @@ -9700,10 +10071,10 @@ namespace simdjson { */ class json_scanner { public: - json_scanner() {} - simdjson_really_inline json_block next(const simd::simd8x64& in); + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); + simdjson_inline error_code finish(); private: // Whether the last character of the previous iteration is part of a scalar token @@ -9720,13 +10091,13 @@ namespace simdjson { // // const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); // - simdjson_really_inline uint64_t follows(const uint64_t match, uint64_t& overflow) { + simdjson_inline uint64_t follows(const uint64_t match, uint64_t& overflow) { const uint64_t result = match << 1 | overflow; overflow = match >> 63; return result; } - simdjson_really_inline json_block json_scanner::next(const simd::simd8x64& in) { + simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { json_string_block strings = string_scanner.next(in); // identifies the white-space and the structural characters json_character_block characters = json_character_block::classify(in); @@ -9751,7 +10122,7 @@ namespace simdjson { ); } - simdjson_really_inline error_code json_scanner::finish() { + simdjson_inline 
error_code json_scanner::finish() { return string_scanner.finish(); } @@ -9777,23 +10148,23 @@ namespace simdjson { static error_code minify(const uint8_t* buf, size_t len, uint8_t* dst, size_t& dst_len) noexcept; private: - simdjson_really_inline json_minifier(uint8_t* _dst) + simdjson_inline json_minifier(uint8_t* _dst) : dst{ _dst } {} template - simdjson_really_inline void step(const uint8_t* block_buf, buf_block_reader& reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block); - simdjson_really_inline error_code finish(uint8_t* dst_start, size_t& dst_len); + simdjson_inline void step(const uint8_t* block_buf, buf_block_reader& reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t* dst_start, size_t& dst_len); json_scanner scanner{}; uint8_t* dst; }; - simdjson_really_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { uint64_t mask = block.whitespace(); dst += in.compress(mask, dst); } - simdjson_really_inline error_code json_minifier::finish(uint8_t* dst_start, size_t& dst_len) { + simdjson_inline error_code json_minifier::finish(uint8_t* dst_start, size_t& dst_len) { error_code error = scanner.finish(); if (error) { dst_len = 0; return error; } dst_len = dst - dst_start; @@ -9801,7 +10172,7 @@ namespace simdjson { } template<> - simdjson_really_inline void json_minifier::step<128>(const uint8_t* block_buf, buf_block_reader<128>& reader) noexcept { + simdjson_inline void json_minifier::step<128>(const uint8_t* block_buf, buf_block_reader<128>& reader) noexcept { simd::simd8x64 in_1(block_buf); simd::simd8x64 in_2(block_buf + 64); json_block block_1 = scanner.next(in_1); @@ -9812,7 +10183,7 @@ namespace simdjson { } template<> - simdjson_really_inline void json_minifier::step<64>(const uint8_t* 
block_buf, buf_block_reader<64>& reader) noexcept { + simdjson_inline void json_minifier::step<64>(const uint8_t* block_buf, buf_block_reader<64>& reader) noexcept { simd::simd8x64 in_1(block_buf); json_block block_1 = scanner.next(in_1); this->next(block_buf, block_1); @@ -9885,7 +10256,7 @@ namespace simdjson { * complete document, therefore the last json buffer location is the end of the * batch. */ - simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation& parser) { + simdjson_inline uint32_t find_next_document_index(dom_parser_implementation& parser) { // Variant: do not count separately, just figure out depth if (parser.n_structural_indexes == 0) { return 0; } auto arr_cnt = 0; @@ -9960,7 +10331,7 @@ namespace simdjson { public: uint32_t* tail; - simdjson_really_inline bit_indexer(uint32_t* index_buf) : tail(index_buf) {} + simdjson_inline bit_indexer(uint32_t* index_buf) : tail(index_buf) {} // flatten out values in 'bits' assuming that they are are to have values of idx // plus their position in the bitvector, and store these indexes at @@ -9971,15 +10342,15 @@ namespace simdjson { // If the kernel sets SIMDJSON_CUSTOM_BIT_INDEXER, then it will provide its own // version of the code. #ifdef SIMDJSON_CUSTOM_BIT_INDEXER - simdjson_really_inline void write(uint32_t idx, uint64_t bits); + simdjson_inline void write(uint32_t idx, uint64_t bits); #else - simdjson_really_inline void write(uint32_t idx, uint64_t bits) { + simdjson_inline void write(uint32_t idx, uint64_t bits) { // In some instances, the next branch is expensive because it is mispredicted. // Unfortunately, in other cases, // it helps tremendously. if (bits == 0) return; -#if defined(SIMDJSON_PREFER_REVERSE_BITS) +#if SIMDJSON_PREFER_REVERSE_BITS /** * ARM lacks a fast trailing zero instruction, but it has a fast * bit reversal instruction and a fast leading zero instruction. 
@@ -10082,11 +10453,11 @@ namespace simdjson { static error_code index(const uint8_t* buf, size_t len, dom_parser_implementation& parser, stage1_mode partial) noexcept; private: - simdjson_really_inline json_structural_indexer(uint32_t* structural_indexes); + simdjson_inline json_structural_indexer(uint32_t* structural_indexes); template - simdjson_really_inline void step(const uint8_t* block, buf_block_reader& reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); - simdjson_really_inline error_code finish(dom_parser_implementation& parser, size_t idx, size_t len, stage1_mode partial); + simdjson_inline void step(const uint8_t* block, buf_block_reader& reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation& parser, size_t idx, size_t len, stage1_mode partial); json_scanner scanner{}; utf8_checker checker{}; @@ -10095,26 +10466,26 @@ namespace simdjson { uint64_t unescaped_chars_error = 0; }; - simdjson_really_inline json_structural_indexer::json_structural_indexer(uint32_t* structural_indexes) : indexer{ structural_indexes } {} + simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t* structural_indexes) : indexer{ structural_indexes } {} // Skip the last character if it is partial - simdjson_really_inline size_t trim_partial_utf8(const uint8_t* buf, size_t len) { + simdjson_inline size_t trim_partial_utf8(const uint8_t* buf, size_t len) { if (simdjson_unlikely(len < 3)) { switch (len) { case 2: - if (buf[len - 1] >= 0b11000000) { return len - 1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len - 2] >= 0b11100000) { return len - 2; } // 3- and 4-byte characters with only 2 bytes left + if (buf[len - 1] >= 0xc0) { return len - 1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len - 2] >= 0xe0) { return len - 2; } // 3- and 4-byte 
characters with only 2 bytes left return len; case 1: - if (buf[len - 1] >= 0b11000000) { return len - 1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len - 1] >= 0xc0) { return len - 1; } // 2-, 3- and 4-byte characters with only 1 byte left return len; case 0: return len; } } - if (buf[len - 1] >= 0b11000000) { return len - 1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len - 2] >= 0b11100000) { return len - 2; } // 3- and 4-byte characters with only 1 byte left - if (buf[len - 3] >= 0b11110000) { return len - 3; } // 4-byte characters with only 3 bytes left + if (buf[len - 1] >= 0xc0) { return len - 1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len - 2] >= 0xe0) { return len - 2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len - 3] >= 0xf0) { return len - 3; } // 4-byte characters with only 3 bytes left return len; } @@ -10163,7 +10534,7 @@ namespace simdjson { } template<> - simdjson_really_inline void json_structural_indexer::step<128>(const uint8_t* block, buf_block_reader<128>& reader) noexcept { + simdjson_inline void json_structural_indexer::step<128>(const uint8_t* block, buf_block_reader<128>& reader) noexcept { simd::simd8x64 in_1(block); simd::simd8x64 in_2(block + 64); json_block block_1 = scanner.next(in_1); @@ -10174,22 +10545,24 @@ namespace simdjson { } template<> - simdjson_really_inline void json_structural_indexer::step<64>(const uint8_t* block, buf_block_reader<64>& reader) noexcept { + simdjson_inline void json_structural_indexer::step<64>(const uint8_t* block, buf_block_reader<64>& reader) noexcept { simd::simd8x64 in_1(block); json_block block_1 = scanner.next(in_1); this->next(in_1, block_1, reader.block_index()); reader.advance(); } - simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& 
block, size_t idx) { uint64_t unescaped = in.lteq(0x1F); +#if SIMDJSON_UTF8VALIDATION checker.check_next_input(in); +#endif indexer.write(uint32_t(idx - 64), prev_structurals); // Output *last* iteration's structurals to the parser prev_structurals = block.structural_start(); unescaped_chars_error |= block.non_quote_inside_string(unescaped); } - simdjson_really_inline error_code json_structural_indexer::finish(dom_parser_implementation& parser, size_t idx, size_t len, stage1_mode partial) { + simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation& parser, size_t idx, size_t len, stage1_mode partial) { // Write out the final iteration's structurals indexer.write(uint32_t(idx - 64), prev_structurals); error_code error = scanner.finish(); @@ -10380,39 +10753,100 @@ namespace simdjson { // return true if the unicode codepoint was valid // We work in little-endian then swap at write time simdjson_warn_unused - simdjson_really_inline bool handle_unicode_codepoint(const uint8_t** src_ptr, - uint8_t** dst_ptr) { + simdjson_inline bool handle_unicode_codepoint(const uint8_t** src_ptr, + uint8_t** dst_ptr, bool allow_replacement) { + // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) + constexpr uint32_t substitution_code_point = 0xfffd; // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the // conversion isn't valid; we defer the check for this to inside the // multilingual plane check uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); *src_ptr += 6; - // check for low surrogate for characters outside the Basic + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic // Multilingual Plane. 
if (code_point >= 0xd800 && code_point < 0xdc00) { - if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { - return false; + const uint8_t* src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + if (!allow_replacement) { return false; } + code_point = substitution_code_point; } - uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + else { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + if (!allow_replacement) { return false; } + code_point = substitution_code_point; + } + else { + code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } - // if the first code point is invalid we will get here, as we will go past - // the check for being outside the Basic Multilingual plane. If we don't - // find a \u immediately afterwards we fail out anyhow, but if we do, - // this check catches both the case of the first code point being invalid - // or the second code point being invalid. - if ((code_point | code_point_2) >> 16) { - return false; } - - code_point = - (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; - *src_ptr += 6; + } + else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. 
+ if (!allow_replacement) { return false; } + code_point = substitution_code_point; } size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); *dst_ptr += offset; return offset > 0; } + + // handle a unicode codepoint using the wobbly convention + // https://simonsapin.github.io/wtf-8/ + // write appropriate values into dest + // src will advance 6 bytes or 12 bytes + // dest will advance a variable amount (return via pointer) + // return true if the unicode codepoint was valid + // We work in little-endian then swap at write time + simdjson_warn_unused + simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t** src_ptr, + uint8_t** dst_ptr) { + // It is not ideal that this function is nearly identical to handle_unicode_codepoint. + // + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t* src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + uint32_t low_bit = code_point_2 - 0xdc00; + if ((low_bit >> 10) == 0) { + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + } + } + + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; + } + + /** * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. 
It returns the final output @@ -10421,7 +10855,7 @@ namespace simdjson { * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. */ - simdjson_warn_unused simdjson_really_inline uint8_t* parse_string(const uint8_t* src, uint8_t* dst) { + simdjson_warn_unused simdjson_inline uint8_t* parse_string(const uint8_t* src, uint8_t* dst, bool allow_replacement) { while (1) { // Copy the next n bytes, and find the backslash and quote in them. auto bs_quote = backslash_and_quote::copy_and_find(src, dst); @@ -10440,7 +10874,56 @@ namespace simdjson { within the unicode codepoint handling code. */ src += bs_dist; dst += bs_dist; - if (!handle_unicode_codepoint(&src, &dst)) { + if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { + return nullptr; + } + } + else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } + else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; + } + + simdjson_warn_unused simdjson_inline uint8_t* parse_wobbly_string(const uint8_t* src, uint8_t* dst) { + // It is not ideal that this function is nearly identical to parse_string. + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. 
Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint_wobbly(&src, &dst)) { return nullptr; } } @@ -10499,7 +10982,7 @@ namespace simdjson { static int log_depth; // Not threadsafe. Log only. // Helper to turn unprintable or newline characters into spaces - static simdjson_really_inline char printable_char(char c) { + static simdjson_inline char printable_char(char c) { if (c >= 0x20) { return c; } @@ -10509,7 +10992,7 @@ namespace simdjson { } // Print the header and set up log_start - static simdjson_really_inline void log_start() { + static simdjson_inline void log_start() { if (LOG_ENABLED) { log_depth = 0; printf("\n"); @@ -10518,7 +11001,7 @@ namespace simdjson { } } - simdjson_unused static simdjson_really_inline void log_string(const char* message) { + simdjson_unused static simdjson_inline void log_string(const char* message) { if (LOG_ENABLED) { printf("%s\n", message); } @@ -10526,7 +11009,7 @@ namespace simdjson { // Logs a single line from the stage 2 DOM parser template - static simdjson_really_inline void log_line(S& structurals, const char* title_prefix, const char* title, const char* detail) { + static simdjson_inline void log_line(S& structurals, const char* title_prefix, const char* title, const char* detail) { if (LOG_ENABLED) { printf("| %*s%s%-*s ", log_depth * 2, "", title_prefix, LOG_EVENT_LEN - log_depth * 2 - int(strlen(title_prefix)), title); auto current_index = structurals.at_beginning() ? 
nullptr : structurals.next_structural - 1; @@ -10605,14 +11088,14 @@ namespace simdjson { * - increment_count(iter) - each time a value is found in an array or object. */ template - simdjson_warn_unused simdjson_really_inline error_code walk_document(V& visitor) noexcept; + simdjson_warn_unused simdjson_inline error_code walk_document(V& visitor) noexcept; /** * Create an iterator capable of walking a JSON document. * * The document must have already passed through stage 1. */ - simdjson_really_inline json_iterator(dom_parser_implementation& _dom_parser, size_t start_structural_index); + simdjson_inline json_iterator(dom_parser_implementation& _dom_parser, size_t start_structural_index); /** * Look at the next token. @@ -10621,7 +11104,7 @@ namespace simdjson { * * They may include invalid JSON as well (such as `1.2.3` or `ture`). */ - simdjson_really_inline const uint8_t* peek() const noexcept; + simdjson_inline const uint8_t* peek() const noexcept; /** * Advance to the next token. * @@ -10629,56 +11112,56 @@ namespace simdjson { * * They may include invalid JSON as well (such as `1.2.3` or `ture`). */ - simdjson_really_inline const uint8_t* advance() noexcept; + simdjson_inline const uint8_t* advance() noexcept; /** * Get the remaining length of the document, from the start of the current token. */ - simdjson_really_inline size_t remaining_len() const noexcept; + simdjson_inline size_t remaining_len() const noexcept; /** * Check if we are at the end of the document. * * If this is true, there are no more tokens. */ - simdjson_really_inline bool at_eof() const noexcept; + simdjson_inline bool at_eof() const noexcept; /** * Check if we are at the beginning of the document. */ - simdjson_really_inline bool at_beginning() const noexcept; - simdjson_really_inline uint8_t last_structural() const noexcept; + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; /** * Log that a value has been found. 
* * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_value(const char* type) const noexcept; + simdjson_inline void log_value(const char* type) const noexcept; /** * Log the start of a multipart value. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_start_value(const char* type) const noexcept; + simdjson_inline void log_start_value(const char* type) const noexcept; /** * Log the end of a multipart value. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_end_value(const char* type) const noexcept; + simdjson_inline void log_end_value(const char* type) const noexcept; /** * Log an error. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_error(const char* error) const noexcept; + simdjson_inline void log_error(const char* error) const noexcept; template - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(V& visitor, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V& visitor, const uint8_t* value) noexcept; template - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(V& visitor, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_primitive(V& visitor, const uint8_t* value) noexcept; }; template - simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_document(V& visitor) noexcept { + simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V& visitor) noexcept { logger::log_start(); // @@ -10803,52 +11286,52 @@ namespace simdjson { } // walk_document() - simdjson_really_inline json_iterator::json_iterator(dom_parser_implementation& _dom_parser, size_t start_structural_index) + simdjson_inline json_iterator::json_iterator(dom_parser_implementation& _dom_parser, size_t start_structural_index) : buf{ _dom_parser.buf }, next_structural{ 
&_dom_parser.structural_indexes[start_structural_index] }, dom_parser{ _dom_parser } { } - simdjson_really_inline const uint8_t* json_iterator::peek() const noexcept { + simdjson_inline const uint8_t* json_iterator::peek() const noexcept { return &buf[*(next_structural)]; } - simdjson_really_inline const uint8_t* json_iterator::advance() noexcept { + simdjson_inline const uint8_t* json_iterator::advance() noexcept { return &buf[*(next_structural++)]; } - simdjson_really_inline size_t json_iterator::remaining_len() const noexcept { + simdjson_inline size_t json_iterator::remaining_len() const noexcept { return dom_parser.len - *(next_structural - 1); } - simdjson_really_inline bool json_iterator::at_eof() const noexcept { + simdjson_inline bool json_iterator::at_eof() const noexcept { return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; } - simdjson_really_inline bool json_iterator::at_beginning() const noexcept { + simdjson_inline bool json_iterator::at_beginning() const noexcept { return next_structural == dom_parser.structural_indexes.get(); } - simdjson_really_inline uint8_t json_iterator::last_structural() const noexcept { + simdjson_inline uint8_t json_iterator::last_structural() const noexcept { return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; } - simdjson_really_inline void json_iterator::log_value(const char* type) const noexcept { + simdjson_inline void json_iterator::log_value(const char* type) const noexcept { logger::log_line(*this, "", type, ""); } - simdjson_really_inline void json_iterator::log_start_value(const char* type) const noexcept { + simdjson_inline void json_iterator::log_start_value(const char* type) const noexcept { logger::log_line(*this, "+", type, ""); if (logger::LOG_ENABLED) { logger::log_depth++; } } - simdjson_really_inline void json_iterator::log_end_value(const char* type) const noexcept { + simdjson_inline void json_iterator::log_end_value(const char* type) 
const noexcept { if (logger::LOG_ENABLED) { logger::log_depth--; } logger::log_line(*this, "-", type, ""); } - simdjson_really_inline void json_iterator::log_error(const char* error) const noexcept { + simdjson_inline void json_iterator::log_error(const char* error) const noexcept { logger::log_line(*this, "", "ERROR", error); } template - simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root_primitive(V& visitor, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V& visitor, const uint8_t* value) noexcept { switch (*value) { case '"': return visitor.visit_root_string(*this, value); case 't': return visitor.visit_root_true_atom(*this, value); @@ -10864,7 +11347,7 @@ namespace simdjson { } } template - simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_primitive(V& visitor, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V& visitor, const uint8_t* value) noexcept { switch (*value) { case '"': return visitor.visit_string(*this, value); case 't': return visitor.visit_true_atom(*this, value); @@ -10896,18 +11379,18 @@ namespace simdjson { uint64_t* next_tape_loc; /** Write a signed 64-bit value to tape. */ - simdjson_really_inline void append_s64(int64_t value) noexcept; + simdjson_inline void append_s64(int64_t value) noexcept; /** Write an unsigned 64-bit value to tape. */ - simdjson_really_inline void append_u64(uint64_t value) noexcept; + simdjson_inline void append_u64(uint64_t value) noexcept; /** Write a double value to tape. */ - simdjson_really_inline void append_double(double value) noexcept; + simdjson_inline void append_double(double value) noexcept; /** * Append a tape entry (an 8-bit type,and 56 bits worth of value). 
*/ - simdjson_really_inline void append(uint64_t val, internal::tape_type t) noexcept; + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; /** * Skip the current tape entry without writing. @@ -10915,24 +11398,24 @@ namespace simdjson { * Used to skip the start of the container, since we'll come back later to fill it in when the * container ends. */ - simdjson_really_inline void skip() noexcept; + simdjson_inline void skip() noexcept; /** * Skip the number of tape entries necessary to write a large u64 or i64. */ - simdjson_really_inline void skip_large_integer() noexcept; + simdjson_inline void skip_large_integer() noexcept; /** * Skip the number of tape entries necessary to write a double. */ - simdjson_really_inline void skip_double() noexcept; + simdjson_inline void skip_double() noexcept; /** * Write a value to a known location on tape. * * Used to go back and write out the start of a container after the container ends. */ - simdjson_really_inline static void write(uint64_t& tape_loc, uint64_t val, internal::tape_type t) noexcept; + simdjson_inline static void write(uint64_t& tape_loc, uint64_t val, internal::tape_type t) noexcept; private: /** @@ -10940,50 +11423,50 @@ namespace simdjson { * all 64 bits, such as double and uint64_t. */ template - simdjson_really_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; }; // struct number_writer - simdjson_really_inline void tape_writer::append_s64(int64_t value) noexcept { + simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { append2(0, value, internal::tape_type::INT64); } - simdjson_really_inline void tape_writer::append_u64(uint64_t value) noexcept { + simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { append(0, internal::tape_type::UINT64); *next_tape_loc = value; next_tape_loc++; } /** Write a double value to tape. 
*/ - simdjson_really_inline void tape_writer::append_double(double value) noexcept { + simdjson_inline void tape_writer::append_double(double value) noexcept { append2(0, value, internal::tape_type::DOUBLE); } - simdjson_really_inline void tape_writer::skip() noexcept { + simdjson_inline void tape_writer::skip() noexcept { next_tape_loc++; } - simdjson_really_inline void tape_writer::skip_large_integer() noexcept { + simdjson_inline void tape_writer::skip_large_integer() noexcept { next_tape_loc += 2; } - simdjson_really_inline void tape_writer::skip_double() noexcept { + simdjson_inline void tape_writer::skip_double() noexcept { next_tape_loc += 2; } - simdjson_really_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { *next_tape_loc = val | ((uint64_t(char(t))) << 56); next_tape_loc++; } template - simdjson_really_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { append(val, t); static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); memcpy(next_tape_loc, &val2, sizeof(val2)); next_tape_loc++; } - simdjson_really_inline void tape_writer::write(uint64_t& tape_loc, uint64_t val, internal::tape_type t) noexcept { + simdjson_inline void tape_writer::write(uint64_t& tape_loc, uint64_t val, internal::tape_type t) noexcept { tape_loc = val | ((uint64_t(char(t))) << 56); } @@ -11000,40 +11483,40 @@ namespace simdjson { struct tape_builder { template - simdjson_warn_unused static simdjson_really_inline error_code parse_document( + simdjson_warn_unused static simdjson_inline error_code parse_document( dom_parser_implementation& dom_parser, dom::document& doc) noexcept; /** Called when a non-empty document starts. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_document_start(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator& iter) noexcept; /** Called when a non-empty document ends without error. */ - simdjson_warn_unused simdjson_really_inline error_code visit_document_end(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator& iter) noexcept; /** Called when a non-empty array starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_start(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator& iter) noexcept; /** Called when a non-empty array ends. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_end(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator& iter) noexcept; /** Called when an empty array is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_array(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator& iter) noexcept; /** Called when a non-empty object starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_object_start(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator& iter) noexcept; /** * Called when a key in a field is encountered. * * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array * will be called after this with the field value. */ - simdjson_warn_unused simdjson_really_inline error_code visit_key(json_iterator& iter, const uint8_t* key) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator& iter, const uint8_t* key) noexcept; /** Called when a non-empty object ends. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_object_end(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator& iter) noexcept; /** Called when an empty object is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_object(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator& iter) noexcept; /** * Called when a string, number, boolean or null is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator& iter, const uint8_t* value) noexcept; /** * Called when a string, number, boolean or null is found at the top level of a document (i.e. * when there is no array or object and the entire document is a single string, number, boolean or @@ -11042,22 +11525,22 @@ namespace simdjson { * This is separate from primitive() because simdjson's normal primitive parsing routines assume * there is at least one more token after the value, which is only true in an array or object. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_string(json_iterator& iter, const uint8_t* value, bool key = false) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_number(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_true_atom(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_false_atom(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_null_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator& iter, const uint8_t* value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_string(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_number(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_true_atom(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_false_atom(json_iterator& iter, 
const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_null_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator& iter, const uint8_t* value) noexcept; /** Called each time a new field or element in an array or object is found. */ - simdjson_warn_unused simdjson_really_inline error_code increment_count(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator& iter) noexcept; /** Next location to write to tape */ tape_writer tape; @@ -11065,18 +11548,18 @@ namespace simdjson { /** Next write location in the string buf for stage 2 parsing */ uint8_t* current_string_buf_loc; - simdjson_really_inline tape_builder(dom::document& doc) noexcept; + simdjson_inline tape_builder(dom::document& doc) noexcept; - simdjson_really_inline uint32_t next_tape_index(json_iterator& iter) const noexcept; - simdjson_really_inline void start_container(json_iterator& iter) noexcept; - simdjson_warn_unused simdjson_really_inline error_code end_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_warn_unused simdjson_really_inline error_code empty_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_really_inline uint8_t* on_start_string(json_iterator& iter) noexcept; - simdjson_really_inline void on_end_string(uint8_t* 
dst) noexcept; + simdjson_inline uint32_t next_tape_index(json_iterator& iter) const noexcept; + simdjson_inline void start_container(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t* on_start_string(json_iterator& iter) noexcept; + simdjson_inline void on_end_string(uint8_t* dst) noexcept; }; // class tape_builder template - simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_document( + simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( dom_parser_implementation& dom_parser, dom::document& doc) noexcept { dom_parser.doc = &doc; @@ -11085,59 +11568,59 @@ namespace simdjson { return iter.walk_document(builder); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_primitive(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator& iter, const uint8_t* value) noexcept { return iter.visit_root_primitive(*this, value); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_primitive(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator& iter, const uint8_t* value) noexcept { return iter.visit_primitive(*this, value); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_object(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator& iter) noexcept { return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); } - simdjson_warn_unused 
simdjson_really_inline error_code tape_builder::visit_empty_array(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator& iter) noexcept { return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_start(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator& iter) noexcept { start_container(iter); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_start(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator& iter) noexcept { start_container(iter); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_start(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator& iter) noexcept { start_container(iter); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_end(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator& iter) noexcept { return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_end(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator& iter) noexcept { return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_end(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code 
tape_builder::visit_document_end(json_iterator& iter) noexcept { constexpr uint32_t start_tape_index = 0; tape.append(start_tape_index, internal::tape_type::ROOT); tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_key(json_iterator& iter, const uint8_t* key) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator& iter, const uint8_t* key) noexcept { return visit_string(iter, key, true); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::increment_count(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator& iter) noexcept { iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 return SUCCESS; } - simdjson_really_inline tape_builder::tape_builder(dom::document& doc) noexcept : tape{ doc.tape.get() }, current_string_buf_loc{ doc.string_buf.get() } {} + simdjson_inline tape_builder::tape_builder(dom::document& doc) noexcept : tape{ doc.tape.get() }, current_string_buf_loc{ doc.string_buf.get() } {} - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_string(json_iterator& iter, const uint8_t* value, bool key) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator& iter, const uint8_t* value, bool key) noexcept { iter.log_value(key ? "key" : "string"); uint8_t* dst = on_start_string(iter); - dst = stringparsing::parse_string(value + 1, dst); + dst = stringparsing::parse_string(value + 1, dst, false); // We do not allow replacement when the escape characters are invalid. 
if (dst == nullptr) { iter.log_error("Invalid escape in string"); return STRING_ERROR; @@ -11146,16 +11629,16 @@ namespace simdjson { return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_string(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator& iter, const uint8_t* value) noexcept { return visit_string(iter, value); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_number(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("number"); return numberparsing::parse_number(value, tape); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_number(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator& iter, const uint8_t* value) noexcept { // // We need to make a copy to make sure that the string is space terminated. 
// This is not about padding the input, which should already padded up @@ -11177,42 +11660,42 @@ namespace simdjson { return error; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_true_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("true"); if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } tape.append(0, internal::tape_type::TRUE_VALUE); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_true_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("true"); if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } tape.append(0, internal::tape_type::TRUE_VALUE); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_false_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("false"); if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } tape.append(0, internal::tape_type::FALSE_VALUE); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_false_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("false"); if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } tape.append(0, internal::tape_type::FALSE_VALUE); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline 
error_code tape_builder::visit_null_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("null"); if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } tape.append(0, internal::tape_type::NULL_VALUE); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_null_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("null"); if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } tape.append(0, internal::tape_type::NULL_VALUE); @@ -11221,24 +11704,24 @@ namespace simdjson { // private: - simdjson_really_inline uint32_t tape_builder::next_tape_index(json_iterator& iter) const noexcept { + simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator& iter) const noexcept { return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::empty_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept { auto start_index = next_tape_index(iter); tape.append(start_index + 2, start); tape.append(start_index, end); return SUCCESS; } - simdjson_really_inline void tape_builder::start_container(json_iterator& iter) noexcept { + simdjson_inline void tape_builder::start_container(json_iterator& iter) noexcept { iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); iter.dom_parser.open_containers[iter.depth].count = 0; tape.skip(); // We don't actually *write* the start 
element until the end. } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept { // Write the ending tape element, pointing at the start location const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; tape.append(start_tape_index, end); @@ -11251,13 +11734,13 @@ namespace simdjson { return SUCCESS; } - simdjson_really_inline uint8_t* tape_builder::on_start_string(json_iterator& iter) noexcept { + simdjson_inline uint8_t* tape_builder::on_start_string(json_iterator& iter) noexcept { // we advance the point, accounting for the fact that we have a NULL termination tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); return current_string_buf_loc + sizeof(uint32_t); } - simdjson_really_inline void tape_builder::on_end_string(uint8_t* dst) noexcept { + simdjson_inline void tape_builder::on_end_string(uint8_t* dst) noexcept { uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); // TODO check for overflow in case someone has a crazy string (>=4GB?) 
// But only add the overflow check when the document itself exceeds 4GB @@ -11283,7 +11766,7 @@ namespace simdjson { namespace { namespace stage1 { - simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { + simdjson_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; } return find_escaped_branchless(backslash); } @@ -11313,8 +11796,12 @@ namespace simdjson { return stage2::tape_builder::parse_document(*this, _doc); } - simdjson_warn_unused uint8_t* dom_parser_implementation::parse_string(const uint8_t* src, uint8_t* dst) const noexcept { - return haswell::stringparsing::parse_string(src, dst); + simdjson_warn_unused uint8_t* dom_parser_implementation::parse_string(const uint8_t* src, uint8_t* dst, bool replacement_char) const noexcept { + return haswell::stringparsing::parse_string(src, dst, replacement_char); + } + + simdjson_warn_unused uint8_t* dom_parser_implementation::parse_wobbly_string(const uint8_t* src, uint8_t* dst) const noexcept { + return haswell::stringparsing::parse_wobbly_string(src, dst); } simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t* _buf, size_t _len, dom::document& _doc) noexcept { @@ -11377,17 +11864,17 @@ namespace simdjson { using namespace simd; struct json_character_block { - static simdjson_really_inline json_character_block classify(const simd::simd8x64& in); + static simdjson_inline json_character_block classify(const simd::simd8x64& in); - simdjson_really_inline uint64_t whitespace() const noexcept { return _whitespace; } - simdjson_really_inline uint64_t op() const noexcept { return _op; } - simdjson_really_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } + simdjson_inline uint64_t whitespace() const noexcept { return _whitespace; } + simdjson_inline uint64_t op() const noexcept { return _op; } + simdjson_inline uint64_t scalar() const 
noexcept { return ~(op() | whitespace()); } uint64_t _whitespace; uint64_t _op; }; - simdjson_really_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { + simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { const simd8 table1(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); const simd8 table2(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0); @@ -11403,34 +11890,34 @@ namespace simdjson { v.chunks[1].any_bits_set(0x7), v.chunks[2].any_bits_set(0x7), v.chunks[3].any_bits_set(0x7) - ).to_bitmask(); + ).to_bitmask(); uint64_t whitespace = simd8x64( v.chunks[0].any_bits_set(0x18), v.chunks[1].any_bits_set(0x18), v.chunks[2].any_bits_set(0x18), v.chunks[3].any_bits_set(0x18) - ).to_bitmask(); + ).to_bitmask(); return { whitespace, op }; } - simdjson_really_inline bool is_ascii(const simd8x64& input) { + simdjson_inline bool is_ascii(const simd8x64& input) { // careful: 0x80 is not ascii. - return input.reduce_or().saturating_sub(0b01111111u).bits_not_set_anywhere(); + return input.reduce_or().saturating_sub(0x7fu).bits_not_set_anywhere(); } - simdjson_unused simdjson_really_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { - simd8 is_second_byte = prev1.saturating_sub(0b11000000u - 1); // Only 11______ will be > 0 - simd8 is_third_byte = prev2.saturating_sub(0b11100000u - 1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u - 1); // Only 1111____ will be > 0 + simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1.saturating_sub(0xc0u - 1); // Only 11______ will be > 0 + simd8 is_third_byte = prev2.saturating_sub(0xe0u - 1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u - 1); // Only 1111____ will be > 0 // Caller requires a bool (all 1's). 
All values resulting from the subtraction will be <= 64, so signed comparison is fine. return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); } - simdjson_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { - simd8 is_third_byte = prev2.saturating_sub(0b11100000u - 1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u - 1); // Only 1111____ will be > 0 + simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0xe0u - 1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u - 1); // Only 1111____ will be > 0 // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. return simd8(is_third_byte | is_fourth_byte) > int8_t(0); } @@ -11447,7 +11934,7 @@ namespace simdjson { using namespace simd; - simdjson_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) // Bit 1 = Too Long (ASCII followed by continuation) // Bit 2 = Overlong 3-byte @@ -11455,20 +11942,20 @@ namespace simdjson { // Bit 5 = Overlong 2-byte // Bit 7 = Two Continuations constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ + // 11______ 11______ constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ 
- // 11111___ 1001____ - // 11111___ 101_____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; // 11110101 1000____ // 1111011_ 1000____ @@ -11489,7 +11976,7 @@ namespace simdjson { TOO_SHORT | OVERLONG_3 | SURROGATE, // 1111____ ________ TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 - ); + ); constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( // ____0000 ________ @@ -11518,7 +12005,7 @@ namespace simdjson { CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, CARRY | TOO_LARGE | TOO_LARGE_1000, CARRY | TOO_LARGE | TOO_LARGE_1000 - ); + ); const simd8 byte_2_high = input.shr<4>().lookup_16( // ________ 0_______ TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, @@ -11534,10 +12021,10 @@ namespace simdjson { // ________ 11______ TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT - ); + ); return (byte_1_high & byte_1_low & byte_2_high); } - simdjson_really_inline simd8 check_multibyte_lengths(const simd8 input, + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, const simd8 prev_input, const simd8 sc) { simd8 prev2 = input.prev<2>(prev_input); simd8 prev3 = input.prev<3>(prev_input); @@ -11550,7 +12037,7 @@ namespace simdjson { // Return nonzero if there are incomplete multibyte characters at the end of the block: // e.g. if there is a 4-byte character, but it's 3 bytes from the end. // - simdjson_really_inline simd8 is_incomplete(const simd8 input) { + simdjson_inline simd8 is_incomplete(const simd8 input) { // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): // ... 
1111____ 111_____ 11______ #if SIMDJSON_IMPLEMENTATION_ICELAKE @@ -11562,14 +12049,14 @@ namespace simdjson { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u - 1, 0b11100000u - 1, 0b11000000u - 1 + 255, 255, 255, 255, 255, 0xf0u - 1, 0xe0u - 1, 0xc0u - 1 }; #else static const uint8_t max_array[32] = { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u - 1, 0b11100000u - 1, 0b11000000u - 1 + 255, 255, 255, 255, 255, 0xf0u - 1, 0xe0u - 1, 0xc0u - 1 }; #endif const simd8 max_value(&max_array[sizeof(max_array) - sizeof(simd8)]); @@ -11587,7 +12074,7 @@ namespace simdjson { // // Check whether the current bytes are valid UTF-8. // - simdjson_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) simd8 prev1 = input.prev<1>(prev_input); @@ -11598,13 +12085,21 @@ namespace simdjson { // The only problem that can happen at EOF is that a multibyte character is too short // or a byte value too large in the last bytes: check_special_cases only checks for bytes // too large in the first of two bytes. - simdjson_really_inline void check_eof() { + simdjson_inline void check_eof() { // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't // possibly finish them. 
this->error |= this->prev_incomplete; } - simdjson_really_inline void check_next_input(const simd8x64& input) { +#ifndef SIMDJSON_IF_CONSTEXPR +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_IF_CONSTEXPR if constexpr +#else +#define SIMDJSON_IF_CONSTEXPR if +#endif +#endif + + simdjson_inline void check_next_input(const simd8x64& input) { if (simdjson_likely(is_ascii(input))) { this->error |= this->prev_incomplete; } @@ -11614,24 +12109,25 @@ namespace simdjson { || (simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4), "We support one, two or four chunks per 64-byte block."); - if (simd8x64::NUM_CHUNKS == 1) { + SIMDJSON_IF_CONSTEXPR(simd8x64::NUM_CHUNKS == 1) { this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - } if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); } - else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS - 1]); - this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS - 1]; +else SIMDJSON_IF_CONSTEXPR(simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } + else SIMDJSON_IF_CONSTEXPR(simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS - 1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS - 1]; } } // do 
not forget to call check_eof! - simdjson_really_inline error_code errors() { + simdjson_inline error_code errors() { return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS; } @@ -11659,10 +12155,10 @@ namespace simdjson { template struct buf_block_reader { public: - simdjson_really_inline buf_block_reader(const uint8_t* _buf, size_t _len); - simdjson_really_inline size_t block_index(); - simdjson_really_inline bool has_full_block() const; - simdjson_really_inline const uint8_t* full_block() const; + simdjson_inline buf_block_reader(const uint8_t* _buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t* full_block() const; /** * Get the last block, padded with spaces. * @@ -11672,8 +12168,8 @@ namespace simdjson { * * @return the number of effective characters in the last block. */ - simdjson_really_inline size_t get_remainder(uint8_t* dst) const; - simdjson_really_inline void advance(); + simdjson_inline size_t get_remainder(uint8_t* dst) const; + simdjson_inline void advance(); private: const uint8_t* buf; const size_t len; @@ -11712,23 +12208,23 @@ namespace simdjson { } template - simdjson_really_inline buf_block_reader::buf_block_reader(const uint8_t* _buf, size_t _len) : buf{ _buf }, len{ _len }, lenminusstep{ len < STEP_SIZE ? 0 : len - STEP_SIZE }, idx{ 0 } {} + simdjson_inline buf_block_reader::buf_block_reader(const uint8_t* _buf, size_t _len) : buf{ _buf }, len{ _len }, lenminusstep{ len < STEP_SIZE ? 
0 : len - STEP_SIZE }, idx{ 0 } {} template - simdjson_really_inline size_t buf_block_reader::block_index() { return idx; } + simdjson_inline size_t buf_block_reader::block_index() { return idx; } template - simdjson_really_inline bool buf_block_reader::has_full_block() const { + simdjson_inline bool buf_block_reader::has_full_block() const { return idx < lenminusstep; } template - simdjson_really_inline const uint8_t* buf_block_reader::full_block() const { + simdjson_inline const uint8_t* buf_block_reader::full_block() const { return &buf[idx]; } template - simdjson_really_inline size_t buf_block_reader::get_remainder(uint8_t* dst) const { + simdjson_inline size_t buf_block_reader::get_remainder(uint8_t* dst) const { if (len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. std::memcpy(dst, buf + idx, len - idx); @@ -11736,7 +12232,7 @@ namespace simdjson { } template - simdjson_really_inline void buf_block_reader::advance() { + simdjson_inline void buf_block_reader::advance() { idx += STEP_SIZE; } @@ -11752,27 +12248,27 @@ namespace simdjson { struct json_string_block { // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : + simdjson_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : _backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {} // Escaped characters (characters following an escape() character) - simdjson_really_inline uint64_t escaped() const { return _escaped; } + simdjson_inline uint64_t escaped() const { return _escaped; } // Escape characters (backslashes that are not escaped--i.e. 
in \\, includes only the first \) - simdjson_really_inline uint64_t escape() const { return _backslash & ~_escaped; } + simdjson_inline uint64_t escape() const { return _backslash & ~_escaped; } // Real (non-backslashed) quotes - simdjson_really_inline uint64_t quote() const { return _quote; } + simdjson_inline uint64_t quote() const { return _quote; } // Start quotes of strings - simdjson_really_inline uint64_t string_start() const { return _quote & _in_string; } + simdjson_inline uint64_t string_start() const { return _quote & _in_string; } // End quotes of strings - simdjson_really_inline uint64_t string_end() const { return _quote & ~_in_string; } + simdjson_inline uint64_t string_end() const { return _quote & ~_in_string; } // Only characters inside the string (not including the quotes) - simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + simdjson_inline uint64_t string_content() const { return _in_string & ~_quote; } // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } // Tail of string (everything except the start quote) - simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } + simdjson_inline uint64_t string_tail() const { return _in_string ^ _quote; } // backslash characters uint64_t _backslash; @@ -11787,14 +12283,14 @@ namespace simdjson { // Scans blocks for string characters, storing the state necessary to do so class 
json_string_scanner { public: - simdjson_really_inline json_string_block next(const simd::simd8x64& in); + simdjson_inline json_string_block next(const simd::simd8x64& in); // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); + simdjson_inline error_code finish(); private: // Intended to be defined by the implementation - simdjson_really_inline uint64_t find_escaped(uint64_t escape); - simdjson_really_inline uint64_t find_escaped_branchless(uint64_t escape); + simdjson_inline uint64_t find_escaped(uint64_t escape); + simdjson_inline uint64_t find_escaped_branchless(uint64_t escape); // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). uint64_t prev_in_string = 0ULL; @@ -11829,7 +12325,7 @@ namespace simdjson { // desired | x | x x x x x x x x | // text | \\\ | \\\"\\\" \\\" \\"\\" | // - simdjson_really_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { + simdjson_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { // If there was overflow, pretend the first character isn't a backslash backslash &= ~prev_escaped; uint64_t follows_escape = backslash << 1 | prev_escaped; @@ -11854,7 +12350,7 @@ namespace simdjson { // // Backslash sequences outside of quotes will be detected in stage 2. 
// - simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + simdjson_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { const uint64_t backslash = in.eq('\\'); const uint64_t escaped = find_escaped(backslash); const uint64_t quote = in.eq('"') & ~escaped; @@ -11887,7 +12383,7 @@ namespace simdjson { ); } - simdjson_really_inline error_code json_string_scanner::finish() { + simdjson_inline error_code json_string_scanner::finish() { if (prev_in_string) { return UNCLOSED_STRING; } @@ -11925,25 +12421,25 @@ namespace simdjson { struct json_block { public: // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} - simdjson_really_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} /** * The start of structurals. * In simdjson prior to v0.3, these were called the pseudo-structural characters. **/ - simdjson_really_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } /** All JSON whitespace (i.e. 
not in a string) */ - simdjson_really_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } // Helpers /** Whether the given characters are inside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } /** Whether the given characters are outside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } // string and escape characters json_string_block _string; @@ -11958,12 +12454,12 @@ namespace simdjson { * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". * They may reside inside a string. **/ - simdjson_really_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } /** * The start of non-operator runs, like 123, true and "abc". * It main reside inside a string. **/ - simdjson_really_inline uint64_t potential_scalar_start() const noexcept { + simdjson_inline uint64_t potential_scalar_start() const noexcept { // The term "scalar" refers to anything except structural characters and white space // (so letters, numbers, quotes). 
// Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space @@ -11974,7 +12470,7 @@ namespace simdjson { * Whether the given character is immediately after a non-operator like 123, true. * The characters following a quote are not included. */ - simdjson_really_inline uint64_t follows_potential_scalar() const noexcept { + simdjson_inline uint64_t follows_potential_scalar() const noexcept { // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a // white space. @@ -11998,10 +12494,10 @@ namespace simdjson { */ class json_scanner { public: - json_scanner() {} - simdjson_really_inline json_block next(const simd::simd8x64& in); + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); + simdjson_inline error_code finish(); private: // Whether the last character of the previous iteration is part of a scalar token @@ -12018,13 +12514,13 @@ namespace simdjson { // // const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); // - simdjson_really_inline uint64_t follows(const uint64_t match, uint64_t& overflow) { + simdjson_inline uint64_t follows(const uint64_t match, uint64_t& overflow) { const uint64_t result = match << 1 | overflow; overflow = match >> 63; return result; } - simdjson_really_inline json_block json_scanner::next(const simd::simd8x64& in) { + simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { json_string_block strings = string_scanner.next(in); // identifies the white-space and the structural characters json_character_block characters = json_character_block::classify(in); @@ -12049,7 +12545,7 @@ namespace simdjson { ); } - simdjson_really_inline error_code json_scanner::finish() { + simdjson_inline 
error_code json_scanner::finish() { return string_scanner.finish(); } @@ -12075,23 +12571,23 @@ namespace simdjson { static error_code minify(const uint8_t* buf, size_t len, uint8_t* dst, size_t& dst_len) noexcept; private: - simdjson_really_inline json_minifier(uint8_t* _dst) + simdjson_inline json_minifier(uint8_t* _dst) : dst{ _dst } {} template - simdjson_really_inline void step(const uint8_t* block_buf, buf_block_reader& reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block); - simdjson_really_inline error_code finish(uint8_t* dst_start, size_t& dst_len); + simdjson_inline void step(const uint8_t* block_buf, buf_block_reader& reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t* dst_start, size_t& dst_len); json_scanner scanner{}; uint8_t* dst; }; - simdjson_really_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { uint64_t mask = block.whitespace(); dst += in.compress(mask, dst); } - simdjson_really_inline error_code json_minifier::finish(uint8_t* dst_start, size_t& dst_len) { + simdjson_inline error_code json_minifier::finish(uint8_t* dst_start, size_t& dst_len) { error_code error = scanner.finish(); if (error) { dst_len = 0; return error; } dst_len = dst - dst_start; @@ -12099,7 +12595,7 @@ namespace simdjson { } template<> - simdjson_really_inline void json_minifier::step<128>(const uint8_t* block_buf, buf_block_reader<128>& reader) noexcept { + simdjson_inline void json_minifier::step<128>(const uint8_t* block_buf, buf_block_reader<128>& reader) noexcept { simd::simd8x64 in_1(block_buf); simd::simd8x64 in_2(block_buf + 64); json_block block_1 = scanner.next(in_1); @@ -12110,7 +12606,7 @@ namespace simdjson { } template<> - simdjson_really_inline void json_minifier::step<64>(const uint8_t* 
block_buf, buf_block_reader<64>& reader) noexcept { + simdjson_inline void json_minifier::step<64>(const uint8_t* block_buf, buf_block_reader<64>& reader) noexcept { simd::simd8x64 in_1(block_buf); json_block block_1 = scanner.next(in_1); this->next(block_buf, block_1); @@ -12183,7 +12679,7 @@ namespace simdjson { * complete document, therefore the last json buffer location is the end of the * batch. */ - simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation& parser) { + simdjson_inline uint32_t find_next_document_index(dom_parser_implementation& parser) { // Variant: do not count separately, just figure out depth if (parser.n_structural_indexes == 0) { return 0; } auto arr_cnt = 0; @@ -12258,7 +12754,7 @@ namespace simdjson { public: uint32_t* tail; - simdjson_really_inline bit_indexer(uint32_t* index_buf) : tail(index_buf) {} + simdjson_inline bit_indexer(uint32_t* index_buf) : tail(index_buf) {} // flatten out values in 'bits' assuming that they are are to have values of idx // plus their position in the bitvector, and store these indexes at @@ -12269,15 +12765,15 @@ namespace simdjson { // If the kernel sets SIMDJSON_CUSTOM_BIT_INDEXER, then it will provide its own // version of the code. #ifdef SIMDJSON_CUSTOM_BIT_INDEXER - simdjson_really_inline void write(uint32_t idx, uint64_t bits); + simdjson_inline void write(uint32_t idx, uint64_t bits); #else - simdjson_really_inline void write(uint32_t idx, uint64_t bits) { + simdjson_inline void write(uint32_t idx, uint64_t bits) { // In some instances, the next branch is expensive because it is mispredicted. // Unfortunately, in other cases, // it helps tremendously. if (bits == 0) return; -#if defined(SIMDJSON_PREFER_REVERSE_BITS) +#if SIMDJSON_PREFER_REVERSE_BITS /** * ARM lacks a fast trailing zero instruction, but it has a fast * bit reversal instruction and a fast leading zero instruction. 
@@ -12380,11 +12876,11 @@ namespace simdjson { static error_code index(const uint8_t* buf, size_t len, dom_parser_implementation& parser, stage1_mode partial) noexcept; private: - simdjson_really_inline json_structural_indexer(uint32_t* structural_indexes); + simdjson_inline json_structural_indexer(uint32_t* structural_indexes); template - simdjson_really_inline void step(const uint8_t* block, buf_block_reader& reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); - simdjson_really_inline error_code finish(dom_parser_implementation& parser, size_t idx, size_t len, stage1_mode partial); + simdjson_inline void step(const uint8_t* block, buf_block_reader& reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation& parser, size_t idx, size_t len, stage1_mode partial); json_scanner scanner{}; utf8_checker checker{}; @@ -12393,26 +12889,26 @@ namespace simdjson { uint64_t unescaped_chars_error = 0; }; - simdjson_really_inline json_structural_indexer::json_structural_indexer(uint32_t* structural_indexes) : indexer{ structural_indexes } {} + simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t* structural_indexes) : indexer{ structural_indexes } {} // Skip the last character if it is partial - simdjson_really_inline size_t trim_partial_utf8(const uint8_t* buf, size_t len) { + simdjson_inline size_t trim_partial_utf8(const uint8_t* buf, size_t len) { if (simdjson_unlikely(len < 3)) { switch (len) { case 2: - if (buf[len - 1] >= 0b11000000) { return len - 1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len - 2] >= 0b11100000) { return len - 2; } // 3- and 4-byte characters with only 2 bytes left + if (buf[len - 1] >= 0xc0) { return len - 1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len - 2] >= 0xe0) { return len - 2; } // 3- and 4-byte 
characters with only 2 bytes left return len; case 1: - if (buf[len - 1] >= 0b11000000) { return len - 1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len - 1] >= 0xc0) { return len - 1; } // 2-, 3- and 4-byte characters with only 1 byte left return len; case 0: return len; } } - if (buf[len - 1] >= 0b11000000) { return len - 1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len - 2] >= 0b11100000) { return len - 2; } // 3- and 4-byte characters with only 1 byte left - if (buf[len - 3] >= 0b11110000) { return len - 3; } // 4-byte characters with only 3 bytes left + if (buf[len - 1] >= 0xc0) { return len - 1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len - 2] >= 0xe0) { return len - 2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len - 3] >= 0xf0) { return len - 3; } // 4-byte characters with only 3 bytes left return len; } @@ -12461,7 +12957,7 @@ namespace simdjson { } template<> - simdjson_really_inline void json_structural_indexer::step<128>(const uint8_t* block, buf_block_reader<128>& reader) noexcept { + simdjson_inline void json_structural_indexer::step<128>(const uint8_t* block, buf_block_reader<128>& reader) noexcept { simd::simd8x64 in_1(block); simd::simd8x64 in_2(block + 64); json_block block_1 = scanner.next(in_1); @@ -12472,22 +12968,24 @@ namespace simdjson { } template<> - simdjson_really_inline void json_structural_indexer::step<64>(const uint8_t* block, buf_block_reader<64>& reader) noexcept { + simdjson_inline void json_structural_indexer::step<64>(const uint8_t* block, buf_block_reader<64>& reader) noexcept { simd::simd8x64 in_1(block); json_block block_1 = scanner.next(in_1); this->next(in_1, block_1, reader.block_index()); reader.advance(); } - simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& 
block, size_t idx) { uint64_t unescaped = in.lteq(0x1F); +#if SIMDJSON_UTF8VALIDATION checker.check_next_input(in); +#endif indexer.write(uint32_t(idx - 64), prev_structurals); // Output *last* iteration's structurals to the parser prev_structurals = block.structural_start(); unescaped_chars_error |= block.non_quote_inside_string(unescaped); } - simdjson_really_inline error_code json_structural_indexer::finish(dom_parser_implementation& parser, size_t idx, size_t len, stage1_mode partial) { + simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation& parser, size_t idx, size_t len, stage1_mode partial) { // Write out the final iteration's structurals indexer.write(uint32_t(idx - 64), prev_structurals); error_code error = scanner.finish(); @@ -12678,39 +13176,100 @@ namespace simdjson { // return true if the unicode codepoint was valid // We work in little-endian then swap at write time simdjson_warn_unused - simdjson_really_inline bool handle_unicode_codepoint(const uint8_t** src_ptr, - uint8_t** dst_ptr) { + simdjson_inline bool handle_unicode_codepoint(const uint8_t** src_ptr, + uint8_t** dst_ptr, bool allow_replacement) { + // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) + constexpr uint32_t substitution_code_point = 0xfffd; // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the // conversion isn't valid; we defer the check for this to inside the // multilingual plane check uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); *src_ptr += 6; - // check for low surrogate for characters outside the Basic + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic // Multilingual Plane. 
if (code_point >= 0xd800 && code_point < 0xdc00) { - if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { - return false; + const uint8_t* src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + if (!allow_replacement) { return false; } + code_point = substitution_code_point; } - uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + else { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + if (!allow_replacement) { return false; } + code_point = substitution_code_point; + } + else { + code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } - // if the first code point is invalid we will get here, as we will go past - // the check for being outside the Basic Multilingual plane. If we don't - // find a \u immediately afterwards we fail out anyhow, but if we do, - // this check catches both the case of the first code point being invalid - // or the second code point being invalid. - if ((code_point | code_point_2) >> 16) { - return false; } - - code_point = - (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; - *src_ptr += 6; + } + else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. 
+ if (!allow_replacement) { return false; } + code_point = substitution_code_point; } size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); *dst_ptr += offset; return offset > 0; } + + // handle a unicode codepoint using the wobbly convention + // https://simonsapin.github.io/wtf-8/ + // write appropriate values into dest + // src will advance 6 bytes or 12 bytes + // dest will advance a variable amount (return via pointer) + // return true if the unicode codepoint was valid + // We work in little-endian then swap at write time + simdjson_warn_unused + simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t** src_ptr, + uint8_t** dst_ptr) { + // It is not ideal that this function is nearly identical to handle_unicode_codepoint. + // + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t* src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + uint32_t low_bit = code_point_2 - 0xdc00; + if ((low_bit >> 10) == 0) { + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + } + } + + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; + } + + /** * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. 
It returns the final output @@ -12719,7 +13278,7 @@ namespace simdjson { * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. */ - simdjson_warn_unused simdjson_really_inline uint8_t* parse_string(const uint8_t* src, uint8_t* dst) { + simdjson_warn_unused simdjson_inline uint8_t* parse_string(const uint8_t* src, uint8_t* dst, bool allow_replacement) { while (1) { // Copy the next n bytes, and find the backslash and quote in them. auto bs_quote = backslash_and_quote::copy_and_find(src, dst); @@ -12738,7 +13297,56 @@ namespace simdjson { within the unicode codepoint handling code. */ src += bs_dist; dst += bs_dist; - if (!handle_unicode_codepoint(&src, &dst)) { + if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { + return nullptr; + } + } + else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } + else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; + } + + simdjson_warn_unused simdjson_inline uint8_t* parse_wobbly_string(const uint8_t* src, uint8_t* dst) { + // It is not ideal that this function is nearly identical to parse_string. + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. 
Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint_wobbly(&src, &dst)) { return nullptr; } } @@ -12797,7 +13405,7 @@ namespace simdjson { static int log_depth; // Not threadsafe. Log only. // Helper to turn unprintable or newline characters into spaces - static simdjson_really_inline char printable_char(char c) { + static simdjson_inline char printable_char(char c) { if (c >= 0x20) { return c; } @@ -12807,7 +13415,7 @@ namespace simdjson { } // Print the header and set up log_start - static simdjson_really_inline void log_start() { + static simdjson_inline void log_start() { if (LOG_ENABLED) { log_depth = 0; printf("\n"); @@ -12816,7 +13424,7 @@ namespace simdjson { } } - simdjson_unused static simdjson_really_inline void log_string(const char* message) { + simdjson_unused static simdjson_inline void log_string(const char* message) { if (LOG_ENABLED) { printf("%s\n", message); } @@ -12824,7 +13432,7 @@ namespace simdjson { // Logs a single line from the stage 2 DOM parser template - static simdjson_really_inline void log_line(S& structurals, const char* title_prefix, const char* title, const char* detail) { + static simdjson_inline void log_line(S& structurals, const char* title_prefix, const char* title, const char* detail) { if (LOG_ENABLED) { printf("| %*s%s%-*s ", log_depth * 2, "", title_prefix, LOG_EVENT_LEN - log_depth * 2 - int(strlen(title_prefix)), title); auto current_index = structurals.at_beginning() ? 
nullptr : structurals.next_structural - 1; @@ -12903,14 +13511,14 @@ namespace simdjson { * - increment_count(iter) - each time a value is found in an array or object. */ template - simdjson_warn_unused simdjson_really_inline error_code walk_document(V& visitor) noexcept; + simdjson_warn_unused simdjson_inline error_code walk_document(V& visitor) noexcept; /** * Create an iterator capable of walking a JSON document. * * The document must have already passed through stage 1. */ - simdjson_really_inline json_iterator(dom_parser_implementation& _dom_parser, size_t start_structural_index); + simdjson_inline json_iterator(dom_parser_implementation& _dom_parser, size_t start_structural_index); /** * Look at the next token. @@ -12919,7 +13527,7 @@ namespace simdjson { * * They may include invalid JSON as well (such as `1.2.3` or `ture`). */ - simdjson_really_inline const uint8_t* peek() const noexcept; + simdjson_inline const uint8_t* peek() const noexcept; /** * Advance to the next token. * @@ -12927,56 +13535,56 @@ namespace simdjson { * * They may include invalid JSON as well (such as `1.2.3` or `ture`). */ - simdjson_really_inline const uint8_t* advance() noexcept; + simdjson_inline const uint8_t* advance() noexcept; /** * Get the remaining length of the document, from the start of the current token. */ - simdjson_really_inline size_t remaining_len() const noexcept; + simdjson_inline size_t remaining_len() const noexcept; /** * Check if we are at the end of the document. * * If this is true, there are no more tokens. */ - simdjson_really_inline bool at_eof() const noexcept; + simdjson_inline bool at_eof() const noexcept; /** * Check if we are at the beginning of the document. */ - simdjson_really_inline bool at_beginning() const noexcept; - simdjson_really_inline uint8_t last_structural() const noexcept; + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; /** * Log that a value has been found. 
* * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_value(const char* type) const noexcept; + simdjson_inline void log_value(const char* type) const noexcept; /** * Log the start of a multipart value. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_start_value(const char* type) const noexcept; + simdjson_inline void log_start_value(const char* type) const noexcept; /** * Log the end of a multipart value. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_end_value(const char* type) const noexcept; + simdjson_inline void log_end_value(const char* type) const noexcept; /** * Log an error. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_error(const char* error) const noexcept; + simdjson_inline void log_error(const char* error) const noexcept; template - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(V& visitor, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V& visitor, const uint8_t* value) noexcept; template - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(V& visitor, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_primitive(V& visitor, const uint8_t* value) noexcept; }; template - simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_document(V& visitor) noexcept { + simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V& visitor) noexcept { logger::log_start(); // @@ -13101,52 +13709,52 @@ namespace simdjson { } // walk_document() - simdjson_really_inline json_iterator::json_iterator(dom_parser_implementation& _dom_parser, size_t start_structural_index) + simdjson_inline json_iterator::json_iterator(dom_parser_implementation& _dom_parser, size_t start_structural_index) : buf{ _dom_parser.buf }, next_structural{ 
&_dom_parser.structural_indexes[start_structural_index] }, dom_parser{ _dom_parser } { } - simdjson_really_inline const uint8_t* json_iterator::peek() const noexcept { + simdjson_inline const uint8_t* json_iterator::peek() const noexcept { return &buf[*(next_structural)]; } - simdjson_really_inline const uint8_t* json_iterator::advance() noexcept { + simdjson_inline const uint8_t* json_iterator::advance() noexcept { return &buf[*(next_structural++)]; } - simdjson_really_inline size_t json_iterator::remaining_len() const noexcept { + simdjson_inline size_t json_iterator::remaining_len() const noexcept { return dom_parser.len - *(next_structural - 1); } - simdjson_really_inline bool json_iterator::at_eof() const noexcept { + simdjson_inline bool json_iterator::at_eof() const noexcept { return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; } - simdjson_really_inline bool json_iterator::at_beginning() const noexcept { + simdjson_inline bool json_iterator::at_beginning() const noexcept { return next_structural == dom_parser.structural_indexes.get(); } - simdjson_really_inline uint8_t json_iterator::last_structural() const noexcept { + simdjson_inline uint8_t json_iterator::last_structural() const noexcept { return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; } - simdjson_really_inline void json_iterator::log_value(const char* type) const noexcept { + simdjson_inline void json_iterator::log_value(const char* type) const noexcept { logger::log_line(*this, "", type, ""); } - simdjson_really_inline void json_iterator::log_start_value(const char* type) const noexcept { + simdjson_inline void json_iterator::log_start_value(const char* type) const noexcept { logger::log_line(*this, "+", type, ""); if (logger::LOG_ENABLED) { logger::log_depth++; } } - simdjson_really_inline void json_iterator::log_end_value(const char* type) const noexcept { + simdjson_inline void json_iterator::log_end_value(const char* type) 
const noexcept { if (logger::LOG_ENABLED) { logger::log_depth--; } logger::log_line(*this, "-", type, ""); } - simdjson_really_inline void json_iterator::log_error(const char* error) const noexcept { + simdjson_inline void json_iterator::log_error(const char* error) const noexcept { logger::log_line(*this, "", "ERROR", error); } template - simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root_primitive(V& visitor, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V& visitor, const uint8_t* value) noexcept { switch (*value) { case '"': return visitor.visit_root_string(*this, value); case 't': return visitor.visit_root_true_atom(*this, value); @@ -13162,7 +13770,7 @@ namespace simdjson { } } template - simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_primitive(V& visitor, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V& visitor, const uint8_t* value) noexcept { switch (*value) { case '"': return visitor.visit_string(*this, value); case 't': return visitor.visit_true_atom(*this, value); @@ -13194,18 +13802,18 @@ namespace simdjson { uint64_t* next_tape_loc; /** Write a signed 64-bit value to tape. */ - simdjson_really_inline void append_s64(int64_t value) noexcept; + simdjson_inline void append_s64(int64_t value) noexcept; /** Write an unsigned 64-bit value to tape. */ - simdjson_really_inline void append_u64(uint64_t value) noexcept; + simdjson_inline void append_u64(uint64_t value) noexcept; /** Write a double value to tape. */ - simdjson_really_inline void append_double(double value) noexcept; + simdjson_inline void append_double(double value) noexcept; /** * Append a tape entry (an 8-bit type,and 56 bits worth of value). 
*/ - simdjson_really_inline void append(uint64_t val, internal::tape_type t) noexcept; + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; /** * Skip the current tape entry without writing. @@ -13213,24 +13821,24 @@ namespace simdjson { * Used to skip the start of the container, since we'll come back later to fill it in when the * container ends. */ - simdjson_really_inline void skip() noexcept; + simdjson_inline void skip() noexcept; /** * Skip the number of tape entries necessary to write a large u64 or i64. */ - simdjson_really_inline void skip_large_integer() noexcept; + simdjson_inline void skip_large_integer() noexcept; /** * Skip the number of tape entries necessary to write a double. */ - simdjson_really_inline void skip_double() noexcept; + simdjson_inline void skip_double() noexcept; /** * Write a value to a known location on tape. * * Used to go back and write out the start of a container after the container ends. */ - simdjson_really_inline static void write(uint64_t& tape_loc, uint64_t val, internal::tape_type t) noexcept; + simdjson_inline static void write(uint64_t& tape_loc, uint64_t val, internal::tape_type t) noexcept; private: /** @@ -13238,50 +13846,50 @@ namespace simdjson { * all 64 bits, such as double and uint64_t. */ template - simdjson_really_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; }; // struct number_writer - simdjson_really_inline void tape_writer::append_s64(int64_t value) noexcept { + simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { append2(0, value, internal::tape_type::INT64); } - simdjson_really_inline void tape_writer::append_u64(uint64_t value) noexcept { + simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { append(0, internal::tape_type::UINT64); *next_tape_loc = value; next_tape_loc++; } /** Write a double value to tape. 
*/ - simdjson_really_inline void tape_writer::append_double(double value) noexcept { + simdjson_inline void tape_writer::append_double(double value) noexcept { append2(0, value, internal::tape_type::DOUBLE); } - simdjson_really_inline void tape_writer::skip() noexcept { + simdjson_inline void tape_writer::skip() noexcept { next_tape_loc++; } - simdjson_really_inline void tape_writer::skip_large_integer() noexcept { + simdjson_inline void tape_writer::skip_large_integer() noexcept { next_tape_loc += 2; } - simdjson_really_inline void tape_writer::skip_double() noexcept { + simdjson_inline void tape_writer::skip_double() noexcept { next_tape_loc += 2; } - simdjson_really_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { *next_tape_loc = val | ((uint64_t(char(t))) << 56); next_tape_loc++; } template - simdjson_really_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { append(val, t); static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); memcpy(next_tape_loc, &val2, sizeof(val2)); next_tape_loc++; } - simdjson_really_inline void tape_writer::write(uint64_t& tape_loc, uint64_t val, internal::tape_type t) noexcept { + simdjson_inline void tape_writer::write(uint64_t& tape_loc, uint64_t val, internal::tape_type t) noexcept { tape_loc = val | ((uint64_t(char(t))) << 56); } @@ -13298,40 +13906,40 @@ namespace simdjson { struct tape_builder { template - simdjson_warn_unused static simdjson_really_inline error_code parse_document( + simdjson_warn_unused static simdjson_inline error_code parse_document( dom_parser_implementation& dom_parser, dom::document& doc) noexcept; /** Called when a non-empty document starts. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_document_start(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator& iter) noexcept; /** Called when a non-empty document ends without error. */ - simdjson_warn_unused simdjson_really_inline error_code visit_document_end(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator& iter) noexcept; /** Called when a non-empty array starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_start(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator& iter) noexcept; /** Called when a non-empty array ends. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_end(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator& iter) noexcept; /** Called when an empty array is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_array(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator& iter) noexcept; /** Called when a non-empty object starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_object_start(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator& iter) noexcept; /** * Called when a key in a field is encountered. * * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array * will be called after this with the field value. */ - simdjson_warn_unused simdjson_really_inline error_code visit_key(json_iterator& iter, const uint8_t* key) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator& iter, const uint8_t* key) noexcept; /** Called when a non-empty object ends. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_object_end(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator& iter) noexcept; /** Called when an empty object is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_object(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator& iter) noexcept; /** * Called when a string, number, boolean or null is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator& iter, const uint8_t* value) noexcept; /** * Called when a string, number, boolean or null is found at the top level of a document (i.e. * when there is no array or object and the entire document is a single string, number, boolean or @@ -13340,22 +13948,22 @@ namespace simdjson { * This is separate from primitive() because simdjson's normal primitive parsing routines assume * there is at least one more token after the value, which is only true in an array or object. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_string(json_iterator& iter, const uint8_t* value, bool key = false) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_number(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_true_atom(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_false_atom(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_null_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator& iter, const uint8_t* value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_string(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_number(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_true_atom(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_false_atom(json_iterator& iter, 
const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_null_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator& iter, const uint8_t* value) noexcept; /** Called each time a new field or element in an array or object is found. */ - simdjson_warn_unused simdjson_really_inline error_code increment_count(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator& iter) noexcept; /** Next location to write to tape */ tape_writer tape; @@ -13363,18 +13971,18 @@ namespace simdjson { /** Next write location in the string buf for stage 2 parsing */ uint8_t* current_string_buf_loc; - simdjson_really_inline tape_builder(dom::document& doc) noexcept; + simdjson_inline tape_builder(dom::document& doc) noexcept; - simdjson_really_inline uint32_t next_tape_index(json_iterator& iter) const noexcept; - simdjson_really_inline void start_container(json_iterator& iter) noexcept; - simdjson_warn_unused simdjson_really_inline error_code end_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_warn_unused simdjson_really_inline error_code empty_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_really_inline uint8_t* on_start_string(json_iterator& iter) noexcept; - simdjson_really_inline void on_end_string(uint8_t* 
dst) noexcept; + simdjson_inline uint32_t next_tape_index(json_iterator& iter) const noexcept; + simdjson_inline void start_container(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t* on_start_string(json_iterator& iter) noexcept; + simdjson_inline void on_end_string(uint8_t* dst) noexcept; }; // class tape_builder template - simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_document( + simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( dom_parser_implementation& dom_parser, dom::document& doc) noexcept { dom_parser.doc = &doc; @@ -13383,59 +13991,59 @@ namespace simdjson { return iter.walk_document(builder); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_primitive(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator& iter, const uint8_t* value) noexcept { return iter.visit_root_primitive(*this, value); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_primitive(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator& iter, const uint8_t* value) noexcept { return iter.visit_primitive(*this, value); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_object(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator& iter) noexcept { return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); } - simdjson_warn_unused 
simdjson_really_inline error_code tape_builder::visit_empty_array(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator& iter) noexcept { return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_start(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator& iter) noexcept { start_container(iter); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_start(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator& iter) noexcept { start_container(iter); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_start(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator& iter) noexcept { start_container(iter); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_end(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator& iter) noexcept { return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_end(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator& iter) noexcept { return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_end(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code 
tape_builder::visit_document_end(json_iterator& iter) noexcept { constexpr uint32_t start_tape_index = 0; tape.append(start_tape_index, internal::tape_type::ROOT); tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_key(json_iterator& iter, const uint8_t* key) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator& iter, const uint8_t* key) noexcept { return visit_string(iter, key, true); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::increment_count(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator& iter) noexcept { iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 return SUCCESS; } - simdjson_really_inline tape_builder::tape_builder(dom::document& doc) noexcept : tape{ doc.tape.get() }, current_string_buf_loc{ doc.string_buf.get() } {} + simdjson_inline tape_builder::tape_builder(dom::document& doc) noexcept : tape{ doc.tape.get() }, current_string_buf_loc{ doc.string_buf.get() } {} - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_string(json_iterator& iter, const uint8_t* value, bool key) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator& iter, const uint8_t* value, bool key) noexcept { iter.log_value(key ? "key" : "string"); uint8_t* dst = on_start_string(iter); - dst = stringparsing::parse_string(value + 1, dst); + dst = stringparsing::parse_string(value + 1, dst, false); // We do not allow replacement when the escape characters are invalid. 
if (dst == nullptr) { iter.log_error("Invalid escape in string"); return STRING_ERROR; @@ -13444,16 +14052,16 @@ namespace simdjson { return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_string(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator& iter, const uint8_t* value) noexcept { return visit_string(iter, value); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_number(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("number"); return numberparsing::parse_number(value, tape); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_number(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator& iter, const uint8_t* value) noexcept { // // We need to make a copy to make sure that the string is space terminated. 
// This is not about padding the input, which should already padded up @@ -13475,42 +14083,42 @@ namespace simdjson { return error; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_true_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("true"); if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } tape.append(0, internal::tape_type::TRUE_VALUE); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_true_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("true"); if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } tape.append(0, internal::tape_type::TRUE_VALUE); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_false_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("false"); if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } tape.append(0, internal::tape_type::FALSE_VALUE); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_false_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("false"); if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } tape.append(0, internal::tape_type::FALSE_VALUE); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline 
error_code tape_builder::visit_null_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("null"); if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } tape.append(0, internal::tape_type::NULL_VALUE); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_null_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("null"); if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } tape.append(0, internal::tape_type::NULL_VALUE); @@ -13519,24 +14127,24 @@ namespace simdjson { // private: - simdjson_really_inline uint32_t tape_builder::next_tape_index(json_iterator& iter) const noexcept { + simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator& iter) const noexcept { return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::empty_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept { auto start_index = next_tape_index(iter); tape.append(start_index + 2, start); tape.append(start_index, end); return SUCCESS; } - simdjson_really_inline void tape_builder::start_container(json_iterator& iter) noexcept { + simdjson_inline void tape_builder::start_container(json_iterator& iter) noexcept { iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); iter.dom_parser.open_containers[iter.depth].count = 0; tape.skip(); // We don't actually *write* the start 
element until the end. } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept { // Write the ending tape element, pointing at the start location const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; tape.append(start_tape_index, end); @@ -13549,13 +14157,13 @@ namespace simdjson { return SUCCESS; } - simdjson_really_inline uint8_t* tape_builder::on_start_string(json_iterator& iter) noexcept { + simdjson_inline uint8_t* tape_builder::on_start_string(json_iterator& iter) noexcept { // we advance the point, accounting for the fact that we have a NULL termination tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); return current_string_buf_loc + sizeof(uint32_t); } - simdjson_really_inline void tape_builder::on_end_string(uint8_t* dst) noexcept { + simdjson_inline void tape_builder::on_end_string(uint8_t* dst) noexcept { uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); // TODO check for overflow in case someone has a crazy string (>=4GB?) // But only add the overflow check when the document itself exceeds 4GB @@ -13581,7 +14189,7 @@ namespace simdjson { namespace { namespace stage1 { - simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { + simdjson_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { // On PPC, we don't short-circuit this if there are no backslashes, because the branch gives us no // benefit and therefore makes things worse. 
// if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; } @@ -13613,8 +14221,12 @@ namespace simdjson { return stage2::tape_builder::parse_document(*this, _doc); } - simdjson_warn_unused uint8_t* dom_parser_implementation::parse_string(const uint8_t* src, uint8_t* dst) const noexcept { - return ppc64::stringparsing::parse_string(src, dst); + simdjson_warn_unused uint8_t* dom_parser_implementation::parse_string(const uint8_t* src, uint8_t* dst, bool replacement_char) const noexcept { + return ppc64::stringparsing::parse_string(src, dst, replacement_char); + } + + simdjson_warn_unused uint8_t* dom_parser_implementation::parse_wobbly_string(const uint8_t* src, uint8_t* dst) const noexcept { + return ppc64::stringparsing::parse_wobbly_string(src, dst); } simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t* _buf, size_t _len, dom::document& _doc) noexcept { @@ -13680,17 +14292,17 @@ namespace simdjson { using namespace simd; struct json_character_block { - static simdjson_really_inline json_character_block classify(const simd::simd8x64& in); + static simdjson_inline json_character_block classify(const simd::simd8x64& in); - simdjson_really_inline uint64_t whitespace() const noexcept { return _whitespace; } - simdjson_really_inline uint64_t op() const noexcept { return _op; } - simdjson_really_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } + simdjson_inline uint64_t whitespace() const noexcept { return _whitespace; } + simdjson_inline uint64_t op() const noexcept { return _op; } + simdjson_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } uint64_t _whitespace; uint64_t _op; }; - simdjson_really_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { + simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { // These lookups rely on the fact that anything < 127 will match the lower 4 bits, 
which is why // we can't use the generic lookup_16. auto whitespace_table = simd8::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); @@ -13749,21 +14361,21 @@ namespace simdjson { return { whitespace, op }; } - simdjson_really_inline bool is_ascii(const simd8x64& input) { + simdjson_inline bool is_ascii(const simd8x64& input) { return input.reduce_or().is_ascii(); } - simdjson_unused simdjson_really_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { - simd8 is_second_byte = prev1.saturating_sub(0b11000000u - 1); // Only 11______ will be > 0 - simd8 is_third_byte = prev2.saturating_sub(0b11100000u - 1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u - 1); // Only 1111____ will be > 0 + simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1.saturating_sub(0xc0u - 1); // Only 11______ will be > 0 + simd8 is_third_byte = prev2.saturating_sub(0xe0u - 1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u - 1); // Only 1111____ will be > 0 // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); } - simdjson_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { - simd8 is_third_byte = prev2.saturating_sub(0b11100000u - 1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u - 1); // Only 1111____ will be > 0 + simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0xe0u - 1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u - 1); // Only 1111____ will be > 0 // Caller requires a bool (all 1's). 
All values resulting from the subtraction will be <= 64, so signed comparison is fine. return simd8(is_third_byte | is_fourth_byte) > int8_t(0); } @@ -13780,7 +14392,7 @@ namespace simdjson { using namespace simd; - simdjson_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) // Bit 1 = Too Long (ASCII followed by continuation) // Bit 2 = Overlong 3-byte @@ -13788,20 +14400,20 @@ namespace simdjson { // Bit 5 = Overlong 2-byte // Bit 7 = Two Continuations constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ + // 11______ 11______ constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; // 11110101 1000____ // 1111011_ 1000____ @@ -13822,7 +14434,7 @@ namespace simdjson { TOO_SHORT | OVERLONG_3 | SURROGATE, // 1111____ ________ TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 - ); + ); constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . 
const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( // ____0000 ________ @@ -13851,7 +14463,7 @@ namespace simdjson { CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, CARRY | TOO_LARGE | TOO_LARGE_1000, CARRY | TOO_LARGE | TOO_LARGE_1000 - ); + ); const simd8 byte_2_high = input.shr<4>().lookup_16( // ________ 0_______ TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, @@ -13867,10 +14479,10 @@ namespace simdjson { // ________ 11______ TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT - ); + ); return (byte_1_high & byte_1_low & byte_2_high); } - simdjson_really_inline simd8 check_multibyte_lengths(const simd8 input, + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, const simd8 prev_input, const simd8 sc) { simd8 prev2 = input.prev<2>(prev_input); simd8 prev3 = input.prev<3>(prev_input); @@ -13883,7 +14495,7 @@ namespace simdjson { // Return nonzero if there are incomplete multibyte characters at the end of the block: // e.g. if there is a 4-byte character, but it's 3 bytes from the end. // - simdjson_really_inline simd8 is_incomplete(const simd8 input) { + simdjson_inline simd8 is_incomplete(const simd8 input) { // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): // ... 
1111____ 111_____ 11______ #if SIMDJSON_IMPLEMENTATION_ICELAKE @@ -13895,14 +14507,14 @@ namespace simdjson { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u - 1, 0b11100000u - 1, 0b11000000u - 1 + 255, 255, 255, 255, 255, 0xf0u - 1, 0xe0u - 1, 0xc0u - 1 }; #else static const uint8_t max_array[32] = { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u - 1, 0b11100000u - 1, 0b11000000u - 1 + 255, 255, 255, 255, 255, 0xf0u - 1, 0xe0u - 1, 0xc0u - 1 }; #endif const simd8 max_value(&max_array[sizeof(max_array) - sizeof(simd8)]); @@ -13920,7 +14532,7 @@ namespace simdjson { // // Check whether the current bytes are valid UTF-8. // - simdjson_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) simd8 prev1 = input.prev<1>(prev_input); @@ -13931,13 +14543,21 @@ namespace simdjson { // The only problem that can happen at EOF is that a multibyte character is too short // or a byte value too large in the last bytes: check_special_cases only checks for bytes // too large in the first of two bytes. - simdjson_really_inline void check_eof() { + simdjson_inline void check_eof() { // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't // possibly finish them. 
this->error |= this->prev_incomplete; } - simdjson_really_inline void check_next_input(const simd8x64& input) { +#ifndef SIMDJSON_IF_CONSTEXPR +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_IF_CONSTEXPR if constexpr +#else +#define SIMDJSON_IF_CONSTEXPR if +#endif +#endif + + simdjson_inline void check_next_input(const simd8x64& input) { if (simdjson_likely(is_ascii(input))) { this->error |= this->prev_incomplete; } @@ -13947,24 +14567,25 @@ namespace simdjson { || (simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4), "We support one, two or four chunks per 64-byte block."); - if (simd8x64::NUM_CHUNKS == 1) { + SIMDJSON_IF_CONSTEXPR(simd8x64::NUM_CHUNKS == 1) { this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - } if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); } - else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS - 1]); - this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS - 1]; +else SIMDJSON_IF_CONSTEXPR(simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } + else SIMDJSON_IF_CONSTEXPR(simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS - 1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS - 1]; } } // do 
not forget to call check_eof! - simdjson_really_inline error_code errors() { + simdjson_inline error_code errors() { return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS; } @@ -13992,10 +14613,10 @@ namespace simdjson { template struct buf_block_reader { public: - simdjson_really_inline buf_block_reader(const uint8_t* _buf, size_t _len); - simdjson_really_inline size_t block_index(); - simdjson_really_inline bool has_full_block() const; - simdjson_really_inline const uint8_t* full_block() const; + simdjson_inline buf_block_reader(const uint8_t* _buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t* full_block() const; /** * Get the last block, padded with spaces. * @@ -14005,8 +14626,8 @@ namespace simdjson { * * @return the number of effective characters in the last block. */ - simdjson_really_inline size_t get_remainder(uint8_t* dst) const; - simdjson_really_inline void advance(); + simdjson_inline size_t get_remainder(uint8_t* dst) const; + simdjson_inline void advance(); private: const uint8_t* buf; const size_t len; @@ -14045,23 +14666,23 @@ namespace simdjson { } template - simdjson_really_inline buf_block_reader::buf_block_reader(const uint8_t* _buf, size_t _len) : buf{ _buf }, len{ _len }, lenminusstep{ len < STEP_SIZE ? 0 : len - STEP_SIZE }, idx{ 0 } {} + simdjson_inline buf_block_reader::buf_block_reader(const uint8_t* _buf, size_t _len) : buf{ _buf }, len{ _len }, lenminusstep{ len < STEP_SIZE ? 
0 : len - STEP_SIZE }, idx{ 0 } {} template - simdjson_really_inline size_t buf_block_reader::block_index() { return idx; } + simdjson_inline size_t buf_block_reader::block_index() { return idx; } template - simdjson_really_inline bool buf_block_reader::has_full_block() const { + simdjson_inline bool buf_block_reader::has_full_block() const { return idx < lenminusstep; } template - simdjson_really_inline const uint8_t* buf_block_reader::full_block() const { + simdjson_inline const uint8_t* buf_block_reader::full_block() const { return &buf[idx]; } template - simdjson_really_inline size_t buf_block_reader::get_remainder(uint8_t* dst) const { + simdjson_inline size_t buf_block_reader::get_remainder(uint8_t* dst) const { if (len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. std::memcpy(dst, buf + idx, len - idx); @@ -14069,7 +14690,7 @@ namespace simdjson { } template - simdjson_really_inline void buf_block_reader::advance() { + simdjson_inline void buf_block_reader::advance() { idx += STEP_SIZE; } @@ -14085,27 +14706,27 @@ namespace simdjson { struct json_string_block { // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : + simdjson_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : _backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {} // Escaped characters (characters following an escape() character) - simdjson_really_inline uint64_t escaped() const { return _escaped; } + simdjson_inline uint64_t escaped() const { return _escaped; } // Escape characters (backslashes that are not escaped--i.e. 
in \\, includes only the first \) - simdjson_really_inline uint64_t escape() const { return _backslash & ~_escaped; } + simdjson_inline uint64_t escape() const { return _backslash & ~_escaped; } // Real (non-backslashed) quotes - simdjson_really_inline uint64_t quote() const { return _quote; } + simdjson_inline uint64_t quote() const { return _quote; } // Start quotes of strings - simdjson_really_inline uint64_t string_start() const { return _quote & _in_string; } + simdjson_inline uint64_t string_start() const { return _quote & _in_string; } // End quotes of strings - simdjson_really_inline uint64_t string_end() const { return _quote & ~_in_string; } + simdjson_inline uint64_t string_end() const { return _quote & ~_in_string; } // Only characters inside the string (not including the quotes) - simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + simdjson_inline uint64_t string_content() const { return _in_string & ~_quote; } // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } // Tail of string (everything except the start quote) - simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } + simdjson_inline uint64_t string_tail() const { return _in_string ^ _quote; } // backslash characters uint64_t _backslash; @@ -14120,14 +14741,14 @@ namespace simdjson { // Scans blocks for string characters, storing the state necessary to do so class 
json_string_scanner { public: - simdjson_really_inline json_string_block next(const simd::simd8x64& in); + simdjson_inline json_string_block next(const simd::simd8x64& in); // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); + simdjson_inline error_code finish(); private: // Intended to be defined by the implementation - simdjson_really_inline uint64_t find_escaped(uint64_t escape); - simdjson_really_inline uint64_t find_escaped_branchless(uint64_t escape); + simdjson_inline uint64_t find_escaped(uint64_t escape); + simdjson_inline uint64_t find_escaped_branchless(uint64_t escape); // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). uint64_t prev_in_string = 0ULL; @@ -14162,7 +14783,7 @@ namespace simdjson { // desired | x | x x x x x x x x | // text | \\\ | \\\"\\\" \\\" \\"\\" | // - simdjson_really_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { + simdjson_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { // If there was overflow, pretend the first character isn't a backslash backslash &= ~prev_escaped; uint64_t follows_escape = backslash << 1 | prev_escaped; @@ -14187,7 +14808,7 @@ namespace simdjson { // // Backslash sequences outside of quotes will be detected in stage 2. 
// - simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + simdjson_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { const uint64_t backslash = in.eq('\\'); const uint64_t escaped = find_escaped(backslash); const uint64_t quote = in.eq('"') & ~escaped; @@ -14220,7 +14841,7 @@ namespace simdjson { ); } - simdjson_really_inline error_code json_string_scanner::finish() { + simdjson_inline error_code json_string_scanner::finish() { if (prev_in_string) { return UNCLOSED_STRING; } @@ -14258,25 +14879,25 @@ namespace simdjson { struct json_block { public: // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} - simdjson_really_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} /** * The start of structurals. * In simdjson prior to v0.3, these were called the pseudo-structural characters. **/ - simdjson_really_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } /** All JSON whitespace (i.e. 
not in a string) */ - simdjson_really_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } // Helpers /** Whether the given characters are inside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } /** Whether the given characters are outside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } // string and escape characters json_string_block _string; @@ -14291,12 +14912,12 @@ namespace simdjson { * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". * They may reside inside a string. **/ - simdjson_really_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } /** * The start of non-operator runs, like 123, true and "abc". * It main reside inside a string. **/ - simdjson_really_inline uint64_t potential_scalar_start() const noexcept { + simdjson_inline uint64_t potential_scalar_start() const noexcept { // The term "scalar" refers to anything except structural characters and white space // (so letters, numbers, quotes). 
// Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space @@ -14307,7 +14928,7 @@ namespace simdjson { * Whether the given character is immediately after a non-operator like 123, true. * The characters following a quote are not included. */ - simdjson_really_inline uint64_t follows_potential_scalar() const noexcept { + simdjson_inline uint64_t follows_potential_scalar() const noexcept { // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a // white space. @@ -14331,10 +14952,10 @@ namespace simdjson { */ class json_scanner { public: - json_scanner() {} - simdjson_really_inline json_block next(const simd::simd8x64& in); + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); + simdjson_inline error_code finish(); private: // Whether the last character of the previous iteration is part of a scalar token @@ -14351,13 +14972,13 @@ namespace simdjson { // // const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); // - simdjson_really_inline uint64_t follows(const uint64_t match, uint64_t& overflow) { + simdjson_inline uint64_t follows(const uint64_t match, uint64_t& overflow) { const uint64_t result = match << 1 | overflow; overflow = match >> 63; return result; } - simdjson_really_inline json_block json_scanner::next(const simd::simd8x64& in) { + simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { json_string_block strings = string_scanner.next(in); // identifies the white-space and the structural characters json_character_block characters = json_character_block::classify(in); @@ -14382,7 +15003,7 @@ namespace simdjson { ); } - simdjson_really_inline error_code json_scanner::finish() { + simdjson_inline 
error_code json_scanner::finish() { return string_scanner.finish(); } @@ -14408,23 +15029,23 @@ namespace simdjson { static error_code minify(const uint8_t* buf, size_t len, uint8_t* dst, size_t& dst_len) noexcept; private: - simdjson_really_inline json_minifier(uint8_t* _dst) + simdjson_inline json_minifier(uint8_t* _dst) : dst{ _dst } {} template - simdjson_really_inline void step(const uint8_t* block_buf, buf_block_reader& reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block); - simdjson_really_inline error_code finish(uint8_t* dst_start, size_t& dst_len); + simdjson_inline void step(const uint8_t* block_buf, buf_block_reader& reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t* dst_start, size_t& dst_len); json_scanner scanner{}; uint8_t* dst; }; - simdjson_really_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { uint64_t mask = block.whitespace(); dst += in.compress(mask, dst); } - simdjson_really_inline error_code json_minifier::finish(uint8_t* dst_start, size_t& dst_len) { + simdjson_inline error_code json_minifier::finish(uint8_t* dst_start, size_t& dst_len) { error_code error = scanner.finish(); if (error) { dst_len = 0; return error; } dst_len = dst - dst_start; @@ -14432,7 +15053,7 @@ namespace simdjson { } template<> - simdjson_really_inline void json_minifier::step<128>(const uint8_t* block_buf, buf_block_reader<128>& reader) noexcept { + simdjson_inline void json_minifier::step<128>(const uint8_t* block_buf, buf_block_reader<128>& reader) noexcept { simd::simd8x64 in_1(block_buf); simd::simd8x64 in_2(block_buf + 64); json_block block_1 = scanner.next(in_1); @@ -14443,7 +15064,7 @@ namespace simdjson { } template<> - simdjson_really_inline void json_minifier::step<64>(const uint8_t* 
block_buf, buf_block_reader<64>& reader) noexcept { + simdjson_inline void json_minifier::step<64>(const uint8_t* block_buf, buf_block_reader<64>& reader) noexcept { simd::simd8x64 in_1(block_buf); json_block block_1 = scanner.next(in_1); this->next(block_buf, block_1); @@ -14516,7 +15137,7 @@ namespace simdjson { * complete document, therefore the last json buffer location is the end of the * batch. */ - simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation& parser) { + simdjson_inline uint32_t find_next_document_index(dom_parser_implementation& parser) { // Variant: do not count separately, just figure out depth if (parser.n_structural_indexes == 0) { return 0; } auto arr_cnt = 0; @@ -14591,7 +15212,7 @@ namespace simdjson { public: uint32_t* tail; - simdjson_really_inline bit_indexer(uint32_t* index_buf) : tail(index_buf) {} + simdjson_inline bit_indexer(uint32_t* index_buf) : tail(index_buf) {} // flatten out values in 'bits' assuming that they are are to have values of idx // plus their position in the bitvector, and store these indexes at @@ -14602,15 +15223,15 @@ namespace simdjson { // If the kernel sets SIMDJSON_CUSTOM_BIT_INDEXER, then it will provide its own // version of the code. #ifdef SIMDJSON_CUSTOM_BIT_INDEXER - simdjson_really_inline void write(uint32_t idx, uint64_t bits); + simdjson_inline void write(uint32_t idx, uint64_t bits); #else - simdjson_really_inline void write(uint32_t idx, uint64_t bits) { + simdjson_inline void write(uint32_t idx, uint64_t bits) { // In some instances, the next branch is expensive because it is mispredicted. // Unfortunately, in other cases, // it helps tremendously. if (bits == 0) return; -#if defined(SIMDJSON_PREFER_REVERSE_BITS) +#if SIMDJSON_PREFER_REVERSE_BITS /** * ARM lacks a fast trailing zero instruction, but it has a fast * bit reversal instruction and a fast leading zero instruction. 
@@ -14713,11 +15334,11 @@ namespace simdjson { static error_code index(const uint8_t* buf, size_t len, dom_parser_implementation& parser, stage1_mode partial) noexcept; private: - simdjson_really_inline json_structural_indexer(uint32_t* structural_indexes); + simdjson_inline json_structural_indexer(uint32_t* structural_indexes); template - simdjson_really_inline void step(const uint8_t* block, buf_block_reader& reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); - simdjson_really_inline error_code finish(dom_parser_implementation& parser, size_t idx, size_t len, stage1_mode partial); + simdjson_inline void step(const uint8_t* block, buf_block_reader& reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation& parser, size_t idx, size_t len, stage1_mode partial); json_scanner scanner{}; utf8_checker checker{}; @@ -14726,26 +15347,26 @@ namespace simdjson { uint64_t unescaped_chars_error = 0; }; - simdjson_really_inline json_structural_indexer::json_structural_indexer(uint32_t* structural_indexes) : indexer{ structural_indexes } {} + simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t* structural_indexes) : indexer{ structural_indexes } {} // Skip the last character if it is partial - simdjson_really_inline size_t trim_partial_utf8(const uint8_t* buf, size_t len) { + simdjson_inline size_t trim_partial_utf8(const uint8_t* buf, size_t len) { if (simdjson_unlikely(len < 3)) { switch (len) { case 2: - if (buf[len - 1] >= 0b11000000) { return len - 1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len - 2] >= 0b11100000) { return len - 2; } // 3- and 4-byte characters with only 2 bytes left + if (buf[len - 1] >= 0xc0) { return len - 1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len - 2] >= 0xe0) { return len - 2; } // 3- and 4-byte 
characters with only 2 bytes left return len; case 1: - if (buf[len - 1] >= 0b11000000) { return len - 1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len - 1] >= 0xc0) { return len - 1; } // 2-, 3- and 4-byte characters with only 1 byte left return len; case 0: return len; } } - if (buf[len - 1] >= 0b11000000) { return len - 1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len - 2] >= 0b11100000) { return len - 2; } // 3- and 4-byte characters with only 1 byte left - if (buf[len - 3] >= 0b11110000) { return len - 3; } // 4-byte characters with only 3 bytes left + if (buf[len - 1] >= 0xc0) { return len - 1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len - 2] >= 0xe0) { return len - 2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len - 3] >= 0xf0) { return len - 3; } // 4-byte characters with only 3 bytes left return len; } @@ -14794,7 +15415,7 @@ namespace simdjson { } template<> - simdjson_really_inline void json_structural_indexer::step<128>(const uint8_t* block, buf_block_reader<128>& reader) noexcept { + simdjson_inline void json_structural_indexer::step<128>(const uint8_t* block, buf_block_reader<128>& reader) noexcept { simd::simd8x64 in_1(block); simd::simd8x64 in_2(block + 64); json_block block_1 = scanner.next(in_1); @@ -14805,22 +15426,24 @@ namespace simdjson { } template<> - simdjson_really_inline void json_structural_indexer::step<64>(const uint8_t* block, buf_block_reader<64>& reader) noexcept { + simdjson_inline void json_structural_indexer::step<64>(const uint8_t* block, buf_block_reader<64>& reader) noexcept { simd::simd8x64 in_1(block); json_block block_1 = scanner.next(in_1); this->next(in_1, block_1, reader.block_index()); reader.advance(); } - simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& 
block, size_t idx) { uint64_t unescaped = in.lteq(0x1F); +#if SIMDJSON_UTF8VALIDATION checker.check_next_input(in); +#endif indexer.write(uint32_t(idx - 64), prev_structurals); // Output *last* iteration's structurals to the parser prev_structurals = block.structural_start(); unescaped_chars_error |= block.non_quote_inside_string(unescaped); } - simdjson_really_inline error_code json_structural_indexer::finish(dom_parser_implementation& parser, size_t idx, size_t len, stage1_mode partial) { + simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation& parser, size_t idx, size_t len, stage1_mode partial) { // Write out the final iteration's structurals indexer.write(uint32_t(idx - 64), prev_structurals); error_code error = scanner.finish(); @@ -15011,39 +15634,100 @@ namespace simdjson { // return true if the unicode codepoint was valid // We work in little-endian then swap at write time simdjson_warn_unused - simdjson_really_inline bool handle_unicode_codepoint(const uint8_t** src_ptr, - uint8_t** dst_ptr) { + simdjson_inline bool handle_unicode_codepoint(const uint8_t** src_ptr, + uint8_t** dst_ptr, bool allow_replacement) { + // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) + constexpr uint32_t substitution_code_point = 0xfffd; // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the // conversion isn't valid; we defer the check for this to inside the // multilingual plane check uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); *src_ptr += 6; - // check for low surrogate for characters outside the Basic + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic // Multilingual Plane. 
if (code_point >= 0xd800 && code_point < 0xdc00) { - if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { - return false; + const uint8_t* src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + if (!allow_replacement) { return false; } + code_point = substitution_code_point; } - uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + else { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + if (!allow_replacement) { return false; } + code_point = substitution_code_point; + } + else { + code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } - // if the first code point is invalid we will get here, as we will go past - // the check for being outside the Basic Multilingual plane. If we don't - // find a \u immediately afterwards we fail out anyhow, but if we do, - // this check catches both the case of the first code point being invalid - // or the second code point being invalid. - if ((code_point | code_point_2) >> 16) { - return false; } - - code_point = - (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; - *src_ptr += 6; + } + else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. 
+ if (!allow_replacement) { return false; } + code_point = substitution_code_point; } size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); *dst_ptr += offset; return offset > 0; } + + // handle a unicode codepoint using the wobbly convention + // https://simonsapin.github.io/wtf-8/ + // write appropriate values into dest + // src will advance 6 bytes or 12 bytes + // dest will advance a variable amount (return via pointer) + // return true if the unicode codepoint was valid + // We work in little-endian then swap at write time + simdjson_warn_unused + simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t** src_ptr, + uint8_t** dst_ptr) { + // It is not ideal that this function is nearly identical to handle_unicode_codepoint. + // + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t* src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + uint32_t low_bit = code_point_2 - 0xdc00; + if ((low_bit >> 10) == 0) { + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + } + } + + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; + } + + /** * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. 
It returns the final output @@ -15052,7 +15736,7 @@ namespace simdjson { * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. */ - simdjson_warn_unused simdjson_really_inline uint8_t* parse_string(const uint8_t* src, uint8_t* dst) { + simdjson_warn_unused simdjson_inline uint8_t* parse_string(const uint8_t* src, uint8_t* dst, bool allow_replacement) { while (1) { // Copy the next n bytes, and find the backslash and quote in them. auto bs_quote = backslash_and_quote::copy_and_find(src, dst); @@ -15071,7 +15755,56 @@ namespace simdjson { within the unicode codepoint handling code. */ src += bs_dist; dst += bs_dist; - if (!handle_unicode_codepoint(&src, &dst)) { + if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { + return nullptr; + } + } + else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } + else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; + } + + simdjson_warn_unused simdjson_inline uint8_t* parse_wobbly_string(const uint8_t* src, uint8_t* dst) { + // It is not ideal that this function is nearly identical to parse_string. + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. 
Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint_wobbly(&src, &dst)) { return nullptr; } } @@ -15130,7 +15863,7 @@ namespace simdjson { static int log_depth; // Not threadsafe. Log only. // Helper to turn unprintable or newline characters into spaces - static simdjson_really_inline char printable_char(char c) { + static simdjson_inline char printable_char(char c) { if (c >= 0x20) { return c; } @@ -15140,7 +15873,7 @@ namespace simdjson { } // Print the header and set up log_start - static simdjson_really_inline void log_start() { + static simdjson_inline void log_start() { if (LOG_ENABLED) { log_depth = 0; printf("\n"); @@ -15149,7 +15882,7 @@ namespace simdjson { } } - simdjson_unused static simdjson_really_inline void log_string(const char* message) { + simdjson_unused static simdjson_inline void log_string(const char* message) { if (LOG_ENABLED) { printf("%s\n", message); } @@ -15157,7 +15890,7 @@ namespace simdjson { // Logs a single line from the stage 2 DOM parser template - static simdjson_really_inline void log_line(S& structurals, const char* title_prefix, const char* title, const char* detail) { + static simdjson_inline void log_line(S& structurals, const char* title_prefix, const char* title, const char* detail) { if (LOG_ENABLED) { printf("| %*s%s%-*s ", log_depth * 2, "", title_prefix, LOG_EVENT_LEN - log_depth * 2 - int(strlen(title_prefix)), title); auto current_index = structurals.at_beginning() ? 
nullptr : structurals.next_structural - 1; @@ -15236,14 +15969,14 @@ namespace simdjson { * - increment_count(iter) - each time a value is found in an array or object. */ template - simdjson_warn_unused simdjson_really_inline error_code walk_document(V& visitor) noexcept; + simdjson_warn_unused simdjson_inline error_code walk_document(V& visitor) noexcept; /** * Create an iterator capable of walking a JSON document. * * The document must have already passed through stage 1. */ - simdjson_really_inline json_iterator(dom_parser_implementation& _dom_parser, size_t start_structural_index); + simdjson_inline json_iterator(dom_parser_implementation& _dom_parser, size_t start_structural_index); /** * Look at the next token. @@ -15252,7 +15985,7 @@ namespace simdjson { * * They may include invalid JSON as well (such as `1.2.3` or `ture`). */ - simdjson_really_inline const uint8_t* peek() const noexcept; + simdjson_inline const uint8_t* peek() const noexcept; /** * Advance to the next token. * @@ -15260,56 +15993,56 @@ namespace simdjson { * * They may include invalid JSON as well (such as `1.2.3` or `ture`). */ - simdjson_really_inline const uint8_t* advance() noexcept; + simdjson_inline const uint8_t* advance() noexcept; /** * Get the remaining length of the document, from the start of the current token. */ - simdjson_really_inline size_t remaining_len() const noexcept; + simdjson_inline size_t remaining_len() const noexcept; /** * Check if we are at the end of the document. * * If this is true, there are no more tokens. */ - simdjson_really_inline bool at_eof() const noexcept; + simdjson_inline bool at_eof() const noexcept; /** * Check if we are at the beginning of the document. */ - simdjson_really_inline bool at_beginning() const noexcept; - simdjson_really_inline uint8_t last_structural() const noexcept; + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; /** * Log that a value has been found. 
* * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_value(const char* type) const noexcept; + simdjson_inline void log_value(const char* type) const noexcept; /** * Log the start of a multipart value. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_start_value(const char* type) const noexcept; + simdjson_inline void log_start_value(const char* type) const noexcept; /** * Log the end of a multipart value. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_end_value(const char* type) const noexcept; + simdjson_inline void log_end_value(const char* type) const noexcept; /** * Log an error. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_error(const char* error) const noexcept; + simdjson_inline void log_error(const char* error) const noexcept; template - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(V& visitor, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V& visitor, const uint8_t* value) noexcept; template - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(V& visitor, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_primitive(V& visitor, const uint8_t* value) noexcept; }; template - simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_document(V& visitor) noexcept { + simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V& visitor) noexcept { logger::log_start(); // @@ -15434,52 +16167,52 @@ namespace simdjson { } // walk_document() - simdjson_really_inline json_iterator::json_iterator(dom_parser_implementation& _dom_parser, size_t start_structural_index) + simdjson_inline json_iterator::json_iterator(dom_parser_implementation& _dom_parser, size_t start_structural_index) : buf{ _dom_parser.buf }, next_structural{ 
&_dom_parser.structural_indexes[start_structural_index] }, dom_parser{ _dom_parser } { } - simdjson_really_inline const uint8_t* json_iterator::peek() const noexcept { + simdjson_inline const uint8_t* json_iterator::peek() const noexcept { return &buf[*(next_structural)]; } - simdjson_really_inline const uint8_t* json_iterator::advance() noexcept { + simdjson_inline const uint8_t* json_iterator::advance() noexcept { return &buf[*(next_structural++)]; } - simdjson_really_inline size_t json_iterator::remaining_len() const noexcept { + simdjson_inline size_t json_iterator::remaining_len() const noexcept { return dom_parser.len - *(next_structural - 1); } - simdjson_really_inline bool json_iterator::at_eof() const noexcept { + simdjson_inline bool json_iterator::at_eof() const noexcept { return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; } - simdjson_really_inline bool json_iterator::at_beginning() const noexcept { + simdjson_inline bool json_iterator::at_beginning() const noexcept { return next_structural == dom_parser.structural_indexes.get(); } - simdjson_really_inline uint8_t json_iterator::last_structural() const noexcept { + simdjson_inline uint8_t json_iterator::last_structural() const noexcept { return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; } - simdjson_really_inline void json_iterator::log_value(const char* type) const noexcept { + simdjson_inline void json_iterator::log_value(const char* type) const noexcept { logger::log_line(*this, "", type, ""); } - simdjson_really_inline void json_iterator::log_start_value(const char* type) const noexcept { + simdjson_inline void json_iterator::log_start_value(const char* type) const noexcept { logger::log_line(*this, "+", type, ""); if (logger::LOG_ENABLED) { logger::log_depth++; } } - simdjson_really_inline void json_iterator::log_end_value(const char* type) const noexcept { + simdjson_inline void json_iterator::log_end_value(const char* type) 
const noexcept { if (logger::LOG_ENABLED) { logger::log_depth--; } logger::log_line(*this, "-", type, ""); } - simdjson_really_inline void json_iterator::log_error(const char* error) const noexcept { + simdjson_inline void json_iterator::log_error(const char* error) const noexcept { logger::log_line(*this, "", "ERROR", error); } template - simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root_primitive(V& visitor, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V& visitor, const uint8_t* value) noexcept { switch (*value) { case '"': return visitor.visit_root_string(*this, value); case 't': return visitor.visit_root_true_atom(*this, value); @@ -15495,7 +16228,7 @@ namespace simdjson { } } template - simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_primitive(V& visitor, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V& visitor, const uint8_t* value) noexcept { switch (*value) { case '"': return visitor.visit_string(*this, value); case 't': return visitor.visit_true_atom(*this, value); @@ -15527,18 +16260,18 @@ namespace simdjson { uint64_t* next_tape_loc; /** Write a signed 64-bit value to tape. */ - simdjson_really_inline void append_s64(int64_t value) noexcept; + simdjson_inline void append_s64(int64_t value) noexcept; /** Write an unsigned 64-bit value to tape. */ - simdjson_really_inline void append_u64(uint64_t value) noexcept; + simdjson_inline void append_u64(uint64_t value) noexcept; /** Write a double value to tape. */ - simdjson_really_inline void append_double(double value) noexcept; + simdjson_inline void append_double(double value) noexcept; /** * Append a tape entry (an 8-bit type,and 56 bits worth of value). 
*/ - simdjson_really_inline void append(uint64_t val, internal::tape_type t) noexcept; + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; /** * Skip the current tape entry without writing. @@ -15546,24 +16279,24 @@ namespace simdjson { * Used to skip the start of the container, since we'll come back later to fill it in when the * container ends. */ - simdjson_really_inline void skip() noexcept; + simdjson_inline void skip() noexcept; /** * Skip the number of tape entries necessary to write a large u64 or i64. */ - simdjson_really_inline void skip_large_integer() noexcept; + simdjson_inline void skip_large_integer() noexcept; /** * Skip the number of tape entries necessary to write a double. */ - simdjson_really_inline void skip_double() noexcept; + simdjson_inline void skip_double() noexcept; /** * Write a value to a known location on tape. * * Used to go back and write out the start of a container after the container ends. */ - simdjson_really_inline static void write(uint64_t& tape_loc, uint64_t val, internal::tape_type t) noexcept; + simdjson_inline static void write(uint64_t& tape_loc, uint64_t val, internal::tape_type t) noexcept; private: /** @@ -15571,50 +16304,50 @@ namespace simdjson { * all 64 bits, such as double and uint64_t. */ template - simdjson_really_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; }; // struct number_writer - simdjson_really_inline void tape_writer::append_s64(int64_t value) noexcept { + simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { append2(0, value, internal::tape_type::INT64); } - simdjson_really_inline void tape_writer::append_u64(uint64_t value) noexcept { + simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { append(0, internal::tape_type::UINT64); *next_tape_loc = value; next_tape_loc++; } /** Write a double value to tape. 
*/ - simdjson_really_inline void tape_writer::append_double(double value) noexcept { + simdjson_inline void tape_writer::append_double(double value) noexcept { append2(0, value, internal::tape_type::DOUBLE); } - simdjson_really_inline void tape_writer::skip() noexcept { + simdjson_inline void tape_writer::skip() noexcept { next_tape_loc++; } - simdjson_really_inline void tape_writer::skip_large_integer() noexcept { + simdjson_inline void tape_writer::skip_large_integer() noexcept { next_tape_loc += 2; } - simdjson_really_inline void tape_writer::skip_double() noexcept { + simdjson_inline void tape_writer::skip_double() noexcept { next_tape_loc += 2; } - simdjson_really_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { *next_tape_loc = val | ((uint64_t(char(t))) << 56); next_tape_loc++; } template - simdjson_really_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { append(val, t); static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); memcpy(next_tape_loc, &val2, sizeof(val2)); next_tape_loc++; } - simdjson_really_inline void tape_writer::write(uint64_t& tape_loc, uint64_t val, internal::tape_type t) noexcept { + simdjson_inline void tape_writer::write(uint64_t& tape_loc, uint64_t val, internal::tape_type t) noexcept { tape_loc = val | ((uint64_t(char(t))) << 56); } @@ -15631,40 +16364,40 @@ namespace simdjson { struct tape_builder { template - simdjson_warn_unused static simdjson_really_inline error_code parse_document( + simdjson_warn_unused static simdjson_inline error_code parse_document( dom_parser_implementation& dom_parser, dom::document& doc) noexcept; /** Called when a non-empty document starts. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_document_start(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator& iter) noexcept; /** Called when a non-empty document ends without error. */ - simdjson_warn_unused simdjson_really_inline error_code visit_document_end(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator& iter) noexcept; /** Called when a non-empty array starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_start(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator& iter) noexcept; /** Called when a non-empty array ends. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_end(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator& iter) noexcept; /** Called when an empty array is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_array(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator& iter) noexcept; /** Called when a non-empty object starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_object_start(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator& iter) noexcept; /** * Called when a key in a field is encountered. * * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array * will be called after this with the field value. */ - simdjson_warn_unused simdjson_really_inline error_code visit_key(json_iterator& iter, const uint8_t* key) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator& iter, const uint8_t* key) noexcept; /** Called when a non-empty object ends. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_object_end(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator& iter) noexcept; /** Called when an empty object is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_object(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator& iter) noexcept; /** * Called when a string, number, boolean or null is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator& iter, const uint8_t* value) noexcept; /** * Called when a string, number, boolean or null is found at the top level of a document (i.e. * when there is no array or object and the entire document is a single string, number, boolean or @@ -15673,22 +16406,22 @@ namespace simdjson { * This is separate from primitive() because simdjson's normal primitive parsing routines assume * there is at least one more token after the value, which is only true in an array or object. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_string(json_iterator& iter, const uint8_t* value, bool key = false) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_number(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_true_atom(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_false_atom(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_null_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator& iter, const uint8_t* value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_string(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_number(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_true_atom(json_iterator& iter, const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_false_atom(json_iterator& iter, 
const uint8_t* value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_null_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator& iter, const uint8_t* value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator& iter, const uint8_t* value) noexcept; /** Called each time a new field or element in an array or object is found. */ - simdjson_warn_unused simdjson_really_inline error_code increment_count(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator& iter) noexcept; /** Next location to write to tape */ tape_writer tape; @@ -15696,18 +16429,18 @@ namespace simdjson { /** Next write location in the string buf for stage 2 parsing */ uint8_t* current_string_buf_loc; - simdjson_really_inline tape_builder(dom::document& doc) noexcept; + simdjson_inline tape_builder(dom::document& doc) noexcept; - simdjson_really_inline uint32_t next_tape_index(json_iterator& iter) const noexcept; - simdjson_really_inline void start_container(json_iterator& iter) noexcept; - simdjson_warn_unused simdjson_really_inline error_code end_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_warn_unused simdjson_really_inline error_code empty_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_really_inline uint8_t* on_start_string(json_iterator& iter) noexcept; - simdjson_really_inline void on_end_string(uint8_t* 
dst) noexcept; + simdjson_inline uint32_t next_tape_index(json_iterator& iter) const noexcept; + simdjson_inline void start_container(json_iterator& iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t* on_start_string(json_iterator& iter) noexcept; + simdjson_inline void on_end_string(uint8_t* dst) noexcept; }; // class tape_builder template - simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_document( + simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( dom_parser_implementation& dom_parser, dom::document& doc) noexcept { dom_parser.doc = &doc; @@ -15716,59 +16449,59 @@ namespace simdjson { return iter.walk_document(builder); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_primitive(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator& iter, const uint8_t* value) noexcept { return iter.visit_root_primitive(*this, value); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_primitive(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator& iter, const uint8_t* value) noexcept { return iter.visit_primitive(*this, value); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_object(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator& iter) noexcept { return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); } - simdjson_warn_unused 
simdjson_really_inline error_code tape_builder::visit_empty_array(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator& iter) noexcept { return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_start(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator& iter) noexcept { start_container(iter); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_start(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator& iter) noexcept { start_container(iter); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_start(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator& iter) noexcept { start_container(iter); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_end(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator& iter) noexcept { return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_end(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator& iter) noexcept { return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_end(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code 
tape_builder::visit_document_end(json_iterator& iter) noexcept { constexpr uint32_t start_tape_index = 0; tape.append(start_tape_index, internal::tape_type::ROOT); tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_key(json_iterator& iter, const uint8_t* key) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator& iter, const uint8_t* key) noexcept { return visit_string(iter, key, true); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::increment_count(json_iterator& iter) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator& iter) noexcept { iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 return SUCCESS; } - simdjson_really_inline tape_builder::tape_builder(dom::document& doc) noexcept : tape{ doc.tape.get() }, current_string_buf_loc{ doc.string_buf.get() } {} + simdjson_inline tape_builder::tape_builder(dom::document& doc) noexcept : tape{ doc.tape.get() }, current_string_buf_loc{ doc.string_buf.get() } {} - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_string(json_iterator& iter, const uint8_t* value, bool key) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator& iter, const uint8_t* value, bool key) noexcept { iter.log_value(key ? "key" : "string"); uint8_t* dst = on_start_string(iter); - dst = stringparsing::parse_string(value + 1, dst); + dst = stringparsing::parse_string(value + 1, dst, false); // We do not allow replacement when the escape characters are invalid. 
if (dst == nullptr) { iter.log_error("Invalid escape in string"); return STRING_ERROR; @@ -15777,16 +16510,16 @@ namespace simdjson { return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_string(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator& iter, const uint8_t* value) noexcept { return visit_string(iter, value); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_number(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("number"); return numberparsing::parse_number(value, tape); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_number(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator& iter, const uint8_t* value) noexcept { // // We need to make a copy to make sure that the string is space terminated. 
// This is not about padding the input, which should already padded up @@ -15808,42 +16541,42 @@ namespace simdjson { return error; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_true_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("true"); if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } tape.append(0, internal::tape_type::TRUE_VALUE); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_true_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("true"); if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } tape.append(0, internal::tape_type::TRUE_VALUE); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_false_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("false"); if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } tape.append(0, internal::tape_type::FALSE_VALUE); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_false_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("false"); if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } tape.append(0, internal::tape_type::FALSE_VALUE); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline 
error_code tape_builder::visit_null_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("null"); if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } tape.append(0, internal::tape_type::NULL_VALUE); return SUCCESS; } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_null_atom(json_iterator& iter, const uint8_t* value) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator& iter, const uint8_t* value) noexcept { iter.log_value("null"); if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } tape.append(0, internal::tape_type::NULL_VALUE); @@ -15852,24 +16585,24 @@ namespace simdjson { // private: - simdjson_really_inline uint32_t tape_builder::next_tape_index(json_iterator& iter) const noexcept { + simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator& iter) const noexcept { return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::empty_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept { auto start_index = next_tape_index(iter); tape.append(start_index + 2, start); tape.append(start_index, end); return SUCCESS; } - simdjson_really_inline void tape_builder::start_container(json_iterator& iter) noexcept { + simdjson_inline void tape_builder::start_container(json_iterator& iter) noexcept { iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); iter.dom_parser.open_containers[iter.depth].count = 0; tape.skip(); // We don't actually *write* the start 
element until the end. } - simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept { + simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator& iter, internal::tape_type start, internal::tape_type end) noexcept { // Write the ending tape element, pointing at the start location const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; tape.append(start_tape_index, end); @@ -15882,13 +16615,13 @@ namespace simdjson { return SUCCESS; } - simdjson_really_inline uint8_t* tape_builder::on_start_string(json_iterator& iter) noexcept { + simdjson_inline uint8_t* tape_builder::on_start_string(json_iterator& iter) noexcept { // we advance the point, accounting for the fact that we have a NULL termination tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); return current_string_buf_loc + sizeof(uint32_t); } - simdjson_really_inline void tape_builder::on_end_string(uint8_t* dst) noexcept { + simdjson_inline void tape_builder::on_end_string(uint8_t* dst) noexcept { uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); // TODO check for overflow in case someone has a crazy string (>=4GB?) 
// But only add the overflow check when the document itself exceeds 4GB @@ -15915,7 +16648,7 @@ namespace simdjson { namespace { namespace stage1 { - simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { + simdjson_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; } return find_escaped_branchless(backslash); } @@ -15945,8 +16678,12 @@ namespace simdjson { return stage2::tape_builder::parse_document(*this, _doc); } - simdjson_warn_unused uint8_t* dom_parser_implementation::parse_string(const uint8_t* src, uint8_t* dst) const noexcept { - return westmere::stringparsing::parse_string(src, dst); + simdjson_warn_unused uint8_t* dom_parser_implementation::parse_string(const uint8_t* src, uint8_t* dst, bool replacement_char) const noexcept { + return westmere::stringparsing::parse_string(src, dst, replacement_char); + } + + simdjson_warn_unused uint8_t* dom_parser_implementation::parse_wobbly_string(const uint8_t* src, uint8_t* dst) const noexcept { + return westmere::stringparsing::parse_wobbly_string(src, dst); } simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t* _buf, size_t _len, dom::document& _doc) noexcept { diff --git a/M6502/HarteTest_6502/simdjson/simdjson.h b/M6502/HarteTest_6502/simdjson/simdjson.h index 903c835..729c988 100644 --- a/M6502/HarteTest_6502/simdjson/simdjson.h +++ b/M6502/HarteTest_6502/simdjson/simdjson.h @@ -1,4 +1,4 @@ -/* auto-generated on 2022-06-30 10:29:50 -0400. Do not edit! */ +/* auto-generated on 2023-03-13 21:26:32 -0400. Do not edit! 
*/ /* begin file include/simdjson.h */ #ifndef SIMDJSON_H #define SIMDJSON_H @@ -43,22 +43,22 @@ #define SIMDJSON_SIMDJSON_VERSION_H /** The version of simdjson being used (major.minor.revision) */ -#define SIMDJSON_VERSION 2.1.0 +#define SIMDJSON_VERSION "3.1.6" namespace simdjson { enum { /** * The major version (MAJOR.minor.revision) of simdjson being used. */ - SIMDJSON_VERSION_MAJOR = 2, + SIMDJSON_VERSION_MAJOR = 3, /** * The minor version (major.MINOR.revision) of simdjson being used. */ - SIMDJSON_VERSION_MINOR = 1, - /** - * The revision (major.minor.REVISION) of simdjson being used. - */ - SIMDJSON_VERSION_REVISION = 0 + SIMDJSON_VERSION_MINOR = 1, + /** + * The revision (major.minor.REVISION) of simdjson being used. + */ + SIMDJSON_VERSION_REVISION = 6 }; } // namespace simdjson @@ -149,7 +149,7 @@ namespace simdjson { #endif // __clang__ #endif // _MSC_VER -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if SIMDJSON_REGULAR_VISUAL_STUDIO // https://en.wikipedia.org/wiki/C_alternative_tokens // This header should have no effect, except maybe // under Visual Studio. @@ -176,8 +176,11 @@ namespace simdjson { #endif #endif // defined(__x86_64__) || defined(_M_AMD64) +#ifndef SIMDJSON_IS_32BITS +#define SIMDJSON_IS_32BITS 0 +#endif -#ifdef SIMDJSON_IS_32BITS +#if SIMDJSON_IS_32BITS #ifndef SIMDJSON_NO_PORTABILITY_WARNING #pragma message("The simdjson library is designed \ for 64-bit processors and it seems that you are not \ @@ -208,7 +211,7 @@ use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.") // // We are going to use runtime dispatch. 
-#ifdef SIMDJSON_IS_X86_64 +#if SIMDJSON_IS_X86_64 #ifdef __clang__ // clang does not have GCC push pop // warning: clang attribute push can't be used within a namespace in clang up @@ -262,7 +265,20 @@ use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.") #define SIMDJSON_NO_SANITIZE_UNDEFINED #endif -#ifdef SIMDJSON_VISUAL_STUDIO + +#if defined(__clang__) || defined(__GNUC__) +#if defined(__has_feature) +# if __has_feature(memory_sanitizer) +#define SIMDJSON_NO_SANITIZE_MEMORY __attribute__((no_sanitize("memory"))) +# endif // if __has_feature(memory_sanitizer) +#endif // defined(__has_feature) +#endif +// make sure it is defined as 'nothing' if it is unapplicable. +#ifndef SIMDJSON_NO_SANITIZE_MEMORY +#define SIMDJSON_NO_SANITIZE_MEMORY +#endif + +#if SIMDJSON_VISUAL_STUDIO // This is one case where we do not distinguish between // regular visual studio and clang under visual studio. // clang under Windows has _stricmp (like visual studio) but not strcasecmp (as clang normally has) @@ -278,7 +294,7 @@ use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.") #ifdef NDEBUG -#ifdef SIMDJSON_VISUAL_STUDIO +#if SIMDJSON_VISUAL_STUDIO #define SIMDJSON_UNREACHABLE() __assume(0) #define SIMDJSON_ASSUME(COND) __assume(COND) #else @@ -329,12 +345,12 @@ namespace simdjson { /** * The amount of padding needed in a buffer to parse JSON. * - * the input buf should be readable up to buf + SIMDJSON_PADDING + * The input buf should be readable up to buf + SIMDJSON_PADDING * this is a stopgap; there should be a better description of the * main loop and its behavior that abstracts over this * See https://github.com/simdjson/simdjson/issues/174 */ - constexpr size_t SIMDJSON_PADDING = 32; + constexpr size_t SIMDJSON_PADDING = 64; /** * By default, simdjson supports this many nested objects and arrays. 
@@ -362,7 +378,7 @@ namespace simdjson { #define SIMDJSON_ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0) -#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) +#if SIMDJSON_REGULAR_VISUAL_STUDIO #define simdjson_really_inline __forceinline #define simdjson_never_inline __declspec(noinline) @@ -447,7 +463,7 @@ namespace simdjson { #define SIMDJSON_PRAGMA(P) _Pragma(#P) #define SIMDJSON_DISABLE_GCC_WARNING(WARNING) SIMDJSON_PRAGMA(GCC diagnostic ignored #WARNING) -#if defined(SIMDJSON_CLANG_VISUAL_STUDIO) +#if SIMDJSON_CLANG_VISUAL_STUDIO #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS SIMDJSON_DISABLE_GCC_WARNING(-Wmicrosoft-include) #else #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS @@ -460,7 +476,19 @@ namespace simdjson { #endif // MSC_VER -#if defined(SIMDJSON_VISUAL_STUDIO) +#if defined(simdjson_inline) + // Prefer the user's definition of simdjson_inline; don't define it ourselves. +#elif defined(__GNUC__) && !defined(__OPTIMIZE__) + // If optimizations are disabled, forcing inlining can lead to significant + // code bloat and high compile times. Don't use simdjson_really_inline for + // unoptimized builds. +#define simdjson_inline inline +#else + // Force inlining for most simdjson functions. +#define simdjson_inline simdjson_really_inline +#endif + +#if SIMDJSON_VISUAL_STUDIO /** * Windows users need to do some extra work when building * or using a dynamic library (DLL). When building, we need @@ -2287,11 +2315,30 @@ namespace std { /// If EXPR is an error, returns it. #define SIMDJSON_TRY(EXPR) { auto _err = (EXPR); if (_err) { return _err; } } +// Unless the programmer has already set SIMDJSON_DEVELOPMENT_CHECKS, +// we want to set it under debug builds. We detect a debug build +// under Visual Studio when the _DEBUG macro is set. Under the other +// compilers, we use the fact that they define __OPTIMIZE__ whenever +// they allow optimizations. 
+// It is possible that this could miss some cases where SIMDJSON_DEVELOPMENT_CHECKS +// is helpful, but the programmer can set the macro SIMDJSON_DEVELOPMENT_CHECKS. +// It could also wrongly set SIMDJSON_DEVELOPMENT_CHECKS (e.g., if the programmer +// sets _DEBUG in a release build under Visual Studio, or if some compiler fails to +// set the __OPTIMIZE__ macro). #ifndef SIMDJSON_DEVELOPMENT_CHECKS -#ifndef NDEBUG -#define SIMDJSON_DEVELOPMENT_CHECKS -#endif -#endif +#ifdef _MSC_VER +// Visual Studio seems to set _DEBUG for debug builds. +#ifdef _DEBUG +#define SIMDJSON_DEVELOPMENT_CHECKS 1 +#endif // _DEBUG +#else // _MSC_VER +// All other compilers appear to set __OPTIMIZE__ to a positive integer +// when the compiler is optimizing. +#ifndef __OPTIMIZE__ +#define SIMDJSON_DEVELOPMENT_CHECKS 1 +#endif // __OPTIMIZE__ +#endif // _MSC_VER +#endif // SIMDJSON_DEVELOPMENT_CHECKS // The SIMDJSON_CHECK_EOF macro is a feature flag for the "don't require padding" // feature. @@ -2312,6 +2359,17 @@ namespace std { # define simdjson_fallthrough do {} while (0) /* fallthrough */ #endif // simdjson_fallthrough + +#if SIMDJSON_DEVELOPMENT_CHECKS +#define SIMDJSON_DEVELOPMENT_ASSERT(expr) do { assert ((expr)); } while (0) +#else +#define SIMDJSON_DEVELOPMENT_ASSERT(expr) do { } while (0) +#endif + +#ifndef SIMDJSON_UTF8VALIDATION +#define SIMDJSON_UTF8VALIDATION 1 +#endif + #endif // SIMDJSON_COMMON_DEFS_H /* end file include/simdjson/common_defs.h */ @@ -2339,7 +2397,7 @@ namespace simdjson { SUCCESS = 0, ///< No error CAPACITY, ///< This parser can't support a document that big MEMALLOC, ///< Error allocating memory, most likely out of memory - TAPE_ERROR, ///< Something went wrong while writing to the tape (stage 2), this is a generic error + TAPE_ERROR, ///< Something went wrong, this is a generic error DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation STRING_ERROR, ///< Problem while parsing a string T_ATOM_ERROR, ///< Problem while parsing an 
atom starting with the letter 't' @@ -2366,6 +2424,7 @@ namespace simdjson { INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. SCALAR_DOCUMENT_AS_VALUE, ///< A scalar document is treated as a value. OUT_OF_BOUNDS, ///< Attempted to access location outside of document. + TRAILING_CONTENT, ///< Unexpected trailing content in the JSON input NUM_ERROR_CODES }; @@ -2432,22 +2491,22 @@ namespace simdjson { /** * Create a new empty result with error = UNINITIALIZED. */ - simdjson_really_inline simdjson_result_base() noexcept; + simdjson_inline simdjson_result_base() noexcept; /** * Create a new error result. */ - simdjson_really_inline simdjson_result_base(error_code error) noexcept; + simdjson_inline simdjson_result_base(error_code error) noexcept; /** * Create a new successful result. */ - simdjson_really_inline simdjson_result_base(T&& value) noexcept; + simdjson_inline simdjson_result_base(T&& value) noexcept; /** * Create a new result with both things (use if you don't want to branch when creating the result). */ - simdjson_really_inline simdjson_result_base(T&& value, error_code error) noexcept; + simdjson_inline simdjson_result_base(T&& value, error_code error) noexcept; /** * Move the value and the error to the provided variables. @@ -2455,19 +2514,19 @@ namespace simdjson { * @param value The variable to assign the value to. May not be set if there is an error. * @param error The variable to assign the error to. Set to SUCCESS if there is no error. */ - simdjson_really_inline void tie(T& value, error_code& error) && noexcept; + simdjson_inline void tie(T& value, error_code& error) && noexcept; /** * Move the value to the provided variable. * * @param value The variable to assign the value to. May not be set if there is an error. */ - simdjson_really_inline error_code get(T& value) && noexcept; + simdjson_inline error_code get(T& value) && noexcept; /** * The error. 
*/ - simdjson_really_inline error_code error() const noexcept; + simdjson_inline error_code error() const noexcept; #if SIMDJSON_EXCEPTIONS @@ -2476,41 +2535,41 @@ namespace simdjson { * * @throw simdjson_error if there was an error. */ - simdjson_really_inline T& value() & noexcept(false); + simdjson_inline T& value() & noexcept(false); /** * Take the result value (move it). * * @throw simdjson_error if there was an error. */ - simdjson_really_inline T&& value() && noexcept(false); + simdjson_inline T&& value() && noexcept(false); /** * Take the result value (move it). * * @throw simdjson_error if there was an error. */ - simdjson_really_inline T&& take_value() && noexcept(false); + simdjson_inline T&& take_value() && noexcept(false); /** * Cast to the value (will throw on error). * * @throw simdjson_error if there was an error. */ - simdjson_really_inline operator T && () && noexcept(false); + simdjson_inline operator T && () && noexcept(false); #endif // SIMDJSON_EXCEPTIONS /** * Get the result value. This function is safe if and only * the error() method returns a value that evaluates to false. */ - simdjson_really_inline const T& value_unsafe() const& noexcept; + simdjson_inline const T& value_unsafe() const& noexcept; /** * Take the result value (move it). This function is safe if and only * the error() method returns a value that evaluates to false. */ - simdjson_really_inline T&& value_unsafe() && noexcept; + simdjson_inline T&& value_unsafe() && noexcept; }; // struct simdjson_result_base @@ -2526,19 +2585,19 @@ namespace simdjson { /** * @private Create a new empty result with error = UNINITIALIZED. */ - simdjson_really_inline simdjson_result() noexcept; + simdjson_inline simdjson_result() noexcept; /** * @private Create a new error result. */ - simdjson_really_inline simdjson_result(T&& value) noexcept; + simdjson_inline simdjson_result(T&& value) noexcept; /** * @private Create a new successful result. 
*/ - simdjson_really_inline simdjson_result(error_code error_code) noexcept; + simdjson_inline simdjson_result(error_code error_code) noexcept; /** * @private Create a new result with both things (use if you don't want to branch when creating the result). */ - simdjson_really_inline simdjson_result(T&& value, error_code error) noexcept; + simdjson_inline simdjson_result(T&& value, error_code error) noexcept; /** * Move the value and the error to the provided variables. @@ -2546,19 +2605,19 @@ namespace simdjson { * @param value The variable to assign the value to. May not be set if there is an error. * @param error The variable to assign the error to. Set to SUCCESS if there is no error. */ - simdjson_really_inline void tie(T& value, error_code& error) && noexcept; + simdjson_inline void tie(T& value, error_code& error) && noexcept; /** * Move the value to the provided variable. * * @param value The variable to assign the value to. May not be set if there is an error. */ - simdjson_warn_unused simdjson_really_inline error_code get(T& value) && noexcept; + simdjson_warn_unused simdjson_inline error_code get(T& value) && noexcept; /** * The error. */ - simdjson_really_inline error_code error() const noexcept; + simdjson_inline error_code error() const noexcept; #if SIMDJSON_EXCEPTIONS @@ -2567,41 +2626,41 @@ namespace simdjson { * * @throw simdjson_error if there was an error. */ - simdjson_really_inline T& value() & noexcept(false); + simdjson_inline T& value() & noexcept(false); /** * Take the result value (move it). * * @throw simdjson_error if there was an error. */ - simdjson_really_inline T&& value() && noexcept(false); + simdjson_inline T&& value() && noexcept(false); /** * Take the result value (move it). * * @throw simdjson_error if there was an error. */ - simdjson_really_inline T&& take_value() && noexcept(false); + simdjson_inline T&& take_value() && noexcept(false); /** * Cast to the value (will throw on error). 
* * @throw simdjson_error if there was an error. */ - simdjson_really_inline operator T && () && noexcept(false); + simdjson_inline operator T && () && noexcept(false); #endif // SIMDJSON_EXCEPTIONS /** * Get the result value. This function is safe if and only * the error() method returns a value that evaluates to false. */ - simdjson_really_inline const T& value_unsafe() const& noexcept; + simdjson_inline const T& value_unsafe() const& noexcept; /** * Take the result value (move it). This function is safe if and only * the error() method returns a value that evaluates to false. */ - simdjson_really_inline T&& value_unsafe() && noexcept; + simdjson_inline T&& value_unsafe() && noexcept; }; // struct simdjson_result @@ -2886,15 +2945,15 @@ namespace simdjson { }; // padded_string_view #if SIMDJSON_EXCEPTIONS -/** - * Send padded_string instance to an output stream. - * - * @param out The output stream. - * @param s The padded_string_view. - * @throw simdjson_error if the result being printed has an error. If there is an error with the - * underlying output stream, that error will be propagated (simdjson_error will not be - * thrown). - */ + /** + * Send padded_string instance to an output stream. + * + * @param out The output stream. + * @param s The padded_string_view. + * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown). + */ inline std::ostream& operator<<(std::ostream& out, simdjson_result& s) noexcept(false) { return out << s.value(); } #endif @@ -3021,9 +3080,26 @@ namespace simdjson { * * @param str pointer to the beginning of a valid UTF-8 JSON string, must end with an unescaped quote. * @param dst pointer to a destination buffer, it must point a region in memory of sufficient size. + * @param allow_replacement whether we allow a replacement character when the UTF-8 contains unmatched surrogate pairs. 
* @return end of the of the written region (exclusive) or nullptr in case of error. */ - simdjson_warn_unused virtual uint8_t* parse_string(const uint8_t* src, uint8_t* dst) const noexcept = 0; + simdjson_warn_unused virtual uint8_t* parse_string(const uint8_t* src, uint8_t* dst, bool allow_replacement) const noexcept = 0; + + /** + * Unescape a NON-valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then nullptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + * + * Overridden by each implementation. + * + * @param src pointer to the beginning of a possibly invalid UTF-8 JSON string, must end with an unescaped quote. + * @param dst pointer to a destination buffer, it must point a region in memory of sufficient size. + * @return end of the written region (exclusive) or nullptr in case of error. + */ + simdjson_warn_unused virtual uint8_t* parse_wobbly_string(const uint8_t* src, uint8_t* dst) const noexcept = 0; /** * Change the capacity of this parser. @@ -3067,14 +3143,14 @@ namespace simdjson { * * @return Current capacity, in bytes. */ - simdjson_really_inline size_t capacity() const noexcept; + simdjson_inline size_t capacity() const noexcept; /** * The maximum level of nested object and arrays supported by this parser. * * @return Maximum depth, in bytes. */ - simdjson_really_inline size_t max_depth() const noexcept; + simdjson_inline size_t max_depth() const noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length @@ -3103,23 +3179,23 @@ namespace simdjson { size_t _max_depth{ 0 }; // Declaring these so that subclasses can use them to implement their constructors. 
- simdjson_really_inline dom_parser_implementation() noexcept; - simdjson_really_inline dom_parser_implementation(dom_parser_implementation&& other) noexcept; - simdjson_really_inline dom_parser_implementation& operator=(dom_parser_implementation&& other) noexcept; + simdjson_inline dom_parser_implementation() noexcept; + simdjson_inline dom_parser_implementation(dom_parser_implementation&& other) noexcept; + simdjson_inline dom_parser_implementation& operator=(dom_parser_implementation&& other) noexcept; - simdjson_really_inline dom_parser_implementation(const dom_parser_implementation&) noexcept = delete; - simdjson_really_inline dom_parser_implementation& operator=(const dom_parser_implementation& other) noexcept = delete; + simdjson_inline dom_parser_implementation(const dom_parser_implementation&) noexcept = delete; + simdjson_inline dom_parser_implementation& operator=(const dom_parser_implementation& other) noexcept = delete; }; // class dom_parser_implementation - simdjson_really_inline dom_parser_implementation::dom_parser_implementation() noexcept = default; - simdjson_really_inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation&& other) noexcept = default; - simdjson_really_inline dom_parser_implementation& dom_parser_implementation::operator=(dom_parser_implementation&& other) noexcept = default; + simdjson_inline dom_parser_implementation::dom_parser_implementation() noexcept = default; + simdjson_inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation&& other) noexcept = default; + simdjson_inline dom_parser_implementation& dom_parser_implementation::operator=(dom_parser_implementation&& other) noexcept = default; - simdjson_really_inline size_t dom_parser_implementation::capacity() const noexcept { + simdjson_inline size_t dom_parser_implementation::capacity() const noexcept { return _capacity; } - simdjson_really_inline size_t dom_parser_implementation::max_depth() const noexcept { + 
simdjson_inline size_t dom_parser_implementation::max_depth() const noexcept { return _max_depth; } @@ -3228,22 +3304,12 @@ namespace simdjson { return instruction_set::ALTIVEC; } -#elif defined(__arm__) || defined(__aarch64__) // incl. armel, armhf, arm64 - -#if defined(__ARM_NEON) +#elif defined(__aarch64__) || defined(_M_ARM64) static inline uint32_t detect_supported_architectures() { return instruction_set::NEON; } -#else // ARM without NEON - - static inline uint32_t detect_supported_architectures() { - return instruction_set::DEFAULT; - } - -#endif - #elif defined(__x86_64__) || defined(_M_AMD64) // x64 @@ -3259,7 +3325,7 @@ namespace simdjson { constexpr uint32_t cpuid_avx512er_bit = 1 << 27; ///< @private bit 27 of EBX for EAX=0x7 constexpr uint32_t cpuid_avx512cd_bit = 1 << 28; ///< @private bit 28 of EBX for EAX=0x7 constexpr uint32_t cpuid_avx512bw_bit = 1 << 30; ///< @private bit 30 of EBX for EAX=0x7 - constexpr uint32_t cpuid_avx512vl_bit = 1 << 31; ///< @private bit 31 of EBX for EAX=0x7 + constexpr uint32_t cpuid_avx512vl_bit = 1U << 31; ///< @private bit 31 of EBX for EAX=0x7 constexpr uint32_t cpuid_avx512vbmi2_bit = 1 << 6; ///< @private bit 6 of ECX for EAX=0x7 constexpr uint32_t cpuid_sse42_bit = 1 << 20; ///< @private bit 20 of ECX for EAX=0x1 constexpr uint32_t cpuid_pclmulqdq_bit = 1 << 1; ///< @private bit 1 of ECX for EAX=0x1 @@ -3393,7 +3459,7 @@ namespace simdjson { * @param sv the string_view to validate. * @return true if the string is valid UTF-8. */ - simdjson_really_inline simdjson_warn_unused bool validate_utf8(const std::string_view sv) noexcept { + simdjson_inline simdjson_warn_unused bool validate_utf8(const std::string_view sv) noexcept { return validate_utf8(sv.data(), sv.size()); } @@ -3403,7 +3469,7 @@ namespace simdjson { * @param p the string to validate. * @return true if the string is valid UTF-8. 
*/ - simdjson_really_inline simdjson_warn_unused bool validate_utf8(const std::string& s) noexcept { + simdjson_inline simdjson_warn_unused bool validate_utf8(const std::string& s) noexcept { return validate_utf8(s.data(), s.size()); } @@ -3426,7 +3492,7 @@ namespace simdjson { * const implementation *impl = simdjson::get_active_implementation(); * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; * - * @return the name of the implementation, e.g. "haswell", "westmere", "arm64" + * @return the name of the implementation, e.g. "haswell", "westmere", "arm64". */ virtual const std::string& name() const { return _name; } @@ -3436,7 +3502,7 @@ namespace simdjson { * const implementation *impl = simdjson::get_active_implementation(); * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; * - * @return the name of the implementation, e.g. "haswell", "westmere", "arm64" + * @return the description of the implementation, e.g. "Intel/AMD AVX2", "Intel/AMD SSE4.2", "ARM NEON". */ virtual const std::string& description() const { return _description; } @@ -3445,8 +3511,7 @@ namespace simdjson { * and the current CPU match. This function may poll the current CPU/system * and should therefore not be called too often if performance is a concern. * - * - * @return true if the implementation can be safely used on the current system (determined at runtime) + * @return true if the implementation can be safely used on the current system (determined at runtime). */ bool supported_by_runtime_system() const; @@ -3455,9 +3520,9 @@ namespace simdjson { * * The instruction sets this implementation is compiled against. * - * @return a mask of all required `internal::instruction_set::` values + * @return a mask of all required `internal::instruction_set::` values. 
*/ - virtual uint32_t required_instruction_sets() const { return _required_instruction_sets; }; + virtual uint32_t required_instruction_sets() const { return _required_instruction_sets; } /** * @private For internal implementation use @@ -3468,7 +3533,7 @@ namespace simdjson { * @param capacity The largest document that will be passed to the parser. * @param max_depth The maximum JSON object/array nesting this parser is expected to handle. * @param dst The place to put the resulting parser implementation. - * @return the name of the implementation, e.g. "haswell", "westmere", "arm64" + * @return the error code, or SUCCESS if there was no error. */ virtual error_code create_dom_parser_implementation( size_t capacity, @@ -3505,7 +3570,7 @@ namespace simdjson { protected: /** @private Construct an implementation with the given name and description. For subclasses. */ - simdjson_really_inline implementation( + simdjson_inline implementation( std::string_view name, std::string_view description, uint32_t required_instruction_sets @@ -3543,7 +3608,7 @@ namespace simdjson { class available_implementation_list { public: /** Get the list of available implementations compiled into simdjson */ - simdjson_really_inline available_implementation_list() {} + simdjson_inline available_implementation_list() {} /** Number of implementations */ size_t size() const noexcept; /** STL const begin() iterator */ @@ -3670,7 +3735,7 @@ namespace simdjson { // template - simdjson_really_inline void simdjson_result_base::tie(T& value, error_code& error) && noexcept { + simdjson_inline void simdjson_result_base::tie(T& value, error_code& error) && noexcept { error = this->second; if (!error) { value = std::forward>(*this).first; @@ -3678,64 +3743,64 @@ namespace simdjson { } template - simdjson_warn_unused simdjson_really_inline error_code simdjson_result_base::get(T& value) && noexcept { + simdjson_warn_unused simdjson_inline error_code simdjson_result_base::get(T& value) && noexcept { 
error_code error; std::forward>(*this).tie(value, error); return error; } template - simdjson_really_inline error_code simdjson_result_base::error() const noexcept { + simdjson_inline error_code simdjson_result_base::error() const noexcept { return this->second; } #if SIMDJSON_EXCEPTIONS template - simdjson_really_inline T& simdjson_result_base::value() & noexcept(false) { + simdjson_inline T& simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return this->first; } template - simdjson_really_inline T&& simdjson_result_base::value() && noexcept(false) { + simdjson_inline T&& simdjson_result_base::value() && noexcept(false) { return std::forward>(*this).take_value(); } template - simdjson_really_inline T&& simdjson_result_base::take_value() && noexcept(false) { + simdjson_inline T&& simdjson_result_base::take_value() && noexcept(false) { if (error()) { throw simdjson_error(error()); } return std::forward(this->first); } template - simdjson_really_inline simdjson_result_base::operator T && () && noexcept(false) { + simdjson_inline simdjson_result_base::operator T && () && noexcept(false) { return std::forward>(*this).take_value(); } #endif // SIMDJSON_EXCEPTIONS template - simdjson_really_inline const T& simdjson_result_base::value_unsafe() const& noexcept { + simdjson_inline const T& simdjson_result_base::value_unsafe() const& noexcept { return this->first; } template - simdjson_really_inline T&& simdjson_result_base::value_unsafe() && noexcept { + simdjson_inline T&& simdjson_result_base::value_unsafe() && noexcept { return std::forward(this->first); } template - simdjson_really_inline simdjson_result_base::simdjson_result_base(T&& value, error_code error) noexcept + simdjson_inline simdjson_result_base::simdjson_result_base(T&& value, error_code error) noexcept : std::pair(std::forward(value), error) {} template - simdjson_really_inline simdjson_result_base::simdjson_result_base(error_code error) noexcept + 
simdjson_inline simdjson_result_base::simdjson_result_base(error_code error) noexcept : simdjson_result_base(T{}, error) {} template - simdjson_really_inline simdjson_result_base::simdjson_result_base(T&& value) noexcept + simdjson_inline simdjson_result_base::simdjson_result_base(T&& value) noexcept : simdjson_result_base(std::forward(value), SUCCESS) {} template - simdjson_really_inline simdjson_result_base::simdjson_result_base() noexcept + simdjson_inline simdjson_result_base::simdjson_result_base() noexcept : simdjson_result_base(T{}, UNINITIALIZED) {} } // namespace internal @@ -3745,65 +3810,65 @@ namespace simdjson { /// template - simdjson_really_inline void simdjson_result::tie(T& value, error_code& error) && noexcept { + simdjson_inline void simdjson_result::tie(T& value, error_code& error) && noexcept { std::forward>(*this).tie(value, error); } template - simdjson_warn_unused simdjson_really_inline error_code simdjson_result::get(T& value) && noexcept { + simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T& value) && noexcept { return std::forward>(*this).get(value); } template - simdjson_really_inline error_code simdjson_result::error() const noexcept { + simdjson_inline error_code simdjson_result::error() const noexcept { return internal::simdjson_result_base::error(); } #if SIMDJSON_EXCEPTIONS template - simdjson_really_inline T& simdjson_result::value() & noexcept(false) { + simdjson_inline T& simdjson_result::value() & noexcept(false) { return internal::simdjson_result_base::value(); } template - simdjson_really_inline T&& simdjson_result::value() && noexcept(false) { + simdjson_inline T&& simdjson_result::value() && noexcept(false) { return std::forward>(*this).value(); } template - simdjson_really_inline T&& simdjson_result::take_value() && noexcept(false) { + simdjson_inline T&& simdjson_result::take_value() && noexcept(false) { return std::forward>(*this).take_value(); } template - simdjson_really_inline 
simdjson_result::operator T && () && noexcept(false) { + simdjson_inline simdjson_result::operator T && () && noexcept(false) { return std::forward>(*this).take_value(); } #endif // SIMDJSON_EXCEPTIONS template - simdjson_really_inline const T& simdjson_result::value_unsafe() const& noexcept { + simdjson_inline const T& simdjson_result::value_unsafe() const& noexcept { return internal::simdjson_result_base::value_unsafe(); } template - simdjson_really_inline T&& simdjson_result::value_unsafe() && noexcept { + simdjson_inline T&& simdjson_result::value_unsafe() && noexcept { return std::forward>(*this).value_unsafe(); } template - simdjson_really_inline simdjson_result::simdjson_result(T&& value, error_code error) noexcept + simdjson_inline simdjson_result::simdjson_result(T&& value, error_code error) noexcept : internal::simdjson_result_base(std::forward(value), error) {} template - simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : internal::simdjson_result_base(error) {} template - simdjson_really_inline simdjson_result::simdjson_result(T&& value) noexcept + simdjson_inline simdjson_result::simdjson_result(T&& value) noexcept : internal::simdjson_result_base(std::forward(value)) {} template - simdjson_really_inline simdjson_result::simdjson_result() noexcept + simdjson_inline simdjson_result::simdjson_result() noexcept : internal::simdjson_result_base() {} } // namespace simdjson @@ -3856,7 +3921,7 @@ namespace simdjson { } // namespace internal - inline padded_string::padded_string() noexcept {} + inline padded_string::padded_string() noexcept = default; inline padded_string::padded_string(size_t length) noexcept : viable_size(length), data_ptr(internal::allocate_padded_buffer(length)) { } @@ -3938,11 +4003,17 @@ namespace simdjson { } // Get the file size - if (std::fseek(fp, 0, SEEK_END) < 0) { + int ret; +#if SIMDJSON_VISUAL_STUDIO && 
!SIMDJSON_IS_32BITS + ret = _fseeki64(fp, 0, SEEK_END); +#else + ret = std::fseek(fp, 0, SEEK_END); +#endif // _WIN64 + if (ret < 0) { std::fclose(fp); return IO_ERROR; } -#if defined(SIMDJSON_VISUAL_STUDIO) && !SIMDJSON_IS_32BITS +#if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS __int64 llen = _ftelli64(fp); if (llen == -1L) { std::fclose(fp); @@ -4082,25 +4153,26 @@ namespace simdjson { */ class tape_ref { public: - simdjson_really_inline tape_ref() noexcept; - simdjson_really_inline tape_ref(const dom::document* doc, size_t json_index) noexcept; + simdjson_inline tape_ref() noexcept; + simdjson_inline tape_ref(const dom::document* doc, size_t json_index) noexcept; inline size_t after_element() const noexcept; - simdjson_really_inline tape_type tape_ref_type() const noexcept; - simdjson_really_inline uint64_t tape_value() const noexcept; - simdjson_really_inline bool is_double() const noexcept; - simdjson_really_inline bool is_int64() const noexcept; - simdjson_really_inline bool is_uint64() const noexcept; - simdjson_really_inline bool is_false() const noexcept; - simdjson_really_inline bool is_true() const noexcept; - simdjson_really_inline bool is_null_on_tape() const noexcept;// different name to avoid clash with is_null. - simdjson_really_inline uint32_t matching_brace_index() const noexcept; - simdjson_really_inline uint32_t scope_count() const noexcept; + simdjson_inline tape_type tape_ref_type() const noexcept; + simdjson_inline uint64_t tape_value() const noexcept; + simdjson_inline bool is_double() const noexcept; + simdjson_inline bool is_int64() const noexcept; + simdjson_inline bool is_uint64() const noexcept; + simdjson_inline bool is_false() const noexcept; + simdjson_inline bool is_true() const noexcept; + simdjson_inline bool is_null_on_tape() const noexcept;// different name to avoid clash with is_null. 
+ simdjson_inline uint32_t matching_brace_index() const noexcept; + simdjson_inline uint32_t scope_count() const noexcept; template - simdjson_really_inline T next_tape_value() const noexcept; - simdjson_really_inline uint32_t get_string_length() const noexcept; - simdjson_really_inline const char* get_c_str() const noexcept; + simdjson_inline T next_tape_value() const noexcept; + simdjson_inline uint32_t get_string_length() const noexcept; + simdjson_inline const char* get_c_str() const noexcept; inline std::string_view get_string_view() const noexcept; - simdjson_really_inline bool is_document_root() const noexcept; + simdjson_inline bool is_document_root() const noexcept; + simdjson_inline bool usable() const noexcept; /** The document this element references. */ const dom::document* doc; @@ -4132,7 +4204,7 @@ namespace simdjson { class array { public: /** Create a new, invalid array */ - simdjson_really_inline array() noexcept; + simdjson_inline array() noexcept; class iterator { public: @@ -4172,7 +4244,7 @@ namespace simdjson { iterator(const iterator&) noexcept = default; iterator& operator=(const iterator&) noexcept = default; private: - simdjson_really_inline iterator(const internal::tape_ref& tape) noexcept; + simdjson_inline iterator(const internal::tape_ref& tape) noexcept; internal::tape_ref tape; friend class array; }; @@ -4243,7 +4315,7 @@ namespace simdjson { inline simdjson_result at(size_t index) const noexcept; private: - simdjson_really_inline array(const internal::tape_ref& tape) noexcept; + simdjson_inline array(const internal::tape_ref& tape) noexcept; internal::tape_ref tape; friend class element; friend struct simdjson_result; @@ -4258,9 +4330,9 @@ namespace simdjson { template<> struct simdjson_result : public internal::simdjson_result_base { public: - simdjson_really_inline simdjson_result() noexcept; ///< @private - simdjson_really_inline simdjson_result(dom::array value) noexcept; ///< @private - simdjson_really_inline 
simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept; ///< @private + simdjson_inline simdjson_result(dom::array value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; inline simdjson_result at(size_t index) const noexcept; @@ -4455,20 +4527,20 @@ namespace simdjson { * to allocate an initial capacity, call allocate() after constructing the parser. * Defaults to SIMDJSON_MAXSIZE_BYTES (the largest single document simdjson can process). */ - simdjson_really_inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; + simdjson_inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; /** * Take another parser's buffers and state. * * @param other The parser to take. Its capacity is zeroed. */ - simdjson_really_inline parser(parser&& other) noexcept; + simdjson_inline parser(parser&& other) noexcept; parser(const parser&) = delete; ///< @private Disallow copying /** * Take another parser's buffers and state. * * @param other The parser to take. Its capacity is zeroed. */ - simdjson_really_inline parser& operator=(parser&& other) noexcept; + simdjson_inline parser& operator=(parser&& other) noexcept; parser& operator=(const parser&) = delete; ///< @private Disallow copying /** Deallocate the JSON parser. */ @@ -4532,7 +4604,9 @@ namespace simdjson { * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what - * those bytes are initialized to, as long as they are allocated. + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. 
* * If realloc_if_needed is true (the default), it is assumed that the buffer does *not* have enough padding, * and it is copied into an enlarged temporary buffer before parsing. Thus the following is safe: @@ -4578,17 +4652,20 @@ namespace simdjson { inline simdjson_result parse(const uint8_t* buf, size_t len, bool realloc_if_needed = true) & noexcept; inline simdjson_result parse(const uint8_t* buf, size_t len, bool realloc_if_needed = true) && = delete; /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ - simdjson_really_inline simdjson_result parse(const char* buf, size_t len, bool realloc_if_needed = true) & noexcept; - simdjson_really_inline simdjson_result parse(const char* buf, size_t len, bool realloc_if_needed = true) && = delete; + simdjson_inline simdjson_result parse(const char* buf, size_t len, bool realloc_if_needed = true) & noexcept; + simdjson_inline simdjson_result parse(const char* buf, size_t len, bool realloc_if_needed = true) && = delete; /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ - simdjson_really_inline simdjson_result parse(const std::string& s) & noexcept; - simdjson_really_inline simdjson_result parse(const std::string& s) && = delete; + simdjson_inline simdjson_result parse(const std::string& s) & noexcept; + simdjson_inline simdjson_result parse(const std::string& s) && = delete; /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ - simdjson_really_inline simdjson_result parse(const padded_string& s) & noexcept; - simdjson_really_inline simdjson_result parse(const padded_string& s) && = delete; + simdjson_inline simdjson_result parse(const padded_string& s) & noexcept; + simdjson_inline simdjson_result parse(const padded_string& s) && = delete; + /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ + simdjson_inline simdjson_result parse(const padded_string_view& v) & noexcept; + simdjson_inline simdjson_result 
parse(const padded_string_view& v) && = delete; /** @private We do not want to allow implicit conversion from C string to std::string. */ - simdjson_really_inline simdjson_result parse(const char* buf) noexcept = delete; + simdjson_inline simdjson_result parse(const char* buf) noexcept = delete; /** * Parse a JSON document into a provide document instance and return a temporary reference to it. @@ -4637,17 +4714,17 @@ namespace simdjson { inline simdjson_result parse_into_document(document& doc, const uint8_t* buf, size_t len, bool realloc_if_needed = true) & noexcept; inline simdjson_result parse_into_document(document& doc, const uint8_t* buf, size_t len, bool realloc_if_needed = true) && = delete; /** @overload parse_into_document(const uint8_t *buf, size_t len, bool realloc_if_needed) */ - simdjson_really_inline simdjson_result parse_into_document(document& doc, const char* buf, size_t len, bool realloc_if_needed = true) & noexcept; - simdjson_really_inline simdjson_result parse_into_document(document& doc, const char* buf, size_t len, bool realloc_if_needed = true) && = delete; + simdjson_inline simdjson_result parse_into_document(document& doc, const char* buf, size_t len, bool realloc_if_needed = true) & noexcept; + simdjson_inline simdjson_result parse_into_document(document& doc, const char* buf, size_t len, bool realloc_if_needed = true) && = delete; /** @overload parse_into_document(const uint8_t *buf, size_t len, bool realloc_if_needed) */ - simdjson_really_inline simdjson_result parse_into_document(document& doc, const std::string& s) & noexcept; - simdjson_really_inline simdjson_result parse_into_document(document& doc, const std::string& s) && = delete; + simdjson_inline simdjson_result parse_into_document(document& doc, const std::string& s) & noexcept; + simdjson_inline simdjson_result parse_into_document(document& doc, const std::string& s) && = delete; /** @overload parse_into_document(const uint8_t *buf, size_t len, bool realloc_if_needed) */ - 
simdjson_really_inline simdjson_result parse_into_document(document& doc, const padded_string& s) & noexcept; - simdjson_really_inline simdjson_result parse_into_document(document& doc, const padded_string& s) && = delete; + simdjson_inline simdjson_result parse_into_document(document& doc, const padded_string& s) & noexcept; + simdjson_inline simdjson_result parse_into_document(document& doc, const padded_string& s) && = delete; /** @private We do not want to allow implicit conversion from C string to std::string. */ - simdjson_really_inline simdjson_result parse_into_document(document& doc, const char* buf) noexcept = delete; + simdjson_inline simdjson_result parse_into_document(document& doc, const char* buf) noexcept = delete; /** * Load a file containing many JSON documents. @@ -4792,7 +4869,9 @@ namespace simdjson { * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what - * those bytes are initialized to, as long as they are allocated. + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. * * ### Threads * @@ -4859,7 +4938,7 @@ namespace simdjson { * * @return Current capacity, in bytes. */ - simdjson_really_inline size_t capacity() const noexcept; + simdjson_inline size_t capacity() const noexcept; /** * The largest document this parser can automatically support. @@ -4868,14 +4947,14 @@ namespace simdjson { * * @return Maximum capacity, in bytes. */ - simdjson_really_inline size_t max_capacity() const noexcept; + simdjson_inline size_t max_capacity() const noexcept; /** * The maximum level of nested object and arrays supported by this parser. * * @return Maximum depth, in bytes. 
*/ - simdjson_really_inline size_t max_depth() const noexcept; + simdjson_inline size_t max_depth() const noexcept; /** * Set max_capacity. This is the largest document this parser can automatically support. @@ -4891,7 +4970,7 @@ namespace simdjson { * * @param max_capacity The new maximum capacity, in bytes. */ - simdjson_really_inline void set_max_capacity(size_t max_capacity) noexcept; + simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; #ifdef SIMDJSON_THREADS_ENABLED /** @@ -5073,13 +5152,13 @@ namespace simdjson { * error = parser.parse_many(json).get(docs); * ``` */ - simdjson_really_inline document_stream() noexcept; + simdjson_inline document_stream() noexcept; /** Move one document_stream to another. */ - simdjson_really_inline document_stream(document_stream&& other) noexcept = default; + simdjson_inline document_stream(document_stream&& other) noexcept = default; /** Move one document_stream to another. */ - simdjson_really_inline document_stream& operator=(document_stream&& other) noexcept = default; + simdjson_inline document_stream& operator=(document_stream&& other) noexcept = default; - simdjson_really_inline ~document_stream() noexcept; + simdjson_inline ~document_stream() noexcept; /** * Returns the input size in bytes. */ @@ -5118,11 +5197,11 @@ namespace simdjson { /** * Default constructor. */ - simdjson_really_inline iterator() noexcept; + simdjson_inline iterator() noexcept; /** * Get the current document (or error). */ - simdjson_really_inline reference operator*() noexcept; + simdjson_inline reference operator*() noexcept; /** * Advance to the next document (prefix). */ @@ -5131,7 +5210,7 @@ namespace simdjson { * Check if we're at the end yet. * @param other the end iterator to compare to. 
*/ - simdjson_really_inline bool operator!=(const iterator& other) const noexcept; + simdjson_inline bool operator!=(const iterator& other) const noexcept; /** * @private * @@ -5147,7 +5226,7 @@ namespace simdjson { * may change in future versions of simdjson: we find the API somewhat * awkward and we would like to offer something friendlier. */ - simdjson_really_inline size_t current_index() const noexcept; + simdjson_inline size_t current_index() const noexcept; /** * @private * @@ -5167,10 +5246,10 @@ namespace simdjson { * may change in future versions of simdjson: we find the API somewhat * awkward and we would like to offer something friendlier. */ - simdjson_really_inline std::string_view source() const noexcept; + simdjson_inline std::string_view source() const noexcept; private: - simdjson_really_inline iterator(document_stream* s, bool finished) noexcept; + simdjson_inline iterator(document_stream* s, bool finished) noexcept; /** The document_stream we're iterating through. */ document_stream* stream; /** Whether we're finished or not. */ @@ -5181,11 +5260,11 @@ namespace simdjson { /** * Start iterating the documents in the stream. */ - simdjson_really_inline iterator begin() noexcept; + simdjson_inline iterator begin() noexcept; /** * The end of the stream, for iterator comparison purposes. 
*/ - simdjson_really_inline iterator end() noexcept; + simdjson_inline iterator end() noexcept; private: @@ -5201,7 +5280,7 @@ namespace simdjson { * @param len is the length of the raw byte buffer in bytes * @param batch_size is the size of the windows (must be strictly greater or equal to the largest JSON document) */ - simdjson_really_inline document_stream( + simdjson_inline document_stream( dom::parser& parser, const uint8_t* buf, size_t len, @@ -5295,19 +5374,19 @@ namespace simdjson { template<> struct simdjson_result : public internal::simdjson_result_base { public: - simdjson_really_inline simdjson_result() noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result(dom::document_stream&& value) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result(dom::document_stream&& value) noexcept; ///< @private #if SIMDJSON_EXCEPTIONS - simdjson_really_inline dom::document_stream::iterator begin() noexcept(false); - simdjson_really_inline dom::document_stream::iterator end() noexcept(false); + simdjson_inline dom::document_stream::iterator begin() noexcept(false); + simdjson_inline dom::document_stream::iterator end() noexcept(false); #else // SIMDJSON_EXCEPTIONS #ifndef SIMDJSON_DISABLE_DEPRECATED_API [[deprecated("parse_many() and load_many() may return errors. Use document_stream stream; error = parser.parse_many().get(doc); instead.")]] - simdjson_really_inline dom::document_stream::iterator begin() noexcept; + simdjson_inline dom::document_stream::iterator begin() noexcept; [[deprecated("parse_many() and load_many() may return errors. 
Use document_stream stream; error = parser.parse_many().get(doc); instead.")]] - simdjson_really_inline dom::document_stream::iterator end() noexcept; + simdjson_inline dom::document_stream::iterator end() noexcept; #endif // SIMDJSON_DISABLE_DEPRECATED_API #endif // SIMDJSON_EXCEPTIONS }; // struct simdjson_result @@ -5356,10 +5435,10 @@ namespace simdjson { class element { public: /** Create a new, invalid element. */ - simdjson_really_inline element() noexcept; + simdjson_inline element() noexcept; /** The type of this element. */ - simdjson_really_inline element_type type() const noexcept; + simdjson_inline element_type type() const noexcept; /** * Cast this element to an array. @@ -5511,7 +5590,7 @@ namespace simdjson { * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object */ template - simdjson_really_inline bool is() const noexcept; + simdjson_inline bool is() const noexcept; /** * Get the value as the provided type (T). @@ -5556,7 +5635,7 @@ namespace simdjson { * @returns The error that occurred, or SUCCESS if there was no error. */ template - simdjson_warn_unused simdjson_really_inline error_code get(T& value) const noexcept; + simdjson_warn_unused simdjson_inline error_code get(T& value) const noexcept; /** * Get the value as the provided type (T), setting error if it's not the given type. 
@@ -5780,7 +5859,7 @@ namespace simdjson { inline bool dump_raw_tape(std::ostream& out) const noexcept; private: - simdjson_really_inline element(const internal::tape_ref& tape) noexcept; + simdjson_inline element(const internal::tape_ref& tape) noexcept; internal::tape_ref tape; friend class document; friend class object; @@ -5797,59 +5876,59 @@ namespace simdjson { template<> struct simdjson_result : public internal::simdjson_result_base { public: - simdjson_really_inline simdjson_result() noexcept; ///< @private - simdjson_really_inline simdjson_result(dom::element&& value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept; ///< @private + simdjson_inline simdjson_result(dom::element&& value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result type() const noexcept; + simdjson_inline simdjson_result type() const noexcept; template - simdjson_really_inline bool is() const noexcept; + simdjson_inline bool is() const noexcept; template - simdjson_really_inline simdjson_result get() const noexcept; + simdjson_inline simdjson_result get() const noexcept; template - simdjson_warn_unused simdjson_really_inline error_code get(T& value) const noexcept; + simdjson_warn_unused simdjson_inline error_code get(T& value) const noexcept; - simdjson_really_inline simdjson_result get_array() const noexcept; - simdjson_really_inline simdjson_result get_object() const noexcept; - simdjson_really_inline simdjson_result get_c_str() const noexcept; - simdjson_really_inline simdjson_result get_string_length() const noexcept; - simdjson_really_inline simdjson_result get_string() const noexcept; - simdjson_really_inline simdjson_result get_int64() const noexcept; - simdjson_really_inline simdjson_result get_uint64() const noexcept; - simdjson_really_inline simdjson_result get_double() const noexcept; 
- simdjson_really_inline simdjson_result get_bool() const noexcept; + simdjson_inline simdjson_result get_array() const noexcept; + simdjson_inline simdjson_result get_object() const noexcept; + simdjson_inline simdjson_result get_c_str() const noexcept; + simdjson_inline simdjson_result get_string_length() const noexcept; + simdjson_inline simdjson_result get_string() const noexcept; + simdjson_inline simdjson_result get_int64() const noexcept; + simdjson_inline simdjson_result get_uint64() const noexcept; + simdjson_inline simdjson_result get_double() const noexcept; + simdjson_inline simdjson_result get_bool() const noexcept; - simdjson_really_inline bool is_array() const noexcept; - simdjson_really_inline bool is_object() const noexcept; - simdjson_really_inline bool is_string() const noexcept; - simdjson_really_inline bool is_int64() const noexcept; - simdjson_really_inline bool is_uint64() const noexcept; - simdjson_really_inline bool is_double() const noexcept; - simdjson_really_inline bool is_number() const noexcept; - simdjson_really_inline bool is_bool() const noexcept; - simdjson_really_inline bool is_null() const noexcept; + simdjson_inline bool is_array() const noexcept; + simdjson_inline bool is_object() const noexcept; + simdjson_inline bool is_string() const noexcept; + simdjson_inline bool is_int64() const noexcept; + simdjson_inline bool is_uint64() const noexcept; + simdjson_inline bool is_double() const noexcept; + simdjson_inline bool is_number() const noexcept; + simdjson_inline bool is_bool() const noexcept; + simdjson_inline bool is_null() const noexcept; - simdjson_really_inline simdjson_result operator[](std::string_view key) const noexcept; - simdjson_really_inline simdjson_result operator[](const char* key) const noexcept; - simdjson_really_inline simdjson_result at_pointer(const std::string_view json_pointer) const noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) const noexcept; + simdjson_inline 
simdjson_result operator[](const char* key) const noexcept; + simdjson_inline simdjson_result at_pointer(const std::string_view json_pointer) const noexcept; [[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] - simdjson_really_inline simdjson_result at(const std::string_view json_pointer) const noexcept; - simdjson_really_inline simdjson_result at(size_t index) const noexcept; - simdjson_really_inline simdjson_result at_key(std::string_view key) const noexcept; - simdjson_really_inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; + simdjson_inline simdjson_result at(const std::string_view json_pointer) const noexcept; + simdjson_inline simdjson_result at(size_t index) const noexcept; + simdjson_inline simdjson_result at_key(std::string_view key) const noexcept; + simdjson_inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; #if SIMDJSON_EXCEPTIONS - simdjson_really_inline operator bool() const noexcept(false); - simdjson_really_inline explicit operator const char* () const noexcept(false); - simdjson_really_inline operator std::string_view() const noexcept(false); - simdjson_really_inline operator uint64_t() const noexcept(false); - simdjson_really_inline operator int64_t() const noexcept(false); - simdjson_really_inline operator double() const noexcept(false); - simdjson_really_inline operator dom::array() const noexcept(false); - simdjson_really_inline operator dom::object() const noexcept(false); + simdjson_inline operator bool() const noexcept(false); + simdjson_inline explicit operator const char* () const noexcept(false); + simdjson_inline operator std::string_view() const noexcept(false); + simdjson_inline operator uint64_t() const noexcept(false); + simdjson_inline operator int64_t() const noexcept(false); + simdjson_inline operator double() const noexcept(false); + simdjson_inline operator dom::array() const 
noexcept(false); + simdjson_inline operator dom::object() const noexcept(false); - simdjson_really_inline dom::array::iterator begin() const noexcept(false); - simdjson_really_inline dom::array::iterator end() const noexcept(false); + simdjson_inline dom::array::iterator begin() const noexcept(false); + simdjson_inline dom::array::iterator end() const noexcept(false); #endif // SIMDJSON_EXCEPTIONS }; @@ -5880,7 +5959,7 @@ namespace simdjson { class object { public: /** Create a new, invalid object */ - simdjson_really_inline object() noexcept; + simdjson_inline object() noexcept; class iterator { public: @@ -5950,7 +6029,7 @@ namespace simdjson { iterator(const iterator&) noexcept = default; iterator& operator=(const iterator&) noexcept = default; private: - simdjson_really_inline iterator(const internal::tape_ref& tape) noexcept; + simdjson_inline iterator(const internal::tape_ref& tape) noexcept; internal::tape_ref tape; @@ -6064,7 +6143,7 @@ namespace simdjson { inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; private: - simdjson_really_inline object(const internal::tape_ref& tape) noexcept; + simdjson_inline object(const internal::tape_ref& tape) noexcept; internal::tape_ref tape; @@ -6085,7 +6164,7 @@ namespace simdjson { element value; private: - simdjson_really_inline key_value_pair(std::string_view _key, element _value) noexcept; + simdjson_inline key_value_pair(std::string_view _key, element _value) noexcept; friend class object; }; @@ -6095,9 +6174,9 @@ namespace simdjson { template<> struct simdjson_result : public internal::simdjson_result_base { public: - simdjson_really_inline simdjson_result() noexcept; ///< @private - simdjson_really_inline simdjson_result(dom::object value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept; ///< @private + simdjson_inline simdjson_result(dom::object value) noexcept; ///< @private 
+ simdjson_inline simdjson_result(error_code error) noexcept; ///< @private inline simdjson_result operator[](std::string_view key) const noexcept; inline simdjson_result operator[](const char* key) const noexcept; @@ -6167,7 +6246,7 @@ namespace simdjson { /** Append an object to the builder (to be printed) **/ inline void append(simdjson::dom::object value); /** Reset the builder (so that it would print the empty string) **/ - simdjson_really_inline void clear(); + simdjson_inline void clear(); /** * Get access to the string. The string_view is owned by the builder * and it is invalid to use it after the string_builder has been @@ -6175,9 +6254,9 @@ namespace simdjson { * However you can make a copy of the string_view on memory that you * own. */ - simdjson_really_inline std::string_view str() const; + simdjson_inline std::string_view str() const; /** Append a key_value_pair to the builder (to be printed) **/ - simdjson_really_inline void append(simdjson::dom::key_value_pair value); + simdjson_inline void append(simdjson::dom::key_value_pair value); private: formatter format{}; }; @@ -6191,43 +6270,43 @@ namespace simdjson { public: mini_formatter() = default; /** Add a comma **/ - simdjson_really_inline void comma(); + simdjson_inline void comma(); /** Start an array, prints [ **/ - simdjson_really_inline void start_array(); + simdjson_inline void start_array(); /** End an array, prints ] **/ - simdjson_really_inline void end_array(); + simdjson_inline void end_array(); /** Start an array, prints { **/ - simdjson_really_inline void start_object(); + simdjson_inline void start_object(); /** Start an array, prints } **/ - simdjson_really_inline void end_object(); + simdjson_inline void end_object(); /** Prints a true **/ - simdjson_really_inline void true_atom(); + simdjson_inline void true_atom(); /** Prints a false **/ - simdjson_really_inline void false_atom(); + simdjson_inline void false_atom(); /** Prints a null **/ - simdjson_really_inline void null_atom(); 
+ simdjson_inline void null_atom(); /** Prints a number **/ - simdjson_really_inline void number(int64_t x); + simdjson_inline void number(int64_t x); /** Prints a number **/ - simdjson_really_inline void number(uint64_t x); + simdjson_inline void number(uint64_t x); /** Prints a number **/ - simdjson_really_inline void number(double x); + simdjson_inline void number(double x); /** Prints a key (string + colon) **/ - simdjson_really_inline void key(std::string_view unescaped); + simdjson_inline void key(std::string_view unescaped); /** Prints a string. The string is escaped as needed. **/ - simdjson_really_inline void string(std::string_view unescaped); + simdjson_inline void string(std::string_view unescaped); /** Clears out the content. **/ - simdjson_really_inline void clear(); + simdjson_inline void clear(); /** * Get access to the buffer, it is owned by the instance, but * the user can make a copy. **/ - simdjson_really_inline std::string_view str() const; + simdjson_inline std::string_view str() const; private: // implementation details (subject to change) /** Prints one character **/ - simdjson_really_inline void one_char(char c); + simdjson_inline void one_char(char c); /** Backing buffer **/ std::vector buffer{}; // not ideal! 
}; @@ -6662,7 +6741,7 @@ namespace simdjson { inline double get_double() const { if (location + 1 >= tape_length) { return std::numeric_limits::quiet_NaN(); // default value in - // case of error + // case of error } double answer; std::memcpy(&answer, &doc.tape[location + 1], sizeof(answer)); @@ -6857,11 +6936,11 @@ namespace simdjson { // // simdjson_result inline implementation // - simdjson_really_inline simdjson_result::simdjson_result() noexcept + simdjson_inline simdjson_result::simdjson_result() noexcept : internal::simdjson_result_base() {} - simdjson_really_inline simdjson_result::simdjson_result(dom::array value) noexcept + simdjson_inline simdjson_result::simdjson_result(dom::array value) noexcept : internal::simdjson_result_base(std::forward(value)) {} - simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : internal::simdjson_result_base(error) {} #if SIMDJSON_EXCEPTIONS @@ -6895,21 +6974,26 @@ namespace simdjson { // // array inline implementation // - simdjson_really_inline array::array() noexcept : tape{} {} - simdjson_really_inline array::array(const internal::tape_ref& _tape) noexcept : tape{ _tape } {} + simdjson_inline array::array() noexcept : tape{} {} + simdjson_inline array::array(const internal::tape_ref& _tape) noexcept : tape{ _tape } {} inline array::iterator array::begin() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 return internal::tape_ref(tape.doc, tape.json_index + 1); } inline array::iterator array::end() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 return internal::tape_ref(tape.doc, tape.after_element() - 1); } inline size_t array::size() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 return tape.scope_count(); } 
inline size_t array::number_of_slots() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 return tape.matching_brace_index() - tape.json_index; } inline simdjson_result array::at_pointer(std::string_view json_pointer) const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 if (json_pointer.empty()) { // an empty string means that we return the current node return element(this->tape); // copy the current node } @@ -6951,6 +7035,7 @@ namespace simdjson { } inline simdjson_result array::at(size_t index) const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 size_t i = 0; for (auto element : *this) { if (i == index) { return element; } @@ -6962,7 +7047,7 @@ namespace simdjson { // // array::iterator inline implementation // - simdjson_really_inline array::iterator::iterator(const internal::tape_ref& _tape) noexcept : tape{ _tape } { } + simdjson_inline array::iterator::iterator(const internal::tape_ref& _tape) noexcept : tape{ _tape } { } inline element array::iterator::operator*() const noexcept { return element(tape); } @@ -7011,11 +7096,11 @@ namespace simdjson { // // simdjson_result inline implementation // - simdjson_really_inline simdjson_result::simdjson_result() noexcept + simdjson_inline simdjson_result::simdjson_result() noexcept : internal::simdjson_result_base() {} - simdjson_really_inline simdjson_result::simdjson_result(dom::element&& value) noexcept + simdjson_inline simdjson_result::simdjson_result(dom::element&& value) noexcept : internal::simdjson_result_base(std::forward(value)) {} - simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : internal::simdjson_result_base(error) {} inline simdjson_result simdjson_result::type() const noexcept { if (error()) { return 
error(); } @@ -7023,101 +7108,101 @@ namespace simdjson { } template - simdjson_really_inline bool simdjson_result::is() const noexcept { + simdjson_inline bool simdjson_result::is() const noexcept { return !error() && first.is(); } template - simdjson_really_inline simdjson_result simdjson_result::get() const noexcept { + simdjson_inline simdjson_result simdjson_result::get() const noexcept { if (error()) { return error(); } return first.get(); } template - simdjson_warn_unused simdjson_really_inline error_code simdjson_result::get(T& value) const noexcept { + simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T& value) const noexcept { if (error()) { return error(); } return first.get(value); } - simdjson_really_inline simdjson_result simdjson_result::get_array() const noexcept { + simdjson_inline simdjson_result simdjson_result::get_array() const noexcept { if (error()) { return error(); } return first.get_array(); } - simdjson_really_inline simdjson_result simdjson_result::get_object() const noexcept { + simdjson_inline simdjson_result simdjson_result::get_object() const noexcept { if (error()) { return error(); } return first.get_object(); } - simdjson_really_inline simdjson_result simdjson_result::get_c_str() const noexcept { + simdjson_inline simdjson_result simdjson_result::get_c_str() const noexcept { if (error()) { return error(); } return first.get_c_str(); } - simdjson_really_inline simdjson_result simdjson_result::get_string_length() const noexcept { + simdjson_inline simdjson_result simdjson_result::get_string_length() const noexcept { if (error()) { return error(); } return first.get_string_length(); } - simdjson_really_inline simdjson_result simdjson_result::get_string() const noexcept { + simdjson_inline simdjson_result simdjson_result::get_string() const noexcept { if (error()) { return error(); } return first.get_string(); } - simdjson_really_inline simdjson_result simdjson_result::get_int64() const noexcept { + simdjson_inline 
simdjson_result simdjson_result::get_int64() const noexcept { if (error()) { return error(); } return first.get_int64(); } - simdjson_really_inline simdjson_result simdjson_result::get_uint64() const noexcept { + simdjson_inline simdjson_result simdjson_result::get_uint64() const noexcept { if (error()) { return error(); } return first.get_uint64(); } - simdjson_really_inline simdjson_result simdjson_result::get_double() const noexcept { + simdjson_inline simdjson_result simdjson_result::get_double() const noexcept { if (error()) { return error(); } return first.get_double(); } - simdjson_really_inline simdjson_result simdjson_result::get_bool() const noexcept { + simdjson_inline simdjson_result simdjson_result::get_bool() const noexcept { if (error()) { return error(); } return first.get_bool(); } - simdjson_really_inline bool simdjson_result::is_array() const noexcept { + simdjson_inline bool simdjson_result::is_array() const noexcept { return !error() && first.is_array(); } - simdjson_really_inline bool simdjson_result::is_object() const noexcept { + simdjson_inline bool simdjson_result::is_object() const noexcept { return !error() && first.is_object(); } - simdjson_really_inline bool simdjson_result::is_string() const noexcept { + simdjson_inline bool simdjson_result::is_string() const noexcept { return !error() && first.is_string(); } - simdjson_really_inline bool simdjson_result::is_int64() const noexcept { + simdjson_inline bool simdjson_result::is_int64() const noexcept { return !error() && first.is_int64(); } - simdjson_really_inline bool simdjson_result::is_uint64() const noexcept { + simdjson_inline bool simdjson_result::is_uint64() const noexcept { return !error() && first.is_uint64(); } - simdjson_really_inline bool simdjson_result::is_double() const noexcept { + simdjson_inline bool simdjson_result::is_double() const noexcept { return !error() && first.is_double(); } - simdjson_really_inline bool simdjson_result::is_number() const noexcept { + 
simdjson_inline bool simdjson_result::is_number() const noexcept { return !error() && first.is_number(); } - simdjson_really_inline bool simdjson_result::is_bool() const noexcept { + simdjson_inline bool simdjson_result::is_bool() const noexcept { return !error() && first.is_bool(); } - simdjson_really_inline bool simdjson_result::is_null() const noexcept { + simdjson_inline bool simdjson_result::is_null() const noexcept { return !error() && first.is_null(); } - simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) const noexcept { + simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) const noexcept { if (error()) { return error(); } return first[key]; } - simdjson_really_inline simdjson_result simdjson_result::operator[](const char* key) const noexcept { + simdjson_inline simdjson_result simdjson_result::operator[](const char* key) const noexcept { if (error()) { return error(); } return first[key]; } - simdjson_really_inline simdjson_result simdjson_result::at_pointer(const std::string_view json_pointer) const noexcept { + simdjson_inline simdjson_result simdjson_result::at_pointer(const std::string_view json_pointer) const noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } #ifndef SIMDJSON_DISABLE_DEPRECATED_API [[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] - simdjson_really_inline simdjson_result simdjson_result::at(const std::string_view json_pointer) const noexcept { + simdjson_inline simdjson_result simdjson_result::at(const std::string_view json_pointer) const noexcept { SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_DEPRECATED_WARNING if (error()) { return error(); } @@ -7125,51 +7210,51 @@ namespace simdjson { SIMDJSON_POP_DISABLE_WARNINGS } #endif // SIMDJSON_DISABLE_DEPRECATED_API - simdjson_really_inline simdjson_result simdjson_result::at(size_t index) const noexcept { + 
simdjson_inline simdjson_result simdjson_result::at(size_t index) const noexcept { if (error()) { return error(); } return first.at(index); } - simdjson_really_inline simdjson_result simdjson_result::at_key(std::string_view key) const noexcept { + simdjson_inline simdjson_result simdjson_result::at_key(std::string_view key) const noexcept { if (error()) { return error(); } return first.at_key(key); } - simdjson_really_inline simdjson_result simdjson_result::at_key_case_insensitive(std::string_view key) const noexcept { + simdjson_inline simdjson_result simdjson_result::at_key_case_insensitive(std::string_view key) const noexcept { if (error()) { return error(); } return first.at_key_case_insensitive(key); } #if SIMDJSON_EXCEPTIONS - simdjson_really_inline simdjson_result::operator bool() const noexcept(false) { + simdjson_inline simdjson_result::operator bool() const noexcept(false) { return get(); } - simdjson_really_inline simdjson_result::operator const char* () const noexcept(false) { + simdjson_inline simdjson_result::operator const char* () const noexcept(false) { return get(); } - simdjson_really_inline simdjson_result::operator std::string_view() const noexcept(false) { + simdjson_inline simdjson_result::operator std::string_view() const noexcept(false) { return get(); } - simdjson_really_inline simdjson_result::operator uint64_t() const noexcept(false) { + simdjson_inline simdjson_result::operator uint64_t() const noexcept(false) { return get(); } - simdjson_really_inline simdjson_result::operator int64_t() const noexcept(false) { + simdjson_inline simdjson_result::operator int64_t() const noexcept(false) { return get(); } - simdjson_really_inline simdjson_result::operator double() const noexcept(false) { + simdjson_inline simdjson_result::operator double() const noexcept(false) { return get(); } - simdjson_really_inline simdjson_result::operator dom::array() const noexcept(false) { + simdjson_inline simdjson_result::operator dom::array() const 
noexcept(false) { return get(); } - simdjson_really_inline simdjson_result::operator dom::object() const noexcept(false) { + simdjson_inline simdjson_result::operator dom::object() const noexcept(false) { return get(); } - simdjson_really_inline dom::array::iterator simdjson_result::begin() const noexcept(false) { + simdjson_inline dom::array::iterator simdjson_result::begin() const noexcept(false) { if (error()) { throw simdjson_error(error()); } return first.begin(); } - simdjson_really_inline dom::array::iterator simdjson_result::end() const noexcept(false) { + simdjson_inline dom::array::iterator simdjson_result::end() const noexcept(false) { if (error()) { throw simdjson_error(error()); } return first.end(); } @@ -7181,15 +7266,17 @@ namespace simdjson { // // element inline implementation // - simdjson_really_inline element::element() noexcept : tape{} {} - simdjson_really_inline element::element(const internal::tape_ref& _tape) noexcept : tape{ _tape } { } + simdjson_inline element::element() noexcept : tape{} {} + simdjson_inline element::element(const internal::tape_ref& _tape) noexcept : tape{ _tape } { } inline element_type element::type() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 auto tape_type = tape.tape_ref_type(); return tape_type == internal::tape_type::FALSE_VALUE ? 
element_type::BOOL : static_cast(tape_type); } inline simdjson_result element::get_bool() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 if (tape.is_true()) { return true; } @@ -7199,6 +7286,7 @@ namespace simdjson { return INCORRECT_TYPE; } inline simdjson_result element::get_c_str() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 switch (tape.tape_ref_type()) { case internal::tape_type::STRING: { return tape.get_c_str(); @@ -7208,6 +7296,7 @@ namespace simdjson { } } inline simdjson_result element::get_string_length() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 switch (tape.tape_ref_type()) { case internal::tape_type::STRING: { return tape.get_string_length(); @@ -7217,6 +7306,7 @@ namespace simdjson { } } inline simdjson_result element::get_string() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 switch (tape.tape_ref_type()) { case internal::tape_type::STRING: return tape.get_string_view(); @@ -7225,6 +7315,7 @@ namespace simdjson { } } inline simdjson_result element::get_uint64() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 if (simdjson_unlikely(!tape.is_uint64())) { // branch rarely taken if (tape.is_int64()) { int64_t result = tape.next_tape_value(); @@ -7238,6 +7329,7 @@ namespace simdjson { return tape.next_tape_value(); } inline simdjson_result element::get_int64() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 if (simdjson_unlikely(!tape.is_int64())) { // branch rarely taken if (tape.is_uint64()) { uint64_t result = tape.next_tape_value(); @@ -7252,6 +7344,7 @@ namespace simdjson { return tape.next_tape_value(); } inline simdjson_result 
element::get_double() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 // Performance considerations: // 1. Querying tape_ref_type() implies doing a shift, it is fast to just do a straight // comparison. @@ -7274,6 +7367,7 @@ namespace simdjson { return tape.next_tape_value(); } inline simdjson_result element::get_array() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 switch (tape.tape_ref_type()) { case internal::tape_type::START_ARRAY: return array(tape); @@ -7282,6 +7376,7 @@ namespace simdjson { } } inline simdjson_result element::get_object() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 switch (tape.tape_ref_type()) { case internal::tape_type::START_OBJECT: return object(tape); @@ -7291,12 +7386,12 @@ namespace simdjson { } template - simdjson_warn_unused simdjson_really_inline error_code element::get(T& value) const noexcept { + simdjson_warn_unused simdjson_inline error_code element::get(T& value) const noexcept { return get().get(value); } // An element-specific version prevents recursion with simdjson_result::get(value) template<> - simdjson_warn_unused simdjson_really_inline error_code element::get(element& value) const noexcept { + simdjson_warn_unused simdjson_inline error_code element::get(element& value) const noexcept { value = element(tape); return SUCCESS; } @@ -7306,7 +7401,7 @@ namespace simdjson { } template - simdjson_really_inline bool element::is() const noexcept { + simdjson_inline bool element::is() const noexcept { auto result = get(); return !result.error(); } @@ -7361,6 +7456,7 @@ namespace simdjson { } inline simdjson_result element::at_pointer(std::string_view json_pointer) const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 switch (tape.tape_ref_type()) { case 
internal::tape_type::START_OBJECT: return object(tape).at_pointer(json_pointer); @@ -7396,6 +7492,7 @@ namespace simdjson { } inline bool element::dump_raw_tape(std::ostream& out) const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 return tape.doc->dump_raw_tape(out); } @@ -7524,7 +7621,7 @@ namespace simdjson { } #endif - simdjson_really_inline document_stream::document_stream( + simdjson_inline document_stream::document_stream( dom::parser& _parser, const uint8_t* _buf, size_t _len, @@ -7546,7 +7643,7 @@ namespace simdjson { #endif } - simdjson_really_inline document_stream::document_stream() noexcept + simdjson_inline document_stream::document_stream() noexcept : parser{ nullptr }, buf{ nullptr }, len{ 0 }, @@ -7558,31 +7655,31 @@ namespace simdjson { { } - simdjson_really_inline document_stream::~document_stream() noexcept { + simdjson_inline document_stream::~document_stream() noexcept { #ifdef SIMDJSON_THREADS_ENABLED worker.reset(); #endif } - simdjson_really_inline document_stream::iterator::iterator() noexcept + simdjson_inline document_stream::iterator::iterator() noexcept : stream{ nullptr }, finished{ true } { } - simdjson_really_inline document_stream::iterator document_stream::begin() noexcept { + simdjson_inline document_stream::iterator document_stream::begin() noexcept { start(); // If there are no documents, we're finished. 
return iterator(this, error == EMPTY); } - simdjson_really_inline document_stream::iterator document_stream::end() noexcept { + simdjson_inline document_stream::iterator document_stream::end() noexcept { return iterator(this, true); } - simdjson_really_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept : stream{ _stream }, finished{ is_end } { } - simdjson_really_inline document_stream::iterator::reference document_stream::iterator::operator*() noexcept { + simdjson_inline document_stream::iterator::reference document_stream::iterator::operator*() noexcept { // Note that in case of error, we do not yet mark // the iterator as "finished": this detection is done // in the operator++ function since it is possible @@ -7592,7 +7689,7 @@ namespace simdjson { return stream->parser->doc.root(); } - simdjson_really_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { // If there is an error, then we want the iterator // to be finished, no matter what. 
(E.g., we do not // keep generating documents with errors, or go beyond @@ -7619,7 +7716,7 @@ namespace simdjson { return *this; } - simdjson_really_inline bool document_stream::iterator::operator!=(const document_stream::iterator& other) const noexcept { + simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator& other) const noexcept { return finished != other.finished; } @@ -7650,11 +7747,11 @@ namespace simdjson { next(); } - simdjson_really_inline size_t document_stream::iterator::current_index() const noexcept { + simdjson_inline size_t document_stream::iterator::current_index() const noexcept { return stream->doc_index; } - simdjson_really_inline std::string_view document_stream::iterator::source() const noexcept { + simdjson_inline std::string_view document_stream::iterator::source() const noexcept { const char* start = reinterpret_cast(stream->buf) + current_index(); bool object_or_array = ((*start == '[') || (*start == '{')); if (object_or_array) { @@ -7750,32 +7847,32 @@ namespace simdjson { } // namespace dom - simdjson_really_inline simdjson_result::simdjson_result() noexcept + simdjson_inline simdjson_result::simdjson_result() noexcept : simdjson_result_base() { } - simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : simdjson_result_base(error) { } - simdjson_really_inline simdjson_result::simdjson_result(dom::document_stream&& value) noexcept + simdjson_inline simdjson_result::simdjson_result(dom::document_stream&& value) noexcept : simdjson_result_base(std::forward(value)) { } #if SIMDJSON_EXCEPTIONS - simdjson_really_inline dom::document_stream::iterator simdjson_result::begin() noexcept(false) { + simdjson_inline dom::document_stream::iterator simdjson_result::begin() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first.begin(); } - simdjson_really_inline 
dom::document_stream::iterator simdjson_result::end() noexcept(false) { + simdjson_inline dom::document_stream::iterator simdjson_result::end() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first.end(); } #else // SIMDJSON_EXCEPTIONS #ifndef SIMDJSON_DISABLE_DEPRECATED_API - simdjson_really_inline dom::document_stream::iterator simdjson_result::begin() noexcept { + simdjson_inline dom::document_stream::iterator simdjson_result::begin() noexcept { first.error = error(); return first.begin(); } - simdjson_really_inline dom::document_stream::iterator simdjson_result::end() noexcept { + simdjson_inline dom::document_stream::iterator simdjson_result::end() noexcept { first.error = error(); return first.end(); } @@ -7923,7 +8020,7 @@ namespace simdjson { << " (start of the scope)\n"; break; case 'r': // we start and end with the root node - // should we be hitting the root node? + // should we be hitting the root node? return false; default: return false; @@ -7954,11 +8051,11 @@ namespace simdjson { // // simdjson_result inline implementation // - simdjson_really_inline simdjson_result::simdjson_result() noexcept + simdjson_inline simdjson_result::simdjson_result() noexcept : internal::simdjson_result_base() {} - simdjson_really_inline simdjson_result::simdjson_result(dom::object value) noexcept + simdjson_inline simdjson_result::simdjson_result(dom::object value) noexcept : internal::simdjson_result_base(std::forward(value)) {} - simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : internal::simdjson_result_base(error) {} inline simdjson_result simdjson_result::operator[](std::string_view key) const noexcept { @@ -8004,15 +8101,18 @@ namespace simdjson { // // object inline implementation // - simdjson_really_inline object::object() noexcept : tape{} {} - simdjson_really_inline object::object(const internal::tape_ref& _tape) noexcept : 
tape{ _tape } { } + simdjson_inline object::object() noexcept : tape{} {} + simdjson_inline object::object(const internal::tape_ref& _tape) noexcept : tape{ _tape } { } inline object::iterator object::begin() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 return internal::tape_ref(tape.doc, tape.json_index + 1); } inline object::iterator object::end() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 return internal::tape_ref(tape.doc, tape.after_element() - 1); } inline size_t object::size() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 return tape.scope_count(); } @@ -8023,6 +8123,7 @@ namespace simdjson { return at_key(key); } inline simdjson_result object::at_pointer(std::string_view json_pointer) const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 if (json_pointer.empty()) { // an empty string means that we return the current node return element(this->tape); // copy the current node } @@ -8093,7 +8194,7 @@ namespace simdjson { // // object::iterator inline implementation // - simdjson_really_inline object::iterator::iterator(const internal::tape_ref& _tape) noexcept : tape{ _tape } { } + simdjson_inline object::iterator::iterator(const internal::tape_ref& _tape) noexcept : tape{ _tape } { } inline const key_value_pair object::iterator::operator*() const noexcept { return key_value_pair(key(), value()); } @@ -8667,7 +8768,7 @@ namespace simdjson { } } else { // regular numeric index - // The index can't have a leading '0' + // The index can't have a leading '0' if (key_or_index[0] == '0' && key_or_index.length() > 1) { return false; } @@ -8707,12 +8808,12 @@ namespace simdjson { // // parser inline implementation // - simdjson_really_inline parser::parser(size_t max_capacity) noexcept + 
simdjson_inline parser::parser(size_t max_capacity) noexcept : _max_capacity{ max_capacity }, loaded_bytes(nullptr) { } - simdjson_really_inline parser::parser(parser&& other) noexcept = default; - simdjson_really_inline parser& parser::operator=(parser&& other) noexcept = default; + simdjson_inline parser::parser(parser&& other) noexcept = default; + simdjson_inline parser& parser::operator=(parser&& other) noexcept = default; inline bool parser::is_valid() const noexcept { return valid; } inline int parser::get_error_code() const noexcept { return error; } @@ -8734,11 +8835,17 @@ namespace simdjson { } // Get the file size - if (std::fseek(fp, 0, SEEK_END) < 0) { + int ret; +#if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS + ret = _fseeki64(fp, 0, SEEK_END); +#else + ret = std::fseek(fp, 0, SEEK_END); +#endif // _WIN64 + if (ret < 0) { std::fclose(fp); return IO_ERROR; } -#if defined(SIMDJSON_VISUAL_STUDIO) && !SIMDJSON_IS_32BITS +#if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS __int64 len = _ftelli64(fp); if (len == -1L) { std::fclose(fp); @@ -8810,13 +8917,13 @@ namespace simdjson { return provided_doc.root(); } - simdjson_really_inline simdjson_result parser::parse_into_document(document& provided_doc, const char* buf, size_t len, bool realloc_if_needed) & noexcept { + simdjson_inline simdjson_result parser::parse_into_document(document& provided_doc, const char* buf, size_t len, bool realloc_if_needed) & noexcept { return parse_into_document(provided_doc, reinterpret_cast(buf), len, realloc_if_needed); } - simdjson_really_inline simdjson_result parser::parse_into_document(document& provided_doc, const std::string& s) & noexcept { + simdjson_inline simdjson_result parser::parse_into_document(document& provided_doc, const std::string& s) & noexcept { return parse_into_document(provided_doc, s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING); } - simdjson_really_inline simdjson_result parser::parse_into_document(document& provided_doc, const 
padded_string& s) & noexcept { + simdjson_inline simdjson_result parser::parse_into_document(document& provided_doc, const padded_string& s) & noexcept { return parse_into_document(provided_doc, s.data(), s.length(), false); } @@ -8825,15 +8932,18 @@ namespace simdjson { return parse_into_document(doc, buf, len, realloc_if_needed); } - simdjson_really_inline simdjson_result parser::parse(const char* buf, size_t len, bool realloc_if_needed) & noexcept { + simdjson_inline simdjson_result parser::parse(const char* buf, size_t len, bool realloc_if_needed) & noexcept { return parse(reinterpret_cast(buf), len, realloc_if_needed); } - simdjson_really_inline simdjson_result parser::parse(const std::string& s) & noexcept { + simdjson_inline simdjson_result parser::parse(const std::string& s) & noexcept { return parse(s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING); } - simdjson_really_inline simdjson_result parser::parse(const padded_string& s) & noexcept { + simdjson_inline simdjson_result parser::parse(const padded_string& s) & noexcept { return parse(s.data(), s.length(), false); } + simdjson_inline simdjson_result parser::parse(const padded_string_view& v) & noexcept { + return parse(v.data(), v.length(), false); + } inline simdjson_result parser::parse_many(const uint8_t* buf, size_t len, size_t batch_size) noexcept { if (batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } @@ -8849,13 +8959,13 @@ namespace simdjson { return parse_many(s.data(), s.length(), batch_size); } - simdjson_really_inline size_t parser::capacity() const noexcept { + simdjson_inline size_t parser::capacity() const noexcept { return implementation ? 
implementation->capacity() : 0; } - simdjson_really_inline size_t parser::max_capacity() const noexcept { + simdjson_inline size_t parser::max_capacity() const noexcept { return _max_capacity; } - simdjson_really_inline size_t parser::max_depth() const noexcept { + simdjson_inline size_t parser::max_depth() const noexcept { return implementation ? implementation->max_depth() : DEFAULT_MAX_DEPTH; } @@ -8909,8 +9019,8 @@ namespace simdjson { return SUCCESS; } - simdjson_really_inline void parser::set_max_capacity(size_t max_capacity) noexcept { - if (max_capacity < MINIMAL_DOCUMENT_CAPACITY) { + simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if (max_capacity > MINIMAL_DOCUMENT_CAPACITY) { _max_capacity = max_capacity; } else { @@ -8935,39 +9045,41 @@ namespace simdjson { // // tape_ref inline implementation // - simdjson_really_inline tape_ref::tape_ref() noexcept : doc{ nullptr }, json_index{ 0 } {} - simdjson_really_inline tape_ref::tape_ref(const dom::document* _doc, size_t _json_index) noexcept : doc{ _doc }, json_index{ _json_index } {} + simdjson_inline tape_ref::tape_ref() noexcept : doc{ nullptr }, json_index{ 0 } {} + simdjson_inline tape_ref::tape_ref(const dom::document* _doc, size_t _json_index) noexcept : doc{ _doc }, json_index{ _json_index } {} - simdjson_really_inline bool tape_ref::is_document_root() const noexcept { + simdjson_inline bool tape_ref::is_document_root() const noexcept { return json_index == 1; // should we ever change the structure of the tape, this should get updated. } - + simdjson_inline bool tape_ref::usable() const noexcept { + return doc != nullptr; // when the document pointer is null, this tape_ref is uninitialized (should not be accessed). + } // Some value types have a specific on-tape word value. It can be faster // to check the type by doing a word-to-word comparison instead of extracting the // most significant 8 bits. 
- simdjson_really_inline bool tape_ref::is_double() const noexcept { + simdjson_inline bool tape_ref::is_double() const noexcept { constexpr uint64_t tape_double = uint64_t(tape_type::DOUBLE) << 56; return doc->tape[json_index] == tape_double; } - simdjson_really_inline bool tape_ref::is_int64() const noexcept { + simdjson_inline bool tape_ref::is_int64() const noexcept { constexpr uint64_t tape_int64 = uint64_t(tape_type::INT64) << 56; return doc->tape[json_index] == tape_int64; } - simdjson_really_inline bool tape_ref::is_uint64() const noexcept { + simdjson_inline bool tape_ref::is_uint64() const noexcept { constexpr uint64_t tape_uint64 = uint64_t(tape_type::UINT64) << 56; return doc->tape[json_index] == tape_uint64; } - simdjson_really_inline bool tape_ref::is_false() const noexcept { + simdjson_inline bool tape_ref::is_false() const noexcept { constexpr uint64_t tape_false = uint64_t(tape_type::FALSE_VALUE) << 56; return doc->tape[json_index] == tape_false; } - simdjson_really_inline bool tape_ref::is_true() const noexcept { + simdjson_inline bool tape_ref::is_true() const noexcept { constexpr uint64_t tape_true = uint64_t(tape_type::TRUE_VALUE) << 56; return doc->tape[json_index] == tape_true; } - simdjson_really_inline bool tape_ref::is_null_on_tape() const noexcept { + simdjson_inline bool tape_ref::is_null_on_tape() const noexcept { constexpr uint64_t tape_null = uint64_t(tape_type::NULL_VALUE) << 56; return doc->tape[json_index] == tape_null; } @@ -8985,21 +9097,21 @@ namespace simdjson { return json_index + 1; } } - simdjson_really_inline tape_type tape_ref::tape_ref_type() const noexcept { + simdjson_inline tape_type tape_ref::tape_ref_type() const noexcept { return static_cast(doc->tape[json_index] >> 56); } - simdjson_really_inline uint64_t internal::tape_ref::tape_value() const noexcept { + simdjson_inline uint64_t internal::tape_ref::tape_value() const noexcept { return doc->tape[json_index] & internal::JSON_VALUE_MASK; } - simdjson_really_inline 
uint32_t internal::tape_ref::matching_brace_index() const noexcept { + simdjson_inline uint32_t internal::tape_ref::matching_brace_index() const noexcept { return uint32_t(doc->tape[json_index]); } - simdjson_really_inline uint32_t internal::tape_ref::scope_count() const noexcept { + simdjson_inline uint32_t internal::tape_ref::scope_count() const noexcept { return uint32_t((doc->tape[json_index] >> 32) & internal::JSON_COUNT_MASK); } template - simdjson_really_inline T tape_ref::next_tape_value() const noexcept { + simdjson_inline T tape_ref::next_tape_value() const noexcept { static_assert(sizeof(T) == sizeof(uint64_t), "next_tape_value() template parameter must be 64-bit"); // Though the following is tempting... // return *reinterpret_cast(&doc->tape[json_index + 1]); @@ -9010,14 +9122,14 @@ namespace simdjson { return x; } - simdjson_really_inline uint32_t internal::tape_ref::get_string_length() const noexcept { + simdjson_inline uint32_t internal::tape_ref::get_string_length() const noexcept { size_t string_buf_index = size_t(tape_value()); uint32_t len; std::memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len)); return len; } - simdjson_really_inline const char* internal::tape_ref::get_c_str() const noexcept { + simdjson_inline const char* internal::tape_ref::get_c_str() const noexcept { size_t string_buf_index = size_t(tape_value()); return reinterpret_cast(&doc->string_buf[string_buf_index + sizeof(uint32_t)]); } @@ -9144,19 +9256,19 @@ namespace simdjson { * Minifier/formatter code. 
**/ - simdjson_really_inline void mini_formatter::number(uint64_t x) { + simdjson_inline void mini_formatter::number(uint64_t x) { char number_buffer[24]; char* newp = fast_itoa(number_buffer, x); buffer.insert(buffer.end(), number_buffer, newp); } - simdjson_really_inline void mini_formatter::number(int64_t x) { + simdjson_inline void mini_formatter::number(int64_t x) { char number_buffer[24]; char* newp = fast_itoa(number_buffer, x); buffer.insert(buffer.end(), number_buffer, newp); } - simdjson_really_inline void mini_formatter::number(double x) { + simdjson_inline void mini_formatter::number(double x) { char number_buffer[24]; // Currently, passing the nullptr to the second argument is // safe because our implementation does not check the second @@ -9165,31 +9277,31 @@ namespace simdjson { buffer.insert(buffer.end(), number_buffer, newp); } - simdjson_really_inline void mini_formatter::start_array() { one_char('['); } - simdjson_really_inline void mini_formatter::end_array() { one_char(']'); } - simdjson_really_inline void mini_formatter::start_object() { one_char('{'); } - simdjson_really_inline void mini_formatter::end_object() { one_char('}'); } - simdjson_really_inline void mini_formatter::comma() { one_char(','); } + simdjson_inline void mini_formatter::start_array() { one_char('['); } + simdjson_inline void mini_formatter::end_array() { one_char(']'); } + simdjson_inline void mini_formatter::start_object() { one_char('{'); } + simdjson_inline void mini_formatter::end_object() { one_char('}'); } + simdjson_inline void mini_formatter::comma() { one_char(','); } - simdjson_really_inline void mini_formatter::true_atom() { + simdjson_inline void mini_formatter::true_atom() { const char* s = "true"; buffer.insert(buffer.end(), s, s + 4); } - simdjson_really_inline void mini_formatter::false_atom() { + simdjson_inline void mini_formatter::false_atom() { const char* s = "false"; buffer.insert(buffer.end(), s, s + 5); } - simdjson_really_inline void 
mini_formatter::null_atom() { + simdjson_inline void mini_formatter::null_atom() { const char* s = "null"; buffer.insert(buffer.end(), s, s + 4); } - simdjson_really_inline void mini_formatter::one_char(char c) { buffer.push_back(c); } - simdjson_really_inline void mini_formatter::key(std::string_view unescaped) { + simdjson_inline void mini_formatter::one_char(char c) { buffer.push_back(c); } + simdjson_inline void mini_formatter::key(std::string_view unescaped) { string(unescaped); one_char(':'); } - simdjson_really_inline void mini_formatter::string(std::string_view unescaped) { + simdjson_inline void mini_formatter::string(std::string_view unescaped) { one_char('\"'); size_t i = 0; // Fast path for the case where we have no control character, no ", and no backslash. @@ -9275,7 +9387,7 @@ namespace simdjson { buffer.clear(); } - simdjson_really_inline std::string_view mini_formatter::str() const { + simdjson_inline std::string_view mini_formatter::str() const { return std::string_view(buffer.data(), buffer.size()); } @@ -9368,17 +9480,17 @@ namespace simdjson { case tape_type::INT64: format.number(iter.next_tape_value()); iter.json_index++; // numbers take up 2 spots, so we need to increment - // extra + // extra break; case tape_type::UINT64: format.number(iter.next_tape_value()); iter.json_index++; // numbers take up 2 spots, so we need to increment - // extra + // extra break; case tape_type::DOUBLE: format.number(iter.next_tape_value()); iter.json_index++; // numbers take up 2 spots, so we need to increment - // extra + // extra break; case tape_type::TRUE_VALUE: format.true_atom(); @@ -9447,18 +9559,18 @@ namespace simdjson { } template - simdjson_really_inline void string_builder::append(simdjson::dom::key_value_pair kv) { + simdjson_inline void string_builder::append(simdjson::dom::key_value_pair kv) { format.key(kv.key); append(kv.value); } template - simdjson_really_inline void string_builder::clear() { + simdjson_inline void string_builder::clear() { 
format.clear(); } template - simdjson_really_inline std::string_view string_builder::str() const { + simdjson_inline std::string_view string_builder::str() const { return format.str(); } @@ -9656,8 +9768,13 @@ namespace simdjson { // table modified and copied from // Default Haswell to on if this is x86-64. Even if we're not compiled for it, it could be selected // at runtime. #ifndef SIMDJSON_IMPLEMENTATION_HASWELL +#if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +// if icelake is always available, never enable haswell. +#define SIMDJSON_IMPLEMENTATION_HASWELL 0 +#else #define SIMDJSON_IMPLEMENTATION_HASWELL SIMDJSON_IS_X86_64 #endif +#endif #ifdef _MSC_VER // To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this next line, see // https://github.com/simdjson/simdjson/issues/1247 @@ -9666,9 +9783,14 @@ namespace simdjson { // table modified and copied from #define SIMDJSON_CAN_ALWAYS_RUN_HASWELL ((SIMDJSON_IMPLEMENTATION_HASWELL) && (SIMDJSON_IS_X86_64) && (__AVX2__) && (__BMI__) && (__PCLMUL__) && (__LZCNT__)) #endif -// Default Westmere to on if this is x86-64. Note that the macro SIMDJSON_REQUIRES_HASWELL appears unused. +// Default Westmere to on if this is x86-64. #ifndef SIMDJSON_IMPLEMENTATION_WESTMERE -#define SIMDJSON_IMPLEMENTATION_WESTMERE (SIMDJSON_IS_X86_64 && !SIMDJSON_REQUIRES_HASWELL) +#if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE || SIMDJSON_CAN_ALWAYS_RUN_HASWELL +// if icelake or haswell are always available, never enable westmere. +#define SIMDJSON_IMPLEMENTATION_WESTMERE 0 +#else +#define SIMDJSON_IMPLEMENTATION_WESTMERE SIMDJSON_IS_X86_64 +#endif #endif #define SIMDJSON_CAN_ALWAYS_RUN_WESTMERE (SIMDJSON_IMPLEMENTATION_WESTMERE && SIMDJSON_IS_X86_64 && __SSE4_2__ && __PCLMUL__) @@ -9679,7 +9801,12 @@ namespace simdjson { // table modified and copied from // Default Fallback to on unless a builtin implementation has already been selected. 
#ifndef SIMDJSON_IMPLEMENTATION_FALLBACK -#define SIMDJSON_IMPLEMENTATION_FALLBACK 1 // (!SIMDJSON_CAN_ALWAYS_RUN_ARM64 && !SIMDJSON_CAN_ALWAYS_RUN_HASWELL && !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE && !SIMDJSON_CAN_ALWAYS_RUN_PPC64) +#if SIMDJSON_CAN_ALWAYS_RUN_ARM64 || SIMDJSON_CAN_ALWAYS_RUN_ICELAKE || SIMDJSON_CAN_ALWAYS_RUN_HASWELL || SIMDJSON_CAN_ALWAYS_RUN_WESTMERE || SIMDJSON_CAN_ALWAYS_RUN_PPC64 +// if anything at all except fallback can always run, then disable fallback. +#define SIMDJSON_IMPLEMENTATION_FALLBACK 0 +#else +#define SIMDJSON_IMPLEMENTATION_FALLBACK 1 +#endif #endif #define SIMDJSON_CAN_ALWAYS_RUN_FALLBACK SIMDJSON_IMPLEMENTATION_FALLBACK @@ -9715,9 +9842,12 @@ namespace simdjson { using namespace simdjson::dom; } + /** + * @private + */ class implementation final : public simdjson::implementation { public: - simdjson_really_inline implementation() : simdjson::implementation("arm64", "ARM NEON", internal::instruction_set::NEON) {} + simdjson_inline implementation() : simdjson::implementation("arm64", "ARM NEON", internal::instruction_set::NEON) {} simdjson_warn_unused error_code create_dom_parser_implementation( size_t capacity, size_t max_length, @@ -9775,11 +9905,12 @@ namespace simdjson { simdjson_warn_unused error_code stage1(const uint8_t* buf, size_t len, stage1_mode partial) noexcept final; simdjson_warn_unused error_code stage2(dom::document& doc) noexcept final; simdjson_warn_unused error_code stage2_next(dom::document& doc) noexcept final; - simdjson_warn_unused uint8_t* parse_string(const uint8_t* src, uint8_t* dst) const noexcept final; + simdjson_warn_unused uint8_t* parse_string(const uint8_t* src, uint8_t* dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t* parse_wobbly_string(const uint8_t* src, uint8_t* dst) const noexcept final; inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; 
private: - simdjson_really_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); }; @@ -9828,6 +9959,8 @@ namespace simdjson { // you use visual studio or other compilers. #include +static_assert(sizeof(uint8x16_t) <= simdjson::SIMDJSON_PADDING, "insufficient padding for arm64"); + #endif // SIMDJSON_ARM64_INTRINSICS_H /* end file include/simdjson/arm64/intrinsics.h */ /* begin file include/simdjson/arm64/bitmanipulation.h */ @@ -9842,7 +9975,11 @@ namespace simdjson { // but the algorithms do not end up using the returned value. // Sadly, sanitizers are not smart enough to figure it out. SIMDJSON_NO_SANITIZE_UNDEFINED - simdjson_really_inline int trailing_zeroes(uint64_t input_num) { + // This function can be used safely even if not all bytes have been + // initialized. + // See issue https://github.com/simdjson/simdjson/issues/1965 + SIMDJSON_NO_SANITIZE_MEMORY + simdjson_inline int trailing_zeroes(uint64_t input_num) { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO unsigned long ret; // Search the mask data from least significant bit (LSB) @@ -9855,12 +9992,12 @@ namespace simdjson { } /* result might be undefined when input_num is zero */ - simdjson_really_inline uint64_t clear_lowest_bit(uint64_t input_num) { + simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { return input_num & (input_num - 1); } /* result might be undefined when input_num is zero */ - simdjson_really_inline int leading_zeroes(uint64_t input_num) { + simdjson_inline int leading_zeroes(uint64_t input_num) { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO unsigned long leading_zero = 0; // Search the mask data from most significant bit (MSB) @@ -9875,7 +10012,7 @@ namespace simdjson { } /* result might be undefined when input_num is zero */ - simdjson_really_inline int count_ones(uint64_t input_num) { + simdjson_inline int count_ones(uint64_t input_num) { return 
vaddv_u8(vcnt_u8(vcreate_u8(input_num))); } @@ -9894,7 +10031,7 @@ namespace simdjson { #define SIMDJSON_PREFER_REVERSE_BITS 1 /* reverse the bits */ - simdjson_really_inline uint64_t reverse_bits(uint64_t input_num) { + simdjson_inline uint64_t reverse_bits(uint64_t input_num) { uint64_t rev_bits; __asm("rbit %0, %1" : "=r"(rev_bits) : "r"(input_num)); return rev_bits; @@ -9907,13 +10044,13 @@ namespace simdjson { * of such undefined behavior is never used. **/ SIMDJSON_NO_SANITIZE_UNDEFINED - simdjson_really_inline uint64_t zero_leading_bit(uint64_t rev_bits, int leading_zeroes) { + simdjson_inline uint64_t zero_leading_bit(uint64_t rev_bits, int leading_zeroes) { return rev_bits ^ (uint64_t(0x8000000000000000) >> leading_zeroes); } #endif - simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t* result) { + simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t* result) { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO * result = value1 + value2; return *result < value1; @@ -9942,7 +10079,7 @@ namespace simdjson { // // For example, prefix_xor(00100100) == 00011100 // - simdjson_really_inline uint64_t prefix_xor(uint64_t bitmask) { + simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { ///////////// // We could do this with PMULL, but it is apparently slow. // @@ -9999,7 +10136,7 @@ namespace simdjson { * You should not use this function except for compile-time constants: * it is not efficient. 
*/ - simdjson_really_inline uint8x16_t make_uint8x16_t(uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, + simdjson_inline uint8x16_t make_uint8x16_t(uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7, uint8_t x8, uint8_t x9, uint8_t x10, uint8_t x11, uint8_t x12, uint8_t x13, uint8_t x14, uint8_t x15, uint8_t x16) { @@ -10028,7 +10165,7 @@ namespace simdjson { return x; } - simdjson_really_inline uint8x8_t make_uint8x8_t(uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, + simdjson_inline uint8x8_t make_uint8x8_t(uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7, uint8_t x8) { uint8x8_t x{}; x = vset_lane_u8(x1, x, 0); @@ -10043,7 +10180,7 @@ namespace simdjson { } // We have to do the same work for make_int8x16_t - simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_t x4, + simdjson_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_t x4, int8_t x5, int8_t x6, int8_t x7, int8_t x8, int8_t x9, int8_t x10, int8_t x11, int8_t x12, int8_t x13, int8_t x14, int8_t x15, int8_t x16) { @@ -10089,24 +10226,24 @@ namespace simdjson { static const int SIZE = sizeof(value); // Conversion from/to SIMD register - simdjson_really_inline base_u8(const uint8x16_t _value) : value(_value) {} - simdjson_really_inline operator const uint8x16_t& () const { return this->value; } - simdjson_really_inline operator uint8x16_t& () { return this->value; } + simdjson_inline base_u8(const uint8x16_t _value) : value(_value) {} + simdjson_inline operator const uint8x16_t& () const { return this->value; } + simdjson_inline operator uint8x16_t& () { return this->value; } // Bit operations - simdjson_really_inline simd8 operator|(const simd8 other) const { return vorrq_u8(*this, other); } - simdjson_really_inline simd8 operator&(const simd8 other) const { return vandq_u8(*this, other); } - simdjson_really_inline simd8 operator^(const simd8 other) const { return veorq_u8(*this, other); } 
- simdjson_really_inline simd8 bit_andnot(const simd8 other) const { return vbicq_u8(*this, other); } - simdjson_really_inline simd8 operator~() const { return *this ^ 0xFFu; } - simdjson_really_inline simd8& operator|=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast | other; return *this_cast; } - simdjson_really_inline simd8& operator&=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast & other; return *this_cast; } - simdjson_really_inline simd8& operator^=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast ^ other; return *this_cast; } + simdjson_inline simd8 operator|(const simd8 other) const { return vorrq_u8(*this, other); } + simdjson_inline simd8 operator&(const simd8 other) const { return vandq_u8(*this, other); } + simdjson_inline simd8 operator^(const simd8 other) const { return veorq_u8(*this, other); } + simdjson_inline simd8 bit_andnot(const simd8 other) const { return vbicq_u8(*this, other); } + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + simdjson_inline simd8& operator|=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline simd8& operator&=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline simd8& operator^=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast ^ other; return *this_cast; } - friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return vceqq_u8(lhs, rhs); } + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return vceqq_u8(lhs, rhs); } template - simdjson_really_inline simd8 prev(const simd8 prev_chunk) const { + simdjson_inline simd8 prev(const simd8 prev_chunk) const { return vextq_u8(prev_chunk, *this, 16 - N); } }; @@ -10117,17 +10254,17 @@ namespace simdjson { typedef 
uint16_t bitmask_t; typedef uint32_t bitmask2_t; - static simdjson_really_inline simd8 splat(bool _value) { return vmovq_n_u8(uint8_t(-(!!_value))); } + static simdjson_inline simd8 splat(bool _value) { return vmovq_n_u8(uint8_t(-(!!_value))); } - simdjson_really_inline simd8(const uint8x16_t _value) : base_u8(_value) {} + simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} // False constructor - simdjson_really_inline simd8() : simd8(vdupq_n_u8(0)) {} + simdjson_inline simd8() : simd8(vdupq_n_u8(0)) {} // Splat constructor - simdjson_really_inline simd8(bool _value) : simd8(splat(_value)) {} + simdjson_inline simd8(bool _value) : simd8(splat(_value)) {} // We return uint32_t instead of uint16_t because that seems to be more efficient for most // purposes (cutting it down to uint16_t costs performance in some compilers). - simdjson_really_inline uint32_t to_bitmask() const { + simdjson_inline uint32_t to_bitmask() const { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO const uint8x16_t bit_mask = make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); @@ -10141,26 +10278,26 @@ namespace simdjson { tmp = vpaddq_u8(tmp, tmp); return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); } - simdjson_really_inline bool any() const { return vmaxvq_u8(*this) != 0; } + simdjson_inline bool any() const { return vmaxvq_u8(*this) != 0; } }; // Unsigned bytes template<> struct simd8 : base_u8 { - static simdjson_really_inline uint8x16_t splat(uint8_t _value) { return vmovq_n_u8(_value); } - static simdjson_really_inline uint8x16_t zero() { return vdupq_n_u8(0); } - static simdjson_really_inline uint8x16_t load(const uint8_t* values) { return vld1q_u8(values); } + static simdjson_inline uint8x16_t splat(uint8_t _value) { return vmovq_n_u8(_value); } + static simdjson_inline uint8x16_t zero() { return vdupq_n_u8(0); } + static simdjson_inline uint8x16_t load(const uint8_t* values) { return vld1q_u8(values); } - 
simdjson_really_inline simd8(const uint8x16_t _value) : base_u8(_value) {} + simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} // Zero constructor - simdjson_really_inline simd8() : simd8(zero()) {} + simdjson_inline simd8() : simd8(zero()) {} // Array constructor - simdjson_really_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} + simdjson_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} // Splat constructor - simdjson_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} // Member-by-member initialization #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - simdjson_really_inline simd8( + simdjson_inline simd8( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) : simd8(make_uint8x16_t( @@ -10168,7 +10305,7 @@ namespace simdjson { v8, v9, v10, v11, v12, v13, v14, v15 )) {} #else - simdjson_really_inline simd8( + simdjson_inline simd8( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) : simd8(uint8x16_t{ @@ -10178,55 +10315,55 @@ namespace simdjson { #endif // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_really_inline static simd8 repeat_16( + simdjson_inline static simd8 repeat_16( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) { return simd8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15 - ); + ); } // Store to array - simdjson_really_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); } + simdjson_inline void store(uint8_t dst[16]) 
const { return vst1q_u8(dst, *this); } // Saturated math - simdjson_really_inline simd8 saturating_add(const simd8 other) const { return vqaddq_u8(*this, other); } - simdjson_really_inline simd8 saturating_sub(const simd8 other) const { return vqsubq_u8(*this, other); } + simdjson_inline simd8 saturating_add(const simd8 other) const { return vqaddq_u8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return vqsubq_u8(*this, other); } // Addition/subtraction are the same for signed and unsigned - simdjson_really_inline simd8 operator+(const simd8 other) const { return vaddq_u8(*this, other); } - simdjson_really_inline simd8 operator-(const simd8 other) const { return vsubq_u8(*this, other); } - simdjson_really_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } - simdjson_really_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } + simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_u8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_u8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } // Order-specific operations - simdjson_really_inline uint8_t max_val() const { return vmaxvq_u8(*this); } - simdjson_really_inline uint8_t min_val() const { return vminvq_u8(*this); } - simdjson_really_inline simd8 max_val(const simd8 other) const { return vmaxq_u8(*this, other); } - simdjson_really_inline simd8 min_val(const simd8 other) const { return vminq_u8(*this, other); } - simdjson_really_inline simd8 operator<=(const simd8 other) const { return vcleq_u8(*this, other); } - simdjson_really_inline simd8 operator>=(const simd8 other) const { return vcgeq_u8(*this, other); } - simdjson_really_inline simd8 operator<(const simd8 other) const { return vcltq_u8(*this, 
other); } - simdjson_really_inline simd8 operator>(const simd8 other) const { return vcgtq_u8(*this, other); } + simdjson_inline uint8_t max_val() const { return vmaxvq_u8(*this); } + simdjson_inline uint8_t min_val() const { return vminvq_u8(*this); } + simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_u8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return vminq_u8(*this, other); } + simdjson_inline simd8 operator<=(const simd8 other) const { return vcleq_u8(*this, other); } + simdjson_inline simd8 operator>=(const simd8 other) const { return vcgeq_u8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_u8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_u8(*this, other); } // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. - simdjson_really_inline simd8 gt_bits(const simd8 other) const { return simd8(*this > other); } + simdjson_inline simd8 gt_bits(const simd8 other) const { return simd8(*this > other); } // Same as <, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. 
- simdjson_really_inline simd8 lt_bits(const simd8 other) const { return simd8(*this < other); } + simdjson_inline simd8 lt_bits(const simd8 other) const { return simd8(*this < other); } // Bit-specific operations - simdjson_really_inline simd8 any_bits_set(simd8 bits) const { return vtstq_u8(*this, bits); } - simdjson_really_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; } - simdjson_really_inline bool any_bits_set_anywhere(simd8 bits) const { return (*this & bits).any_bits_set_anywhere(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return vtstq_u8(*this, bits); } + simdjson_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return (*this & bits).any_bits_set_anywhere(); } template - simdjson_really_inline simd8 shr() const { return vshrq_n_u8(*this, N); } + simdjson_inline simd8 shr() const { return vshrq_n_u8(*this, N); } template - simdjson_really_inline simd8 shl() const { return vshlq_n_u8(*this, N); } + simdjson_inline simd8 shl() const { return vshlq_n_u8(*this, N); } // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) template - simdjson_really_inline simd8 lookup_16(simd8 lookup_table) const { + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { return lookup_table.apply_lookup_16_to(*this); } @@ -10239,7 +10376,7 @@ namespace simdjson { // signature simd8 compress(uint16_t mask) would be // sensible, but the AVX ISA makes this kind of approach difficult. 
template - simdjson_really_inline void compress(uint16_t mask, L* output) const { + simdjson_inline void compress(uint16_t mask, L* output) const { using internal::thintable_epi8; using internal::BitsSetTable256mul2; using internal::pshufb_combine_table; @@ -10276,7 +10413,7 @@ namespace simdjson { // Copies all bytes corresponding to a 0 in the low half of the mask (interpreted as a // bitset) to output1, then those corresponding to a 0 in the high half to output2. template - simdjson_really_inline void compress_halves(uint16_t mask, L* output1, L* output2) const { + simdjson_inline void compress_halves(uint16_t mask, L* output1, L* output2) const { using internal::thintable_epi8; uint8_t mask1 = uint8_t(mask); // least significant 8 bits uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits @@ -10295,7 +10432,7 @@ namespace simdjson { } template - simdjson_really_inline simd8 lookup_16( + simdjson_inline simd8 lookup_16( L replace0, L replace1, L replace2, L replace3, L replace4, L replace5, L replace6, L replace7, L replace8, L replace9, L replace10, L replace11, @@ -10309,7 +10446,7 @@ namespace simdjson { } template - simdjson_really_inline simd8 apply_lookup_16_to(const simd8 original) { + simdjson_inline simd8 apply_lookup_16_to(const simd8 original) { return vqtbl1q_u8(*this, simd8(original)); } }; @@ -10319,24 +10456,24 @@ namespace simdjson { struct simd8 { int8x16_t value; - static simdjson_really_inline simd8 splat(int8_t _value) { return vmovq_n_s8(_value); } - static simdjson_really_inline simd8 zero() { return vdupq_n_s8(0); } - static simdjson_really_inline simd8 load(const int8_t values[16]) { return vld1q_s8(values); } + static simdjson_inline simd8 splat(int8_t _value) { return vmovq_n_s8(_value); } + static simdjson_inline simd8 zero() { return vdupq_n_s8(0); } + static simdjson_inline simd8 load(const int8_t values[16]) { return vld1q_s8(values); } // Conversion from/to SIMD register - simdjson_really_inline simd8(const int8x16_t 
_value) : value{ _value } {} - simdjson_really_inline operator const int8x16_t& () const { return this->value; } - simdjson_really_inline operator int8x16_t& () { return this->value; } + simdjson_inline simd8(const int8x16_t _value) : value{ _value } {} + simdjson_inline operator const int8x16_t& () const { return this->value; } + simdjson_inline operator int8x16_t& () { return this->value; } // Zero constructor - simdjson_really_inline simd8() : simd8(zero()) {} + simdjson_inline simd8() : simd8(zero()) {} // Splat constructor - simdjson_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} // Array constructor - simdjson_really_inline simd8(const int8_t* values) : simd8(load(values)) {} + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} // Member-by-member initialization #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - simdjson_really_inline simd8( + simdjson_inline simd8( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) : simd8(make_int8x16_t( @@ -10344,7 +10481,7 @@ namespace simdjson { v8, v9, v10, v11, v12, v13, v14, v15 )) {} #else - simdjson_really_inline simd8( + simdjson_inline simd8( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) : simd8(int8x16_t{ @@ -10353,18 +10490,18 @@ namespace simdjson { }) {} #endif // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_really_inline static simd8 repeat_16( + simdjson_inline static simd8 repeat_16( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) { return simd8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, 
v10, v11, v12, v13, v14, v15 - ); + ); } // Store to array - simdjson_really_inline void store(int8_t dst[16]) const { return vst1q_s8(dst, *this); } + simdjson_inline void store(int8_t dst[16]) const { return vst1q_s8(dst, *this); } // Explicit conversion to/from unsigned // @@ -10372,35 +10509,35 @@ namespace simdjson { // In theory, we could check this occurrence with std::same_as and std::enabled_if but it is C++14 // and relatively ugly and hard to read. #ifndef SIMDJSON_REGULAR_VISUAL_STUDIO - simdjson_really_inline explicit simd8(const uint8x16_t other) : simd8(vreinterpretq_s8_u8(other)) {} + simdjson_inline explicit simd8(const uint8x16_t other) : simd8(vreinterpretq_s8_u8(other)) {} #endif - simdjson_really_inline explicit operator simd8() const { return vreinterpretq_u8_s8(this->value); } + simdjson_inline explicit operator simd8() const { return vreinterpretq_u8_s8(this->value); } // Math - simdjson_really_inline simd8 operator+(const simd8 other) const { return vaddq_s8(*this, other); } - simdjson_really_inline simd8 operator-(const simd8 other) const { return vsubq_s8(*this, other); } - simdjson_really_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } - simdjson_really_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } + simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_s8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_s8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } // Order-sensitive comparisons - simdjson_really_inline simd8 max_val(const simd8 other) const { return vmaxq_s8(*this, other); } - simdjson_really_inline simd8 min_val(const simd8 other) const { return vminq_s8(*this, other); } - simdjson_really_inline simd8 operator>(const simd8 other) const { 
return vcgtq_s8(*this, other); } - simdjson_really_inline simd8 operator<(const simd8 other) const { return vcltq_s8(*this, other); } - simdjson_really_inline simd8 operator==(const simd8 other) const { return vceqq_s8(*this, other); } + simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_s8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return vminq_s8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_s8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_s8(*this, other); } + simdjson_inline simd8 operator==(const simd8 other) const { return vceqq_s8(*this, other); } template - simdjson_really_inline simd8 prev(const simd8 prev_chunk) const { + simdjson_inline simd8 prev(const simd8 prev_chunk) const { return vextq_s8(prev_chunk, *this, 16 - N); } // Perform a lookup assuming no value is larger than 16 template - simdjson_really_inline simd8 lookup_16(simd8 lookup_table) const { + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { return lookup_table.apply_lookup_16_to(*this); } template - simdjson_really_inline simd8 lookup_16( + simdjson_inline simd8 lookup_16( L replace0, L replace1, L replace2, L replace3, L replace4, L replace5, L replace6, L replace7, L replace8, L replace9, L replace10, L replace11, @@ -10414,7 +10551,7 @@ namespace simdjson { } template - simdjson_really_inline simd8 apply_lookup_16_to(const simd8 original) { + simdjson_inline simd8 apply_lookup_16_to(const simd8 original) { return vqtbl1q_s8(*this, simd8(original)); } }; @@ -10429,22 +10566,22 @@ namespace simdjson { simd8x64& operator=(const simd8& other) = delete; // no assignment allowed simd8x64() = delete; // no default constructor allowed - simdjson_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{ chunk0, chunk1, chunk2, chunk3 } {} - simdjson_really_inline simd8x64(const T ptr[64]) : 
chunks{ simd8::load(ptr), simd8::load(ptr + 16), simd8::load(ptr + 32), simd8::load(ptr + 48) } {} + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{ chunk0, chunk1, chunk2, chunk3 } {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{ simd8::load(ptr), simd8::load(ptr + 16), simd8::load(ptr + 32), simd8::load(ptr + 48) } {} - simdjson_really_inline void store(T ptr[64]) const { + simdjson_inline void store(T ptr[64]) const { this->chunks[0].store(ptr + sizeof(simd8) * 0); this->chunks[1].store(ptr + sizeof(simd8) * 1); this->chunks[2].store(ptr + sizeof(simd8) * 2); this->chunks[3].store(ptr + sizeof(simd8) * 3); } - simdjson_really_inline simd8 reduce_or() const { + simdjson_inline simd8 reduce_or() const { return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); } - simdjson_really_inline uint64_t compress(uint64_t mask, T* output) const { + simdjson_inline uint64_t compress(uint64_t mask, T* output) const { uint64_t popcounts = vget_lane_u64(vreinterpret_u64_u8(vcnt_u8(vcreate_u8(~mask))), 0); // compute the prefix sum of the popcounts of each byte uint64_t offsets = popcounts * 0x0101010101010101; @@ -10455,7 +10592,7 @@ namespace simdjson { return offsets >> 56; } - simdjson_really_inline uint64_t to_bitmask() const { + simdjson_inline uint64_t to_bitmask() const { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO const uint8x16_t bit_mask = make_uint8x16_t( 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, @@ -10475,24 +10612,24 @@ namespace simdjson { return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); } - simdjson_really_inline uint64_t eq(const T m) const { + simdjson_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] == mask, this->chunks[1] == mask, this->chunks[2] == mask, this->chunks[3] == mask - ).to_bitmask(); + ).to_bitmask(); } - simdjson_really_inline uint64_t lteq(const T m) const { + simdjson_inline uint64_t 
lteq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] <= mask, this->chunks[1] <= mask, this->chunks[2] <= mask, this->chunks[3] <= mask - ).to_bitmask(); + ).to_bitmask(); } }; // struct simd8x64 @@ -10512,11 +10649,11 @@ namespace simdjson { // return non-zero if not a structural or whitespace char // zero otherwise - simdjson_really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace_negated[c]; } - simdjson_really_inline uint32_t is_structural_or_whitespace(uint8_t c) { + simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace[c]; } @@ -10547,7 +10684,7 @@ namespace simdjson { // // Note: we assume that surrogates are treated separately // - simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t* c) { + simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t* c) { if (cp <= 0x7F) { c[0] = uint8_t(cp); return 1; // ascii @@ -10567,7 +10704,7 @@ namespace simdjson { return 3; } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this - // is not needed + // is not needed c[0] = uint8_t((cp >> 18) + 240); c[1] = uint8_t(((cp >> 12) & 63) + 128); c[2] = uint8_t(((cp >> 6) & 63) + 128); @@ -10578,13 +10715,13 @@ namespace simdjson { return 0; // bad r } -#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm // this is a slow emulation routine for 32-bit // - static simdjson_really_inline uint64_t __emulu(uint32_t x, uint32_t y) { + static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { return x * (uint64_t)y; } - static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t* hi) { + static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t* hi) { uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); uint64_t bd = 
__emulu((uint32_t)ab, (uint32_t)cd); uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); @@ -10598,9 +10735,9 @@ namespace simdjson { using internal::value128; - simdjson_really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { + simdjson_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { value128 answer; -#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS #ifdef _M_ARM64 // ARM64 has native support for 64-bit multiplications, no need to emultate answer.high = __umulh(value1, value2); @@ -10608,7 +10745,7 @@ namespace simdjson { #else answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 #endif // _M_ARM64 -#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; answer.low = uint64_t(r); answer.high = uint64_t(r >> 64); @@ -10634,13 +10771,13 @@ namespace simdjson { // You might think that using memcpy makes this function expensive, but you'd be wrong. // All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); // to the compile-time constant 1936482662. - simdjson_really_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } // Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. // Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. 
simdjson_warn_unused - simdjson_really_inline uint32_t str4ncmp(const uint8_t* src, const char* atom) { + simdjson_inline uint32_t str4ncmp(const uint8_t* src, const char* atom) { uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); std::memcpy(&srcval, src, sizeof(uint32_t)); @@ -10648,36 +10785,36 @@ namespace simdjson { } simdjson_warn_unused - simdjson_really_inline bool is_valid_true_atom(const uint8_t* src) { + simdjson_inline bool is_valid_true_atom(const uint8_t* src) { return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused - simdjson_really_inline bool is_valid_true_atom(const uint8_t* src, size_t len) { + simdjson_inline bool is_valid_true_atom(const uint8_t* src, size_t len) { if (len > 4) { return is_valid_true_atom(src); } else if (len == 4) { return !str4ncmp(src, "true"); } else { return false; } } simdjson_warn_unused - simdjson_really_inline bool is_valid_false_atom(const uint8_t* src) { + simdjson_inline bool is_valid_false_atom(const uint8_t* src) { return (str4ncmp(src + 1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; } simdjson_warn_unused - simdjson_really_inline bool is_valid_false_atom(const uint8_t* src, size_t len) { + simdjson_inline bool is_valid_false_atom(const uint8_t* src, size_t len) { if (len > 5) { return is_valid_false_atom(src); } else if (len == 5) { return !str4ncmp(src + 1, "alse"); } else { return false; } } simdjson_warn_unused - simdjson_really_inline bool is_valid_null_atom(const uint8_t* src) { + simdjson_inline bool is_valid_null_atom(const uint8_t* src) { return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused - simdjson_really_inline bool is_valid_null_atom(const uint8_t* src, size_t len) { + simdjson_inline bool is_valid_null_atom(const 
uint8_t* src, size_t len) { if (len > 4) { return is_valid_null_atom(src); } else if (len == 4) { return !str4ncmp(src, "null"); } else { return false; } @@ -10703,18 +10840,18 @@ namespace simdjson { struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_really_inline static backslash_and_quote copy_and_find(const uint8_t* src, uint8_t* dst); + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t* src, uint8_t* dst); - simdjson_really_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } - simdjson_really_inline bool has_backslash() { return bs_bits != 0; } - simdjson_really_inline int quote_index() { return trailing_zeroes(quote_bits); } - simdjson_really_inline int backslash_index() { return trailing_zeroes(bs_bits); } + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } uint32_t bs_bits; uint32_t quote_bits; }; // struct backslash_and_quote - simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t* src, uint8_t* dst) { + simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t* src, uint8_t* dst) { // this can read up to 31 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); @@ -10736,9 +10873,6 @@ namespace simdjson { } // namespace arm64 } // namespace simdjson -/* begin file include/simdjson/generic/stringparsing.h */ -/* end file include/simdjson/generic/stringparsing.h */ - #endif // SIMDJSON_ARM64_STRINGPARSING_H /* end file include/simdjson/arm64/stringparsing.h */ /* begin file 
include/simdjson/arm64/numberparsing.h */ @@ -10751,7 +10885,7 @@ namespace simdjson { // we don't have SSE, so let us use a scalar function // credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ - static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t* chars) { + static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t* chars) { uint64_t val; std::memcpy(&val, chars, sizeof(uint64_t)); val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; @@ -10804,7 +10938,7 @@ namespace simdjson { // Convert a mantissa, an exponent and a sign bit into an ieee64 double. // The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). // The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. - simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { double d; mantissa &= ~(1ULL << 52); mantissa |= real_exponent << 52; @@ -10819,7 +10953,7 @@ namespace simdjson { // set to false. This should work *most of the time* (like 99% of the time). // We assume that power is in the [smallest_power, // largest_power] interval: the caller is responsible for this check. - simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double& d) { + simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double& d) { // we start with a fast path // It was described in // Clinger WD. How to read floating point numbers accurately. @@ -10880,7 +11014,7 @@ namespace simdjson { // In the slow path, we need to adjust i so that it is > 1<<63 which is always // possible, except if i == 0, so we handle i == 0 separately. if (i == 0) { - d = 0.0; + d = negative ? -0.0 : 0.0; return true; } @@ -10993,9 +11127,9 @@ namespace simdjson { // Here we have mantissa < (1<<54). 
int64_t real_exponent = exponent - lz; if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? - // Here have that real_exponent <= 0 so -real_exponent >= 0 + // Here have that real_exponent <= 0 so -real_exponent >= 0 if (-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. - d = 0.0; + d = negative ? -0.0 : 0.0; return true; } // next line is safe because -real_exponent + 1 < 0 @@ -11103,7 +11237,7 @@ namespace simdjson { // check quickly whether the next 8 chars are made of digits // at a glance, it looks better than Mula's // http://0x80.pl/articles/swar-digits-validate.html - simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t * chars) { + simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t * chars) { uint64_t val; // this can read up to 7 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding @@ -11130,7 +11264,7 @@ namespace simdjson { template SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later - simdjson_really_inline bool parse_digit(const uint8_t c, I & i) { + simdjson_inline bool parse_digit(const uint8_t c, I & i) { const uint8_t digit = static_cast(c - '0'); if (digit > 9) { return false; @@ -11140,7 +11274,7 @@ namespace simdjson { return true; } - simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t* const src, const uint8_t * &p, uint64_t & i, int64_t & exponent) { + simdjson_inline error_code parse_decimal(simdjson_unused const uint8_t* const src, const uint8_t * &p, uint64_t & i, int64_t & exponent) { // we continue with the fiction that we have an integer. 
If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the @@ -11168,7 +11302,7 @@ namespace simdjson { return SUCCESS; } - simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t* const src, const uint8_t * &p, int64_t & exponent) { + simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t* const src, const uint8_t * &p, int64_t & exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well @@ -11219,7 +11353,7 @@ namespace simdjson { return SUCCESS; } - simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { // It is possible that the integer had an overflow. // We have to handle the case where we have 0.0000somenumber. const uint8_t* start = start_digits; @@ -11229,7 +11363,7 @@ namespace simdjson { } template - simdjson_really_inline error_code write_float(const uint8_t* const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W & writer) { + simdjson_inline error_code write_float(const uint8_t* const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W & writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. @@ -11265,11 +11399,12 @@ namespace simdjson { static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); // if ((exponent < simdjson::internal::smallest_power) || (i == 0)) { - WRITE_DOUBLE(0, src, writer); + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? 
-0.0 : 0.0, src, writer); return SUCCESS; } else { // (exponent > largest_power) and (i != 0) - // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. return INVALID_NUMBER(src); } } @@ -11286,20 +11421,20 @@ namespace simdjson { #ifdef SIMDJSON_SKIPNUMBERPARSING template - simdjson_really_inline error_code parse_number(const uint8_t* const, W & writer) { + simdjson_inline error_code parse_number(const uint8_t* const, W & writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } - simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { return false; } - simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } - simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } + simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t* const src) noexcept { return 0; } + 
simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } + simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } + simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } #else // parse the number at src @@ -11312,13 +11447,13 @@ namespace simdjson { // // Our objective is accurate parsing (ULP of 0) at high speed. template - simdjson_really_inline error_code parse_number(const uint8_t* const src, W & writer) { + simdjson_inline error_code parse_number(const uint8_t* const src, W & writer) { // // Check for minus sign // bool negative = (*src == '-'); - const uint8_t* p = src + negative; + const uint8_t* p = src + uint8_t(negative); // // Parse the integer part. @@ -11464,7 +11599,7 @@ namespace simdjson { NUMBER_ERROR }; // Parse any number from 0 to 18,446,744,073,709,551,615 - simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t* const src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t* const src) noexcept { const uint8_t* p = src; // // Parse the integer part. 
@@ -11514,7 +11649,7 @@ namespace simdjson { // Parse any number from 0 to 18,446,744,073,709,551,615 // Never read at src_end or beyond - simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t* const src, const uint8_t* const src_end) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t* const src, const uint8_t* const src_end) noexcept { const uint8_t* p = src; // // Parse the integer part. @@ -11562,7 +11697,7 @@ namespace simdjson { } // Parse any number from 0 to 18,446,744,073,709,551,615 - simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t* const src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t* const src) noexcept { const uint8_t* p = src + 1; // // Parse the integer part. @@ -11612,12 +11747,12 @@ namespace simdjson { } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t* src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); - const uint8_t* p = src + negative; + const uint8_t* p = src + uint8_t(negative); // // Parse the integer part. 
@@ -11655,13 +11790,13 @@ namespace simdjson { // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // Never read at src_end or beyond - simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t* const src, const uint8_t* const src_end) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t* const src, const uint8_t* const src_end) noexcept { // // Check for minus sign // if (src == src_end) { return NUMBER_ERROR; } bool negative = (*src == '-'); - const uint8_t* p = src + negative; + const uint8_t* p = src + uint8_t(negative); // // Parse the integer part. @@ -11698,24 +11833,24 @@ namespace simdjson { } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t* src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); - const uint8_t* p = src + negative + 1; + src += uint8_t(negative) + 1; // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t* const start_digits = p; + const uint8_t* const start_digits = src; uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } + while (parse_digit(*src, i)) { src++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); + size_t digit_count = size_t(src - start_digits); // We go from // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // so we can never represent numbers that have more than 19 digits. @@ -11727,11 +11862,11 @@ namespace simdjson { // Here digit_count > 0. 
if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: - if (*p != '"') { return NUMBER_ERROR; } + if (*src != '"') { return NUMBER_ERROR; } // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. // Performance note: This check is only needed when digit_count == longest_digit_count but it is // so cheap that we might as well always make it. @@ -11739,12 +11874,12 @@ namespace simdjson { return negative ? (~i + 1) : i; } - simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t* src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); // // Parse the integer part. 
@@ -11812,19 +11947,19 @@ namespace simdjson { if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src - negative, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; } - simdjson_unused simdjson_really_inline bool is_negative(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline bool is_negative(const uint8_t* src) noexcept { return (*src == '-'); } - simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t* src) noexcept { bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); const uint8_t* p = src; while (static_cast(*p - '0') <= 9) { p++; } if (p == src) { return NUMBER_ERROR; } @@ -11832,9 +11967,9 @@ namespace simdjson { return false; } - simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t* src) noexcept { bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); const uint8_t* p = src; while (static_cast(*p - '0') <= 9) { p++; } if (p == src) { return NUMBER_ERROR; } @@ -11858,13 +11993,13 @@ namespace simdjson { } // Never read at src_end or beyond - simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t* src, const uint8_t* const src_end) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t* src, const uint8_t* const src_end) noexcept { if (src == src_end) { return NUMBER_ERROR; } // // Check for minus sign // bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); // // Parse the integer part. 
@@ -11934,18 +12069,18 @@ namespace simdjson { if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src - negative, src_end, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { return NUMBER_ERROR; } return d; } - simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t* src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); - src += negative + 1; + src += uint8_t(negative) + 1; // // Parse the integer part. @@ -12013,7 +12148,7 @@ namespace simdjson { if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src - negative, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; @@ -12064,9 +12199,12 @@ namespace simdjson { using namespace simdjson::dom; } + /** + * @private + */ class implementation final : public simdjson::implementation { public: - simdjson_really_inline implementation() : simdjson::implementation( + simdjson_inline implementation() : simdjson::implementation( "fallback", "Generic fallback implementation", 0 @@ -12128,11 +12266,12 @@ namespace simdjson { simdjson_warn_unused error_code stage1(const uint8_t* buf, size_t len, stage1_mode partial) noexcept final; simdjson_warn_unused error_code stage2(dom::document& doc) noexcept final; simdjson_warn_unused error_code stage2_next(dom::document& doc) noexcept final; - simdjson_warn_unused uint8_t* parse_string(const uint8_t* src, uint8_t* dst) const noexcept final; + simdjson_warn_unused uint8_t* parse_string(const uint8_t* src, uint8_t* dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t* parse_wobbly_string(const uint8_t* src, uint8_t* dst) const noexcept final; inline simdjson_warn_unused 
error_code set_capacity(size_t capacity) noexcept final; inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; private: - simdjson_really_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); }; @@ -12201,7 +12340,7 @@ namespace simdjson { #endif /* result might be undefined when input_num is zero */ - simdjson_really_inline int leading_zeroes(uint64_t input_num) { + simdjson_inline int leading_zeroes(uint64_t input_num) { #ifdef _MSC_VER unsigned long leading_zero = 0; // Search the mask data from most significant bit (MSB) @@ -12230,11 +12369,11 @@ namespace simdjson { // return non-zero if not a structural or whitespace char // zero otherwise - simdjson_really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace_negated[c]; } - simdjson_really_inline uint32_t is_structural_or_whitespace(uint8_t c) { + simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace[c]; } @@ -12265,7 +12404,7 @@ namespace simdjson { // // Note: we assume that surrogates are treated separately // - simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t* c) { + simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t* c) { if (cp <= 0x7F) { c[0] = uint8_t(cp); return 1; // ascii @@ -12285,7 +12424,7 @@ namespace simdjson { return 3; } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this - // is not needed + // is not needed c[0] = uint8_t((cp >> 18) + 240); c[1] = uint8_t(((cp >> 12) & 63) + 128); c[2] = uint8_t(((cp >> 6) & 63) + 128); @@ -12296,13 +12435,13 @@ namespace simdjson { return 0; // bad r } -#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm // this is a slow emulation routine 
for 32-bit // - static simdjson_really_inline uint64_t __emulu(uint32_t x, uint32_t y) { + static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { return x * (uint64_t)y; } - static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t* hi) { + static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t* hi) { uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); @@ -12316,9 +12455,9 @@ namespace simdjson { using internal::value128; - simdjson_really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { + simdjson_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { value128 answer; -#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS #ifdef _M_ARM64 // ARM64 has native support for 64-bit multiplications, no need to emultate answer.high = __umulh(value1, value2); @@ -12326,7 +12465,7 @@ namespace simdjson { #else answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 #endif // _M_ARM64 -#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; answer.low = uint64_t(r); answer.high = uint64_t(r >> 64); @@ -12352,13 +12491,13 @@ namespace simdjson { // You might think that using memcpy makes this function expensive, but you'd be wrong. // All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); // to the compile-time constant 1936482662. 
- simdjson_really_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } // Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. // Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. simdjson_warn_unused - simdjson_really_inline uint32_t str4ncmp(const uint8_t* src, const char* atom) { + simdjson_inline uint32_t str4ncmp(const uint8_t* src, const char* atom) { uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); std::memcpy(&srcval, src, sizeof(uint32_t)); @@ -12366,36 +12505,36 @@ namespace simdjson { } simdjson_warn_unused - simdjson_really_inline bool is_valid_true_atom(const uint8_t* src) { + simdjson_inline bool is_valid_true_atom(const uint8_t* src) { return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused - simdjson_really_inline bool is_valid_true_atom(const uint8_t* src, size_t len) { + simdjson_inline bool is_valid_true_atom(const uint8_t* src, size_t len) { if (len > 4) { return is_valid_true_atom(src); } else if (len == 4) { return !str4ncmp(src, "true"); } else { return false; } } simdjson_warn_unused - simdjson_really_inline bool is_valid_false_atom(const uint8_t* src) { + simdjson_inline bool is_valid_false_atom(const uint8_t* src) { return (str4ncmp(src + 1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; } simdjson_warn_unused - simdjson_really_inline bool is_valid_false_atom(const uint8_t* src, size_t len) { + simdjson_inline bool is_valid_false_atom(const uint8_t* src, size_t len) { if (len > 5) { return is_valid_false_atom(src); } else if (len == 5) { 
return !str4ncmp(src + 1, "alse"); } else { return false; } } simdjson_warn_unused - simdjson_really_inline bool is_valid_null_atom(const uint8_t* src) { + simdjson_inline bool is_valid_null_atom(const uint8_t* src) { return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused - simdjson_really_inline bool is_valid_null_atom(const uint8_t* src, size_t len) { + simdjson_inline bool is_valid_null_atom(const uint8_t* src, size_t len) { if (len > 4) { return is_valid_null_atom(src); } else if (len == 4) { return !str4ncmp(src, "null"); } else { return false; } @@ -12419,17 +12558,17 @@ namespace simdjson { struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 1; - simdjson_really_inline static backslash_and_quote copy_and_find(const uint8_t* src, uint8_t* dst); + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t* src, uint8_t* dst); - simdjson_really_inline bool has_quote_first() { return c == '"'; } - simdjson_really_inline bool has_backslash() { return c == '\\'; } - simdjson_really_inline int quote_index() { return c == '"' ? 0 : 1; } - simdjson_really_inline int backslash_index() { return c == '\\' ? 0 : 1; } + simdjson_inline bool has_quote_first() { return c == '"'; } + simdjson_inline bool has_backslash() { return c == '\\'; } + simdjson_inline int quote_index() { return c == '"' ? 0 : 1; } + simdjson_inline int backslash_index() { return c == '\\' ? 
0 : 1; } uint8_t c; }; // struct backslash_and_quote - simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t* src, uint8_t* dst) { + simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t* src, uint8_t* dst) { // store to dest unconditionally - we can overwrite the bits we don't like later dst[0] = src[0]; return { src[0] }; @@ -12439,9 +12578,6 @@ namespace simdjson { } // namespace fallback } // namespace simdjson -/* begin file include/simdjson/generic/stringparsing.h */ -/* end file include/simdjson/generic/stringparsing.h */ - #endif // SIMDJSON_FALLBACK_STRINGPARSING_H /* end file include/simdjson/fallback/stringparsing.h */ /* begin file include/simdjson/fallback/numberparsing.h */ @@ -12459,14 +12595,14 @@ namespace simdjson { namespace fallback { namespace { // credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ - static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const char* chars) { + static simdjson_inline uint32_t parse_eight_digits_unrolled(const char* chars) { uint64_t val; memcpy(&val, chars, sizeof(uint64_t)); val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); } - static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t* chars) { + static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t* chars) { return parse_eight_digits_unrolled(reinterpret_cast(chars)); } @@ -12515,7 +12651,7 @@ namespace simdjson { // Convert a mantissa, an exponent and a sign bit into an ieee64 double. // The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). // The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. 
- simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { double d; mantissa &= ~(1ULL << 52); mantissa |= real_exponent << 52; @@ -12530,7 +12666,7 @@ namespace simdjson { // set to false. This should work *most of the time* (like 99% of the time). // We assume that power is in the [smallest_power, // largest_power] interval: the caller is responsible for this check. - simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double& d) { + simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double& d) { // we start with a fast path // It was described in // Clinger WD. How to read floating point numbers accurately. @@ -12591,7 +12727,7 @@ namespace simdjson { // In the slow path, we need to adjust i so that it is > 1<<63 which is always // possible, except if i == 0, so we handle i == 0 separately. if (i == 0) { - d = 0.0; + d = negative ? -0.0 : 0.0; return true; } @@ -12704,9 +12840,9 @@ namespace simdjson { // Here we have mantissa < (1<<54). int64_t real_exponent = exponent - lz; if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? - // Here have that real_exponent <= 0 so -real_exponent >= 0 + // Here have that real_exponent <= 0 so -real_exponent >= 0 if (-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. - d = 0.0; + d = negative ? 
-0.0 : 0.0; return true; } // next line is safe because -real_exponent + 1 < 0 @@ -12814,7 +12950,7 @@ namespace simdjson { // check quickly whether the next 8 chars are made of digits // at a glance, it looks better than Mula's // http://0x80.pl/articles/swar-digits-validate.html - simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t * chars) { + simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t * chars) { uint64_t val; // this can read up to 7 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding @@ -12841,7 +12977,7 @@ namespace simdjson { template SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later - simdjson_really_inline bool parse_digit(const uint8_t c, I & i) { + simdjson_inline bool parse_digit(const uint8_t c, I & i) { const uint8_t digit = static_cast(c - '0'); if (digit > 9) { return false; @@ -12851,7 +12987,7 @@ namespace simdjson { return true; } - simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t* const src, const uint8_t * &p, uint64_t & i, int64_t & exponent) { + simdjson_inline error_code parse_decimal(simdjson_unused const uint8_t* const src, const uint8_t * &p, uint64_t & i, int64_t & exponent) { // we continue with the fiction that we have an integer. 
If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the @@ -12879,7 +13015,7 @@ namespace simdjson { return SUCCESS; } - simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t* const src, const uint8_t * &p, int64_t & exponent) { + simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t* const src, const uint8_t * &p, int64_t & exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well @@ -12930,7 +13066,7 @@ namespace simdjson { return SUCCESS; } - simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { // It is possible that the integer had an overflow. // We have to handle the case where we have 0.0000somenumber. const uint8_t* start = start_digits; @@ -12940,7 +13076,7 @@ namespace simdjson { } template - simdjson_really_inline error_code write_float(const uint8_t* const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W & writer) { + simdjson_inline error_code write_float(const uint8_t* const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W & writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. @@ -12976,11 +13112,12 @@ namespace simdjson { static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); // if ((exponent < simdjson::internal::smallest_power) || (i == 0)) { - WRITE_DOUBLE(0, src, writer); + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? 
-0.0 : 0.0, src, writer); return SUCCESS; } else { // (exponent > largest_power) and (i != 0) - // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. return INVALID_NUMBER(src); } } @@ -12997,20 +13134,20 @@ namespace simdjson { #ifdef SIMDJSON_SKIPNUMBERPARSING template - simdjson_really_inline error_code parse_number(const uint8_t* const, W & writer) { + simdjson_inline error_code parse_number(const uint8_t* const, W & writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } - simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { return false; } - simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } - simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } + simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t* const src) noexcept { return 0; } + 
simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } + simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } + simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } #else // parse the number at src @@ -13023,13 +13160,13 @@ namespace simdjson { // // Our objective is accurate parsing (ULP of 0) at high speed. template - simdjson_really_inline error_code parse_number(const uint8_t* const src, W & writer) { + simdjson_inline error_code parse_number(const uint8_t* const src, W & writer) { // // Check for minus sign // bool negative = (*src == '-'); - const uint8_t* p = src + negative; + const uint8_t* p = src + uint8_t(negative); // // Parse the integer part. @@ -13175,7 +13312,7 @@ namespace simdjson { NUMBER_ERROR }; // Parse any number from 0 to 18,446,744,073,709,551,615 - simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t* const src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t* const src) noexcept { const uint8_t* p = src; // // Parse the integer part. 
@@ -13225,7 +13362,7 @@ namespace simdjson { // Parse any number from 0 to 18,446,744,073,709,551,615 // Never read at src_end or beyond - simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t* const src, const uint8_t* const src_end) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t* const src, const uint8_t* const src_end) noexcept { const uint8_t* p = src; // // Parse the integer part. @@ -13273,7 +13410,7 @@ namespace simdjson { } // Parse any number from 0 to 18,446,744,073,709,551,615 - simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t* const src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t* const src) noexcept { const uint8_t* p = src + 1; // // Parse the integer part. @@ -13323,12 +13460,12 @@ namespace simdjson { } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t* src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); - const uint8_t* p = src + negative; + const uint8_t* p = src + uint8_t(negative); // // Parse the integer part. 
@@ -13366,13 +13503,13 @@ namespace simdjson { // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // Never read at src_end or beyond - simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t* const src, const uint8_t* const src_end) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t* const src, const uint8_t* const src_end) noexcept { // // Check for minus sign // if (src == src_end) { return NUMBER_ERROR; } bool negative = (*src == '-'); - const uint8_t* p = src + negative; + const uint8_t* p = src + uint8_t(negative); // // Parse the integer part. @@ -13409,24 +13546,24 @@ namespace simdjson { } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t* src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); - const uint8_t* p = src + negative + 1; + src += uint8_t(negative) + 1; // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t* const start_digits = p; + const uint8_t* const start_digits = src; uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } + while (parse_digit(*src, i)) { src++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); + size_t digit_count = size_t(src - start_digits); // We go from // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // so we can never represent numbers that have more than 19 digits. @@ -13438,11 +13575,11 @@ namespace simdjson { // Here digit_count > 0. 
if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: - if (*p != '"') { return NUMBER_ERROR; } + if (*src != '"') { return NUMBER_ERROR; } // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. // Performance note: This check is only needed when digit_count == longest_digit_count but it is // so cheap that we might as well always make it. @@ -13450,12 +13587,12 @@ namespace simdjson { return negative ? (~i + 1) : i; } - simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t* src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); // // Parse the integer part. 
@@ -13523,19 +13660,19 @@ namespace simdjson { if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src - negative, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; } - simdjson_unused simdjson_really_inline bool is_negative(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline bool is_negative(const uint8_t* src) noexcept { return (*src == '-'); } - simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t* src) noexcept { bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); const uint8_t* p = src; while (static_cast(*p - '0') <= 9) { p++; } if (p == src) { return NUMBER_ERROR; } @@ -13543,9 +13680,9 @@ namespace simdjson { return false; } - simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t* src) noexcept { bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); const uint8_t* p = src; while (static_cast(*p - '0') <= 9) { p++; } if (p == src) { return NUMBER_ERROR; } @@ -13569,13 +13706,13 @@ namespace simdjson { } // Never read at src_end or beyond - simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t* src, const uint8_t* const src_end) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t* src, const uint8_t* const src_end) noexcept { if (src == src_end) { return NUMBER_ERROR; } // // Check for minus sign // bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); // // Parse the integer part. 
@@ -13645,18 +13782,18 @@ namespace simdjson { if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src - negative, src_end, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { return NUMBER_ERROR; } return d; } - simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t* src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); - src += negative + 1; + src += uint8_t(negative) + 1; // // Parse the integer part. @@ -13724,7 +13861,7 @@ namespace simdjson { if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src - negative, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; @@ -13783,9 +13920,12 @@ namespace simdjson { using namespace simdjson; + /** + * @private + */ class implementation final : public simdjson::implementation { public: - simdjson_really_inline implementation() : simdjson::implementation( + simdjson_inline implementation() : simdjson::implementation( "icelake", "Intel/AMD AVX512", internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 | internal::instruction_set::AVX512F | internal::instruction_set::AVX512DQ | internal::instruction_set::AVX512CD | internal::instruction_set::AVX512BW | internal::instruction_set::AVX512VL | internal::instruction_set::AVX512VBMI2 @@ -13809,14 +13949,14 @@ namespace simdjson { #define SIMDJSON_ICELAKE_INTRINSICS_H -#ifdef SIMDJSON_VISUAL_STUDIO +#if SIMDJSON_VISUAL_STUDIO // under clang within visual studio, this will include #include // visual studio or clang #else #include // elsewhere #endif // SIMDJSON_VISUAL_STUDIO -#ifdef SIMDJSON_CLANG_VISUAL_STUDIO +#if 
SIMDJSON_CLANG_VISUAL_STUDIO /** * You are not supposed, normally, to include these * headers directly. Instead you should either include intrin.h @@ -13859,6 +13999,8 @@ namespace simdjson { #endif // _blsr_u64 #endif // SIMDJSON_CLANG_VISUAL_STUDIO +static_assert(sizeof(__m512i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for icelake"); + #endif // SIMDJSON_ICELAKE_INTRINSICS_H /* end file include/simdjson/icelake/intrinsics.h */ @@ -13908,11 +14050,12 @@ namespace simdjson { simdjson_warn_unused error_code stage1(const uint8_t* buf, size_t len, stage1_mode partial) noexcept final; simdjson_warn_unused error_code stage2(dom::document& doc) noexcept final; simdjson_warn_unused error_code stage2_next(dom::document& doc) noexcept final; - simdjson_warn_unused uint8_t* parse_string(const uint8_t* src, uint8_t* dst) const noexcept final; + simdjson_warn_unused uint8_t* parse_string(const uint8_t* src, uint8_t* dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t* parse_wobbly_string(const uint8_t* src, uint8_t* dst) const noexcept final; inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; private: - simdjson_really_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); }; @@ -13965,8 +14108,12 @@ namespace simdjson { // but the algorithms do not end up using the returned value. // Sadly, sanitizers are not smart enough to figure it out. SIMDJSON_NO_SANITIZE_UNDEFINED - simdjson_really_inline int trailing_zeroes(uint64_t input_num) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + // This function can be used safely even if not all bytes have been + // initialized. 
+ // See issue https://github.com/simdjson/simdjson/issues/1965 + SIMDJSON_NO_SANITIZE_MEMORY + simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO return (int)_tzcnt_u64(input_num); #else // SIMDJSON_REGULAR_VISUAL_STUDIO //////// @@ -13979,29 +14126,29 @@ namespace simdjson { } /* result might be undefined when input_num is zero */ - simdjson_really_inline uint64_t clear_lowest_bit(uint64_t input_num) { + simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { return _blsr_u64(input_num); } /* result might be undefined when input_num is zero */ - simdjson_really_inline int leading_zeroes(uint64_t input_num) { + simdjson_inline int leading_zeroes(uint64_t input_num) { return int(_lzcnt_u64(input_num)); } -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - simdjson_really_inline unsigned __int64 count_ones(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { // note: we do not support legacy 32-bit Windows return __popcnt64(input_num);// Visual Studio wants two underscores } #else - simdjson_really_inline long long int count_ones(uint64_t input_num) { + simdjson_inline long long int count_ones(uint64_t input_num) { return _popcnt64(input_num); } #endif - simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2, + simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t* result) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if SIMDJSON_REGULAR_VISUAL_STUDIO return _addcarry_u64(0, value1, value2, reinterpret_cast(result)); #else @@ -14029,7 +14176,7 @@ namespace simdjson { // // For example, prefix_xor(00100100) == 00011100 // - simdjson_really_inline uint64_t prefix_xor(const uint64_t bitmask) { + simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { // There should be no such thing with a processor supporting avx2 // but not clmul. 
__m128i all_ones = _mm_set1_epi8('\xFF'); @@ -14085,23 +14232,23 @@ namespace simdjson { __m512i value; // Zero constructor - simdjson_really_inline base() : value{ __m512i() } {} + simdjson_inline base() : value{ __m512i() } {} // Conversion from SIMD register - simdjson_really_inline base(const __m512i _value) : value(_value) {} + simdjson_inline base(const __m512i _value) : value(_value) {} // Conversion to SIMD register - simdjson_really_inline operator const __m512i& () const { return this->value; } - simdjson_really_inline operator __m512i& () { return this->value; } + simdjson_inline operator const __m512i& () const { return this->value; } + simdjson_inline operator __m512i& () { return this->value; } // Bit operations - simdjson_really_inline Child operator|(const Child other) const { return _mm512_or_si512(*this, other); } - simdjson_really_inline Child operator&(const Child other) const { return _mm512_and_si512(*this, other); } - simdjson_really_inline Child operator^(const Child other) const { return _mm512_xor_si512(*this, other); } - simdjson_really_inline Child bit_andnot(const Child other) const { return _mm512_andnot_si512(other, *this); } - simdjson_really_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } - simdjson_really_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } - simdjson_really_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + simdjson_inline Child operator|(const Child other) const { return _mm512_or_si512(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm512_and_si512(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm512_xor_si512(*this, other); } + simdjson_inline Child bit_andnot(const Child other) 
const { return _mm512_andnot_si512(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } }; // Forward-declared so they can be used by splat and friends. @@ -14113,8 +14260,8 @@ namespace simdjson { typedef uint32_t bitmask_t; typedef uint64_t bitmask2_t; - simdjson_really_inline base8() : base>() {} - simdjson_really_inline base8(const __m512i _value) : base>(_value) {} + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m512i _value) : base>(_value) {} friend simdjson_really_inline uint64_t operator==(const simd8 lhs, const simd8 rhs) { return _mm512_cmpeq_epi8_mask(lhs, rhs); @@ -14123,39 +14270,35 @@ namespace simdjson { static const int SIZE = sizeof(base::value); template - simdjson_really_inline simd8 prev(const simd8 prev_chunk) const { -#if SIMDJSON_GCC8 + simdjson_inline simd8 prev(const simd8 prev_chunk) const { // workaround for compilers unable to figure out that 16 - N is a constant (GCC 8) constexpr int shift = 16 - N; return _mm512_alignr_epi8(*this, _mm512_permutex2var_epi64(prev_chunk, _mm512_set_epi64(13, 12, 11, 10, 9, 8, 7, 6), *this), shift); -#else - return _mm512_alignr_epi8(*this, _mm512_permutex2var_epi64(prev_chunk, _mm512_set_epi64(13, 12, 11, 10, 9, 8, 7, 6), *this), 16 - N); -#endif } }; // SIMD byte mask type (returned by things like eq and gt) template<> struct simd8 : base8 { - static simdjson_really_inline simd8 splat(bool _value) { return _mm512_set1_epi8(uint8_t(-(!!_value))); } + static simdjson_inline simd8 splat(bool _value) { return _mm512_set1_epi8(uint8_t(-(!!_value))); } - simdjson_really_inline simd8() : 
base8() {} - simdjson_really_inline simd8(const __m512i _value) : base8(_value) {} + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m512i _value) : base8(_value) {} // Splat constructor - simdjson_really_inline simd8(bool _value) : base8(splat(_value)) {} - simdjson_really_inline bool any() const { return !!_mm512_test_epi8_mask(*this, *this); } - simdjson_really_inline simd8 operator~() const { return *this ^ true; } + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + simdjson_inline bool any() const { return !!_mm512_test_epi8_mask(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } }; template struct base8_numeric : base8 { - static simdjson_really_inline simd8 splat(T _value) { return _mm512_set1_epi8(_value); } - static simdjson_really_inline simd8 zero() { return _mm512_setzero_si512(); } - static simdjson_really_inline simd8 load(const T values[64]) { + static simdjson_inline simd8 splat(T _value) { return _mm512_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm512_setzero_si512(); } + static simdjson_inline simd8 load(const T values[64]) { return _mm512_loadu_si512(reinterpret_cast(values)); } // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdjson_really_inline simd8 repeat_16( + static simdjson_inline simd8 repeat_16( T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 ) { @@ -14168,27 +14311,27 @@ namespace simdjson { v8, v9, v10, v11, v12, v13, v14, v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15 - ); + ); } - simdjson_really_inline base8_numeric() : base8() {} - simdjson_really_inline base8_numeric(const __m512i _value) : base8(_value) {} + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m512i _value) : base8(_value) {} // Store to array - simdjson_really_inline void store(T dst[64]) const { return 
_mm512_storeu_si512(reinterpret_cast<__m512i*>(dst), *this); } + simdjson_inline void store(T dst[64]) const { return _mm512_storeu_si512(reinterpret_cast<__m512i*>(dst), *this); } // Addition/subtraction are the same for signed and unsigned - simdjson_really_inline simd8 operator+(const simd8 other) const { return _mm512_add_epi8(*this, other); } - simdjson_really_inline simd8 operator-(const simd8 other) const { return _mm512_sub_epi8(*this, other); } - simdjson_really_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } - simdjson_really_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + simdjson_inline simd8 operator+(const simd8 other) const { return _mm512_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm512_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } // Override to distinguish from bool version - simdjson_really_inline simd8 operator~() const { return *this ^ 0xFFu; } + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) template - simdjson_really_inline simd8 lookup_16(simd8 lookup_table) const { + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { return _mm512_shuffle_epi8(lookup_table, *this); } @@ -14200,12 +14343,12 @@ namespace simdjson { // signature simd8 compress(uint32_t mask) would be // sensible, but the AVX ISA makes this kind of approach difficult. 
template - simdjson_really_inline void compress(uint64_t mask, L* output) const { + simdjson_inline void compress(uint64_t mask, L* output) const { _mm512_mask_compressstoreu_epi8(output, ~mask, *this); } template - simdjson_really_inline simd8 lookup_16( + simdjson_inline simd8 lookup_16( L replace0, L replace1, L replace2, L replace3, L replace4, L replace5, L replace6, L replace7, L replace8, L replace9, L replace10, L replace11, @@ -14222,14 +14365,14 @@ namespace simdjson { // Signed bytes template<> struct simd8 : base8_numeric { - simdjson_really_inline simd8() : base8_numeric() {} - simdjson_really_inline simd8(const __m512i _value) : base8_numeric(_value) {} + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} // Splat constructor - simdjson_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} // Array constructor - simdjson_really_inline simd8(const int8_t values[64]) : simd8(load(values)) {} + simdjson_inline simd8(const int8_t values[64]) : simd8(load(values)) {} // Member-by-member initialization - simdjson_really_inline simd8( + simdjson_inline simd8( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, @@ -14250,7 +14393,7 @@ namespace simdjson { )) {} // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_really_inline static simd8 repeat_16( + simdjson_inline static simd8 repeat_16( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) { @@ -14263,28 +14406,28 @@ namespace simdjson { v8, v9, v10, v11, v12, v13, v14, v15, v0, v1, v2, 
v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15 - ); + ); } // Order-sensitive comparisons - simdjson_really_inline simd8 max_val(const simd8 other) const { return _mm512_max_epi8(*this, other); } - simdjson_really_inline simd8 min_val(const simd8 other) const { return _mm512_min_epi8(*this, other); } + simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epi8(*this, other); } - simdjson_really_inline simd8 operator>(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(*this, other), _mm512_set1_epi8(uint8_t(0x80))); } - simdjson_really_inline simd8 operator<(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(other, *this), _mm512_set1_epi8(uint8_t(0x80))); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(*this, other), _mm512_set1_epi8(uint8_t(0x80))); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(other, *this), _mm512_set1_epi8(uint8_t(0x80))); } }; // Unsigned bytes template<> struct simd8 : base8_numeric { - simdjson_really_inline simd8() : base8_numeric() {} - simdjson_really_inline simd8(const __m512i _value) : base8_numeric(_value) {} + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} // Splat constructor - simdjson_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} // Array constructor - simdjson_really_inline simd8(const uint8_t values[64]) : simd8(load(values)) {} + simdjson_inline simd8(const uint8_t values[64]) : simd8(load(values)) {} // Member-by-member initialization - simdjson_really_inline simd8( + simdjson_inline simd8( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, 
uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, @@ -14305,7 +14448,7 @@ namespace simdjson { )) {} // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_really_inline static simd8 repeat_16( + simdjson_inline static simd8 repeat_16( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) { @@ -14318,46 +14461,46 @@ namespace simdjson { v8, v9, v10, v11, v12, v13, v14, v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15 - ); + ); } // Saturated math - simdjson_really_inline simd8 saturating_add(const simd8 other) const { return _mm512_adds_epu8(*this, other); } - simdjson_really_inline simd8 saturating_sub(const simd8 other) const { return _mm512_subs_epu8(*this, other); } + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm512_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm512_subs_epu8(*this, other); } // Order-specific operations - simdjson_really_inline simd8 max_val(const simd8 other) const { return _mm512_max_epu8(*this, other); } - simdjson_really_inline simd8 min_val(const simd8 other) const { return _mm512_min_epu8(other, *this); } + simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epu8(other, *this); } // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_really_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } // Same as 
<, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_really_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } - simdjson_really_inline uint64_t operator<=(const simd8 other) const { return other.max_val(*this) == other; } - simdjson_really_inline uint64_t operator>=(const simd8 other) const { return other.min_val(*this) == other; } - simdjson_really_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } - simdjson_really_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline uint64_t operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline uint64_t operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } // Bit-specific operations - simdjson_really_inline simd8 bits_not_set() const { return _mm512_mask_blend_epi8(*this == uint8_t(0), _mm512_set1_epi8(0), _mm512_set1_epi8(-1)); } - simdjson_really_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } - simdjson_really_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } - simdjson_really_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline simd8 bits_not_set() const { return _mm512_mask_blend_epi8(*this == uint8_t(0), _mm512_set1_epi8(0), _mm512_set1_epi8(-1)); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return 
~this->bits_not_set(bits); } - simdjson_really_inline bool is_ascii() const { return _mm512_movepi8_mask(*this) == 0; } - simdjson_really_inline bool bits_not_set_anywhere() const { + simdjson_inline bool is_ascii() const { return _mm512_movepi8_mask(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return !_mm512_test_epi8_mask(*this, *this); } - simdjson_really_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } - simdjson_really_inline bool bits_not_set_anywhere(simd8 bits) const { return !_mm512_test_epi8_mask(*this, bits); } - simdjson_really_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return !_mm512_test_epi8_mask(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } template - simdjson_really_inline simd8 shr() const { return simd8(_mm512_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + simdjson_inline simd8 shr() const { return simd8(_mm512_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } template - simdjson_really_inline simd8 shl() const { return simd8(_mm512_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + simdjson_inline simd8 shl() const { return simd8(_mm512_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } // Get one of the bits and make a bitmask out of it. // e.g. 
value.get_bit<7>() gets the high bit template - simdjson_really_inline uint64_t get_bit() const { return _mm512_movepi8_mask(_mm512_slli_epi16(*this, 7 - N)); } + simdjson_inline uint64_t get_bit() const { return _mm512_movepi8_mask(_mm512_slli_epi16(*this, 7 - N)); } }; template @@ -14370,40 +14513,40 @@ namespace simdjson { simd8x64& operator=(const simd8& other) = delete; // no assignment allowed simd8x64() = delete; // no default constructor allowed - simdjson_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{ chunk0, chunk1 } {} - simdjson_really_inline simd8x64(const simd8 chunk0) : chunks{ chunk0 } {} - simdjson_really_inline simd8x64(const T ptr[64]) : chunks{ simd8::load(ptr) } {} + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{ chunk0, chunk1 } {} + simdjson_inline simd8x64(const simd8 chunk0) : chunks{ chunk0 } {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{ simd8::load(ptr) } {} - simdjson_really_inline uint64_t compress(uint64_t mask, T* output) const { + simdjson_inline uint64_t compress(uint64_t mask, T* output) const { this->chunks[0].compress(mask, output); return 64 - count_ones(mask); } - simdjson_really_inline void store(T ptr[64]) const { + simdjson_inline void store(T ptr[64]) const { this->chunks[0].store(ptr + sizeof(simd8) * 0); } - simdjson_really_inline simd8 reduce_or() const { + simdjson_inline simd8 reduce_or() const { return this->chunks[0]; } - simdjson_really_inline simd8x64 bit_or(const T m) const { + simdjson_inline simd8x64 bit_or(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] | mask - ); + ); } - simdjson_really_inline uint64_t eq(const T m) const { + simdjson_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); return this->chunks[0] == mask; } - simdjson_really_inline uint64_t eq(const simd8x64& other) const { + simdjson_inline uint64_t eq(const simd8x64& other) const { return this->chunks[0] == 
other.chunks[0]; } - simdjson_really_inline uint64_t lteq(const T m) const { + simdjson_inline uint64_t lteq(const T m) const { const simd8 mask = simd8::splat(m); return this->chunks[0] <= mask; } @@ -14426,11 +14569,11 @@ namespace simdjson { // return non-zero if not a structural or whitespace char // zero otherwise - simdjson_really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace_negated[c]; } - simdjson_really_inline uint32_t is_structural_or_whitespace(uint8_t c) { + simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace[c]; } @@ -14461,7 +14604,7 @@ namespace simdjson { // // Note: we assume that surrogates are treated separately // - simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t* c) { + simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t* c) { if (cp <= 0x7F) { c[0] = uint8_t(cp); return 1; // ascii @@ -14481,7 +14624,7 @@ namespace simdjson { return 3; } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this - // is not needed + // is not needed c[0] = uint8_t((cp >> 18) + 240); c[1] = uint8_t(((cp >> 12) & 63) + 128); c[2] = uint8_t(((cp >> 6) & 63) + 128); @@ -14492,13 +14635,13 @@ namespace simdjson { return 0; // bad r } -#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm // this is a slow emulation routine for 32-bit // - static simdjson_really_inline uint64_t __emulu(uint32_t x, uint32_t y) { + static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { return x * (uint64_t)y; } - static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t* hi) { + static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t* hi) { uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); 
uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); @@ -14512,9 +14655,9 @@ namespace simdjson { using internal::value128; - simdjson_really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { + simdjson_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { value128 answer; -#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS #ifdef _M_ARM64 // ARM64 has native support for 64-bit multiplications, no need to emultate answer.high = __umulh(value1, value2); @@ -14522,7 +14665,7 @@ namespace simdjson { #else answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 #endif // _M_ARM64 -#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; answer.low = uint64_t(r); answer.high = uint64_t(r >> 64); @@ -14548,13 +14691,13 @@ namespace simdjson { // You might think that using memcpy makes this function expensive, but you'd be wrong. // All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); // to the compile-time constant 1936482662. - simdjson_really_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } // Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. // Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. 
simdjson_warn_unused - simdjson_really_inline uint32_t str4ncmp(const uint8_t* src, const char* atom) { + simdjson_inline uint32_t str4ncmp(const uint8_t* src, const char* atom) { uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); std::memcpy(&srcval, src, sizeof(uint32_t)); @@ -14562,36 +14705,36 @@ namespace simdjson { } simdjson_warn_unused - simdjson_really_inline bool is_valid_true_atom(const uint8_t* src) { + simdjson_inline bool is_valid_true_atom(const uint8_t* src) { return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused - simdjson_really_inline bool is_valid_true_atom(const uint8_t* src, size_t len) { + simdjson_inline bool is_valid_true_atom(const uint8_t* src, size_t len) { if (len > 4) { return is_valid_true_atom(src); } else if (len == 4) { return !str4ncmp(src, "true"); } else { return false; } } simdjson_warn_unused - simdjson_really_inline bool is_valid_false_atom(const uint8_t* src) { + simdjson_inline bool is_valid_false_atom(const uint8_t* src) { return (str4ncmp(src + 1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; } simdjson_warn_unused - simdjson_really_inline bool is_valid_false_atom(const uint8_t* src, size_t len) { + simdjson_inline bool is_valid_false_atom(const uint8_t* src, size_t len) { if (len > 5) { return is_valid_false_atom(src); } else if (len == 5) { return !str4ncmp(src + 1, "alse"); } else { return false; } } simdjson_warn_unused - simdjson_really_inline bool is_valid_null_atom(const uint8_t* src) { + simdjson_inline bool is_valid_null_atom(const uint8_t* src) { return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused - simdjson_really_inline bool is_valid_null_atom(const uint8_t* src, size_t len) { + simdjson_inline bool is_valid_null_atom(const 
uint8_t* src, size_t len) { if (len > 4) { return is_valid_null_atom(src); } else if (len == 4) { return !str4ncmp(src, "null"); } else { return false; } @@ -14617,18 +14760,18 @@ namespace simdjson { struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_really_inline static backslash_and_quote copy_and_find(const uint8_t* src, uint8_t* dst); + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t* src, uint8_t* dst); - simdjson_really_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } - simdjson_really_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } - simdjson_really_inline int quote_index() { return trailing_zeroes(quote_bits); } - simdjson_really_inline int backslash_index() { return trailing_zeroes(bs_bits); } + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } uint64_t bs_bits; uint64_t quote_bits; }; // struct backslash_and_quote - simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t* src, uint8_t* dst) { + simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t* src, uint8_t* dst) { // this can read up to 15 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); @@ -14645,9 +14788,6 @@ namespace simdjson { } // namespace icelake } // namespace simdjson -/* begin file include/simdjson/generic/stringparsing.h */ -/* end file include/simdjson/generic/stringparsing.h */ - #endif // SIMDJSON_ICELAKE_STRINGPARSING_H /* end file include/simdjson/icelake/stringparsing.h 
*/ /* begin file include/simdjson/icelake/numberparsing.h */ @@ -14658,7 +14798,7 @@ namespace simdjson { namespace icelake { namespace { - static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t* chars) { + static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t* chars) { // this actually computes *16* values so we are being wasteful. const __m128i ascii0 = _mm_set1_epi8('0'); const __m128i mul_1_10 = @@ -14721,7 +14861,7 @@ namespace simdjson { // Convert a mantissa, an exponent and a sign bit into an ieee64 double. // The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). // The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. - simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { double d; mantissa &= ~(1ULL << 52); mantissa |= real_exponent << 52; @@ -14736,7 +14876,7 @@ namespace simdjson { // set to false. This should work *most of the time* (like 99% of the time). // We assume that power is in the [smallest_power, // largest_power] interval: the caller is responsible for this check. - simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double& d) { + simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double& d) { // we start with a fast path // It was described in // Clinger WD. How to read floating point numbers accurately. @@ -14797,7 +14937,7 @@ namespace simdjson { // In the slow path, we need to adjust i so that it is > 1<<63 which is always // possible, except if i == 0, so we handle i == 0 separately. if (i == 0) { - d = 0.0; + d = negative ? -0.0 : 0.0; return true; } @@ -14910,9 +15050,9 @@ namespace simdjson { // Here we have mantissa < (1<<54). 
int64_t real_exponent = exponent - lz; if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? - // Here have that real_exponent <= 0 so -real_exponent >= 0 + // Here have that real_exponent <= 0 so -real_exponent >= 0 if (-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. - d = 0.0; + d = negative ? -0.0 : 0.0; return true; } // next line is safe because -real_exponent + 1 < 0 @@ -15020,7 +15160,7 @@ namespace simdjson { // check quickly whether the next 8 chars are made of digits // at a glance, it looks better than Mula's // http://0x80.pl/articles/swar-digits-validate.html - simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t * chars) { + simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t * chars) { uint64_t val; // this can read up to 7 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding @@ -15047,7 +15187,7 @@ namespace simdjson { template SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later - simdjson_really_inline bool parse_digit(const uint8_t c, I & i) { + simdjson_inline bool parse_digit(const uint8_t c, I & i) { const uint8_t digit = static_cast(c - '0'); if (digit > 9) { return false; @@ -15057,7 +15197,7 @@ namespace simdjson { return true; } - simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t* const src, const uint8_t * &p, uint64_t & i, int64_t & exponent) { + simdjson_inline error_code parse_decimal(simdjson_unused const uint8_t* const src, const uint8_t * &p, uint64_t & i, int64_t & exponent) { // we continue with the fiction that we have an integer. 
If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the @@ -15085,7 +15225,7 @@ namespace simdjson { return SUCCESS; } - simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t* const src, const uint8_t * &p, int64_t & exponent) { + simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t* const src, const uint8_t * &p, int64_t & exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well @@ -15136,7 +15276,7 @@ namespace simdjson { return SUCCESS; } - simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { // It is possible that the integer had an overflow. // We have to handle the case where we have 0.0000somenumber. const uint8_t* start = start_digits; @@ -15146,7 +15286,7 @@ namespace simdjson { } template - simdjson_really_inline error_code write_float(const uint8_t* const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W & writer) { + simdjson_inline error_code write_float(const uint8_t* const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W & writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. @@ -15182,11 +15322,12 @@ namespace simdjson { static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); // if ((exponent < simdjson::internal::smallest_power) || (i == 0)) { - WRITE_DOUBLE(0, src, writer); + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? 
-0.0 : 0.0, src, writer); return SUCCESS; } else { // (exponent > largest_power) and (i != 0) - // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. return INVALID_NUMBER(src); } } @@ -15203,20 +15344,20 @@ namespace simdjson { #ifdef SIMDJSON_SKIPNUMBERPARSING template - simdjson_really_inline error_code parse_number(const uint8_t* const, W & writer) { + simdjson_inline error_code parse_number(const uint8_t* const, W & writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } - simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { return false; } - simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } - simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } + simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t* const src) noexcept { return 0; } + 
simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } + simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } + simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } #else // parse the number at src @@ -15229,13 +15370,13 @@ namespace simdjson { // // Our objective is accurate parsing (ULP of 0) at high speed. template - simdjson_really_inline error_code parse_number(const uint8_t* const src, W & writer) { + simdjson_inline error_code parse_number(const uint8_t* const src, W & writer) { // // Check for minus sign // bool negative = (*src == '-'); - const uint8_t* p = src + negative; + const uint8_t* p = src + uint8_t(negative); // // Parse the integer part. @@ -15381,7 +15522,7 @@ namespace simdjson { NUMBER_ERROR }; // Parse any number from 0 to 18,446,744,073,709,551,615 - simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t* const src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t* const src) noexcept { const uint8_t* p = src; // // Parse the integer part. 
@@ -15431,7 +15572,7 @@ namespace simdjson { // Parse any number from 0 to 18,446,744,073,709,551,615 // Never read at src_end or beyond - simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t* const src, const uint8_t* const src_end) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t* const src, const uint8_t* const src_end) noexcept { const uint8_t* p = src; // // Parse the integer part. @@ -15479,7 +15620,7 @@ namespace simdjson { } // Parse any number from 0 to 18,446,744,073,709,551,615 - simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t* const src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t* const src) noexcept { const uint8_t* p = src + 1; // // Parse the integer part. @@ -15529,12 +15670,12 @@ namespace simdjson { } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t* src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); - const uint8_t* p = src + negative; + const uint8_t* p = src + uint8_t(negative); // // Parse the integer part. 
@@ -15572,13 +15713,13 @@ namespace simdjson { // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // Never read at src_end or beyond - simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t* const src, const uint8_t* const src_end) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t* const src, const uint8_t* const src_end) noexcept { // // Check for minus sign // if (src == src_end) { return NUMBER_ERROR; } bool negative = (*src == '-'); - const uint8_t* p = src + negative; + const uint8_t* p = src + uint8_t(negative); // // Parse the integer part. @@ -15615,24 +15756,24 @@ namespace simdjson { } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t* src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); - const uint8_t* p = src + negative + 1; + src += uint8_t(negative) + 1; // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t* const start_digits = p; + const uint8_t* const start_digits = src; uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } + while (parse_digit(*src, i)) { src++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); + size_t digit_count = size_t(src - start_digits); // We go from // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // so we can never represent numbers that have more than 19 digits. @@ -15644,11 +15785,11 @@ namespace simdjson { // Here digit_count > 0. 
if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: - if (*p != '"') { return NUMBER_ERROR; } + if (*src != '"') { return NUMBER_ERROR; } // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. // Performance note: This check is only needed when digit_count == longest_digit_count but it is // so cheap that we might as well always make it. @@ -15656,12 +15797,12 @@ namespace simdjson { return negative ? (~i + 1) : i; } - simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t* src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); // // Parse the integer part. 
@@ -15729,19 +15870,19 @@ namespace simdjson { if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src - negative, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; } - simdjson_unused simdjson_really_inline bool is_negative(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline bool is_negative(const uint8_t* src) noexcept { return (*src == '-'); } - simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t* src) noexcept { bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); const uint8_t* p = src; while (static_cast(*p - '0') <= 9) { p++; } if (p == src) { return NUMBER_ERROR; } @@ -15749,9 +15890,9 @@ namespace simdjson { return false; } - simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t* src) noexcept { bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); const uint8_t* p = src; while (static_cast(*p - '0') <= 9) { p++; } if (p == src) { return NUMBER_ERROR; } @@ -15775,13 +15916,13 @@ namespace simdjson { } // Never read at src_end or beyond - simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t* src, const uint8_t* const src_end) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t* src, const uint8_t* const src_end) noexcept { if (src == src_end) { return NUMBER_ERROR; } // // Check for minus sign // bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); // // Parse the integer part. 
@@ -15851,18 +15992,18 @@ namespace simdjson { if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src - negative, src_end, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { return NUMBER_ERROR; } return d; } - simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t* src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); - src += negative + 1; + src += uint8_t(negative) + 1; // // Parse the integer part. @@ -15930,7 +16071,7 @@ namespace simdjson { if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src - negative, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; @@ -15990,9 +16131,12 @@ namespace simdjson { using namespace simdjson; + /** + * @private + */ class implementation final : public simdjson::implementation { public: - simdjson_really_inline implementation() : simdjson::implementation( + simdjson_inline implementation() : simdjson::implementation( "haswell", "Intel/AMD AVX2", internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 @@ -16016,14 +16160,14 @@ namespace simdjson { #define SIMDJSON_HASWELL_INTRINSICS_H -#ifdef SIMDJSON_VISUAL_STUDIO +#if SIMDJSON_VISUAL_STUDIO // under clang within visual studio, this will include #include // visual studio or clang #else #include // elsewhere #endif // SIMDJSON_VISUAL_STUDIO -#ifdef SIMDJSON_CLANG_VISUAL_STUDIO +#if SIMDJSON_CLANG_VISUAL_STUDIO /** * You are not supposed, normally, to include these * headers directly. 
Instead you should either include intrin.h @@ -16058,6 +16202,8 @@ namespace simdjson { #endif // _blsr_u64 #endif // SIMDJSON_CLANG_VISUAL_STUDIO +static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for haswell kernel."); + #endif // SIMDJSON_HASWELL_INTRINSICS_H /* end file include/simdjson/haswell/intrinsics.h */ @@ -16107,11 +16253,12 @@ namespace simdjson { simdjson_warn_unused error_code stage1(const uint8_t* buf, size_t len, stage1_mode partial) noexcept final; simdjson_warn_unused error_code stage2(dom::document& doc) noexcept final; simdjson_warn_unused error_code stage2_next(dom::document& doc) noexcept final; - simdjson_warn_unused uint8_t* parse_string(const uint8_t* src, uint8_t* dst) const noexcept final; + simdjson_warn_unused uint8_t* parse_string(const uint8_t* src, uint8_t* dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t* parse_wobbly_string(const uint8_t* src, uint8_t* dst) const noexcept final; inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; private: - simdjson_really_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); }; @@ -16164,8 +16311,12 @@ namespace simdjson { // but the algorithms do not end up using the returned value. // Sadly, sanitizers are not smart enough to figure it out. SIMDJSON_NO_SANITIZE_UNDEFINED - simdjson_really_inline int trailing_zeroes(uint64_t input_num) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + // This function can be used safely even if not all bytes have been + // initialized. 
+ // See issue https://github.com/simdjson/simdjson/issues/1965 + SIMDJSON_NO_SANITIZE_MEMORY + simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO return (int)_tzcnt_u64(input_num); #else // SIMDJSON_REGULAR_VISUAL_STUDIO //////// @@ -16178,29 +16329,29 @@ namespace simdjson { } /* result might be undefined when input_num is zero */ - simdjson_really_inline uint64_t clear_lowest_bit(uint64_t input_num) { + simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { return _blsr_u64(input_num); } /* result might be undefined when input_num is zero */ - simdjson_really_inline int leading_zeroes(uint64_t input_num) { + simdjson_inline int leading_zeroes(uint64_t input_num) { return int(_lzcnt_u64(input_num)); } -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - simdjson_really_inline unsigned __int64 count_ones(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { // note: we do not support legacy 32-bit Windows return __popcnt64(input_num);// Visual Studio wants two underscores } #else - simdjson_really_inline long long int count_ones(uint64_t input_num) { + simdjson_inline long long int count_ones(uint64_t input_num) { return _popcnt64(input_num); } #endif - simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2, + simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t* result) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if SIMDJSON_REGULAR_VISUAL_STUDIO return _addcarry_u64(0, value1, value2, reinterpret_cast(result)); #else @@ -16228,7 +16379,7 @@ namespace simdjson { // // For example, prefix_xor(00100100) == 00011100 // - simdjson_really_inline uint64_t prefix_xor(const uint64_t bitmask) { + simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { // There should be no such thing with a processor supporting avx2 // but not clmul. 
__m128i all_ones = _mm_set1_epi8('\xFF'); @@ -16258,23 +16409,23 @@ namespace simdjson { __m256i value; // Zero constructor - simdjson_really_inline base() : value{ __m256i() } {} + simdjson_inline base() : value{ __m256i() } {} // Conversion from SIMD register - simdjson_really_inline base(const __m256i _value) : value(_value) {} + simdjson_inline base(const __m256i _value) : value(_value) {} // Conversion to SIMD register - simdjson_really_inline operator const __m256i& () const { return this->value; } - simdjson_really_inline operator __m256i& () { return this->value; } + simdjson_inline operator const __m256i& () const { return this->value; } + simdjson_inline operator __m256i& () { return this->value; } // Bit operations - simdjson_really_inline Child operator|(const Child other) const { return _mm256_or_si256(*this, other); } - simdjson_really_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); } - simdjson_really_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); } - simdjson_really_inline Child bit_andnot(const Child other) const { return _mm256_andnot_si256(other, *this); } - simdjson_really_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } - simdjson_really_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } - simdjson_really_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + simdjson_inline Child operator|(const Child other) const { return _mm256_or_si256(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); } + simdjson_inline Child bit_andnot(const Child other) 
const { return _mm256_andnot_si256(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } }; // Forward-declared so they can be used by splat and friends. @@ -16286,15 +16437,15 @@ namespace simdjson { typedef uint32_t bitmask_t; typedef uint64_t bitmask2_t; - simdjson_really_inline base8() : base>() {} - simdjson_really_inline base8(const __m256i _value) : base>(_value) {} + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m256i _value) : base>(_value) {} friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm256_cmpeq_epi8(lhs, rhs); } static const int SIZE = sizeof(base::value); template - simdjson_really_inline simd8 prev(const simd8 prev_chunk) const { + simdjson_inline simd8 prev(const simd8 prev_chunk) const { return _mm256_alignr_epi8(*this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N); } }; @@ -16302,27 +16453,27 @@ namespace simdjson { // SIMD byte mask type (returned by things like eq and gt) template<> struct simd8 : base8 { - static simdjson_really_inline simd8 splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); } + static simdjson_inline simd8 splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); } - simdjson_really_inline simd8() : base8() {} - simdjson_really_inline simd8(const __m256i _value) : base8(_value) {} + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m256i _value) : base8(_value) {} // Splat constructor - simdjson_really_inline simd8(bool _value) : base8(splat(_value)) {} + simdjson_inline simd8(bool _value) : 
base8(splat(_value)) {} - simdjson_really_inline int to_bitmask() const { return _mm256_movemask_epi8(*this); } - simdjson_really_inline bool any() const { return !_mm256_testz_si256(*this, *this); } - simdjson_really_inline simd8 operator~() const { return *this ^ true; } + simdjson_inline int to_bitmask() const { return _mm256_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm256_testz_si256(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } }; template struct base8_numeric : base8 { - static simdjson_really_inline simd8 splat(T _value) { return _mm256_set1_epi8(_value); } - static simdjson_really_inline simd8 zero() { return _mm256_setzero_si256(); } - static simdjson_really_inline simd8 load(const T values[32]) { + static simdjson_inline simd8 splat(T _value) { return _mm256_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm256_setzero_si256(); } + static simdjson_inline simd8 load(const T values[32]) { return _mm256_loadu_si256(reinterpret_cast(values)); } // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdjson_really_inline simd8 repeat_16( + static simdjson_inline simd8 repeat_16( T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 ) { @@ -16331,27 +16482,27 @@ namespace simdjson { v8, v9, v10, v11, v12, v13, v14, v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15 - ); + ); } - simdjson_really_inline base8_numeric() : base8() {} - simdjson_really_inline base8_numeric(const __m256i _value) : base8(_value) {} + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m256i _value) : base8(_value) {} // Store to array - simdjson_really_inline void store(T dst[32]) const { return _mm256_storeu_si256(reinterpret_cast<__m256i*>(dst), *this); } + simdjson_inline void store(T dst[32]) const { return _mm256_storeu_si256(reinterpret_cast<__m256i*>(dst), 
*this); } // Addition/subtraction are the same for signed and unsigned - simdjson_really_inline simd8 operator+(const simd8 other) const { return _mm256_add_epi8(*this, other); } - simdjson_really_inline simd8 operator-(const simd8 other) const { return _mm256_sub_epi8(*this, other); } - simdjson_really_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } - simdjson_really_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + simdjson_inline simd8 operator+(const simd8 other) const { return _mm256_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm256_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } // Override to distinguish from bool version - simdjson_really_inline simd8 operator~() const { return *this ^ 0xFFu; } + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) template - simdjson_really_inline simd8 lookup_16(simd8 lookup_table) const { + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { return _mm256_shuffle_epi8(lookup_table, *this); } @@ -16363,7 +16514,7 @@ namespace simdjson { // signature simd8 compress(uint32_t mask) would be // sensible, but the AVX ISA makes this kind of approach difficult. 
template - simdjson_really_inline void compress(uint32_t mask, L* output) const { + simdjson_inline void compress(uint32_t mask, L* output) const { using internal::thintable_epi8; using internal::BitsSetTable256mul2; using internal::pshufb_combine_table; @@ -16409,7 +16560,7 @@ namespace simdjson { } template - simdjson_really_inline simd8 lookup_16( + simdjson_inline simd8 lookup_16( L replace0, L replace1, L replace2, L replace3, L replace4, L replace5, L replace6, L replace7, L replace8, L replace9, L replace10, L replace11, @@ -16426,14 +16577,14 @@ namespace simdjson { // Signed bytes template<> struct simd8 : base8_numeric { - simdjson_really_inline simd8() : base8_numeric() {} - simdjson_really_inline simd8(const __m256i _value) : base8_numeric(_value) {} + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} // Splat constructor - simdjson_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} // Array constructor - simdjson_really_inline simd8(const int8_t values[32]) : simd8(load(values)) {} + simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} // Member-by-member initialization - simdjson_really_inline simd8( + simdjson_inline simd8( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, @@ -16445,7 +16596,7 @@ namespace simdjson { v24, v25, v26, v27, v28, v29, v30, v31 )) {} // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_really_inline static simd8 repeat_16( + simdjson_inline static simd8 repeat_16( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, 
int8_t v13, int8_t v14, int8_t v15 ) { @@ -16454,27 +16605,27 @@ namespace simdjson { v8, v9, v10, v11, v12, v13, v14, v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15 - ); + ); } // Order-sensitive comparisons - simdjson_really_inline simd8 max_val(const simd8 other) const { return _mm256_max_epi8(*this, other); } - simdjson_really_inline simd8 min_val(const simd8 other) const { return _mm256_min_epi8(*this, other); } - simdjson_really_inline simd8 operator>(const simd8 other) const { return _mm256_cmpgt_epi8(*this, other); } - simdjson_really_inline simd8 operator<(const simd8 other) const { return _mm256_cmpgt_epi8(other, *this); } + simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm256_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm256_cmpgt_epi8(other, *this); } }; // Unsigned bytes template<> struct simd8 : base8_numeric { - simdjson_really_inline simd8() : base8_numeric() {} - simdjson_really_inline simd8(const __m256i _value) : base8_numeric(_value) {} + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} // Splat constructor - simdjson_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} // Array constructor - simdjson_really_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} // Member-by-member initialization - simdjson_really_inline simd8( + simdjson_inline simd8( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t 
v14, uint8_t v15, uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, @@ -16486,7 +16637,7 @@ namespace simdjson { v24, v25, v26, v27, v28, v29, v30, v31 )) {} // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_really_inline static simd8 repeat_16( + simdjson_inline static simd8 repeat_16( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) { @@ -16495,43 +16646,43 @@ namespace simdjson { v8, v9, v10, v11, v12, v13, v14, v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15 - ); + ); } // Saturated math - simdjson_really_inline simd8 saturating_add(const simd8 other) const { return _mm256_adds_epu8(*this, other); } - simdjson_really_inline simd8 saturating_sub(const simd8 other) const { return _mm256_subs_epu8(*this, other); } + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm256_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm256_subs_epu8(*this, other); } // Order-specific operations - simdjson_really_inline simd8 max_val(const simd8 other) const { return _mm256_max_epu8(*this, other); } - simdjson_really_inline simd8 min_val(const simd8 other) const { return _mm256_min_epu8(other, *this); } + simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epu8(other, *this); } // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_really_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - 
simdjson_really_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } - simdjson_really_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } - simdjson_really_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } - simdjson_really_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } - simdjson_really_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } // Bit-specific operations - simdjson_really_inline simd8 bits_not_set() const { return *this == uint8_t(0); } - simdjson_really_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } - simdjson_really_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } - simdjson_really_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } - simdjson_really_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; } - simdjson_really_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); } - simdjson_really_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } - simdjson_really_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm256_testz_si256(*this, bits); } - simdjson_really_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + 
simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm256_testz_si256(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } template - simdjson_really_inline simd8 shr() const { return simd8(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + simdjson_inline simd8 shr() const { return simd8(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } template - simdjson_really_inline simd8 shl() const { return simd8(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + simdjson_inline simd8 shl() const { return simd8(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } // Get one of the bits and make a bitmask out of it. // e.g. 
value.get_bit<7>() gets the high bit template - simdjson_really_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7 - N)); } + simdjson_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7 - N)); } }; template @@ -16544,10 +16695,10 @@ namespace simdjson { simd8x64& operator=(const simd8& other) = delete; // no assignment allowed simd8x64() = delete; // no default constructor allowed - simdjson_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{ chunk0, chunk1 } {} - simdjson_really_inline simd8x64(const T ptr[64]) : chunks{ simd8::load(ptr), simd8::load(ptr + 32) } {} + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{ chunk0, chunk1 } {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{ simd8::load(ptr), simd8::load(ptr + 32) } {} - simdjson_really_inline uint64_t compress(uint64_t mask, T* output) const { + simdjson_inline uint64_t compress(uint64_t mask, T* output) const { uint32_t mask1 = uint32_t(mask); uint32_t mask2 = uint32_t(mask >> 32); this->chunks[0].compress(mask1, output); @@ -16555,50 +16706,50 @@ namespace simdjson { return 64 - count_ones(mask); } - simdjson_really_inline void store(T ptr[64]) const { + simdjson_inline void store(T ptr[64]) const { this->chunks[0].store(ptr + sizeof(simd8) * 0); this->chunks[1].store(ptr + sizeof(simd8) * 1); } - simdjson_really_inline uint64_t to_bitmask() const { + simdjson_inline uint64_t to_bitmask() const { uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); uint64_t r_hi = this->chunks[1].to_bitmask(); return r_lo | (r_hi << 32); } - simdjson_really_inline simd8 reduce_or() const { + simdjson_inline simd8 reduce_or() const { return this->chunks[0] | this->chunks[1]; } - simdjson_really_inline simd8x64 bit_or(const T m) const { + simdjson_inline simd8x64 bit_or(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] | mask, this->chunks[1] | mask - ); + ); } - 
simdjson_really_inline uint64_t eq(const T m) const { + simdjson_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] == mask, this->chunks[1] == mask - ).to_bitmask(); + ).to_bitmask(); } - simdjson_really_inline uint64_t eq(const simd8x64& other) const { + simdjson_inline uint64_t eq(const simd8x64& other) const { return simd8x64( this->chunks[0] == other.chunks[0], this->chunks[1] == other.chunks[1] - ).to_bitmask(); + ).to_bitmask(); } - simdjson_really_inline uint64_t lteq(const T m) const { + simdjson_inline uint64_t lteq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] <= mask, this->chunks[1] <= mask - ).to_bitmask(); + ).to_bitmask(); } }; // struct simd8x64 @@ -16619,11 +16770,11 @@ namespace simdjson { // return non-zero if not a structural or whitespace char // zero otherwise - simdjson_really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace_negated[c]; } - simdjson_really_inline uint32_t is_structural_or_whitespace(uint8_t c) { + simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace[c]; } @@ -16654,7 +16805,7 @@ namespace simdjson { // // Note: we assume that surrogates are treated separately // - simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t* c) { + simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t* c) { if (cp <= 0x7F) { c[0] = uint8_t(cp); return 1; // ascii @@ -16674,7 +16825,7 @@ namespace simdjson { return 3; } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this - // is not needed + // is not needed c[0] = uint8_t((cp >> 18) + 240); c[1] = uint8_t(((cp >> 12) & 63) + 128); c[2] = uint8_t(((cp >> 6) & 63) + 128); @@ -16685,13 +16836,13 @@ namespace simdjson { return 0; // bad r } -#ifdef SIMDJSON_IS_32BITS // _umul128 for 
x86, arm +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm // this is a slow emulation routine for 32-bit // - static simdjson_really_inline uint64_t __emulu(uint32_t x, uint32_t y) { + static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { return x * (uint64_t)y; } - static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t* hi) { + static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t* hi) { uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); @@ -16705,9 +16856,9 @@ namespace simdjson { using internal::value128; - simdjson_really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { + simdjson_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { value128 answer; -#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS #ifdef _M_ARM64 // ARM64 has native support for 64-bit multiplications, no need to emultate answer.high = __umulh(value1, value2); @@ -16715,7 +16866,7 @@ namespace simdjson { #else answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 #endif // _M_ARM64 -#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; answer.low = uint64_t(r); answer.high = uint64_t(r >> 64); @@ -16741,13 +16892,13 @@ namespace simdjson { // You might think that using memcpy makes this function expensive, but you'd be wrong. // All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); // to the compile-time constant 1936482662. 
- simdjson_really_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } // Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. // Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. simdjson_warn_unused - simdjson_really_inline uint32_t str4ncmp(const uint8_t* src, const char* atom) { + simdjson_inline uint32_t str4ncmp(const uint8_t* src, const char* atom) { uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); std::memcpy(&srcval, src, sizeof(uint32_t)); @@ -16755,36 +16906,36 @@ namespace simdjson { } simdjson_warn_unused - simdjson_really_inline bool is_valid_true_atom(const uint8_t* src) { + simdjson_inline bool is_valid_true_atom(const uint8_t* src) { return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused - simdjson_really_inline bool is_valid_true_atom(const uint8_t* src, size_t len) { + simdjson_inline bool is_valid_true_atom(const uint8_t* src, size_t len) { if (len > 4) { return is_valid_true_atom(src); } else if (len == 4) { return !str4ncmp(src, "true"); } else { return false; } } simdjson_warn_unused - simdjson_really_inline bool is_valid_false_atom(const uint8_t* src) { + simdjson_inline bool is_valid_false_atom(const uint8_t* src) { return (str4ncmp(src + 1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; } simdjson_warn_unused - simdjson_really_inline bool is_valid_false_atom(const uint8_t* src, size_t len) { + simdjson_inline bool is_valid_false_atom(const uint8_t* src, size_t len) { if (len > 5) { return is_valid_false_atom(src); } else if (len == 5) { 
return !str4ncmp(src + 1, "alse"); } else { return false; } } simdjson_warn_unused - simdjson_really_inline bool is_valid_null_atom(const uint8_t* src) { + simdjson_inline bool is_valid_null_atom(const uint8_t* src) { return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused - simdjson_really_inline bool is_valid_null_atom(const uint8_t* src, size_t len) { + simdjson_inline bool is_valid_null_atom(const uint8_t* src, size_t len) { if (len > 4) { return is_valid_null_atom(src); } else if (len == 4) { return !str4ncmp(src, "null"); } else { return false; } @@ -16810,18 +16961,18 @@ namespace simdjson { struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_really_inline static backslash_and_quote copy_and_find(const uint8_t* src, uint8_t* dst); + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t* src, uint8_t* dst); - simdjson_really_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } - simdjson_really_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } - simdjson_really_inline int quote_index() { return trailing_zeroes(quote_bits); } - simdjson_really_inline int backslash_index() { return trailing_zeroes(bs_bits); } + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } uint32_t bs_bits; uint32_t quote_bits; }; // struct backslash_and_quote - simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t* src, uint8_t* dst) { + simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t* src, uint8_t* dst) { // this can read up to 15 bytes beyond the buffer size, but we require // 
SIMDJSON_PADDING of padding static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); @@ -16838,9 +16989,6 @@ namespace simdjson { } // namespace haswell } // namespace simdjson -/* begin file include/simdjson/generic/stringparsing.h */ -/* end file include/simdjson/generic/stringparsing.h */ - #endif // SIMDJSON_HASWELL_STRINGPARSING_H /* end file include/simdjson/haswell/stringparsing.h */ /* begin file include/simdjson/haswell/numberparsing.h */ @@ -16851,7 +16999,7 @@ namespace simdjson { namespace haswell { namespace { - static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t* chars) { + static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t* chars) { // this actually computes *16* values so we are being wasteful. const __m128i ascii0 = _mm_set1_epi8('0'); const __m128i mul_1_10 = @@ -16914,7 +17062,7 @@ namespace simdjson { // Convert a mantissa, an exponent and a sign bit into an ieee64 double. // The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). // The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. - simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { double d; mantissa &= ~(1ULL << 52); mantissa |= real_exponent << 52; @@ -16929,7 +17077,7 @@ namespace simdjson { // set to false. This should work *most of the time* (like 99% of the time). // We assume that power is in the [smallest_power, // largest_power] interval: the caller is responsible for this check. - simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double& d) { + simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double& d) { // we start with a fast path // It was described in // Clinger WD. 
How to read floating point numbers accurately. @@ -16990,7 +17138,7 @@ namespace simdjson { // In the slow path, we need to adjust i so that it is > 1<<63 which is always // possible, except if i == 0, so we handle i == 0 separately. if (i == 0) { - d = 0.0; + d = negative ? -0.0 : 0.0; return true; } @@ -17103,9 +17251,9 @@ namespace simdjson { // Here we have mantissa < (1<<54). int64_t real_exponent = exponent - lz; if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? - // Here have that real_exponent <= 0 so -real_exponent >= 0 + // Here have that real_exponent <= 0 so -real_exponent >= 0 if (-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. - d = 0.0; + d = negative ? -0.0 : 0.0; return true; } // next line is safe because -real_exponent + 1 < 0 @@ -17213,7 +17361,7 @@ namespace simdjson { // check quickly whether the next 8 chars are made of digits // at a glance, it looks better than Mula's // http://0x80.pl/articles/swar-digits-validate.html - simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t * chars) { + simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t * chars) { uint64_t val; // this can read up to 7 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding @@ -17240,7 +17388,7 @@ namespace simdjson { template SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later - simdjson_really_inline bool parse_digit(const uint8_t c, I & i) { + simdjson_inline bool parse_digit(const uint8_t c, I & i) { const uint8_t digit = static_cast(c - '0'); if (digit > 9) { return false; @@ -17250,7 +17398,7 @@ namespace simdjson { return true; } - simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t* const src, const uint8_t * &p, uint64_t & i, int64_t & exponent) { + simdjson_inline error_code parse_decimal(simdjson_unused const uint8_t* const src, const uint8_t * &p, uint64_t & i, int64_t 
& exponent) { // we continue with the fiction that we have an integer. If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the @@ -17278,7 +17426,7 @@ namespace simdjson { return SUCCESS; } - simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t* const src, const uint8_t * &p, int64_t & exponent) { + simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t* const src, const uint8_t * &p, int64_t & exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well @@ -17329,7 +17477,7 @@ namespace simdjson { return SUCCESS; } - simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { // It is possible that the integer had an overflow. // We have to handle the case where we have 0.0000somenumber. const uint8_t* start = start_digits; @@ -17339,7 +17487,7 @@ namespace simdjson { } template - simdjson_really_inline error_code write_float(const uint8_t* const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W & writer) { + simdjson_inline error_code write_float(const uint8_t* const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W & writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. @@ -17375,11 +17523,12 @@ namespace simdjson { static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); // if ((exponent < simdjson::internal::smallest_power) || (i == 0)) { - WRITE_DOUBLE(0, src, writer); + // E.g. Parse "-0.0e-999" into the same value as "-0.0". 
See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); return SUCCESS; } else { // (exponent > largest_power) and (i != 0) - // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. return INVALID_NUMBER(src); } } @@ -17396,20 +17545,20 @@ namespace simdjson { #ifdef SIMDJSON_SKIPNUMBERPARSING template - simdjson_really_inline error_code parse_number(const uint8_t* const, W & writer) { + simdjson_inline error_code parse_number(const uint8_t* const, W & writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } - simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { return false; } - simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } - simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } + simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline simdjson_result 
parse_integer(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } + simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } + simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } #else // parse the number at src @@ -17422,13 +17571,13 @@ namespace simdjson { // // Our objective is accurate parsing (ULP of 0) at high speed. template - simdjson_really_inline error_code parse_number(const uint8_t* const src, W & writer) { + simdjson_inline error_code parse_number(const uint8_t* const src, W & writer) { // // Check for minus sign // bool negative = (*src == '-'); - const uint8_t* p = src + negative; + const uint8_t* p = src + uint8_t(negative); // // Parse the integer part. @@ -17574,7 +17723,7 @@ namespace simdjson { NUMBER_ERROR }; // Parse any number from 0 to 18,446,744,073,709,551,615 - simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t* const src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t* const src) noexcept { const uint8_t* p = src; // // Parse the integer part. 
@@ -17624,7 +17773,7 @@ namespace simdjson { // Parse any number from 0 to 18,446,744,073,709,551,615 // Never read at src_end or beyond - simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t* const src, const uint8_t* const src_end) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t* const src, const uint8_t* const src_end) noexcept { const uint8_t* p = src; // // Parse the integer part. @@ -17672,7 +17821,7 @@ namespace simdjson { } // Parse any number from 0 to 18,446,744,073,709,551,615 - simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t* const src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t* const src) noexcept { const uint8_t* p = src + 1; // // Parse the integer part. @@ -17722,12 +17871,12 @@ namespace simdjson { } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t* src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); - const uint8_t* p = src + negative; + const uint8_t* p = src + uint8_t(negative); // // Parse the integer part. 
@@ -17765,13 +17914,13 @@ namespace simdjson { // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // Never read at src_end or beyond - simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t* const src, const uint8_t* const src_end) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t* const src, const uint8_t* const src_end) noexcept { // // Check for minus sign // if (src == src_end) { return NUMBER_ERROR; } bool negative = (*src == '-'); - const uint8_t* p = src + negative; + const uint8_t* p = src + uint8_t(negative); // // Parse the integer part. @@ -17808,24 +17957,24 @@ namespace simdjson { } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t* src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); - const uint8_t* p = src + negative + 1; + src += uint8_t(negative) + 1; // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t* const start_digits = p; + const uint8_t* const start_digits = src; uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } + while (parse_digit(*src, i)) { src++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); + size_t digit_count = size_t(src - start_digits); // We go from // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // so we can never represent numbers that have more than 19 digits. @@ -17837,11 +17986,11 @@ namespace simdjson { // Here digit_count > 0. 
if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: - if (*p != '"') { return NUMBER_ERROR; } + if (*src != '"') { return NUMBER_ERROR; } // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. // Performance note: This check is only needed when digit_count == longest_digit_count but it is // so cheap that we might as well always make it. @@ -17849,12 +17998,12 @@ namespace simdjson { return negative ? (~i + 1) : i; } - simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t* src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); // // Parse the integer part. 
@@ -17922,19 +18071,19 @@ namespace simdjson { if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src - negative, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; } - simdjson_unused simdjson_really_inline bool is_negative(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline bool is_negative(const uint8_t* src) noexcept { return (*src == '-'); } - simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t* src) noexcept { bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); const uint8_t* p = src; while (static_cast(*p - '0') <= 9) { p++; } if (p == src) { return NUMBER_ERROR; } @@ -17942,9 +18091,9 @@ namespace simdjson { return false; } - simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t* src) noexcept { bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); const uint8_t* p = src; while (static_cast(*p - '0') <= 9) { p++; } if (p == src) { return NUMBER_ERROR; } @@ -17968,13 +18117,13 @@ namespace simdjson { } // Never read at src_end or beyond - simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t* src, const uint8_t* const src_end) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t* src, const uint8_t* const src_end) noexcept { if (src == src_end) { return NUMBER_ERROR; } // // Check for minus sign // bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); // // Parse the integer part. 
@@ -18044,18 +18193,18 @@ namespace simdjson { if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src - negative, src_end, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { return NUMBER_ERROR; } return d; } - simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t* src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); - src += negative + 1; + src += uint8_t(negative) + 1; // // Parse the integer part. @@ -18123,7 +18272,7 @@ namespace simdjson { if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src - negative, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; @@ -18174,9 +18323,12 @@ namespace simdjson { using namespace simdjson::dom; } // namespace + /** + * @private + */ class implementation final : public simdjson::implementation { public: - simdjson_really_inline implementation() + simdjson_inline implementation() : simdjson::implementation("ppc64", "PPC64 ALTIVEC", internal::instruction_set::ALTIVEC) {} simdjson_warn_unused error_code create_dom_parser_implementation( @@ -18238,11 +18390,12 @@ namespace simdjson { simdjson_warn_unused error_code stage1(const uint8_t* buf, size_t len, stage1_mode partial) noexcept final; simdjson_warn_unused error_code stage2(dom::document& doc) noexcept final; simdjson_warn_unused error_code stage2_next(dom::document& doc) noexcept final; - simdjson_warn_unused uint8_t* parse_string(const uint8_t* src, uint8_t* dst) const noexcept final; + simdjson_warn_unused uint8_t* parse_string(const uint8_t* src, uint8_t* dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t* parse_wobbly_string(const uint8_t* src, 
uint8_t* dst) const noexcept final; inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; private: - simdjson_really_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); }; @@ -18301,6 +18454,8 @@ namespace simdjson { #undef vector #endif +static_assert(sizeof(__vector unsigned char) <= simdjson::SIMDJSON_PADDING, "insufficient padding for ppc64"); + #endif // SIMDJSON_PPC64_INTRINSICS_H /* end file include/simdjson/ppc64/intrinsics.h */ /* begin file include/simdjson/ppc64/bitmanipulation.h */ @@ -18315,8 +18470,12 @@ namespace simdjson { // but the algorithms do not end up using the returned value. // Sadly, sanitizers are not smart enough to figure it out. SIMDJSON_NO_SANITIZE_UNDEFINED - simdjson_really_inline int trailing_zeroes(uint64_t input_num) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + // This function can be used safely even if not all bytes have been + // initialized. + // See issue https://github.com/simdjson/simdjson/issues/1965 + SIMDJSON_NO_SANITIZE_MEMORY + simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO unsigned long ret; // Search the mask data from least significant bit (LSB) // to the most significant bit (MSB) for a set bit (1). 
@@ -18328,13 +18487,13 @@ namespace simdjson { } /* result might be undefined when input_num is zero */ - simdjson_really_inline uint64_t clear_lowest_bit(uint64_t input_num) { + simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { return input_num & (input_num - 1); } /* result might be undefined when input_num is zero */ - simdjson_really_inline int leading_zeroes(uint64_t input_num) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO unsigned long leading_zero = 0; // Search the mask data from most significant bit (MSB) // to least significant bit (LSB) for a set bit (1). @@ -18347,20 +18506,20 @@ namespace simdjson { #endif // SIMDJSON_REGULAR_VISUAL_STUDIO } -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - simdjson_really_inline int count_ones(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline int count_ones(uint64_t input_num) { // note: we do not support legacy 32-bit Windows return __popcnt64(input_num); // Visual Studio wants two underscores } #else - simdjson_really_inline int count_ones(uint64_t input_num) { + simdjson_inline int count_ones(uint64_t input_num) { return __builtin_popcountll(input_num); } #endif - simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2, + simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t* result) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if SIMDJSON_REGULAR_VISUAL_STUDIO * result = value1 + value2; return *result < value1; #else @@ -18389,7 +18548,7 @@ namespace simdjson { // // For example, prefix_xor(00100100) == 00011100 // - simdjson_really_inline uint64_t prefix_xor(uint64_t bitmask) { + simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { // You can use the version below, however gcc sometimes miscompiles // vec_pmsum_be, it happens somewhere around between 8 and 9th version. 
// The performance boost was not noticeable, falling back to a usual @@ -18436,41 +18595,41 @@ namespace simdjson { __m128i value; // Zero constructor - simdjson_really_inline base() : value{ __m128i() } {} + simdjson_inline base() : value{ __m128i() } {} // Conversion from SIMD register - simdjson_really_inline base(const __m128i _value) : value(_value) {} + simdjson_inline base(const __m128i _value) : value(_value) {} // Conversion to SIMD register - simdjson_really_inline operator const __m128i& () const { + simdjson_inline operator const __m128i& () const { return this->value; } - simdjson_really_inline operator __m128i& () { return this->value; } + simdjson_inline operator __m128i& () { return this->value; } // Bit operations - simdjson_really_inline Child operator|(const Child other) const { + simdjson_inline Child operator|(const Child other) const { return vec_or(this->value, (__m128i)other); } - simdjson_really_inline Child operator&(const Child other) const { + simdjson_inline Child operator&(const Child other) const { return vec_and(this->value, (__m128i)other); } - simdjson_really_inline Child operator^(const Child other) const { + simdjson_inline Child operator^(const Child other) const { return vec_xor(this->value, (__m128i)other); } - simdjson_really_inline Child bit_andnot(const Child other) const { + simdjson_inline Child bit_andnot(const Child other) const { return vec_andc(this->value, (__m128i)other); } - simdjson_really_inline Child& operator|=(const Child other) { + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } - simdjson_really_inline Child& operator&=(const Child other) { + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } - simdjson_really_inline Child& operator^=(const Child other) { + simdjson_inline Child& operator^=(const Child other) { auto 
this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; @@ -18485,17 +18644,17 @@ namespace simdjson { typedef uint16_t bitmask_t; typedef uint32_t bitmask2_t; - simdjson_really_inline base8() : base>() {} - simdjson_really_inline base8(const __m128i _value) : base>(_value) {} + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} - friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return (__m128i)vec_cmpeq(lhs.value, (__m128i)rhs); } static const int SIZE = sizeof(base>::value); template - simdjson_really_inline simd8 prev(simd8 prev_chunk) const { + simdjson_inline simd8 prev(simd8 prev_chunk) const { __m128i chunk = this->value; #ifdef __LITTLE_ENDIAN__ chunk = (__m128i)vec_reve(this->value); @@ -18511,18 +18670,18 @@ namespace simdjson { // SIMD byte mask type (returned by things like eq and gt) template <> struct simd8 : base8 { - static simdjson_really_inline simd8 splat(bool _value) { + static simdjson_inline simd8 splat(bool _value) { return (__m128i)vec_splats((unsigned char)(-(!!_value))); } - simdjson_really_inline simd8() : base8() {} - simdjson_really_inline simd8(const __m128i _value) + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} // Splat constructor - simdjson_really_inline simd8(bool _value) + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} - simdjson_really_inline int to_bitmask() const { + simdjson_inline int to_bitmask() const { __vector unsigned long long result; const __m128i perm_mask = { 0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40, 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00 }; @@ -18535,25 +18694,25 @@ namespace simdjson { return static_cast(result[0]); #endif } - simdjson_really_inline bool any() const { + simdjson_inline bool any() const { return !vec_all_eq(this->value, 
(__m128i)vec_splats(0)); } - simdjson_really_inline simd8 operator~() const { + simdjson_inline simd8 operator~() const { return this->value ^ (__m128i)splat(true); } }; template struct base8_numeric : base8 { - static simdjson_really_inline simd8 splat(T value) { + static simdjson_inline simd8 splat(T value) { (void)value; return (__m128i)vec_splats(value); } - static simdjson_really_inline simd8 zero() { return splat(0); } - static simdjson_really_inline simd8 load(const T values[16]) { + static simdjson_inline simd8 zero() { return splat(0); } + static simdjson_inline simd8 load(const T values[16]) { return (__m128i)(vec_vsx_ld(0, reinterpret_cast(values))); } // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdjson_really_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, + static simdjson_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15) { @@ -18561,30 +18720,30 @@ namespace simdjson { v14, v15); } - simdjson_really_inline base8_numeric() : base8() {} - simdjson_really_inline base8_numeric(const __m128i _value) + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} // Store to array - simdjson_really_inline void store(T dst[16]) const { + simdjson_inline void store(T dst[16]) const { vec_vsx_st(this->value, 0, reinterpret_cast<__m128i*>(dst)); } // Override to distinguish from bool version - simdjson_really_inline simd8 operator~() const { return *this ^ 0xFFu; } + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } // Addition/subtraction are the same for signed and unsigned - simdjson_really_inline simd8 operator+(const simd8 other) const { + simdjson_inline simd8 operator+(const simd8 other) const { return (__m128i)((__m128i)this->value + (__m128i)other); } - simdjson_really_inline simd8 operator-(const simd8 other) const { + simdjson_inline simd8 operator-(const simd8 
other) const { return (__m128i)((__m128i)this->value - (__m128i)other); } - simdjson_really_inline simd8& operator+=(const simd8 other) { + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast *>(this); } - simdjson_really_inline simd8& operator-=(const simd8 other) { + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast *>(this); } @@ -18592,7 +18751,7 @@ namespace simdjson { // Perform a lookup assuming the value is between 0 and 16 (undefined behavior // for out of range values) template - simdjson_really_inline simd8 lookup_16(simd8 lookup_table) const { + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { return (__m128i)vec_perm((__m128i)lookup_table, (__m128i)lookup_table, this->value); } @@ -18603,7 +18762,7 @@ namespace simdjson { // seems like a function with the signature simd8 compress(uint32_t mask) // would be sensible, but the AVX ISA makes this kind of approach difficult. template - simdjson_really_inline void compress(uint16_t mask, L* output) const { + simdjson_inline void compress(uint16_t mask, L* output) const { using internal::BitsSetTable256mul2; using internal::pshufb_combine_table; using internal::thintable_epi8; @@ -18644,7 +18803,7 @@ namespace simdjson { } template - simdjson_really_inline simd8 + simdjson_inline simd8 lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, L replace5, L replace6, L replace7, L replace8, L replace9, L replace10, L replace11, L replace12, L replace13, L replace14, @@ -18658,15 +18817,15 @@ namespace simdjson { // Signed bytes template <> struct simd8 : base8_numeric { - simdjson_really_inline simd8() : base8_numeric() {} - simdjson_really_inline simd8(const __m128i _value) + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} // Splat constructor - simdjson_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + 
simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} // Array constructor - simdjson_really_inline simd8(const int8_t* values) : simd8(load(values)) {} + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} // Member-by-member initialization - simdjson_really_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, + simdjson_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15) @@ -18676,7 +18835,7 @@ namespace simdjson { v15 }) {} // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_really_inline static simd8 + simdjson_inline static simd8 repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15) { @@ -18685,22 +18844,22 @@ namespace simdjson { } // Order-sensitive comparisons - simdjson_really_inline simd8 + simdjson_inline simd8 max_val(const simd8 other) const { return (__m128i)vec_max((__vector signed char)this->value, (__vector signed char)(__m128i)other); } - simdjson_really_inline simd8 + simdjson_inline simd8 min_val(const simd8 other) const { return (__m128i)vec_min((__vector signed char)this->value, (__vector signed char)(__m128i)other); } - simdjson_really_inline simd8 + simdjson_inline simd8 operator>(const simd8 other) const { return (__m128i)vec_cmpgt((__vector signed char)this->value, (__vector signed char)(__m128i)other); } - simdjson_really_inline simd8 + simdjson_inline simd8 operator<(const simd8 other) const { return (__m128i)vec_cmplt((__vector signed char)this->value, (__vector signed char)(__m128i)other); @@ -18709,15 +18868,15 @@ namespace simdjson { // Unsigned bytes template <> struct simd8 : base8_numeric { - simdjson_really_inline simd8() : base8_numeric() {} - simdjson_really_inline simd8(const __m128i 
_value) + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} // Splat constructor - simdjson_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} // Array constructor - simdjson_really_inline simd8(const uint8_t* values) : simd8(load(values)) {} + simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} // Member-by-member initialization - simdjson_really_inline + simdjson_inline simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) @@ -18726,7 +18885,7 @@ namespace simdjson { v13, v14, v15 }) {} // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_really_inline static simd8 + simdjson_inline static simd8 repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, @@ -18736,82 +18895,82 @@ namespace simdjson { } // Saturated math - simdjson_really_inline simd8 + simdjson_inline simd8 saturating_add(const simd8 other) const { return (__m128i)vec_adds(this->value, (__m128i)other); } - simdjson_really_inline simd8 + simdjson_inline simd8 saturating_sub(const simd8 other) const { return (__m128i)vec_subs(this->value, (__m128i)other); } // Order-specific operations - simdjson_really_inline simd8 + simdjson_inline simd8 max_val(const simd8 other) const { return (__m128i)vec_max(this->value, (__m128i)other); } - simdjson_really_inline simd8 + simdjson_inline simd8 min_val(const simd8 other) const { return (__m128i)vec_min(this->value, (__m128i)other); } // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_really_inline simd8 + simdjson_inline simd8 gt_bits(const simd8 other) 
const { return this->saturating_sub(other); } // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_really_inline simd8 + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } - simdjson_really_inline simd8 + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } - simdjson_really_inline simd8 + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } - simdjson_really_inline simd8 + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } - simdjson_really_inline simd8 + simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } // Bit-specific operations - simdjson_really_inline simd8 bits_not_set() const { + simdjson_inline simd8 bits_not_set() const { return (__m128i)vec_cmpeq(this->value, (__m128i)vec_splats(uint8_t(0))); } - simdjson_really_inline simd8 bits_not_set(simd8 bits) const { + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } - simdjson_really_inline simd8 any_bits_set() const { + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } - simdjson_really_inline simd8 any_bits_set(simd8 bits) const { + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } - simdjson_really_inline bool bits_not_set_anywhere() const { + simdjson_inline bool bits_not_set_anywhere() const { return vec_all_eq(this->value, (__m128i)vec_splats(0)); } - simdjson_really_inline bool any_bits_set_anywhere() const { + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } - simdjson_really_inline bool bits_not_set_anywhere(simd8 bits) const { + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return vec_all_eq(vec_and(this->value, (__m128i)bits), (__m128i)vec_splats(0)); } - 
simdjson_really_inline bool any_bits_set_anywhere(simd8 bits) const { + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } - template simdjson_really_inline simd8 shr() const { + template simdjson_inline simd8 shr() const { return simd8( (__m128i)vec_sr(this->value, (__m128i)vec_splat_u8(N))); } - template simdjson_really_inline simd8 shl() const { + template simdjson_inline simd8 shl() const { return simd8( (__m128i)vec_sl(this->value, (__m128i)vec_splat_u8(N))); } @@ -18828,26 +18987,26 @@ namespace simdjson { operator=(const simd8& other) = delete; // no assignment allowed simd8x64() = delete; // no default constructor allowed - simdjson_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{ chunk0, chunk1, chunk2, chunk3 } {} - simdjson_really_inline simd8x64(const T ptr[64]) + simdjson_inline simd8x64(const T ptr[64]) : chunks{ simd8::load(ptr), simd8::load(ptr + 16), simd8::load(ptr + 32), simd8::load(ptr + 48) } {} - simdjson_really_inline void store(T ptr[64]) const { + simdjson_inline void store(T ptr[64]) const { this->chunks[0].store(ptr + sizeof(simd8) * 0); this->chunks[1].store(ptr + sizeof(simd8) * 1); this->chunks[2].store(ptr + sizeof(simd8) * 2); this->chunks[3].store(ptr + sizeof(simd8) * 3); } - simdjson_really_inline simd8 reduce_or() const { + simdjson_inline simd8 reduce_or() const { return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); } - simdjson_really_inline uint64_t compress(uint64_t mask, T* output) const { + simdjson_inline uint64_t compress(uint64_t mask, T* output) const { this->chunks[0].compress(uint16_t(mask), output); this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); @@ -18858,7 +19017,7 @@ namespace simdjson { return 64 - count_ones(mask); } - simdjson_really_inline uint64_t to_bitmask() 
const { + simdjson_inline uint64_t to_bitmask() const { uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); uint64_t r1 = this->chunks[1].to_bitmask(); uint64_t r2 = this->chunks[2].to_bitmask(); @@ -18866,14 +19025,14 @@ namespace simdjson { return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); } - simdjson_really_inline uint64_t eq(const T m) const { + simdjson_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, this->chunks[2] == mask, this->chunks[3] == mask) .to_bitmask(); } - simdjson_really_inline uint64_t eq(const simd8x64& other) const { + simdjson_inline uint64_t eq(const simd8x64& other) const { return simd8x64(this->chunks[0] == other.chunks[0], this->chunks[1] == other.chunks[1], this->chunks[2] == other.chunks[2], @@ -18881,7 +19040,7 @@ namespace simdjson { .to_bitmask(); } - simdjson_really_inline uint64_t lteq(const T m) const { + simdjson_inline uint64_t lteq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask, this->chunks[2] <= mask, this->chunks[3] <= mask) @@ -18905,11 +19064,11 @@ namespace simdjson { // return non-zero if not a structural or whitespace char // zero otherwise - simdjson_really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace_negated[c]; } - simdjson_really_inline uint32_t is_structural_or_whitespace(uint8_t c) { + simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace[c]; } @@ -18940,7 +19099,7 @@ namespace simdjson { // // Note: we assume that surrogates are treated separately // - simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t* c) { + simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t* c) { if (cp <= 0x7F) { c[0] = uint8_t(cp); return 1; // ascii @@ -18960,7 
+19119,7 @@ namespace simdjson { return 3; } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this - // is not needed + // is not needed c[0] = uint8_t((cp >> 18) + 240); c[1] = uint8_t(((cp >> 12) & 63) + 128); c[2] = uint8_t(((cp >> 6) & 63) + 128); @@ -18971,13 +19130,13 @@ namespace simdjson { return 0; // bad r } -#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm // this is a slow emulation routine for 32-bit // - static simdjson_really_inline uint64_t __emulu(uint32_t x, uint32_t y) { + static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { return x * (uint64_t)y; } - static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t* hi) { + static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t* hi) { uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); @@ -18991,9 +19150,9 @@ namespace simdjson { using internal::value128; - simdjson_really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { + simdjson_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { value128 answer; -#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS #ifdef _M_ARM64 // ARM64 has native support for 64-bit multiplications, no need to emultate answer.high = __umulh(value1, value2); @@ -19001,7 +19160,7 @@ namespace simdjson { #else answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 #endif // _M_ARM64 -#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; answer.low = uint64_t(r); answer.high = uint64_t(r >> 64); @@ -19027,13 +19186,13 @@ namespace 
simdjson { // You might think that using memcpy makes this function expensive, but you'd be wrong. // All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); // to the compile-time constant 1936482662. - simdjson_really_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } // Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. // Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. simdjson_warn_unused - simdjson_really_inline uint32_t str4ncmp(const uint8_t* src, const char* atom) { + simdjson_inline uint32_t str4ncmp(const uint8_t* src, const char* atom) { uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); std::memcpy(&srcval, src, sizeof(uint32_t)); @@ -19041,36 +19200,36 @@ namespace simdjson { } simdjson_warn_unused - simdjson_really_inline bool is_valid_true_atom(const uint8_t* src) { + simdjson_inline bool is_valid_true_atom(const uint8_t* src) { return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused - simdjson_really_inline bool is_valid_true_atom(const uint8_t* src, size_t len) { + simdjson_inline bool is_valid_true_atom(const uint8_t* src, size_t len) { if (len > 4) { return is_valid_true_atom(src); } else if (len == 4) { return !str4ncmp(src, "true"); } else { return false; } } simdjson_warn_unused - simdjson_really_inline bool is_valid_false_atom(const uint8_t* src) { + simdjson_inline bool is_valid_false_atom(const uint8_t* src) { return (str4ncmp(src + 1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; } 
simdjson_warn_unused - simdjson_really_inline bool is_valid_false_atom(const uint8_t* src, size_t len) { + simdjson_inline bool is_valid_false_atom(const uint8_t* src, size_t len) { if (len > 5) { return is_valid_false_atom(src); } else if (len == 5) { return !str4ncmp(src + 1, "alse"); } else { return false; } } simdjson_warn_unused - simdjson_really_inline bool is_valid_null_atom(const uint8_t* src) { + simdjson_inline bool is_valid_null_atom(const uint8_t* src) { return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused - simdjson_really_inline bool is_valid_null_atom(const uint8_t* src, size_t len) { + simdjson_inline bool is_valid_null_atom(const uint8_t* src, size_t len) { if (len > 4) { return is_valid_null_atom(src); } else if (len == 4) { return !str4ncmp(src, "null"); } else { return false; } @@ -19096,17 +19255,17 @@ namespace simdjson { struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_really_inline static backslash_and_quote + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t* src, uint8_t* dst); - simdjson_really_inline bool has_quote_first() { + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } - simdjson_really_inline bool has_backslash() { return bs_bits != 0; } - simdjson_really_inline int quote_index() { + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } - simdjson_really_inline int backslash_index() { + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } @@ -19114,7 +19273,7 @@ namespace simdjson { uint32_t quote_bits; }; // struct backslash_and_quote - simdjson_really_inline backslash_and_quote + simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t* src, uint8_t* dst) { // this can read up to 31 bytes beyond the buffer size, but we require // 
SIMDJSON_PADDING of padding @@ -19141,9 +19300,6 @@ namespace simdjson { } // namespace ppc64 } // namespace simdjson -/* begin file include/simdjson/generic/stringparsing.h */ -/* end file include/simdjson/generic/stringparsing.h */ - #endif // SIMDJSON_PPC64_STRINGPARSING_H /* end file include/simdjson/ppc64/stringparsing.h */ /* begin file include/simdjson/ppc64/numberparsing.h */ @@ -19162,7 +19318,7 @@ namespace simdjson { // we don't have appropriate instructions, so let us use a scalar function // credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ - static simdjson_really_inline uint32_t + static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t* chars) { uint64_t val; std::memcpy(&val, chars, sizeof(uint64_t)); @@ -19223,7 +19379,7 @@ namespace simdjson { // Convert a mantissa, an exponent and a sign bit into an ieee64 double. // The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). // The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. - simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { double d; mantissa &= ~(1ULL << 52); mantissa |= real_exponent << 52; @@ -19238,7 +19394,7 @@ namespace simdjson { // set to false. This should work *most of the time* (like 99% of the time). // We assume that power is in the [smallest_power, // largest_power] interval: the caller is responsible for this check. - simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double& d) { + simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double& d) { // we start with a fast path // It was described in // Clinger WD. How to read floating point numbers accurately. 
@@ -19299,7 +19455,7 @@ namespace simdjson { // In the slow path, we need to adjust i so that it is > 1<<63 which is always // possible, except if i == 0, so we handle i == 0 separately. if (i == 0) { - d = 0.0; + d = negative ? -0.0 : 0.0; return true; } @@ -19412,9 +19568,9 @@ namespace simdjson { // Here we have mantissa < (1<<54). int64_t real_exponent = exponent - lz; if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? - // Here have that real_exponent <= 0 so -real_exponent >= 0 + // Here have that real_exponent <= 0 so -real_exponent >= 0 if (-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. - d = 0.0; + d = negative ? -0.0 : 0.0; return true; } // next line is safe because -real_exponent + 1 < 0 @@ -19522,7 +19678,7 @@ namespace simdjson { // check quickly whether the next 8 chars are made of digits // at a glance, it looks better than Mula's // http://0x80.pl/articles/swar-digits-validate.html - simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t * chars) { + simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t * chars) { uint64_t val; // this can read up to 7 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding @@ -19549,7 +19705,7 @@ namespace simdjson { template SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later - simdjson_really_inline bool parse_digit(const uint8_t c, I & i) { + simdjson_inline bool parse_digit(const uint8_t c, I & i) { const uint8_t digit = static_cast(c - '0'); if (digit > 9) { return false; @@ -19559,7 +19715,7 @@ namespace simdjson { return true; } - simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t* const src, const uint8_t * &p, uint64_t & i, int64_t & exponent) { + simdjson_inline error_code parse_decimal(simdjson_unused const uint8_t* const src, const uint8_t * &p, uint64_t & i, int64_t & exponent) { // we continue with the fiction 
that we have an integer. If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the @@ -19587,7 +19743,7 @@ namespace simdjson { return SUCCESS; } - simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t* const src, const uint8_t * &p, int64_t & exponent) { + simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t* const src, const uint8_t * &p, int64_t & exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well @@ -19638,7 +19794,7 @@ namespace simdjson { return SUCCESS; } - simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { // It is possible that the integer had an overflow. // We have to handle the case where we have 0.0000somenumber. const uint8_t* start = start_digits; @@ -19648,7 +19804,7 @@ namespace simdjson { } template - simdjson_really_inline error_code write_float(const uint8_t* const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W & writer) { + simdjson_inline error_code write_float(const uint8_t* const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W & writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. @@ -19684,11 +19840,12 @@ namespace simdjson { static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); // if ((exponent < simdjson::internal::smallest_power) || (i == 0)) { - WRITE_DOUBLE(0, src, writer); + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? 
-0.0 : 0.0, src, writer); return SUCCESS; } else { // (exponent > largest_power) and (i != 0) - // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. return INVALID_NUMBER(src); } } @@ -19705,20 +19862,20 @@ namespace simdjson { #ifdef SIMDJSON_SKIPNUMBERPARSING template - simdjson_really_inline error_code parse_number(const uint8_t* const, W & writer) { + simdjson_inline error_code parse_number(const uint8_t* const, W & writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } - simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { return false; } - simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } - simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } + simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t* const src) noexcept { return 0; } + 
simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } + simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } + simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } #else // parse the number at src @@ -19731,13 +19888,13 @@ namespace simdjson { // // Our objective is accurate parsing (ULP of 0) at high speed. template - simdjson_really_inline error_code parse_number(const uint8_t* const src, W & writer) { + simdjson_inline error_code parse_number(const uint8_t* const src, W & writer) { // // Check for minus sign // bool negative = (*src == '-'); - const uint8_t* p = src + negative; + const uint8_t* p = src + uint8_t(negative); // // Parse the integer part. @@ -19883,7 +20040,7 @@ namespace simdjson { NUMBER_ERROR }; // Parse any number from 0 to 18,446,744,073,709,551,615 - simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t* const src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t* const src) noexcept { const uint8_t* p = src; // // Parse the integer part. 
@@ -19933,7 +20090,7 @@ namespace simdjson { // Parse any number from 0 to 18,446,744,073,709,551,615 // Never read at src_end or beyond - simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t* const src, const uint8_t* const src_end) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t* const src, const uint8_t* const src_end) noexcept { const uint8_t* p = src; // // Parse the integer part. @@ -19981,7 +20138,7 @@ namespace simdjson { } // Parse any number from 0 to 18,446,744,073,709,551,615 - simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t* const src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t* const src) noexcept { const uint8_t* p = src + 1; // // Parse the integer part. @@ -20031,12 +20188,12 @@ namespace simdjson { } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t* src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); - const uint8_t* p = src + negative; + const uint8_t* p = src + uint8_t(negative); // // Parse the integer part. 
@@ -20074,13 +20231,13 @@ namespace simdjson { // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // Never read at src_end or beyond - simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t* const src, const uint8_t* const src_end) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t* const src, const uint8_t* const src_end) noexcept { // // Check for minus sign // if (src == src_end) { return NUMBER_ERROR; } bool negative = (*src == '-'); - const uint8_t* p = src + negative; + const uint8_t* p = src + uint8_t(negative); // // Parse the integer part. @@ -20117,24 +20274,24 @@ namespace simdjson { } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t* src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); - const uint8_t* p = src + negative + 1; + src += uint8_t(negative) + 1; // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t* const start_digits = p; + const uint8_t* const start_digits = src; uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } + while (parse_digit(*src, i)) { src++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); + size_t digit_count = size_t(src - start_digits); // We go from // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // so we can never represent numbers that have more than 19 digits. @@ -20146,11 +20303,11 @@ namespace simdjson { // Here digit_count > 0. 
if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: - if (*p != '"') { return NUMBER_ERROR; } + if (*src != '"') { return NUMBER_ERROR; } // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. // Performance note: This check is only needed when digit_count == longest_digit_count but it is // so cheap that we might as well always make it. @@ -20158,12 +20315,12 @@ namespace simdjson { return negative ? (~i + 1) : i; } - simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t* src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); // // Parse the integer part. 
@@ -20231,19 +20388,19 @@ namespace simdjson { if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src - negative, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; } - simdjson_unused simdjson_really_inline bool is_negative(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline bool is_negative(const uint8_t* src) noexcept { return (*src == '-'); } - simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t* src) noexcept { bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); const uint8_t* p = src; while (static_cast(*p - '0') <= 9) { p++; } if (p == src) { return NUMBER_ERROR; } @@ -20251,9 +20408,9 @@ namespace simdjson { return false; } - simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t* src) noexcept { bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); const uint8_t* p = src; while (static_cast(*p - '0') <= 9) { p++; } if (p == src) { return NUMBER_ERROR; } @@ -20277,13 +20434,13 @@ namespace simdjson { } // Never read at src_end or beyond - simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t* src, const uint8_t* const src_end) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t* src, const uint8_t* const src_end) noexcept { if (src == src_end) { return NUMBER_ERROR; } // // Check for minus sign // bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); // // Parse the integer part. 
@@ -20353,18 +20510,18 @@ namespace simdjson { if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src - negative, src_end, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { return NUMBER_ERROR; } return d; } - simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t* src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); - src += negative + 1; + src += uint8_t(negative) + 1; // // Parse the integer part. @@ -20432,7 +20589,7 @@ namespace simdjson { if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src - negative, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; @@ -20495,9 +20652,12 @@ namespace simdjson { using namespace simdjson::dom; } + /** + * @private + */ class implementation final : public simdjson::implementation { public: - simdjson_really_inline implementation() : simdjson::implementation("westmere", "Intel/AMD SSE4.2", internal::instruction_set::SSE42 | internal::instruction_set::PCLMULQDQ) {} + simdjson_inline implementation() : simdjson::implementation("westmere", "Intel/AMD SSE4.2", internal::instruction_set::SSE42 | internal::instruction_set::PCLMULQDQ) {} simdjson_warn_unused error_code create_dom_parser_implementation( size_t capacity, size_t max_length, @@ -20516,7 +20676,7 @@ namespace simdjson { #ifndef SIMDJSON_WESTMERE_INTRINSICS_H #define SIMDJSON_WESTMERE_INTRINSICS_H -#ifdef SIMDJSON_VISUAL_STUDIO +#if SIMDJSON_VISUAL_STUDIO // under clang within visual studio, this will include #include // visual studio or clang #else @@ -20524,7 +20684,7 @@ namespace simdjson { #endif // SIMDJSON_VISUAL_STUDIO -#ifdef SIMDJSON_CLANG_VISUAL_STUDIO +#if 
SIMDJSON_CLANG_VISUAL_STUDIO /** * You are not supposed, normally, to include these * headers directly. Instead you should either include intrin.h @@ -20537,17 +20697,17 @@ namespace simdjson { #include // for _mm_clmulepi64_si128 #endif - +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); #endif // SIMDJSON_WESTMERE_INTRINSICS_H - /* end file include/simdjson/westmere/intrinsics.h */ +/* end file include/simdjson/westmere/intrinsics.h */ - // - // The rest need to be inside the region - // - /* begin file include/simdjson/westmere/begin.h */ - // redefining SIMDJSON_IMPLEMENTATION to "westmere" - // #define SIMDJSON_IMPLEMENTATION westmere +// +// The rest need to be inside the region +// +/* begin file include/simdjson/westmere/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "westmere" +// #define SIMDJSON_IMPLEMENTATION westmere SIMDJSON_TARGET_WESTMERE /* end file include/simdjson/westmere/begin.h */ @@ -20588,11 +20748,12 @@ namespace simdjson { simdjson_warn_unused error_code stage1(const uint8_t* buf, size_t len, stage1_mode partial) noexcept final; simdjson_warn_unused error_code stage2(dom::document& doc) noexcept final; simdjson_warn_unused error_code stage2_next(dom::document& doc) noexcept final; - simdjson_warn_unused uint8_t* parse_string(const uint8_t* src, uint8_t* dst) const noexcept final; + simdjson_warn_unused uint8_t* parse_string(const uint8_t* src, uint8_t* dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t* parse_wobbly_string(const uint8_t* src, uint8_t* dst) const noexcept final; inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; private: - simdjson_really_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); }; @@ -20645,8 
+20806,12 @@ namespace simdjson { // but the algorithms do not end up using the returned value. // Sadly, sanitizers are not smart enough to figure it out. SIMDJSON_NO_SANITIZE_UNDEFINED - simdjson_really_inline int trailing_zeroes(uint64_t input_num) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + // This function can be used safely even if not all bytes have been + // initialized. + // See issue https://github.com/simdjson/simdjson/issues/1965 + SIMDJSON_NO_SANITIZE_MEMORY + simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO unsigned long ret; // Search the mask data from least significant bit (LSB) // to the most significant bit (MSB) for a set bit (1). @@ -20658,13 +20823,13 @@ namespace simdjson { } /* result might be undefined when input_num is zero */ - simdjson_really_inline uint64_t clear_lowest_bit(uint64_t input_num) { + simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { return input_num & (input_num - 1); } /* result might be undefined when input_num is zero */ - simdjson_really_inline int leading_zeroes(uint64_t input_num) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO unsigned long leading_zero = 0; // Search the mask data from most significant bit (MSB) // to least significant bit (LSB) for a set bit (1). 
@@ -20677,20 +20842,20 @@ namespace simdjson { #endif// SIMDJSON_REGULAR_VISUAL_STUDIO } -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - simdjson_really_inline unsigned __int64 count_ones(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { // note: we do not support legacy 32-bit Windows return __popcnt64(input_num);// Visual Studio wants two underscores } #else - simdjson_really_inline long long int count_ones(uint64_t input_num) { + simdjson_inline long long int count_ones(uint64_t input_num) { return _popcnt64(input_num); } #endif - simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2, + simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t* result) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if SIMDJSON_REGULAR_VISUAL_STUDIO return _addcarry_u64(0, value1, value2, reinterpret_cast(result)); #else @@ -20718,7 +20883,7 @@ namespace simdjson { // // For example, prefix_xor(00100100) == 00011100 // - simdjson_really_inline uint64_t prefix_xor(const uint64_t bitmask) { + simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { // There should be no such thing with a processing supporting avx2 // but not clmul. 
__m128i all_ones = _mm_set1_epi8('\xFF'); @@ -20747,23 +20912,23 @@ namespace simdjson { __m128i value; // Zero constructor - simdjson_really_inline base() : value{ __m128i() } {} + simdjson_inline base() : value{ __m128i() } {} // Conversion from SIMD register - simdjson_really_inline base(const __m128i _value) : value(_value) {} + simdjson_inline base(const __m128i _value) : value(_value) {} // Conversion to SIMD register - simdjson_really_inline operator const __m128i& () const { return this->value; } - simdjson_really_inline operator __m128i& () { return this->value; } + simdjson_inline operator const __m128i& () const { return this->value; } + simdjson_inline operator __m128i& () { return this->value; } // Bit operations - simdjson_really_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } - simdjson_really_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } - simdjson_really_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } - simdjson_really_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } - simdjson_really_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } - simdjson_really_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } - simdjson_really_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return 
_mm_andnot_si128(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } }; // Forward-declared so they can be used by splat and friends. @@ -20775,15 +20940,15 @@ namespace simdjson { typedef uint16_t bitmask_t; typedef uint32_t bitmask2_t; - simdjson_really_inline base8() : base>() {} - simdjson_really_inline base8(const __m128i _value) : base>(_value) {} + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} - friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } static const int SIZE = sizeof(base>::value); template - simdjson_really_inline simd8 prev(const simd8 prev_chunk) const { + simdjson_inline simd8 prev(const simd8 prev_chunk) const { return _mm_alignr_epi8(*this, prev_chunk, 16 - N); } }; @@ -20791,54 +20956,54 @@ namespace simdjson { // SIMD byte mask type (returned by things like eq and gt) template<> struct simd8 : base8 { - static simdjson_really_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } + static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } - simdjson_really_inline simd8() : base8() {} - simdjson_really_inline simd8(const __m128i _value) : base8(_value) {} + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} // Splat constructor - simdjson_really_inline simd8(bool _value) : base8(splat(_value)) {} + 
simdjson_inline simd8(bool _value) : base8(splat(_value)) {} - simdjson_really_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } - simdjson_really_inline bool any() const { return !_mm_testz_si128(*this, *this); } - simdjson_really_inline simd8 operator~() const { return *this ^ true; } + simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } }; template struct base8_numeric : base8 { - static simdjson_really_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } - static simdjson_really_inline simd8 zero() { return _mm_setzero_si128(); } - static simdjson_really_inline simd8 load(const T values[16]) { + static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } + static simdjson_inline simd8 load(const T values[16]) { return _mm_loadu_si128(reinterpret_cast(values)); } // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdjson_really_inline simd8 repeat_16( + static simdjson_inline simd8 repeat_16( T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 ) { return simd8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15 - ); + ); } - simdjson_really_inline base8_numeric() : base8() {} - simdjson_really_inline base8_numeric(const __m128i _value) : base8(_value) {} + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} // Store to array - simdjson_really_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i*>(dst), *this); } + simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i*>(dst), *this); } // Override to distinguish from bool version - 
simdjson_really_inline simd8 operator~() const { return *this ^ 0xFFu; } + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } // Addition/subtraction are the same for signed and unsigned - simdjson_really_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } - simdjson_really_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } - simdjson_really_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } - simdjson_really_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) template - simdjson_really_inline simd8 lookup_16(simd8 lookup_table) const { + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { return _mm_shuffle_epi8(lookup_table, *this); } @@ -20850,7 +21015,7 @@ namespace simdjson { // signature simd8 compress(uint32_t mask) would be // sensible, but the AVX ISA makes this kind of approach difficult. 
template - simdjson_really_inline void compress(uint16_t mask, L* output) const { + simdjson_inline void compress(uint16_t mask, L* output) const { using internal::thintable_epi8; using internal::BitsSetTable256mul2; using internal::pshufb_combine_table; @@ -20881,7 +21046,7 @@ namespace simdjson { } template - simdjson_really_inline simd8 lookup_16( + simdjson_inline simd8 lookup_16( L replace0, L replace1, L replace2, L replace3, L replace4, L replace5, L replace6, L replace7, L replace8, L replace9, L replace10, L replace11, @@ -20898,14 +21063,14 @@ namespace simdjson { // Signed bytes template<> struct simd8 : base8_numeric { - simdjson_really_inline simd8() : base8_numeric() {} - simdjson_really_inline simd8(const __m128i _value) : base8_numeric(_value) {} + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} // Splat constructor - simdjson_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} // Array constructor - simdjson_really_inline simd8(const int8_t* values) : simd8(load(values)) {} + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} // Member-by-member initialization - simdjson_really_inline simd8( + simdjson_inline simd8( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) : simd8(_mm_setr_epi8( @@ -20913,34 +21078,34 @@ namespace simdjson { v8, v9, v10, v11, v12, v13, v14, v15 )) {} // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_really_inline static simd8 repeat_16( + simdjson_inline static simd8 repeat_16( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) { return simd8( v0, v1, v2, v3, v4, v5, 
v6, v7, v8, v9, v10, v11, v12, v13, v14, v15 - ); + ); } // Order-sensitive comparisons - simdjson_really_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } - simdjson_really_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } - simdjson_really_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } - simdjson_really_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } }; // Unsigned bytes template<> struct simd8 : base8_numeric { - simdjson_really_inline simd8() : base8_numeric() {} - simdjson_really_inline simd8(const __m128i _value) : base8_numeric(_value) {} + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} // Splat constructor - simdjson_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} // Array constructor - simdjson_really_inline simd8(const uint8_t* values) : simd8(load(values)) {} + simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} // Member-by-member initialization - simdjson_really_inline simd8( + simdjson_inline simd8( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) : simd8(_mm_setr_epi8( @@ -20948,50 +21113,50 @@ namespace simdjson { v8, v9, v10, v11, v12, v13, v14, v15 )) {} // Repeat 16 values as many times as necessary 
(usually for lookup tables) - simdjson_really_inline static simd8 repeat_16( + simdjson_inline static simd8 repeat_16( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) { return simd8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15 - ); + ); } // Saturated math - simdjson_really_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } - simdjson_really_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } // Order-specific operations - simdjson_really_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } - simdjson_really_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_really_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_really_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } - simdjson_really_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } - simdjson_really_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } - simdjson_really_inline simd8 
operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } - simdjson_really_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } // Bit-specific operations - simdjson_really_inline simd8 bits_not_set() const { return *this == uint8_t(0); } - simdjson_really_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } - simdjson_really_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } - simdjson_really_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } - simdjson_really_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } - simdjson_really_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } - simdjson_really_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } - simdjson_really_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } - simdjson_really_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool 
is_ascii() const { return _mm_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } template - simdjson_really_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } template - simdjson_really_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } // Get one of the bits and make a bitmask out of it. // e.g. value.get_bit<7>() gets the high bit template - simdjson_really_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7 - N)); } + simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7 - N)); } }; template @@ -21004,21 +21169,21 @@ namespace simdjson { simd8x64& operator=(const simd8& other) = delete; // no assignment allowed simd8x64() = delete; // no default constructor allowed - simdjson_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{ chunk0, chunk1, chunk2, chunk3 } {} - simdjson_really_inline simd8x64(const T ptr[64]) : chunks{ simd8::load(ptr), simd8::load(ptr + 16), simd8::load(ptr + 32), simd8::load(ptr + 48) } {} + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{ chunk0, chunk1, chunk2, chunk3 } {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{ simd8::load(ptr), simd8::load(ptr + 16), simd8::load(ptr + 32), simd8::load(ptr + 48) } 
{} - simdjson_really_inline void store(T ptr[64]) const { + simdjson_inline void store(T ptr[64]) const { this->chunks[0].store(ptr + sizeof(simd8) * 0); this->chunks[1].store(ptr + sizeof(simd8) * 1); this->chunks[2].store(ptr + sizeof(simd8) * 2); this->chunks[3].store(ptr + sizeof(simd8) * 3); } - simdjson_really_inline simd8 reduce_or() const { + simdjson_inline simd8 reduce_or() const { return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); } - simdjson_really_inline uint64_t compress(uint64_t mask, T* output) const { + simdjson_inline uint64_t compress(uint64_t mask, T* output) const { this->chunks[0].compress(uint16_t(mask), output); this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); @@ -21026,7 +21191,7 @@ namespace simdjson { return 64 - count_ones(mask); } - simdjson_really_inline uint64_t to_bitmask() const { + simdjson_inline uint64_t to_bitmask() const { uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); uint64_t r1 = this->chunks[1].to_bitmask(); uint64_t r2 = this->chunks[2].to_bitmask(); @@ -21034,33 +21199,33 @@ namespace simdjson { return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); } - simdjson_really_inline uint64_t eq(const T m) const { + simdjson_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] == mask, this->chunks[1] == mask, this->chunks[2] == mask, this->chunks[3] == mask - ).to_bitmask(); + ).to_bitmask(); } - simdjson_really_inline uint64_t eq(const simd8x64& other) const { + simdjson_inline uint64_t eq(const simd8x64& other) const { return simd8x64( this->chunks[0] == other.chunks[0], this->chunks[1] == other.chunks[1], this->chunks[2] == other.chunks[2], this->chunks[3] == other.chunks[3] - ).to_bitmask(); + ).to_bitmask(); } - simdjson_really_inline uint64_t lteq(const T m) const { + simdjson_inline uint64_t lteq(const T 
m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] <= mask, this->chunks[1] <= mask, this->chunks[2] <= mask, this->chunks[3] <= mask - ).to_bitmask(); + ).to_bitmask(); } }; // struct simd8x64 @@ -21080,11 +21245,11 @@ namespace simdjson { // return non-zero if not a structural or whitespace char // zero otherwise - simdjson_really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace_negated[c]; } - simdjson_really_inline uint32_t is_structural_or_whitespace(uint8_t c) { + simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace[c]; } @@ -21115,7 +21280,7 @@ namespace simdjson { // // Note: we assume that surrogates are treated separately // - simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t* c) { + simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t* c) { if (cp <= 0x7F) { c[0] = uint8_t(cp); return 1; // ascii @@ -21135,7 +21300,7 @@ namespace simdjson { return 3; } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this - // is not needed + // is not needed c[0] = uint8_t((cp >> 18) + 240); c[1] = uint8_t(((cp >> 12) & 63) + 128); c[2] = uint8_t(((cp >> 6) & 63) + 128); @@ -21146,13 +21311,13 @@ namespace simdjson { return 0; // bad r } -#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm // this is a slow emulation routine for 32-bit // - static simdjson_really_inline uint64_t __emulu(uint32_t x, uint32_t y) { + static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { return x * (uint64_t)y; } - static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t* hi) { + static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t* hi) { uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); uint64_t bd = 
__emulu((uint32_t)ab, (uint32_t)cd); uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); @@ -21166,9 +21331,9 @@ namespace simdjson { using internal::value128; - simdjson_really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { + simdjson_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { value128 answer; -#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS #ifdef _M_ARM64 // ARM64 has native support for 64-bit multiplications, no need to emultate answer.high = __umulh(value1, value2); @@ -21176,7 +21341,7 @@ namespace simdjson { #else answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 #endif // _M_ARM64 -#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; answer.low = uint64_t(r); answer.high = uint64_t(r >> 64); @@ -21202,13 +21367,13 @@ namespace simdjson { // You might think that using memcpy makes this function expensive, but you'd be wrong. // All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); // to the compile-time constant 1936482662. - simdjson_really_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } // Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. // Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. 
simdjson_warn_unused - simdjson_really_inline uint32_t str4ncmp(const uint8_t* src, const char* atom) { + simdjson_inline uint32_t str4ncmp(const uint8_t* src, const char* atom) { uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); std::memcpy(&srcval, src, sizeof(uint32_t)); @@ -21216,36 +21381,36 @@ namespace simdjson { } simdjson_warn_unused - simdjson_really_inline bool is_valid_true_atom(const uint8_t* src) { + simdjson_inline bool is_valid_true_atom(const uint8_t* src) { return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused - simdjson_really_inline bool is_valid_true_atom(const uint8_t* src, size_t len) { + simdjson_inline bool is_valid_true_atom(const uint8_t* src, size_t len) { if (len > 4) { return is_valid_true_atom(src); } else if (len == 4) { return !str4ncmp(src, "true"); } else { return false; } } simdjson_warn_unused - simdjson_really_inline bool is_valid_false_atom(const uint8_t* src) { + simdjson_inline bool is_valid_false_atom(const uint8_t* src) { return (str4ncmp(src + 1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; } simdjson_warn_unused - simdjson_really_inline bool is_valid_false_atom(const uint8_t* src, size_t len) { + simdjson_inline bool is_valid_false_atom(const uint8_t* src, size_t len) { if (len > 5) { return is_valid_false_atom(src); } else if (len == 5) { return !str4ncmp(src + 1, "alse"); } else { return false; } } simdjson_warn_unused - simdjson_really_inline bool is_valid_null_atom(const uint8_t* src) { + simdjson_inline bool is_valid_null_atom(const uint8_t* src) { return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused - simdjson_really_inline bool is_valid_null_atom(const uint8_t* src, size_t len) { + simdjson_inline bool is_valid_null_atom(const 
uint8_t* src, size_t len) { if (len > 4) { return is_valid_null_atom(src); } else if (len == 4) { return !str4ncmp(src, "null"); } else { return false; } @@ -21270,18 +21435,18 @@ namespace simdjson { struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_really_inline static backslash_and_quote copy_and_find(const uint8_t* src, uint8_t* dst); + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t* src, uint8_t* dst); - simdjson_really_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } - simdjson_really_inline bool has_backslash() { return bs_bits != 0; } - simdjson_really_inline int quote_index() { return trailing_zeroes(quote_bits); } - simdjson_really_inline int backslash_index() { return trailing_zeroes(bs_bits); } + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } uint32_t bs_bits; uint32_t quote_bits; }; // struct backslash_and_quote - simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t* src, uint8_t* dst) { + simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t* src, uint8_t* dst) { // this can read up to 31 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); @@ -21300,9 +21465,6 @@ namespace simdjson { } // namespace westmere } // namespace simdjson -/* begin file include/simdjson/generic/stringparsing.h */ -/* end file include/simdjson/generic/stringparsing.h */ - #endif // SIMDJSON_WESTMERE_STRINGPARSING_H /* end file include/simdjson/westmere/stringparsing.h */ /* begin file 
include/simdjson/westmere/numberparsing.h */ @@ -21313,7 +21475,7 @@ namespace simdjson { namespace westmere { namespace { - static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t* chars) { + static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t* chars) { // this actually computes *16* values so we are being wasteful. const __m128i ascii0 = _mm_set1_epi8('0'); const __m128i mul_1_10 = @@ -21376,7 +21538,7 @@ namespace simdjson { // Convert a mantissa, an exponent and a sign bit into an ieee64 double. // The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). // The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. - simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { double d; mantissa &= ~(1ULL << 52); mantissa |= real_exponent << 52; @@ -21391,7 +21553,7 @@ namespace simdjson { // set to false. This should work *most of the time* (like 99% of the time). // We assume that power is in the [smallest_power, // largest_power] interval: the caller is responsible for this check. - simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double& d) { + simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double& d) { // we start with a fast path // It was described in // Clinger WD. How to read floating point numbers accurately. @@ -21452,7 +21614,7 @@ namespace simdjson { // In the slow path, we need to adjust i so that it is > 1<<63 which is always // possible, except if i == 0, so we handle i == 0 separately. if (i == 0) { - d = 0.0; + d = negative ? -0.0 : 0.0; return true; } @@ -21565,9 +21727,9 @@ namespace simdjson { // Here we have mantissa < (1<<54). 
int64_t real_exponent = exponent - lz; if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? - // Here have that real_exponent <= 0 so -real_exponent >= 0 + // Here have that real_exponent <= 0 so -real_exponent >= 0 if (-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. - d = 0.0; + d = negative ? -0.0 : 0.0; return true; } // next line is safe because -real_exponent + 1 < 0 @@ -21675,7 +21837,7 @@ namespace simdjson { // check quickly whether the next 8 chars are made of digits // at a glance, it looks better than Mula's // http://0x80.pl/articles/swar-digits-validate.html - simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t * chars) { + simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t * chars) { uint64_t val; // this can read up to 7 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding @@ -21702,7 +21864,7 @@ namespace simdjson { template SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later - simdjson_really_inline bool parse_digit(const uint8_t c, I & i) { + simdjson_inline bool parse_digit(const uint8_t c, I & i) { const uint8_t digit = static_cast(c - '0'); if (digit > 9) { return false; @@ -21712,7 +21874,7 @@ namespace simdjson { return true; } - simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t* const src, const uint8_t * &p, uint64_t & i, int64_t & exponent) { + simdjson_inline error_code parse_decimal(simdjson_unused const uint8_t* const src, const uint8_t * &p, uint64_t & i, int64_t & exponent) { // we continue with the fiction that we have an integer. 
If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the @@ -21740,7 +21902,7 @@ namespace simdjson { return SUCCESS; } - simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t* const src, const uint8_t * &p, int64_t & exponent) { + simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t* const src, const uint8_t * &p, int64_t & exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well @@ -21791,7 +21953,7 @@ namespace simdjson { return SUCCESS; } - simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { // It is possible that the integer had an overflow. // We have to handle the case where we have 0.0000somenumber. const uint8_t* start = start_digits; @@ -21801,7 +21963,7 @@ namespace simdjson { } template - simdjson_really_inline error_code write_float(const uint8_t* const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W & writer) { + simdjson_inline error_code write_float(const uint8_t* const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W & writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. @@ -21837,11 +21999,12 @@ namespace simdjson { static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); // if ((exponent < simdjson::internal::smallest_power) || (i == 0)) { - WRITE_DOUBLE(0, src, writer); + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? 
-0.0 : 0.0, src, writer); return SUCCESS; } else { // (exponent > largest_power) and (i != 0) - // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. return INVALID_NUMBER(src); } } @@ -21858,20 +22021,20 @@ namespace simdjson { #ifdef SIMDJSON_SKIPNUMBERPARSING template - simdjson_really_inline error_code parse_number(const uint8_t* const, W & writer) { + simdjson_inline error_code parse_number(const uint8_t* const, W & writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } - simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t* const src) noexcept { return 0; } - simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { return false; } - simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } - simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } + simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t* const src) noexcept { return 0; } + 
simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t* const src) noexcept { return 0; } + simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } + simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } + simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } #else // parse the number at src @@ -21884,13 +22047,13 @@ namespace simdjson { // // Our objective is accurate parsing (ULP of 0) at high speed. template - simdjson_really_inline error_code parse_number(const uint8_t* const src, W & writer) { + simdjson_inline error_code parse_number(const uint8_t* const src, W & writer) { // // Check for minus sign // bool negative = (*src == '-'); - const uint8_t* p = src + negative; + const uint8_t* p = src + uint8_t(negative); // // Parse the integer part. @@ -22036,7 +22199,7 @@ namespace simdjson { NUMBER_ERROR }; // Parse any number from 0 to 18,446,744,073,709,551,615 - simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t* const src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t* const src) noexcept { const uint8_t* p = src; // // Parse the integer part. 
@@ -22086,7 +22249,7 @@ namespace simdjson { // Parse any number from 0 to 18,446,744,073,709,551,615 // Never read at src_end or beyond - simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t* const src, const uint8_t* const src_end) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t* const src, const uint8_t* const src_end) noexcept { const uint8_t* p = src; // // Parse the integer part. @@ -22134,7 +22297,7 @@ namespace simdjson { } // Parse any number from 0 to 18,446,744,073,709,551,615 - simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t* const src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t* const src) noexcept { const uint8_t* p = src + 1; // // Parse the integer part. @@ -22184,12 +22347,12 @@ namespace simdjson { } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t* src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); - const uint8_t* p = src + negative; + const uint8_t* p = src + uint8_t(negative); // // Parse the integer part. 
@@ -22227,13 +22390,13 @@ namespace simdjson { // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // Never read at src_end or beyond - simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t* const src, const uint8_t* const src_end) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t* const src, const uint8_t* const src_end) noexcept { // // Check for minus sign // if (src == src_end) { return NUMBER_ERROR; } bool negative = (*src == '-'); - const uint8_t* p = src + negative; + const uint8_t* p = src + uint8_t(negative); // // Parse the integer part. @@ -22270,24 +22433,24 @@ namespace simdjson { } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t* src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); - const uint8_t* p = src + negative + 1; + src += uint8_t(negative) + 1; // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t* const start_digits = p; + const uint8_t* const start_digits = src; uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } + while (parse_digit(*src, i)) { src++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); + size_t digit_count = size_t(src - start_digits); // We go from // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // so we can never represent numbers that have more than 19 digits. @@ -22299,11 +22462,11 @@ namespace simdjson { // Here digit_count > 0. 
if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: - if (*p != '"') { return NUMBER_ERROR; } + if (*src != '"') { return NUMBER_ERROR; } // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. // Performance note: This check is only needed when digit_count == longest_digit_count but it is // so cheap that we might as well always make it. @@ -22311,12 +22474,12 @@ namespace simdjson { return negative ? (~i + 1) : i; } - simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t* src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); // // Parse the integer part. 
@@ -22384,19 +22547,19 @@ namespace simdjson { if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src - negative, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; } - simdjson_unused simdjson_really_inline bool is_negative(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline bool is_negative(const uint8_t* src) noexcept { return (*src == '-'); } - simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t* src) noexcept { bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); const uint8_t* p = src; while (static_cast(*p - '0') <= 9) { p++; } if (p == src) { return NUMBER_ERROR; } @@ -22404,9 +22567,9 @@ namespace simdjson { return false; } - simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t* src) noexcept { bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); const uint8_t* p = src; while (static_cast(*p - '0') <= 9) { p++; } if (p == src) { return NUMBER_ERROR; } @@ -22430,13 +22593,13 @@ namespace simdjson { } // Never read at src_end or beyond - simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t* src, const uint8_t* const src_end) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t* src, const uint8_t* const src_end) noexcept { if (src == src_end) { return NUMBER_ERROR; } // // Check for minus sign // bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); // // Parse the integer part. 
@@ -22506,18 +22669,18 @@ namespace simdjson { if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src - negative, src_end, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { return NUMBER_ERROR; } return d; } - simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t* src) noexcept { + simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t* src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); - src += negative + 1; + src += uint8_t(negative) + 1; // // Parse the integer part. @@ -22585,7 +22748,7 @@ namespace simdjson { if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src - negative, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; @@ -22674,22 +22837,22 @@ namespace simdjson { /** * Create a new empty result with error = UNINITIALIZED. */ - simdjson_really_inline implementation_simdjson_result_base() noexcept = default; + simdjson_inline implementation_simdjson_result_base() noexcept = default; /** * Create a new error result. */ - simdjson_really_inline implementation_simdjson_result_base(error_code error) noexcept; + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; /** * Create a new successful result. */ - simdjson_really_inline implementation_simdjson_result_base(T&& value) noexcept; + simdjson_inline implementation_simdjson_result_base(T&& value) noexcept; /** * Create a new result with both things (use if you don't want to branch when creating the result). */ - simdjson_really_inline implementation_simdjson_result_base(T&& value, error_code error) noexcept; + simdjson_inline implementation_simdjson_result_base(T&& value, error_code error) noexcept; /** * Move the value and the error to the provided variables. 
@@ -22697,19 +22860,19 @@ namespace simdjson { * @param value The variable to assign the value to. May not be set if there is an error. * @param error The variable to assign the error to. Set to SUCCESS if there is no error. */ - simdjson_really_inline void tie(T& value, error_code& error) && noexcept; + simdjson_inline void tie(T& value, error_code& error) && noexcept; /** * Move the value to the provided variable. * * @param value The variable to assign the value to. May not be set if there is an error. */ - simdjson_really_inline error_code get(T& value) && noexcept; + simdjson_inline error_code get(T& value) && noexcept; /** * The error. */ - simdjson_really_inline error_code error() const noexcept; + simdjson_inline error_code error() const noexcept; #if SIMDJSON_EXCEPTIONS @@ -22718,28 +22881,28 @@ namespace simdjson { * * @throw simdjson_error if there was an error. */ - simdjson_really_inline T& value() & noexcept(false); + simdjson_inline T& value() & noexcept(false); /** * Take the result value (move it). * * @throw simdjson_error if there was an error. */ - simdjson_really_inline T&& value() && noexcept(false); + simdjson_inline T&& value() && noexcept(false); /** * Take the result value (move it). * * @throw simdjson_error if there was an error. */ - simdjson_really_inline T&& take_value() && noexcept(false); + simdjson_inline T&& take_value() && noexcept(false); /** * Cast to the value (will throw on error). * * @throw simdjson_error if there was an error. */ - simdjson_really_inline operator T && () && noexcept(false); + simdjson_inline operator T && () && noexcept(false); #endif // SIMDJSON_EXCEPTIONS @@ -22748,17 +22911,17 @@ namespace simdjson { * Get the result value. This function is safe if and only * the error() method returns a value that evaluates to false. */ - simdjson_really_inline const T& value_unsafe() const& noexcept; + simdjson_inline const T& value_unsafe() const& noexcept; /** * Get the result value. 
This function is safe if and only * the error() method returns a value that evaluates to false. */ - simdjson_really_inline T& value_unsafe() & noexcept; + simdjson_inline T& value_unsafe() & noexcept; /** * Take the result value (move it). This function is safe if and only * the error() method returns a value that evaluates to false. */ - simdjson_really_inline T&& value_unsafe() && noexcept; + simdjson_inline T&& value_unsafe() && noexcept; protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -22822,46 +22985,46 @@ namespace simdjson { * unsigned_integer /// a positive integer larger or equal to 1<<63 * }; */ - simdjson_really_inline number_type get_number_type() const noexcept; + simdjson_inline number_type get_number_type() const noexcept; /** * return true if the automatically determined type of * the number is number_type::unsigned_integer. */ - simdjson_really_inline bool is_uint64() const noexcept; + simdjson_inline bool is_uint64() const noexcept; /** * return the value as a uint64_t, only valid if is_uint64() is true. */ - simdjson_really_inline uint64_t get_uint64() const noexcept; - simdjson_really_inline operator uint64_t() const noexcept; + simdjson_inline uint64_t get_uint64() const noexcept; + simdjson_inline operator uint64_t() const noexcept; /** * return true if the automatically determined type of * the number is number_type::signed_integer. */ - simdjson_really_inline bool is_int64() const noexcept; + simdjson_inline bool is_int64() const noexcept; /** * return the value as a int64_t, only valid if is_int64() is true. */ - simdjson_really_inline int64_t get_int64() const noexcept; - simdjson_really_inline operator int64_t() const noexcept; + simdjson_inline int64_t get_int64() const noexcept; + simdjson_inline operator int64_t() const noexcept; /** * return true if the automatically determined type of * the number is number_type::floating_point_number. 
*/ - simdjson_really_inline bool is_double() const noexcept; + simdjson_inline bool is_double() const noexcept; /** * return the value as a double, only valid if is_double() is true. */ - simdjson_really_inline double get_double() const noexcept; - simdjson_really_inline operator double() const noexcept; + simdjson_inline double get_double() const noexcept; + simdjson_inline operator double() const noexcept; /** * Convert the number to a double. Though it always succeed, the conversion * may be lossy if the number cannot be represented exactly. */ - simdjson_really_inline double as_double() const noexcept; + simdjson_inline double as_double() const noexcept; protected: @@ -22878,13 +23041,13 @@ namespace simdjson { template friend error_code numberparsing::slow_float_parsing(simdjson_unused const uint8_t* src, W writer); /** Store a signed 64-bit value to the number. */ - simdjson_really_inline void append_s64(int64_t value) noexcept; + simdjson_inline void append_s64(int64_t value) noexcept; /** Store an unsigned 64-bit value to the number. */ - simdjson_really_inline void append_u64(uint64_t value) noexcept; + simdjson_inline void append_u64(uint64_t value) noexcept; /** Store a double value to the number. */ - simdjson_really_inline void append_double(double value) noexcept; + simdjson_inline void append_double(double value) noexcept; /** Specifies that the value is a double, but leave it undefined. */ - simdjson_really_inline void skip_double() noexcept; + simdjson_inline void skip_double() noexcept; /** * End of friend declarations. 
*/ @@ -22932,10 +23095,10 @@ namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_type&& value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; - simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_type&& value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private }; } // namespace simdjson @@ -23034,7 +23197,7 @@ namespace simdjson { * * Exists so you can declare a variable and later assign to it before use. */ - simdjson_really_inline raw_json_string() noexcept = default; + simdjson_inline raw_json_string() noexcept = default; /** * Create a new invalid raw_json_string pointed at the given location in the JSON. @@ -23043,14 +23206,14 @@ namespace simdjson { * * It *must* be terminated by a ", and be a valid JSON string. */ - simdjson_really_inline raw_json_string(const uint8_t* _buf) noexcept; + simdjson_inline raw_json_string(const uint8_t* _buf) noexcept; /** * Get the raw pointer to the beginning of the string in the JSON (just after the "). * * It is possible for this function to return a null pointer if the instance * has outlived its existence. */ - simdjson_really_inline const char* raw() const noexcept; + simdjson_inline const char* raw() const noexcept; /** * This compares the current instance to the std::string_view target: returns true if @@ -23066,7 +23229,7 @@ namespace simdjson { * Performance: the comparison may be done using memcmp which may be efficient * for long strings. 
*/ - simdjson_really_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; + simdjson_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; /** * This compares the current instance to the std::string_view target: returns true if @@ -23083,7 +23246,7 @@ namespace simdjson { * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); * s.unsafe_is_equal(target); */ - simdjson_really_inline bool unsafe_is_equal(std::string_view target) const noexcept; + simdjson_inline bool unsafe_is_equal(std::string_view target) const noexcept; /** * This compares the current instance to the C string target: returns true if @@ -23097,27 +23260,27 @@ namespace simdjson { * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); * s.unsafe_is_equal(target); */ - simdjson_really_inline bool unsafe_is_equal(const char* target) const noexcept; + simdjson_inline bool unsafe_is_equal(const char* target) const noexcept; /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done). */ - simdjson_really_inline bool is_equal(std::string_view target) const noexcept; + simdjson_inline bool is_equal(std::string_view target) const noexcept; /** * This compares the current instance to the C string target: returns true if * they are byte-by-byte equal (no escaping is done). */ - simdjson_really_inline bool is_equal(const char* target) const noexcept; + simdjson_inline bool is_equal(const char* target) const noexcept; /** * Returns true if target is free from unescaped quote. If target is known at * compile-time, we might expect the computation to happen at compile time with * many compilers (not all!). 
*/ - static simdjson_really_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; - static simdjson_really_inline bool is_free_from_unescaped_quote(const char* target) noexcept; + static simdjson_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; + static simdjson_inline bool is_free_from_unescaped_quote(const char* target) noexcept; private: @@ -23126,15 +23289,29 @@ namespace simdjson { * This will set the inner pointer to zero, effectively making * this instance unusable. */ - simdjson_really_inline void consume() noexcept { buf = nullptr; } + simdjson_inline void consume() noexcept { buf = nullptr; } /** * Checks whether the inner pointer is non-null and thus usable. */ - simdjson_really_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } + simdjson_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result will be a valid UTF-8. + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + * @param allow_replacement Whether we allow replacement of invalid surrogate pairs. + */ + simdjson_inline simdjson_warn_unused simdjson_result unescape(json_iterator& iter, bool allow_replacement) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result may not be a valid UTF-8. https://simonsapin.github.io/wtf-8/ * * ## IMPORTANT: string_view lifetime * @@ -23142,8 +23319,7 @@ namespace simdjson { * * @param iter A json_iterator, which contains a buffer where the string will be written. 
*/ - simdjson_really_inline simdjson_warn_unused simdjson_result unescape(json_iterator& iter) const noexcept; - + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(json_iterator& iter) const noexcept; const uint8_t* buf{}; friend class object; friend class field; @@ -23151,16 +23327,16 @@ namespace simdjson { friend struct simdjson_result; }; - simdjson_unused simdjson_really_inline std::ostream& operator<<(std::ostream&, const raw_json_string&) noexcept; + simdjson_unused simdjson_inline std::ostream& operator<<(std::ostream&, const raw_json_string&) noexcept; /** * Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible * for providing a string with no unescaped quote. Note that unescaped quotes cannot be present in valid JSON strings. */ - simdjson_unused simdjson_really_inline bool operator==(const raw_json_string& a, std::string_view c) noexcept; - simdjson_unused simdjson_really_inline bool operator==(std::string_view c, const raw_json_string& a) noexcept; - simdjson_unused simdjson_really_inline bool operator!=(const raw_json_string& a, std::string_view c) noexcept; - simdjson_unused simdjson_really_inline bool operator!=(std::string_view c, const raw_json_string& a) noexcept; + simdjson_unused simdjson_inline bool operator==(const raw_json_string& a, std::string_view c) noexcept; + simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string& a) noexcept; + simdjson_unused simdjson_inline bool operator!=(const raw_json_string& a, std::string_view c) noexcept; + simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string& a) noexcept; } // namespace ondemand @@ -23172,13 +23348,14 @@ namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { public: - simdjson_really_inline 
simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string&& value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; - simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string&& value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private - simdjson_really_inline simdjson_result raw() const noexcept; - simdjson_really_inline simdjson_warn_unused simdjson_result unescape(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator& iter) const noexcept; + simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator& iter, bool allow_replacement) const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator& iter) const noexcept; }; } // namespace simdjson @@ -23201,20 +23378,20 @@ namespace simdjson { * * Exists so you can declare a variable and later assign to it before use. 
*/ - simdjson_really_inline token_iterator() noexcept = default; - simdjson_really_inline token_iterator(token_iterator&& other) noexcept = default; - simdjson_really_inline token_iterator& operator=(token_iterator&& other) noexcept = default; - simdjson_really_inline token_iterator(const token_iterator& other) noexcept = default; - simdjson_really_inline token_iterator& operator=(const token_iterator& other) noexcept = default; + simdjson_inline token_iterator() noexcept = default; + simdjson_inline token_iterator(token_iterator&& other) noexcept = default; + simdjson_inline token_iterator& operator=(token_iterator&& other) noexcept = default; + simdjson_inline token_iterator(const token_iterator& other) noexcept = default; + simdjson_inline token_iterator& operator=(const token_iterator& other) noexcept = default; /** * Advance to the next token (returning the current one). */ - simdjson_really_inline const uint8_t* return_current_and_advance() noexcept; + simdjson_inline const uint8_t* return_current_and_advance() noexcept; /** * Reports the current offset in bytes from the start of the underlying buffer. */ - simdjson_really_inline uint32_t current_offset() const noexcept; + simdjson_inline uint32_t current_offset() const noexcept; /** * Get the JSON text for a given token (relative). * @@ -23226,7 +23403,7 @@ namespace simdjson { * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ - simdjson_really_inline const uint8_t* peek(int32_t delta = 0) const noexcept; + simdjson_inline const uint8_t* peek(int32_t delta = 0) const noexcept; /** * Get the maximum length of the JSON text for a given token. * @@ -23235,7 +23412,7 @@ namespace simdjson { * @param delta The relative position of the token to retrieve. e.g. 0 = current token, * 1 = next token, -1 = prev token. 
*/ - simdjson_really_inline uint32_t peek_length(int32_t delta = 0) const noexcept; + simdjson_inline uint32_t peek_length(int32_t delta = 0) const noexcept; /** * Get the JSON text for a given token. @@ -23245,7 +23422,7 @@ namespace simdjson { * @param position The position of the token. * */ - simdjson_really_inline const uint8_t* peek(token_position position) const noexcept; + simdjson_inline const uint8_t* peek(token_position position) const noexcept; /** * Get the maximum length of the JSON text for a given token. * @@ -23253,29 +23430,29 @@ namespace simdjson { * * @param position The position of the token. */ - simdjson_really_inline uint32_t peek_length(token_position position) const noexcept; + simdjson_inline uint32_t peek_length(token_position position) const noexcept; /** * Return the current index. */ - simdjson_really_inline token_position position() const noexcept; + simdjson_inline token_position position() const noexcept; /** * Reset to a previously saved index. */ - simdjson_really_inline void set_position(token_position target_position) noexcept; + simdjson_inline void set_position(token_position target_position) noexcept; // NOTE: we don't support a full C++ iterator interface, because we expect people to make // different calls to advance the iterator based on *their own* state. 
- simdjson_really_inline bool operator==(const token_iterator& other) const noexcept; - simdjson_really_inline bool operator!=(const token_iterator& other) const noexcept; - simdjson_really_inline bool operator>(const token_iterator& other) const noexcept; - simdjson_really_inline bool operator>=(const token_iterator& other) const noexcept; - simdjson_really_inline bool operator<(const token_iterator& other) const noexcept; - simdjson_really_inline bool operator<=(const token_iterator& other) const noexcept; + simdjson_inline bool operator==(const token_iterator& other) const noexcept; + simdjson_inline bool operator!=(const token_iterator& other) const noexcept; + simdjson_inline bool operator>(const token_iterator& other) const noexcept; + simdjson_inline bool operator>=(const token_iterator& other) const noexcept; + simdjson_inline bool operator<(const token_iterator& other) const noexcept; + simdjson_inline bool operator<=(const token_iterator& other) const noexcept; protected: - simdjson_really_inline token_iterator(const uint8_t* buf, token_position position) noexcept; + simdjson_inline token_iterator(const uint8_t* buf, token_position position) noexcept; /** * Get the index of the JSON text for a given token (relative). @@ -23285,7 +23462,7 @@ namespace simdjson { * @param delta The relative position of the token to retrieve. e.g. 0 = current token, * 1 = next token, -1 = prev token. */ - simdjson_really_inline uint32_t peek_index(int32_t delta = 0) const noexcept; + simdjson_inline uint32_t peek_index(int32_t delta = 0) const noexcept; /** * Get the index of the JSON text for a given token. * @@ -23294,7 +23471,7 @@ namespace simdjson { * @param position The position of the token. 
* */ - simdjson_really_inline uint32_t peek_index(token_position position) const noexcept; + simdjson_inline uint32_t peek_index(token_position position) const noexcept; const uint8_t* buf{}; token_position _position{}; @@ -23302,8 +23479,8 @@ namespace simdjson { friend class json_iterator; friend class value_iterator; friend class object; - friend simdjson_really_inline void logger::log_line(const json_iterator& iter, const char* title_prefix, const char* title, std::string_view detail, int delta, int depth_delta) noexcept; - friend simdjson_really_inline void logger::log_line(const json_iterator& iter, token_position index, depth_t depth, const char* title_prefix, const char* title, std::string_view detail) noexcept; + friend simdjson_inline void logger::log_line(const json_iterator& iter, const char* title_prefix, const char* title, std::string_view detail, int delta, int depth_delta) noexcept; + friend simdjson_inline void logger::log_line(const json_iterator& iter, token_position index, depth_t depth, const char* title_prefix, const char* title, std::string_view detail) noexcept; }; } // namespace ondemand @@ -23315,10 +23492,10 @@ namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::token_iterator&& value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; - simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::token_iterator&& value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private }; } // 
namespace simdjson @@ -23387,20 +23564,20 @@ namespace simdjson { bool _streaming{ false }; public: - simdjson_really_inline json_iterator() noexcept = default; - simdjson_really_inline json_iterator(json_iterator&& other) noexcept; - simdjson_really_inline json_iterator& operator=(json_iterator&& other) noexcept; - simdjson_really_inline explicit json_iterator(const json_iterator& other) noexcept = default; - simdjson_really_inline json_iterator& operator=(const json_iterator& other) noexcept = default; + simdjson_inline json_iterator() noexcept = default; + simdjson_inline json_iterator(json_iterator&& other) noexcept; + simdjson_inline json_iterator& operator=(json_iterator&& other) noexcept; + simdjson_inline explicit json_iterator(const json_iterator& other) noexcept = default; + simdjson_inline json_iterator& operator=(const json_iterator& other) noexcept = default; /** * Skips a JSON value, whether it is a scalar, array or object. */ - simdjson_warn_unused simdjson_really_inline error_code skip_child(depth_t parent_depth) noexcept; + simdjson_warn_unused simdjson_inline error_code skip_child(depth_t parent_depth) noexcept; /** * Tell whether the iterator is still at the start */ - simdjson_really_inline bool at_root() const noexcept; + simdjson_inline bool at_root() const noexcept; /** * Tell whether we should be expected to run in streaming @@ -23408,53 +23585,61 @@ namespace simdjson { * that does not affect how the iterator works. It is used by * start_root_array() and start_root_object(). 
*/ - simdjson_really_inline bool streaming() const noexcept; + simdjson_inline bool streaming() const noexcept; /** * Get the root value iterator */ - simdjson_really_inline token_position root_position() const noexcept; + simdjson_inline token_position root_position() const noexcept; /** * Assert that we are at the document depth (== 1) */ - simdjson_really_inline void assert_at_document_depth() const noexcept; + simdjson_inline void assert_at_document_depth() const noexcept; /** * Assert that we are at the root of the document */ - simdjson_really_inline void assert_at_root() const noexcept; + simdjson_inline void assert_at_root() const noexcept; /** * Tell whether the iterator is at the EOF mark */ - simdjson_really_inline bool at_end() const noexcept; + simdjson_inline bool at_end() const noexcept; /** * Tell whether the iterator is live (has not been moved). */ - simdjson_really_inline bool is_alive() const noexcept; + simdjson_inline bool is_alive() const noexcept; /** * Abandon this iterator, setting depth to 0 (as if the document is finished). */ - simdjson_really_inline void abandon() noexcept; + simdjson_inline void abandon() noexcept; /** * Advance the current token without modifying depth. */ - simdjson_really_inline const uint8_t* return_current_and_advance() noexcept; + simdjson_inline const uint8_t* return_current_and_advance() noexcept; + + /** + * Returns true if there is a single token in the index (i.e., it is + * a JSON with a scalar value such as a single number). + * + * @return whether there is a single token + */ + simdjson_inline bool is_single_token() const noexcept; /** * Assert that there are at least the given number of tokens left. * * Has no effect in release builds. */ - simdjson_really_inline void assert_more_tokens(uint32_t required_tokens = 1) const noexcept; + simdjson_inline void assert_more_tokens(uint32_t required_tokens = 1) const noexcept; /** * Assert that the given position addresses an actual token (is within bounds). 
* * Has no effect in release builds. */ - simdjson_really_inline void assert_valid_position(token_position position) const noexcept; + simdjson_inline void assert_valid_position(token_position position) const noexcept; /** * Get the JSON text for a given token (relative). * @@ -23465,7 +23650,7 @@ namespace simdjson { * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ - simdjson_really_inline const uint8_t* peek(int32_t delta = 0) const noexcept; + simdjson_inline const uint8_t* peek(int32_t delta = 0) const noexcept; /** * Get the maximum length of the JSON text for the current token (or relative). * @@ -23473,7 +23658,7 @@ namespace simdjson { * * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. */ - simdjson_really_inline uint32_t peek_length(int32_t delta = 0) const noexcept; + simdjson_inline uint32_t peek_length(int32_t delta = 0) const noexcept; /** * Get a pointer to the current location in the input buffer. * @@ -23482,7 +23667,7 @@ namespace simdjson { * You may be pointing outside of the input buffer: it is not generally * safe to dereference this pointer. */ - simdjson_really_inline const uint8_t* unsafe_pointer() const noexcept; + simdjson_inline const uint8_t* unsafe_pointer() const noexcept; /** * Get the JSON text for a given token. * @@ -23493,7 +23678,7 @@ namespace simdjson { * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ - simdjson_really_inline const uint8_t* peek(token_position position) const noexcept; + simdjson_inline const uint8_t* peek(token_position position) const noexcept; /** * Get the maximum length of the JSON text for the current token (or relative). * @@ -23501,7 +23686,7 @@ namespace simdjson { * * @param position The position of the token to retrieve. 
*/ - simdjson_really_inline uint32_t peek_length(token_position position) const noexcept; + simdjson_inline uint32_t peek_length(token_position position) const noexcept; /** * Get the JSON text for the last token in the document. * @@ -23510,7 +23695,7 @@ namespace simdjson { * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ - simdjson_really_inline const uint8_t* peek_last() const noexcept; + simdjson_inline const uint8_t* peek_last() const noexcept; /** * Ascend one level. @@ -23519,7 +23704,7 @@ namespace simdjson { * * @param parent_depth the expected parent depth. */ - simdjson_really_inline void ascend_to(depth_t parent_depth) noexcept; + simdjson_inline void ascend_to(depth_t parent_depth) noexcept; /** * Descend one level. @@ -23528,18 +23713,18 @@ namespace simdjson { * * @param child_depth the expected child depth. */ - simdjson_really_inline void descend_to(depth_t child_depth) noexcept; - simdjson_really_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; + simdjson_inline void descend_to(depth_t child_depth) noexcept; + simdjson_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; /** * Get current depth. */ - simdjson_really_inline depth_t depth() const noexcept; + simdjson_inline depth_t depth() const noexcept; /** * Get current (writeable) location in the string buffer. */ - simdjson_really_inline uint8_t*& string_buf_loc() noexcept; + simdjson_inline uint8_t*& string_buf_loc() noexcept; /** * Report an unrecoverable error, preventing further iteration. @@ -23547,30 +23732,32 @@ namespace simdjson { * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. 
*/ - simdjson_really_inline error_code report_error(error_code error, const char* message) noexcept; + simdjson_inline error_code report_error(error_code error, const char* message) noexcept; /** * Log error, but don't stop iteration. * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ - simdjson_really_inline error_code optional_error(error_code error, const char* message) noexcept; + simdjson_inline error_code optional_error(error_code error, const char* message) noexcept; - template simdjson_warn_unused simdjson_really_inline bool copy_to_buffer(const uint8_t* json, uint32_t max_len, uint8_t(&tmpbuf)[N]) noexcept; + template simdjson_warn_unused simdjson_inline bool copy_to_buffer(const uint8_t* json, uint32_t max_len, uint8_t(&tmpbuf)[N]) noexcept; - simdjson_really_inline token_position position() const noexcept; + simdjson_inline token_position position() const noexcept; /** * Write the raw_json_string to the string buffer and return a string_view. * Each raw_json_string should be unescaped once, or else the string buffer might * overflow. 
*/ - simdjson_really_inline simdjson_result unescape(raw_json_string in) noexcept; - simdjson_really_inline void reenter_child(token_position position, depth_t child_depth) noexcept; + simdjson_inline simdjson_result unescape(raw_json_string in, bool allow_replacement) noexcept; + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in) noexcept; + simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; -#ifdef SIMDJSON_DEVELOPMENT_CHECKS - simdjson_really_inline token_position start_position(depth_t depth) const noexcept; - simdjson_really_inline void set_start_position(depth_t depth, token_position position) noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + simdjson_inline token_position start_position(depth_t depth) const noexcept; + simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; #endif + /* Useful for debugging and logging purposes. */ inline std::string to_string() const noexcept; @@ -23592,13 +23779,13 @@ namespace simdjson { */ inline bool balanced() const noexcept; protected: - simdjson_really_inline json_iterator(const uint8_t* buf, ondemand::parser* parser) noexcept; + simdjson_inline json_iterator(const uint8_t* buf, ondemand::parser* parser) noexcept; /// The last token before the end - simdjson_really_inline token_position last_position() const noexcept; + simdjson_inline token_position last_position() const noexcept; /// The token *at* the end. This points at gibberish and should only be used for comparison. - simdjson_really_inline token_position end_position() const noexcept; + simdjson_inline token_position end_position() const noexcept; /// The end of the buffer. 
- simdjson_really_inline token_position end() const noexcept; + simdjson_inline token_position end() const noexcept; friend class document; friend class document_stream; @@ -23608,8 +23795,8 @@ namespace simdjson { friend class raw_json_string; friend class parser; friend class value_iterator; - friend simdjson_really_inline void logger::log_line(const json_iterator& iter, const char* title_prefix, const char* title, std::string_view detail, int delta, int depth_delta) noexcept; - friend simdjson_really_inline void logger::log_line(const json_iterator& iter, token_position index, depth_t depth, const char* title_prefix, const char* title, std::string_view detail) noexcept; + friend simdjson_inline void logger::log_line(const json_iterator& iter, const char* title_prefix, const char* title, std::string_view detail, int delta, int depth_delta) noexcept; + friend simdjson_inline void logger::log_line(const json_iterator& iter, token_position index, depth_t depth, const char* title_prefix, const char* title, std::string_view detail) noexcept; }; // json_iterator } // namespace ondemand @@ -23621,10 +23808,10 @@ namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator&& value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator&& value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; + simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson @@ -23661,61 +23848,61 @@ namespace simdjson { token_position _start_position{}; public: - simdjson_really_inline value_iterator() noexcept = default; + simdjson_inline 
value_iterator() noexcept = default; /** * Denote that we're starting a document. */ - simdjson_really_inline void start_document() noexcept; + simdjson_inline void start_document() noexcept; /** * Skips a non-iterated or partially-iterated JSON value, whether it is a scalar, array or object. * * Optimized for scalars. */ - simdjson_warn_unused simdjson_really_inline error_code skip_child() noexcept; + simdjson_warn_unused simdjson_inline error_code skip_child() noexcept; /** * Tell whether the iterator is at the EOF mark */ - simdjson_really_inline bool at_end() const noexcept; + simdjson_inline bool at_end() const noexcept; /** * Tell whether the iterator is at the start of the value */ - simdjson_really_inline bool at_start() const noexcept; + simdjson_inline bool at_start() const noexcept; /** * Tell whether the value is open--if the value has not been used, or the array/object is still open. */ - simdjson_really_inline bool is_open() const noexcept; + simdjson_inline bool is_open() const noexcept; /** * Tell whether the value is at an object's first field (just after the {). */ - simdjson_really_inline bool at_first_field() const noexcept; + simdjson_inline bool at_first_field() const noexcept; /** * Abandon all iteration. */ - simdjson_really_inline void abandon() noexcept; + simdjson_inline void abandon() noexcept; /** * Get the child value as a value_iterator. */ - simdjson_really_inline value_iterator child_value() const noexcept; + simdjson_inline value_iterator child_value() const noexcept; /** * Get the depth of this value. */ - simdjson_really_inline int32_t depth() const noexcept; + simdjson_inline int32_t depth() const noexcept; /** * Get the JSON type of this value. * * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". 
*/ - simdjson_really_inline simdjson_result type() const noexcept; + simdjson_inline simdjson_result type() const noexcept; /** * @addtogroup object Object iteration @@ -23732,7 +23919,7 @@ namespace simdjson { * @returns Whether the object had any fields (returns false for empty). * @error INCORRECT_TYPE if there is no opening { */ - simdjson_warn_unused simdjson_really_inline simdjson_result start_object() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result start_object() noexcept; /** * Start an object iteration from the root. * @@ -23740,7 +23927,7 @@ namespace simdjson { * @error INCORRECT_TYPE if there is no opening { * @error TAPE_ERROR if there is no matching } at end of document */ - simdjson_warn_unused simdjson_really_inline simdjson_result start_root_object() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result start_root_object() noexcept; /** * Start an object iteration after the user has already checked and moved past the {. @@ -23751,7 +23938,7 @@ namespace simdjson { * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* * array or object is incomplete). */ - simdjson_warn_unused simdjson_really_inline simdjson_result started_object() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; /** * Start an object iteration from the root, after the user has already checked and moved past the {. * @@ -23761,7 +23948,7 @@ namespace simdjson { * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* * array or object is incomplete). */ - simdjson_warn_unused simdjson_really_inline simdjson_result started_root_object() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; /** * Moves to the next field in an object. @@ -23773,17 +23960,17 @@ namespace simdjson { * @error TAPE_ERROR If there is a comma missing between fields. 
* @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value. */ - simdjson_warn_unused simdjson_really_inline simdjson_result has_next_field() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result has_next_field() noexcept; /** * Get the current field's key. */ - simdjson_warn_unused simdjson_really_inline simdjson_result field_key() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result field_key() noexcept; /** * Pass the : in the field and move to its value. */ - simdjson_warn_unused simdjson_really_inline error_code field_value() noexcept; + simdjson_warn_unused simdjson_inline error_code field_value() noexcept; /** * Find the next field with the given key. @@ -23801,7 +23988,7 @@ namespace simdjson { * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may * fail to match some keys with escapes (\u, \n, etc.). */ - simdjson_warn_unused simdjson_really_inline error_code find_field(const std::string_view key) noexcept; + simdjson_warn_unused simdjson_inline error_code find_field(const std::string_view key) noexcept; /** * Find the next field with the given key, *without* unescaping. This assumes object order: it @@ -23820,7 +24007,7 @@ namespace simdjson { * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may * fail to match some keys with escapes (\u, \n, etc.). */ - simdjson_warn_unused simdjson_really_inline simdjson_result find_field_raw(const std::string_view key) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result find_field_raw(const std::string_view key) noexcept; /** * Find the field with the given key without regard to order, and *without* unescaping. @@ -23840,7 +24027,7 @@ namespace simdjson { * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may * fail to match some keys with escapes (\u, \n, etc.). 
*/ - simdjson_warn_unused simdjson_really_inline simdjson_result find_field_unordered_raw(const std::string_view key) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result find_field_unordered_raw(const std::string_view key) noexcept; /** @} */ @@ -23857,7 +24044,7 @@ namespace simdjson { * @returns Whether the array had any elements (returns false for empty). * @error INCORRECT_TYPE If there is no [. */ - simdjson_warn_unused simdjson_really_inline simdjson_result start_array() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result start_array() noexcept; /** * Check for an opening [ and start an array iteration while at the root. * @@ -23865,7 +24052,7 @@ namespace simdjson { * @error INCORRECT_TYPE If there is no [. * @error TAPE_ERROR if there is no matching ] at end of document */ - simdjson_warn_unused simdjson_really_inline simdjson_result start_root_array() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result start_root_array() noexcept; /** * Start an array iteration, after the user has already checked and moved past the [. @@ -23876,7 +24063,7 @@ namespace simdjson { * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* * array or object is incomplete). */ - simdjson_warn_unused simdjson_really_inline simdjson_result started_array() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; /** * Start an array iteration from the root, after the user has already checked and moved past the [. * @@ -23886,7 +24073,7 @@ namespace simdjson { * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* * array or object is incomplete). */ - simdjson_warn_unused simdjson_really_inline simdjson_result started_root_array() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; /** * Moves to the next element in an array. 
@@ -23897,12 +24084,12 @@ namespace simdjson { * @return Whether there is another element in the array. * @error TAPE_ERROR If there is a comma missing between elements. */ - simdjson_warn_unused simdjson_really_inline simdjson_result has_next_element() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result has_next_element() noexcept; /** * Get a child value iterator. */ - simdjson_warn_unused simdjson_really_inline value_iterator child() const noexcept; + simdjson_warn_unused simdjson_inline value_iterator child() const noexcept; /** @} */ @@ -23912,43 +24099,45 @@ namespace simdjson { * @{ */ - simdjson_warn_unused simdjson_really_inline simdjson_result get_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_raw_json_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_uint64() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_uint64_in_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_int64() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_int64_in_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_double() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_double_in_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_bool() noexcept; - simdjson_really_inline bool is_null() noexcept; - simdjson_warn_unused simdjson_really_inline bool is_negative() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result is_integer() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_number_type() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_number() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_string(bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_wobbly_string() noexcept; + 
simdjson_warn_unused simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_bool() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_null() noexcept; + simdjson_warn_unused simdjson_inline bool is_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_integer() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_raw_json_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_uint64() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_uint64_in_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_int64() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_int64_in_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_double() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_double_in_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_bool() noexcept; - simdjson_warn_unused simdjson_really_inline bool is_root_negative() noexcept; - simdjson_warn_unused 
simdjson_really_inline simdjson_result is_root_integer() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_number_type() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_number() noexcept; - simdjson_really_inline bool is_root_null() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_string(bool check_trailing, bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_wobbly_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_raw_json_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_bool(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline bool is_root_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_integer(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number_type(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept; - simdjson_really_inline error_code error() const noexcept; - simdjson_really_inline uint8_t*& string_buf_loc() noexcept; - 
simdjson_really_inline const json_iterator& json_iter() const noexcept; - simdjson_really_inline json_iterator& json_iter() noexcept; + simdjson_inline error_code error() const noexcept; + simdjson_inline uint8_t*& string_buf_loc() noexcept; + simdjson_inline const json_iterator& json_iter() const noexcept; + simdjson_inline json_iterator& json_iter() noexcept; - simdjson_really_inline void assert_is_valid() const noexcept; - simdjson_really_inline bool is_valid() const noexcept; + simdjson_inline void assert_is_valid() const noexcept; + simdjson_inline bool is_valid() const noexcept; /** @} */ protected: @@ -23956,12 +24145,12 @@ namespace simdjson { * Restarts an array iteration. * @returns Whether the array has any elements (returns false for empty). */ - simdjson_really_inline simdjson_result reset_array() noexcept; + simdjson_inline simdjson_result reset_array() noexcept; /** * Restarts an object iteration. * @returns Whether the object has any fields (returns false for empty). */ - simdjson_really_inline simdjson_result reset_object() noexcept; + simdjson_inline simdjson_result reset_object() noexcept; /** * move_at_start(): moves us so that we are pointing at the beginning of * the container. It updates the index so that at_start() is true and it @@ -23969,7 +24158,7 @@ namespace simdjson { * * Usage: used with value::count_elements(). **/ - simdjson_really_inline void move_at_start() noexcept; + simdjson_inline void move_at_start() noexcept; /** * move_at_container_start(): moves us so that we are pointing at the beginning of @@ -23977,15 +24166,15 @@ namespace simdjson { * * Usage: used with reset_array() and reset_object(). **/ - simdjson_really_inline void move_at_container_start() noexcept; + simdjson_inline void move_at_container_start() noexcept; /* Useful for debugging and logging purposes. 
*/ inline std::string to_string() const noexcept; - simdjson_really_inline value_iterator(json_iterator* json_iter, depth_t depth, token_position start_index) noexcept; + simdjson_inline value_iterator(json_iterator* json_iter, depth_t depth, token_position start_index) noexcept; - simdjson_really_inline bool parse_null(const uint8_t* json) const noexcept; - simdjson_really_inline simdjson_result parse_bool(const uint8_t* json) const noexcept; - simdjson_really_inline const uint8_t* peek_start() const noexcept; - simdjson_really_inline uint32_t peek_start_length() const noexcept; + simdjson_inline simdjson_result parse_null(const uint8_t* json) const noexcept; + simdjson_inline simdjson_result parse_bool(const uint8_t* json) const noexcept; + simdjson_inline const uint8_t* peek_start() const noexcept; + simdjson_inline uint32_t peek_start_length() const noexcept; /** * The general idea of the advance_... methods and the peek_* methods @@ -24016,17 +24205,17 @@ namespace simdjson { * Unfortunately, it makes the code more verbose, longer and maybe more error prone. 
*/ - simdjson_really_inline void advance_scalar(const char* type) noexcept; - simdjson_really_inline void advance_root_scalar(const char* type) noexcept; - simdjson_really_inline void advance_non_root_scalar(const char* type) noexcept; + simdjson_inline void advance_scalar(const char* type) noexcept; + simdjson_inline void advance_root_scalar(const char* type) noexcept; + simdjson_inline void advance_non_root_scalar(const char* type) noexcept; - simdjson_really_inline const uint8_t* peek_scalar(const char* type) noexcept; - simdjson_really_inline const uint8_t* peek_root_scalar(const char* type) noexcept; - simdjson_really_inline const uint8_t* peek_non_root_scalar(const char* type) noexcept; + simdjson_inline const uint8_t* peek_scalar(const char* type) noexcept; + simdjson_inline const uint8_t* peek_root_scalar(const char* type) noexcept; + simdjson_inline const uint8_t* peek_non_root_scalar(const char* type) noexcept; - simdjson_really_inline error_code start_container(uint8_t start_char, const char* incorrect_type_message, const char* type) noexcept; - simdjson_really_inline error_code end_container() noexcept; + simdjson_inline error_code start_container(uint8_t start_char, const char* incorrect_type_message, const char* type) noexcept; + simdjson_inline error_code end_container() noexcept; /** * Advance to a place expecting a value (increasing depth). @@ -24034,19 +24223,19 @@ namespace simdjson { * @return The current token (the one left behind). * @error TAPE_ERROR If the document ended early. 
*/ - simdjson_really_inline simdjson_result advance_to_value() noexcept; + simdjson_inline simdjson_result advance_to_value() noexcept; - simdjson_really_inline error_code incorrect_type_error(const char* message) const noexcept; - simdjson_really_inline error_code error_unless_more_tokens(uint32_t tokens = 1) const noexcept; + simdjson_inline error_code incorrect_type_error(const char* message) const noexcept; + simdjson_inline error_code error_unless_more_tokens(uint32_t tokens = 1) const noexcept; - simdjson_really_inline bool is_at_start() const noexcept; + simdjson_inline bool is_at_start() const noexcept; /** * is_at_iterator_start() returns true on an array or object after it has just been * created, whether the instance is empty or not. * * Usage: used by array::begin() in debug mode (SIMDJSON_DEVELOPMENT_CHECKS) */ - simdjson_really_inline bool is_at_iterator_start() const noexcept; + simdjson_inline bool is_at_iterator_start() const noexcept; /** * Assuming that we are within an object, this returns true if we @@ -24055,7 +24244,7 @@ namespace simdjson { * Usage: the skip_child() method should never be used while we are pointing * at a key inside an object. 
*/ - simdjson_really_inline bool is_at_key() const noexcept; + simdjson_inline bool is_at_key() const noexcept; inline void assert_at_start() const noexcept; inline void assert_at_container_start() const noexcept; @@ -24065,16 +24254,16 @@ namespace simdjson { inline void assert_at_non_root_start() const noexcept; /** Get the starting position of this value */ - simdjson_really_inline token_position start_position() const noexcept; + simdjson_inline token_position start_position() const noexcept; /** @copydoc error_code json_iterator::position() const noexcept; */ - simdjson_really_inline token_position position() const noexcept; + simdjson_inline token_position position() const noexcept; /** @copydoc error_code json_iterator::end_position() const noexcept; */ - simdjson_really_inline token_position last_position() const noexcept; + simdjson_inline token_position last_position() const noexcept; /** @copydoc error_code json_iterator::end_position() const noexcept; */ - simdjson_really_inline token_position end_position() const noexcept; + simdjson_inline token_position end_position() const noexcept; /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ - simdjson_really_inline error_code report_error(error_code error, const char* message) noexcept; + simdjson_inline error_code report_error(error_code error, const char* message) noexcept; friend class document; friend class object; @@ -24091,9 +24280,9 @@ namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value_iterator&& value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value_iterator&& value) 
noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson @@ -24119,7 +24308,7 @@ namespace simdjson { class array_iterator { public: /** Create a new, invalid array iterator. */ - simdjson_really_inline array_iterator() noexcept = default; + simdjson_inline array_iterator() noexcept = default; // // Iterator interface @@ -24130,7 +24319,7 @@ namespace simdjson { * * Part of the std::iterator interface. */ - simdjson_really_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. /** * Check if we are at the end of the JSON. * @@ -24138,7 +24327,7 @@ namespace simdjson { * * @return true if there are no more elements in the JSON array. */ - simdjson_really_inline bool operator==(const array_iterator&) const noexcept; + simdjson_inline bool operator==(const array_iterator&) const noexcept; /** * Check if there are more elements in the JSON array. * @@ -24146,18 +24335,18 @@ namespace simdjson { * * @return true if there are more elements in the JSON array. */ - simdjson_really_inline bool operator!=(const array_iterator&) const noexcept; + simdjson_inline bool operator!=(const array_iterator&) const noexcept; /** * Move to the next element. * * Part of the std::iterator interface. 
*/ - simdjson_really_inline array_iterator& operator++() noexcept; + simdjson_inline array_iterator& operator++() noexcept; private: value_iterator iter{}; - simdjson_really_inline array_iterator(const value_iterator& iter) noexcept; + simdjson_inline array_iterator(const value_iterator& iter) noexcept; friend class array; friend class value; @@ -24173,18 +24362,18 @@ namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array_iterator&& value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array_iterator&& value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; // // Iterator interface // - simdjson_really_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. - simdjson_really_inline bool operator==(const simdjson_result&) const noexcept; - simdjson_really_inline bool operator!=(const simdjson_result&) const noexcept; - simdjson_really_inline simdjson_result& operator++() noexcept; + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline bool operator==(const simdjson_result&) const noexcept; + simdjson_inline bool operator!=(const simdjson_result&) const noexcept; + simdjson_inline simdjson_result& operator++() noexcept; }; } // namespace simdjson @@ -24204,7 +24393,7 @@ namespace simdjson { * * Exists so you can declare a variable and later assign to it before use. 
*/ - simdjson_really_inline object_iterator() noexcept = default; + simdjson_inline object_iterator() noexcept = default; // // Iterator interface @@ -24212,13 +24401,13 @@ namespace simdjson { // Reads key and value, yielding them to the user. // MUST ONLY BE CALLED ONCE PER ITERATION. - simdjson_really_inline simdjson_result operator*() noexcept; + simdjson_inline simdjson_result operator*() noexcept; // Assumes it's being compared with the end. true if depth < iter->depth. - simdjson_really_inline bool operator==(const object_iterator&) const noexcept; + simdjson_inline bool operator==(const object_iterator&) const noexcept; // Assumes it's being compared with the end. true if depth >= iter->depth. - simdjson_really_inline bool operator!=(const object_iterator&) const noexcept; + simdjson_inline bool operator!=(const object_iterator&) const noexcept; // Checks for ']' and ',' - simdjson_really_inline object_iterator& operator++() noexcept; + simdjson_inline object_iterator& operator++() noexcept; private: /** @@ -24229,7 +24418,7 @@ namespace simdjson { */ value_iterator iter{}; - simdjson_really_inline object_iterator(const value_iterator& iter) noexcept; + simdjson_inline object_iterator(const value_iterator& iter) noexcept; friend struct simdjson_result; friend class object; }; @@ -24243,22 +24432,22 @@ namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object_iterator&& value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object_iterator&& value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept 
= default; // // Iterator interface // // Reads key and value, yielding them to the user. - simdjson_really_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. // Assumes it's being compared with the end. true if depth < iter->depth. - simdjson_really_inline bool operator==(const simdjson_result&) const noexcept; + simdjson_inline bool operator==(const simdjson_result&) const noexcept; // Assumes it's being compared with the end. true if depth >= iter->depth. - simdjson_really_inline bool operator!=(const simdjson_result&) const noexcept; + simdjson_inline bool operator!=(const simdjson_result&) const noexcept; // Checks for ']' and ',' - simdjson_really_inline simdjson_result& operator++() noexcept; + simdjson_inline simdjson_result& operator++() noexcept; }; } // namespace simdjson @@ -24282,20 +24471,20 @@ namespace simdjson { * * Exists so you can declare a variable and later assign to it before use. */ - simdjson_really_inline array() noexcept = default; + simdjson_inline array() noexcept = default; /** * Begin array iteration. * * Part of the std::iterable interface. */ - simdjson_really_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result begin() noexcept; /** * Sentinel representing the end of the array. * * Part of the std::iterable interface. */ - simdjson_really_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result end() noexcept; /** * This method scans the array and counts the number of elements. * The count_elements method should always be called before you have begun @@ -24310,7 +24499,7 @@ namespace simdjson { * To check that an array is empty, it is more performant to use * the is_empty() method. 
*/ - simdjson_really_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_elements() & noexcept; /** * This method scans the beginning of the array and checks whether the * array is empty. @@ -24320,7 +24509,7 @@ namespace simdjson { * there is a missing comma), then an error is returned and it is no longer * safe to continue. */ - simdjson_really_inline simdjson_result is_empty() & noexcept; + simdjson_inline simdjson_result is_empty() & noexcept; /** * Reset the iterator so that we are pointing back at the * beginning of the array. You should still consume values only once even if you @@ -24365,7 +24554,7 @@ namespace simdjson { * Consumes the array and returns a string_view instance corresponding to the * array as represented in JSON. It points inside the original document. */ - simdjson_really_inline simdjson_result raw_json() noexcept; + simdjson_inline simdjson_result raw_json() noexcept; /** * Get the value at the given index. This function has linear-time complexity. @@ -24374,12 +24563,12 @@ namespace simdjson { * @return The value at the given index, or: * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ - simdjson_really_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; protected: /** * Go to the end of the array, no matter where you are right now. */ - simdjson_really_inline error_code consume() noexcept; + simdjson_inline error_code consume() noexcept; /** * Begin array iteration. @@ -24388,7 +24577,7 @@ namespace simdjson { * resulting array. * @error INCORRECT_TYPE if the iterator is not at [. */ - static simdjson_really_inline simdjson_result start(value_iterator& iter) noexcept; + static simdjson_inline simdjson_result start(value_iterator& iter) noexcept; /** * Begin array iteration from the root. * @@ -24397,7 +24586,7 @@ namespace simdjson { * @error INCORRECT_TYPE if the iterator is not at [. 
* @error TAPE_ERROR if there is no closing ] at the end of the document. */ - static simdjson_really_inline simdjson_result start_root(value_iterator& iter) noexcept; + static simdjson_inline simdjson_result start_root(value_iterator& iter) noexcept; /** * Begin array iteration. * @@ -24406,7 +24595,7 @@ namespace simdjson { * * @param iter The iterator. Must be after the initial [. Will be *moved* into the resulting array. */ - static simdjson_really_inline simdjson_result started(value_iterator& iter) noexcept; + static simdjson_inline simdjson_result started(value_iterator& iter) noexcept; /** * Create an array at the given Internal array creation. Call array::start() or array::started() instead of this. @@ -24415,7 +24604,7 @@ namespace simdjson { * == true, or past the [] with is_alive() == false if the array is empty. Will be *moved* * into the resulting array. */ - simdjson_really_inline array(const value_iterator& iter) noexcept; + simdjson_inline array(const value_iterator& iter) noexcept; /** * Iterator marking current position. 
@@ -24440,17 +24629,17 @@ namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array&& value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array&& value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; - simdjson_really_inline simdjson_result begin() noexcept; - simdjson_really_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; inline simdjson_result count_elements() & noexcept; inline simdjson_result is_empty() & noexcept; inline simdjson_result reset() & noexcept; - simdjson_really_inline simdjson_result at(size_t index) noexcept; - simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; }; } // namespace simdjson @@ -24483,11 +24672,11 @@ namespace simdjson { * * Exists so you can declare a variable and later assign to it before use. 
*/ - simdjson_really_inline document() noexcept = default; - simdjson_really_inline document(const document& other) noexcept = delete; // pass your documents by reference, not by copy - simdjson_really_inline document(document&& other) noexcept = default; - simdjson_really_inline document& operator=(const document& other) noexcept = delete; - simdjson_really_inline document& operator=(document&& other) noexcept = default; + simdjson_inline document() noexcept = default; + simdjson_inline document(const document& other) noexcept = delete; // pass your documents by reference, not by copy + simdjson_inline document(document&& other) noexcept = default; + simdjson_inline document& operator=(const document& other) noexcept = delete; + simdjson_inline document& operator=(document&& other) noexcept = default; /** * Cast this JSON value to an array. @@ -24495,49 +24684,49 @@ namespace simdjson { * @returns An object that can be used to iterate the array. * @returns INCORRECT_TYPE If the JSON value is not an array. */ - simdjson_really_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_array() & noexcept; /** * Cast this JSON value to an object. * * @returns An object that can be used to look up or iterate fields. * @returns INCORRECT_TYPE If the JSON value is not an object. */ - simdjson_really_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; /** * Cast this JSON value to an unsigned integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ - simdjson_really_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; /** * Cast this JSON value (inside string) to an unsigned integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. 
*/ - simdjson_really_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ - simdjson_really_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; /** * Cast this JSON value (inside string) to a signed integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ - simdjson_really_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; /** * Cast this JSON value to a double. * * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. */ - simdjson_really_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double() noexcept; /** * Cast this JSON value (inside string) to a double. @@ -24545,7 +24734,7 @@ namespace simdjson { * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. */ - simdjson_really_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; /** * Cast this JSON value to a string. * @@ -24553,11 +24742,24 @@ namespace simdjson { * * Important: Calling get_string() twice on the same document is an error. * + * @param Whether to allow a replacement character for unmatched surrogate pairs. * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. 
*/ - simdjson_really_inline simdjson_result get_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + /** + * Cast this JSON value to a string. + * + * The string is not guaranteed to be valid UTF-8. See https://simonsapin.github.io/wtf-8/ + * + * Important: Calling get_wobbly_string() twice on the same document is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; /** * Cast this JSON value to a raw_json_string. * @@ -24566,28 +24768,31 @@ namespace simdjson { * @returns A pointer to the raw JSON for the given string. * @returns INCORRECT_TYPE if the JSON value is not a string. */ - simdjson_really_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; /** * Cast this JSON value to a bool. * * @returns A bool value. * @returns INCORRECT_TYPE if the JSON value is not true or false. */ - simdjson_really_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; /** * Cast this JSON value to a value when the document is an object or an array. * * @returns A value if a JSON array or object cannot be found. * @returns SCALAR_DOCUMENT_AS_VALUE error is the document is a scalar (see is_scalar() function). */ - simdjson_really_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result get_value() noexcept; /** - * Checks if this JSON value is null. + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. * * @returns Whether the value is null. 
+ * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. */ - simdjson_really_inline bool is_null() noexcept; + simdjson_inline simdjson_result is_null() noexcept; /** * Get this value as the given type. @@ -24600,13 +24805,13 @@ namespace simdjson { * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ - template simdjson_really_inline simdjson_result get() & noexcept { + template simdjson_inline simdjson_result get() & noexcept { // Unless the simdjson library provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); } /** @overload template simdjson_result get() & noexcept */ - template simdjson_really_inline simdjson_result get() && noexcept { + template simdjson_inline simdjson_result get() && noexcept { // Unless the simdjson library provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); @@ -24623,9 +24828,9 @@ namespace simdjson { * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ - template simdjson_really_inline error_code get(T& out) & noexcept; + template simdjson_inline error_code get(T& out) & noexcept; /** @overload template error_code get(T &out) & noexcept */ - template simdjson_really_inline error_code get(T& out) && noexcept; + template simdjson_inline error_code get(T& out) && noexcept; #if SIMDJSON_EXCEPTIONS /** @@ -24634,35 +24839,35 @@ namespace simdjson { * @returns An object that can be used to iterate the array. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. 
*/ - simdjson_really_inline operator array() & noexcept(false); + simdjson_inline operator array() & noexcept(false); /** * Cast this JSON value to an object. * * @returns An object that can be used to look up or iterate fields. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. */ - simdjson_really_inline operator object() & noexcept(false); + simdjson_inline operator object() & noexcept(false); /** * Cast this JSON value to an unsigned integer. * * @returns A signed 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. */ - simdjson_really_inline operator uint64_t() noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. */ - simdjson_really_inline operator int64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); /** * Cast this JSON value to a double. * * @returns A double. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. */ - simdjson_really_inline operator double() noexcept(false); + simdjson_inline operator double() noexcept(false); /** * Cast this JSON value to a string. * @@ -24672,7 +24877,7 @@ namespace simdjson { * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_really_inline operator std::string_view() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); /** * Cast this JSON value to a raw_json_string. * @@ -24681,21 +24886,21 @@ namespace simdjson { * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. 
*/ - simdjson_really_inline operator raw_json_string() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false); /** * Cast this JSON value to a bool. * * @returns A bool value. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. */ - simdjson_really_inline operator bool() noexcept(false); + simdjson_inline operator bool() noexcept(false); /** * Cast this JSON value to a value. * * @returns A value value. * @exception if a JSON value cannot be found */ - simdjson_really_inline operator value() noexcept(false); + simdjson_inline operator value() noexcept(false); #endif /** * This method scans the array and counts the number of elements. @@ -24708,7 +24913,7 @@ namespace simdjson { * there is a missing comma), then an error is returned and it is no longer * safe to continue. */ - simdjson_really_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_elements() & noexcept; /** * This method scans the object and counts the number of key-value pairs. * The count_fields method should always be called before you have begun @@ -24723,7 +24928,7 @@ namespace simdjson { * To check that an object is empty, it is more performant to use * the is_empty() method. */ - simdjson_really_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; /** * Get the value at the given index in the array. This function has linear-time complexity. * This function should only be called once on an array instance since the array iterator is not reset between each call. @@ -24731,19 +24936,19 @@ namespace simdjson { * @return The value at the given index, or: * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ - simdjson_really_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; /** * Begin array iteration. * * Part of the std::iterable interface. 
*/ - simdjson_really_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result begin() & noexcept; /** * Sentinel representing the end of the array. * * Part of the std::iterable interface. */ - simdjson_really_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result end() & noexcept; /** * Look up a field by name on an object (order-sensitive). @@ -24777,9 +24982,9 @@ namespace simdjson { * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ - simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; - /** @overload simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; */ - simdjson_really_inline simdjson_result find_field(const char* key) & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(const char* key) & noexcept; /** * Look up a field by name on an object, without regard to key order. @@ -24811,16 +25016,18 @@ namespace simdjson { * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
*/ - simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ - simdjson_really_inline simdjson_result find_field_unordered(const char* key) & noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ - simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ - simdjson_really_inline simdjson_result operator[](const char* key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char* key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](const char* key) & noexcept; /** - * Get the type of this JSON value. + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just @@ -24828,7 +25035,7 @@ namespace simdjson { * * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". 
*/ - simdjson_really_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result type() noexcept; /** * Checks whether the document is a scalar (string, number, null, Boolean). @@ -24837,14 +25044,14 @@ namespace simdjson { * @returns true if the type is string, number, null, Boolean * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ - simdjson_really_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; /** * Checks whether the document is a negative number. * * @returns true if the number if negative. */ - simdjson_really_inline bool is_negative() noexcept; + simdjson_inline bool is_negative() noexcept; /** * Checks whether the document is an integer number. Note that * this requires to partially parse the number string. If @@ -24854,7 +25061,7 @@ namespace simdjson { * * @returns true if the number if negative. */ - simdjson_really_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; /** * Determine the number type (integer or floating-point number) as quickly * as possible. This function does not fully validate the input. It is @@ -24877,7 +25084,7 @@ namespace simdjson { * * @returns the type of the number */ - simdjson_really_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; /** * Attempt to parse an ondemand::number. An ondemand::number may @@ -24905,7 +25112,7 @@ namespace simdjson { * to call "get_int64()" when number.get_number_type() is not * number_type::signed_integer and when number.is_int64() is false. */ - simdjson_warn_unused simdjson_really_inline simdjson_result get_number() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; /** * Get the raw JSON for this token. 
@@ -24929,7 +25136,7 @@ namespace simdjson { * - false * - null */ - simdjson_really_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result raw_json_token() noexcept; /** * Reset the iterator inside the document instance so we are pointing back at the @@ -24961,7 +25168,7 @@ namespace simdjson { * 2 = , or } inside root array/object * 3 = key or value inside root array/object. */ - simdjson_really_inline int32_t current_depth() const noexcept; + simdjson_inline int32_t current_depth() const noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 @@ -24994,26 +25201,26 @@ namespace simdjson { * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is not a scalar (see is_scalar() function). */ - simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; /** * Consumes the document and returns a string_view instance corresponding to the * document as represented in JSON. It points inside the original byte array containing * the JSON document. */ - simdjson_really_inline simdjson_result raw_json() noexcept; + simdjson_inline simdjson_result raw_json() noexcept; protected: /** * Consumes the document. 
*/ - simdjson_really_inline error_code consume() noexcept; + simdjson_inline error_code consume() noexcept; - simdjson_really_inline document(ondemand::json_iterator&& iter) noexcept; - simdjson_really_inline const uint8_t* text(uint32_t idx) const noexcept; + simdjson_inline document(ondemand::json_iterator&& iter) noexcept; + simdjson_inline const uint8_t* text(uint32_t idx) const noexcept; - simdjson_really_inline value_iterator resume_value_iterator() noexcept; - simdjson_really_inline value_iterator get_root_value_iterator() noexcept; - simdjson_really_inline simdjson_result start_or_resume_object() noexcept; - static simdjson_really_inline document start(ondemand::json_iterator&& iter) noexcept; + simdjson_inline value_iterator resume_value_iterator() noexcept; + simdjson_inline value_iterator get_root_value_iterator() noexcept; + simdjson_inline simdjson_result start_or_resume_object() noexcept; + static simdjson_inline document start(ondemand::json_iterator&& iter) noexcept; // // Fields @@ -25029,6 +25236,7 @@ namespace simdjson { friend class field; friend class token; friend class document_stream; + friend class document_reference; }; @@ -25037,59 +25245,63 @@ namespace simdjson { */ class document_reference { public: - simdjson_really_inline document_reference() noexcept; - simdjson_really_inline document_reference(document& d) noexcept; - simdjson_really_inline document_reference(const document_reference& other) noexcept = default; - simdjson_really_inline document_reference& operator=(const document_reference& other) noexcept = default; - simdjson_really_inline void rewind() noexcept; - simdjson_really_inline simdjson_result get_array() & noexcept; - simdjson_really_inline simdjson_result get_object() & noexcept; - simdjson_really_inline simdjson_result get_uint64() noexcept; - simdjson_really_inline simdjson_result get_int64() noexcept; - simdjson_really_inline simdjson_result get_double() noexcept; - simdjson_really_inline simdjson_result 
get_string() noexcept; - simdjson_really_inline simdjson_result get_raw_json_string() noexcept; - simdjson_really_inline simdjson_result get_bool() noexcept; - simdjson_really_inline simdjson_result get_value() noexcept; + simdjson_inline document_reference() noexcept; + simdjson_inline document_reference(document& d) noexcept; + simdjson_inline document_reference(const document_reference& other) noexcept = default; + simdjson_inline document_reference& operator=(const document_reference& other) noexcept = default; + simdjson_inline void rewind() noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; - simdjson_really_inline bool is_null() noexcept; - simdjson_really_inline simdjson_result raw_json() noexcept; - simdjson_really_inline operator document& () const noexcept; + simdjson_inline simdjson_result is_null() noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + simdjson_inline operator document& () const noexcept; #if SIMDJSON_EXCEPTIONS - simdjson_really_inline operator array() & noexcept(false); - simdjson_really_inline operator object() & noexcept(false); - simdjson_really_inline operator uint64_t() noexcept(false); - simdjson_really_inline operator int64_t() noexcept(false); - 
simdjson_really_inline operator double() noexcept(false); - simdjson_really_inline operator std::string_view() noexcept(false); - simdjson_really_inline operator raw_json_string() noexcept(false); - simdjson_really_inline operator bool() noexcept(false); - simdjson_really_inline operator value() noexcept(false); + simdjson_inline operator array() & noexcept(false); + simdjson_inline operator object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator value() noexcept(false); #endif - simdjson_really_inline simdjson_result count_elements() & noexcept; - simdjson_really_inline simdjson_result count_fields() & noexcept; - simdjson_really_inline simdjson_result at(size_t index) & noexcept; - simdjson_really_inline simdjson_result begin() & noexcept; - simdjson_really_inline simdjson_result end() & noexcept; - simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; - simdjson_really_inline simdjson_result find_field(const char* key) & noexcept; - simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_really_inline simdjson_result operator[](const char* key) & noexcept; - simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - simdjson_really_inline simdjson_result find_field_unordered(const char* key) & noexcept; + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result 
find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char* key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char* key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char* key) & noexcept; - simdjson_really_inline simdjson_result type() noexcept; - simdjson_really_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; - simdjson_really_inline simdjson_result current_location() noexcept; - simdjson_really_inline int32_t current_depth() const noexcept; - simdjson_really_inline bool is_negative() noexcept; - simdjson_really_inline simdjson_result is_integer() noexcept; - simdjson_really_inline simdjson_result get_number_type() noexcept; - simdjson_really_inline simdjson_result get_number() noexcept; - simdjson_really_inline simdjson_result raw_json_token() noexcept; - simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; private: document* doc{ nullptr }; }; @@ -25102,63 +25314,66 @@ namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { public: - simdjson_really_inline 
simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document&& value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; - simdjson_really_inline error_code rewind() noexcept; + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document&& value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; - simdjson_really_inline simdjson_result get_array() & noexcept; - simdjson_really_inline simdjson_result get_object() & noexcept; - simdjson_really_inline simdjson_result get_uint64() noexcept; - simdjson_really_inline simdjson_result get_int64() noexcept; - simdjson_really_inline simdjson_result get_double() noexcept; - simdjson_really_inline simdjson_result get_double_from_string() noexcept; - simdjson_really_inline simdjson_result get_string() noexcept; - simdjson_really_inline simdjson_result get_raw_json_string() noexcept; - simdjson_really_inline simdjson_result get_bool() noexcept; - simdjson_really_inline simdjson_result get_value() noexcept; - simdjson_really_inline bool is_null() noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result 
get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; - template simdjson_really_inline simdjson_result get() & noexcept; - template simdjson_really_inline simdjson_result get() && noexcept; + template simdjson_inline simdjson_result get() & noexcept; + template simdjson_inline simdjson_result get() && noexcept; - template simdjson_really_inline error_code get(T& out) & noexcept; - template simdjson_really_inline error_code get(T& out) && noexcept; + template simdjson_inline error_code get(T& out) & noexcept; + template simdjson_inline error_code get(T& out) && noexcept; #if SIMDJSON_EXCEPTIONS - simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false); - simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false); - simdjson_really_inline operator uint64_t() noexcept(false); - simdjson_really_inline operator int64_t() noexcept(false); - simdjson_really_inline operator double() noexcept(false); - simdjson_really_inline operator std::string_view() noexcept(false); - simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false); - simdjson_really_inline operator bool() noexcept(false); - simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false); + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false); + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() 
noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false); #endif - simdjson_really_inline simdjson_result count_elements() & noexcept; - simdjson_really_inline simdjson_result count_fields() & noexcept; - simdjson_really_inline simdjson_result at(size_t index) & noexcept; - simdjson_really_inline simdjson_result begin() & noexcept; - simdjson_really_inline simdjson_result end() & noexcept; - simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; - simdjson_really_inline simdjson_result find_field(const char* key) & noexcept; - simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_really_inline simdjson_result operator[](const char* key) & noexcept; - simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - simdjson_really_inline simdjson_result find_field_unordered(const char* key) & noexcept; - simdjson_really_inline simdjson_result type() noexcept; - simdjson_really_inline simdjson_result is_scalar() noexcept; - simdjson_really_inline simdjson_result current_location() noexcept; - simdjson_really_inline int32_t current_depth() const noexcept; - simdjson_really_inline bool is_negative() noexcept; - simdjson_really_inline simdjson_result is_integer() noexcept; - simdjson_really_inline simdjson_result get_number_type() noexcept; - simdjson_really_inline simdjson_result get_number() noexcept; - /** @copydoc simdjson_really_inline std::string_view document::raw_json_token() const noexcept */ - simdjson_really_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + 
simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char* key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char* key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char* key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; - simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; }; @@ -25171,55 +25386,59 @@ namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_reference value, error_code error) noexcept; - simdjson_really_inline simdjson_result() noexcept = default; - simdjson_really_inline error_code rewind() noexcept; + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_reference value, error_code error) noexcept; + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; - simdjson_really_inline simdjson_result get_array() & noexcept; - 
simdjson_really_inline simdjson_result get_object() & noexcept; - simdjson_really_inline simdjson_result get_uint64() noexcept; - simdjson_really_inline simdjson_result get_int64() noexcept; - simdjson_really_inline simdjson_result get_double() noexcept; - simdjson_really_inline simdjson_result get_string() noexcept; - simdjson_really_inline simdjson_result get_raw_json_string() noexcept; - simdjson_really_inline simdjson_result get_bool() noexcept; - simdjson_really_inline simdjson_result get_value() noexcept; - simdjson_really_inline bool is_null() noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; #if SIMDJSON_EXCEPTIONS - simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false); - simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false); - simdjson_really_inline operator uint64_t() noexcept(false); - simdjson_really_inline operator int64_t() noexcept(false); - simdjson_really_inline operator double() noexcept(false); - simdjson_really_inline operator std::string_view() noexcept(false); - simdjson_really_inline operator 
SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false); - simdjson_really_inline operator bool() noexcept(false); - simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false); + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false); + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false); #endif - simdjson_really_inline simdjson_result count_elements() & noexcept; - simdjson_really_inline simdjson_result count_fields() & noexcept; - simdjson_really_inline simdjson_result at(size_t index) & noexcept; - simdjson_really_inline simdjson_result begin() & noexcept; - simdjson_really_inline simdjson_result end() & noexcept; - simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; - simdjson_really_inline simdjson_result find_field(const char* key) & noexcept; - simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_really_inline simdjson_result operator[](const char* key) & noexcept; - simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - simdjson_really_inline simdjson_result find_field_unordered(const char* key) & noexcept; - simdjson_really_inline simdjson_result type() noexcept; - simdjson_really_inline simdjson_result is_scalar() noexcept; - simdjson_really_inline simdjson_result current_location() noexcept; - simdjson_really_inline int32_t 
current_depth() const noexcept; - simdjson_really_inline bool is_negative() noexcept; - simdjson_really_inline simdjson_result is_integer() noexcept; - simdjson_really_inline simdjson_result get_number_type() noexcept; - simdjson_really_inline simdjson_result get_number() noexcept; - /** @copydoc simdjson_really_inline std::string_view document_reference::raw_json_token() const noexcept */ - simdjson_really_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char* key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char* key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char* key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; - simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result 
at_pointer(std::string_view json_pointer) noexcept; }; @@ -25238,7 +25457,8 @@ namespace simdjson { class raw_json_string; /** - * An ephemeral JSON value returned during iteration. + * An ephemeral JSON value returned during iteration. It is only valid for as long as you do + * not access more data in the JSON document. */ class value { public: @@ -25247,7 +25467,7 @@ namespace simdjson { * * Exists so you can declare a variable and later assign to it before use. */ - simdjson_really_inline value() noexcept = default; + simdjson_inline value() noexcept = default; /** * Get this value as the given type. @@ -25260,7 +25480,7 @@ namespace simdjson { * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ - template simdjson_really_inline simdjson_result get() noexcept { + template simdjson_inline simdjson_result get() noexcept { // Unless the simdjson library provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); @@ -25275,7 +25495,7 @@ namespace simdjson { * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ - template simdjson_really_inline error_code get(T& out) noexcept; + template simdjson_inline error_code get(T& out) noexcept; /** * Cast this JSON value to an array. @@ -25283,7 +25503,7 @@ namespace simdjson { * @returns An object that can be used to iterate the array. * @returns INCORRECT_TYPE If the JSON value is not an array. */ - simdjson_really_inline simdjson_result get_array() noexcept; + simdjson_inline simdjson_result get_array() noexcept; /** * Cast this JSON value to an object. @@ -25291,7 +25511,7 @@ namespace simdjson { * @returns An object that can be used to look up or iterate fields. * @returns INCORRECT_TYPE If the JSON value is not an object. 
*/ - simdjson_really_inline simdjson_result get_object() noexcept; + simdjson_inline simdjson_result get_object() noexcept; /** * Cast this JSON value to an unsigned integer. @@ -25299,7 +25519,7 @@ namespace simdjson { * @returns A unsigned 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ - simdjson_really_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; /** * Cast this JSON value (inside string) to a unsigned integer. @@ -25307,7 +25527,7 @@ namespace simdjson { * @returns A unsigned 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ - simdjson_really_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; /** * Cast this JSON value to a signed integer. @@ -25315,7 +25535,7 @@ namespace simdjson { * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ - simdjson_really_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; /** * Cast this JSON value (inside string) to a signed integer. @@ -25323,7 +25543,7 @@ namespace simdjson { * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ - simdjson_really_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; /** * Cast this JSON value to a double. @@ -25331,7 +25551,7 @@ namespace simdjson { * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. */ - simdjson_really_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double() noexcept; /** * Cast this JSON value (inside string) to a double @@ -25339,7 +25559,7 @@ namespace simdjson { * @returns A double. 
* @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. */ - simdjson_really_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; /** * Cast this JSON value to a string. @@ -25355,8 +25575,23 @@ namespace simdjson { * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. */ - simdjson_really_inline simdjson_result get_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + + /** + * Cast this JSON value to a "wobbly" string. + * + * The string is may not be a valid UTF-8 string. + * See https://simonsapin.github.io/wtf-8/ + * + * Important: a value should be consumed once. Calling get_wobbly_string() twice on the same value + * is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; /** * Cast this JSON value to a raw_json_string. * @@ -25365,7 +25600,7 @@ namespace simdjson { * @returns A pointer to the raw JSON for the given string. * @returns INCORRECT_TYPE if the JSON value is not a string. */ - simdjson_really_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; /** * Cast this JSON value to a bool. @@ -25373,14 +25608,17 @@ namespace simdjson { * @returns A bool value. * @returns INCORRECT_TYPE if the JSON value is not true or false. */ - simdjson_really_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; /** - * Checks if this JSON value is null. + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). 
If we find a token that + * begins with 'n' but is not 'null', then an error is returned. * * @returns Whether the value is null. + * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. */ - simdjson_really_inline bool is_null() noexcept; + simdjson_inline simdjson_result is_null() noexcept; #if SIMDJSON_EXCEPTIONS /** @@ -25389,35 +25627,35 @@ namespace simdjson { * @returns An object that can be used to iterate the array. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. */ - simdjson_really_inline operator array() noexcept(false); + simdjson_inline operator array() noexcept(false); /** * Cast this JSON value to an object. * * @returns An object that can be used to look up or iterate fields. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. */ - simdjson_really_inline operator object() noexcept(false); + simdjson_inline operator object() noexcept(false); /** * Cast this JSON value to an unsigned integer. * * @returns A signed 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. */ - simdjson_really_inline operator uint64_t() noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. */ - simdjson_really_inline operator int64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); /** * Cast this JSON value to a double. * * @returns A double. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. */ - simdjson_really_inline operator double() noexcept(false); + simdjson_inline operator double() noexcept(false); /** * Cast this JSON value to a string. * @@ -25429,7 +25667,7 @@ namespace simdjson { * time it parses a document or when it is destroyed. 
* @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_really_inline operator std::string_view() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); /** * Cast this JSON value to a raw_json_string. * @@ -25438,14 +25676,14 @@ namespace simdjson { * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_really_inline operator raw_json_string() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false); /** * Cast this JSON value to a bool. * * @returns A bool value. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. */ - simdjson_really_inline operator bool() noexcept(false); + simdjson_inline operator bool() noexcept(false); #endif /** @@ -25455,13 +25693,13 @@ namespace simdjson { * * @returns INCORRECT_TYPE If the JSON value is not an array. */ - simdjson_really_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result begin() & noexcept; /** * Sentinel representing the end of the array. * * Part of the std::iterable interface. */ - simdjson_really_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result end() & noexcept; /** * This method scans the array and counts the number of elements. * The count_elements method should always be called before you have begun @@ -25472,8 +25710,11 @@ namespace simdjson { * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. + * + * Performance hint: You should only call count_elements() as a last + * resort as it may require scanning the document twice or more. 
*/ - simdjson_really_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_elements() & noexcept; /** * This method scans the object and counts the number of key-value pairs. * The count_fields method should always be called before you have begun @@ -25487,8 +25728,11 @@ namespace simdjson { * * To check that an object is empty, it is more performant to use * the is_empty() method on the object instance. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. */ - simdjson_really_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; /** * Get the value at the given index in the array. This function has linear-time complexity. * This function should only be called once on an array instance since the array iterator is not reset between each call. @@ -25496,7 +25740,7 @@ namespace simdjson { * @return The value at the given index, or: * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ - simdjson_really_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; /** * Look up a field by name on an object (order-sensitive). * @@ -25519,9 +25763,9 @@ namespace simdjson { * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
*/ - simdjson_really_inline simdjson_result find_field(std::string_view key) noexcept; - /** @overload simdjson_really_inline simdjson_result find_field(std::string_view key) noexcept; */ - simdjson_really_inline simdjson_result find_field(const char* key) noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char* key) noexcept; /** * Look up a field by name on an object, without regard to key order. @@ -25545,16 +25789,18 @@ namespace simdjson { * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ - simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_really_inline simdjson_result find_field_unordered(const char* key) noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_really_inline simdjson_result operator[](std::string_view key) noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_really_inline simdjson_result operator[](const char* key) noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char* key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + 
simdjson_inline simdjson_result operator[](const char* key) noexcept; /** - * Get the type of this JSON value. + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just @@ -25564,7 +25810,7 @@ namespace simdjson { * json_type::number, json_type::boolean, or json_type::null). * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ - simdjson_really_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result type() noexcept; /** * Checks whether the value is a scalar (string, number, null, Boolean). @@ -25573,14 +25819,14 @@ namespace simdjson { * @returns true if the type is string, number, null, Boolean * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ - simdjson_really_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; /** * Checks whether the value is a negative number. * * @returns true if the number if negative. */ - simdjson_really_inline bool is_negative() noexcept; + simdjson_inline bool is_negative() noexcept; /** * Checks whether the value is an integer number. Note that * this requires to partially parse the number string. If @@ -25594,7 +25840,7 @@ namespace simdjson { * * @returns true if the number if negative. */ - simdjson_really_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; /** * Determine the number type (integer or floating-point number) as quickly * as possible. This function does not fully validate the input. 
It is @@ -25617,7 +25863,7 @@ namespace simdjson { * * @returns the type of the number */ - simdjson_really_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; /** * Attempt to parse an ondemand::number. An ondemand::number may @@ -25649,7 +25895,7 @@ namespace simdjson { * calling 'get_number()', you scan the number string only once, determining * efficiently the type and storing it in an efficient manner. */ - simdjson_warn_unused simdjson_really_inline simdjson_result get_number() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; /** @@ -25675,12 +25921,12 @@ namespace simdjson { * - false * - null */ - simdjson_really_inline std::string_view raw_json_token() noexcept; + simdjson_inline std::string_view raw_json_token() noexcept; /** * Returns the current location in the document if in bounds. */ - simdjson_really_inline simdjson_result current_location() noexcept; + simdjson_inline simdjson_result current_location() noexcept; /** * Returns the current depth in the document if in bounds. @@ -25691,7 +25937,7 @@ namespace simdjson { * 2 = , or } inside root array/object * 3 = key or value inside root array/object. */ - simdjson_really_inline int32_t current_depth() const noexcept; + simdjson_inline int32_t current_depth() const noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 @@ -25735,38 +25981,38 @@ namespace simdjson { * - INCORRECT_TYPE if a non-integer is used to access an array * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ - simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; protected: /** * Create a value. 
*/ - simdjson_really_inline value(const value_iterator& iter) noexcept; + simdjson_inline value(const value_iterator& iter) noexcept; /** * Skip this value, allowing iteration to continue. */ - simdjson_really_inline void skip() noexcept; + simdjson_inline void skip() noexcept; /** * Start a value at the current position. * * (It should already be started; this is just a self-documentation method.) */ - static simdjson_really_inline value start(const value_iterator& iter) noexcept; + static simdjson_inline value start(const value_iterator& iter) noexcept; /** * Resume a value. */ - static simdjson_really_inline value resume(const value_iterator& iter) noexcept; + static simdjson_inline value resume(const value_iterator& iter) noexcept; /** * Get the object, starting or resuming it as necessary */ - simdjson_really_inline simdjson_result start_or_resume_object() noexcept; + simdjson_inline simdjson_result start_or_resume_object() noexcept; - // simdjson_really_inline void log_value(const char *type) const noexcept; - // simdjson_really_inline void log_error(const char *message) const noexcept; + // simdjson_inline void log_value(const char *type) const noexcept; + // simdjson_inline void log_error(const char *message) const noexcept; value_iterator iter{}; @@ -25787,43 +26033,44 @@ namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value&& value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value&& value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; - simdjson_really_inline 
simdjson_result get_array() noexcept; - simdjson_really_inline simdjson_result get_object() noexcept; + simdjson_inline simdjson_result get_array() noexcept; + simdjson_inline simdjson_result get_object() noexcept; - simdjson_really_inline simdjson_result get_uint64() noexcept; - simdjson_really_inline simdjson_result get_uint64_in_string() noexcept; - simdjson_really_inline simdjson_result get_int64() noexcept; - simdjson_really_inline simdjson_result get_int64_in_string() noexcept; - simdjson_really_inline simdjson_result get_double() noexcept; - simdjson_really_inline simdjson_result get_double_in_string() noexcept; - simdjson_really_inline simdjson_result get_string() noexcept; - simdjson_really_inline simdjson_result get_raw_json_string() noexcept; - simdjson_really_inline simdjson_result get_bool() noexcept; - simdjson_really_inline bool is_null() noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result is_null() noexcept; - template simdjson_really_inline simdjson_result get() noexcept; + template simdjson_inline simdjson_result get() noexcept; - template simdjson_really_inline error_code get(T& out) noexcept; + template simdjson_inline error_code get(T& out) noexcept; #if SIMDJSON_EXCEPTIONS - simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() noexcept(false); - simdjson_really_inline operator 
SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() noexcept(false); - simdjson_really_inline operator uint64_t() noexcept(false); - simdjson_really_inline operator int64_t() noexcept(false); - simdjson_really_inline operator double() noexcept(false); - simdjson_really_inline operator std::string_view() noexcept(false); - simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false); - simdjson_really_inline operator bool() noexcept(false); + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() noexcept(false); + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); #endif - simdjson_really_inline simdjson_result count_elements() & noexcept; - simdjson_really_inline simdjson_result count_fields() & noexcept; - simdjson_really_inline simdjson_result at(size_t index) noexcept; - simdjson_really_inline simdjson_result begin() & noexcept; - simdjson_really_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; /** * Look up a field by name on an object (order-sensitive). @@ -25845,9 +26092,9 @@ namespace simdjson { * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
*/ - simdjson_really_inline simdjson_result find_field(std::string_view key) noexcept; - /** @overload simdjson_really_inline simdjson_result find_field(std::string_view key) noexcept; */ - simdjson_really_inline simdjson_result find_field(const char* key) noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char* key) noexcept; /** * Look up a field by name on an object, without regard to key order. @@ -25868,13 +26115,13 @@ namespace simdjson { * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ - simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_really_inline simdjson_result find_field_unordered(const char* key) noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_really_inline simdjson_result operator[](std::string_view key) noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_really_inline simdjson_result operator[](const char* key) noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char* key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + 
simdjson_inline simdjson_result operator[](const char* key) noexcept; /** * Get the type of this JSON value. @@ -25883,21 +26130,21 @@ namespace simdjson { * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). */ - simdjson_really_inline simdjson_result type() noexcept; - simdjson_really_inline simdjson_result is_scalar() noexcept; - simdjson_really_inline simdjson_result is_negative() noexcept; - simdjson_really_inline simdjson_result is_integer() noexcept; - simdjson_really_inline simdjson_result get_number_type() noexcept; - simdjson_really_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; - /** @copydoc simdjson_really_inline std::string_view value::raw_json_token() const noexcept */ - simdjson_really_inline simdjson_result raw_json_token() noexcept; + /** @copydoc simdjson_inline std::string_view value::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; - /** @copydoc simdjson_really_inline simdjson_result current_location() noexcept */ - simdjson_really_inline simdjson_result current_location() noexcept; - /** @copydoc simdjson_really_inline int32_t current_depth() const noexcept */ - simdjson_really_inline int32_t current_depth() const noexcept; - simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** @copydoc simdjson_inline simdjson_result current_location() noexcept */ + simdjson_inline simdjson_result current_location() noexcept; + /** @copydoc simdjson_inline int32_t current_depth() const noexcept */ + simdjson_inline simdjson_result current_depth() const noexcept; + 
simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; }; } // namespace simdjson @@ -25922,7 +26169,7 @@ namespace simdjson { * * Exists so you can declare a variable and later assign to it before use. */ - simdjson_really_inline field() noexcept; + simdjson_inline field() noexcept; /** * Get the key as a string_view (for higher speed, consider raw_key). @@ -25932,25 +26179,25 @@ namespace simdjson { * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). */ - simdjson_really_inline simdjson_warn_unused simdjson_result unescaped_key() noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; /** * Get the key as a raw_json_string. Can be used for direct comparison with * an unescaped C string: e.g., key() == "test". */ - simdjson_really_inline raw_json_string key() const noexcept; + simdjson_inline raw_json_string key() const noexcept; /** * Get the field value. 
*/ - simdjson_really_inline ondemand::value& value() & noexcept; + simdjson_inline ondemand::value& value() & noexcept; /** * @overload ondemand::value &ondemand::value() & noexcept */ - simdjson_really_inline ondemand::value value() && noexcept; + simdjson_inline ondemand::value value() && noexcept; protected: - simdjson_really_inline field(raw_json_string key, ondemand::value&& value) noexcept; - static simdjson_really_inline simdjson_result start(value_iterator& parent_iter) noexcept; - static simdjson_really_inline simdjson_result start(const value_iterator& parent_iter, raw_json_string key) noexcept; + simdjson_inline field(raw_json_string key, ondemand::value&& value) noexcept; + static simdjson_inline simdjson_result start(value_iterator& parent_iter) noexcept; + static simdjson_inline simdjson_result start(const value_iterator& parent_iter, raw_json_string key) noexcept; friend struct simdjson_result; friend class object_iterator; }; @@ -25964,13 +26211,13 @@ namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::field&& value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::field&& value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; - simdjson_really_inline simdjson_result unescaped_key() noexcept; - simdjson_really_inline simdjson_result key() noexcept; - simdjson_really_inline simdjson_result value() noexcept; + simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result key() noexcept; + simdjson_inline simdjson_result 
value() noexcept; }; } // namespace simdjson @@ -25991,10 +26238,10 @@ namespace simdjson { * * Exists so you can declare a variable and later assign to it before use. */ - simdjson_really_inline object() noexcept = default; + simdjson_inline object() noexcept = default; - simdjson_really_inline simdjson_result begin() noexcept; - simdjson_really_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; /** * Look up a field by name on an object (order-sensitive). * @@ -26015,7 +26262,8 @@ namespace simdjson { * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. * * You must consume the fields on an object one at a time. A request for a new key - * invalidates previous field values: it makes them unsafe. E.g., the array + * invalidates previous field values: it makes them unsafe. The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array * given by content["bids"].get_array() should not be accessed after you have called * content["asks"].get_array(). You can detect such mistakes by first compiling and running * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an @@ -26028,9 +26276,9 @@ namespace simdjson { * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
*/ - simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; - /** @overload simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; */ - simdjson_really_inline simdjson_result find_field(std::string_view key) && noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; /** * Look up a field by name on an object, without regard to key order. @@ -26052,7 +26300,8 @@ namespace simdjson { * that only one field is returned. * * You must consume the fields on an object one at a time. A request for a new key - * invalidates previous field values: it makes them unsafe. E.g., the array + * invalidates previous field values: it makes them unsafe. The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array * given by content["bids"].get_array() should not be accessed after you have called * content["asks"].get_array(). You can detect such mistakes by first compiling and running * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an @@ -26064,13 +26313,13 @@ namespace simdjson { * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
*/ - simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ - simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ - simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ - simdjson_really_inline simdjson_result operator[](std::string_view key) && noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 @@ -26141,27 +26390,30 @@ namespace simdjson { * * To check that an object is empty, it is more performant to use * the is_empty() method. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. */ - simdjson_really_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; /** * Consumes the object and returns a string_view instance corresponding to the * object as represented in JSON. 
It points inside the original byte array containing * the JSON document. */ - simdjson_really_inline simdjson_result raw_json() noexcept; + simdjson_inline simdjson_result raw_json() noexcept; protected: /** * Go to the end of the object, no matter where you are right now. */ - simdjson_really_inline error_code consume() noexcept; - static simdjson_really_inline simdjson_result start(value_iterator& iter) noexcept; - static simdjson_really_inline simdjson_result start_root(value_iterator& iter) noexcept; - static simdjson_really_inline simdjson_result started(value_iterator& iter) noexcept; - static simdjson_really_inline object resume(const value_iterator& iter) noexcept; - simdjson_really_inline object(const value_iterator& iter) noexcept; + simdjson_inline error_code consume() noexcept; + static simdjson_inline simdjson_result start(value_iterator& iter) noexcept; + static simdjson_inline simdjson_result start_root(value_iterator& iter) noexcept; + static simdjson_inline simdjson_result started(value_iterator& iter) noexcept; + static simdjson_inline object resume(const value_iterator& iter) noexcept; + simdjson_inline object(const value_iterator& iter) noexcept; - simdjson_warn_unused simdjson_really_inline error_code find_field_raw(const std::string_view key) noexcept; + simdjson_warn_unused simdjson_inline error_code find_field_raw(const std::string_view key) noexcept; value_iterator iter{}; @@ -26179,19 +26431,19 @@ namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object&& value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object&& value) noexcept; ///< @private + simdjson_inline 
simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; - simdjson_really_inline simdjson_result begin() noexcept; - simdjson_really_inline simdjson_result end() noexcept; - simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; - simdjson_really_inline simdjson_result find_field(std::string_view key) && noexcept; - simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; - simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_really_inline simdjson_result operator[](std::string_view key) && noexcept; - simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; inline simdjson_result reset() noexcept; inline simdjson_result is_empty() noexcept; inline simdjson_result count_fields() & noexcept; @@ -26243,9 +26495,9 @@ namespace simdjson { inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; inline parser(parser&& other) noexcept = default; - simdjson_really_inline parser(const parser& other) = delete; - simdjson_really_inline parser& operator=(const parser& other) = delete; - 
simdjson_really_inline parser& operator=(parser&& other) noexcept = default; + simdjson_inline parser(const parser& other) = delete; + simdjson_inline parser& operator=(const parser& other) = delete; + simdjson_inline parser& operator=(parser&& other) noexcept = default; /** Deallocate the JSON parser. */ inline ~parser() noexcept = default; @@ -26280,7 +26532,9 @@ namespace simdjson { * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what - * those bytes are initialized to, as long as they are allocated. + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. * * @param json The JSON to parse. * @param len The length of the JSON. @@ -26336,7 +26590,9 @@ namespace simdjson { * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what - * those bytes are initialized to, as long as they are allocated. + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. * * @param json The JSON to parse. * @@ -26390,7 +26646,9 @@ namespace simdjson { * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what - * those bytes are initialized to, as long as they are allocated. + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. 
* * ### Threads * @@ -26428,17 +26686,26 @@ namespace simdjson { simdjson_result iterate_many(const char* buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; /** The capacity of this parser (the largest document it can process). */ - simdjson_really_inline size_t capacity() const noexcept; + simdjson_inline size_t capacity() const noexcept; /** The maximum capacity of this parser (the largest document it is allowed to process). */ - simdjson_really_inline size_t max_capacity() const noexcept; - simdjson_really_inline void set_max_capacity(size_t max_capacity) noexcept; - /** The maximum depth of this parser (the most deeply nested objects and arrays it can process). */ - simdjson_really_inline size_t max_depth() const noexcept; + simdjson_inline size_t max_capacity() const noexcept; + simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; + /** + * The maximum depth of this parser (the most deeply nested objects and arrays it can process). + * This parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. + */ + simdjson_inline size_t max_depth() const noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length * and `max_depth` depth. * + * The max_depth parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. + * * @param capacity The new capacity. * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. * @return The error, if there is one. @@ -26456,6 +26723,31 @@ namespace simdjson { /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result must be valid UTF-8. 
+ * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. + * @param allow_replacement Whether we allow a replacement if the input string contains unmatched surrogate pairs. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, uint8_t*& dst, bool allow_replacement = false) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result may not be valid UTF-8. See https://simonsapin.github.io/wtf-8/ * The provided pointer is advanced to the end of the string by reference, and a string_view instance * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. @@ -26474,7 +26766,8 @@ namespace simdjson { * @return A string_view pointing at the unescaped string in dst * @error STRING_ERROR if escapes are incorrect. 
*/ - simdjson_really_inline simdjson_result unescape(raw_json_string in, uint8_t*& dst) const noexcept; + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t*& dst) const noexcept; + private: /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; @@ -26482,7 +26775,7 @@ namespace simdjson { size_t _max_capacity; size_t _max_depth{ DEFAULT_MAX_DEPTH }; std::unique_ptr string_buf{}; -#ifdef SIMDJSON_DEVELOPMENT_CHECKS +#if SIMDJSON_DEVELOPMENT_CHECKS std::unique_ptr start_positions{}; #endif @@ -26499,9 +26792,9 @@ namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::parser&& value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::parser&& value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson @@ -26589,13 +26882,13 @@ namespace simdjson { * auto error = parser.iterate_many(json).get(docs); * ``` */ - simdjson_really_inline document_stream() noexcept; + simdjson_inline document_stream() noexcept; /** Move one document_stream to another. */ - simdjson_really_inline document_stream(document_stream&& other) noexcept = default; + simdjson_inline document_stream(document_stream&& other) noexcept = default; /** Move one document_stream to another. 
*/ - simdjson_really_inline document_stream& operator=(document_stream&& other) noexcept = default; + simdjson_inline document_stream& operator=(document_stream&& other) noexcept = default; - simdjson_really_inline ~document_stream() noexcept; + simdjson_inline ~document_stream() noexcept; /** * Returns the input size in bytes. @@ -26634,11 +26927,11 @@ namespace simdjson { /** * Default constructor. */ - simdjson_really_inline iterator() noexcept; + simdjson_inline iterator() noexcept; /** * Get the current document (or error). */ - simdjson_really_inline simdjson_result operator*() noexcept; + simdjson_inline simdjson_result operator*() noexcept; /** * Advance to the next document (prefix). */ @@ -26647,7 +26940,7 @@ namespace simdjson { * Check if we're at the end yet. * @param other the end iterator to compare to. */ - simdjson_really_inline bool operator!=(const iterator& other) const noexcept; + simdjson_inline bool operator!=(const iterator& other) const noexcept; /** * @private * @@ -26663,7 +26956,7 @@ namespace simdjson { * may change in future versions of simdjson: we find the API somewhat * awkward and we would like to offer something friendlier. */ - simdjson_really_inline size_t current_index() const noexcept; + simdjson_inline size_t current_index() const noexcept; /** * @private @@ -26684,7 +26977,7 @@ namespace simdjson { * awkward and we would like to offer something friendlier. * */ - simdjson_really_inline std::string_view source() const noexcept; + simdjson_inline std::string_view source() const noexcept; /** * Returns error of the stream (if any). @@ -26692,7 +26985,7 @@ namespace simdjson { inline error_code error() const noexcept; private: - simdjson_really_inline iterator(document_stream* s, bool finished) noexcept; + simdjson_inline iterator(document_stream* s, bool finished) noexcept; /** The document_stream we're iterating through. */ document_stream* stream; /** Whether we're finished or not. 
*/ @@ -26706,11 +26999,11 @@ namespace simdjson { /** * Start iterating the documents in the stream. */ - simdjson_really_inline iterator begin() noexcept; + simdjson_inline iterator begin() noexcept; /** * The end of the stream, for iterator comparison purposes. */ - simdjson_really_inline iterator end() noexcept; + simdjson_inline iterator end() noexcept; private: @@ -26726,7 +27019,7 @@ namespace simdjson { * @param len is the length of the raw byte buffer in bytes * @param batch_size is the size of the windows (must be strictly greater or equal to the largest JSON document) */ - simdjson_really_inline document_stream( + simdjson_inline document_stream( ondemand::parser& parser, const uint8_t* buf, size_t len, @@ -26830,9 +27123,9 @@ namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_stream&& value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_stream&& value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson @@ -26842,23 +27135,27 @@ namespace simdjson { namespace simdjson { /** * Create a string-view instance out of a document instance. The string-view instance - * contains JSON text that is suitable to be parsed as JSON again. + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. */ inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document& x) noexcept; /** * Create a string-view instance out of a value instance. 
The string-view instance * contains JSON text that is suitable to be parsed as JSON again. The value must - * not have been accessed previously. + * not have been accessed previously. It does not + * validate the content. */ inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value& x) noexcept; /** * Create a string-view instance out of an object instance. The string-view instance - * contains JSON text that is suitable to be parsed as JSON again. + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. */ inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object& x) noexcept; /** * Create a string-view instance out of an array instance. The string-view instance - * contains JSON text that is suitable to be parsed as JSON again. + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. */ inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array& x) noexcept; inline simdjson_result to_json_string(simdjson_result x); @@ -26879,7 +27176,8 @@ namespace simdjson { namespace ondemand { /** - * Print JSON to an output stream. + * Print JSON to an output stream. It does not + * validate the content. * * @param out The output stream. * @param value The element. @@ -26890,7 +27188,8 @@ namespace simdjson { inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif /** - * Print JSON to an output stream. + * Print JSON to an output stream. It does not + * validate the content. * * @param out The output stream. * @param value The array. @@ -26901,7 +27200,8 @@ namespace simdjson { inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif /** - * Print JSON to an output stream. + * Print JSON to an output stream. It does not + * validate the content. * * @param out The output stream. * @param value The array. 
@@ -26916,7 +27216,8 @@ namespace simdjson { inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); #endif /** - * Print JSON to an output stream. + * Print JSON to an output stream. It does not + * validate the content. * * @param out The output stream. * @param value The object. @@ -26942,7 +27243,7 @@ namespace simdjson { // template - simdjson_really_inline void implementation_simdjson_result_base::tie(T& value, error_code& error) && noexcept { + simdjson_inline void implementation_simdjson_result_base::tie(T& value, error_code& error) && noexcept { error = this->second; if (!error) { value = std::forward>(*this).first; @@ -26950,66 +27251,66 @@ namespace simdjson { } template - simdjson_warn_unused simdjson_really_inline error_code implementation_simdjson_result_base::get(T& value) && noexcept { + simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T& value) && noexcept { error_code error; std::forward>(*this).tie(value, error); return error; } template - simdjson_really_inline error_code implementation_simdjson_result_base::error() const noexcept { + simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { return this->second; } #if SIMDJSON_EXCEPTIONS template - simdjson_really_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return this->first; } template - simdjson_really_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { return std::forward>(*this).take_value(); } template - simdjson_really_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { if (error()) { throw 
simdjson_error(error()); } return std::forward(this->first); } template - simdjson_really_inline implementation_simdjson_result_base::operator T && () && noexcept(false) { + simdjson_inline implementation_simdjson_result_base::operator T && () && noexcept(false) { return std::forward>(*this).take_value(); } #endif // SIMDJSON_EXCEPTIONS template - simdjson_really_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { return this->first; } template - simdjson_really_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { return this->first; } template - simdjson_really_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { return std::forward(this->first); } template - simdjson_really_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T&& value, error_code error) noexcept + simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T&& value, error_code error) noexcept : first{ std::forward(value) }, second{ error } {} template - simdjson_really_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept : implementation_simdjson_result_base(T{}, error) {} template - simdjson_really_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T&& value) noexcept + simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T&& value) noexcept : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} } // namespace SIMDJSON_BUILTIN_IMPLEMENTATION @@ 
-27051,48 +27352,48 @@ namespace simdjson { - simdjson_really_inline number_type number::get_number_type() const noexcept { + simdjson_inline number_type number::get_number_type() const noexcept { return type; } - simdjson_really_inline bool number::is_uint64() const noexcept { + simdjson_inline bool number::is_uint64() const noexcept { return get_number_type() == number_type::unsigned_integer; } - simdjson_really_inline uint64_t number::get_uint64() const noexcept { + simdjson_inline uint64_t number::get_uint64() const noexcept { return payload.unsigned_integer; } - simdjson_really_inline number::operator uint64_t() const noexcept { + simdjson_inline number::operator uint64_t() const noexcept { return get_uint64(); } - simdjson_really_inline bool number::is_int64() const noexcept { + simdjson_inline bool number::is_int64() const noexcept { return get_number_type() == number_type::signed_integer; } - simdjson_really_inline int64_t number::get_int64() const noexcept { + simdjson_inline int64_t number::get_int64() const noexcept { return payload.signed_integer; } - simdjson_really_inline number::operator int64_t() const noexcept { + simdjson_inline number::operator int64_t() const noexcept { return get_int64(); } - simdjson_really_inline bool number::is_double() const noexcept { + simdjson_inline bool number::is_double() const noexcept { return get_number_type() == number_type::floating_point_number; } - simdjson_really_inline double number::get_double() const noexcept { + simdjson_inline double number::get_double() const noexcept { return payload.floating_point_number; } - simdjson_really_inline number::operator double() const noexcept { + simdjson_inline number::operator double() const noexcept { return get_double(); } - simdjson_really_inline double number::as_double() const noexcept { + simdjson_inline double number::as_double() const noexcept { if (is_double()) { return payload.floating_point_number; } @@ -27102,22 +27403,22 @@ namespace simdjson { return 
double(payload.unsigned_integer); } - simdjson_really_inline void number::append_s64(int64_t value) noexcept { + simdjson_inline void number::append_s64(int64_t value) noexcept { payload.signed_integer = value; type = number_type::signed_integer; } - simdjson_really_inline void number::append_u64(uint64_t value) noexcept { + simdjson_inline void number::append_u64(uint64_t value) noexcept { payload.unsigned_integer = value; type = number_type::unsigned_integer; } - simdjson_really_inline void number::append_double(double value) noexcept { + simdjson_inline void number::append_double(double value) noexcept { payload.floating_point_number = value; type = number_type::floating_point_number; } - simdjson_really_inline void number::skip_double() noexcept { + simdjson_inline void number::skip_double() noexcept { type = number_type::floating_point_number; } @@ -27127,9 +27428,9 @@ namespace simdjson { namespace simdjson { - simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_type&& value) noexcept + simdjson_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_type&& value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} - simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} } // namespace simdjson @@ -27274,9 +27575,16 @@ namespace simdjson { { // Print the current structural. printf("| "); - auto current_structural = &buf[*index]; - for (int i = 0; i < LOG_BUFFER_LEN; i++) { - printf("%c", printable_char(current_structural[i])); + // Before we begin, the index might point right before the document. 
+ // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 + if (index < iter._root) { + printf("%*s", LOG_BUFFER_LEN, ""); + } + else { + auto current_structural = &buf[*index]; + for (int i = 0; i < LOG_BUFFER_LEN; i++) { + printf("%c", printable_char(current_structural[i])); + } } printf(" "); } @@ -27290,8 +27598,8 @@ namespace simdjson { printf(" "); } // printf("| %5u ", *(index+1)); - printf("| %5u ", depth); - printf("| %.*s ", int(detail.size()), detail.data()); + printf("| %5i ", depth); + printf("| %6.*s ", int(detail.size()), detail.data()); printf("|\n"); fflush(stdout); } @@ -27308,12 +27616,12 @@ namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { - simdjson_really_inline raw_json_string::raw_json_string(const uint8_t* _buf) noexcept : buf{ _buf } {} + simdjson_inline raw_json_string::raw_json_string(const uint8_t* _buf) noexcept : buf{ _buf } {} - simdjson_really_inline const char* raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } + simdjson_inline const char* raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } - simdjson_really_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { size_t pos{ 0 }; // if the content has no escape character, just scan through it quickly! for (; pos < target.size() && target[pos] != '\\'; pos++) {} @@ -27333,7 +27641,7 @@ namespace simdjson { return true; } - simdjson_really_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { + simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { size_t pos{ 0 }; // if the content has no escape character, just scan through it quickly! 
for (; target[pos] && target[pos] != '\\'; pos++) {} @@ -27354,12 +27662,12 @@ namespace simdjson { } - simdjson_really_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { + simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { // If we are going to call memcmp, then we must know something about the length of the raw_json_string. return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); } - simdjson_really_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { + simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { // Assumptions: does not contain unescaped quote characters, and // the raw content is quote terminated within a valid JSON string. if (target.size() <= SIMDJSON_PADDING) { @@ -27374,7 +27682,7 @@ namespace simdjson { return true; } - simdjson_really_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { + simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { const char* r{ raw() }; size_t pos{ 0 }; bool escaping{ false }; @@ -27400,7 +27708,7 @@ namespace simdjson { } - simdjson_really_inline bool raw_json_string::unsafe_is_equal(const char* target) const noexcept { + simdjson_inline bool raw_json_string::unsafe_is_equal(const char* target) const noexcept { // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and // the raw content is quote terminated within a valid JSON string. 
const char* r{ raw() }; @@ -27412,7 +27720,7 @@ namespace simdjson { return true; } - simdjson_really_inline bool raw_json_string::is_equal(const char* target) const noexcept { + simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { // Assumptions: does not contain unescaped quote characters, and // the raw content is quote terminated within a valid JSON string. const char* r{ raw() }; @@ -27439,29 +27747,32 @@ namespace simdjson { return true; } - simdjson_unused simdjson_really_inline bool operator==(const raw_json_string& a, std::string_view c) noexcept { + simdjson_unused simdjson_inline bool operator==(const raw_json_string& a, std::string_view c) noexcept { return a.unsafe_is_equal(c); } - simdjson_unused simdjson_really_inline bool operator==(std::string_view c, const raw_json_string& a) noexcept { + simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string& a) noexcept { return a == c; } - simdjson_unused simdjson_really_inline bool operator!=(const raw_json_string& a, std::string_view c) noexcept { + simdjson_unused simdjson_inline bool operator!=(const raw_json_string& a, std::string_view c) noexcept { return !(a == c); } - simdjson_unused simdjson_really_inline bool operator!=(std::string_view c, const raw_json_string& a) noexcept { + simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string& a) noexcept { return !(a == c); } - simdjson_really_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator& iter) const noexcept { - return iter.unescape(*this); + simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator& iter, bool allow_replacement) const noexcept { + return iter.unescape(*this, allow_replacement); } + simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator& iter) const noexcept { + return iter.unescape_wobbly(*this); + } - simdjson_unused 
simdjson_really_inline std::ostream& operator<<(std::ostream& out, const raw_json_string& str) noexcept { + simdjson_unused simdjson_inline std::ostream& operator<<(std::ostream& out, const raw_json_string& str) noexcept { bool in_escape = false; const char* s = str.raw(); while (true) { @@ -27482,20 +27793,23 @@ namespace simdjson { namespace simdjson { - simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string&& value) noexcept + simdjson_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string&& value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} - simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} - simdjson_really_inline simdjson_result simdjson_result::raw() const noexcept { + simdjson_inline simdjson_result simdjson_result::raw() const noexcept { if (error()) { return error(); } return first.raw(); } - simdjson_really_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator& iter) const noexcept { + simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator& iter, bool allow_replacement) const noexcept { if (error()) { return error(); } - return first.unescape(iter); + return first.unescape(iter, allow_replacement); + } + simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator& iter) const noexcept { + if (error()) { return error(); } + return first.unescape_wobbly(iter); } - } // namespace simdjson /* end file include/simdjson/generic/ondemand/raw_json_string-inl.h */ /* begin file include/simdjson/generic/ondemand/token_iterator-inl.h */ @@ 
-27503,65 +27817,65 @@ namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { - simdjson_really_inline token_iterator::token_iterator( + simdjson_inline token_iterator::token_iterator( const uint8_t* _buf, token_position position ) noexcept : buf{ _buf }, _position{ position } { } - simdjson_really_inline uint32_t token_iterator::current_offset() const noexcept { + simdjson_inline uint32_t token_iterator::current_offset() const noexcept { return *(_position); } - simdjson_really_inline const uint8_t* token_iterator::return_current_and_advance() noexcept { + simdjson_inline const uint8_t* token_iterator::return_current_and_advance() noexcept { return &buf[*(_position++)]; } - simdjson_really_inline const uint8_t* token_iterator::peek(token_position position) const noexcept { + simdjson_inline const uint8_t* token_iterator::peek(token_position position) const noexcept { return &buf[*position]; } - simdjson_really_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { return *position; } - simdjson_really_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { + simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { return *(position + 1) - *position; } - simdjson_really_inline const uint8_t* token_iterator::peek(int32_t delta) const noexcept { + simdjson_inline const uint8_t* token_iterator::peek(int32_t delta) const noexcept { return &buf[*(_position + delta)]; } - simdjson_really_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { return *(_position + delta); } - simdjson_really_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { + simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const 
noexcept { return *(_position + delta + 1) - *(_position + delta); } - simdjson_really_inline token_position token_iterator::position() const noexcept { + simdjson_inline token_position token_iterator::position() const noexcept { return _position; } - simdjson_really_inline void token_iterator::set_position(token_position target_position) noexcept { + simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { _position = target_position; } - simdjson_really_inline bool token_iterator::operator==(const token_iterator& other) const noexcept { + simdjson_inline bool token_iterator::operator==(const token_iterator& other) const noexcept { return _position == other._position; } - simdjson_really_inline bool token_iterator::operator!=(const token_iterator& other) const noexcept { + simdjson_inline bool token_iterator::operator!=(const token_iterator& other) const noexcept { return _position != other._position; } - simdjson_really_inline bool token_iterator::operator>(const token_iterator& other) const noexcept { + simdjson_inline bool token_iterator::operator>(const token_iterator& other) const noexcept { return _position > other._position; } - simdjson_really_inline bool token_iterator::operator>=(const token_iterator& other) const noexcept { + simdjson_inline bool token_iterator::operator>=(const token_iterator& other) const noexcept { return _position >= other._position; } - simdjson_really_inline bool token_iterator::operator<(const token_iterator& other) const noexcept { + simdjson_inline bool token_iterator::operator<(const token_iterator& other) const noexcept { return _position < other._position; } - simdjson_really_inline bool token_iterator::operator<=(const token_iterator& other) const noexcept { + simdjson_inline bool token_iterator::operator<=(const token_iterator& other) const noexcept { return _position <= other._position; } @@ -27571,9 +27885,9 @@ namespace simdjson { namespace simdjson { - simdjson_really_inline 
simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::token_iterator&& value) noexcept + simdjson_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::token_iterator&& value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} - simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} } // namespace simdjson @@ -27583,7 +27897,7 @@ namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { - simdjson_really_inline json_iterator::json_iterator(json_iterator&& other) noexcept + simdjson_inline json_iterator::json_iterator(json_iterator&& other) noexcept : token(std::forward(other.token)), parser{ other.parser }, _string_buf_loc{ other._string_buf_loc }, @@ -27594,7 +27908,7 @@ namespace simdjson { { other.parser = nullptr; } - simdjson_really_inline json_iterator& json_iterator::operator=(json_iterator&& other) noexcept { + simdjson_inline json_iterator& json_iterator::operator=(json_iterator&& other) noexcept { token = other.token; parser = other.parser; _string_buf_loc = other._string_buf_loc; @@ -27606,7 +27920,7 @@ namespace simdjson { return *this; } - simdjson_really_inline json_iterator::json_iterator(const uint8_t* buf, ondemand::parser* _parser) noexcept + simdjson_inline json_iterator::json_iterator(const uint8_t* buf, ondemand::parser* _parser) noexcept : token(buf, &_parser->implementation->structural_indexes[0]), parser{ _parser }, _string_buf_loc{ parser->string_buf.get() }, @@ -27654,7 +27968,7 @@ namespace simdjson { // skip_child() function is not marked inline). 
SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING - simdjson_warn_unused simdjson_really_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { + simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { if (depth() <= parent_depth) { return SUCCESS; } switch (*return_current_and_advance()) { // TODO consider whether matching braces is a requirement: if non-matching braces indicates @@ -27731,23 +28045,27 @@ namespace simdjson { SIMDJSON_POP_DISABLE_WARNINGS - simdjson_really_inline bool json_iterator::at_root() const noexcept { + simdjson_inline bool json_iterator::at_root() const noexcept { return position() == root_position(); } - simdjson_really_inline bool json_iterator::streaming() const noexcept { + simdjson_inline bool json_iterator::is_single_token() const noexcept { + return parser->implementation->n_structural_indexes == 1; + } + + simdjson_inline bool json_iterator::streaming() const noexcept { return _streaming; } - simdjson_really_inline token_position json_iterator::root_position() const noexcept { + simdjson_inline token_position json_iterator::root_position() const noexcept { return _root; } - simdjson_really_inline void json_iterator::assert_at_document_depth() const noexcept { + simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { SIMDJSON_ASSUME(_depth == 1); } - simdjson_really_inline void json_iterator::assert_at_root() const noexcept { + simdjson_inline void json_iterator::assert_at_root() const noexcept { SIMDJSON_ASSUME(_depth == 1); #ifndef SIMDJSON_CLANG_VISUAL_STUDIO // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument @@ -27756,21 +28074,21 @@ namespace simdjson { #endif } - simdjson_really_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { + simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { 
assert_valid_position(token._position + required_tokens - 1); } - simdjson_really_inline void json_iterator::assert_valid_position(token_position position) const noexcept { + simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { #ifndef SIMDJSON_CLANG_VISUAL_STUDIO SIMDJSON_ASSUME(position >= &parser->implementation->structural_indexes[0]); SIMDJSON_ASSUME(position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]); #endif } - simdjson_really_inline bool json_iterator::at_end() const noexcept { + simdjson_inline bool json_iterator::at_end() const noexcept { return position() == end_position(); } - simdjson_really_inline token_position json_iterator::end_position() const noexcept { + simdjson_inline token_position json_iterator::end_position() const noexcept { uint32_t n_structural_indexes{ parser->implementation->n_structural_indexes }; return &parser->implementation->structural_indexes[n_structural_indexes]; } @@ -27800,42 +28118,42 @@ namespace simdjson { return reinterpret_cast(token.peek()); } - simdjson_really_inline bool json_iterator::is_alive() const noexcept { + simdjson_inline bool json_iterator::is_alive() const noexcept { return parser; } - simdjson_really_inline void json_iterator::abandon() noexcept { + simdjson_inline void json_iterator::abandon() noexcept { parser = nullptr; _depth = 0; } - simdjson_really_inline const uint8_t* json_iterator::return_current_and_advance() noexcept { + simdjson_inline const uint8_t* json_iterator::return_current_and_advance() noexcept { #if SIMDJSON_CHECK_EOF assert_more_tokens(); #endif // SIMDJSON_CHECK_EOF return token.return_current_and_advance(); } - simdjson_really_inline const uint8_t* json_iterator::unsafe_pointer() const noexcept { + simdjson_inline const uint8_t* json_iterator::unsafe_pointer() const noexcept { // deliberately done without safety guard: return token.peek(0); } - simdjson_really_inline const uint8_t* 
json_iterator::peek(int32_t delta) const noexcept { + simdjson_inline const uint8_t* json_iterator::peek(int32_t delta) const noexcept { #if SIMDJSON_CHECK_EOF assert_more_tokens(delta + 1); #endif // SIMDJSON_CHECK_EOF return token.peek(delta); } - simdjson_really_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { + simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { #if SIMDJSON_CHECK_EOF assert_more_tokens(delta + 1); #endif // #if SIMDJSON_CHECK_EOF return token.peek_length(delta); } - simdjson_really_inline const uint8_t* json_iterator::peek(token_position position) const noexcept { + simdjson_inline const uint8_t* json_iterator::peek(token_position position) const noexcept { // todo: currently we require end-of-string buffering, but the following // assert_valid_position should be turned on if/when we lift that condition. // assert_valid_position(position); @@ -27844,14 +28162,14 @@ namespace simdjson { return token.peek(position); } - simdjson_really_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { + simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { #if SIMDJSON_CHECK_EOF assert_valid_position(position); #endif // SIMDJSON_CHECK_EOF return token.peek_length(position); } - simdjson_really_inline token_position json_iterator::last_position() const noexcept { + simdjson_inline token_position json_iterator::last_position() const noexcept { // The following line fails under some compilers... // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); // since it has side-effects. 
@@ -27859,50 +28177,55 @@ namespace simdjson { SIMDJSON_ASSUME(n_structural_indexes > 0); return &parser->implementation->structural_indexes[n_structural_indexes - 1]; } - simdjson_really_inline const uint8_t* json_iterator::peek_last() const noexcept { + simdjson_inline const uint8_t* json_iterator::peek_last() const noexcept { return token.peek(last_position()); } - simdjson_really_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { + simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); SIMDJSON_ASSUME(_depth == parent_depth + 1); _depth = parent_depth; } - simdjson_really_inline void json_iterator::descend_to(depth_t child_depth) noexcept { + simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); SIMDJSON_ASSUME(_depth == child_depth - 1); _depth = child_depth; } - simdjson_really_inline depth_t json_iterator::depth() const noexcept { + simdjson_inline depth_t json_iterator::depth() const noexcept { return _depth; } - simdjson_really_inline uint8_t*& json_iterator::string_buf_loc() noexcept { + simdjson_inline uint8_t*& json_iterator::string_buf_loc() noexcept { return _string_buf_loc; } - simdjson_really_inline error_code json_iterator::report_error(error_code _error, const char* message) noexcept { + simdjson_inline error_code json_iterator::report_error(error_code _error, const char* message) noexcept { SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); logger::log_error(*this, message); error = _error; return error; } - simdjson_really_inline token_position json_iterator::position() const noexcept { + simdjson_inline token_position json_iterator::position() const noexcept { return token.position(); } - simdjson_really_inline simdjson_result json_iterator::unescape(raw_json_string in) noexcept { - 
return parser->unescape(in, _string_buf_loc); + simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { + return parser->unescape(in, _string_buf_loc, allow_replacement); } - simdjson_really_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { + simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { + return parser->unescape_wobbly(in, _string_buf_loc); + } + + simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); SIMDJSON_ASSUME(_depth == child_depth - 1); -#ifdef SIMDJSON_DEVELOPMENT_CHECKS +#if SIMDJSON_DEVELOPMENT_CHECKS #ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); #endif #endif @@ -27910,27 +28233,29 @@ namespace simdjson { _depth = child_depth; } -#ifdef SIMDJSON_DEVELOPMENT_CHECKS +#if SIMDJSON_DEVELOPMENT_CHECKS - simdjson_really_inline token_position json_iterator::start_position(depth_t depth) const noexcept { - return parser->start_positions[depth]; + simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + return size_t(depth) < parser->max_depth() ? 
parser->start_positions[depth] : 0; } - simdjson_really_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { - parser->start_positions[depth] = position; + simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + if (size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } } #endif - simdjson_really_inline error_code json_iterator::optional_error(error_code _error, const char* message) noexcept { + simdjson_inline error_code json_iterator::optional_error(error_code _error, const char* message) noexcept { SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); logger::log_error(*this, message); return _error; } template - simdjson_warn_unused simdjson_really_inline bool json_iterator::copy_to_buffer(const uint8_t* json, uint32_t max_len, uint8_t(&tmpbuf)[N]) noexcept { + simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t* json, uint32_t max_len, uint8_t(&tmpbuf)[N]) noexcept { // Let us guard against silly cases: if ((N < max_len) || (N == 0)) { return false; } // Truncate whitespace to fit the buffer. 
@@ -27951,9 +28276,9 @@ namespace simdjson { namespace simdjson { - simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator&& value) noexcept + simdjson_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator&& value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} - simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} } // namespace simdjson @@ -27963,7 +28288,7 @@ namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { - simdjson_really_inline value_iterator::value_iterator( + simdjson_inline value_iterator::value_iterator( json_iterator* json_iter, depth_t depth, token_position start_position @@ -27971,19 +28296,19 @@ namespace simdjson { { } - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::start_object() noexcept { + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { SIMDJSON_TRY(start_container('{', "Not an object", "object")); return started_object(); } - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::start_root_object() noexcept { + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { SIMDJSON_TRY(start_container('{', "Not an object", "object")); return started_root_object(); } - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::started_object() noexcept { + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { assert_at_container_start(); -#ifdef SIMDJSON_DEVELOPMENT_CHECKS +#if SIMDJSON_DEVELOPMENT_CHECKS _json_iter->set_start_position(_depth, start_position()); #endif if (*_json_iter->peek() == '}') { @@ -27995,7 +28320,7 @@ namespace 
simdjson { return true; } - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::started_root_object() noexcept { + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { // When in streaming mode, we cannot expect peek_last() to be the last structural element of the // current document. It only works in the normal mode where we have indexed a single document. // Note that adding a check for 'streaming' is not expensive since we only have at most @@ -28020,7 +28345,7 @@ namespace simdjson { return started_object(); } - simdjson_warn_unused simdjson_really_inline error_code value_iterator::end_container() noexcept { + simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { #if SIMDJSON_CHECK_EOF if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } @@ -28029,7 +28354,7 @@ namespace simdjson { return SUCCESS; } - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::has_next_field() noexcept { + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { assert_at_next(); // It's illegal to call this unless there are more tokens: anything that ends in } or ] is @@ -28046,7 +28371,7 @@ namespace simdjson { } } - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { error_code error; bool has_value; // @@ -28073,7 +28398,7 @@ namespace simdjson { // } else if (!is_open()) { -#ifdef SIMDJSON_DEVELOPMENT_CHECKS +#if SIMDJSON_DEVELOPMENT_CHECKS // If we're past the end of the object, we're being iterated out of order. 
// Note: this isn't perfect detection. It's possible the user is inside some other object; if so, // this object iterator will blithely scan that object for fields. @@ -28099,7 +28424,7 @@ namespace simdjson { else { if ((error = skip_child())) { abandon(); return error; } if ((error = has_next_field().get(has_value))) { abandon(); return error; } -#ifdef SIMDJSON_DEVELOPMENT_CHECKS +#if SIMDJSON_DEVELOPMENT_CHECKS if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } #endif } @@ -28145,7 +28470,7 @@ namespace simdjson { SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { /** * When find_field_unordered_raw is called, we can either be pointing at the * first key, pointing outside (at the closing brace) or if a key was matched @@ -28187,7 +28512,7 @@ namespace simdjson { } else if (!is_open()) { -#ifdef SIMDJSON_DEVELOPMENT_CHECKS +#if SIMDJSON_DEVELOPMENT_CHECKS // If we're past the end of the object, we're being iterated out of order. // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, // this object iterator will blithely scan that object for fields. 
@@ -28217,7 +28542,7 @@ namespace simdjson { if ((error = skip_child())) { abandon(); return error; } search_start = _json_iter->position(); if ((error = has_next_field().get(has_value))) { abandon(); return error; } -#ifdef SIMDJSON_DEVELOPMENT_CHECKS +#if SIMDJSON_DEVELOPMENT_CHECKS if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } #endif } @@ -28336,7 +28661,7 @@ namespace simdjson { } SIMDJSON_POP_DISABLE_WARNINGS - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::field_key() noexcept { + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { assert_at_next(); const uint8_t* key = _json_iter->return_current_and_advance(); @@ -28344,7 +28669,7 @@ namespace simdjson { return raw_json_string(key); } - simdjson_warn_unused simdjson_really_inline error_code value_iterator::field_value() noexcept { + simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { assert_at_next(); if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } @@ -28352,12 +28677,12 @@ namespace simdjson { return SUCCESS; } - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::start_array() noexcept { + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { SIMDJSON_TRY(start_container('[', "Not an array", "array")); return started_array(); } - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::start_root_array() noexcept { + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { SIMDJSON_TRY(start_container('[', "Not an array", "array")); return started_root_array(); } @@ -28369,7 +28694,7 @@ namespace simdjson { return answer; } - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::started_array() noexcept { + simdjson_warn_unused 
simdjson_inline simdjson_result value_iterator::started_array() noexcept { assert_at_container_start(); if (*_json_iter->peek() == ']') { logger::log_value(*_json_iter, "empty array"); @@ -28378,13 +28703,13 @@ namespace simdjson { return false; } _json_iter->descend_to(depth() + 1); -#ifdef SIMDJSON_DEVELOPMENT_CHECKS +#if SIMDJSON_DEVELOPMENT_CHECKS _json_iter->set_start_position(_depth, start_position()); #endif return true; } - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::started_root_array() noexcept { + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { // When in streaming mode, we cannot expect peek_last() to be the last structural element of the // current document. It only works in the normal mode where we have indexed a single document. // Note that adding a check for 'streaming' is not expensive since we only have at most @@ -28409,7 +28734,7 @@ namespace simdjson { return started_array(); } - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::has_next_element() noexcept { + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { assert_at_next(); logger::log_event(*this, "has_next_element"); @@ -28426,96 +28751,108 @@ namespace simdjson { } } - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::parse_bool(const uint8_t* json) const noexcept { + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t* json) const noexcept { auto not_true = atomparsing::str4ncmp(json, "true"); auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 
5 : 4]); if (error) { return incorrect_type_error("Not a boolean"); } return simdjson_result(!not_true); } - simdjson_really_inline bool value_iterator::parse_null(const uint8_t* json) const noexcept { - return !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t* json) const noexcept { + bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); + // if we start with 'n', we must be a null + if (!is_null_string && json[0] == 'n') { return incorrect_type_error("Not a null but starts with n"); } + return is_null_string; } - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_string() noexcept { - return get_raw_json_string().unescape(json_iter()); + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { + return get_raw_json_string().unescape(json_iter(), allow_replacement); } - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_raw_json_string() noexcept { + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { + return get_raw_json_string().unescape_wobbly(json_iter()); + } + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { auto json = peek_scalar("string"); if (*json != '"') { return incorrect_type_error("Not a string"); } advance_scalar("string"); return raw_json_string(json + 1); } - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_uint64() noexcept { + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); if (result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } return result; } - simdjson_warn_unused 
simdjson_really_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); if (result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } return result; } - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_int64() noexcept { + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); if (result.error() == SUCCESS) { advance_non_root_scalar("int64"); } return result; } - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_int64_in_string() noexcept { + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); if (result.error() == SUCCESS) { advance_non_root_scalar("int64"); } return result; } - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_double() noexcept { + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { auto result = numberparsing::parse_double(peek_non_root_scalar("double")); if (result.error() == SUCCESS) { advance_non_root_scalar("double"); } return result; } - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_double_in_string() noexcept { + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); if (result.error() == SUCCESS) { advance_non_root_scalar("double"); } return result; } - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_bool() noexcept { + simdjson_warn_unused simdjson_inline 
simdjson_result value_iterator::get_bool() noexcept { auto result = parse_bool(peek_non_root_scalar("bool")); if (result.error() == SUCCESS) { advance_non_root_scalar("bool"); } return result; } - simdjson_really_inline bool value_iterator::is_null() noexcept { - auto result = parse_null(peek_non_root_scalar("null")); - if (result) { advance_non_root_scalar("null"); } - return result; + simdjson_inline simdjson_result value_iterator::is_null() noexcept { + bool is_null_value; + SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); + if (is_null_value) { advance_non_root_scalar("null"); } + return is_null_value; } - simdjson_really_inline bool value_iterator::is_negative() noexcept { + simdjson_inline bool value_iterator::is_negative() noexcept { return numberparsing::is_negative(peek_non_root_scalar("numbersign")); } - simdjson_really_inline bool value_iterator::is_root_negative() noexcept { + simdjson_inline bool value_iterator::is_root_negative() noexcept { return numberparsing::is_negative(peek_root_scalar("numbersign")); } - simdjson_really_inline simdjson_result value_iterator::is_integer() noexcept { + simdjson_inline simdjson_result value_iterator::is_integer() noexcept { return numberparsing::is_integer(peek_non_root_scalar("integer")); } - simdjson_really_inline simdjson_result value_iterator::get_number_type() noexcept { + simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { return numberparsing::get_number_type(peek_non_root_scalar("integer")); } - simdjson_really_inline simdjson_result value_iterator::get_number() noexcept { + simdjson_inline simdjson_result value_iterator::get_number() noexcept { number num; error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); if (error) { return error; } return num; } - simdjson_really_inline simdjson_result value_iterator::is_root_integer() noexcept { + simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) 
noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("is_root_integer"); uint8_t tmpbuf[20 + 1]; // <20 digits> is the longest possible unsigned integer if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { return false; // if there are more than 20 characters, it cannot be represented as an integer. } - return numberparsing::is_integer(tmpbuf); + auto answer = numberparsing::is_integer(tmpbuf); + // If the parsing was a success, we must still check that it is + // a single scalar. Note that we parse first because of cases like '[]' where + // getting TRAILING_CONTENT is wrong. + if (check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } + return answer; } - simdjson_really_inline simdjson_result value_iterator::get_root_number_type() noexcept { + simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("number"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, @@ -28526,9 +28863,11 @@ namespace simdjson { logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); return NUMBER_ERROR; } - return numberparsing::get_number_type(tmpbuf); + auto answer = numberparsing::get_number_type(tmpbuf); + if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + return answer; } - simdjson_really_inline simdjson_result value_iterator::get_root_number() noexcept { + simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("number"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, @@ -28542,17 +28881,24 @@ namespace simdjson { number num; error_code error = 
numberparsing::parse_number(tmpbuf, num); if (error) { return error; } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } advance_root_scalar("number"); return num; } - - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_string() noexcept { - return get_string(); + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { + return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); } - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_raw_json_string() noexcept { - return get_raw_json_string(); + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { + return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); } - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_uint64() noexcept { + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_scalar("string"); + return raw_json_string(json + 1); + } + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("uint64"); uint8_t tmpbuf[20 + 1]; // <20 digits> is the longest possible unsigned integer @@ -28561,10 +28907,13 @@ namespace simdjson { return NUMBER_ERROR; } auto result = numberparsing::parse_unsigned(tmpbuf); - if (result.error() == SUCCESS) { advance_root_scalar("uint64"); } + if (result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return 
TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } return result; } - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_uint64_in_string() noexcept { + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("uint64"); uint8_t tmpbuf[20 + 1]; // <20 digits> is the longest possible unsigned integer @@ -28573,10 +28922,13 @@ namespace simdjson { return NUMBER_ERROR; } auto result = numberparsing::parse_unsigned_in_string(tmpbuf); - if (result.error() == SUCCESS) { advance_root_scalar("uint64"); } + if (result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } return result; } - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_int64() noexcept { + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("int64"); uint8_t tmpbuf[20 + 1]; // -<19 digits> is the longest possible integer @@ -28586,10 +28938,13 @@ namespace simdjson { } auto result = numberparsing::parse_integer(tmpbuf); - if (result.error() == SUCCESS) { advance_root_scalar("int64"); } + if (result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } return result; } - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_int64_in_string() noexcept { + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("int64"); uint8_t tmpbuf[20 + 1]; // -<19 digits> is the longest possible integer @@ -28599,10 +28954,13 @@ namespace simdjson 
{ } auto result = numberparsing::parse_integer_in_string(tmpbuf); - if (result.error() == SUCCESS) { advance_root_scalar("int64"); } + if (result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } return result; } - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_double() noexcept { + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("double"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, @@ -28614,11 +28972,14 @@ namespace simdjson { return NUMBER_ERROR; } auto result = numberparsing::parse_double(tmpbuf); - if (result.error() == SUCCESS) { advance_root_scalar("double"); } + if (result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } return result; } - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_double_in_string() noexcept { + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("double"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, @@ -28630,35 +28991,44 @@ namespace simdjson { return NUMBER_ERROR; } auto result = numberparsing::parse_double_in_string(tmpbuf); - if (result.error() == SUCCESS) { advance_root_scalar("double"); } + if (result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } return result; } - simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_bool() noexcept { + 
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("bool"); uint8_t tmpbuf[5 + 1]; if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { return incorrect_type_error("Not a boolean"); } auto result = parse_bool(tmpbuf); - if (result.error() == SUCCESS) { advance_root_scalar("bool"); } + if (result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + } return result; } - simdjson_really_inline bool value_iterator::is_root_null() noexcept { + simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("null"); bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && - (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[5]))); - if (result) { advance_root_scalar("null"); } + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + if (result) { // we have something that looks like a null. + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("null"); + } return result; } - simdjson_warn_unused simdjson_really_inline error_code value_iterator::skip_child() noexcept { + simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { SIMDJSON_ASSUME(_json_iter->token._position > _start_position); SIMDJSON_ASSUME(_json_iter->_depth >= _depth); return _json_iter->skip_child(depth()); } - simdjson_really_inline value_iterator value_iterator::child() const noexcept { + simdjson_inline value_iterator value_iterator::child() const noexcept { assert_at_child(); return { _json_iter, depth() + 1, _json_iter->token.position() }; } @@ -28668,52 +29038,52 @@ namespace simdjson { // marked non-inline. 
SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING - simdjson_really_inline bool value_iterator::is_open() const noexcept { + simdjson_inline bool value_iterator::is_open() const noexcept { return _json_iter->depth() >= depth(); } SIMDJSON_POP_DISABLE_WARNINGS - simdjson_really_inline bool value_iterator::at_end() const noexcept { + simdjson_inline bool value_iterator::at_end() const noexcept { return _json_iter->at_end(); } - simdjson_really_inline bool value_iterator::at_start() const noexcept { + simdjson_inline bool value_iterator::at_start() const noexcept { return _json_iter->token.position() == start_position(); } - simdjson_really_inline bool value_iterator::at_first_field() const noexcept { + simdjson_inline bool value_iterator::at_first_field() const noexcept { SIMDJSON_ASSUME(_json_iter->token._position > _start_position); return _json_iter->token.position() == start_position() + 1; } - simdjson_really_inline void value_iterator::abandon() noexcept { + simdjson_inline void value_iterator::abandon() noexcept { _json_iter->abandon(); } - simdjson_warn_unused simdjson_really_inline depth_t value_iterator::depth() const noexcept { + simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { return _depth; } - simdjson_warn_unused simdjson_really_inline error_code value_iterator::error() const noexcept { + simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { return _json_iter->error; } - simdjson_warn_unused simdjson_really_inline uint8_t*& value_iterator::string_buf_loc() noexcept { + simdjson_warn_unused simdjson_inline uint8_t*& value_iterator::string_buf_loc() noexcept { return _json_iter->string_buf_loc(); } - simdjson_warn_unused simdjson_really_inline const json_iterator& value_iterator::json_iter() const noexcept { + simdjson_warn_unused simdjson_inline const json_iterator& value_iterator::json_iter() const noexcept { return *_json_iter; } - simdjson_warn_unused 
simdjson_really_inline json_iterator& value_iterator::json_iter() noexcept { + simdjson_warn_unused simdjson_inline json_iterator& value_iterator::json_iter() noexcept { return *_json_iter; } - simdjson_really_inline const uint8_t* value_iterator::peek_start() const noexcept { + simdjson_inline const uint8_t* value_iterator::peek_start() const noexcept { return _json_iter->peek(start_position()); } - simdjson_really_inline uint32_t value_iterator::peek_start_length() const noexcept { + simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { return _json_iter->peek_length(start_position()); } - simdjson_really_inline const uint8_t* value_iterator::peek_scalar(const char* type) noexcept { + simdjson_inline const uint8_t* value_iterator::peek_scalar(const char* type) noexcept { logger::log_value(*_json_iter, start_position(), depth(), type); // If we're not at the position anymore, we don't want to advance the cursor. if (!is_at_start()) { return peek_start(); } @@ -28723,7 +29093,7 @@ namespace simdjson { return _json_iter->peek(); } - simdjson_really_inline void value_iterator::advance_scalar(const char* type) noexcept { + simdjson_inline void value_iterator::advance_scalar(const char* type) noexcept { logger::log_value(*_json_iter, start_position(), depth(), type); // If we're not at the position anymore, we don't want to advance the cursor. if (!is_at_start()) { return; } @@ -28734,12 +29104,12 @@ namespace simdjson { _json_iter->ascend_to(depth() - 1); } - simdjson_really_inline error_code value_iterator::start_container(uint8_t start_char, const char* incorrect_type_message, const char* type) noexcept { + simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char* incorrect_type_message, const char* type) noexcept { logger::log_start_value(*_json_iter, start_position(), depth(), type); // If we're not at the position anymore, we don't want to advance the cursor. 
const uint8_t* json; if (!is_at_start()) { -#ifdef SIMDJSON_DEVELOPMENT_CHECKS +#if SIMDJSON_DEVELOPMENT_CHECKS if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } #endif json = peek_start(); @@ -28762,14 +29132,14 @@ namespace simdjson { } - simdjson_really_inline const uint8_t* value_iterator::peek_root_scalar(const char* type) noexcept { + simdjson_inline const uint8_t* value_iterator::peek_root_scalar(const char* type) noexcept { logger::log_value(*_json_iter, start_position(), depth(), type); if (!is_at_start()) { return peek_start(); } assert_at_root(); return _json_iter->peek(); } - simdjson_really_inline const uint8_t* value_iterator::peek_non_root_scalar(const char* type) noexcept { + simdjson_inline const uint8_t* value_iterator::peek_non_root_scalar(const char* type) noexcept { logger::log_value(*_json_iter, start_position(), depth(), type); if (!is_at_start()) { return peek_start(); } @@ -28777,7 +29147,7 @@ namespace simdjson { return _json_iter->peek(); } - simdjson_really_inline void value_iterator::advance_root_scalar(const char* type) noexcept { + simdjson_inline void value_iterator::advance_root_scalar(const char* type) noexcept { logger::log_value(*_json_iter, start_position(), depth(), type); if (!is_at_start()) { return; } @@ -28785,7 +29155,7 @@ namespace simdjson { _json_iter->return_current_and_advance(); _json_iter->ascend_to(depth() - 1); } - simdjson_really_inline void value_iterator::advance_non_root_scalar(const char* type) noexcept { + simdjson_inline void value_iterator::advance_non_root_scalar(const char* type) noexcept { logger::log_value(*_json_iter, start_position(), depth(), type); if (!is_at_start()) { return; } @@ -28794,23 +29164,23 @@ namespace simdjson { _json_iter->ascend_to(depth() - 1); } - simdjson_really_inline error_code value_iterator::incorrect_type_error(const char* message) const noexcept { + simdjson_inline error_code value_iterator::incorrect_type_error(const char* message) const noexcept { 
logger::log_error(*_json_iter, start_position(), depth(), message); return INCORRECT_TYPE; } - simdjson_really_inline bool value_iterator::is_at_start() const noexcept { + simdjson_inline bool value_iterator::is_at_start() const noexcept { return position() == start_position(); } - simdjson_really_inline bool value_iterator::is_at_key() const noexcept { + simdjson_inline bool value_iterator::is_at_key() const noexcept { // Keys are at the same depth as the object. // Note here that we could be safer and check that we are within an object, // but we do not. return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; } - simdjson_really_inline bool value_iterator::is_at_iterator_start() const noexcept { + simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). auto delta = position() - start_position(); return delta == 1 || delta == 2; @@ -28834,22 +29204,22 @@ namespace simdjson { SIMDJSON_ASSUME(_depth > 0); } - simdjson_really_inline void value_iterator::move_at_start() noexcept { + simdjson_inline void value_iterator::move_at_start() noexcept { _json_iter->_depth = _depth; _json_iter->token.set_position(_start_position); } - simdjson_really_inline void value_iterator::move_at_container_start() noexcept { + simdjson_inline void value_iterator::move_at_container_start() noexcept { _json_iter->_depth = _depth; _json_iter->token.set_position(_start_position + 1); } - simdjson_really_inline simdjson_result value_iterator::reset_array() noexcept { + simdjson_inline simdjson_result value_iterator::reset_array() noexcept { move_at_container_start(); return started_array(); } - simdjson_really_inline simdjson_result value_iterator::reset_object() noexcept { + simdjson_inline simdjson_result value_iterator::reset_object() noexcept { move_at_container_start(); return started_object(); } @@ -28874,11 +29244,11 @@ namespace simdjson { 
SIMDJSON_ASSUME(_json_iter != nullptr); } - simdjson_really_inline bool value_iterator::is_valid() const noexcept { + simdjson_inline bool value_iterator::is_valid() const noexcept { return _json_iter != nullptr; } - simdjson_really_inline simdjson_result value_iterator::type() const noexcept { + simdjson_inline simdjson_result value_iterator::type() const noexcept { switch (*peek_start()) { case '{': return json_type::object; @@ -28899,23 +29269,23 @@ namespace simdjson { } } - simdjson_really_inline token_position value_iterator::start_position() const noexcept { + simdjson_inline token_position value_iterator::start_position() const noexcept { return _start_position; } - simdjson_really_inline token_position value_iterator::position() const noexcept { + simdjson_inline token_position value_iterator::position() const noexcept { return _json_iter->position(); } - simdjson_really_inline token_position value_iterator::end_position() const noexcept { + simdjson_inline token_position value_iterator::end_position() const noexcept { return _json_iter->end_position(); } - simdjson_really_inline token_position value_iterator::last_position() const noexcept { + simdjson_inline token_position value_iterator::last_position() const noexcept { return _json_iter->last_position(); } - simdjson_really_inline error_code value_iterator::report_error(error_code error, const char* message) noexcept { + simdjson_inline error_code value_iterator::report_error(error_code error, const char* message) noexcept { return _json_iter->report_error(error, message); } @@ -28925,9 +29295,9 @@ namespace simdjson { namespace simdjson { - simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value_iterator&& value) noexcept + simdjson_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value_iterator&& value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} - simdjson_really_inline 
simdjson_result::simdjson_result(error_code error) noexcept + simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} } // namespace simdjson @@ -28937,21 +29307,21 @@ namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { - simdjson_really_inline array_iterator::array_iterator(const value_iterator& _iter) noexcept + simdjson_inline array_iterator::array_iterator(const value_iterator& _iter) noexcept : iter{ _iter } {} - simdjson_really_inline simdjson_result array_iterator::operator*() noexcept { + simdjson_inline simdjson_result array_iterator::operator*() noexcept { if (iter.error()) { iter.abandon(); return iter.error(); } return value(iter.child()); } - simdjson_really_inline bool array_iterator::operator==(const array_iterator& other) const noexcept { + simdjson_inline bool array_iterator::operator==(const array_iterator& other) const noexcept { return !(*this != other); } - simdjson_really_inline bool array_iterator::operator!=(const array_iterator&) const noexcept { + simdjson_inline bool array_iterator::operator!=(const array_iterator&) const noexcept { return iter.is_open(); } - simdjson_really_inline array_iterator& array_iterator::operator++() noexcept { + simdjson_inline array_iterator& array_iterator::operator++() noexcept { error_code error; // PERF NOTE this is a safety rail ... users should exit loops as soon as they receive an error, so we'll never get here. // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. 
@@ -28967,31 +29337,31 @@ namespace simdjson { namespace simdjson { - simdjson_really_inline simdjson_result::simdjson_result( + simdjson_inline simdjson_result::simdjson_result( SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array_iterator&& value ) noexcept : SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base(std::forward(value)) { first.iter.assert_is_valid(); } - simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base({}, error) { } - simdjson_really_inline simdjson_result simdjson_result::operator*() noexcept { + simdjson_inline simdjson_result simdjson_result::operator*() noexcept { if (error()) { return error(); } return *first; } - simdjson_really_inline bool simdjson_result::operator==(const simdjson_result& other) const noexcept { + simdjson_inline bool simdjson_result::operator==(const simdjson_result& other) const noexcept { if (!first.iter.is_valid()) { return !error(); } return first == other.first; } - simdjson_really_inline bool simdjson_result::operator!=(const simdjson_result& other) const noexcept { + simdjson_inline bool simdjson_result::operator!=(const simdjson_result& other) const noexcept { if (!first.iter.is_valid()) { return error(); } return first != other.first; } - simdjson_really_inline simdjson_result& simdjson_result::operator++() noexcept { + simdjson_inline simdjson_result& simdjson_result::operator++() noexcept { // Clear the error if there is one, so we don't yield it twice if (error()) { second = SUCCESS; return *this; } ++(first); @@ -29009,11 +29379,11 @@ namespace simdjson { // object_iterator // - simdjson_really_inline object_iterator::object_iterator(const value_iterator& _iter) noexcept + simdjson_inline object_iterator::object_iterator(const value_iterator& _iter) noexcept : iter{ _iter } {} - simdjson_really_inline simdjson_result 
object_iterator::operator*() noexcept { + simdjson_inline simdjson_result object_iterator::operator*() noexcept { error_code error = iter.error(); if (error) { iter.abandon(); return error; } auto result = field::start(iter); @@ -29022,16 +29392,16 @@ namespace simdjson { if (result.error()) { iter.abandon(); } return result; } - simdjson_really_inline bool object_iterator::operator==(const object_iterator& other) const noexcept { + simdjson_inline bool object_iterator::operator==(const object_iterator& other) const noexcept { return !(*this != other); } - simdjson_really_inline bool object_iterator::operator!=(const object_iterator&) const noexcept { + simdjson_inline bool object_iterator::operator!=(const object_iterator&) const noexcept { return iter.is_open(); } SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING - simdjson_really_inline object_iterator& object_iterator::operator++() noexcept { + simdjson_inline object_iterator& object_iterator::operator++() noexcept { // TODO this is a safety rail ... users should exit loops as soon as they receive an error. // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. 
if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error @@ -29092,34 +29462,34 @@ namespace simdjson { namespace simdjson { - simdjson_really_inline simdjson_result::simdjson_result( + simdjson_inline simdjson_result::simdjson_result( SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object_iterator&& value ) noexcept : implementation_simdjson_result_base(std::forward(value)) { first.iter.assert_is_valid(); } - simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base({}, error) { } - simdjson_really_inline simdjson_result simdjson_result::operator*() noexcept { + simdjson_inline simdjson_result simdjson_result::operator*() noexcept { if (error()) { return error(); } return *first; } // If we're iterating and there is an error, return the error once. - simdjson_really_inline bool simdjson_result::operator==(const simdjson_result& other) const noexcept { + simdjson_inline bool simdjson_result::operator==(const simdjson_result& other) const noexcept { if (!first.iter.is_valid()) { return !error(); } return first == other.first; } // If we're iterating and there is an error, return the error once. - simdjson_really_inline bool simdjson_result::operator!=(const simdjson_result& other) const noexcept { + simdjson_inline bool simdjson_result::operator!=(const simdjson_result& other) const noexcept { if (!first.iter.is_valid()) { return error(); } return first != other.first; } // Checks for ']' and ',' - simdjson_really_inline simdjson_result& simdjson_result::operator++() noexcept { + simdjson_inline simdjson_result& simdjson_result::operator++() noexcept { // Clear the error if there is one, so we don't yield it twice if (error()) { second = SUCCESS; return *this; } ++first; @@ -29171,45 +29541,45 @@ namespace simdjson { // error == SUCCESS. 
// - simdjson_really_inline array::array(const value_iterator& _iter) noexcept + simdjson_inline array::array(const value_iterator& _iter) noexcept : iter{ _iter } { } - simdjson_really_inline simdjson_result array::start(value_iterator& iter) noexcept { + simdjson_inline simdjson_result array::start(value_iterator& iter) noexcept { // We don't need to know if the array is empty to start iteration, but we do want to know if there // is an error--thus `simdjson_unused`. simdjson_unused bool has_value; SIMDJSON_TRY(iter.start_array().get(has_value)); return array(iter); } - simdjson_really_inline simdjson_result array::start_root(value_iterator& iter) noexcept { + simdjson_inline simdjson_result array::start_root(value_iterator& iter) noexcept { simdjson_unused bool has_value; SIMDJSON_TRY(iter.start_root_array().get(has_value)); return array(iter); } - simdjson_really_inline simdjson_result array::started(value_iterator& iter) noexcept { + simdjson_inline simdjson_result array::started(value_iterator& iter) noexcept { bool has_value; SIMDJSON_TRY(iter.started_array().get(has_value)); return array(iter); } - simdjson_really_inline simdjson_result array::begin() noexcept { -#ifdef SIMDJSON_DEVELOPMENT_CHECKS + simdjson_inline simdjson_result array::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } #endif return array_iterator(iter); } - simdjson_really_inline simdjson_result array::end() noexcept { + simdjson_inline simdjson_result array::end() noexcept { return array_iterator(iter); } - simdjson_really_inline error_code array::consume() noexcept { + simdjson_inline error_code array::consume() noexcept { auto error = iter.json_iter().skip_child(iter.depth() - 1); if (error) { iter.abandon(); } return error; } - simdjson_really_inline simdjson_result array::raw_json() noexcept { + simdjson_inline simdjson_result array::raw_json() noexcept { const uint8_t* starting_point{ iter.peek_start() }; auto 
error = consume(); if (error) { return error; } @@ -29222,7 +29592,7 @@ namespace simdjson { SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING - simdjson_really_inline simdjson_result array::count_elements() & noexcept { + simdjson_inline simdjson_result array::count_elements() & noexcept { size_t count{ 0 }; // Important: we do not consume any of the values. for (simdjson_unused auto v : *this) { count++; } @@ -29235,7 +29605,7 @@ namespace simdjson { } SIMDJSON_POP_DISABLE_WARNINGS - simdjson_really_inline simdjson_result array::is_empty() & noexcept { + simdjson_inline simdjson_result array::is_empty() & noexcept { bool is_not_empty; auto error = iter.reset_array().get(is_not_empty); if (error) { return error; } @@ -29282,7 +29652,7 @@ namespace simdjson { return child; } - simdjson_really_inline simdjson_result array::at(size_t index) noexcept { + simdjson_inline simdjson_result array::at(size_t index) noexcept { size_t i = 0; for (auto value : *this) { if (i == index) { return value; } @@ -29297,42 +29667,42 @@ namespace simdjson { namespace simdjson { - simdjson_really_inline simdjson_result::simdjson_result( + simdjson_inline simdjson_result::simdjson_result( SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array&& value ) noexcept : implementation_simdjson_result_base( std::forward(value) - ) + ) { } - simdjson_really_inline simdjson_result::simdjson_result( + simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : implementation_simdjson_result_base(error) { } - simdjson_really_inline simdjson_result simdjson_result::begin() noexcept { + simdjson_inline simdjson_result simdjson_result::begin() noexcept { if (error()) { return error(); } return first.begin(); } - simdjson_really_inline simdjson_result simdjson_result::end() noexcept { + simdjson_inline simdjson_result simdjson_result::end() noexcept { if (error()) { return error(); } return first.end(); } - simdjson_really_inline simdjson_result 
simdjson_result::count_elements() & noexcept { + simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } - simdjson_really_inline simdjson_result simdjson_result::is_empty() & noexcept { + simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { if (error()) { return error(); } return first.is_empty(); } - simdjson_really_inline simdjson_result simdjson_result::at(size_t index) noexcept { + simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { if (error()) { return error(); } return first.at(index); } - simdjson_really_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } @@ -29343,13 +29713,13 @@ namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { - simdjson_really_inline document::document(ondemand::json_iterator&& _iter) noexcept + simdjson_inline document::document(ondemand::json_iterator&& _iter) noexcept : iter{ std::forward(_iter) } { logger::log_start_value(iter, "document"); } - simdjson_really_inline document document::start(json_iterator&& iter) noexcept { + simdjson_inline document document::start(json_iterator&& iter) noexcept { return document(std::forward(iter)); } @@ -29372,13 +29742,13 @@ namespace simdjson { inline bool document::is_alive() noexcept { return iter.is_alive(); } - simdjson_really_inline value_iterator document::resume_value_iterator() noexcept { + simdjson_inline value_iterator document::resume_value_iterator() noexcept { return value_iterator(&iter, 1, iter.root_position()); } - simdjson_really_inline value_iterator document::get_root_value_iterator() noexcept { + simdjson_inline value_iterator document::get_root_value_iterator() noexcept { return 
resume_value_iterator(); } - simdjson_really_inline simdjson_result document::start_or_resume_object() noexcept { + simdjson_inline simdjson_result document::start_or_resume_object() noexcept { if (iter.at_root()) { return get_object(); } @@ -29386,7 +29756,7 @@ namespace simdjson { return object::resume(resume_value_iterator()); } } - simdjson_really_inline simdjson_result document::get_value() noexcept { + simdjson_inline simdjson_result document::get_value() noexcept { // Make sure we start any arrays or objects before returning, so that start_root_() // gets called. iter.assert_at_document_depth(); @@ -29401,139 +29771,145 @@ namespace simdjson { // return value(get_root_value_iterator()); } } - simdjson_really_inline simdjson_result document::get_array() & noexcept { + simdjson_inline simdjson_result document::get_array() & noexcept { auto value = get_root_value_iterator(); return array::start_root(value); } - simdjson_really_inline simdjson_result document::get_object() & noexcept { + simdjson_inline simdjson_result document::get_object() & noexcept { auto value = get_root_value_iterator(); return object::start_root(value); } - simdjson_really_inline simdjson_result document::get_uint64() noexcept { - return get_root_value_iterator().get_root_uint64(); + + /** + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. We want to disallow trailing + * content. + * Thus, in several implementations below, we pass a 'true' parameter value to + * a get_root_value_iterator() method: this indicates that we disallow trailing content. 
+ */ + + simdjson_inline simdjson_result document::get_uint64() noexcept { + return get_root_value_iterator().get_root_uint64(true); } - simdjson_really_inline simdjson_result document::get_uint64_in_string() noexcept { - return get_root_value_iterator().get_root_uint64_in_string(); + simdjson_inline simdjson_result document::get_uint64_in_string() noexcept { + return get_root_value_iterator().get_root_uint64_in_string(true); } - simdjson_really_inline simdjson_result document::get_int64() noexcept { - return get_root_value_iterator().get_root_int64(); + simdjson_inline simdjson_result document::get_int64() noexcept { + return get_root_value_iterator().get_root_int64(true); } - simdjson_really_inline simdjson_result document::get_int64_in_string() noexcept { - return get_root_value_iterator().get_root_int64_in_string(); + simdjson_inline simdjson_result document::get_int64_in_string() noexcept { + return get_root_value_iterator().get_root_int64_in_string(true); } - simdjson_really_inline simdjson_result document::get_double() noexcept { - return get_root_value_iterator().get_root_double(); + simdjson_inline simdjson_result document::get_double() noexcept { + return get_root_value_iterator().get_root_double(true); } - simdjson_really_inline simdjson_result document::get_double_in_string() noexcept { - return get_root_value_iterator().get_root_double_in_string(); + simdjson_inline simdjson_result document::get_double_in_string() noexcept { + return get_root_value_iterator().get_root_double_in_string(true); } - simdjson_really_inline simdjson_result document::get_string() noexcept { - return get_root_value_iterator().get_root_string(); + simdjson_inline simdjson_result document::get_string(bool allow_replacement) noexcept { + return get_root_value_iterator().get_root_string(true, allow_replacement); } - simdjson_really_inline simdjson_result document::get_raw_json_string() noexcept { - return get_root_value_iterator().get_root_raw_json_string(); + simdjson_inline 
simdjson_result document::get_wobbly_string() noexcept { + return get_root_value_iterator().get_root_wobbly_string(true); } - simdjson_really_inline simdjson_result document::get_bool() noexcept { - return get_root_value_iterator().get_root_bool(); + simdjson_inline simdjson_result document::get_raw_json_string() noexcept { + return get_root_value_iterator().get_root_raw_json_string(true); } - simdjson_really_inline bool document::is_null() noexcept { - return get_root_value_iterator().is_root_null(); + simdjson_inline simdjson_result document::get_bool() noexcept { + return get_root_value_iterator().get_root_bool(true); + } + simdjson_inline simdjson_result document::is_null() noexcept { + return get_root_value_iterator().is_root_null(true); } - template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_array(); } - template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_object(); } - template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_raw_json_string(); } - template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_string(); } - template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_double(); } - template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_uint64(); } - template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_int64(); } - template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_bool(); } - template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_value(); } + template<> simdjson_inline simdjson_result document::get() & noexcept { return get_array(); } + template<> simdjson_inline simdjson_result document::get() & noexcept { return get_object(); } + template<> simdjson_inline simdjson_result document::get() & noexcept { return get_raw_json_string(); } + 
template<> simdjson_inline simdjson_result document::get() & noexcept { return get_string(false); } + template<> simdjson_inline simdjson_result document::get() & noexcept { return get_double(); } + template<> simdjson_inline simdjson_result document::get() & noexcept { return get_uint64(); } + template<> simdjson_inline simdjson_result document::get() & noexcept { return get_int64(); } + template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } + template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } - template<> simdjson_really_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } - template<> simdjson_really_inline simdjson_result document::get() && noexcept { return get_string(); } - template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } - template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } - template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } - template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } - template<> simdjson_really_inline simdjson_result document::get() && noexcept { return get_value(); } + template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } + template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } + template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } + template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } + template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } + template<> simdjson_inline 
simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } + template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } - template simdjson_really_inline error_code document::get(T& out) & noexcept { + template simdjson_inline error_code document::get(T& out) & noexcept { return get().get(out); } - template simdjson_really_inline error_code document::get(T& out) && noexcept { + template simdjson_inline error_code document::get(T& out) && noexcept { return std::forward(*this).get().get(out); } #if SIMDJSON_EXCEPTIONS - simdjson_really_inline document::operator array() & noexcept(false) { return get_array(); } - simdjson_really_inline document::operator object() & noexcept(false) { return get_object(); } - simdjson_really_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } - simdjson_really_inline document::operator int64_t() noexcept(false) { return get_int64(); } - simdjson_really_inline document::operator double() noexcept(false) { return get_double(); } - simdjson_really_inline document::operator std::string_view() noexcept(false) { return get_string(); } - simdjson_really_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } - simdjson_really_inline document::operator bool() noexcept(false) { return get_bool(); } - simdjson_really_inline document::operator value() noexcept(false) { return get_value(); } + simdjson_inline document::operator array() & noexcept(false) { return get_array(); } + simdjson_inline document::operator object() & noexcept(false) { return get_object(); } + simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } + simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } + simdjson_inline document::operator double() noexcept(false) { return get_double(); } + simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } 
+ simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } + simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } + simdjson_inline document::operator value() noexcept(false) { return get_value(); } #endif - simdjson_really_inline simdjson_result document::count_elements() & noexcept { + simdjson_inline simdjson_result document::count_elements() & noexcept { auto a = get_array(); simdjson_result answer = a.count_elements(); /* If there was an array, we are now left pointing at its first element. */ - if (answer.error() == SUCCESS) { - iter._depth = 1; /* undoing the increment so we go back at the doc depth.*/ - iter.assert_at_document_depth(); - } + if (answer.error() == SUCCESS) { rewind(); } return answer; } - simdjson_really_inline simdjson_result document::count_fields() & noexcept { + simdjson_inline simdjson_result document::count_fields() & noexcept { auto a = get_object(); simdjson_result answer = a.count_fields(); - /* If there was an array, we are now left pointing at its first element. */ - if (answer.error() == SUCCESS) { - iter._depth = 1; /* undoing the increment so we go back at the doc depth.*/ - iter.assert_at_document_depth(); - } + /* If there was an object, we are now left pointing at its first element. 
*/ + if (answer.error() == SUCCESS) { rewind(); } return answer; } - simdjson_really_inline simdjson_result document::at(size_t index) & noexcept { + simdjson_inline simdjson_result document::at(size_t index) & noexcept { auto a = get_array(); return a.at(index); } - simdjson_really_inline simdjson_result document::begin() & noexcept { + simdjson_inline simdjson_result document::begin() & noexcept { return get_array().begin(); } - simdjson_really_inline simdjson_result document::end() & noexcept { + simdjson_inline simdjson_result document::end() & noexcept { return {}; } - simdjson_really_inline simdjson_result document::find_field(std::string_view key) & noexcept { + simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept { return start_or_resume_object().find_field(key); } - simdjson_really_inline simdjson_result document::find_field(const char* key) & noexcept { + simdjson_inline simdjson_result document::find_field(const char* key) & noexcept { return start_or_resume_object().find_field(key); } - simdjson_really_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { + simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { return start_or_resume_object().find_field_unordered(key); } - simdjson_really_inline simdjson_result document::find_field_unordered(const char* key) & noexcept { + simdjson_inline simdjson_result document::find_field_unordered(const char* key) & noexcept { return start_or_resume_object().find_field_unordered(key); } - simdjson_really_inline simdjson_result document::operator[](std::string_view key) & noexcept { + simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { return start_or_resume_object()[key]; } - simdjson_really_inline simdjson_result document::operator[](const char* key) & noexcept { + simdjson_inline simdjson_result document::operator[](const char* key) & noexcept { return 
start_or_resume_object()[key]; } - simdjson_really_inline error_code document::consume() noexcept { + simdjson_inline error_code document::consume() noexcept { auto error = iter.skip_child(0); if (error) { iter.abandon(); } return error; } - simdjson_really_inline simdjson_result document::raw_json() noexcept { + simdjson_inline simdjson_result document::raw_json() noexcept { auto _iter = get_root_value_iterator(); const uint8_t* starting_point{ _iter.peek_start() }; auto error = consume(); @@ -29545,40 +29921,40 @@ namespace simdjson { return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } - simdjson_really_inline simdjson_result document::type() noexcept { + simdjson_inline simdjson_result document::type() noexcept { return get_root_value_iterator().type(); } - simdjson_really_inline simdjson_result document::is_scalar() noexcept { + simdjson_inline simdjson_result document::is_scalar() noexcept { json_type this_type; auto error = type().get(this_type); if (error) { return error; } return !((this_type == json_type::array) || (this_type == json_type::object)); } - simdjson_really_inline bool document::is_negative() noexcept { + simdjson_inline bool document::is_negative() noexcept { return get_root_value_iterator().is_root_negative(); } - simdjson_really_inline simdjson_result document::is_integer() noexcept { - return get_root_value_iterator().is_root_integer(); + simdjson_inline simdjson_result document::is_integer() noexcept { + return get_root_value_iterator().is_root_integer(true); } - simdjson_really_inline simdjson_result document::get_number_type() noexcept { - return get_root_value_iterator().get_root_number_type(); + simdjson_inline simdjson_result document::get_number_type() noexcept { + return get_root_value_iterator().get_root_number_type(true); } - simdjson_really_inline simdjson_result document::get_number() noexcept { - return get_root_value_iterator().get_root_number(); + simdjson_inline simdjson_result 
document::get_number() noexcept { + return get_root_value_iterator().get_root_number(true); } - simdjson_really_inline simdjson_result document::raw_json_token() noexcept { + simdjson_inline simdjson_result document::raw_json_token() noexcept { auto _iter = get_root_value_iterator(); return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_start_length()); } - simdjson_really_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { + simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { rewind(); // Rewind the document each time at_pointer is called if (json_pointer.empty()) { return this->get_value(); @@ -29602,232 +29978,248 @@ namespace simdjson { namespace simdjson { - simdjson_really_inline simdjson_result::simdjson_result( + simdjson_inline simdjson_result::simdjson_result( SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document&& value ) noexcept : implementation_simdjson_result_base( std::forward(value) - ) + ) { } - simdjson_really_inline simdjson_result::simdjson_result( + simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : implementation_simdjson_result_base( error - ) + ) { } - simdjson_really_inline simdjson_result simdjson_result::count_elements() & noexcept { + simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } - simdjson_really_inline simdjson_result simdjson_result::count_fields() & noexcept { + simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } - simdjson_really_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { if (error()) { return error(); } return first.at(index); } - simdjson_really_inline error_code simdjson_result::rewind() noexcept { + simdjson_inline 
error_code simdjson_result::rewind() noexcept { if (error()) { return error(); } first.rewind(); return SUCCESS; } - simdjson_really_inline simdjson_result simdjson_result::begin() & noexcept { + simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } - simdjson_really_inline simdjson_result simdjson_result::end() & noexcept { + simdjson_inline simdjson_result simdjson_result::end() & noexcept { return {}; } - simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } - simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(const char* key) & noexcept { + simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char* key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } - simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } - simdjson_really_inline simdjson_result simdjson_result::operator[](const char* key) & noexcept { + simdjson_inline simdjson_result simdjson_result::operator[](const char* key) & noexcept { if (error()) { return error(); } return first[key]; } - simdjson_really_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } - simdjson_really_inline simdjson_result simdjson_result::find_field(const char* key) & noexcept { + simdjson_inline simdjson_result simdjson_result::find_field(const 
char* key) & noexcept { if (error()) { return error(); } return first.find_field(key); } - simdjson_really_inline simdjson_result simdjson_result::get_array() & noexcept { + simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { if (error()) { return error(); } return first.get_array(); } - simdjson_really_inline simdjson_result simdjson_result::get_object() & noexcept { + simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { if (error()) { return error(); } return first.get_object(); } - simdjson_really_inline simdjson_result simdjson_result::get_uint64() noexcept { + simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } - simdjson_really_inline simdjson_result simdjson_result::get_int64() noexcept { + simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); + } + simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } - simdjson_really_inline simdjson_result simdjson_result::get_double() noexcept { + simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); + } + simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } - simdjson_really_inline simdjson_result simdjson_result::get_string() noexcept { + simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } - return first.get_string(); + return first.get_double_in_string(); } - simdjson_really_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return 
error(); } + return first.get_string(allow_replacement); + } + simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); + } + simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } - simdjson_really_inline simdjson_result simdjson_result::get_bool() noexcept { + simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } - simdjson_really_inline simdjson_result simdjson_result::get_value() noexcept { + simdjson_inline simdjson_result simdjson_result::get_value() noexcept { if (error()) { return error(); } return first.get_value(); } - simdjson_really_inline bool simdjson_result::is_null() noexcept { + simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } template - simdjson_really_inline simdjson_result simdjson_result::get() & noexcept { + simdjson_inline simdjson_result simdjson_result::get() & noexcept { if (error()) { return error(); } return first.get(); } template - simdjson_really_inline simdjson_result simdjson_result::get() && noexcept { + simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } return std::forward(first).get(); } template - simdjson_really_inline error_code simdjson_result::get(T& out) & noexcept { + simdjson_inline error_code simdjson_result::get(T& out) & noexcept { if (error()) { return error(); } return first.get(out); } template - simdjson_really_inline error_code simdjson_result::get(T& out) && noexcept { + simdjson_inline error_code simdjson_result::get(T& out) && noexcept { if (error()) { return error(); } return std::forward(first).get(out); } - template<> simdjson_really_inline simdjson_result simdjson_result::get() & noexcept = delete; - template<> 
simdjson_really_inline simdjson_result simdjson_result::get() && noexcept { + template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; + template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } return std::forward(first); } - template<> simdjson_really_inline error_code simdjson_result::get(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document& out) & noexcept = delete; - template<> simdjson_really_inline error_code simdjson_result::get(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document& out) && noexcept { + template<> simdjson_inline error_code simdjson_result::get(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document& out) & noexcept = delete; + template<> simdjson_inline error_code simdjson_result::get(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document& out) && noexcept { if (error()) { return error(); } out = std::forward(first); return SUCCESS; } - simdjson_really_inline simdjson_result simdjson_result::type() noexcept { + simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } return first.type(); } - simdjson_really_inline simdjson_result simdjson_result::is_scalar() noexcept { + simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { if (error()) { return error(); } return first.is_scalar(); } - simdjson_really_inline bool simdjson_result::is_negative() noexcept { + simdjson_inline bool simdjson_result::is_negative() noexcept { if (error()) { return error(); } return first.is_negative(); } - simdjson_really_inline simdjson_result simdjson_result::is_integer() noexcept { + simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } return first.is_integer(); } - simdjson_really_inline simdjson_result simdjson_result::get_number_type() noexcept { + simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if (error()) { return error(); } return 
first.get_number_type(); } - simdjson_really_inline simdjson_result simdjson_result::get_number() noexcept { + simdjson_inline simdjson_result simdjson_result::get_number() noexcept { if (error()) { return error(); } return first.get_number(); } #if SIMDJSON_EXCEPTIONS - simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false) { + simdjson_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } - simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false) { + simdjson_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } - simdjson_really_inline simdjson_result::operator uint64_t() noexcept(false) { + simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } - simdjson_really_inline simdjson_result::operator int64_t() noexcept(false) { + simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } - simdjson_really_inline simdjson_result::operator double() noexcept(false) { + simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } - simdjson_really_inline simdjson_result::operator std::string_view() noexcept(false) { + simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } - simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false) { + simdjson_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() 
noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } - simdjson_really_inline simdjson_result::operator bool() noexcept(false) { + simdjson_inline simdjson_result::operator bool() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } - simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false) { + simdjson_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif - simdjson_really_inline simdjson_result simdjson_result::current_location() noexcept { + simdjson_inline simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } return first.current_location(); } - simdjson_really_inline int32_t simdjson_result::current_depth() const noexcept { + simdjson_inline int32_t simdjson_result::current_depth() const noexcept { if (error()) { return error(); } return first.current_depth(); } - simdjson_really_inline simdjson_result simdjson_result::raw_json_token() noexcept { + simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { if (error()) { return error(); } return first.raw_json_token(); } - simdjson_really_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } @@ -29840,54 +30232,72 @@ namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { - simdjson_really_inline document_reference::document_reference() noexcept : doc{ nullptr } {} - simdjson_really_inline document_reference::document_reference(document& d) noexcept : doc(&d) {} - simdjson_really_inline void document_reference::rewind() noexcept { doc->rewind(); } - simdjson_really_inline 
simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } - simdjson_really_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } - simdjson_really_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_uint64(); } - simdjson_really_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_int64(); } - simdjson_really_inline simdjson_result document_reference::get_double() noexcept { return doc->get_double(); } - simdjson_really_inline simdjson_result document_reference::get_string() noexcept { return doc->get_string(); } - simdjson_really_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_raw_json_string(); } - simdjson_really_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_bool(); } - simdjson_really_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } - simdjson_really_inline bool document_reference::is_null() noexcept { return doc->is_null(); } + simdjson_inline document_reference::document_reference() noexcept : doc{ nullptr } {} + simdjson_inline document_reference::document_reference(document& d) noexcept : doc(&d) {} + simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } + simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } + simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } + /** + * The document_reference instances are used primarily/solely for streams of JSON + * documents. + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. 
+ * + * However, for streams of JSON documents, we want to be able to start from + * "321" "321" "321" + * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() + * successfully each time. + * + * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: + * this indicates that we allow trailing content. + */ + simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } + simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } + simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } + simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } + simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } + simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double_in_string(false); } /* must use the *_in_string root parser, matching document::get_double_in_string(); 'false' = trailing content allowed */ + simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } + simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } + simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } + simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } + simdjson_inline simdjson_result document_reference::get_value() noexcept { return
doc->get_value(); } + simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } #if SIMDJSON_EXCEPTIONS - simdjson_really_inline document_reference::operator array() & noexcept(false) { return array(*doc); } - simdjson_really_inline document_reference::operator object() & noexcept(false) { return object(*doc); } - simdjson_really_inline document_reference::operator uint64_t() noexcept(false) { return uint64_t(*doc); } - simdjson_really_inline document_reference::operator int64_t() noexcept(false) { return int64_t(*doc); } - simdjson_really_inline document_reference::operator double() noexcept(false) { return double(*doc); } - simdjson_really_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } - simdjson_really_inline document_reference::operator raw_json_string() noexcept(false) { return raw_json_string(*doc); } - simdjson_really_inline document_reference::operator bool() noexcept(false) { return bool(*doc); } - simdjson_really_inline document_reference::operator value() noexcept(false) { return value(*doc); } + simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } + simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } + simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } + simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } + simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } + simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } + simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return raw_json_string(*doc); } + simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } + simdjson_inline 
document_reference::operator value() noexcept(false) { return value(*doc); } #endif - simdjson_really_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } - simdjson_really_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } - simdjson_really_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } - simdjson_really_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } - simdjson_really_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } - simdjson_really_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } - simdjson_really_inline simdjson_result document_reference::find_field(const char* key) & noexcept { return doc->find_field(key); } - simdjson_really_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } - simdjson_really_inline simdjson_result document_reference::operator[](const char* key) & noexcept { return (*doc)[key]; } - simdjson_really_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } - simdjson_really_inline simdjson_result document_reference::find_field_unordered(const char* key) & noexcept { return doc->find_field_unordered(key); } - simdjson_really_inline simdjson_result document_reference::type() noexcept { return doc->type(); } - simdjson_really_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } - simdjson_really_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } - simdjson_really_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } - simdjson_really_inline bool 
document_reference::is_negative() noexcept { return doc->is_negative(); } - simdjson_really_inline simdjson_result document_reference::is_integer() noexcept { return doc->is_integer(); } - simdjson_really_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_number_type(); } - simdjson_really_inline simdjson_result document_reference::get_number() noexcept { return doc->get_number(); } - simdjson_really_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } - simdjson_really_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } - simdjson_really_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json(); } - simdjson_really_inline document_reference::operator document& () const noexcept { return *doc; } + simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } + simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } + simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } + simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } + simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } + simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } + simdjson_inline simdjson_result document_reference::find_field(const char* key) & noexcept { return doc->find_field(key); } + simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } + simdjson_inline simdjson_result document_reference::operator[](const char* key) & noexcept { return (*doc)[key]; } + simdjson_inline simdjson_result 
document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } + simdjson_inline simdjson_result document_reference::find_field_unordered(const char* key) & noexcept { return doc->find_field_unordered(key); } + simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } + simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } + simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } + simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } + simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } + simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } + simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } + simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } + simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } + simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } + simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json(); } + simdjson_inline document_reference::operator document& () const noexcept { return *doc; } } // namespace ondemand } // namespace SIMDJSON_BUILTIN_IMPLEMENTATION @@ -29896,172 +30306,188 @@ namespace simdjson { namespace simdjson { - simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_reference value, error_code error) + simdjson_inline 
simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_reference value, error_code error) noexcept : implementation_simdjson_result_base(std::forward(value), error) {} - simdjson_really_inline simdjson_result simdjson_result::count_elements() & noexcept { + simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } - simdjson_really_inline simdjson_result simdjson_result::count_fields() & noexcept { + simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } - simdjson_really_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { if (error()) { return error(); } return first.at(index); } - simdjson_really_inline error_code simdjson_result::rewind() noexcept { + simdjson_inline error_code simdjson_result::rewind() noexcept { if (error()) { return error(); } first.rewind(); return SUCCESS; } - simdjson_really_inline simdjson_result simdjson_result::begin() & noexcept { + simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } - simdjson_really_inline simdjson_result simdjson_result::end() & noexcept { + simdjson_inline simdjson_result simdjson_result::end() & noexcept { return {}; } - simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } - simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(const char* key) & noexcept { + simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char* key) & noexcept { if (error()) { return 
error(); } return first.find_field_unordered(key); } - simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } - simdjson_really_inline simdjson_result simdjson_result::operator[](const char* key) & noexcept { + simdjson_inline simdjson_result simdjson_result::operator[](const char* key) & noexcept { if (error()) { return error(); } return first[key]; } - simdjson_really_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } - simdjson_really_inline simdjson_result simdjson_result::find_field(const char* key) & noexcept { + simdjson_inline simdjson_result simdjson_result::find_field(const char* key) & noexcept { if (error()) { return error(); } return first.find_field(key); } - simdjson_really_inline simdjson_result simdjson_result::get_array() & noexcept { + simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { if (error()) { return error(); } return first.get_array(); } - simdjson_really_inline simdjson_result simdjson_result::get_object() & noexcept { + simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { if (error()) { return error(); } return first.get_object(); } - simdjson_really_inline simdjson_result simdjson_result::get_uint64() noexcept { + simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } - simdjson_really_inline simdjson_result simdjson_result::get_int64() noexcept { + simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); + } + simdjson_inline 
simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } - simdjson_really_inline simdjson_result simdjson_result::get_double() noexcept { + simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); + } + simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } - simdjson_really_inline simdjson_result simdjson_result::get_string() noexcept { + simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } - return first.get_string(); + return first.get_double_in_string(); } - simdjson_really_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); + } + simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); + } + simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } - simdjson_really_inline simdjson_result simdjson_result::get_bool() noexcept { + simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } - simdjson_really_inline simdjson_result simdjson_result::get_value() noexcept { + simdjson_inline simdjson_result simdjson_result::get_value() noexcept { if (error()) { return error(); } return first.get_value(); } - simdjson_really_inline bool simdjson_result::is_null() noexcept { + simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } - 
simdjson_really_inline simdjson_result simdjson_result::type() noexcept { + simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } return first.type(); } - simdjson_really_inline simdjson_result simdjson_result::is_scalar() noexcept { + simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { if (error()) { return error(); } return first.is_scalar(); } - simdjson_really_inline bool simdjson_result::is_negative() noexcept { + simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { if (error()) { return error(); } return first.is_negative(); } - simdjson_really_inline simdjson_result simdjson_result::is_integer() noexcept { + simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } return first.is_integer(); } - simdjson_really_inline simdjson_result simdjson_result::get_number_type() noexcept { + simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if (error()) { return error(); } return first.get_number_type(); } - simdjson_really_inline simdjson_result simdjson_result::get_number() noexcept { + simdjson_inline simdjson_result simdjson_result::get_number() noexcept { if (error()) { return error(); } return first.get_number(); } #if SIMDJSON_EXCEPTIONS - simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false) { + simdjson_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } - simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false) { + simdjson_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } - simdjson_really_inline simdjson_result::operator uint64_t() 
noexcept(false) { + simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } - simdjson_really_inline simdjson_result::operator int64_t() noexcept(false) { + simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } - simdjson_really_inline simdjson_result::operator double() noexcept(false) { + simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } - simdjson_really_inline simdjson_result::operator std::string_view() noexcept(false) { + simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } - simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false) { + simdjson_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } - simdjson_really_inline simdjson_result::operator bool() noexcept(false) { + simdjson_inline simdjson_result::operator bool() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } - simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false) { + simdjson_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif - simdjson_really_inline simdjson_result simdjson_result::current_location() noexcept { + simdjson_inline simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } return first.current_location(); } - simdjson_really_inline simdjson_result simdjson_result::raw_json_token() noexcept { + simdjson_inline 
simdjson_result simdjson_result::raw_json_token() noexcept { if (error()) { return error(); } return first.raw_json_token(); } - simdjson_really_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } @@ -30074,24 +30500,24 @@ namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { - simdjson_really_inline value::value(const value_iterator& _iter) noexcept + simdjson_inline value::value(const value_iterator& _iter) noexcept : iter{ _iter } { } - simdjson_really_inline value value::start(const value_iterator& iter) noexcept { + simdjson_inline value value::start(const value_iterator& iter) noexcept { return iter; } - simdjson_really_inline value value::resume(const value_iterator& iter) noexcept { + simdjson_inline value value::resume(const value_iterator& iter) noexcept { return iter; } - simdjson_really_inline simdjson_result value::get_array() noexcept { + simdjson_inline simdjson_result value::get_array() noexcept { return array::start(iter); } - simdjson_really_inline simdjson_result value::get_object() noexcept { + simdjson_inline simdjson_result value::get_object() noexcept { return object::start(iter); } - simdjson_really_inline simdjson_result value::start_or_resume_object() noexcept { + simdjson_inline simdjson_result value::start_or_resume_object() noexcept { if (iter.at_start()) { return get_object(); } @@ -30100,85 +30526,87 @@ namespace simdjson { } } - simdjson_really_inline simdjson_result value::get_raw_json_string() noexcept { + simdjson_inline simdjson_result value::get_raw_json_string() noexcept { return iter.get_raw_json_string(); } - simdjson_really_inline simdjson_result value::get_string() noexcept { - return iter.get_string(); + simdjson_inline simdjson_result value::get_string(bool allow_replacement) 
noexcept { + return iter.get_string(allow_replacement); } - simdjson_really_inline simdjson_result value::get_double() noexcept { + simdjson_inline simdjson_result value::get_wobbly_string() noexcept { + return iter.get_wobbly_string(); + } + simdjson_inline simdjson_result value::get_double() noexcept { return iter.get_double(); } - simdjson_really_inline simdjson_result value::get_double_in_string() noexcept { + simdjson_inline simdjson_result value::get_double_in_string() noexcept { return iter.get_double_in_string(); } - simdjson_really_inline simdjson_result value::get_uint64() noexcept { + simdjson_inline simdjson_result value::get_uint64() noexcept { return iter.get_uint64(); } - simdjson_really_inline simdjson_result value::get_uint64_in_string() noexcept { + simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { return iter.get_uint64_in_string(); } - simdjson_really_inline simdjson_result value::get_int64() noexcept { + simdjson_inline simdjson_result value::get_int64() noexcept { return iter.get_int64(); } - simdjson_really_inline simdjson_result value::get_int64_in_string() noexcept { + simdjson_inline simdjson_result value::get_int64_in_string() noexcept { return iter.get_int64_in_string(); } - simdjson_really_inline simdjson_result value::get_bool() noexcept { + simdjson_inline simdjson_result value::get_bool() noexcept { return iter.get_bool(); } - simdjson_really_inline bool value::is_null() noexcept { + simdjson_inline simdjson_result value::is_null() noexcept { return iter.is_null(); } + template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } + template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } + template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } + template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } + template<> simdjson_inline simdjson_result value::get() noexcept { 
return get_number(); } + template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } + template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } + template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } + template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } - template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_array(); } - template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_object(); } - template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } - template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_string(); } - template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_number(); } - template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_double(); } - template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_uint64(); } - template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_int64(); } - template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_bool(); } - - template simdjson_really_inline error_code value::get(T& out) noexcept { + template simdjson_inline error_code value::get(T& out) noexcept { return get().get(out); } #if SIMDJSON_EXCEPTIONS - simdjson_really_inline value::operator array() noexcept(false) { + simdjson_inline value::operator array() noexcept(false) { return get_array(); } - simdjson_really_inline value::operator object() noexcept(false) { + simdjson_inline value::operator object() noexcept(false) { return get_object(); } - simdjson_really_inline value::operator uint64_t() noexcept(false) { + simdjson_inline value::operator uint64_t() noexcept(false) { return get_uint64(); } - simdjson_really_inline value::operator int64_t() 
noexcept(false) { + simdjson_inline value::operator int64_t() noexcept(false) { return get_int64(); } - simdjson_really_inline value::operator double() noexcept(false) { + simdjson_inline value::operator double() noexcept(false) { return get_double(); } - simdjson_really_inline value::operator std::string_view() noexcept(false) { - return get_string(); + simdjson_inline value::operator std::string_view() noexcept(false) { + return get_string(false); } - simdjson_really_inline value::operator raw_json_string() noexcept(false) { + simdjson_inline value::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } - simdjson_really_inline value::operator bool() noexcept(false) { + simdjson_inline value::operator bool() noexcept(false) { return get_bool(); } #endif - simdjson_really_inline simdjson_result value::begin() & noexcept { + simdjson_inline simdjson_result value::begin() & noexcept { return get_array().begin(); } - simdjson_really_inline simdjson_result value::end() & noexcept { + simdjson_inline simdjson_result value::end() & noexcept { return {}; } - simdjson_really_inline simdjson_result value::count_elements() & noexcept { + simdjson_inline simdjson_result value::count_elements() & noexcept { simdjson_result answer; auto a = get_array(); answer = a.count_elements(); @@ -30188,77 +30616,77 @@ namespace simdjson { iter.move_at_start(); return answer; } - simdjson_really_inline simdjson_result value::count_fields() & noexcept { + simdjson_inline simdjson_result value::count_fields() & noexcept { simdjson_result answer; auto a = get_object(); answer = a.count_fields(); iter.move_at_start(); return answer; } - simdjson_really_inline simdjson_result value::at(size_t index) noexcept { + simdjson_inline simdjson_result value::at(size_t index) noexcept { auto a = get_array(); return a.at(index); } - simdjson_really_inline simdjson_result value::find_field(std::string_view key) noexcept { + simdjson_inline simdjson_result 
value::find_field(std::string_view key) noexcept { return start_or_resume_object().find_field(key); } - simdjson_really_inline simdjson_result value::find_field(const char* key) noexcept { + simdjson_inline simdjson_result value::find_field(const char* key) noexcept { return start_or_resume_object().find_field(key); } - simdjson_really_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { + simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { return start_or_resume_object().find_field_unordered(key); } - simdjson_really_inline simdjson_result value::find_field_unordered(const char* key) noexcept { + simdjson_inline simdjson_result value::find_field_unordered(const char* key) noexcept { return start_or_resume_object().find_field_unordered(key); } - simdjson_really_inline simdjson_result value::operator[](std::string_view key) noexcept { + simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { return start_or_resume_object()[key]; } - simdjson_really_inline simdjson_result value::operator[](const char* key) noexcept { + simdjson_inline simdjson_result value::operator[](const char* key) noexcept { return start_or_resume_object()[key]; } - simdjson_really_inline simdjson_result value::type() noexcept { + simdjson_inline simdjson_result value::type() noexcept { return iter.type(); } - simdjson_really_inline simdjson_result value::is_scalar() noexcept { + simdjson_inline simdjson_result value::is_scalar() noexcept { json_type this_type; auto error = type().get(this_type); if (error) { return error; } return !((this_type == json_type::array) || (this_type == json_type::object)); } - simdjson_really_inline bool value::is_negative() noexcept { + simdjson_inline bool value::is_negative() noexcept { return iter.is_negative(); } - simdjson_really_inline simdjson_result value::is_integer() noexcept { + simdjson_inline simdjson_result value::is_integer() noexcept { return 
iter.is_integer(); } - simdjson_warn_unused simdjson_really_inline simdjson_result value::get_number_type() noexcept { + simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { return iter.get_number_type(); } - simdjson_warn_unused simdjson_really_inline simdjson_result value::get_number() noexcept { + simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { return iter.get_number(); } - simdjson_really_inline std::string_view value::raw_json_token() noexcept { + simdjson_inline std::string_view value::raw_json_token() noexcept { return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); } - simdjson_really_inline simdjson_result value::current_location() noexcept { + simdjson_inline simdjson_result value::current_location() noexcept { return iter.json_iter().current_location(); } - simdjson_really_inline int32_t value::current_depth() const noexcept { + simdjson_inline int32_t value::current_depth() const noexcept { return iter.json_iter().depth(); } - simdjson_really_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { + simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { json_type t; SIMDJSON_TRY(type().get(t)); switch (t) @@ -30278,211 +30706,215 @@ namespace simdjson { namespace simdjson { - simdjson_really_inline simdjson_result::simdjson_result( + simdjson_inline simdjson_result::simdjson_result( SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value&& value ) noexcept : implementation_simdjson_result_base( std::forward(value) - ) + ) { } - simdjson_really_inline simdjson_result::simdjson_result( + simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : implementation_simdjson_result_base(error) { } - simdjson_really_inline simdjson_result simdjson_result::count_elements() & noexcept { + simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { 
return error(); } return first.count_elements(); } - simdjson_really_inline simdjson_result simdjson_result::count_fields() & noexcept { + simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } - simdjson_really_inline simdjson_result simdjson_result::at(size_t index) noexcept { + simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { if (error()) { return error(); } return first.at(index); } - simdjson_really_inline simdjson_result simdjson_result::begin() & noexcept { + simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } - simdjson_really_inline simdjson_result simdjson_result::end() & noexcept { + simdjson_inline simdjson_result simdjson_result::end() & noexcept { if (error()) { return error(); } return {}; } - simdjson_really_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { + simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { if (error()) { return error(); } return first.find_field(key); } - simdjson_really_inline simdjson_result simdjson_result::find_field(const char* key) noexcept { + simdjson_inline simdjson_result simdjson_result::find_field(const char* key) noexcept { if (error()) { return error(); } return first.find_field(key); } - simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { + simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } - simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(const char* key) noexcept { + simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char* key) noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } - 
simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { + simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { if (error()) { return error(); } return first[key]; } - simdjson_really_inline simdjson_result simdjson_result::operator[](const char* key) noexcept { + simdjson_inline simdjson_result simdjson_result::operator[](const char* key) noexcept { if (error()) { return error(); } return first[key]; } - simdjson_really_inline simdjson_result simdjson_result::get_array() noexcept { + simdjson_inline simdjson_result simdjson_result::get_array() noexcept { if (error()) { return error(); } return first.get_array(); } - simdjson_really_inline simdjson_result simdjson_result::get_object() noexcept { + simdjson_inline simdjson_result simdjson_result::get_object() noexcept { if (error()) { return error(); } return first.get_object(); } - simdjson_really_inline simdjson_result simdjson_result::get_uint64() noexcept { + simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } - simdjson_really_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } - simdjson_really_inline simdjson_result simdjson_result::get_int64() noexcept { + simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } - simdjson_really_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } - simdjson_really_inline simdjson_result simdjson_result::get_double() noexcept { + simdjson_inline simdjson_result 
simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } - simdjson_really_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } - simdjson_really_inline simdjson_result simdjson_result::get_string() noexcept { + simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } - return first.get_string(); + return first.get_string(allow_replacement); } - simdjson_really_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); + } + simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } - simdjson_really_inline simdjson_result simdjson_result::get_bool() noexcept { + simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } - simdjson_really_inline bool simdjson_result::is_null() noexcept { - if (error()) { return false; } + simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } return first.is_null(); } - template simdjson_really_inline simdjson_result simdjson_result::get() noexcept { + template simdjson_inline simdjson_result simdjson_result::get() noexcept { if (error()) { return error(); } return first.get(); } - template simdjson_really_inline error_code simdjson_result::get(T& out) noexcept { + template simdjson_inline error_code simdjson_result::get(T& out) noexcept { if (error()) { return error(); } return first.get(out); } - template<> simdjson_really_inline simdjson_result 
simdjson_result::get() noexcept { + template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { if (error()) { return error(); } return std::move(first); } - template<> simdjson_really_inline error_code simdjson_result::get(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value& out) noexcept { + template<> simdjson_inline error_code simdjson_result::get(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value& out) noexcept { if (error()) { return error(); } out = first; return SUCCESS; } - simdjson_really_inline simdjson_result simdjson_result::type() noexcept { + simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } return first.type(); } - simdjson_really_inline simdjson_result simdjson_result::is_scalar() noexcept { + simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { if (error()) { return error(); } return first.is_scalar(); } - simdjson_really_inline simdjson_result simdjson_result::is_negative() noexcept { + simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { if (error()) { return error(); } return first.is_negative(); } - simdjson_really_inline simdjson_result simdjson_result::is_integer() noexcept { + simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } return first.is_integer(); } - simdjson_really_inline simdjson_result simdjson_result::get_number_type() noexcept { + simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if (error()) { return error(); } return first.get_number_type(); } - simdjson_really_inline simdjson_result simdjson_result::get_number() noexcept { + simdjson_inline simdjson_result simdjson_result::get_number() noexcept { if (error()) { return error(); } return first.get_number(); } #if SIMDJSON_EXCEPTIONS - simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() noexcept(false) { + simdjson_inline simdjson_result::operator 
SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } - simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() noexcept(false) { + simdjson_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } - simdjson_really_inline simdjson_result::operator uint64_t() noexcept(false) { + simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } - simdjson_really_inline simdjson_result::operator int64_t() noexcept(false) { + simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } - simdjson_really_inline simdjson_result::operator double() noexcept(false) { + simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } - simdjson_really_inline simdjson_result::operator std::string_view() noexcept(false) { + simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } - simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false) { + simdjson_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } - simdjson_really_inline simdjson_result::operator bool() noexcept(false) { + simdjson_inline simdjson_result::operator bool() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif - simdjson_really_inline simdjson_result simdjson_result::raw_json_token() noexcept { + simdjson_inline simdjson_result simdjson_result::raw_json_token() 
noexcept { if (error()) { return error(); } return first.raw_json_token(); } - simdjson_really_inline simdjson_result simdjson_result::current_location() noexcept { + simdjson_inline simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } return first.current_location(); } - simdjson_really_inline int32_t simdjson_result::current_depth() const noexcept { + simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { if (error()) { return error(); } return first.current_depth(); } - simdjson_really_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } @@ -30495,41 +30927,41 @@ namespace simdjson { namespace ondemand { // clang 6 doesn't think the default constructor can be noexcept, so we make it explicit - simdjson_really_inline field::field() noexcept : std::pair() {} + simdjson_inline field::field() noexcept : std::pair() {} - simdjson_really_inline field::field(raw_json_string key, ondemand::value&& value) noexcept + simdjson_inline field::field(raw_json_string key, ondemand::value&& value) noexcept : std::pair(key, std::forward(value)) { } - simdjson_really_inline simdjson_result field::start(value_iterator& parent_iter) noexcept { + simdjson_inline simdjson_result field::start(value_iterator& parent_iter) noexcept { raw_json_string key; SIMDJSON_TRY(parent_iter.field_key().get(key)); SIMDJSON_TRY(parent_iter.field_value()); return field::start(parent_iter, key); } - simdjson_really_inline simdjson_result field::start(const value_iterator& parent_iter, raw_json_string key) noexcept { + simdjson_inline simdjson_result field::start(const value_iterator& parent_iter, raw_json_string key) noexcept { return field(key, parent_iter.child()); } - simdjson_really_inline simdjson_warn_unused 
simdjson_result field::unescaped_key() noexcept { + simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. - simdjson_result answer = first.unescape(second.iter.json_iter()); + simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); first.consume(); return answer; } - simdjson_really_inline raw_json_string field::key() const noexcept { + simdjson_inline raw_json_string field::key() const noexcept { SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. return first; } - simdjson_really_inline value& field::value() & noexcept { + simdjson_inline value& field::value() & noexcept { return second; } - simdjson_really_inline value field::value() && noexcept { + simdjson_inline value field::value() && noexcept { return std::forward(*this).second; } @@ -30539,30 +30971,30 @@ namespace simdjson { namespace simdjson { - simdjson_really_inline simdjson_result::simdjson_result( + simdjson_inline simdjson_result::simdjson_result( SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::field&& value ) noexcept : implementation_simdjson_result_base( std::forward(value) - ) + ) { } - simdjson_really_inline simdjson_result::simdjson_result( + simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : implementation_simdjson_result_base(error) { } - simdjson_really_inline simdjson_result simdjson_result::key() noexcept { + simdjson_inline simdjson_result simdjson_result::key() noexcept { if (error()) { return error(); } return first.key(); } - simdjson_really_inline simdjson_result simdjson_result::unescaped_key() noexcept { + simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { if (error()) { return error(); } - return first.unescaped_key(); + return first.unescaped_key(allow_replacement); } - 
simdjson_really_inline simdjson_result simdjson_result::value() noexcept { + simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } return std::move(first.value()); } @@ -30574,46 +31006,46 @@ namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { - simdjson_really_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { + simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { bool has_value; SIMDJSON_TRY(iter.find_field_unordered_raw(key).get(has_value)); if (!has_value) { return NO_SUCH_FIELD; } return value(iter.child()); } - simdjson_really_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { + simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { bool has_value; SIMDJSON_TRY(iter.find_field_unordered_raw(key).get(has_value)); if (!has_value) { return NO_SUCH_FIELD; } return value(iter.child()); } - simdjson_really_inline simdjson_result object::operator[](const std::string_view key) & noexcept { + simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { return find_field_unordered(key); } - simdjson_really_inline simdjson_result object::operator[](const std::string_view key) && noexcept { + simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { return std::forward(*this).find_field_unordered(key); } - simdjson_really_inline simdjson_result object::find_field(const std::string_view key) & noexcept { + simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { bool has_value; SIMDJSON_TRY(iter.find_field_raw(key).get(has_value)); if (!has_value) { return NO_SUCH_FIELD; } return value(iter.child()); } - simdjson_really_inline simdjson_result object::find_field(const std::string_view key) && noexcept { + 
simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { bool has_value; SIMDJSON_TRY(iter.find_field_raw(key).get(has_value)); if (!has_value) { return NO_SUCH_FIELD; } return value(iter.child()); } - simdjson_really_inline simdjson_result object::start(value_iterator& iter) noexcept { + simdjson_inline simdjson_result object::start(value_iterator& iter) noexcept { SIMDJSON_TRY(iter.start_object().error()); return object(iter); } - simdjson_really_inline simdjson_result object::start_root(value_iterator& iter) noexcept { + simdjson_inline simdjson_result object::start_root(value_iterator& iter) noexcept { SIMDJSON_TRY(iter.start_root_object().error()); return object(iter); } - simdjson_really_inline error_code object::consume() noexcept { + simdjson_inline error_code object::consume() noexcept { if (iter.is_at_key()) { /** * whenever you are pointing at a key, calling skip_child() is @@ -30639,7 +31071,7 @@ namespace simdjson { return error_skip; } - simdjson_really_inline simdjson_result object::raw_json() noexcept { + simdjson_inline simdjson_result object::raw_json() noexcept { const uint8_t* starting_point{ iter.peek_start() }; auto error = consume(); if (error) { return error; } @@ -30647,27 +31079,27 @@ namespace simdjson { return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } - simdjson_really_inline simdjson_result object::started(value_iterator& iter) noexcept { + simdjson_inline simdjson_result object::started(value_iterator& iter) noexcept { SIMDJSON_TRY(iter.started_object().error()); return object(iter); } - simdjson_really_inline object object::resume(const value_iterator& iter) noexcept { + simdjson_inline object object::resume(const value_iterator& iter) noexcept { return iter; } - simdjson_really_inline object::object(const value_iterator& _iter) noexcept + simdjson_inline object::object(const value_iterator& _iter) noexcept : iter{ _iter } { } - 
simdjson_really_inline simdjson_result object::begin() noexcept { -#ifdef SIMDJSON_DEVELOPMENT_CHECKS + simdjson_inline simdjson_result object::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } #endif return object_iterator(iter); } - simdjson_really_inline simdjson_result object::end() noexcept { + simdjson_inline simdjson_result object::end() noexcept { return object_iterator(iter); } @@ -30712,7 +31144,7 @@ namespace simdjson { return child; } - simdjson_really_inline simdjson_result object::count_fields() & noexcept { + simdjson_inline simdjson_result object::count_fields() & noexcept { size_t count{ 0 }; // Important: we do not consume any of the values. for (simdjson_unused auto v : *this) { count++; } @@ -30724,14 +31156,14 @@ namespace simdjson { return count; } - simdjson_really_inline simdjson_result object::is_empty() & noexcept { + simdjson_inline simdjson_result object::is_empty() & noexcept { bool is_not_empty; auto error = iter.reset_object().get(is_not_empty); if (error) { return error; } return !is_not_empty; } - simdjson_really_inline simdjson_result object::reset() & noexcept { + simdjson_inline simdjson_result object::reset() & noexcept { return iter.reset_object(); } @@ -30741,45 +31173,45 @@ namespace simdjson { namespace simdjson { - simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object&& value) noexcept + simdjson_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object&& value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} - simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} - simdjson_really_inline simdjson_result simdjson_result::begin() noexcept { + simdjson_inline simdjson_result simdjson_result::begin() 
noexcept { if (error()) { return error(); } return first.begin(); } - simdjson_really_inline simdjson_result simdjson_result::end() noexcept { + simdjson_inline simdjson_result simdjson_result::end() noexcept { if (error()) { return error(); } return first.end(); } - simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } - simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { + simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { if (error()) { return error(); } return std::forward(first).find_field_unordered(key); } - simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } - simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { + simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { if (error()) { return error(); } return std::forward(first)[key]; } - simdjson_really_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } - simdjson_really_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { + simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { if (error()) { return error(); } return std::forward(first).find_field(key); } - simdjson_really_inline simdjson_result 
simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } @@ -30794,7 +31226,7 @@ namespace simdjson { return first.is_empty(); } - simdjson_really_inline simdjson_result simdjson_result::count_fields() & noexcept { + simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } @@ -30806,11 +31238,11 @@ namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { - simdjson_really_inline parser::parser(size_t max_capacity) noexcept + simdjson_inline parser::parser(size_t max_capacity) noexcept : _max_capacity{ max_capacity } { } - simdjson_warn_unused simdjson_really_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { + simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { if (new_capacity > max_capacity()) { return CAPACITY; } if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } @@ -30818,7 +31250,7 @@ namespace simdjson { _capacity = 0; size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); -#ifdef SIMDJSON_DEVELOPMENT_CHECKS +#if SIMDJSON_DEVELOPMENT_CHECKS start_positions.reset(new (std::nothrow) token_position[new_max_depth]); #endif if (implementation) { @@ -30833,7 +31265,7 @@ namespace simdjson { return SUCCESS; } - simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { + simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } // Allocate if needed 
@@ -30846,37 +31278,37 @@ namespace simdjson { return document::start({ reinterpret_cast(json.data()), this }); } - simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const char* json, size_t len, size_t allocated) & noexcept { + simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char* json, size_t len, size_t allocated) & noexcept { return iterate(padded_string_view(json, len, allocated)); } - simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const uint8_t* json, size_t len, size_t allocated) & noexcept { + simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t* json, size_t len, size_t allocated) & noexcept { return iterate(padded_string_view(json, len, allocated)); } - simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { + simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { return iterate(padded_string_view(json, allocated)); } - simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const std::string& json) & noexcept { + simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string& json) & noexcept { return iterate(padded_string_view(json)); } - simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const simdjson_result& result) & noexcept { + simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result& result) & noexcept { // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception SIMDJSON_TRY(result.error()); padded_string_view json = result.value_unsafe(); return iterate(json); } - simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const simdjson_result& result) & noexcept { + simdjson_warn_unused simdjson_inline simdjson_result 
parser::iterate(const simdjson_result& result) & noexcept { // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception SIMDJSON_TRY(result.error()); const padded_string& json = result.value_unsafe(); return iterate(json); } - simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { + simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } // Allocate if needed @@ -30903,28 +31335,35 @@ namespace simdjson { return iterate_many(s.data(), s.length(), batch_size); } - simdjson_really_inline size_t parser::capacity() const noexcept { + simdjson_inline size_t parser::capacity() const noexcept { return _capacity; } - simdjson_really_inline size_t parser::max_capacity() const noexcept { + simdjson_inline size_t parser::max_capacity() const noexcept { return _max_capacity; } - simdjson_really_inline size_t parser::max_depth() const noexcept { + simdjson_inline size_t parser::max_depth() const noexcept { return _max_depth; } - simdjson_really_inline void parser::set_max_capacity(size_t max_capacity) noexcept { - size_t MINIMAL_DOCUMENT_CAPACITY = 32; - if (max_capacity < MINIMAL_DOCUMENT_CAPACITY) { + simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if (max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { _max_capacity = max_capacity; } else { - _max_capacity = MINIMAL_DOCUMENT_CAPACITY; + _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; } } - simdjson_really_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t*& dst) const noexcept { - uint8_t* end = implementation->parse_string(in.buf, dst); + simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t*& dst, bool allow_replacement) const noexcept { + uint8_t* end = 
implementation->parse_string(in.buf, dst, allow_replacement); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end - dst); + dst = end; + return result; + } + + simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t*& dst) const noexcept { + uint8_t* end = implementation->parse_wobbly_string(in.buf, dst); if (!end) { return STRING_ERROR; } std::string_view result(reinterpret_cast(dst), end - dst); dst = end; @@ -30937,9 +31376,9 @@ namespace simdjson { namespace simdjson { - simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::parser&& value) noexcept + simdjson_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::parser&& value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} - simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} } // namespace simdjson @@ -31027,7 +31466,7 @@ namespace simdjson { #endif // SIMDJSON_THREADS_ENABLED - simdjson_really_inline document_stream::document_stream( + simdjson_inline document_stream::document_stream( ondemand::parser& _parser, const uint8_t* _buf, size_t _len, @@ -31049,7 +31488,7 @@ namespace simdjson { #endif } - simdjson_really_inline document_stream::document_stream() noexcept + simdjson_inline document_stream::document_stream() noexcept : parser{ nullptr }, buf{ nullptr }, len{ 0 }, @@ -31061,7 +31500,7 @@ namespace simdjson { { } - simdjson_really_inline document_stream::~document_stream() noexcept + simdjson_inline document_stream::~document_stream() noexcept { #ifdef SIMDJSON_THREADS_ENABLED worker.reset(); @@ -31077,20 +31516,20 @@ namespace simdjson { return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - 
parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; } - simdjson_really_inline document_stream::iterator::iterator() noexcept + simdjson_inline document_stream::iterator::iterator() noexcept : stream{ nullptr }, finished{ true } { } - simdjson_really_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept : stream{ _stream }, finished{ is_end } { } - simdjson_really_inline simdjson_result document_stream::iterator::operator*() noexcept { + simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { //if(stream->error) { return stream->error; } return simdjson_result(stream->doc, stream->error); } - simdjson_really_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { // If there is an error, then we want the iterator // to be finished, no matter what. (E.g., we do not // keep generating documents with errors, or go beyond @@ -31117,17 +31556,17 @@ namespace simdjson { return *this; } - simdjson_really_inline bool document_stream::iterator::operator!=(const document_stream::iterator& other) const noexcept { + simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator& other) const noexcept { return finished != other.finished; } - simdjson_really_inline document_stream::iterator document_stream::begin() noexcept { + simdjson_inline document_stream::iterator document_stream::begin() noexcept { start(); // If there are no documents, we're finished. 
return iterator(this, error == EMPTY); } - simdjson_really_inline document_stream::iterator document_stream::end() noexcept { + simdjson_inline document_stream::iterator document_stream::end() noexcept { return iterator(this, true); } @@ -31259,11 +31698,11 @@ namespace simdjson { } } - simdjson_really_inline size_t document_stream::iterator::current_index() const noexcept { + simdjson_inline size_t document_stream::iterator::current_index() const noexcept { return stream->doc_index; } - simdjson_really_inline std::string_view document_stream::iterator::source() const noexcept { + simdjson_inline std::string_view document_stream::iterator::source() const noexcept { auto depth = stream->doc.iter.depth(); auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); @@ -31276,8 +31715,8 @@ namespace simdjson { depth--; break; default: // Scalar value document - // TODO: Remove any trailing whitespaces - // This returns a string spanning from start of value to the beginning of the next document (excluded) + // TODO: Remove any trailing whitespaces + // This returns a string spanning from start of value to the beginning of the next document (excluded) return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[++cur_struct_index] - current_index() - 1); } cur_struct_index++; @@ -31338,18 +31777,18 @@ namespace simdjson { namespace simdjson { - simdjson_really_inline simdjson_result::simdjson_result( + simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : implementation_simdjson_result_base(error) { } - simdjson_really_inline simdjson_result::simdjson_result( + simdjson_inline simdjson_result::simdjson_result( SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_stream&& value ) noexcept : implementation_simdjson_result_base( std::forward(value) - ) + ) { }