From 7cff2283f24a3fcdb1b87db0e457548675326f14 Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Thu, 23 Jan 2025 20:37:13 +0000 Subject: [PATCH 1/6] remove `SmallVec` and `LazyIndexMap` from json value --- crates/jiter/src/value.rs | 118 +++++++++++----------- crates/jiter/tests/main.rs | 196 ++++++++++++++++++------------------- 2 files changed, 155 insertions(+), 159 deletions(-) diff --git a/crates/jiter/src/value.rs b/crates/jiter/src/value.rs index 6e835496..b62fbe58 100644 --- a/crates/jiter/src/value.rs +++ b/crates/jiter/src/value.rs @@ -1,12 +1,11 @@ use std::borrow::Cow; -use std::sync::Arc; +use std::sync::{Arc, OnceLock}; #[cfg(feature = "num-bigint")] use num_bigint::BigInt; use smallvec::SmallVec; use crate::errors::{json_error, JsonError, JsonResult, DEFAULT_RECURSION_LIMIT}; -use crate::lazy_index_map::LazyIndexMap; use crate::number_decoder::{NumberAny, NumberInt, NumberRange}; use crate::parse::{Parser, Peek}; use crate::string_decoder::{StringDecoder, StringDecoderRange, StringOutput, Tape}; @@ -26,8 +25,13 @@ pub enum JsonValue<'s> { Object(JsonObject<'s>), } -pub type JsonArray<'s> = Arc; 8]>>; -pub type JsonObject<'s> = Arc, JsonValue<'s>>>; +/// Parsed JSON array. +pub type JsonArray<'s> = Arc>>; +/// Parsed JSON object. Note that `jiter` does not attempt to deduplicate keys, +/// so it is possible that the key occurs multiple times in the object. +/// +/// It is up to the user to handle this case and decide how to proceed. +pub type JsonObject<'s> = Arc, JsonValue<'s>)>>; #[cfg(feature = "python")] #[allow(deprecated)] // keeping around for sake of allowing downstream to migrate @@ -149,6 +153,16 @@ impl<'j> JsonValue<'j> { pub fn to_static(&self) -> JsonValue<'static> { value_static(self.clone()) } + + fn empty_array() -> JsonValue<'static> { + static EMPTY_ARRAY: OnceLock> = OnceLock::new(); + JsonValue::Array(EMPTY_ARRAY.get_or_init(|| Arc::new(Vec::new())).clone()) + } + + fn empty_object() -> JsonValue<'static> { + static EMPTY_OBJECT: OnceLock> = OnceLock::new(); + JsonValue::Object(EMPTY_OBJECT.get_or_init(|| Arc::new(Vec::new())).clone()) + } } fn value_static(v: JsonValue<'_>) -> JsonValue<'static> { @@ -160,8 +174,12 @@ fn value_static(v: JsonValue<'_>) -> JsonValue<'static> { JsonValue::BigInt(b) => JsonValue::BigInt(b), JsonValue::Float(f) => JsonValue::Float(f), JsonValue::Str(s) => JsonValue::Str(s.into_owned().into()), - JsonValue::Array(v) => JsonValue::Array(Arc::new(v.iter().map(JsonValue::to_static).collect::>())), - JsonValue::Object(o) => JsonValue::Object(Arc::new(o.to_static())), + JsonValue::Array(v) => JsonValue::Array(Arc::new(v.iter().map(JsonValue::to_static).collect())), + JsonValue::Object(o) => JsonValue::Object(Arc::new( + o.iter() + .map(|(k, v)| (k.clone().into_owned().into(), v.to_static())) + .collect(), + )), } } @@ -252,15 +270,14 @@ fn take_value<'j, 's>( Ok(JsonValue::Str(create_cow(s))) } Peek::Array => { - let array = Arc::new(SmallVec::new()); let peek_first = match parser.array_first() { Ok(Some(peek)) => peek, Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e), - Ok(None) | Err(_) => return Ok(JsonValue::Array(array)), + Ok(None) | Err(_) => return Ok(JsonValue::empty_array()), }; take_value_recursive( peek_first, - RecursedValue::Array(array), + RecursedValue::new_array(), parser, tape, recursion_limit, @@ -271,20 +288,16 @@ fn take_value<'j, 's>( } Peek::Object => { // same for objects - let object = Arc::new(LazyIndexMap::new()); let first_key = match parser.object_first::(tape) { Ok(Some(first_key)) => first_key, Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e), - _ => return Ok(JsonValue::Object(object)), + _ => return Ok(JsonValue::empty_object()), }; let first_key = create_cow(first_key); match parser.peek() { Ok(peek) => take_value_recursive( peek, - RecursedValue::Object { - partial: object, - next_key: first_key, - }, + RecursedValue::new_object(first_key), parser, tape, recursion_limit, @@ -293,7 +306,7 @@ fn take_value<'j, 's>( create_cow, ), Err(e) if !(partial_active && e.allowed_if_partial()) => Err(e), - _ => Ok(JsonValue::Object(object)), + _ => Ok(JsonValue::empty_object()), } } _ => { @@ -316,13 +329,26 @@ fn take_value<'j, 's>( } enum RecursedValue<'s> { - Array(JsonArray<'s>), + Array(Vec>), Object { - partial: JsonObject<'s>, + partial: Vec<(Cow<'s, str>, JsonValue<'s>)>, next_key: Cow<'s, str>, }, } +impl<'s> RecursedValue<'s> { + fn new_array() -> Self { + RecursedValue::Array(Vec::with_capacity(8)) + } + + fn new_object(next_key: Cow<'s, str>) -> Self { + RecursedValue::Object { + partial: Vec::with_capacity(8), + next_key, + } + } +} + #[inline(never)] // this is an iterative algo called only from take_value, no point in inlining #[allow(clippy::too_many_lines)] // FIXME? #[allow(clippy::too_many_arguments)] @@ -354,7 +380,6 @@ fn take_value_recursive<'j, 's>( 'recursion: loop { let mut value = match &mut current_recursion { RecursedValue::Array(array) => { - let array = Arc::get_mut(array).expect("sole writer"); loop { let result = match peek { Peek::True => parser.consume_true().map(|()| JsonValue::Bool(true)), @@ -364,30 +389,22 @@ fn take_value_recursive<'j, 's>( .consume_string::(tape, allow_partial.allow_trailing_str()) .map(|s| JsonValue::Str(create_cow(s))), Peek::Array => { - let array = Arc::new(SmallVec::new()); match parser.array_first() { Ok(Some(first_peek)) => { - push_recursion!(first_peek, RecursedValue::Array(array)); + push_recursion!(first_peek, RecursedValue::new_array()); // immediately jump to process the first value in the array continue 'recursion; } Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e), _ => (), }; - Ok(JsonValue::Array(array)) + Ok(JsonValue::empty_array()) } Peek::Object => { - let object = Arc::new(LazyIndexMap::new()); match parser.object_first::(tape) { Ok(Some(first_key)) => match parser.peek() { Ok(peek) => { - push_recursion!( - peek, - RecursedValue::Object { - partial: object, - next_key: create_cow(first_key) - } - ); + push_recursion!(peek, RecursedValue::new_object(create_cow(first_key))); continue 'recursion; } Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e), @@ -396,7 +413,7 @@ fn take_value_recursive<'j, 's>( Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e), _ => (), }; - Ok(JsonValue::Object(object)) + Ok(JsonValue::empty_object()) } _ => parser .consume_number::(peek.into_inner(), allow_inf_nan) @@ -432,8 +449,7 @@ fn take_value_recursive<'j, 's>( let RecursedValue::Array(mut array) = current_recursion else { unreachable!("known to be in array recursion"); }; - - Arc::get_mut(&mut array).expect("sole writer to value").push(value); + array.push(value); array } Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e), @@ -445,11 +461,10 @@ fn take_value_recursive<'j, 's>( } }; - break JsonValue::Array(array); + break JsonValue::Array(Arc::new(array)); } } RecursedValue::Object { partial, next_key } => { - let partial = Arc::get_mut(partial).expect("sole writer"); loop { let result = match peek { Peek::True => parser.consume_true().map(|()| JsonValue::Bool(true)), @@ -459,30 +474,22 @@ fn take_value_recursive<'j, 's>( .consume_string::(tape, allow_partial.allow_trailing_str()) .map(|s| JsonValue::Str(create_cow(s))), Peek::Array => { - let array = Arc::new(SmallVec::new()); match parser.array_first() { Ok(Some(first_peek)) => { - push_recursion!(first_peek, RecursedValue::Array(array)); + push_recursion!(first_peek, RecursedValue::new_array()); // immediately jump to process the first value in the array continue 'recursion; } Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e), _ => (), }; - Ok(JsonValue::Array(array)) + Ok(JsonValue::empty_array()) } Peek::Object => { - let object = Arc::new(LazyIndexMap::new()); match parser.object_first::(tape) { Ok(Some(first_key)) => match parser.peek() { Ok(peek) => { - push_recursion!( - peek, - RecursedValue::Object { - partial: object, - next_key: create_cow(first_key) - } - ); + push_recursion!(peek, RecursedValue::new_object(create_cow(first_key))); continue 'recursion; } Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e), @@ -491,7 +498,7 @@ fn take_value_recursive<'j, 's>( Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e), _ => (), }; - Ok(JsonValue::Object(object)) + Ok(JsonValue::empty_object()) } _ => parser .consume_number::(peek.into_inner(), allow_inf_nan) @@ -518,10 +525,10 @@ fn take_value_recursive<'j, 's>( match parser.peek() { Ok(next_peek) => { // object continuing - partial.insert( + partial.push(( std::mem::replace(next_key, create_cow(yet_another_key)), value, - ); + )); peek = next_peek; continue; } @@ -536,8 +543,7 @@ fn take_value_recursive<'j, 's>( let RecursedValue::Object { mut partial, next_key } = current_recursion else { unreachable!("known to be in object recursion"); }; - - Arc::get_mut(&mut partial).expect("sole writer").insert(next_key, value); + partial.push((next_key, value)); partial } Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e), @@ -549,7 +555,7 @@ fn take_value_recursive<'j, 's>( } }; - break JsonValue::Object(object); + break JsonValue::Object(Arc::new(object)); } } }; @@ -565,7 +571,7 @@ fn take_value_recursive<'j, 's>( value = match current_recursion { RecursedValue::Array(mut array) => { - Arc::get_mut(&mut array).expect("sole writer").push(value); + array.push(value); match parser.array_step() { Ok(Some(next_peek)) => { current_recursion = RecursedValue::Array(array); @@ -574,10 +580,10 @@ fn take_value_recursive<'j, 's>( Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e), _ => (), } - JsonValue::Array(array) + JsonValue::Array(Arc::new(array)) } RecursedValue::Object { mut partial, next_key } => { - Arc::get_mut(&mut partial).expect("sole writer").insert(next_key, value); + partial.push((next_key, value)); match parser.object_step::(tape) { Ok(Some(next_key)) => match parser.peek() { @@ -595,7 +601,7 @@ fn take_value_recursive<'j, 's>( _ => (), } - JsonValue::Object(partial) + JsonValue::Object(Arc::new(partial)) } } }; diff --git a/crates/jiter/tests/main.rs b/crates/jiter/tests/main.rs index a756cc29..30b31aad 100644 --- a/crates/jiter/tests/main.rs +++ b/crates/jiter/tests/main.rs @@ -502,11 +502,11 @@ fn nan_disallowed_wrong_type() { fn value_allow_nan_inf() { let json = r#"[1, NaN, Infinity, -Infinity]"#; let value = JsonValue::parse(json.as_bytes(), true).unwrap(); - let expected = JsonValue::Array(Arc::new(smallvec![ + let expected = JsonValue::Array(Arc::new([ JsonValue::Int(1), JsonValue::Float(f64::NAN), JsonValue::Float(f64::INFINITY), - JsonValue::Float(f64::NEG_INFINITY) + JsonValue::Float(f64::NEG_INFINITY), ])); // compare debug since `f64::NAN != f64::NAN` assert_eq!(format!("{:?}", value), format!("{:?}", expected)); @@ -664,17 +664,14 @@ fn json_value_object() { let json = r#"{"foo": "bar", "spam": [1, null, true]}"#; let v = JsonValue::parse(json.as_bytes(), false).unwrap(); - let mut expected = LazyIndexMap::new(); - expected.insert("foo".into(), JsonValue::Str("bar".into())); - expected.insert( - "spam".into(), - JsonValue::Array(Arc::new(smallvec![ - JsonValue::Int(1), - JsonValue::Null, - JsonValue::Bool(true) - ])), - ); - assert_eq!(v, JsonValue::Object(Arc::new(expected))); + let expected = JsonValue::Object(Arc::new([ + ("foo".into(), JsonValue::Str("bar".into())), + ( + "spam".into(), + JsonValue::Array(Arc::new([JsonValue::Int(1), JsonValue::Null, JsonValue::Bool(true)])), + ), + ])); + assert_eq!(v, expected); } #[test] @@ -682,10 +679,10 @@ fn json_value_string() { let json = r#"["foo", "\u00a3", "\""]"#; let v = JsonValue::parse(json.as_bytes(), false).unwrap(); - let expected = JsonValue::Array(Arc::new(smallvec![ + let expected = JsonValue::Array(Arc::new([ JsonValue::Str("foo".into()), JsonValue::Str("£".into()), - JsonValue::Str("\"".into()) + JsonValue::Str("\"".into()), ])); assert_eq!(v, expected); } @@ -696,11 +693,7 @@ fn parse_array_3() { let v = JsonValue::parse(json.as_bytes(), false).unwrap(); assert_eq!( v, - JsonValue::Array(Arc::new(smallvec![ - JsonValue::Int(1), - JsonValue::Null, - JsonValue::Bool(true) - ])) + JsonValue::Array(Arc::new([JsonValue::Int(1), JsonValue::Null, JsonValue::Bool(true)])) ); } @@ -708,7 +701,7 @@ fn parse_array_3() { fn parse_array_empty() { let json = r#"[ ]"#; let v = JsonValue::parse(json.as_bytes(), false).unwrap(); - assert_eq!(v, JsonValue::Array(Arc::new(smallvec![]))); + assert_eq!(v, JsonValue::Array(Arc::new([]))); } #[test] @@ -725,10 +718,10 @@ fn parse_value_nested() { let v = JsonValue::parse(json.as_bytes(), false).unwrap(); assert_eq!( v, - JsonValue::Array(Arc::new(smallvec![ + JsonValue::Array(Arc::new([ JsonValue::Int(1), JsonValue::Int(2), - JsonValue::Array(Arc::new(smallvec![JsonValue::Int(3), JsonValue::Int(4)])), + JsonValue::Array(Arc::new([JsonValue::Int(3), JsonValue::Int(4)])), JsonValue::Int(5), JsonValue::Int(6), ]),) @@ -924,35 +917,35 @@ fn test_crazy_massive_int() { jiter.finish().unwrap(); } -#[test] -fn unique_iter_object() { - let value = JsonValue::parse(br#" {"x": 1, "x": 2} "#, false).unwrap(); - if let JsonValue::Object(obj) = value { - assert_eq!(obj.len(), 1); - let mut unique = obj.iter_unique(); - let first = unique.next().unwrap(); - assert_eq!(first.0, "x"); - assert_eq!(first.1, &JsonValue::Int(2)); - assert!(unique.next().is_none()); - } else { - panic!("expected object"); - } -} - -#[test] -fn unique_iter_object_repeat() { - let value = JsonValue::parse(br#" {"x": 1, "x": 1} "#, false).unwrap(); - if let JsonValue::Object(obj) = value { - assert_eq!(obj.len(), 1); - let mut unique = obj.iter_unique(); - let first = unique.next().unwrap(); - assert_eq!(first.0, "x"); - assert_eq!(first.1, &JsonValue::Int(1)); - assert!(unique.next().is_none()); - } else { - panic!("expected object"); - } -} +// #[test] +// fn unique_iter_object() { +// let value = JsonValue::parse(br#" {"x": 1, "x": 2} "#, false).unwrap(); +// if let JsonValue::Object(obj) = value { +// assert_eq!(obj.len(), 1); +// let mut unique = obj.iter_unique(); +// let first = unique.next().unwrap(); +// assert_eq!(first.0, "x"); +// assert_eq!(first.1, &JsonValue::Int(2)); +// assert!(unique.next().is_none()); +// } else { +// panic!("expected object"); +// } +// } + +// #[test] +// fn unique_iter_object_repeat() { +// let value = JsonValue::parse(br#" {"x": 1, "x": 1} "#, false).unwrap(); +// if let JsonValue::Object(obj) = value { +// assert_eq!(obj.len(), 1); +// let mut unique = obj.iter_unique(); +// let first = unique.next().unwrap(); +// assert_eq!(first.0, "x"); +// assert_eq!(first.1, &JsonValue::Int(1)); +// assert!(unique.next().is_none()); +// } else { +// panic!("expected object"); +// } +// } #[test] fn test_recursion_limit() { @@ -1297,25 +1290,25 @@ fn value_owned() -> JsonValue<'static> { JsonValue::parse_owned(s.as_bytes(), false, PartialMode::Off).unwrap() } -#[test] -fn test_owned_value() { - let value = value_owned(); - let obj = match value { - JsonValue::Object(obj) => obj, - _ => panic!("expected object"), - }; - assert_eq!(obj.get("int").unwrap(), &JsonValue::Int(1)); - assert_eq!(obj.get("const").unwrap(), &JsonValue::Bool(true)); - assert_eq!(obj.get("float").unwrap(), &JsonValue::Float(1.2)); - let array = match obj.get("array").unwrap() { - JsonValue::Array(array) => array, - _ => panic!("expected array"), - }; - assert_eq!( - array, - &Arc::new(smallvec![JsonValue::Int(1), JsonValue::Bool(false), JsonValue::Null]) - ); -} +// #[test] +// fn test_owned_value() { +// let value = value_owned(); +// let obj = match value { +// JsonValue::Object(obj) => obj, +// _ => panic!("expected object"), +// }; +// assert_eq!(obj.get("int").unwrap(), &JsonValue::Int(1)); +// assert_eq!(obj.get("const").unwrap(), &JsonValue::Bool(true)); +// assert_eq!(obj.get("float").unwrap(), &JsonValue::Float(1.2)); +// let array = match obj.get("array").unwrap() { +// JsonValue::Array(array) => array, +// _ => panic!("expected array"), +// }; +// assert_eq!( +// array, +// &Arc::new(smallvec![JsonValue::Int(1), JsonValue::Bool(false), JsonValue::Null]) +// ); +// } fn value_into_static() -> JsonValue<'static> { let s = r#"{ "big_int": 92233720368547758070, "const": true, "float": 1.2, "array": [1, false, null, "x"]}"# @@ -1324,32 +1317,32 @@ fn value_into_static() -> JsonValue<'static> { jiter.into_static() } -#[cfg(feature = "num-bigint")] -#[test] -fn test_into_static() { - let value = crate::value_into_static(); - let obj = match value { - JsonValue::Object(obj) => obj, - _ => panic!("expected object"), - }; - let expected_big_int = BigInt::from_str("92233720368547758070").unwrap(); - assert_eq!(obj.get("big_int").unwrap(), &JsonValue::BigInt(expected_big_int)); - assert_eq!(obj.get("const").unwrap(), &JsonValue::Bool(true)); - assert_eq!(obj.get("float").unwrap(), &JsonValue::Float(1.2)); - let array = match obj.get("array").unwrap() { - JsonValue::Array(array) => array, - _ => panic!("expected array"), - }; - assert_eq!( - array, - &Arc::new(smallvec![ - JsonValue::Int(1), - JsonValue::Bool(false), - JsonValue::Null, - JsonValue::Str("x".into()) - ]) - ); -} +// #[cfg(feature = "num-bigint")] +// #[test] +// fn test_into_static() { +// let value = crate::value_into_static(); +// let obj = match value { +// JsonValue::Object(obj) => obj, +// _ => panic!("expected object"), +// }; +// let expected_big_int = BigInt::from_str("92233720368547758070").unwrap(); +// assert_eq!(obj.get("big_int").unwrap(), &JsonValue::BigInt(expected_big_int)); +// assert_eq!(obj.get("const").unwrap(), &JsonValue::Bool(true)); +// assert_eq!(obj.get("float").unwrap(), &JsonValue::Float(1.2)); +// let array = match obj.get("array").unwrap() { +// JsonValue::Array(array) => array, +// _ => panic!("expected array"), +// }; +// assert_eq!( +// array, +// &Arc::new(smallvec![ +// JsonValue::Int(1), +// JsonValue::Bool(false), +// JsonValue::Null, +// JsonValue::Str("x".into()) +// ]) +// ); +// } #[test] fn jiter_next_value_borrowed() { @@ -1679,7 +1672,7 @@ fn test_value_partial_array_on() { let value = JsonValue::parse_with_config(json_bytes, false, PartialMode::On).unwrap(); assert_eq!( value, - JsonValue::Array(Arc::new(smallvec![ + JsonValue::Array(Arc::new([ JsonValue::Str("string".into()), JsonValue::Bool(true), JsonValue::Null, @@ -1700,7 +1693,7 @@ fn test_value_partial_array_trailing_strings() { let value = JsonValue::parse_with_config(json_bytes, false, PartialMode::TrailingStrings).unwrap(); assert_eq!( value, - JsonValue::Array(Arc::new(smallvec![ + JsonValue::Array(Arc::new([ JsonValue::Str("string".into()), JsonValue::Bool(true), JsonValue::Null, @@ -1732,10 +1725,7 @@ fn test_value_partial_object() { assert_eq!(pairs[3].clone(), (Cow::Borrowed("d"), JsonValue::Null)); assert_eq!(pairs[4].clone(), (Cow::Borrowed("e"), JsonValue::Int(1))); assert_eq!(pairs[5].clone(), (Cow::Borrowed("f"), JsonValue::Float(2.22))); - assert_eq!( - pairs[6].clone(), - (Cow::Borrowed("g"), JsonValue::Array(Arc::new(smallvec![]))) - ); + assert_eq!(pairs[6].clone(), (Cow::Borrowed("g"), JsonValue::Array(Arc::new([])))); // test all position in the string for i in 1..json_bytes.len() { let partial_json = &json_bytes[..i]; From 8f2ec5dd20715cc3ccbdb8ff19289a050c7113ef Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Tue, 28 Jan 2025 13:20:01 +0000 Subject: [PATCH 2/6] cleanup, fix tests --- crates/jiter/src/lazy_index_map.rs | 174 -------------------------- crates/jiter/src/lib.rs | 2 - crates/jiter/tests/main.rs | 191 +++++++++++------------------ 3 files changed, 72 insertions(+), 295 deletions(-) delete mode 100644 crates/jiter/src/lazy_index_map.rs diff --git a/crates/jiter/src/lazy_index_map.rs b/crates/jiter/src/lazy_index_map.rs deleted file mode 100644 index b57bdcb0..00000000 --- a/crates/jiter/src/lazy_index_map.rs +++ /dev/null @@ -1,174 +0,0 @@ -use std::borrow::{Borrow, Cow}; -use std::fmt; -use std::hash::Hash; -use std::slice::Iter as SliceIter; -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::OnceLock; - -use ahash::AHashMap; -use smallvec::SmallVec; - -/// Like [IndexMap](https://docs.rs/indexmap/latest/indexmap/) but only builds the lookup map when it's needed. -pub struct LazyIndexMap { - vec: SmallVec<[(K, V); 8]>, - map: OnceLock>, - last_find: AtomicUsize, -} - -impl Default for LazyIndexMap -where - K: Clone + fmt::Debug + Eq + Hash, - V: fmt::Debug, -{ - fn default() -> Self { - Self::new() - } -} - -impl Clone for LazyIndexMap { - fn clone(&self) -> Self { - Self { - vec: self.vec.clone(), - map: self.map.clone(), - last_find: AtomicUsize::new(0), - } - } -} - -impl fmt::Debug for LazyIndexMap -where - K: Clone + fmt::Debug + Eq + Hash, - V: fmt::Debug, -{ - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_map().entries(self.iter_unique()).finish() - } -} - -// picked to be a good tradeoff after experimenting with `lazy_map_lookup` benchmark, should cover most models -const HASHMAP_THRESHOLD: usize = 16; - -/// Like [IndexMap](https://docs.rs/indexmap/latest/indexmap/) but only builds the lookup map when it's needed. -impl LazyIndexMap -where - K: Clone + fmt::Debug + Eq + Hash, - V: fmt::Debug, -{ - pub fn new() -> Self { - Self { - vec: SmallVec::new(), - map: OnceLock::new(), - last_find: AtomicUsize::new(0), - } - } - - pub fn insert(&mut self, key: K, value: V) { - if let Some(map) = self.map.get_mut() { - map.insert(key.clone(), self.vec.len()); - } - self.vec.push((key, value)); - } - - pub fn len(&self) -> usize { - self.get_map().len() - } - - pub fn is_empty(&self) -> bool { - self.vec.is_empty() - } - - pub fn get(&self, key: &Q) -> Option<&V> - where - K: Borrow + PartialEq, - Q: Hash + Eq + ?Sized, - { - let vec_len = self.vec.len(); - // if the vec is longer than the threshold, we use the hashmap for lookups - if vec_len > HASHMAP_THRESHOLD { - self.get_map().get(key).map(|&i| &self.vec[i].1) - } else { - // otherwise we find the value in the vec - // we assume the most likely position for the match is at `last_find + 1` - let first_try = self.last_find.load(Ordering::Relaxed) + 1; - for i in first_try..first_try + vec_len { - let index = i % vec_len; - let (k, v) = &self.vec[index]; - if k == key { - self.last_find.store(index, Ordering::Relaxed); - return Some(v); - } - } - None - } - } - - pub fn keys(&self) -> impl Iterator { - self.vec.iter().map(|(k, _)| k) - } - - pub fn iter(&self) -> SliceIter<'_, (K, V)> { - self.vec.iter() - } - - pub fn iter_unique(&self) -> impl Iterator { - IterUnique { - vec: &self.vec, - map: self.get_map(), - index: 0, - } - } - - fn get_map(&self) -> &AHashMap { - self.map.get_or_init(|| { - self.vec - .iter() - .enumerate() - .map(|(index, (key, _))| (key.clone(), index)) - .collect() - }) - } -} - -impl<'j> LazyIndexMap, crate::JsonValue<'j>> { - pub(crate) fn to_static(&self) -> LazyIndexMap, crate::JsonValue<'static>> { - LazyIndexMap { - vec: self - .vec - .iter() - .map(|(k, v)| (k.to_string().into(), v.to_static())) - .collect(), - map: OnceLock::new(), - last_find: AtomicUsize::new(0), - } - } -} - -impl PartialEq for LazyIndexMap { - fn eq(&self, other: &Self) -> bool { - self.vec == other.vec - } -} - -struct IterUnique<'a, K, V> { - vec: &'a SmallVec<[(K, V); 8]>, - map: &'a AHashMap, - index: usize, -} - -impl<'a, K: Hash + Eq, V> Iterator for IterUnique<'a, K, V> { - type Item = (&'a K, &'a V); - - fn next(&mut self) -> Option { - while self.index < self.vec.len() { - let (k, v) = &self.vec[self.index]; - if let Some(map_index) = self.map.get(k) { - if *map_index == self.index { - self.index += 1; - return Some((k, v)); - } - } - self.index += 1; - } - None - } -} diff --git a/crates/jiter/src/lib.rs b/crates/jiter/src/lib.rs index ac39e55d..70a14e9b 100644 --- a/crates/jiter/src/lib.rs +++ b/crates/jiter/src/lib.rs @@ -150,7 +150,6 @@ mod errors; mod jiter; -mod lazy_index_map; mod number_decoder; mod parse; #[cfg(feature = "python")] @@ -166,7 +165,6 @@ mod value; pub use errors::{JiterError, JiterErrorType, JsonError, JsonErrorType, JsonResult, JsonType, LinePosition}; pub use jiter::{Jiter, JiterResult}; -pub use lazy_index_map::LazyIndexMap; pub use number_decoder::{NumberAny, NumberInt}; pub use parse::Peek; pub use value::{JsonArray, JsonObject, JsonValue}; diff --git a/crates/jiter/tests/main.rs b/crates/jiter/tests/main.rs index 30b31aad..4030018c 100644 --- a/crates/jiter/tests/main.rs +++ b/crates/jiter/tests/main.rs @@ -7,10 +7,9 @@ use std::sync::Arc; #[cfg(feature = "num-bigint")] use num_bigint::BigInt; -use smallvec::smallvec; use jiter::{ - Jiter, JiterErrorType, JiterResult, JsonErrorType, JsonType, JsonValue, LazyIndexMap, LinePosition, NumberAny, + Jiter, JiterErrorType, JiterResult, JsonErrorType, JsonObject, JsonType, JsonValue, LinePosition, NumberAny, NumberInt, PartialMode, Peek, }; @@ -502,7 +501,7 @@ fn nan_disallowed_wrong_type() { fn value_allow_nan_inf() { let json = r#"[1, NaN, Infinity, -Infinity]"#; let value = JsonValue::parse(json.as_bytes(), true).unwrap(); - let expected = JsonValue::Array(Arc::new([ + let expected = JsonValue::Array(Arc::new(vec![ JsonValue::Int(1), JsonValue::Float(f64::NAN), JsonValue::Float(f64::INFINITY), @@ -664,11 +663,15 @@ fn json_value_object() { let json = r#"{"foo": "bar", "spam": [1, null, true]}"#; let v = JsonValue::parse(json.as_bytes(), false).unwrap(); - let expected = JsonValue::Object(Arc::new([ + let expected = JsonValue::Object(Arc::new(vec![ ("foo".into(), JsonValue::Str("bar".into())), ( "spam".into(), - JsonValue::Array(Arc::new([JsonValue::Int(1), JsonValue::Null, JsonValue::Bool(true)])), + JsonValue::Array(Arc::new(vec![ + JsonValue::Int(1), + JsonValue::Null, + JsonValue::Bool(true), + ])), ), ])); assert_eq!(v, expected); @@ -679,7 +682,7 @@ fn json_value_string() { let json = r#"["foo", "\u00a3", "\""]"#; let v = JsonValue::parse(json.as_bytes(), false).unwrap(); - let expected = JsonValue::Array(Arc::new([ + let expected = JsonValue::Array(Arc::new(vec![ JsonValue::Str("foo".into()), JsonValue::Str("£".into()), JsonValue::Str("\"".into()), @@ -693,7 +696,11 @@ fn parse_array_3() { let v = JsonValue::parse(json.as_bytes(), false).unwrap(); assert_eq!( v, - JsonValue::Array(Arc::new([JsonValue::Int(1), JsonValue::Null, JsonValue::Bool(true)])) + JsonValue::Array(Arc::new(vec![ + JsonValue::Int(1), + JsonValue::Null, + JsonValue::Bool(true) + ])) ); } @@ -701,7 +708,7 @@ fn parse_array_3() { fn parse_array_empty() { let json = r#"[ ]"#; let v = JsonValue::parse(json.as_bytes(), false).unwrap(); - assert_eq!(v, JsonValue::Array(Arc::new([]))); + assert_eq!(v, JsonValue::Array(Arc::new(vec![]))); } #[test] @@ -718,10 +725,10 @@ fn parse_value_nested() { let v = JsonValue::parse(json.as_bytes(), false).unwrap(); assert_eq!( v, - JsonValue::Array(Arc::new([ + JsonValue::Array(Arc::new(vec![ JsonValue::Int(1), JsonValue::Int(2), - JsonValue::Array(Arc::new([JsonValue::Int(3), JsonValue::Int(4)])), + JsonValue::Array(Arc::new(vec![JsonValue::Int(3), JsonValue::Int(4)])), JsonValue::Int(5), JsonValue::Int(6), ]),) @@ -1064,67 +1071,6 @@ fn test_big_int_errs() { } } -#[test] -fn lazy_index_map_pretty() { - let mut map: LazyIndexMap, JsonValue<'_>> = LazyIndexMap::new(); - assert!(map.is_empty()); - map.insert("foo".into(), JsonValue::Str("bar".into())); - assert!(!map.is_empty()); - map.insert("spam".into(), JsonValue::Null); - assert_eq!(format!("{map:?}"), r#"{"foo": Str("bar"), "spam": Null}"#); - let keys = map.keys().collect::>(); - assert_eq!(keys, vec!["foo", "spam"]); -} - -#[test] -fn lazy_index_map_small_get() { - let mut map: LazyIndexMap, JsonValue<'_>> = LazyIndexMap::new(); - map.insert("foo".into(), JsonValue::Str("bar".into())); - map.insert("spam".into(), JsonValue::Null); - - assert_eq!(map.get("foo"), Some(&JsonValue::Str("bar".into()))); - assert_eq!(map.get("spam"), Some(&JsonValue::Null)); - assert_eq!(map.get("spam"), Some(&JsonValue::Null)); - assert_eq!(map.get("foo"), Some(&JsonValue::Str("bar".into()))); - assert_eq!(map.get("other"), None); -} - -#[test] -fn lazy_index_map_big_get() { - let mut map: LazyIndexMap, JsonValue<'_>> = LazyIndexMap::new(); - - for i in 0..25 { - let key = i.to_string().into(); - map.insert(key, JsonValue::Int(i)); - } - - assert_eq!(map.get("0"), Some(&JsonValue::Int(0))); - assert_eq!(map.get("10"), Some(&JsonValue::Int(10))); - assert_eq!(map.get("22"), Some(&JsonValue::Int(22))); - assert_eq!(map.get("other"), None); -} - -#[test] -fn lazy_index_map_clone() { - let mut map: LazyIndexMap, JsonValue<'_>> = LazyIndexMap::default(); - - map.insert("foo".into(), JsonValue::Str("bar".into())); - map.insert("spam".into(), JsonValue::Null); - - assert_eq!(map.get("foo"), Some(&JsonValue::Str("bar".into()))); - assert_eq!(map.get("spam"), Some(&JsonValue::Null)); - assert_eq!(map.get("spam"), Some(&JsonValue::Null)); - assert_eq!(map.get("foo"), Some(&JsonValue::Str("bar".into()))); - assert_eq!(map.get("other"), None); - - let map2 = map.clone(); - assert_eq!(map2.get("foo"), Some(&JsonValue::Str("bar".into()))); - assert_eq!(map2.get("spam"), Some(&JsonValue::Null)); - assert_eq!(map2.get("spam"), Some(&JsonValue::Null)); - assert_eq!(map2.get("foo"), Some(&JsonValue::Str("bar".into()))); - assert_eq!(map2.get("other"), None); -} - #[test] fn readme_jiter() { let json_data = r#" @@ -1290,25 +1236,29 @@ fn value_owned() -> JsonValue<'static> { JsonValue::parse_owned(s.as_bytes(), false, PartialMode::Off).unwrap() } -// #[test] -// fn test_owned_value() { -// let value = value_owned(); -// let obj = match value { -// JsonValue::Object(obj) => obj, -// _ => panic!("expected object"), -// }; -// assert_eq!(obj.get("int").unwrap(), &JsonValue::Int(1)); -// assert_eq!(obj.get("const").unwrap(), &JsonValue::Bool(true)); -// assert_eq!(obj.get("float").unwrap(), &JsonValue::Float(1.2)); -// let array = match obj.get("array").unwrap() { -// JsonValue::Array(array) => array, -// _ => panic!("expected array"), -// }; -// assert_eq!( -// array, -// &Arc::new(smallvec![JsonValue::Int(1), JsonValue::Bool(false), JsonValue::Null]) -// ); -// } +fn get_key<'a, 'j>(o: &'a JsonObject<'j>, key: &str) -> Option<&'a JsonValue<'j>> { + o.iter().find_map(|(k, v)| (k == key).then_some(v)) +} + +#[test] +fn test_owned_value() { + let value = value_owned(); + let obj = match value { + JsonValue::Object(obj) => obj, + _ => panic!("expected object"), + }; + assert_eq!(get_key(&obj, "int").unwrap(), &JsonValue::Int(1)); + assert_eq!(get_key(&obj, "const").unwrap(), &JsonValue::Bool(true)); + assert_eq!(get_key(&obj, "float").unwrap(), &JsonValue::Float(1.2)); + let array = match get_key(&obj, "array").unwrap() { + JsonValue::Array(array) => array, + _ => panic!("expected array"), + }; + assert_eq!( + array, + &Arc::new(vec![JsonValue::Int(1), JsonValue::Bool(false), JsonValue::Null]) + ); +} fn value_into_static() -> JsonValue<'static> { let s = r#"{ "big_int": 92233720368547758070, "const": true, "float": 1.2, "array": [1, false, null, "x"]}"# @@ -1317,32 +1267,32 @@ fn value_into_static() -> JsonValue<'static> { jiter.into_static() } -// #[cfg(feature = "num-bigint")] -// #[test] -// fn test_into_static() { -// let value = crate::value_into_static(); -// let obj = match value { -// JsonValue::Object(obj) => obj, -// _ => panic!("expected object"), -// }; -// let expected_big_int = BigInt::from_str("92233720368547758070").unwrap(); -// assert_eq!(obj.get("big_int").unwrap(), &JsonValue::BigInt(expected_big_int)); -// assert_eq!(obj.get("const").unwrap(), &JsonValue::Bool(true)); -// assert_eq!(obj.get("float").unwrap(), &JsonValue::Float(1.2)); -// let array = match obj.get("array").unwrap() { -// JsonValue::Array(array) => array, -// _ => panic!("expected array"), -// }; -// assert_eq!( -// array, -// &Arc::new(smallvec![ -// JsonValue::Int(1), -// JsonValue::Bool(false), -// JsonValue::Null, -// JsonValue::Str("x".into()) -// ]) -// ); -// } +#[cfg(feature = "num-bigint")] +#[test] +fn test_into_static() { + let value = crate::value_into_static(); + let obj = match value { + JsonValue::Object(obj) => obj, + _ => panic!("expected object"), + }; + let expected_big_int = BigInt::from_str("92233720368547758070").unwrap(); + assert_eq!(get_key(&obj, "big_int").unwrap(), &JsonValue::BigInt(expected_big_int)); + assert_eq!(get_key(&obj, "const").unwrap(), &JsonValue::Bool(true)); + assert_eq!(get_key(&obj, "float").unwrap(), &JsonValue::Float(1.2)); + let array = match get_key(&obj, "array").unwrap() { + JsonValue::Array(array) => array, + _ => panic!("expected array"), + }; + assert_eq!( + array, + &Arc::new(vec![ + JsonValue::Int(1), + JsonValue::Bool(false), + JsonValue::Null, + JsonValue::Str("x".into()) + ]) + ); +} #[test] fn jiter_next_value_borrowed() { @@ -1672,7 +1622,7 @@ fn test_value_partial_array_on() { let value = JsonValue::parse_with_config(json_bytes, false, PartialMode::On).unwrap(); assert_eq!( value, - JsonValue::Array(Arc::new([ + JsonValue::Array(Arc::new(vec![ JsonValue::Str("string".into()), JsonValue::Bool(true), JsonValue::Null, @@ -1693,7 +1643,7 @@ fn test_value_partial_array_trailing_strings() { let value = JsonValue::parse_with_config(json_bytes, false, PartialMode::TrailingStrings).unwrap(); assert_eq!( value, - JsonValue::Array(Arc::new([ + JsonValue::Array(Arc::new(vec![ JsonValue::Str("string".into()), JsonValue::Bool(true), JsonValue::Null, @@ -1725,7 +1675,10 @@ fn test_value_partial_object() { assert_eq!(pairs[3].clone(), (Cow::Borrowed("d"), JsonValue::Null)); assert_eq!(pairs[4].clone(), (Cow::Borrowed("e"), JsonValue::Int(1))); assert_eq!(pairs[5].clone(), (Cow::Borrowed("f"), JsonValue::Float(2.22))); - assert_eq!(pairs[6].clone(), (Cow::Borrowed("g"), JsonValue::Array(Arc::new([])))); + assert_eq!( + pairs[6].clone(), + (Cow::Borrowed("g"), JsonValue::Array(Arc::new(vec![]))) + ); // test all position in the string for i in 1..json_bytes.len() { let partial_json = &json_bytes[..i]; From bbe5b8905a9c874dcb68745b8acf1eb5768d0e52 Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Tue, 28 Jan 2025 13:30:57 +0000 Subject: [PATCH 3/6] fix fuzzing --- crates/fuzz/fuzz_targets/compare_to_serde.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/crates/fuzz/fuzz_targets/compare_to_serde.rs b/crates/fuzz/fuzz_targets/compare_to_serde.rs index 53aad191..9b946012 100644 --- a/crates/fuzz/fuzz_targets/compare_to_serde.rs +++ b/crates/fuzz/fuzz_targets/compare_to_serde.rs @@ -1,6 +1,7 @@ #![no_main] #![allow(clippy::dbg_macro)] +use indexmap::IndexMap; use jiter::{JsonError as JiterError, JsonErrorType as JiterJsonErrorType, JsonValue as JiterValue}; use serde_json::{Error as SerdeError, Number as SerdeNumber, Value as SerdeValue}; @@ -30,7 +31,9 @@ pub fn values_equal(jiter_value: &JiterValue, serde_value: &SerdeValue) -> bool if o1.len() != o2.len() { return false; } - for (k1, v1) in o1.iter_unique() { + // deduplicate, as `jiter` doesn't do this during parsing + let o1: IndexMap<_, _> = o1.iter().map(|(k, v)| (k, v)).collect(); + for (k1, v1) in o1 { if let Some(v2) = o2.get::(k1.as_ref()) { if !values_equal(v1, v2) { return false; From 0336cd9db4ada6eded1984810768e43dda35bda4 Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Tue, 28 Jan 2025 14:07:35 +0000 Subject: [PATCH 4/6] fix fuzz again --- crates/fuzz/fuzz_targets/compare_to_serde.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/fuzz/fuzz_targets/compare_to_serde.rs b/crates/fuzz/fuzz_targets/compare_to_serde.rs index 9b946012..4a199567 100644 --- a/crates/fuzz/fuzz_targets/compare_to_serde.rs +++ b/crates/fuzz/fuzz_targets/compare_to_serde.rs @@ -28,11 +28,11 @@ pub fn values_equal(jiter_value: &JiterValue, serde_value: &SerdeValue) -> bool true } (JiterValue::Object(o1), SerdeValue::Object(o2)) => { + // deduplicate, as `jiter` doesn't do this during parsing + let o1: IndexMap<_, _> = o1.iter().map(|(k, v)| (k, v)).collect(); if o1.len() != o2.len() { return false; } - // deduplicate, as `jiter` doesn't do this during parsing - let o1: IndexMap<_, _> = o1.iter().map(|(k, v)| (k, v)).collect(); for (k1, v1) in o1 { if let Some(v2) = o2.get::(k1.as_ref()) { if !values_equal(v1, v2) { From a3c9ea312b2c37ca664bf06bbb600284d30c91b8 Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Tue, 28 Jan 2025 14:08:21 +0000 Subject: [PATCH 5/6] fix benchmarks --- crates/jiter/benches/main.rs | 32 +------------------------------- 1 file changed, 1 insertion(+), 31 deletions(-) diff --git a/crates/jiter/benches/main.rs b/crates/jiter/benches/main.rs index 2429d7ab..bd736cad 100644 --- a/crates/jiter/benches/main.rs +++ b/crates/jiter/benches/main.rs @@ -4,7 +4,7 @@ use std::hint::black_box; use std::fs::File; use std::io::Read; -use jiter::{Jiter, JsonValue, LazyIndexMap, PartialMode, Peek}; +use jiter::{Jiter, JsonValue, PartialMode, Peek}; use serde_json::Value; fn read_file(path: &str) -> String { @@ -276,33 +276,6 @@ fn x100_serde_iter(bench: &mut Bencher) { serde_str("./benches/x100.json", bench); } -fn lazy_map_lookup(length: i64, bench: &mut Bencher) { - bench.iter(|| { - let mut map: LazyIndexMap = LazyIndexMap::new(); - for i in 0..length { - let key = i.to_string(); - map.insert(key, JsonValue::Int(i)); - } - - // best case we get the next value each time - for i in 0..length { - black_box(map.get(&i.to_string()).unwrap()); - } - }) -} - -fn lazy_map_lookup_1_10(bench: &mut Bencher) { - lazy_map_lookup(10, bench); -} - -fn lazy_map_lookup_2_20(bench: &mut Bencher) { - lazy_map_lookup(20, bench); -} - -fn lazy_map_lookup_3_50(bench: &mut Bencher) { - lazy_map_lookup(50, bench); -} - benchmark_group!( benches, big_jiter_iter, @@ -360,9 +333,6 @@ benchmark_group!( true_object_jiter_skip, true_object_jiter_value, true_object_serde_value, - lazy_map_lookup_1_10, - lazy_map_lookup_2_20, - lazy_map_lookup_3_50, short_numbers_jiter_iter, short_numbers_jiter_skip, short_numbers_jiter_value, From 9fabe089ae87c84c5421d403c5a610a7d9f3351d Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Thu, 13 Feb 2025 13:50:14 +0000 Subject: [PATCH 6/6] remove dead tests --- crates/jiter/tests/main.rs | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/crates/jiter/tests/main.rs b/crates/jiter/tests/main.rs index 4030018c..12a278ec 100644 --- a/crates/jiter/tests/main.rs +++ b/crates/jiter/tests/main.rs @@ -924,36 +924,6 @@ fn test_crazy_massive_int() { jiter.finish().unwrap(); } -// #[test] -// fn unique_iter_object() { -// let value = JsonValue::parse(br#" {"x": 1, "x": 2} "#, false).unwrap(); -// if let JsonValue::Object(obj) = value { -// assert_eq!(obj.len(), 1); -// let mut unique = obj.iter_unique(); -// let first = unique.next().unwrap(); -// assert_eq!(first.0, "x"); -// assert_eq!(first.1, &JsonValue::Int(2)); -// assert!(unique.next().is_none()); -// } else { -// panic!("expected object"); -// } -// } - -// #[test] -// fn unique_iter_object_repeat() { -// let value = JsonValue::parse(br#" {"x": 1, "x": 1} "#, false).unwrap(); -// if let JsonValue::Object(obj) = value { -// assert_eq!(obj.len(), 1); -// let mut unique = obj.iter_unique(); -// let first = unique.next().unwrap(); -// assert_eq!(first.0, "x"); -// assert_eq!(first.1, &JsonValue::Int(1)); -// assert!(unique.next().is_none()); -// } else { -// panic!("expected object"); -// } -// } - #[test] fn test_recursion_limit() { let json = (0..2000).map(|_| "[").collect::();