|
| 1 | +// Copyright (c) 2020 Ritchie Vink |
| 2 | +// Copyright 2021 Datafuse Labs |
| 3 | +// |
| 4 | +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | +// you may not use this file except in compliance with the License. |
| 6 | +// You may obtain a copy of the License at |
| 7 | +// |
| 8 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | +// |
| 10 | +// Unless required by applicable law or agreed to in writing, software |
| 11 | +// distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | +// See the License for the specific language governing permissions and |
| 14 | +// limitations under the License. |
| 15 | + |
| 16 | +use std::fmt::Debug; |
| 17 | + |
| 18 | +use serde::Deserialize; |
| 19 | +use serde::Serialize; |
| 20 | + |
| 21 | +use super::FixedSizeBinaryColumn; |
| 22 | + |
| 23 | +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] |
| 24 | +pub struct FixedSizeBinaryColumnBuilder { |
| 25 | + pub data: Vec<u8>, |
| 26 | + pub value_length: usize, |
| 27 | +} |
| 28 | + |
| 29 | +impl FixedSizeBinaryColumnBuilder { |
| 30 | + pub fn with_capacity(data_capacity: usize, value_length: usize) -> Self { |
| 31 | + FixedSizeBinaryColumnBuilder { |
| 32 | + data: Vec::with_capacity(data_capacity), |
| 33 | + value_length, |
| 34 | + } |
| 35 | + } |
| 36 | + |
| 37 | + pub fn from_column(col: FixedSizeBinaryColumn) -> Self { |
| 38 | + FixedSizeBinaryColumnBuilder { |
| 39 | + data: col.data.make_mut(), |
| 40 | + value_length: col.value_length, |
| 41 | + } |
| 42 | + } |
| 43 | + |
| 44 | + pub fn from_data(data: Vec<u8>) -> Self { |
| 45 | + let value_length = data.len(); |
| 46 | + FixedSizeBinaryColumnBuilder { data, value_length } |
| 47 | + } |
| 48 | + |
| 49 | + pub fn repeat(scalar: &[u8], n: usize) -> Self { |
| 50 | + let len = scalar.len(); |
| 51 | + let data = scalar.repeat(n); |
| 52 | + FixedSizeBinaryColumnBuilder { |
| 53 | + data, |
| 54 | + value_length: len, |
| 55 | + } |
| 56 | + } |
| 57 | + |
| 58 | + pub fn repeat_default(n: usize, value_length: usize) -> Self { |
| 59 | + FixedSizeBinaryColumnBuilder { |
| 60 | + data: vec![0; n * value_length], |
| 61 | + value_length, |
| 62 | + } |
| 63 | + } |
| 64 | + |
| 65 | + pub fn len(&self) -> usize { |
| 66 | + if self.data.is_empty() { |
| 67 | + 0 |
| 68 | + } else { |
| 69 | + self.data.len() / self.value_length |
| 70 | + } |
| 71 | + } |
| 72 | + |
| 73 | + pub fn is_empty(&self) -> bool { |
| 74 | + self.len() <= 1 |
| 75 | + } |
| 76 | + |
| 77 | + pub fn memory_size(&self) -> usize { |
| 78 | + self.data.len() |
| 79 | + } |
| 80 | + |
| 81 | + pub fn put_u8(&mut self, item: u8) { |
| 82 | + self.data.push(item); |
| 83 | + } |
| 84 | + |
| 85 | + pub fn push_default(&mut self) { |
| 86 | + self.data.extend_from_slice(&vec![0; self.value_length]); |
| 87 | + } |
| 88 | + |
| 89 | + pub fn put_char(&mut self, item: char) { |
| 90 | + self.data |
| 91 | + .extend_from_slice(item.encode_utf8(&mut [0; 4]).as_bytes()); |
| 92 | + } |
| 93 | + |
| 94 | + #[inline] |
| 95 | + pub fn put_str(&mut self, item: &str) { |
| 96 | + debug_assert!(self.value_length == item.as_bytes().len()); |
| 97 | + self.data.extend_from_slice(item.as_bytes()); |
| 98 | + } |
| 99 | + |
| 100 | + #[inline] |
| 101 | + pub fn put_slice(&mut self, item: &[u8]) { |
| 102 | + debug_assert!(self.value_length == item.len()); |
| 103 | + self.data.extend_from_slice(item); |
| 104 | + } |
| 105 | + |
| 106 | + #[inline] |
| 107 | + pub fn commit_row(&mut self) { |
| 108 | + self.data.reserve(self.data.capacity()); |
| 109 | + } |
| 110 | + |
| 111 | + pub fn put_char_iter(&mut self, iter: impl Iterator<Item = char>) { |
| 112 | + for c in iter { |
| 113 | + let mut buf = [0; 4]; |
| 114 | + let result = c.encode_utf8(&mut buf); |
| 115 | + self.data.extend_from_slice(result.as_bytes()); |
| 116 | + } |
| 117 | + } |
| 118 | + |
| 119 | + pub fn put(&mut self, item: &[u8]) { |
| 120 | + self.data.extend_from_slice(item); |
| 121 | + } |
| 122 | + |
| 123 | + pub fn append_column(&mut self, other: &FixedSizeBinaryColumn) { |
| 124 | + debug_assert!(other.value_length == self.value_length); |
| 125 | + self.data.extend_from_slice(&other.data); |
| 126 | + } |
| 127 | + |
| 128 | + pub fn build(self) -> FixedSizeBinaryColumn { |
| 129 | + FixedSizeBinaryColumn::new(self.data.into(), self.value_length) |
| 130 | + } |
| 131 | + |
| 132 | + pub fn build_scalar(self) -> Vec<u8> { |
| 133 | + if self.data.is_empty() { |
| 134 | + vec![] |
| 135 | + } else { |
| 136 | + self.data[0..self.value_length].to_vec() |
| 137 | + } |
| 138 | + } |
| 139 | + |
| 140 | + #[inline] |
| 141 | + pub fn may_resize(&self, add_size: usize) -> bool { |
| 142 | + self.data.len() + add_size > self.data.capacity() |
| 143 | + } |
| 144 | + |
| 145 | + /// # Safety |
| 146 | + /// |
| 147 | + /// Calling this method with an out-of-bounds index is *[undefined behavior]* |
| 148 | + pub unsafe fn index_unchecked(&self, row: usize) -> &[u8] { |
| 149 | + debug_assert!((row + 1) * self.value_length < self.data.len()); |
| 150 | + |
| 151 | + self.data |
| 152 | + .get_unchecked(row * self.value_length..(row + 1) * self.value_length) |
| 153 | + } |
| 154 | + |
| 155 | + pub fn push_repeat(&mut self, item: &[u8], n: usize) { |
| 156 | + debug_assert!(item.len() / n == self.data.len()); |
| 157 | + self.data.reserve(item.len() * n); |
| 158 | + for _ in 0..n { |
| 159 | + self.data.extend_from_slice(item) |
| 160 | + } |
| 161 | + } |
| 162 | + |
| 163 | + pub fn pop(&mut self) -> Option<Vec<u8>> { |
| 164 | + if !self.is_empty() { |
| 165 | + let val = self.data.split_off(self.len() - 1); |
| 166 | + Some(val) |
| 167 | + } else { |
| 168 | + None |
| 169 | + } |
| 170 | + } |
| 171 | + |
| 172 | + /// Extends the [`MutableBinaryArray`] from an iterator of values. |
| 173 | + /// This differs from `extended_trusted_len` which accepts iterator of optional values. |
| 174 | + #[inline] |
| 175 | + pub fn extend_values<I, P>(&mut self, iterator: I) |
| 176 | + where |
| 177 | + P: AsRef<[u8]>, |
| 178 | + I: Iterator<Item = P>, |
| 179 | + { |
| 180 | + for item in iterator { |
| 181 | + self.put_slice(item.as_ref()); |
| 182 | + self.commit_row(); |
| 183 | + } |
| 184 | + } |
| 185 | +} |
| 186 | + |
| 187 | +impl<P: AsRef<[u8]>> FromIterator<P> for FixedSizeBinaryColumnBuilder { |
| 188 | + fn from_iter<I: IntoIterator<Item = P>>(iter: I) -> Self { |
| 189 | + let iter = iter.into_iter(); |
| 190 | + let (lower, _) = iter.size_hint(); |
| 191 | + let mut builder = FixedSizeBinaryColumnBuilder::with_capacity(lower, 0); |
| 192 | + builder.extend_values(iter); |
| 193 | + builder |
| 194 | + } |
| 195 | +} |
| 196 | + |
| 197 | +impl From<FixedSizeBinaryColumnBuilder> for FixedSizeBinaryColumn { |
| 198 | + fn from(value: FixedSizeBinaryColumnBuilder) -> Self { |
| 199 | + value.build() |
| 200 | + } |
| 201 | +} |
0 commit comments