csv/byte_record.rs
1use std::{
2 cmp, fmt,
3 iter::FromIterator,
4 ops::{self, Range},
5 result,
6};
7
8use serde::de::Deserialize;
9
10use crate::{
11 deserializer::deserialize_byte_record,
12 error::{new_utf8_error, Result, Utf8Error},
13 string_record::StringRecord,
14};
15
/// A single CSV record stored as raw bytes.
///
/// A byte record permits reading or writing CSV rows that are not UTF-8.
/// In general, you should prefer using a
/// [`StringRecord`](struct.StringRecord.html)
/// since it is more ergonomic, but a `ByteRecord` is provided in case you need
/// it.
///
/// If you are using the Serde (de)serialization APIs, then you probably never
/// need to interact with a `ByteRecord` or a `StringRecord`. However, there
/// are some circumstances in which you might need to use a raw record type
/// while still using Serde. For example, if you need to deserialize possibly
/// invalid UTF-8 fields, then you'll need to first read your record into a
/// `ByteRecord`, and then use `ByteRecord::deserialize` to run Serde. Another
/// reason for using the raw record deserialization APIs is if you're using
/// Serde to read into borrowed data such as a `&'a str` or a `&'a [u8]`.
///
/// Two `ByteRecord`s are compared on the basis of their field data. Any
/// position information associated with the records is ignored.
///
/// The record is a thin handle around a single boxed allocation holding the
/// field bytes, the field boundaries and the optional position.
// `PartialEq` is implemented by hand (field-by-field comparison) rather than
// derived, which is why only `Clone` and `Eq` appear in the derive list.
#[derive(Clone, Eq)]
pub struct ByteRecord(Box<ByteRecordInner>);
37
38impl PartialEq for ByteRecord {
39 fn eq(&self, other: &ByteRecord) -> bool {
40 if self.len() != other.len() {
41 return false;
42 }
43 self.iter().zip(other.iter()).all(|e| e.0 == e.1)
44 }
45}
46
47impl<T: AsRef<[u8]>> PartialEq<Vec<T>> for ByteRecord {
48 fn eq(&self, other: &Vec<T>) -> bool {
49 self.iter_eq(other)
50 }
51}
52
53impl<'a, T: AsRef<[u8]>> PartialEq<Vec<T>> for &'a ByteRecord {
54 fn eq(&self, other: &Vec<T>) -> bool {
55 self.iter_eq(other)
56 }
57}
58
59impl<T: AsRef<[u8]>> PartialEq<[T]> for ByteRecord {
60 fn eq(&self, other: &[T]) -> bool {
61 self.iter_eq(other)
62 }
63}
64
65impl<'a, T: AsRef<[u8]>> PartialEq<[T]> for &'a ByteRecord {
66 fn eq(&self, other: &[T]) -> bool {
67 self.iter_eq(other)
68 }
69}
70
71impl fmt::Debug for ByteRecord {
72 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
73 write!(f, "ByteRecord(")?;
74 f.debug_list()
75 .entries(self.iter().map(crate::debug::Bytes))
76 .finish()?;
77 write!(f, ")")?;
78 Ok(())
79 }
80}
81
/// The inner portion of a byte record.
///
/// We use this memory layout so that moving a `ByteRecord` only requires
/// moving a single pointer. The optimization is dubious at best, but does
/// seem to result in slightly better numbers in microbenchmarks. Methinks this
/// may heavily depend on the underlying allocator.
#[derive(Clone, Debug, Eq, PartialEq)]
struct ByteRecordInner {
    /// The position of this byte record.
    ///
    /// `None` until a position is assigned via `ByteRecord::set_position`.
    pos: Option<Position>,
    /// All fields in this record, stored contiguously.
    ///
    /// The vector's length acts as the usable buffer size, so it may be
    /// longer than the bytes actually occupied by fields; the occupied
    /// prefix ends at `bounds.end()`.
    fields: Vec<u8>,
    /// The number of and location of each field in this record.
    bounds: Bounds,
}
97
98impl Default for ByteRecord {
99 #[inline]
100 fn default() -> ByteRecord {
101 ByteRecord::new()
102 }
103}
104
105impl ByteRecord {
106 /// Create a new empty `ByteRecord`.
107 ///
108 /// Note that you may find the `ByteRecord::from` constructor more
109 /// convenient, which is provided by an impl on the `From` trait.
110 ///
111 /// # Example: create an empty record
112 ///
113 /// ```
114 /// use csv::ByteRecord;
115 ///
116 /// let record = ByteRecord::new();
117 /// assert_eq!(record.len(), 0);
118 /// ```
119 ///
120 /// # Example: initialize a record from a `Vec`
121 ///
122 /// ```
123 /// use csv::ByteRecord;
124 ///
125 /// let record = ByteRecord::from(vec!["a", "b", "c"]);
126 /// assert_eq!(record.len(), 3);
127 /// ```
128 #[inline]
129 pub fn new() -> ByteRecord {
130 ByteRecord::with_capacity(0, 0)
131 }
132
133 /// Create a new empty `ByteRecord` with the given capacity settings.
134 ///
135 /// `buffer` refers to the capacity of the buffer used to store the
136 /// actual row contents. `fields` refers to the number of fields one
137 /// might expect to store.
138 #[inline]
139 pub fn with_capacity(buffer: usize, fields: usize) -> ByteRecord {
140 ByteRecord(Box::new(ByteRecordInner {
141 pos: None,
142 fields: vec![0; buffer],
143 bounds: Bounds::with_capacity(fields),
144 }))
145 }
146
    /// Deserialize this record.
    ///
    /// The `D` type parameter refers to the type that this record should be
    /// deserialized into. The `'de` lifetime refers to the lifetime of the
    /// `ByteRecord`. The `'de` lifetime permits deserializing into structs
    /// that borrow field data from this record.
    ///
    /// An optional `headers` parameter permits deserializing into a struct
    /// based on its field names (corresponding to header values) rather than
    /// the order in which the fields are defined.
    ///
    /// # Errors
    ///
    /// Any error reported while deserializing the record into `D` is
    /// returned to the caller unchanged.
    ///
    /// # Example: without headers
    ///
    /// This shows how to deserialize a single row into a struct based on the
    /// order in which fields occur. This example also shows how to borrow
    /// fields from the `ByteRecord`, which results in zero allocation
    /// deserialization.
    ///
    /// ```
    /// use std::error::Error;
    ///
    /// use csv::ByteRecord;
    /// use serde::Deserialize;
    ///
    /// #[derive(Deserialize)]
    /// struct Row<'a> {
    ///     city: &'a str,
    ///     country: &'a str,
    ///     population: u64,
    /// }
    ///
    /// # fn main() { example().unwrap() }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let record = ByteRecord::from(vec![
    ///         "Boston", "United States", "4628910",
    ///     ]);
    ///
    ///     let row: Row = record.deserialize(None)?;
    ///     assert_eq!(row.city, "Boston");
    ///     assert_eq!(row.country, "United States");
    ///     assert_eq!(row.population, 4628910);
    ///     Ok(())
    /// }
    /// ```
    ///
    /// # Example: with headers
    ///
    /// This example is like the previous one, but shows how to deserialize
    /// into a struct based on the struct's field names. For this to work,
    /// you must provide a header row.
    ///
    /// This example also shows that you can deserialize into owned data
    /// types (e.g., `String`) instead of borrowed data types (e.g., `&str`).
    ///
    /// ```
    /// use std::error::Error;
    ///
    /// use csv::ByteRecord;
    /// use serde::Deserialize;
    ///
    /// #[derive(Deserialize)]
    /// struct Row {
    ///     city: String,
    ///     country: String,
    ///     population: u64,
    /// }
    ///
    /// # fn main() { example().unwrap() }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     // Notice that the fields are not in the same order
    ///     // as the fields in the struct!
    ///     let header = ByteRecord::from(vec![
    ///         "country", "city", "population",
    ///     ]);
    ///     let record = ByteRecord::from(vec![
    ///         "United States", "Boston", "4628910",
    ///     ]);
    ///
    ///     let row: Row = record.deserialize(Some(&header))?;
    ///     assert_eq!(row.city, "Boston");
    ///     assert_eq!(row.country, "United States");
    ///     assert_eq!(row.population, 4628910);
    ///     Ok(())
    /// }
    /// ```
    pub fn deserialize<'de, D: Deserialize<'de>>(
        &'de self,
        headers: Option<&'de ByteRecord>,
    ) -> Result<D> {
        // All of the work happens in the crate's deserializer module; this
        // method only ties the record and header lifetimes together.
        deserialize_byte_record(self, headers)
    }
238
239 /// Returns an iterator over all fields in this record.
240 ///
241 /// # Example
242 ///
243 /// This example shows how to iterate over each field in a `ByteRecord`.
244 ///
245 /// ```
246 /// use csv::ByteRecord;
247 ///
248 /// let record = ByteRecord::from(vec!["a", "b", "c"]);
249 /// for field in record.iter() {
250 /// assert!(field == b"a" || field == b"b" || field == b"c");
251 /// }
252 /// ```
253 #[inline]
254 pub fn iter(&self) -> ByteRecordIter {
255 self.into_iter()
256 }
257
258 /// Return the field at index `i`.
259 ///
260 /// If no field at index `i` exists, then this returns `None`.
261 ///
262 /// # Example
263 ///
264 /// ```
265 /// use csv::ByteRecord;
266 ///
267 /// let record = ByteRecord::from(vec!["a", "b", "c"]);
268 /// assert_eq!(record.get(1), Some(&b"b"[..]));
269 /// assert_eq!(record.get(3), None);
270 /// ```
271 #[inline]
272 pub fn get(&self, i: usize) -> Option<&[u8]> {
273 self.0.bounds.get(i).map(|range| &self.0.fields[range])
274 }
275
276 /// Returns true if and only if this record is empty.
277 ///
278 /// # Example
279 ///
280 /// ```
281 /// use csv::ByteRecord;
282 ///
283 /// assert!(ByteRecord::new().is_empty());
284 /// ```
285 #[inline]
286 pub fn is_empty(&self) -> bool {
287 self.len() == 0
288 }
289
290 /// Returns the number of fields in this record.
291 ///
292 /// # Example
293 ///
294 /// ```
295 /// use csv::ByteRecord;
296 ///
297 /// let record = ByteRecord::from(vec!["a", "b", "c"]);
298 /// assert_eq!(record.len(), 3);
299 /// ```
300 #[inline]
301 pub fn len(&self) -> usize {
302 self.0.bounds.len()
303 }
304
305 /// Truncate this record to `n` fields.
306 ///
307 /// If `n` is greater than the number of fields in this record, then this
308 /// has no effect.
309 ///
310 /// # Example
311 ///
312 /// ```
313 /// use csv::ByteRecord;
314 ///
315 /// let mut record = ByteRecord::from(vec!["a", "b", "c"]);
316 /// assert_eq!(record.len(), 3);
317 /// record.truncate(1);
318 /// assert_eq!(record.len(), 1);
319 /// assert_eq!(record, vec!["a"]);
320 /// ```
321 #[inline]
322 pub fn truncate(&mut self, n: usize) {
323 if n <= self.len() {
324 self.0.bounds.len = n;
325 }
326 }
327
328 /// Clear this record so that it has zero fields.
329 ///
330 /// This is equivalent to calling `truncate(0)`.
331 ///
332 /// Note that it is not necessary to clear the record to reuse it with
333 /// the CSV reader.
334 ///
335 /// # Example
336 ///
337 /// ```
338 /// use csv::ByteRecord;
339 ///
340 /// let mut record = ByteRecord::from(vec!["a", "b", "c"]);
341 /// assert_eq!(record.len(), 3);
342 /// record.clear();
343 /// assert_eq!(record.len(), 0);
344 /// ```
345 #[inline]
346 pub fn clear(&mut self) {
347 self.truncate(0);
348 }
349
350 /// Trim the fields of this record so that leading and trailing whitespace
351 /// is removed.
352 ///
353 /// This method uses the ASCII definition of whitespace. That is, only
354 /// bytes in the class `[\t\n\v\f\r ]` are trimmed.
355 ///
356 /// # Example
357 ///
358 /// ```
359 /// use csv::ByteRecord;
360 ///
361 /// let mut record = ByteRecord::from(vec![
362 /// " ", "\tfoo", "bar ", "b a z",
363 /// ]);
364 /// record.trim();
365 /// assert_eq!(record, vec!["", "foo", "bar", "b a z"]);
366 /// ```
367 pub fn trim(&mut self) {
368 let length = self.len();
369 if length == 0 {
370 return;
371 }
372 // TODO: We could likely do this in place, but for now, we allocate.
373 let mut trimmed =
374 ByteRecord::with_capacity(self.as_slice().len(), self.len());
375 trimmed.set_position(self.position().cloned());
376 for field in self.iter() {
377 trimmed.push_field(trim_ascii(field));
378 }
379 *self = trimmed;
380 }
381
382 /// Add a new field to this record.
383 ///
384 /// # Example
385 ///
386 /// ```
387 /// use csv::ByteRecord;
388 ///
389 /// let mut record = ByteRecord::new();
390 /// record.push_field(b"foo");
391 /// assert_eq!(&record[0], b"foo");
392 /// ```
393 #[inline]
394 pub fn push_field(&mut self, field: &[u8]) {
395 let (s, e) = (self.0.bounds.end(), self.0.bounds.end() + field.len());
396 while e > self.0.fields.len() {
397 self.expand_fields();
398 }
399 self.0.fields[s..e].copy_from_slice(field);
400 self.0.bounds.add(e);
401 }
402
403 /// Return the position of this record, if available.
404 ///
405 /// # Example
406 ///
407 /// ```
408 /// use std::error::Error;
409 ///
410 /// use csv::{ByteRecord, ReaderBuilder};
411 ///
412 /// # fn main() { example().unwrap(); }
413 /// fn example() -> Result<(), Box<dyn Error>> {
414 /// let mut record = ByteRecord::new();
415 /// let mut rdr = ReaderBuilder::new()
416 /// .has_headers(false)
417 /// .from_reader("a,b,c\nx,y,z".as_bytes());
418 ///
419 /// assert!(rdr.read_byte_record(&mut record)?);
420 /// {
421 /// let pos = record.position().expect("a record position");
422 /// assert_eq!(pos.byte(), 0);
423 /// assert_eq!(pos.line(), 1);
424 /// assert_eq!(pos.record(), 0);
425 /// }
426 ///
427 /// assert!(rdr.read_byte_record(&mut record)?);
428 /// {
429 /// let pos = record.position().expect("a record position");
430 /// assert_eq!(pos.byte(), 6);
431 /// assert_eq!(pos.line(), 2);
432 /// assert_eq!(pos.record(), 1);
433 /// }
434 ///
435 /// // Finish the CSV reader for good measure.
436 /// assert!(!rdr.read_byte_record(&mut record)?);
437 /// Ok(())
438 /// }
439 /// ```
440 #[inline]
441 pub fn position(&self) -> Option<&Position> {
442 self.0.pos.as_ref()
443 }
444
445 /// Set the position of this record.
446 ///
447 /// # Example
448 ///
449 /// ```
450 /// use csv::{ByteRecord, Position};
451 ///
452 /// let mut record = ByteRecord::from(vec!["a", "b", "c"]);
453 /// let mut pos = Position::new();
454 /// pos.set_byte(100);
455 /// pos.set_line(4);
456 /// pos.set_record(2);
457 ///
458 /// record.set_position(Some(pos.clone()));
459 /// assert_eq!(record.position(), Some(&pos));
460 /// ```
461 #[inline]
462 pub fn set_position(&mut self, pos: Option<Position>) {
463 self.0.pos = pos;
464 }
465
466 /// Return the start and end position of a field in this record.
467 ///
468 /// If no such field exists at the given index, then return `None`.
469 ///
470 /// The range returned can be used with the slice returned by `as_slice`.
471 ///
472 /// # Example
473 ///
474 /// ```
475 /// use csv::ByteRecord;
476 ///
477 /// let record = ByteRecord::from(vec!["foo", "quux", "z"]);
478 /// let range = record.range(1).expect("a record range");
479 /// assert_eq!(&record.as_slice()[range], &b"quux"[..]);
480 /// ```
481 #[inline]
482 pub fn range(&self, i: usize) -> Option<Range<usize>> {
483 self.0.bounds.get(i)
484 }
485
486 /// Return the entire row as a single byte slice. The slice returned stores
487 /// all fields contiguously. The boundaries of each field can be determined
488 /// via the `range` method.
489 ///
490 /// # Example
491 ///
492 /// ```
493 /// use csv::ByteRecord;
494 ///
495 /// let record = ByteRecord::from(vec!["foo", "quux", "z"]);
496 /// assert_eq!(record.as_slice(), &b"fooquuxz"[..]);
497 /// ```
498 #[inline]
499 pub fn as_slice(&self) -> &[u8] {
500 &self.0.fields[..self.0.bounds.end()]
501 }
502
503 /// Clone this record, but only copy `fields` up to the end of bounds. This
504 /// is useful when one wants to copy a record, but not necessarily any
505 /// excess capacity in that record.
506 #[inline]
507 pub(crate) fn clone_truncated(&self) -> ByteRecord {
508 let mut br = ByteRecord::new();
509 br.0.pos = self.0.pos.clone();
510 br.0.bounds = self.0.bounds.clone();
511 br.0.fields = self.0.fields[..self.0.bounds.end()].to_vec();
512 br
513 }
514
515 /// Retrieve the underlying parts of a byte record.
516 #[inline]
517 pub(crate) fn as_parts(&mut self) -> (&mut Vec<u8>, &mut Vec<usize>) {
518 let inner = &mut *self.0;
519 (&mut inner.fields, &mut inner.bounds.ends)
520 }
521
522 /// Set the number of fields in the given record record.
523 #[inline]
524 pub(crate) fn set_len(&mut self, len: usize) {
525 self.0.bounds.len = len;
526 }
527
528 /// Expand the capacity for storing fields.
529 #[inline]
530 pub(crate) fn expand_fields(&mut self) {
531 let new_len = self.0.fields.len().checked_mul(2).unwrap();
532 self.0.fields.resize(cmp::max(4, new_len), 0);
533 }
534
535 /// Expand the capacity for storing field ending positions.
536 #[inline]
537 pub(crate) fn expand_ends(&mut self) {
538 self.0.bounds.expand();
539 }
540
541 /// Validate the given record as UTF-8.
542 ///
543 /// If it's not UTF-8, return an error.
544 #[inline]
545 pub(crate) fn validate(&self) -> result::Result<(), Utf8Error> {
546 // If the entire buffer is ASCII, then we have nothing to fear.
547 if self.0.fields[..self.0.bounds.end()].is_ascii() {
548 return Ok(());
549 }
550 // Otherwise, we must check each field individually to ensure that
551 // it's valid UTF-8.
552 for (i, field) in self.iter().enumerate() {
553 if let Err(err) = std::str::from_utf8(field) {
554 return Err(new_utf8_error(i, err.valid_up_to()));
555 }
556 }
557 Ok(())
558 }
559
560 /// Compare the given byte record with the iterator of fields for equality.
561 pub(crate) fn iter_eq<I, T>(&self, other: I) -> bool
562 where
563 I: IntoIterator<Item = T>,
564 T: AsRef<[u8]>,
565 {
566 let mut it_record = self.iter();
567 let mut it_other = other.into_iter();
568 loop {
569 match (it_record.next(), it_other.next()) {
570 (None, None) => return true,
571 (None, Some(_)) | (Some(_), None) => return false,
572 (Some(x), Some(y)) => {
573 if x != y.as_ref() {
574 return false;
575 }
576 }
577 }
578 }
579 }
580}
581
/// A position in CSV data.
///
/// Positions are used to report where in CSV data something (such as an
/// error) occurred. Every position carries a byte offset, a line number and
/// a record index.
///
/// Byte offsets and record indices start at `0`. Line numbers start at `1`.
///
/// A CSV reader will automatically assign the position of each record.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Position {
    /// Byte offset, counted from `0`.
    byte: u64,
    /// Line number, counted from `1`.
    line: u64,
    /// Record index, counted from `0`.
    record: u64,
}

impl Position {
    /// Returns a new position initialized to the start value: byte `0`,
    /// line `1`, record `0`.
    #[inline]
    pub fn new() -> Position {
        Self { byte: 0, line: 1, record: 0 }
    }

    /// The byte offset, starting at `0`, of this position.
    #[inline]
    pub fn byte(&self) -> u64 {
        self.byte
    }

    /// The line number, starting at `1`, of this position.
    #[inline]
    pub fn line(&self) -> u64 {
        self.line
    }

    /// The record index, starting with the first record at `0`.
    #[inline]
    pub fn record(&self) -> u64 {
        self.record
    }

    /// Set the byte offset of this position.
    ///
    /// Returns `self` so that setters can be chained.
    #[inline]
    pub fn set_byte(&mut self, byte: u64) -> &mut Position {
        self.byte = byte;
        self
    }

    /// Set the line number of this position.
    ///
    /// Returns `self` so that setters can be chained.
    ///
    /// # Panics
    ///
    /// Panics if `line` is less than `1`.
    #[inline]
    pub fn set_line(&mut self, line: u64) -> &mut Position {
        assert!(line > 0);
        self.line = line;
        self
    }

    /// Set the record index of this position.
    ///
    /// Returns `self` so that setters can be chained.
    #[inline]
    pub fn set_record(&mut self, record: u64) -> &mut Position {
        self.record = record;
        self
    }
}
644
/// The bounds of fields in a single record.
///
/// Only the end offset of each field is stored; a field starts where the
/// previous one ends (or at `0` for the first field).
#[derive(Clone, Debug, Eq, PartialEq)]
struct Bounds {
    /// The ending index of each field.
    ///
    /// This vector may be longer than `len`; entries at index `len` and
    /// beyond are spare slots and carry no meaning.
    ends: Vec<usize>,
    /// The number of fields in this record.
    ///
    /// Technically, we could drop this field and maintain an invariant that
    /// `ends.len()` is always the number of fields, but doing that efficiently
    /// requires attention to safety. We play it safe at essentially no cost.
    len: usize,
}

impl Default for Bounds {
    #[inline]
    fn default() -> Bounds {
        Bounds::with_capacity(0)
    }
}

impl Bounds {
    /// Create a new set of bounds with the given capacity for storing the
    /// ends of fields.
    ///
    /// The slots are zero-filled and the field count starts at `0`.
    #[inline]
    fn with_capacity(capacity: usize) -> Bounds {
        Bounds { ends: vec![0; capacity], len: 0 }
    }

    /// Returns the bounds of field `i`, or `None` if there is no such field.
    #[inline]
    fn get(&self, i: usize) -> Option<Range<usize>> {
        if i >= self.len {
            return None;
        }
        let end = *self.ends.get(i)?;
        // The first field has no predecessor and therefore starts at 0.
        let start = match i.checked_sub(1) {
            Some(prev) => self.ends.get(prev).copied().unwrap_or(0),
            None => 0,
        };
        Some(start..end)
    }

    /// Returns a slice of ending positions of all fields.
    ///
    /// Spare slots beyond the field count are not included.
    #[inline]
    fn ends(&self) -> &[usize] {
        &self.ends[..self.len]
    }

    /// Return the last position of the last field.
    ///
    /// If there are no fields, this returns `0`.
    #[inline]
    fn end(&self) -> usize {
        self.ends().last().copied().unwrap_or(0)
    }

    /// Returns the number of fields in these bounds.
    #[inline]
    fn len(&self) -> usize {
        self.len
    }

    /// Expand the capacity for storing field ending positions.
    ///
    /// The number of slots doubles, with a minimum of 4.
    ///
    /// # Panics
    ///
    /// Panics if doubling the number of slots overflows `usize`.
    #[inline]
    fn expand(&mut self) {
        let doubled = self
            .ends
            .len()
            .checked_mul(2)
            .expect("field bounds capacity overflow");
        self.ends.resize(cmp::max(4, doubled), 0);
    }

    /// Add a new field with the given ending position.
    #[inline]
    fn add(&mut self, pos: usize) {
        // Make room first when every slot is already in use.
        if self.len >= self.ends.len() {
            self.expand();
        }
        self.ends[self.len] = pos;
        self.len += 1;
    }
}
727
728impl ops::Index<usize> for ByteRecord {
729 type Output = [u8];
730 #[inline]
731 fn index(&self, i: usize) -> &[u8] {
732 self.get(i).unwrap()
733 }
734}
735
/// Convert a `StringRecord` into a `ByteRecord`.
///
/// The conversion consumes the string record and is carried out by its
/// `into_byte_record` method.
impl From<StringRecord> for ByteRecord {
    #[inline]
    fn from(record: StringRecord) -> ByteRecord {
        record.into_byte_record()
    }
}
742
743impl<T: AsRef<[u8]>> From<Vec<T>> for ByteRecord {
744 #[inline]
745 fn from(xs: Vec<T>) -> ByteRecord {
746 ByteRecord::from_iter(&xs)
747 }
748}
749
750impl<'a, T: AsRef<[u8]>> From<&'a [T]> for ByteRecord {
751 #[inline]
752 fn from(xs: &'a [T]) -> ByteRecord {
753 ByteRecord::from_iter(xs)
754 }
755}
756
757impl<T: AsRef<[u8]>> FromIterator<T> for ByteRecord {
758 #[inline]
759 fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> ByteRecord {
760 let mut record = ByteRecord::new();
761 record.extend(iter);
762 record
763 }
764}
765
766impl<T: AsRef<[u8]>> Extend<T> for ByteRecord {
767 #[inline]
768 fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
769 for x in iter {
770 self.push_field(x.as_ref());
771 }
772 }
773}
774
/// A double-ended iterator over the fields in a byte record.
///
/// The `'r` lifetime variable refers to the lifetime of the `ByteRecord` that
/// is being iterated over.
///
/// Forward and reverse iteration share one cursor pair: iteration is finished
/// as soon as `i_forward` and `i_reverse` meet.
#[derive(Clone)]
pub struct ByteRecordIter<'r> {
    /// The record we are iterating over.
    r: &'r ByteRecord,
    /// The starting index of the previous field. (For reverse iteration.)
    ///
    /// This doubles as the end offset of the next field yielded from the
    /// back, and starts out as the total length of the field data.
    last_start: usize,
    /// The ending index of the previous field. (For forward iteration.)
    ///
    /// This doubles as the start offset of the next field yielded from the
    /// front, and starts out as `0`.
    last_end: usize,
    /// The index of forward iteration.
    i_forward: usize,
    /// The index of reverse iteration.
    i_reverse: usize,
}
792
793impl<'r> IntoIterator for &'r ByteRecord {
794 type IntoIter = ByteRecordIter<'r>;
795 type Item = &'r [u8];
796
797 #[inline]
798 fn into_iter(self) -> ByteRecordIter<'r> {
799 ByteRecordIter {
800 r: self,
801 last_start: self.as_slice().len(),
802 last_end: 0,
803 i_forward: 0,
804 i_reverse: self.len(),
805 }
806 }
807}
808
// The number of remaining fields is always known exactly: `size_hint` reports
// the same value for its lower and upper bound.
impl<'r> ExactSizeIterator for ByteRecordIter<'r> {}
810
811impl<'r> Iterator for ByteRecordIter<'r> {
812 type Item = &'r [u8];
813
814 #[inline]
815 fn next(&mut self) -> Option<&'r [u8]> {
816 if self.i_forward == self.i_reverse {
817 None
818 } else {
819 let start = self.last_end;
820 let end = self.r.0.bounds.ends()[self.i_forward];
821 self.i_forward += 1;
822 self.last_end = end;
823 Some(&self.r.0.fields[start..end])
824 }
825 }
826
827 #[inline]
828 fn size_hint(&self) -> (usize, Option<usize>) {
829 let x = self.i_reverse - self.i_forward;
830 (x, Some(x))
831 }
832
833 #[inline]
834 fn count(self) -> usize {
835 self.len()
836 }
837}
838
839impl<'r> DoubleEndedIterator for ByteRecordIter<'r> {
840 #[inline]
841 fn next_back(&mut self) -> Option<&'r [u8]> {
842 if self.i_forward == self.i_reverse {
843 None
844 } else {
845 self.i_reverse -= 1;
846 let start = self
847 .i_reverse
848 .checked_sub(1)
849 .map(|i| self.r.0.bounds.ends()[i])
850 .unwrap_or(0);
851 let end = self.last_start;
852 self.last_start = start;
853 Some(&self.r.0.fields[start..end])
854 }
855 }
856}
857
/// Returns true if `byte` belongs to the ASCII whitespace class
/// `[\t\n\v\f\r ]`.
///
/// `u8::is_ascii_whitespace` does not consider vertical tab (`0x0B`) to be
/// whitespace, so that byte is checked separately.
fn is_trimmable(byte: u8) -> bool {
    byte == b'\x0b' || byte.is_ascii_whitespace()
}

/// Strip ASCII whitespace (`[\t\n\v\f\r ]`) from both ends of `bytes`.
///
/// The result is a sub-slice of the input; whitespace in the interior is
/// preserved.
fn trim_ascii(bytes: &[u8]) -> &[u8] {
    trim_ascii_start(trim_ascii_end(bytes))
}

/// Strip leading ASCII whitespace (`[\t\n\v\f\r ]`) from `bytes`.
fn trim_ascii_start(mut bytes: &[u8]) -> &[u8] {
    while let [first, rest @ ..] = bytes {
        if !is_trimmable(*first) {
            break;
        }
        bytes = rest;
    }
    bytes
}

/// Strip trailing ASCII whitespace (`[\t\n\v\f\r ]`) from `bytes`.
fn trim_ascii_end(mut bytes: &[u8]) -> &[u8] {
    while let [rest @ .., last] = bytes {
        if !is_trimmable(*last) {
            break;
        }
        bytes = rest;
    }
    bytes
}
883
#[cfg(test)]
mod tests {
    use crate::string_record::StringRecord;

    use super::ByteRecord;

    // Shorthand for viewing a string literal as a byte slice, so that
    // expected values can be compared against `ByteRecord::get` directly.
    fn b(s: &str) -> &[u8] {
        s.as_bytes()
    }

    // A record with a single field; lookups past the end return `None`.
    #[test]
    fn record_1() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"foo");

        assert_eq!(rec.len(), 1);
        assert_eq!(rec.get(0), Some(b("foo")));
        assert_eq!(rec.get(1), None);
        assert_eq!(rec.get(2), None);
    }

    // A record with two fields of different lengths.
    #[test]
    fn record_2() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"foo");
        rec.push_field(b"quux");

        assert_eq!(rec.len(), 2);
        assert_eq!(rec.get(0), Some(b("foo")));
        assert_eq!(rec.get(1), Some(b("quux")));
        assert_eq!(rec.get(2), None);
        assert_eq!(rec.get(3), None);
    }

    // A freshly created record has no fields at all.
    #[test]
    fn empty_record() {
        let rec = ByteRecord::new();

        assert_eq!(rec.len(), 0);
        assert_eq!(rec.get(0), None);
        assert_eq!(rec.get(1), None);
    }

    // A field made up entirely of whitespace trims down to an empty field.
    #[test]
    fn trim_whitespace_only() {
        let mut rec = ByteRecord::from(vec![b" \t\n\r\x0c"]);
        rec.trim();
        assert_eq!(rec.get(0), Some(b("")));
    }

    // Leading whitespace is removed, for one field and for several.
    #[test]
    fn trim_front() {
        let mut rec = ByteRecord::from(vec![b" abc"]);
        rec.trim();
        assert_eq!(rec.get(0), Some(b("abc")));

        let mut rec = ByteRecord::from(vec![b(" abc"), b(" xyz")]);
        rec.trim();
        assert_eq!(rec.get(0), Some(b("abc")));
        assert_eq!(rec.get(1), Some(b("xyz")));
    }

    // Trailing whitespace is removed, for one field and for several.
    #[test]
    fn trim_back() {
        let mut rec = ByteRecord::from(vec![b"abc "]);
        rec.trim();
        assert_eq!(rec.get(0), Some(b("abc")));

        let mut rec = ByteRecord::from(vec![b("abc "), b("xyz ")]);
        rec.trim();
        assert_eq!(rec.get(0), Some(b("abc")));
        assert_eq!(rec.get(1), Some(b("xyz")));
    }

    // Whitespace on both sides is removed, for one field and for several.
    #[test]
    fn trim_both() {
        let mut rec = ByteRecord::from(vec![b" abc "]);
        rec.trim();
        assert_eq!(rec.get(0), Some(b("abc")));

        let mut rec = ByteRecord::from(vec![b(" abc "), b(" xyz ")]);
        rec.trim();
        assert_eq!(rec.get(0), Some(b("abc")));
        assert_eq!(rec.get(1), Some(b("xyz")));
    }

    // Trimming a record whose only field is empty keeps that empty field.
    #[test]
    fn trim_does_not_panic_on_empty_records_1() {
        let mut rec = ByteRecord::from(vec![b""]);
        rec.trim();
        assert_eq!(rec.get(0), Some(b("")));
    }

    // Trimming a record with two empty fields keeps both of them.
    #[test]
    fn trim_does_not_panic_on_empty_records_2() {
        let mut rec = ByteRecord::from(vec![b"", b""]);
        rec.trim();
        assert_eq!(rec.get(0), Some(b("")));
        assert_eq!(rec.get(1), Some(b("")));
    }

    // Trimming a record without any fields is a no-op.
    #[test]
    fn trim_does_not_panic_on_empty_records_3() {
        let mut rec = ByteRecord::new();
        rec.trim();
        assert_eq!(rec.as_slice().len(), 0);
    }

    // An empty field still counts as a field.
    #[test]
    fn empty_field_1() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"");

        assert_eq!(rec.len(), 1);
        assert_eq!(rec.get(0), Some(b("")));
        assert_eq!(rec.get(1), None);
        assert_eq!(rec.get(2), None);
    }

    // Consecutive empty fields are kept distinct.
    #[test]
    fn empty_field_2() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"");
        rec.push_field(b"");

        assert_eq!(rec.len(), 2);
        assert_eq!(rec.get(0), Some(b("")));
        assert_eq!(rec.get(1), Some(b("")));
        assert_eq!(rec.get(2), None);
        assert_eq!(rec.get(3), None);
    }

    // An empty field between two non-empty fields keeps its place.
    #[test]
    fn empty_surround_1() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"foo");
        rec.push_field(b"");
        rec.push_field(b"quux");

        assert_eq!(rec.len(), 3);
        assert_eq!(rec.get(0), Some(b("foo")));
        assert_eq!(rec.get(1), Some(b("")));
        assert_eq!(rec.get(2), Some(b("quux")));
        assert_eq!(rec.get(3), None);
        assert_eq!(rec.get(4), None);
    }

    // Like `empty_surround_1`, with an additional trailing empty field.
    #[test]
    fn empty_surround_2() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"foo");
        rec.push_field(b"");
        rec.push_field(b"quux");
        rec.push_field(b"");

        assert_eq!(rec.len(), 4);
        assert_eq!(rec.get(0), Some(b("foo")));
        assert_eq!(rec.get(1), Some(b("")));
        assert_eq!(rec.get(2), Some(b("quux")));
        assert_eq!(rec.get(3), Some(b("")));
        assert_eq!(rec.get(4), None);
        assert_eq!(rec.get(5), None);
    }

    // Invalid byte in the middle of the second field.
    #[test]
    fn utf8_error_1() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"foo");
        rec.push_field(b"b\xFFar");

        let err = StringRecord::from_byte_record(rec).unwrap_err();
        assert_eq!(err.utf8_error().field(), 1);
        assert_eq!(err.utf8_error().valid_up_to(), 1);
    }

    // A field consisting of nothing but an invalid byte.
    #[test]
    fn utf8_error_2() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"\xFF");

        let err = StringRecord::from_byte_record(rec).unwrap_err();
        assert_eq!(err.utf8_error().field(), 0);
        assert_eq!(err.utf8_error().valid_up_to(), 0);
    }

    // Invalid byte at the end of the only field.
    #[test]
    fn utf8_error_3() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"a\xFF");

        let err = StringRecord::from_byte_record(rec).unwrap_err();
        assert_eq!(err.utf8_error().field(), 0);
        assert_eq!(err.utf8_error().valid_up_to(), 1);
    }

    // Invalid byte at the end of the last of several fields.
    #[test]
    fn utf8_error_4() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"a");
        rec.push_field(b"b");
        rec.push_field(b"c");
        rec.push_field(b"d");
        rec.push_field(b"xyz\xFF");

        let err = StringRecord::from_byte_record(rec).unwrap_err();
        assert_eq!(err.utf8_error().field(), 4);
        assert_eq!(err.utf8_error().valid_up_to(), 3);
    }

    // Invalid byte at the start of the last of several fields.
    #[test]
    fn utf8_error_5() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"a");
        rec.push_field(b"b");
        rec.push_field(b"c");
        rec.push_field(b"d");
        rec.push_field(b"\xFFxyz");

        let err = StringRecord::from_byte_record(rec).unwrap_err();
        assert_eq!(err.utf8_error().field(), 4);
        assert_eq!(err.utf8_error().valid_up_to(), 0);
    }

    // This tests a tricky case where a single field on its own isn't valid
    // UTF-8, but the concatenation of all fields is.
    #[test]
    fn utf8_error_6() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"a\xc9");
        rec.push_field(b"\x91b");

        let err = StringRecord::from_byte_record(rec).unwrap_err();
        assert_eq!(err.utf8_error().field(), 0);
        assert_eq!(err.utf8_error().valid_up_to(), 1);
    }

    // This tests that we can always clear a `ByteRecord` and get a guaranteed
    // successful conversion to UTF-8. This permits reusing the allocation.
    #[test]
    fn utf8_clear_ok() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"\xFF");
        assert!(StringRecord::from_byte_record(rec).is_err());

        let mut rec = ByteRecord::new();
        rec.push_field(b"\xFF");
        rec.clear();
        assert!(StringRecord::from_byte_record(rec).is_ok());
    }

    // Forward iteration yields the fields in insertion order.
    #[test]
    fn iter() {
        let data = vec!["foo", "bar", "baz", "quux", "wat"];
        let rec = ByteRecord::from(&*data);
        let got: Vec<&str> =
            rec.iter().map(|x| ::std::str::from_utf8(x).unwrap()).collect();
        assert_eq!(data, got);
    }

    // Reverse iteration yields the fields in reverse insertion order.
    #[test]
    fn iter_reverse() {
        let mut data = vec!["foo", "bar", "baz", "quux", "wat"];
        let rec = ByteRecord::from(&*data);
        let got: Vec<&str> = rec
            .iter()
            .rev()
            .map(|x| ::std::str::from_utf8(x).unwrap())
            .collect();
        data.reverse();
        assert_eq!(data, got);
    }

    // Interleaving `next` and `next_back` yields every field exactly once
    // and then reports exhaustion from both ends.
    #[test]
    fn iter_forward_and_reverse() {
        let data = vec!["foo", "bar", "baz", "quux", "wat"];
        let rec = ByteRecord::from(data);
        let mut it = rec.iter();

        assert_eq!(it.next_back(), Some(b("wat")));
        assert_eq!(it.next(), Some(b("foo")));
        assert_eq!(it.next(), Some(b("bar")));
        assert_eq!(it.next_back(), Some(b("quux")));
        assert_eq!(it.next(), Some(b("baz")));
        assert_eq!(it.next_back(), None);
        assert_eq!(it.next(), None);
    }

    // Check that record equality respects field boundaries.
    //
    // Regression test for #138.
    #[test]
    fn eq_field_boundaries() {
        let test1 = ByteRecord::from(vec!["12", "34"]);
        let test2 = ByteRecord::from(vec!["123", "4"]);

        assert_ne!(test1, test2);
    }

    // Check that record equality respects number of fields.
    //
    // Regression test for #138.
    #[test]
    fn eq_record_len() {
        let test1 = ByteRecord::from(vec!["12", "34", "56"]);
        let test2 = ByteRecord::from(vec!["12", "34"]);
        assert_ne!(test1, test2);
    }
}
1191}