csv/reader.rs
1use std::{
2 fs::File,
3 io::{self, BufRead, Seek},
4 marker::PhantomData,
5 path::Path,
6 result,
7};
8
9use {
10 csv_core::{Reader as CoreReader, ReaderBuilder as CoreReaderBuilder},
11 serde::de::DeserializeOwned,
12};
13
14use crate::{
15 byte_record::{ByteRecord, Position},
16 error::{Error, ErrorKind, Result, Utf8Error},
17 string_record::StringRecord,
18 {Terminator, Trim},
19};
20
21/// Builds a CSV reader with various configuration knobs.
22///
23/// This builder can be used to tweak the field delimiter, record terminator
24/// and more. Once a CSV `Reader` is built, its configuration cannot be
25/// changed.
26#[derive(Debug)]
27pub struct ReaderBuilder {
28 capacity: usize,
29 flexible: bool,
30 has_headers: bool,
31 trim: Trim,
32 /// The underlying CSV parser builder.
33 ///
34 /// We explicitly put this on the heap because CoreReaderBuilder embeds an
35 /// entire DFA transition table, which along with other things, tallies up
36 /// to almost 500 bytes on the stack.
37 builder: Box<CoreReaderBuilder>,
38}
39
40impl Default for ReaderBuilder {
41 fn default() -> ReaderBuilder {
42 ReaderBuilder {
43 capacity: 8 * (1 << 10),
44 flexible: false,
45 has_headers: true,
46 trim: Trim::default(),
47 builder: Box::new(CoreReaderBuilder::default()),
48 }
49 }
50}
51
52impl ReaderBuilder {
53 /// Create a new builder for configuring CSV parsing.
54 ///
55 /// To convert a builder into a reader, call one of the methods starting
56 /// with `from_`.
57 ///
58 /// # Example
59 ///
60 /// ```
61 /// use std::error::Error;
62 /// use csv::{ReaderBuilder, StringRecord};
63 ///
64 /// # fn main() { example().unwrap(); }
65 /// fn example() -> Result<(), Box<dyn Error>> {
66 /// let data = "\
67 /// city,country,pop
68 /// Boston,United States,4628910
69 /// Concord,United States,42695
70 /// ";
71 /// let mut rdr = ReaderBuilder::new().from_reader(data.as_bytes());
72 ///
73 /// let records = rdr
74 /// .records()
75 /// .collect::<Result<Vec<StringRecord>, csv::Error>>()?;
76 /// assert_eq!(records, vec![
77 /// vec!["Boston", "United States", "4628910"],
78 /// vec!["Concord", "United States", "42695"],
79 /// ]);
80 /// Ok(())
81 /// }
82 /// ```
83 pub fn new() -> ReaderBuilder {
84 ReaderBuilder::default()
85 }
86
87 /// Build a CSV parser from this configuration that reads data from the
88 /// given file path.
89 ///
90 /// If there was a problem opening the file at the given path, then this
91 /// returns the corresponding error.
92 ///
93 /// # Example
94 ///
95 /// ```no_run
96 /// use std::error::Error;
97 /// use csv::ReaderBuilder;
98 ///
99 /// # fn main() { example().unwrap(); }
100 /// fn example() -> Result<(), Box<dyn Error>> {
101 /// let mut rdr = ReaderBuilder::new().from_path("foo.csv")?;
102 /// for result in rdr.records() {
103 /// let record = result?;
104 /// println!("{:?}", record);
105 /// }
106 /// Ok(())
107 /// }
108 /// ```
109 pub fn from_path<P: AsRef<Path>>(&self, path: P) -> Result<Reader<File>> {
110 Ok(Reader::new(self, File::open(path)?))
111 }
112
113 /// Build a CSV parser from this configuration that reads data from `rdr`.
114 ///
115 /// Note that the CSV reader is buffered automatically, so you should not
116 /// wrap `rdr` in a buffered reader like `io::BufReader`.
117 ///
118 /// # Example
119 ///
120 /// ```
121 /// use std::error::Error;
122 /// use csv::ReaderBuilder;
123 ///
124 /// # fn main() { example().unwrap(); }
125 /// fn example() -> Result<(), Box<dyn Error>> {
126 /// let data = "\
127 /// city,country,pop
128 /// Boston,United States,4628910
129 /// Concord,United States,42695
130 /// ";
131 /// let mut rdr = ReaderBuilder::new().from_reader(data.as_bytes());
132 /// for result in rdr.records() {
133 /// let record = result?;
134 /// println!("{:?}", record);
135 /// }
136 /// Ok(())
137 /// }
138 /// ```
139 pub fn from_reader<R: io::Read>(&self, rdr: R) -> Reader<R> {
140 Reader::new(self, rdr)
141 }
142
143 /// The field delimiter to use when parsing CSV.
144 ///
145 /// The default is `b','`.
146 ///
147 /// # Example
148 ///
149 /// ```
150 /// use std::error::Error;
151 /// use csv::ReaderBuilder;
152 ///
153 /// # fn main() { example().unwrap(); }
154 /// fn example() -> Result<(), Box<dyn Error>> {
155 /// let data = "\
156 /// city;country;pop
157 /// Boston;United States;4628910
158 /// ";
159 /// let mut rdr = ReaderBuilder::new()
160 /// .delimiter(b';')
161 /// .from_reader(data.as_bytes());
162 ///
163 /// if let Some(result) = rdr.records().next() {
164 /// let record = result?;
165 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
166 /// Ok(())
167 /// } else {
168 /// Err(From::from("expected at least one record but got none"))
169 /// }
170 /// }
171 /// ```
172 pub fn delimiter(&mut self, delimiter: u8) -> &mut ReaderBuilder {
173 self.builder.delimiter(delimiter);
174 self
175 }
176
177 /// Whether to treat the first row as a special header row.
178 ///
179 /// By default, the first row is treated as a special header row, which
180 /// means the header is never returned by any of the record reading methods
181 /// or iterators. When this is disabled (`yes` set to `false`), the first
182 /// row is not treated specially.
183 ///
184 /// Note that the `headers` and `byte_headers` methods are unaffected by
185 /// whether this is set. Those methods always return the first record.
186 ///
187 /// # Example
188 ///
189 /// This example shows what happens when `has_headers` is disabled.
190 /// Namely, the first row is treated just like any other row.
191 ///
192 /// ```
193 /// use std::error::Error;
194 /// use csv::ReaderBuilder;
195 ///
196 /// # fn main() { example().unwrap(); }
197 /// fn example() -> Result<(), Box<dyn Error>> {
198 /// let data = "\
199 /// city,country,pop
200 /// Boston,United States,4628910
201 /// ";
202 /// let mut rdr = ReaderBuilder::new()
203 /// .has_headers(false)
204 /// .from_reader(data.as_bytes());
205 /// let mut iter = rdr.records();
206 ///
207 /// // Read the first record.
208 /// if let Some(result) = iter.next() {
209 /// let record = result?;
210 /// assert_eq!(record, vec!["city", "country", "pop"]);
211 /// } else {
212 /// return Err(From::from(
213 /// "expected at least two records but got none"));
214 /// }
215 ///
216 /// // Read the second record.
217 /// if let Some(result) = iter.next() {
218 /// let record = result?;
219 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
220 /// } else {
221 /// return Err(From::from(
222 /// "expected at least two records but got one"))
223 /// }
224 /// Ok(())
225 /// }
226 /// ```
227 pub fn has_headers(&mut self, yes: bool) -> &mut ReaderBuilder {
228 self.has_headers = yes;
229 self
230 }
231
232 /// Whether the number of fields in records is allowed to change or not.
233 ///
234 /// When disabled (which is the default), parsing CSV data will return an
235 /// error if a record is found with a number of fields different from the
236 /// number of fields in a previous record.
237 ///
238 /// When enabled, this error checking is turned off.
239 ///
240 /// # Example: flexible records enabled
241 ///
242 /// ```
243 /// use std::error::Error;
244 /// use csv::ReaderBuilder;
245 ///
246 /// # fn main() { example().unwrap(); }
247 /// fn example() -> Result<(), Box<dyn Error>> {
248 /// // Notice that the first row is missing the population count.
249 /// let data = "\
250 /// city,country,pop
251 /// Boston,United States
252 /// ";
253 /// let mut rdr = ReaderBuilder::new()
254 /// .flexible(true)
255 /// .from_reader(data.as_bytes());
256 ///
257 /// if let Some(result) = rdr.records().next() {
258 /// let record = result?;
259 /// assert_eq!(record, vec!["Boston", "United States"]);
260 /// Ok(())
261 /// } else {
262 /// Err(From::from("expected at least one record but got none"))
263 /// }
264 /// }
265 /// ```
266 ///
267 /// # Example: flexible records disabled
268 ///
269 /// This shows the error that appears when records of unequal length
270 /// are found and flexible records have been disabled (which is the
271 /// default).
272 ///
273 /// ```
274 /// use std::error::Error;
275 /// use csv::{ErrorKind, ReaderBuilder};
276 ///
277 /// # fn main() { example().unwrap(); }
278 /// fn example() -> Result<(), Box<dyn Error>> {
279 /// // Notice that the first row is missing the population count.
280 /// let data = "\
281 /// city,country,pop
282 /// Boston,United States
283 /// ";
284 /// let mut rdr = ReaderBuilder::new()
285 /// .flexible(false)
286 /// .from_reader(data.as_bytes());
287 ///
288 /// if let Some(Err(err)) = rdr.records().next() {
289 /// match *err.kind() {
290 /// ErrorKind::UnequalLengths { expected_len, len, .. } => {
291 /// // The header row has 3 fields...
292 /// assert_eq!(expected_len, 3);
293 /// // ... but the first row has only 2 fields.
294 /// assert_eq!(len, 2);
295 /// Ok(())
296 /// }
297 /// ref wrong => {
298 /// Err(From::from(format!(
299 /// "expected UnequalLengths error but got {:?}",
300 /// wrong)))
301 /// }
302 /// }
303 /// } else {
304 /// Err(From::from(
305 /// "expected at least one errored record but got none"))
306 /// }
307 /// }
308 /// ```
309 pub fn flexible(&mut self, yes: bool) -> &mut ReaderBuilder {
310 self.flexible = yes;
311 self
312 }
313
314 /// Whether fields are trimmed of leading and trailing whitespace or not.
315 ///
316 /// By default, no trimming is performed. This method permits one to
317 /// override that behavior and choose one of the following options:
318 ///
319 /// 1. `Trim::Headers` trims only header values.
320 /// 2. `Trim::Fields` trims only non-header or "field" values.
321 /// 3. `Trim::All` trims both header and non-header values.
322 ///
323 /// A value is only interpreted as a header value if this CSV reader is
324 /// configured to read a header record (which is the default).
325 ///
326 /// When reading string records, characters meeting the definition of
327 /// Unicode whitespace are trimmed. When reading byte records, characters
328 /// meeting the definition of ASCII whitespace are trimmed. ASCII
329 /// whitespace characters correspond to the set `[\t\n\v\f\r ]`.
330 ///
331 /// # Example
332 ///
333 /// This example shows what happens when all values are trimmed.
334 ///
335 /// ```
336 /// use std::error::Error;
337 /// use csv::{ReaderBuilder, StringRecord, Trim};
338 ///
339 /// # fn main() { example().unwrap(); }
340 /// fn example() -> Result<(), Box<dyn Error>> {
341 /// let data = "\
342 /// city , country , pop
343 /// Boston,\"
344 /// United States\",4628910
345 /// Concord, United States ,42695
346 /// ";
347 /// let mut rdr = ReaderBuilder::new()
348 /// .trim(Trim::All)
349 /// .from_reader(data.as_bytes());
350 /// let records = rdr
351 /// .records()
352 /// .collect::<Result<Vec<StringRecord>, csv::Error>>()?;
353 /// assert_eq!(records, vec![
354 /// vec!["Boston", "United States", "4628910"],
355 /// vec!["Concord", "United States", "42695"],
356 /// ]);
357 /// Ok(())
358 /// }
359 /// ```
360 pub fn trim(&mut self, trim: Trim) -> &mut ReaderBuilder {
361 self.trim = trim;
362 self
363 }
364
365 /// The record terminator to use when parsing CSV.
366 ///
367 /// A record terminator can be any single byte. The default is a special
368 /// value, `Terminator::CRLF`, which treats any occurrence of `\r`, `\n`
369 /// or `\r\n` as a single record terminator.
370 ///
371 /// # Example: `$` as a record terminator
372 ///
373 /// ```
374 /// use std::error::Error;
375 /// use csv::{ReaderBuilder, Terminator};
376 ///
377 /// # fn main() { example().unwrap(); }
378 /// fn example() -> Result<(), Box<dyn Error>> {
379 /// let data = "city,country,pop$Boston,United States,4628910";
380 /// let mut rdr = ReaderBuilder::new()
381 /// .terminator(Terminator::Any(b'$'))
382 /// .from_reader(data.as_bytes());
383 ///
384 /// if let Some(result) = rdr.records().next() {
385 /// let record = result?;
386 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
387 /// Ok(())
388 /// } else {
389 /// Err(From::from("expected at least one record but got none"))
390 /// }
391 /// }
392 /// ```
393 pub fn terminator(&mut self, term: Terminator) -> &mut ReaderBuilder {
394 self.builder.terminator(term.to_core());
395 self
396 }
397
398 /// The quote character to use when parsing CSV.
399 ///
400 /// The default is `b'"'`.
401 ///
402 /// # Example: single quotes instead of double quotes
403 ///
404 /// ```
405 /// use std::error::Error;
406 /// use csv::ReaderBuilder;
407 ///
408 /// # fn main() { example().unwrap(); }
409 /// fn example() -> Result<(), Box<dyn Error>> {
410 /// let data = "\
411 /// city,country,pop
412 /// Boston,'United States',4628910
413 /// ";
414 /// let mut rdr = ReaderBuilder::new()
415 /// .quote(b'\'')
416 /// .from_reader(data.as_bytes());
417 ///
418 /// if let Some(result) = rdr.records().next() {
419 /// let record = result?;
420 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
421 /// Ok(())
422 /// } else {
423 /// Err(From::from("expected at least one record but got none"))
424 /// }
425 /// }
426 /// ```
427 pub fn quote(&mut self, quote: u8) -> &mut ReaderBuilder {
428 self.builder.quote(quote);
429 self
430 }
431
432 /// The escape character to use when parsing CSV.
433 ///
434 /// In some variants of CSV, quotes are escaped using a special escape
435 /// character like `\` (instead of escaping quotes by doubling them).
436 ///
437 /// By default, recognizing these idiosyncratic escapes is disabled.
438 ///
439 /// # Example
440 ///
441 /// ```
442 /// use std::error::Error;
443 /// use csv::ReaderBuilder;
444 ///
445 /// # fn main() { example().unwrap(); }
446 /// fn example() -> Result<(), Box<dyn Error>> {
447 /// let data = "\
448 /// city,country,pop
449 /// Boston,\"The \\\"United\\\" States\",4628910
450 /// ";
451 /// let mut rdr = ReaderBuilder::new()
452 /// .escape(Some(b'\\'))
453 /// .from_reader(data.as_bytes());
454 ///
455 /// if let Some(result) = rdr.records().next() {
456 /// let record = result?;
457 /// assert_eq!(record, vec![
458 /// "Boston", "The \"United\" States", "4628910",
459 /// ]);
460 /// Ok(())
461 /// } else {
462 /// Err(From::from("expected at least one record but got none"))
463 /// }
464 /// }
465 /// ```
466 pub fn escape(&mut self, escape: Option<u8>) -> &mut ReaderBuilder {
467 self.builder.escape(escape);
468 self
469 }
470
471 /// Enable double quote escapes.
472 ///
473 /// This is enabled by default, but it may be disabled. When disabled,
474 /// doubled quotes are not interpreted as escapes.
475 ///
476 /// # Example
477 ///
478 /// ```
479 /// use std::error::Error;
480 /// use csv::ReaderBuilder;
481 ///
482 /// # fn main() { example().unwrap(); }
483 /// fn example() -> Result<(), Box<dyn Error>> {
484 /// let data = "\
485 /// city,country,pop
486 /// Boston,\"The \"\"United\"\" States\",4628910
487 /// ";
488 /// let mut rdr = ReaderBuilder::new()
489 /// .double_quote(false)
490 /// .from_reader(data.as_bytes());
491 ///
492 /// if let Some(result) = rdr.records().next() {
493 /// let record = result?;
494 /// assert_eq!(record, vec![
495 /// "Boston", "The \"United\"\" States\"", "4628910",
496 /// ]);
497 /// Ok(())
498 /// } else {
499 /// Err(From::from("expected at least one record but got none"))
500 /// }
501 /// }
502 /// ```
503 pub fn double_quote(&mut self, yes: bool) -> &mut ReaderBuilder {
504 self.builder.double_quote(yes);
505 self
506 }
507
508 /// Enable or disable quoting.
509 ///
510 /// This is enabled by default, but it may be disabled. When disabled,
511 /// quotes are not treated specially.
512 ///
513 /// # Example
514 ///
515 /// ```
516 /// use std::error::Error;
517 /// use csv::ReaderBuilder;
518 ///
519 /// # fn main() { example().unwrap(); }
520 /// fn example() -> Result<(), Box<dyn Error>> {
521 /// let data = "\
522 /// city,country,pop
523 /// Boston,\"The United States,4628910
524 /// ";
525 /// let mut rdr = ReaderBuilder::new()
526 /// .quoting(false)
527 /// .from_reader(data.as_bytes());
528 ///
529 /// if let Some(result) = rdr.records().next() {
530 /// let record = result?;
531 /// assert_eq!(record, vec![
532 /// "Boston", "\"The United States", "4628910",
533 /// ]);
534 /// Ok(())
535 /// } else {
536 /// Err(From::from("expected at least one record but got none"))
537 /// }
538 /// }
539 /// ```
540 pub fn quoting(&mut self, yes: bool) -> &mut ReaderBuilder {
541 self.builder.quoting(yes);
542 self
543 }
544
545 /// The comment character to use when parsing CSV.
546 ///
547 /// If a record starts with the byte given here, then that line is
548 /// ignored by the CSV parser.
549 ///
550 /// This is disabled by default.
551 ///
552 /// # Example
553 ///
554 /// ```
555 /// use std::error::Error;
556 /// use csv::ReaderBuilder;
557 ///
558 /// # fn main() { example().unwrap(); }
559 /// fn example() -> Result<(), Box<dyn Error>> {
560 /// let data = "\
561 /// city,country,pop
562 /// #Concord,United States,42695
563 /// Boston,United States,4628910
564 /// ";
565 /// let mut rdr = ReaderBuilder::new()
566 /// .comment(Some(b'#'))
567 /// .from_reader(data.as_bytes());
568 ///
569 /// if let Some(result) = rdr.records().next() {
570 /// let record = result?;
571 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
572 /// Ok(())
573 /// } else {
574 /// Err(From::from("expected at least one record but got none"))
575 /// }
576 /// }
577 /// ```
578 pub fn comment(&mut self, comment: Option<u8>) -> &mut ReaderBuilder {
579 self.builder.comment(comment);
580 self
581 }
582
583 /// A convenience method for specifying a configuration to read ASCII
584 /// delimited text.
585 ///
586 /// This sets the delimiter and record terminator to the ASCII unit
587 /// separator (`\x1F`) and record separator (`\x1E`), respectively.
588 ///
589 /// # Example
590 ///
591 /// ```
592 /// use std::error::Error;
593 /// use csv::ReaderBuilder;
594 ///
595 /// # fn main() { example().unwrap(); }
596 /// fn example() -> Result<(), Box<dyn Error>> {
597 /// let data = "\
598 /// city\x1Fcountry\x1Fpop\x1EBoston\x1FUnited States\x1F4628910";
599 /// let mut rdr = ReaderBuilder::new()
600 /// .ascii()
601 /// .from_reader(data.as_bytes());
602 ///
603 /// if let Some(result) = rdr.records().next() {
604 /// let record = result?;
605 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
606 /// Ok(())
607 /// } else {
608 /// Err(From::from("expected at least one record but got none"))
609 /// }
610 /// }
611 /// ```
612 pub fn ascii(&mut self) -> &mut ReaderBuilder {
613 self.builder.ascii();
614 self
615 }
616
617 /// Set the capacity (in bytes) of the buffer used in the CSV reader.
618 /// This defaults to 8 KiB (8,192 bytes).
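    ///
    /// # Example
    ///
    /// A minimal sketch of requesting a larger buffer; the 64 KiB value
    /// below is purely illustrative, not a recommendation.
    ///
    /// ```
    /// use csv::ReaderBuilder;
    ///
    /// let data = "city,country,pop\nBoston,United States,4628910\n";
    /// let mut rdr = ReaderBuilder::new()
    ///     .buffer_capacity(64 * (1 << 10))
    ///     .from_reader(data.as_bytes());
    /// assert!(rdr.records().next().is_some());
    /// ```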
619 pub fn buffer_capacity(&mut self, capacity: usize) -> &mut ReaderBuilder {
620 self.capacity = capacity;
621 self
622 }
623
624 /// Enable or disable the NFA for parsing CSV.
625 ///
626 /// This is intended to be a debug option. The NFA is always slower than
627 /// the DFA.
628 #[doc(hidden)]
629 pub fn nfa(&mut self, yes: bool) -> &mut ReaderBuilder {
630 self.builder.nfa(yes);
631 self
632 }
633}
634
635/// An already configured CSV reader.
636///
637/// A CSV reader takes as input CSV data and transforms that into standard Rust
638/// values. The most flexible way to read CSV data is as a sequence of records,
639/// where a record is a sequence of fields and each field is a string. However,
640/// a reader can also deserialize CSV data into Rust types like `i64` or
641/// `(String, f64, f64, f64)` or even a custom struct automatically using
642/// Serde.
643///
644/// # Configuration
645///
646/// A CSV reader has a couple of convenient constructor methods like `from_path`
647/// and `from_reader`. However, if you want to configure the CSV reader to use
648/// a different delimiter or quote character (among many other things), then
649/// you should use a [`ReaderBuilder`](struct.ReaderBuilder.html) to construct
650/// a `Reader`. For example, to change the field delimiter:
651///
652/// ```
653/// use std::error::Error;
654/// use csv::ReaderBuilder;
655///
656/// # fn main() { example().unwrap(); }
657/// fn example() -> Result<(), Box<dyn Error>> {
658/// let data = "\
659/// city;country;pop
660/// Boston;United States;4628910
661/// ";
662/// let mut rdr = ReaderBuilder::new()
663/// .delimiter(b';')
664/// .from_reader(data.as_bytes());
665///
666/// if let Some(result) = rdr.records().next() {
667/// let record = result?;
668/// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
669/// Ok(())
670/// } else {
671/// Err(From::from("expected at least one record but got none"))
672/// }
673/// }
674/// ```
675///
676/// # Error handling
677///
678/// In general, CSV *parsing* does not ever return an error. That is, there is
679/// no such thing as malformed CSV data. Instead, this reader will prioritize
680/// finding a parse over rejecting CSV data that it does not understand. This
681/// choice was inspired by other popular CSV parsers, but also because it is
682/// pragmatic. CSV data varies wildly, so even if the CSV data is malformed,
683/// it might still be possible to work with the data. In the land of CSV, there
684/// is no "right" or "wrong," only "right" and "less right."
685///
686/// With that said, a number of errors can occur while reading CSV data:
687///
688/// * By default, all records in CSV data must have the same number of fields.
689/// If a record is found with a different number of fields than a prior
690/// record, then an error is returned. This behavior can be disabled by
691/// enabling flexible parsing via the `flexible` method on
692/// [`ReaderBuilder`](struct.ReaderBuilder.html).
693/// * When reading CSV data from a resource (like a file), it is possible for
694/// reading from the underlying resource to fail. This will return an error.
695/// After such an error is encountered, subsequent calls to the `Reader`
696/// (unless `seek` is used) will behave as if end of file had been reached,
697/// in order to avoid running into an infinite loop of repeatedly attempting
698/// to read a record that has already errored.
699/// * When reading CSV data into `String` or `&str` fields (e.g., via a
700/// [`StringRecord`](struct.StringRecord.html)), UTF-8 is strictly
701/// enforced. If CSV data is invalid UTF-8, then an error is returned. If
702/// you want to read invalid UTF-8, then you should use the byte oriented
703/// APIs such as [`ByteRecord`](struct.ByteRecord.html); a short sketch of
704/// this appears in the example below. If you need explicit support for
705/// another encoding entirely, then you'll need to use another crate to
706/// transcode your CSV data to UTF-8 before parsing it.
706/// * When using Serde to deserialize CSV data into Rust types, it is possible
707/// for a number of additional errors to occur. For example, deserializing
708/// a field `xyz` into an `i32` field will result in an error.
709///
710/// For more details on the precise semantics of errors, see the
711/// [`Error`](enum.Error.html) type.
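///
/// # Example: reading invalid UTF-8
///
/// A small sketch (with made-up data) of falling back to the byte oriented
/// API when a field may not be valid UTF-8:
///
/// ```
/// use std::error::Error;
/// use csv::Reader;
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
///     // The second field of the data row contains the invalid UTF-8
///     // byte `\xFF`.
///     let data = b"city,country\nBoston,\xFFUnited States\n";
///     let mut rdr = Reader::from_reader(&data[..]);
///
///     // `records()` would return a UTF-8 error for this row, but
///     // `byte_records()` hands the raw bytes back untouched.
///     if let Some(result) = rdr.byte_records().next() {
///         let record = result?;
///         assert_eq!(record.get(1), Some(&b"\xFFUnited States"[..]));
///         Ok(())
///     } else {
///         Err(From::from("expected at least one record but got none"))
///     }
/// }
/// ```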
712#[derive(Debug)]
713pub struct Reader<R> {
714 /// The underlying CSV parser.
715 ///
716 /// We explicitly put this on the heap because CoreReader embeds an entire
717 /// DFA transition table, which along with other things, tallies up to
718 /// almost 500 bytes on the stack.
719 core: Box<CoreReader>,
720 /// The underlying reader.
721 rdr: io::BufReader<R>,
722 /// Various state tracking.
723 ///
724 /// There is more state embedded in the `CoreReader`.
725 state: ReaderState,
726}
727
728#[derive(Debug)]
729struct ReaderState {
730 /// When set, this contains the first row of any parsed CSV data.
731 ///
732 /// This is always populated, regardless of whether `has_headers` is set.
733 headers: Option<Headers>,
734 /// When set, the first row of parsed CSV data is excluded from things
735 /// that read records, like iterators and `read_record`.
736 has_headers: bool,
737 /// When set, there is no restriction on the length of records. When not
738 /// set, every record must have the same number of fields, or else an error
739 /// is reported.
740 flexible: bool,
741 trim: Trim,
742 /// The number of fields in the first record parsed.
743 first_field_count: Option<u64>,
744 /// The current position of the parser.
745 ///
746 /// Note that this position is only observable by callers at the start
747 /// of a record. More granular positions are not supported.
748 cur_pos: Position,
749 /// Whether the first record has been read or not.
750 first: bool,
751 /// Whether the reader has been seeked or not.
752 seeked: bool,
753 /// Whether EOF of the underlying reader has been reached or not.
754 ///
755 /// IO errors on the underlying reader will be considered as an EOF for
756 /// subsequent read attempts, as it would be incorrect to keep on trying
757 /// to read when the underlying reader has broken.
758 ///
759 /// For clarity, to get the best `Debug` impl, and in case the two ever
760 /// need to be treated differently, we record whether the `EOF` state was
761 /// reached because an actual EOF happened or because we encountered an
762 /// IO error.
763 /// This has no additional runtime cost.
764 eof: ReaderEofState,
765}
766
767/// Whether EOF of the underlying reader has been reached or not.
768///
769/// IO errors on the underlying reader will be considered as an EOF for
770/// subsequent read attempts, as it would be incorrect to keep on trying
771/// to read when the underlying reader has broken.
772///
773/// For clarity, to get the best `Debug` impl, and in case the two ever
774/// need to be treated differently, we record whether the `EOF` state was
775/// reached because an actual EOF happened or because we encountered an
776/// IO error.
777#[derive(Debug, Clone, Copy, PartialEq, Eq)]
778enum ReaderEofState {
779 NotEof,
780 Eof,
781 IOError,
782}
783
784/// Headers encapsulates any data associated with the headers of CSV data.
785///
786/// The headers always correspond to the first row.
787#[derive(Debug)]
788struct Headers {
789 /// The header, as raw bytes.
790 byte_record: ByteRecord,
791 /// The header, as valid UTF-8 (or a UTF-8 error).
792 string_record: result::Result<StringRecord, Utf8Error>,
793}
794
795impl Reader<File> {
796 /// Create a new CSV parser with a default configuration for the given
797 /// file path.
798 ///
799 /// To customize CSV parsing, use a `ReaderBuilder`.
800 ///
801 /// # Example
802 ///
803 /// ```no_run
804 /// use std::error::Error;
805 /// use csv::Reader;
806 ///
807 /// # fn main() { example().unwrap(); }
808 /// fn example() -> Result<(), Box<dyn Error>> {
809 /// let mut rdr = Reader::from_path("foo.csv")?;
810 /// for result in rdr.records() {
811 /// let record = result?;
812 /// println!("{:?}", record);
813 /// }
814 /// Ok(())
815 /// }
816 /// ```
817 pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Reader<File>> {
818 ReaderBuilder::new().from_path(path)
819 }
820}
821
822impl<R: io::Read> Reader<R> {
823 /// Create a new CSV reader given a builder and a source of underlying
824 /// bytes.
825 fn new(builder: &ReaderBuilder, rdr: R) -> Reader<R> {
826 Reader {
827 core: Box::new(builder.builder.build()),
828 rdr: io::BufReader::with_capacity(builder.capacity, rdr),
829 state: ReaderState {
830 headers: None,
831 has_headers: builder.has_headers,
832 flexible: builder.flexible,
833 trim: builder.trim,
834 first_field_count: None,
835 cur_pos: Position::new(),
836 first: false,
837 seeked: false,
838 eof: ReaderEofState::NotEof,
839 },
840 }
841 }
842
843 /// Create a new CSV parser with a default configuration for the given
844 /// reader.
845 ///
846 /// To customize CSV parsing, use a `ReaderBuilder`.
847 ///
848 /// # Example
849 ///
850 /// ```
851 /// use std::error::Error;
852 /// use csv::Reader;
853 ///
854 /// # fn main() { example().unwrap(); }
855 /// fn example() -> Result<(), Box<dyn Error>> {
856 /// let data = "\
857 /// city,country,pop
858 /// Boston,United States,4628910
859 /// Concord,United States,42695
860 /// ";
861 /// let mut rdr = Reader::from_reader(data.as_bytes());
862 /// for result in rdr.records() {
863 /// let record = result?;
864 /// println!("{:?}", record);
865 /// }
866 /// Ok(())
867 /// }
868 /// ```
869 pub fn from_reader(rdr: R) -> Reader<R> {
870 ReaderBuilder::new().from_reader(rdr)
871 }
872
873 /// Returns a borrowed iterator over deserialized records.
874 ///
875 /// Each item yielded by this iterator is a `Result<D, Error>`.
876 /// Therefore, in order to access the record, callers must handle the
877 /// possibility of error (typically with `try!` or `?`).
878 ///
879 /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
880 /// default), then this does not include the first record. Additionally,
881 /// if `has_headers` is enabled, then deserializing into a struct will
882 /// automatically align the values in each row to the fields of a struct
883 /// based on the header row.
884 ///
885 /// # Example
886 ///
887 /// This shows how to deserialize CSV data into normal Rust structs. The
888 /// fields of the header row are used to match up the values in each row
889 /// to the fields of the struct.
890 ///
891 /// ```
892 /// use std::error::Error;
893 ///
894 /// #[derive(Debug, serde::Deserialize, Eq, PartialEq)]
895 /// struct Row {
896 /// city: String,
897 /// country: String,
898 /// #[serde(rename = "popcount")]
899 /// population: u64,
900 /// }
901 ///
902 /// # fn main() { example().unwrap(); }
903 /// fn example() -> Result<(), Box<dyn Error>> {
904 /// let data = "\
905 /// city,country,popcount
906 /// Boston,United States,4628910
907 /// ";
908 /// let mut rdr = csv::Reader::from_reader(data.as_bytes());
909 /// let mut iter = rdr.deserialize();
910 ///
911 /// if let Some(result) = iter.next() {
912 /// let record: Row = result?;
913 /// assert_eq!(record, Row {
914 /// city: "Boston".to_string(),
915 /// country: "United States".to_string(),
916 /// population: 4628910,
917 /// });
918 /// Ok(())
919 /// } else {
920 /// Err(From::from("expected at least one record but got none"))
921 /// }
922 /// }
923 /// ```
924 ///
925 /// # Rules
926 ///
927 /// For the most part, any Rust type that maps straightforwardly to a CSV
928 /// record is supported. This includes maps, structs, tuples and tuple
929 /// structs. Other Rust types, such as `Vec`s, arrays, and enums have
930 /// a more complicated story. In general, when working with CSV data, one
931 /// should avoid *nested sequences* as much as possible.
932 ///
933 /// Maps, structs, tuples and tuple structs map to CSV records in a simple
934 /// way. Tuples and tuple structs decode their fields in the order that
935 /// they are defined. Structs will do the same only if `has_headers` has
936 /// been disabled using [`ReaderBuilder`](struct.ReaderBuilder.html),
937 /// otherwise, structs and maps are deserialized based on the fields
938 /// defined in the header row. (If there is no header row, then
939 /// deserializing into a map will result in an error.)
940 ///
941 /// Nested sequences are supported in a limited capacity. Namely, they
942 /// are flattened. As a result, it's often useful to use a `Vec` to capture
943 /// a "tail" of fields in a record:
944 ///
945 /// ```
946 /// use std::error::Error;
947 ///
948 /// #[derive(Debug, serde::Deserialize, Eq, PartialEq)]
949 /// struct Row {
950 /// label: String,
951 /// values: Vec<i32>,
952 /// }
953 ///
954 /// # fn main() { example().unwrap(); }
955 /// fn example() -> Result<(), Box<dyn Error>> {
956 /// let data = "foo,1,2,3";
957 /// let mut rdr = csv::ReaderBuilder::new()
958 /// .has_headers(false)
959 /// .from_reader(data.as_bytes());
960 /// let mut iter = rdr.deserialize();
961 ///
962 /// if let Some(result) = iter.next() {
963 /// let record: Row = result?;
964 /// assert_eq!(record, Row {
965 /// label: "foo".to_string(),
966 /// values: vec![1, 2, 3],
967 /// });
968 /// Ok(())
969 /// } else {
970 /// Err(From::from("expected at least one record but got none"))
971 /// }
972 /// }
973 /// ```
974 ///
975 /// In the above example, adding another field to the `Row` struct after
976 /// the `values` field will result in a deserialization error. This is
977 /// because the deserializer doesn't know when to stop reading fields
978 /// into the `values` vector, so it will consume the rest of the fields in
979 /// the record, leaving none left over for the additional field.
980 ///
981 /// Finally, simple enums in Rust can be deserialized as well. Namely,
982 /// enums must either be variants with no arguments or variants with a
983 /// single argument. Variants with no arguments are deserialized based on
984 /// which variant name the field matches. Variants with a single argument
985 /// are deserialized based on which variant can store the data. The latter
986 /// is only supported when using "untagged" enum deserialization. The
987 /// following example shows both forms in action:
988 ///
989 /// ```
990 /// use std::error::Error;
991 ///
992 /// #[derive(Debug, serde::Deserialize, PartialEq)]
993 /// struct Row {
994 /// label: Label,
995 /// value: Number,
996 /// }
997 ///
998 /// #[derive(Debug, serde::Deserialize, PartialEq)]
999 /// #[serde(rename_all = "lowercase")]
1000 /// enum Label {
1001 /// Celsius,
1002 /// Fahrenheit,
1003 /// }
1004 ///
1005 /// #[derive(Debug, serde::Deserialize, PartialEq)]
1006 /// #[serde(untagged)]
1007 /// enum Number {
1008 /// Integer(i64),
1009 /// Float(f64),
1010 /// }
1011 ///
1012 /// # fn main() { example().unwrap(); }
1013 /// fn example() -> Result<(), Box<dyn Error>> {
1014 /// let data = "\
1015 /// label,value
1016 /// celsius,22.2222
1017 /// fahrenheit,72
1018 /// ";
1019 /// let mut rdr = csv::Reader::from_reader(data.as_bytes());
1020 /// let mut iter = rdr.deserialize();
1021 ///
1022 /// // Read the first record.
1023 /// if let Some(result) = iter.next() {
1024 /// let record: Row = result?;
1025 /// assert_eq!(record, Row {
1026 /// label: Label::Celsius,
1027 /// value: Number::Float(22.2222),
1028 /// });
1029 /// } else {
1030 /// return Err(From::from(
1031 /// "expected at least two records but got none"));
1032 /// }
1033 ///
1034 /// // Read the second record.
1035 /// if let Some(result) = iter.next() {
1036 /// let record: Row = result?;
1037 /// assert_eq!(record, Row {
1038 /// label: Label::Fahrenheit,
1039 /// value: Number::Integer(72),
1040 /// });
1041 /// Ok(())
1042 /// } else {
1043 /// Err(From::from(
1044 /// "expected at least two records but got only one"))
1045 /// }
1046 /// }
1047 /// ```
1048 pub fn deserialize<D>(&mut self) -> DeserializeRecordsIter<R, D>
1049 where
1050 D: DeserializeOwned,
1051 {
1052 DeserializeRecordsIter::new(self)
1053 }
1054
1055 /// Returns an owned iterator over deserialized records.
1056 ///
1057 /// Each item yielded by this iterator is a `Result<D, Error>`.
1058 /// Therefore, in order to access the record, callers must handle the
1059 /// possibility of error (typically with `try!` or `?`).
1060 ///
1061 /// This is mostly useful when you want to return a CSV iterator or store
1062 /// it somewhere.
1063 ///
1064 /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
1065 /// default), then this does not include the first record. Additionally,
1066 /// if `has_headers` is enabled, then deserializing into a struct will
1067 /// automatically align the values in each row to the fields of a struct
1068 /// based on the header row.
1069 ///
1070 /// For more detailed deserialization rules, see the documentation on the
1071 /// `deserialize` method.
1072 ///
1073 /// # Example
1074 ///
1075 /// ```
1076 /// use std::error::Error;
1077 ///
1078 /// #[derive(Debug, serde::Deserialize, Eq, PartialEq)]
1079 /// struct Row {
1080 /// city: String,
1081 /// country: String,
1082 /// #[serde(rename = "popcount")]
1083 /// population: u64,
1084 /// }
1085 ///
1086 /// # fn main() { example().unwrap(); }
1087 /// fn example() -> Result<(), Box<dyn Error>> {
1088 /// let data = "\
1089 /// city,country,popcount
1090 /// Boston,United States,4628910
1091 /// ";
1092 /// let rdr = csv::Reader::from_reader(data.as_bytes());
1093 /// let mut iter = rdr.into_deserialize();
1094 ///
1095 /// if let Some(result) = iter.next() {
1096 /// let record: Row = result?;
1097 /// assert_eq!(record, Row {
1098 /// city: "Boston".to_string(),
1099 /// country: "United States".to_string(),
1100 /// population: 4628910,
1101 /// });
1102 /// Ok(())
1103 /// } else {
1104 /// Err(From::from("expected at least one record but got none"))
1105 /// }
1106 /// }
1107 /// ```
1108 pub fn into_deserialize<D>(self) -> DeserializeRecordsIntoIter<R, D>
1109 where
1110 D: DeserializeOwned,
1111 {
1112 DeserializeRecordsIntoIter::new(self)
1113 }
1114
1115 /// Returns a borrowed iterator over all records as strings.
1116 ///
1117 /// Each item yielded by this iterator is a `Result<StringRecord, Error>`.
1118 /// Therefore, in order to access the record, callers must handle the
1119 /// possibility of error (typically with `try!` or `?`).
1120 ///
1121 /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
1122 /// default), then this does not include the first record.
1123 ///
1124 /// # Example
1125 ///
1126 /// ```
1127 /// use std::error::Error;
1128 /// use csv::Reader;
1129 ///
1130 /// # fn main() { example().unwrap(); }
1131 /// fn example() -> Result<(), Box<dyn Error>> {
1132 /// let data = "\
1133 /// city,country,pop
1134 /// Boston,United States,4628910
1135 /// ";
1136 /// let mut rdr = Reader::from_reader(data.as_bytes());
1137 /// let mut iter = rdr.records();
1138 ///
1139 /// if let Some(result) = iter.next() {
1140 /// let record = result?;
1141 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1142 /// Ok(())
1143 /// } else {
1144 /// Err(From::from("expected at least one record but got none"))
1145 /// }
1146 /// }
1147 /// ```
1148 pub fn records(&mut self) -> StringRecordsIter<R> {
1149 StringRecordsIter::new(self)
1150 }
1151
1152 /// Returns an owned iterator over all records as strings.
1153 ///
1154 /// Each item yielded by this iterator is a `Result<StringRecord, Error>`.
1155 /// Therefore, in order to access the record, callers must handle the
1156 /// possibility of error (typically with `try!` or `?`).
1157 ///
1158 /// This is mostly useful when you want to return a CSV iterator or store
1159 /// it somewhere.
1160 ///
1161 /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
1162 /// default), then this does not include the first record.
1163 ///
1164 /// # Example
1165 ///
1166 /// ```
1167 /// use std::error::Error;
1168 /// use csv::Reader;
1169 ///
1170 /// # fn main() { example().unwrap(); }
1171 /// fn example() -> Result<(), Box<dyn Error>> {
1172 /// let data = "\
1173 /// city,country,pop
1174 /// Boston,United States,4628910
1175 /// ";
1176 /// let rdr = Reader::from_reader(data.as_bytes());
1177 /// let mut iter = rdr.into_records();
1178 ///
1179 /// if let Some(result) = iter.next() {
1180 /// let record = result?;
1181 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1182 /// Ok(())
1183 /// } else {
1184 /// Err(From::from("expected at least one record but got none"))
1185 /// }
1186 /// }
1187 /// ```
1188 pub fn into_records(self) -> StringRecordsIntoIter<R> {
1189 StringRecordsIntoIter::new(self)
1190 }
1191
1192 /// Returns a borrowed iterator over all records as raw bytes.
1193 ///
1194 /// Each item yielded by this iterator is a `Result<ByteRecord, Error>`.
1195 /// Therefore, in order to access the record, callers must handle the
1196 /// possibility of error (typically with `try!` or `?`).
1197 ///
1198 /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
1199 /// default), then this does not include the first record.
1200 ///
1201 /// # Example
1202 ///
1203 /// ```
1204 /// use std::error::Error;
1205 /// use csv::Reader;
1206 ///
1207 /// # fn main() { example().unwrap(); }
1208 /// fn example() -> Result<(), Box<dyn Error>> {
1209 /// let data = "\
1210 /// city,country,pop
1211 /// Boston,United States,4628910
1212 /// ";
1213 /// let mut rdr = Reader::from_reader(data.as_bytes());
1214 /// let mut iter = rdr.byte_records();
1215 ///
1216 /// if let Some(result) = iter.next() {
1217 /// let record = result?;
1218 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1219 /// Ok(())
1220 /// } else {
1221 /// Err(From::from("expected at least one record but got none"))
1222 /// }
1223 /// }
1224 /// ```
1225 pub fn byte_records(&mut self) -> ByteRecordsIter<R> {
1226 ByteRecordsIter::new(self)
1227 }
1228
1229 /// Returns an owned iterator over all records as raw bytes.
1230 ///
1231 /// Each item yielded by this iterator is a `Result<ByteRecord, Error>`.
1232 /// Therefore, in order to access the record, callers must handle the
1233 /// possibility of error (typically with `try!` or `?`).
1234 ///
1235 /// This is mostly useful when you want to return a CSV iterator or store
1236 /// it somewhere.
1237 ///
1238 /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
1239 /// default), then this does not include the first record.
1240 ///
1241 /// # Example
1242 ///
1243 /// ```
1244 /// use std::error::Error;
1245 /// use csv::Reader;
1246 ///
1247 /// # fn main() { example().unwrap(); }
1248 /// fn example() -> Result<(), Box<dyn Error>> {
1249 /// let data = "\
1250 /// city,country,pop
1251 /// Boston,United States,4628910
1252 /// ";
1253 /// let rdr = Reader::from_reader(data.as_bytes());
1254 /// let mut iter = rdr.into_byte_records();
1255 ///
1256 /// if let Some(result) = iter.next() {
1257 /// let record = result?;
1258 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1259 /// Ok(())
1260 /// } else {
1261 /// Err(From::from("expected at least one record but got none"))
1262 /// }
1263 /// }
1264 /// ```
1265 pub fn into_byte_records(self) -> ByteRecordsIntoIter<R> {
1266 ByteRecordsIntoIter::new(self)
1267 }
1268
1269 /// Returns a reference to the first row read by this parser.
1270 ///
1271 /// If no row has been read yet, then this will force parsing of the first
1272 /// row.
1273 ///
1274 /// If there was a problem parsing the row or if it wasn't valid UTF-8,
1275 /// then this returns an error.
1276 ///
1277 /// If the underlying reader emits EOF before any data, then this returns
1278 /// an empty record.
1279 ///
1280 /// Note that this method may be used regardless of whether `has_headers`
1281 /// was enabled (but it is enabled by default).
1282 ///
1283 /// # Example
1284 ///
1285 /// This example shows how to get the header row of CSV data. Notice that
1286 /// the header row does not appear as a record in the iterator!
1287 ///
1288 /// ```
1289 /// use std::error::Error;
1290 /// use csv::Reader;
1291 ///
1292 /// # fn main() { example().unwrap(); }
1293 /// fn example() -> Result<(), Box<dyn Error>> {
1294 /// let data = "\
1295 /// city,country,pop
1296 /// Boston,United States,4628910
1297 /// ";
1298 /// let mut rdr = Reader::from_reader(data.as_bytes());
1299 ///
1300 /// // We can read the headers before iterating.
1301 /// {
1302 /// // `headers` borrows from the reader, so we put this in its
1303 /// // own scope. That way, the borrow ends before we try iterating
1304 /// // below. Alternatively, we could clone the headers.
1305 /// let headers = rdr.headers()?;
1306 /// assert_eq!(headers, vec!["city", "country", "pop"]);
1307 /// }
1308 ///
1309 /// if let Some(result) = rdr.records().next() {
1310 /// let record = result?;
1311 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1312 /// } else {
1313 /// return Err(From::from(
1314 /// "expected at least one record but got none"))
1315 /// }
1316 ///
1317 /// // We can also read the headers after iterating.
1318 /// let headers = rdr.headers()?;
1319 /// assert_eq!(headers, vec!["city", "country", "pop"]);
1320 /// Ok(())
1321 /// }
1322 /// ```
1323 pub fn headers(&mut self) -> Result<&StringRecord> {
1324 if self.state.headers.is_none() {
1325 let mut record = ByteRecord::new();
1326 self.read_byte_record_impl(&mut record)?;
1327 self.set_headers_impl(Err(record));
1328 }
1329 let headers = self.state.headers.as_ref().unwrap();
1330 match headers.string_record {
1331 Ok(ref record) => Ok(record),
1332 Err(ref err) => Err(Error::new(ErrorKind::Utf8 {
1333 pos: headers.byte_record.position().map(Clone::clone),
1334 err: err.clone(),
1335 })),
1336 }
1337 }
1338
1339 /// Returns a reference to the first row read by this parser as raw bytes.
1340 ///
1341 /// If no row has been read yet, then this will force parsing of the first
1342 /// row.
1343 ///
1344 /// If there was a problem parsing the row then this returns an error.
1345 ///
1346 /// If the underlying reader emits EOF before any data, then this returns
1347 /// an empty record.
1348 ///
1349 /// Note that this method may be used regardless of whether `has_headers`
1350 /// was enabled (but it is enabled by default).
1351 ///
1352 /// # Example
1353 ///
1354 /// This example shows how to get the header row of CSV data. Notice that
1355 /// the header row does not appear as a record in the iterator!
1356 ///
1357 /// ```
1358 /// use std::error::Error;
1359 /// use csv::Reader;
1360 ///
1361 /// # fn main() { example().unwrap(); }
1362 /// fn example() -> Result<(), Box<dyn Error>> {
1363 /// let data = "\
1364 /// city,country,pop
1365 /// Boston,United States,4628910
1366 /// ";
1367 /// let mut rdr = Reader::from_reader(data.as_bytes());
1368 ///
1369 /// // We can read the headers before iterating.
1370 /// {
1371 /// // `headers` borrows from the reader, so we put this in its
1372 /// // own scope. That way, the borrow ends before we try iterating
1373 /// // below. Alternatively, we could clone the headers.
1374 /// let headers = rdr.byte_headers()?;
1375 /// assert_eq!(headers, vec!["city", "country", "pop"]);
1376 /// }
1377 ///
1378 /// if let Some(result) = rdr.byte_records().next() {
1379 /// let record = result?;
1380 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1381 /// } else {
1382 /// return Err(From::from(
1383 /// "expected at least one record but got none"))
1384 /// }
1385 ///
1386 /// // We can also read the headers after iterating.
1387 /// let headers = rdr.byte_headers()?;
1388 /// assert_eq!(headers, vec!["city", "country", "pop"]);
1389 /// Ok(())
1390 /// }
1391 /// ```
1392 pub fn byte_headers(&mut self) -> Result<&ByteRecord> {
1393 if self.state.headers.is_none() {
1394 let mut record = ByteRecord::new();
1395 self.read_byte_record_impl(&mut record)?;
1396 self.set_headers_impl(Err(record));
1397 }
1398 Ok(&self.state.headers.as_ref().unwrap().byte_record)
1399 }
1400
1401 /// Set the headers of this CSV parser manually.
1402 ///
1403 /// This overrides any other setting (including `set_byte_headers`). Any
1404 /// automatic detection of headers is disabled. This may be called at any
1405 /// time.
1406 ///
1407 /// # Example
1408 ///
1409 /// ```
1410 /// use std::error::Error;
1411 /// use csv::{Reader, StringRecord};
1412 ///
1413 /// # fn main() { example().unwrap(); }
1414 /// fn example() -> Result<(), Box<dyn Error>> {
1415 /// let data = "\
1416 /// city,country,pop
1417 /// Boston,United States,4628910
1418 /// ";
1419 /// let mut rdr = Reader::from_reader(data.as_bytes());
1420 ///
1421 /// assert_eq!(rdr.headers()?, vec!["city", "country", "pop"]);
1422 /// rdr.set_headers(StringRecord::from(vec!["a", "b", "c"]));
1423 /// assert_eq!(rdr.headers()?, vec!["a", "b", "c"]);
1424 ///
1425 /// Ok(())
1426 /// }
1427 /// ```
1428 pub fn set_headers(&mut self, headers: StringRecord) {
1429 self.set_headers_impl(Ok(headers));
1430 }
1431
1432 /// Set the headers of this CSV parser manually as raw bytes.
1433 ///
1434 /// This overrides any other setting (including `set_headers`). Any
1435 /// automatic detection of headers is disabled. This may be called at any
1436 /// time.
1437 ///
1438 /// # Example
1439 ///
1440 /// ```
1441 /// use std::error::Error;
1442 /// use csv::{Reader, ByteRecord};
1443 ///
1444 /// # fn main() { example().unwrap(); }
1445 /// fn example() -> Result<(), Box<dyn Error>> {
1446 /// let data = "\
1447 /// city,country,pop
1448 /// Boston,United States,4628910
1449 /// ";
1450 /// let mut rdr = Reader::from_reader(data.as_bytes());
1451 ///
1452 /// assert_eq!(rdr.byte_headers()?, vec!["city", "country", "pop"]);
1453 /// rdr.set_byte_headers(ByteRecord::from(vec!["a", "b", "c"]));
1454 /// assert_eq!(rdr.byte_headers()?, vec!["a", "b", "c"]);
1455 ///
1456 /// Ok(())
1457 /// }
1458 /// ```
1459 pub fn set_byte_headers(&mut self, headers: ByteRecord) {
1460 self.set_headers_impl(Err(headers));
1461 }
1462
1463 fn set_headers_impl(
1464 &mut self,
1465 headers: result::Result<StringRecord, ByteRecord>,
1466 ) {
1467 // If we have string headers, then get byte headers. But if we have
1468 // byte headers, then get the string headers (or a UTF-8 error).
1469 let (mut str_headers, mut byte_headers) = match headers {
1470 Ok(string) => {
1471 let bytes = string.clone().into_byte_record();
1472 (Ok(string), bytes)
1473 }
1474 Err(bytes) => {
1475 match StringRecord::from_byte_record(bytes.clone()) {
1476 Ok(str_headers) => (Ok(str_headers), bytes),
1477 Err(err) => (Err(err.utf8_error().clone()), bytes),
1478 }
1479 }
1480 };
1481 if self.state.trim.should_trim_headers() {
1482 if let Ok(ref mut str_headers) = str_headers.as_mut() {
1483 str_headers.trim();
1484 }
1485 byte_headers.trim();
1486 }
1487 self.state.headers = Some(Headers {
1488 byte_record: byte_headers,
1489 string_record: str_headers,
1490 });
1491 }
1492
1493 /// Read a single row into the given record. Returns false when no more
1494 /// records could be read.
1495 ///
1496 /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
1497 /// default), then this will never read the first record.
1498 ///
1499 /// This method is useful when you want to read records as fast as
1500 /// possible. It's less ergonomic than an iterator, but it permits the
1501 /// caller to reuse the `StringRecord` allocation, which usually results
1502 /// in higher throughput.
1503 ///
1504 /// Records read via this method are guaranteed to have a position set
1505 /// on them, even if the reader is at EOF or if an error is returned.
1506 ///
1507 /// # Example
1508 ///
1509 /// ```
1510 /// use std::error::Error;
1511 /// use csv::{Reader, StringRecord};
1512 ///
1513 /// # fn main() { example().unwrap(); }
1514 /// fn example() -> Result<(), Box<dyn Error>> {
1515 /// let data = "\
1516 /// city,country,pop
1517 /// Boston,United States,4628910
1518 /// ";
1519 /// let mut rdr = Reader::from_reader(data.as_bytes());
1520 /// let mut record = StringRecord::new();
1521 ///
1522 /// if rdr.read_record(&mut record)? {
1523 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1524 /// Ok(())
1525 /// } else {
1526 /// Err(From::from("expected at least one record but got none"))
1527 /// }
1528 /// }
1529 /// ```
1530 pub fn read_record(&mut self, record: &mut StringRecord) -> Result<bool> {
1531 let result = record.read(self);
1532 // We need to trim again because trimming string records includes
1533 // Unicode whitespace. (ByteRecord trimming only includes ASCII
1534 // whitespace.)
1535 if self.state.trim.should_trim_fields() {
1536 record.trim();
1537 }
1538 result
1539 }
1540
1541 /// Read a single row into the given byte record. Returns false when no
1542 /// more records could be read.
1543 ///
1544 /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
1545 /// default), then this will never read the first record.
1546 ///
1547 /// This method is useful when you want to read records as fast as
1548 /// possible. It's less ergonomic than an iterator, but it permits the
1549 /// caller to reuse the `ByteRecord` allocation, which usually results
1550 /// in higher throughput.
1551 ///
1552 /// Records read via this method are guaranteed to have a position set
1553 /// on them, even if the reader is at EOF or if an error is returned.
1554 ///
1555 /// # Example
1556 ///
1557 /// ```
1558 /// use std::error::Error;
1559 /// use csv::{ByteRecord, Reader};
1560 ///
1561 /// # fn main() { example().unwrap(); }
1562 /// fn example() -> Result<(), Box<dyn Error>> {
1563 /// let data = "\
1564 /// city,country,pop
1565 /// Boston,United States,4628910
1566 /// ";
1567 /// let mut rdr = Reader::from_reader(data.as_bytes());
1568 /// let mut record = ByteRecord::new();
1569 ///
1570 /// if rdr.read_byte_record(&mut record)? {
1571 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1572 /// Ok(())
1573 /// } else {
1574 /// Err(From::from("expected at least one record but got none"))
1575 /// }
1576 /// }
1577 /// ```
1578 pub fn read_byte_record(
1579 &mut self,
1580 record: &mut ByteRecord,
1581 ) -> Result<bool> {
1582 if !self.state.seeked && !self.state.has_headers && !self.state.first {
1583 // If the caller indicated "no headers" and we haven't yielded the
1584 // first record yet, then we should yield our header row if we have
1585 // one.
1586 if let Some(ref headers) = self.state.headers {
1587 self.state.first = true;
1588 record.clone_from(&headers.byte_record);
1589 if self.state.trim.should_trim_fields() {
1590 record.trim();
1591 }
1592 return Ok(!record.is_empty());
1593 }
1594 }
1595 let ok = self.read_byte_record_impl(record)?;
1596 self.state.first = true;
1597 if !self.state.seeked && self.state.headers.is_none() {
1598 self.set_headers_impl(Err(record.clone()));
1599 // If the end user indicated that we have headers, then we should
1600 // never return the first row. Instead, we should attempt to
1601 // read and return the next one.
1602 if self.state.has_headers {
1603 let result = self.read_byte_record_impl(record);
1604 if self.state.trim.should_trim_fields() {
1605 record.trim();
1606 }
1607 return result;
1608 }
1609 }
1610 if self.state.trim.should_trim_fields() {
1611 record.trim();
1612 }
1613 Ok(ok)
1614 }
1615
1616 /// Read a byte record from the underlying CSV reader, without accounting
1617 /// for headers.
1618 #[inline(always)]
1619 fn read_byte_record_impl(
1620 &mut self,
1621 record: &mut ByteRecord,
1622 ) -> Result<bool> {
1623 use csv_core::ReadRecordResult::*;
1624
1625 record.clear();
1626 record.set_position(Some(self.state.cur_pos.clone()));
1627 if self.state.eof != ReaderEofState::NotEof {
1628 return Ok(false);
1629 }
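        // Running totals of how many field bytes (`outlen`) and field end
        // offsets (`endlen`) have been written into `record`, so parsing can
        // resume where it left off after the buffers are grown.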
1630 let (mut outlen, mut endlen) = (0, 0);
1631 loop {
1632 let (res, nin, nout, nend) = {
1633 let input_res = self.rdr.fill_buf();
1634 if input_res.is_err() {
1635 self.state.eof = ReaderEofState::IOError;
1636 }
1637 let input = input_res?;
1638 let (fields, ends) = record.as_parts();
1639 self.core.read_record(
1640 input,
1641 &mut fields[outlen..],
1642 &mut ends[endlen..],
1643 )
1644 };
1645 self.rdr.consume(nin);
1646 let byte = self.state.cur_pos.byte();
1647 self.state
1648 .cur_pos
1649 .set_byte(byte + nin as u64)
1650 .set_line(self.core.line());
1651 outlen += nout;
1652 endlen += nend;
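            // `res` reports why csv-core stopped: it needs more input, needs
            // more room in the field or field-ends buffers, finished a whole
            // record, or reached the end of the data.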
1653 match res {
1654 InputEmpty => continue,
1655 OutputFull => {
1656 record.expand_fields();
1657 continue;
1658 }
1659 OutputEndsFull => {
1660 record.expand_ends();
1661 continue;
1662 }
1663 Record => {
1664 record.set_len(endlen);
1665 self.state.add_record(record)?;
1666 return Ok(true);
1667 }
1668 End => {
1669 self.state.eof = ReaderEofState::Eof;
1670 return Ok(false);
1671 }
1672 }
1673 }
1674 }
1675
1676 /// Return the current position of this CSV reader.
1677 ///
1678 /// The byte offset in the position returned can be used to `seek` this
1679 /// reader. In particular, seeking to a position returned here on the same
1680 /// data will result in parsing the same subsequent record.
1681 ///
1682 /// # Example: reading the position
1683 ///
1684 /// ```
1685 /// use std::{error::Error, io};
1686 /// use csv::{Reader, Position};
1687 ///
1688 /// # fn main() { example().unwrap(); }
1689 /// fn example() -> Result<(), Box<dyn Error>> {
1690 /// let data = "\
1691 /// city,country,popcount
1692 /// Boston,United States,4628910
1693 /// Concord,United States,42695
1694 /// ";
1695 /// let rdr = Reader::from_reader(io::Cursor::new(data));
1696 /// let mut iter = rdr.into_records();
1697 /// let mut pos = Position::new();
1698 /// loop {
1699 /// // Read the position immediately before each record.
1700 /// let next_pos = iter.reader().position().clone();
1701 /// if iter.next().is_none() {
1702 /// break;
1703 /// }
1704 /// pos = next_pos;
1705 /// }
1706 ///
1707 /// // `pos` should now be the position immediately before the last
1708 /// // record.
1709 /// assert_eq!(pos.byte(), 51);
1710 /// assert_eq!(pos.line(), 3);
1711 /// assert_eq!(pos.record(), 2);
1712 /// Ok(())
1713 /// }
1714 /// ```
1715 pub fn position(&self) -> &Position {
1716 &self.state.cur_pos
1717 }
1718
1719 /// Returns true if and only if this reader has been exhausted.
1720 ///
1721 /// When this returns true, no more records can be read from this reader
1722 /// (unless it has been seeked to another position).
1723 ///
1724 /// # Example
1725 ///
1726 /// ```
1727 /// use std::{error::Error, io};
1728 /// use csv::{Reader, Position};
1729 ///
1730 /// # fn main() { example().unwrap(); }
1731 /// fn example() -> Result<(), Box<dyn Error>> {
1732 /// let data = "\
1733 /// city,country,popcount
1734 /// Boston,United States,4628910
1735 /// Concord,United States,42695
1736 /// ";
1737 /// let mut rdr = Reader::from_reader(io::Cursor::new(data));
1738 /// assert!(!rdr.is_done());
1739 /// for result in rdr.records() {
1740 /// let _ = result?;
1741 /// }
1742 /// assert!(rdr.is_done());
1743 /// Ok(())
1744 /// }
1745 /// ```
1746 pub fn is_done(&self) -> bool {
1747 self.state.eof != ReaderEofState::NotEof
1748 }
1749
1750 /// Returns true if and only if this reader has been configured to
1751 /// interpret the first record as a header record.
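    ///
    /// # Example
    ///
    /// A minimal sketch: readers built with default settings treat the
    /// first record as headers, while `has_headers(false)` on the builder
    /// turns that off.
    ///
    /// ```
    /// use csv::ReaderBuilder;
    ///
    /// let with = ReaderBuilder::new().from_reader("a,b\n1,2".as_bytes());
    /// assert!(with.has_headers());
    ///
    /// let without = ReaderBuilder::new()
    ///     .has_headers(false)
    ///     .from_reader("a,b\n1,2".as_bytes());
    /// assert!(!without.has_headers());
    /// ```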
1752 pub fn has_headers(&self) -> bool {
1753 self.state.has_headers
1754 }
1755
1756 /// Returns a reference to the underlying reader.
1757 pub fn get_ref(&self) -> &R {
1758 self.rdr.get_ref()
1759 }
1760
1761 /// Returns a mutable reference to the underlying reader.
1762 pub fn get_mut(&mut self) -> &mut R {
1763 self.rdr.get_mut()
1764 }
1765
1766 /// Unwraps this CSV reader, returning the underlying reader.
1767 ///
1768 /// Note that any leftover data inside this reader's internal buffer is
1769 /// lost.
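    ///
    /// # Example
    ///
    /// A minimal sketch that recovers the wrapped reader after parsing:
    ///
    /// ```
    /// use std::io;
    /// use csv::Reader;
    ///
    /// let mut rdr = Reader::from_reader(io::Cursor::new("a,b\n1,2\n"));
    /// assert_eq!(rdr.records().count(), 1);
    ///
    /// // Only data buffered inside the CSV reader is discarded; the
    /// // underlying cursor itself comes back intact.
    /// let cursor = rdr.into_inner();
    /// assert_eq!(cursor.get_ref(), &"a,b\n1,2\n");
    /// ```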
1770 pub fn into_inner(self) -> R {
1771 self.rdr.into_inner()
1772 }
1773}
1774
1775impl<R: io::Read + io::Seek> Reader<R> {
1776 /// Seeks the underlying reader to the position given.
1777 ///
1778 /// This comes with a few caveats:
1779 ///
1780 /// * Any internal buffer associated with this reader is cleared.
1781 /// * If the given position does not correspond to a position immediately
1782 /// before the start of a record, then the behavior of this reader is
1783 /// unspecified.
1784 /// * Any special logic that skips the first record in the CSV reader
1785 /// when reading or iterating over records is disabled.
1786 ///
1787 /// If the given position has a byte offset equivalent to the current
1788 /// position, then no seeking is performed.
1789 ///
1790 /// If the header row has not already been read, then this will attempt
1791 /// to read the header row before seeking. Therefore, it is possible that
1792 /// this returns an error associated with reading CSV data.
1793 ///
1794 /// Note that seeking is performed based only on the byte offset in the
1795    /// given position. Namely, the record or line numbers in the position may
1796    /// be incorrect, in which case any future positions generated by this CSV
1797    /// reader will be similarly incorrect.
1798 ///
1799 /// # Example: seek to parse a record twice
1800 ///
1801 /// ```
1802 /// use std::{error::Error, io};
1803 /// use csv::{Reader, Position};
1804 ///
1805 /// # fn main() { example().unwrap(); }
1806 /// fn example() -> Result<(), Box<dyn Error>> {
1807 /// let data = "\
1808 /// city,country,popcount
1809 /// Boston,United States,4628910
1810 /// Concord,United States,42695
1811 /// ";
1812 /// let rdr = Reader::from_reader(io::Cursor::new(data));
1813 /// let mut iter = rdr.into_records();
1814 /// let mut pos = Position::new();
1815 /// loop {
1816 /// // Read the position immediately before each record.
1817 /// let next_pos = iter.reader().position().clone();
1818 /// if iter.next().is_none() {
1819 /// break;
1820 /// }
1821 /// pos = next_pos;
1822 /// }
1823 ///
1824 /// // Now seek the reader back to `pos`. This will let us read the
1825 /// // last record again.
1826 /// iter.reader_mut().seek(pos)?;
1827 /// let mut iter = iter.into_reader().into_records();
1828 /// if let Some(result) = iter.next() {
1829 /// let record = result?;
1830 /// assert_eq!(record, vec!["Concord", "United States", "42695"]);
1831 /// Ok(())
1832 /// } else {
1833 /// Err(From::from("expected at least one record but got none"))
1834 /// }
1835 /// }
1836 /// ```
1837 pub fn seek(&mut self, pos: Position) -> Result<()> {
1838 self.byte_headers()?;
1839 self.state.seeked = true;
1840 if pos.byte() == self.state.cur_pos.byte() {
1841 return Ok(());
1842 }
1843 self.rdr.seek(io::SeekFrom::Start(pos.byte()))?;
1844 self.core.reset();
1845 self.core.set_line(pos.line());
1846 self.state.cur_pos = pos;
1847 self.state.eof = ReaderEofState::NotEof;
1848 Ok(())
1849 }
1850
1851 /// This is like `seek`, but provides direct control over how the seeking
1852 /// operation is performed via `io::SeekFrom`.
1853 ///
1854    /// The `pos` position given *should* correspond to the position
1855    /// indicated by `seek_from`, but this is not required. If the `pos` position
1856 /// given is incorrect, then the position information returned by this
1857 /// reader will be similarly incorrect.
1858 ///
1859 /// If the header row has not already been read, then this will attempt
1860 /// to read the header row before seeking. Therefore, it is possible that
1861 /// this returns an error associated with reading CSV data.
1862 ///
1863 /// Unlike `seek`, this will always cause an actual seek to be performed.
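    ///
    /// # Example: rewinding with `seek_raw`
    ///
    /// A minimal sketch that rewinds to the start of the data. As with
    /// `seek`, the logic that skips the header row is disabled afterwards,
    /// so the header row comes back as an ordinary record.
    ///
    /// ```
    /// use std::{error::Error, io};
    /// use csv::{Position, Reader};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,popcount
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = Reader::from_reader(io::Cursor::new(data));
    ///     assert_eq!(rdr.records().count(), 1);
    ///
    ///     // Rewind to byte 0 and hand the reader a matching position.
    ///     rdr.seek_raw(io::SeekFrom::Start(0), Position::new())?;
    ///     assert_eq!(rdr.records().count(), 2);
    ///     Ok(())
    /// }
    /// ```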
1864 pub fn seek_raw(
1865 &mut self,
1866 seek_from: io::SeekFrom,
1867 pos: Position,
1868 ) -> Result<()> {
1869 self.byte_headers()?;
1870 self.state.seeked = true;
1871 self.rdr.seek(seek_from)?;
1872 self.core.reset();
1873 self.core.set_line(pos.line());
1874 self.state.cur_pos = pos;
1875 self.state.eof = ReaderEofState::NotEof;
1876 Ok(())
1877 }
1878}
1879
1880impl ReaderState {
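    /// Bookkeeping performed after each parsed record: bump the record
    /// number in the current position and, unless the reader is `flexible`,
    /// check that the record has the same number of fields as the first
    /// record seen.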
1881 #[inline(always)]
1882 fn add_record(&mut self, record: &ByteRecord) -> Result<()> {
1883 let i = self.cur_pos.record();
1884 self.cur_pos.set_record(i.checked_add(1).unwrap());
1885 if !self.flexible {
1886 match self.first_field_count {
1887 None => self.first_field_count = Some(record.len() as u64),
1888 Some(expected) => {
1889 if record.len() as u64 != expected {
1890 return Err(Error::new(ErrorKind::UnequalLengths {
1891 pos: record.position().map(Clone::clone),
1892 expected_len: expected,
1893 len: record.len() as u64,
1894 }));
1895 }
1896 }
1897 }
1898 }
1899 Ok(())
1900 }
1901}
1902
1903/// An owned iterator over deserialized records.
1904///
1905/// The type parameter `R` refers to the underlying `io::Read` type, and `D`
1906/// refers to the type that this iterator will deserialize a record into.
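///
/// # Example
///
/// A minimal sketch using `Reader::into_deserialize` with a tuple target,
/// so no serde derive is needed:
///
/// ```
/// use std::error::Error;
/// use csv::Reader;
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
///     let data = "\
/// city,popcount
/// Boston,4628910
/// ";
///     let rdr = Reader::from_reader(data.as_bytes());
///     let rows = rdr
///         .into_deserialize::<(String, u64)>()
///         .collect::<Result<Vec<_>, csv::Error>>()?;
///     assert_eq!(rows, vec![("Boston".to_string(), 4628910)]);
///     Ok(())
/// }
/// ```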
1907pub struct DeserializeRecordsIntoIter<R, D> {
1908 rdr: Reader<R>,
1909 rec: StringRecord,
1910 headers: Option<StringRecord>,
1911 _priv: PhantomData<D>,
1912}
1913
1914impl<R: io::Read, D: DeserializeOwned> DeserializeRecordsIntoIter<R, D> {
1915 fn new(mut rdr: Reader<R>) -> DeserializeRecordsIntoIter<R, D> {
1916 let headers = if !rdr.state.has_headers {
1917 None
1918 } else {
1919 rdr.headers().ok().map(Clone::clone)
1920 };
1921 DeserializeRecordsIntoIter {
1922 rdr,
1923 rec: StringRecord::new(),
1924 headers,
1925 _priv: PhantomData,
1926 }
1927 }
1928
1929 /// Return a reference to the underlying CSV reader.
1930 pub fn reader(&self) -> &Reader<R> {
1931 &self.rdr
1932 }
1933
1934 /// Return a mutable reference to the underlying CSV reader.
1935 pub fn reader_mut(&mut self) -> &mut Reader<R> {
1936 &mut self.rdr
1937 }
1938
1939 /// Drop this iterator and return the underlying CSV reader.
1940 pub fn into_reader(self) -> Reader<R> {
1941 self.rdr
1942 }
1943}
1944
1945impl<R: io::Read, D: DeserializeOwned> Iterator
1946 for DeserializeRecordsIntoIter<R, D>
1947{
1948 type Item = Result<D>;
1949
1950 fn next(&mut self) -> Option<Result<D>> {
1951 match self.rdr.read_record(&mut self.rec) {
1952 Err(err) => Some(Err(err)),
1953 Ok(false) => None,
1954 Ok(true) => Some(self.rec.deserialize(self.headers.as_ref())),
1955 }
1956 }
1957}
1958
1959/// A borrowed iterator over deserialized records.
1960///
1961/// The lifetime parameter `'r` refers to the lifetime of the underlying
1962/// CSV `Reader`. The type parameter `R` refers to the underlying `io::Read`
1963/// type, and `D` refers to the type that this iterator will deserialize a
1964/// record into.
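///
/// # Example
///
/// A minimal sketch using the borrowing `Reader::deserialize` method, again
/// with a tuple target:
///
/// ```
/// use std::error::Error;
/// use csv::Reader;
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
///     let data = "\
/// city,popcount
/// Concord,42695
/// ";
///     let mut rdr = Reader::from_reader(data.as_bytes());
///     for row in rdr.deserialize() {
///         let (city, pop): (String, u64) = row?;
///         println!("{}: {}", city, pop);
///     }
///     Ok(())
/// }
/// ```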
1965pub struct DeserializeRecordsIter<'r, R: 'r, D> {
1966 rdr: &'r mut Reader<R>,
1967 rec: StringRecord,
1968 headers: Option<StringRecord>,
1969 _priv: PhantomData<D>,
1970}
1971
1972impl<'r, R: io::Read, D: DeserializeOwned> DeserializeRecordsIter<'r, R, D> {
1973 fn new(rdr: &'r mut Reader<R>) -> DeserializeRecordsIter<'r, R, D> {
1974 let headers = if !rdr.state.has_headers {
1975 None
1976 } else {
1977 rdr.headers().ok().map(Clone::clone)
1978 };
1979 DeserializeRecordsIter {
1980 rdr,
1981 rec: StringRecord::new(),
1982 headers,
1983 _priv: PhantomData,
1984 }
1985 }
1986
1987 /// Return a reference to the underlying CSV reader.
1988 pub fn reader(&self) -> &Reader<R> {
1989 &self.rdr
1990 }
1991
1992 /// Return a mutable reference to the underlying CSV reader.
1993 pub fn reader_mut(&mut self) -> &mut Reader<R> {
1994 &mut self.rdr
1995 }
1996}
1997
1998impl<'r, R: io::Read, D: DeserializeOwned> Iterator
1999 for DeserializeRecordsIter<'r, R, D>
2000{
2001 type Item = Result<D>;
2002
2003 fn next(&mut self) -> Option<Result<D>> {
2004 match self.rdr.read_record(&mut self.rec) {
2005 Err(err) => Some(Err(err)),
2006 Ok(false) => None,
2007 Ok(true) => Some(self.rec.deserialize(self.headers.as_ref())),
2008 }
2009 }
2010}
2011
2012/// An owned iterator over records as strings.
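///
/// # Example
///
/// A minimal sketch built with `Reader::into_records`:
///
/// ```
/// use csv::Reader;
///
/// let data = "city,popcount\nBoston,4628910\n";
/// let mut iter = Reader::from_reader(data.as_bytes()).into_records();
/// let record = iter.next().unwrap().unwrap();
/// assert_eq!(record, vec!["Boston", "4628910"]);
/// assert!(iter.next().is_none());
/// ```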
2013pub struct StringRecordsIntoIter<R> {
2014 rdr: Reader<R>,
2015 rec: StringRecord,
2016}
2017
2018impl<R: io::Read> StringRecordsIntoIter<R> {
2019 fn new(rdr: Reader<R>) -> StringRecordsIntoIter<R> {
2020 StringRecordsIntoIter { rdr, rec: StringRecord::new() }
2021 }
2022
2023 /// Return a reference to the underlying CSV reader.
2024 pub fn reader(&self) -> &Reader<R> {
2025 &self.rdr
2026 }
2027
2028 /// Return a mutable reference to the underlying CSV reader.
2029 pub fn reader_mut(&mut self) -> &mut Reader<R> {
2030 &mut self.rdr
2031 }
2032
2033 /// Drop this iterator and return the underlying CSV reader.
2034 pub fn into_reader(self) -> Reader<R> {
2035 self.rdr
2036 }
2037}
2038
2039impl<R: io::Read> Iterator for StringRecordsIntoIter<R> {
2040 type Item = Result<StringRecord>;
2041
2042 fn next(&mut self) -> Option<Result<StringRecord>> {
2043 match self.rdr.read_record(&mut self.rec) {
2044 Err(err) => Some(Err(err)),
2045 Ok(true) => Some(Ok(self.rec.clone_truncated())),
2046 Ok(false) => None,
2047 }
2048 }
2049}
2050
2051/// A borrowed iterator over records as strings.
2052///
2053/// The lifetime parameter `'r` refers to the lifetime of the underlying
2054/// CSV `Reader`.
2055pub struct StringRecordsIter<'r, R: 'r> {
2056 rdr: &'r mut Reader<R>,
2057 rec: StringRecord,
2058}
2059
2060impl<'r, R: io::Read> StringRecordsIter<'r, R> {
2061 fn new(rdr: &'r mut Reader<R>) -> StringRecordsIter<'r, R> {
2062 StringRecordsIter { rdr, rec: StringRecord::new() }
2063 }
2064
2065 /// Return a reference to the underlying CSV reader.
2066 pub fn reader(&self) -> &Reader<R> {
2067 &self.rdr
2068 }
2069
2070 /// Return a mutable reference to the underlying CSV reader.
2071 pub fn reader_mut(&mut self) -> &mut Reader<R> {
2072 &mut self.rdr
2073 }
2074}
2075
2076impl<'r, R: io::Read> Iterator for StringRecordsIter<'r, R> {
2077 type Item = Result<StringRecord>;
2078
2079 fn next(&mut self) -> Option<Result<StringRecord>> {
2080 match self.rdr.read_record(&mut self.rec) {
2081 Err(err) => Some(Err(err)),
2082 Ok(true) => Some(Ok(self.rec.clone_truncated())),
2083 Ok(false) => None,
2084 }
2085 }
2086}
2087
2088/// An owned iterator over records as raw bytes.
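///
/// # Example
///
/// A minimal sketch built with `Reader::into_byte_records`; byte records
/// skip UTF-8 validation, so the non-UTF-8 field below is fine:
///
/// ```
/// use csv::Reader;
///
/// let data: &[u8] = b"key,value\nfoo,\xFF\n";
/// let mut iter = Reader::from_reader(data).into_byte_records();
/// let record = iter.next().unwrap().unwrap();
/// assert_eq!(b"\xFF", &record[1]);
/// assert!(iter.next().is_none());
/// ```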
2089pub struct ByteRecordsIntoIter<R> {
2090 rdr: Reader<R>,
2091 rec: ByteRecord,
2092}
2093
2094impl<R: io::Read> ByteRecordsIntoIter<R> {
2095 fn new(rdr: Reader<R>) -> ByteRecordsIntoIter<R> {
2096 ByteRecordsIntoIter { rdr, rec: ByteRecord::new() }
2097 }
2098
2099 /// Return a reference to the underlying CSV reader.
2100 pub fn reader(&self) -> &Reader<R> {
2101 &self.rdr
2102 }
2103
2104 /// Return a mutable reference to the underlying CSV reader.
2105 pub fn reader_mut(&mut self) -> &mut Reader<R> {
2106 &mut self.rdr
2107 }
2108
2109 /// Drop this iterator and return the underlying CSV reader.
2110 pub fn into_reader(self) -> Reader<R> {
2111 self.rdr
2112 }
2113}
2114
2115impl<R: io::Read> Iterator for ByteRecordsIntoIter<R> {
2116 type Item = Result<ByteRecord>;
2117
2118 fn next(&mut self) -> Option<Result<ByteRecord>> {
2119 match self.rdr.read_byte_record(&mut self.rec) {
2120 Err(err) => Some(Err(err)),
2121 Ok(true) => Some(Ok(self.rec.clone_truncated())),
2122 Ok(false) => None,
2123 }
2124 }
2125}
2126
2127/// A borrowed iterator over records as raw bytes.
2128///
2129/// The lifetime parameter `'r` refers to the lifetime of the underlying
2130/// CSV `Reader`.
2131pub struct ByteRecordsIter<'r, R: 'r> {
2132 rdr: &'r mut Reader<R>,
2133 rec: ByteRecord,
2134}
2135
2136impl<'r, R: io::Read> ByteRecordsIter<'r, R> {
2137 fn new(rdr: &'r mut Reader<R>) -> ByteRecordsIter<'r, R> {
2138 ByteRecordsIter { rdr, rec: ByteRecord::new() }
2139 }
2140
2141 /// Return a reference to the underlying CSV reader.
2142 pub fn reader(&self) -> &Reader<R> {
2143 &self.rdr
2144 }
2145
2146 /// Return a mutable reference to the underlying CSV reader.
2147 pub fn reader_mut(&mut self) -> &mut Reader<R> {
2148 &mut self.rdr
2149 }
2150}
2151
2152impl<'r, R: io::Read> Iterator for ByteRecordsIter<'r, R> {
2153 type Item = Result<ByteRecord>;
2154
2155 fn next(&mut self) -> Option<Result<ByteRecord>> {
2156 match self.rdr.read_byte_record(&mut self.rec) {
2157 Err(err) => Some(Err(err)),
2158 Ok(true) => Some(Ok(self.rec.clone_truncated())),
2159 Ok(false) => None,
2160 }
2161 }
2162}
2163
2164#[cfg(test)]
2165mod tests {
2166 use std::io;
2167
2168 use crate::{
2169 byte_record::ByteRecord, error::ErrorKind, string_record::StringRecord,
2170 };
2171
2172 use super::{Position, ReaderBuilder, Trim};
2173
2174 fn b(s: &str) -> &[u8] {
2175 s.as_bytes()
2176 }
2177 fn s(b: &[u8]) -> &str {
2178 ::std::str::from_utf8(b).unwrap()
2179 }
2180
2181 fn newpos(byte: u64, line: u64, record: u64) -> Position {
2182 let mut p = Position::new();
2183 p.set_byte(byte).set_line(line).set_record(record);
2184 p
2185 }
2186
2187 #[test]
2188 fn read_byte_record() {
2189 let data = b("foo,\"b,ar\",baz\nabc,mno,xyz");
2190 let mut rdr =
2191 ReaderBuilder::new().has_headers(false).from_reader(data);
2192 let mut rec = ByteRecord::new();
2193
2194 assert!(rdr.read_byte_record(&mut rec).unwrap());
2195 assert_eq!(3, rec.len());
2196 assert_eq!("foo", s(&rec[0]));
2197 assert_eq!("b,ar", s(&rec[1]));
2198 assert_eq!("baz", s(&rec[2]));
2199
2200 assert!(rdr.read_byte_record(&mut rec).unwrap());
2201 assert_eq!(3, rec.len());
2202 assert_eq!("abc", s(&rec[0]));
2203 assert_eq!("mno", s(&rec[1]));
2204 assert_eq!("xyz", s(&rec[2]));
2205
2206 assert!(!rdr.read_byte_record(&mut rec).unwrap());
2207 }
2208
2209 #[test]
2210 fn read_trimmed_records_and_headers() {
2211 let data = b("foo, bar,\tbaz\n 1, 2, 3\n1\t,\t,3\t\t");
2212 let mut rdr = ReaderBuilder::new()
2213 .has_headers(true)
2214 .trim(Trim::All)
2215 .from_reader(data);
2216 let mut rec = ByteRecord::new();
2217 assert!(rdr.read_byte_record(&mut rec).unwrap());
2218 assert_eq!("1", s(&rec[0]));
2219 assert_eq!("2", s(&rec[1]));
2220 assert_eq!("3", s(&rec[2]));
2221 let mut rec = StringRecord::new();
2222 assert!(rdr.read_record(&mut rec).unwrap());
2223 assert_eq!("1", &rec[0]);
2224 assert_eq!("", &rec[1]);
2225 assert_eq!("3", &rec[2]);
2226 {
2227 let headers = rdr.headers().unwrap();
2228 assert_eq!(3, headers.len());
2229 assert_eq!("foo", &headers[0]);
2230 assert_eq!("bar", &headers[1]);
2231 assert_eq!("baz", &headers[2]);
2232 }
2233 }
2234
2235 #[test]
2236 fn read_trimmed_header() {
2237 let data = b("foo, bar,\tbaz\n 1, 2, 3\n1\t,\t,3\t\t");
2238 let mut rdr = ReaderBuilder::new()
2239 .has_headers(true)
2240 .trim(Trim::Headers)
2241 .from_reader(data);
2242 let mut rec = ByteRecord::new();
2243 assert!(rdr.read_byte_record(&mut rec).unwrap());
2244 assert_eq!(" 1", s(&rec[0]));
2245 assert_eq!(" 2", s(&rec[1]));
2246 assert_eq!(" 3", s(&rec[2]));
2247 {
2248 let headers = rdr.headers().unwrap();
2249 assert_eq!(3, headers.len());
2250 assert_eq!("foo", &headers[0]);
2251 assert_eq!("bar", &headers[1]);
2252 assert_eq!("baz", &headers[2]);
2253 }
2254 }
2255
2256 #[test]
2257    fn read_trimmed_header_invalid_utf8() {
2258 let data = &b"foo, b\xFFar,\tbaz\na,b,c\nd,e,f"[..];
2259 let mut rdr = ReaderBuilder::new()
2260 .has_headers(true)
2261 .trim(Trim::Headers)
2262 .from_reader(data);
2263 let mut rec = StringRecord::new();
2264
2265        // Force the headers to be read.
2266        let _ = rdr.read_record(&mut rec);
2267        // Check that the byte headers are trimmed.
2268 {
2269 let headers = rdr.byte_headers().unwrap();
2270 assert_eq!(3, headers.len());
2271 assert_eq!(b"foo", &headers[0]);
2272 assert_eq!(b"b\xFFar", &headers[1]);
2273 assert_eq!(b"baz", &headers[2]);
2274 }
2275 match *rdr.headers().unwrap_err().kind() {
2276 ErrorKind::Utf8 { pos: Some(ref pos), ref err } => {
2277 assert_eq!(pos, &newpos(0, 1, 0));
2278 assert_eq!(err.field(), 1);
2279 assert_eq!(err.valid_up_to(), 3);
2280 }
2281 ref err => panic!("match failed, got {:?}", err),
2282 }
2283 }
2284
2285 #[test]
2286 fn read_trimmed_records() {
2287 let data = b("foo, bar,\tbaz\n 1, 2, 3\n1\t,\t,3\t\t");
2288 let mut rdr = ReaderBuilder::new()
2289 .has_headers(true)
2290 .trim(Trim::Fields)
2291 .from_reader(data);
2292 let mut rec = ByteRecord::new();
2293 assert!(rdr.read_byte_record(&mut rec).unwrap());
2294 assert_eq!("1", s(&rec[0]));
2295 assert_eq!("2", s(&rec[1]));
2296 assert_eq!("3", s(&rec[2]));
2297 {
2298 let headers = rdr.headers().unwrap();
2299 assert_eq!(3, headers.len());
2300 assert_eq!("foo", &headers[0]);
2301 assert_eq!(" bar", &headers[1]);
2302 assert_eq!("\tbaz", &headers[2]);
2303 }
2304 }
2305
2306 #[test]
2307 fn read_trimmed_records_without_headers() {
2308 let data = b("a1, b1\t,\t c1\t\n");
2309 let mut rdr = ReaderBuilder::new()
2310 .has_headers(false)
2311 .trim(Trim::All)
2312 .from_reader(data);
2313 let mut rec = ByteRecord::new();
2314 assert!(rdr.read_byte_record(&mut rec).unwrap());
2315 assert_eq!("a1", s(&rec[0]));
2316 assert_eq!("b1", s(&rec[1]));
2317 assert_eq!("c1", s(&rec[2]));
2318 }
2319
2320 #[test]
2321 fn read_record_unequal_fails() {
2322 let data = b("foo\nbar,baz");
2323 let mut rdr =
2324 ReaderBuilder::new().has_headers(false).from_reader(data);
2325 let mut rec = ByteRecord::new();
2326
2327 assert!(rdr.read_byte_record(&mut rec).unwrap());
2328 assert_eq!(1, rec.len());
2329 assert_eq!("foo", s(&rec[0]));
2330
2331 match rdr.read_byte_record(&mut rec) {
2332 Err(err) => match *err.kind() {
2333 ErrorKind::UnequalLengths {
2334 expected_len: 1,
2335 ref pos,
2336 len: 2,
2337 } => {
2338 assert_eq!(pos, &Some(newpos(4, 2, 1)));
2339 }
2340 ref wrong => panic!("match failed, got {:?}", wrong),
2341 },
2342 wrong => panic!("match failed, got {:?}", wrong),
2343 }
2344 }
2345
2346 #[test]
2347 fn read_record_unequal_ok() {
2348 let data = b("foo\nbar,baz");
2349 let mut rdr = ReaderBuilder::new()
2350 .has_headers(false)
2351 .flexible(true)
2352 .from_reader(data);
2353 let mut rec = ByteRecord::new();
2354
2355 assert!(rdr.read_byte_record(&mut rec).unwrap());
2356 assert_eq!(1, rec.len());
2357 assert_eq!("foo", s(&rec[0]));
2358
2359 assert!(rdr.read_byte_record(&mut rec).unwrap());
2360 assert_eq!(2, rec.len());
2361 assert_eq!("bar", s(&rec[0]));
2362 assert_eq!("baz", s(&rec[1]));
2363
2364 assert!(!rdr.read_byte_record(&mut rec).unwrap());
2365 }
2366
2367 // This tests that even if we get a CSV error, we can continue reading
2368 // if we want.
2369 #[test]
2370 fn read_record_unequal_continue() {
2371 let data = b("foo\nbar,baz\nquux");
2372 let mut rdr =
2373 ReaderBuilder::new().has_headers(false).from_reader(data);
2374 let mut rec = ByteRecord::new();
2375
2376 assert!(rdr.read_byte_record(&mut rec).unwrap());
2377 assert_eq!(1, rec.len());
2378 assert_eq!("foo", s(&rec[0]));
2379
2380 match rdr.read_byte_record(&mut rec) {
2381 Err(err) => match err.kind() {
2382 &ErrorKind::UnequalLengths {
2383 expected_len: 1,
2384 ref pos,
2385 len: 2,
2386 } => {
2387 assert_eq!(pos, &Some(newpos(4, 2, 1)));
2388 }
2389 wrong => panic!("match failed, got {:?}", wrong),
2390 },
2391 wrong => panic!("match failed, got {:?}", wrong),
2392 }
2393
2394 assert!(rdr.read_byte_record(&mut rec).unwrap());
2395 assert_eq!(1, rec.len());
2396 assert_eq!("quux", s(&rec[0]));
2397
2398 assert!(!rdr.read_byte_record(&mut rec).unwrap());
2399 }
2400
2401 #[test]
2402 fn read_record_headers() {
2403 let data = b("foo,bar,baz\na,b,c\nd,e,f");
2404 let mut rdr = ReaderBuilder::new().has_headers(true).from_reader(data);
2405 let mut rec = StringRecord::new();
2406
2407 assert!(rdr.read_record(&mut rec).unwrap());
2408 assert_eq!(3, rec.len());
2409 assert_eq!("a", &rec[0]);
2410
2411 assert!(rdr.read_record(&mut rec).unwrap());
2412 assert_eq!(3, rec.len());
2413 assert_eq!("d", &rec[0]);
2414
2415 assert!(!rdr.read_record(&mut rec).unwrap());
2416
2417 {
2418 let headers = rdr.byte_headers().unwrap();
2419 assert_eq!(3, headers.len());
2420 assert_eq!(b"foo", &headers[0]);
2421 assert_eq!(b"bar", &headers[1]);
2422 assert_eq!(b"baz", &headers[2]);
2423 }
2424 {
2425 let headers = rdr.headers().unwrap();
2426 assert_eq!(3, headers.len());
2427 assert_eq!("foo", &headers[0]);
2428 assert_eq!("bar", &headers[1]);
2429 assert_eq!("baz", &headers[2]);
2430 }
2431 }
2432
2433 #[test]
2434 fn read_record_headers_invalid_utf8() {
2435 let data = &b"foo,b\xFFar,baz\na,b,c\nd,e,f"[..];
2436 let mut rdr = ReaderBuilder::new().has_headers(true).from_reader(data);
2437 let mut rec = StringRecord::new();
2438
2439 assert!(rdr.read_record(&mut rec).unwrap());
2440 assert_eq!(3, rec.len());
2441 assert_eq!("a", &rec[0]);
2442
2443 assert!(rdr.read_record(&mut rec).unwrap());
2444 assert_eq!(3, rec.len());
2445 assert_eq!("d", &rec[0]);
2446
2447 assert!(!rdr.read_record(&mut rec).unwrap());
2448
2449 // Check that we can read the headers as raw bytes, but that
2450 // if we read them as strings, we get an appropriate UTF-8 error.
2451 {
2452 let headers = rdr.byte_headers().unwrap();
2453 assert_eq!(3, headers.len());
2454 assert_eq!(b"foo", &headers[0]);
2455 assert_eq!(b"b\xFFar", &headers[1]);
2456 assert_eq!(b"baz", &headers[2]);
2457 }
2458 match *rdr.headers().unwrap_err().kind() {
2459 ErrorKind::Utf8 { pos: Some(ref pos), ref err } => {
2460 assert_eq!(pos, &newpos(0, 1, 0));
2461 assert_eq!(err.field(), 1);
2462 assert_eq!(err.valid_up_to(), 1);
2463 }
2464 ref err => panic!("match failed, got {:?}", err),
2465 }
2466 }
2467
2468 #[test]
2469 fn read_record_no_headers_before() {
2470 let data = b("foo,bar,baz\na,b,c\nd,e,f");
2471 let mut rdr =
2472 ReaderBuilder::new().has_headers(false).from_reader(data);
2473 let mut rec = StringRecord::new();
2474
2475 {
2476 let headers = rdr.headers().unwrap();
2477 assert_eq!(3, headers.len());
2478 assert_eq!("foo", &headers[0]);
2479 assert_eq!("bar", &headers[1]);
2480 assert_eq!("baz", &headers[2]);
2481 }
2482
2483 assert!(rdr.read_record(&mut rec).unwrap());
2484 assert_eq!(3, rec.len());
2485 assert_eq!("foo", &rec[0]);
2486
2487 assert!(rdr.read_record(&mut rec).unwrap());
2488 assert_eq!(3, rec.len());
2489 assert_eq!("a", &rec[0]);
2490
2491 assert!(rdr.read_record(&mut rec).unwrap());
2492 assert_eq!(3, rec.len());
2493 assert_eq!("d", &rec[0]);
2494
2495 assert!(!rdr.read_record(&mut rec).unwrap());
2496 }
2497
2498 #[test]
2499 fn read_record_no_headers_after() {
2500 let data = b("foo,bar,baz\na,b,c\nd,e,f");
2501 let mut rdr =
2502 ReaderBuilder::new().has_headers(false).from_reader(data);
2503 let mut rec = StringRecord::new();
2504
2505 assert!(rdr.read_record(&mut rec).unwrap());
2506 assert_eq!(3, rec.len());
2507 assert_eq!("foo", &rec[0]);
2508
2509 assert!(rdr.read_record(&mut rec).unwrap());
2510 assert_eq!(3, rec.len());
2511 assert_eq!("a", &rec[0]);
2512
2513 assert!(rdr.read_record(&mut rec).unwrap());
2514 assert_eq!(3, rec.len());
2515 assert_eq!("d", &rec[0]);
2516
2517 assert!(!rdr.read_record(&mut rec).unwrap());
2518
2519 let headers = rdr.headers().unwrap();
2520 assert_eq!(3, headers.len());
2521 assert_eq!("foo", &headers[0]);
2522 assert_eq!("bar", &headers[1]);
2523 assert_eq!("baz", &headers[2]);
2524 }
2525
2526 #[test]
2527 fn seek() {
2528 let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
2529 let mut rdr = ReaderBuilder::new().from_reader(io::Cursor::new(data));
2530 rdr.seek(newpos(18, 3, 2)).unwrap();
2531
2532 let mut rec = StringRecord::new();
2533
2534 assert_eq!(18, rdr.position().byte());
2535 assert!(rdr.read_record(&mut rec).unwrap());
2536 assert_eq!(3, rec.len());
2537 assert_eq!("d", &rec[0]);
2538
2539 assert_eq!(24, rdr.position().byte());
2540 assert_eq!(4, rdr.position().line());
2541 assert_eq!(3, rdr.position().record());
2542 assert!(rdr.read_record(&mut rec).unwrap());
2543 assert_eq!(3, rec.len());
2544 assert_eq!("g", &rec[0]);
2545
2546 assert!(!rdr.read_record(&mut rec).unwrap());
2547 }
2548
2549 // Test that we can read headers after seeking even if the headers weren't
2550    // explicitly read before seeking.
2551 #[test]
2552 fn seek_headers_after() {
2553 let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
2554 let mut rdr = ReaderBuilder::new().from_reader(io::Cursor::new(data));
2555 rdr.seek(newpos(18, 3, 2)).unwrap();
2556 assert_eq!(rdr.headers().unwrap(), vec!["foo", "bar", "baz"]);
2557 }
2558
2559 // Test that we can read headers after seeking if the headers were read
2560 // before seeking.
2561 #[test]
2562 fn seek_headers_before_after() {
2563 let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
2564 let mut rdr = ReaderBuilder::new().from_reader(io::Cursor::new(data));
2565 let headers = rdr.headers().unwrap().clone();
2566 rdr.seek(newpos(18, 3, 2)).unwrap();
2567 assert_eq!(&headers, rdr.headers().unwrap());
2568 }
2569
2570 // Test that even if we didn't read headers before seeking, if we seek to
2571 // the current byte offset, then no seeking is done and therefore we can
2572 // still read headers after seeking.
2573 #[test]
2574 fn seek_headers_no_actual_seek() {
2575 let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
2576 let mut rdr = ReaderBuilder::new().from_reader(io::Cursor::new(data));
2577 rdr.seek(Position::new()).unwrap();
2578 assert_eq!("foo", &rdr.headers().unwrap()[0]);
2579 }
2580
2581 // Test that position info is reported correctly in absence of headers.
2582    // Test that position info is reported correctly in the absence of headers.
2583 fn positions_no_headers() {
2584 let mut rdr = ReaderBuilder::new()
2585 .has_headers(false)
2586 .from_reader("a,b,c\nx,y,z".as_bytes())
2587 .into_records();
2588
2589 let pos = rdr.next().unwrap().unwrap().position().unwrap().clone();
2590 assert_eq!(pos.byte(), 0);
2591 assert_eq!(pos.line(), 1);
2592 assert_eq!(pos.record(), 0);
2593
2594 let pos = rdr.next().unwrap().unwrap().position().unwrap().clone();
2595 assert_eq!(pos.byte(), 6);
2596 assert_eq!(pos.line(), 2);
2597 assert_eq!(pos.record(), 1);
2598 }
2599
2600 // Test that position info is reported correctly with headers.
2601 #[test]
2602 fn positions_headers() {
2603 let mut rdr = ReaderBuilder::new()
2604 .has_headers(true)
2605 .from_reader("a,b,c\nx,y,z".as_bytes())
2606 .into_records();
2607
2608 let pos = rdr.next().unwrap().unwrap().position().unwrap().clone();
2609 assert_eq!(pos.byte(), 6);
2610 assert_eq!(pos.line(), 2);
2611 assert_eq!(pos.record(), 1);
2612 }
2613
2614 // Test that reading headers on empty data yields an empty record.
2615 #[test]
2616 fn headers_on_empty_data() {
2617 let mut rdr = ReaderBuilder::new().from_reader("".as_bytes());
2618 let r = rdr.byte_headers().unwrap();
2619 assert_eq!(r.len(), 0);
2620 }
2621
2622 // Test that reading the first record on empty data works.
2623 #[test]
2624 fn no_headers_on_empty_data() {
2625 let mut rdr =
2626 ReaderBuilder::new().has_headers(false).from_reader("".as_bytes());
2627 assert_eq!(rdr.records().count(), 0);
2628 }
2629
2630 // Test that reading the first record on empty data works, even if
2631    // we've tried to read headers beforehand.
2632 #[test]
2633 fn no_headers_on_empty_data_after_headers() {
2634 let mut rdr =
2635 ReaderBuilder::new().has_headers(false).from_reader("".as_bytes());
2636 assert_eq!(rdr.headers().unwrap().len(), 0);
2637 assert_eq!(rdr.records().count(), 0);
2638 }
2639}