itertools/
kmerge_impl.rs

1
2use size_hint;
3use Itertools;
4
5use std::mem::replace;
6use std::fmt;
7
8macro_rules! clone_fields {
9    ($name:ident, $base:expr, $($field:ident),+) => (
10        $name {
11            $(
12                $field : $base . $field .clone()
13            ),*
14        }
15    );
16}
17
18/// Head element and Tail iterator pair
19///
20/// `PartialEq`, `Eq`, `PartialOrd` and `Ord` are implemented by comparing sequences based on
21/// first items (which are guaranteed to exist).
22///
23/// The meanings of `PartialOrd` and `Ord` are reversed so as to turn the heap used in
24/// `KMerge` into a min-heap.
25#[derive(Debug)]
26struct HeadTail<I>
27    where I: Iterator
28{
29    head: I::Item,
30    tail: I,
31}
32
33impl<I> HeadTail<I>
34    where I: Iterator
35{
36    /// Constructs a `HeadTail` from an `Iterator`. Returns `None` if the `Iterator` is empty.
37    fn new(mut it: I) -> Option<HeadTail<I>> {
38        let head = it.next();
39        head.map(|h| {
40            HeadTail {
41                head: h,
42                tail: it,
43            }
44        })
45    }
46
47    /// Get the next element and update `head`, returning the old head in `Some`.
48    ///
49    /// Returns `None` when the tail is exhausted (only `head` then remains).
50    fn next(&mut self) -> Option<I::Item> {
51        if let Some(next) = self.tail.next() {
52            Some(replace(&mut self.head, next))
53        } else {
54            None
55        }
56    }
57
58    /// Hints at the size of the sequence, same as the `Iterator` method.
59    fn size_hint(&self) -> (usize, Option<usize>) {
60        size_hint::add_scalar(self.tail.size_hint(), 1)
61    }
62}
63
64impl<I> Clone for HeadTail<I>
65    where I: Iterator + Clone,
66          I::Item: Clone
67{
68    fn clone(&self) -> Self {
69        clone_fields!(HeadTail, self, head, tail)
70    }
71}
72
73/// Make `data` a heap (min-heap w.r.t the sorting).
74fn heapify<T, S>(data: &mut [T], mut less_than: S)
75    where S: FnMut(&T, &T) -> bool
76{
77    for i in (0..data.len() / 2).rev() {
78        sift_down(data, i, &mut less_than);
79    }
80}
81
82/// Sift down element at `index` (`heap` is a min-heap wrt the ordering)
83fn sift_down<T, S>(heap: &mut [T], index: usize, mut less_than: S)
84    where S: FnMut(&T, &T) -> bool
85{
86    debug_assert!(index <= heap.len());
87    let mut pos = index;
88    let mut child = 2 * pos + 1;
89    // the `pos` conditional is to avoid a bounds check
90    while pos < heap.len() && child < heap.len() {
91        let right = child + 1;
92
93        // pick the smaller of the two children
94        if right < heap.len() && less_than(&heap[right], &heap[child]) {
95            child = right;
96        }
97
98        // sift down is done if we are already in order
99        if !less_than(&heap[child], &heap[pos]) {
100            return;
101        }
102        heap.swap(pos, child);
103        pos = child;
104        child = 2 * pos + 1;
105    }
106}
107
108/// An iterator adaptor that merges an abitrary number of base iterators in ascending order.
109/// If all base iterators are sorted (ascending), the result is sorted.
110///
111/// Iterator element type is `I::Item`.
112///
113/// See [`.kmerge()`](../trait.Itertools.html#method.kmerge) for more information.
114#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
115pub struct KMerge<I>
116    where I: Iterator
117{
118    heap: Vec<HeadTail<I>>,
119}
120
121impl<I> fmt::Debug for KMerge<I>
122    where I: Iterator + fmt::Debug,
123          I::Item: fmt::Debug,
124{
125    debug_fmt_fields!(KMerge, heap);
126}
127
128/// Create an iterator that merges elements of the contained iterators using
129/// the ordering function.
130///
131/// Equivalent to `iterable.into_iter().kmerge()`.
132///
133/// ```
134/// use itertools::kmerge;
135///
136/// for elt in kmerge(vec![vec![0, 2, 4], vec![1, 3, 5], vec![6, 7]]) {
137///     /* loop body */
138/// }
139/// ```
140pub fn kmerge<I>(iterable: I) -> KMerge<<I::Item as IntoIterator>::IntoIter>
141    where I: IntoIterator,
142          I::Item: IntoIterator,
143          <<I as IntoIterator>::Item as IntoIterator>::Item: PartialOrd
144{
145    let iter = iterable.into_iter();
146    let (lower, _) = iter.size_hint();
147    let mut heap = Vec::with_capacity(lower);
148    heap.extend(iter.filter_map(|it| HeadTail::new(it.into_iter())));
149    heapify(&mut heap, |a, b| a.head < b.head);
150    KMerge { heap: heap }
151}
152
153impl<I> Clone for KMerge<I>
154    where I: Iterator + Clone,
155          I::Item: Clone
156{
157    fn clone(&self) -> KMerge<I> {
158        clone_fields!(KMerge, self, heap)
159    }
160}
161
162impl<I> Iterator for KMerge<I>
163    where I: Iterator,
164          I::Item: PartialOrd
165{
166    type Item = I::Item;
167
168    fn next(&mut self) -> Option<Self::Item> {
169        if self.heap.is_empty() {
170            return None;
171        }
172        let result = if let Some(next) = self.heap[0].next() {
173            next
174        } else {
175            self.heap.swap_remove(0).head
176        };
177        sift_down(&mut self.heap, 0, |a, b| a.head < b.head);
178        Some(result)
179    }
180
181    fn size_hint(&self) -> (usize, Option<usize>) {
182        self.heap.iter()
183                 .map(|i| i.size_hint())
184                 .fold1(size_hint::add)
185                 .unwrap_or((0, Some(0)))
186    }
187}
188
189/// An iterator adaptor that merges an abitrary number of base iterators
190/// according to an ordering function.
191///
192/// Iterator element type is `I::Item`.
193///
194/// See [`.kmerge_by()`](../trait.Itertools.html#method.kmerge_by) for more
195/// information.
196#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
197pub struct KMergeBy<I, F>
198    where I: Iterator,
199{
200    heap: Vec<HeadTail<I>>,
201    less_than: F,
202}
203
204impl<I, F> fmt::Debug for KMergeBy<I, F>
205    where I: Iterator + fmt::Debug,
206          I::Item: fmt::Debug,
207{
208    debug_fmt_fields!(KMergeBy, heap);
209}
210
211/// Create an iterator that merges elements of the contained iterators.
212///
213/// Equivalent to `iterable.into_iter().kmerge_by(less_than)`.
214pub fn kmerge_by<I, F>(iterable: I, mut less_than: F)
215    -> KMergeBy<<I::Item as IntoIterator>::IntoIter, F>
216    where I: IntoIterator,
217          I::Item: IntoIterator,
218          F: FnMut(&<<I as IntoIterator>::Item as IntoIterator>::Item,
219                   &<<I as IntoIterator>::Item as IntoIterator>::Item) -> bool
220{
221    let iter = iterable.into_iter();
222    let (lower, _) = iter.size_hint();
223    let mut heap: Vec<_> = Vec::with_capacity(lower);
224    heap.extend(iter.filter_map(|it| HeadTail::new(it.into_iter())));
225    heapify(&mut heap, |a, b| less_than(&a.head, &b.head));
226    KMergeBy { heap: heap, less_than: less_than }
227}
228
229
230impl<I, F> Iterator for KMergeBy<I, F>
231    where I: Iterator,
232          F: FnMut(&I::Item, &I::Item) -> bool
233{
234    type Item = I::Item;
235
236    fn next(&mut self) -> Option<Self::Item> {
237        if self.heap.is_empty() {
238            return None;
239        }
240        let result = if let Some(next) = self.heap[0].next() {
241            next
242        } else {
243            self.heap.swap_remove(0).head
244        };
245        let less_than = &mut self.less_than;
246        sift_down(&mut self.heap, 0, |a, b| less_than(&a.head, &b.head));
247        Some(result)
248    }
249
250    fn size_hint(&self) -> (usize, Option<usize>) {
251        self.heap.iter()
252                 .map(|i| i.size_hint())
253                 .fold1(size_hint::add)
254                 .unwrap_or((0, Some(0)))
255    }
256}