difference/
lcs.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
use std::cmp::max;

// strsplit is like `s.split(split)`, except that if `split` is "", it
// trims the leading and trailing empty elements, since the `lcs`
// logic won't handle those properly.
fn strsplit<'a>(s: &'a str, split: &str) -> Vec<&'a str> {
    let mut si = s.split(split);
    if split == "" {
        si.next();
    }
    let mut v: Vec<&str> = si.collect();
    if split == "" {
        v.pop();
    }
    v
}

// finds the longest common subsequences
// outputs the edit distance and a string containing
// all chars both inputs have in common
//
// This algorithm is based on
// https://en.wikipedia.org/wiki/Longest_common_subsequence_problem#Code_for_the_dynamic_programming_solution
#[allow(non_snake_case)]
#[cfg_attr(feature = "cargo-clippy", allow(many_single_char_names))]
pub fn lcs(orig: &str, edit: &str, split: &str) -> (i32, String) {
    // make list by custom splits
    let a = strsplit(orig, split);
    let b = strsplit(edit, split);

    let N = a.len();
    let M = b.len();

    let mut idx: Vec<usize> = Vec::with_capacity(N * M);
    idx.resize(N * M, 0);

    for i in 0..N {
        for j in 0..M {
            if b[j] == a[i] {
                if i == 0 || j == 0 {
                    idx[i * M + j] = 1;
                } else {
                    idx[i * M + j] = idx[(i - 1) * M + j - 1] + 1;
                }
            } else if i == 0 {
                if j == 0 {
                    idx[i * M + j] = 0;
                } else {
                    idx[i * M + j] = idx[i * M + j - 1];
                }
            } else if j == 0 {
                idx[i * M + j] = idx[(i - 1) * M + j];
            } else {
                idx[i * M + j] = max(idx[i * M + j - 1], idx[(i - 1) * M + j]);
            }
        }
    }

    let mut i = (N as isize) - 1;
    let mut j = (M as isize) - 1;
    let mut lcs = Vec::new();
    while i >= 0 && j >= 0 {
        let ui = i as usize;
        let uj = j as usize;
        if a[ui] == b[uj] {
            lcs.push(a[ui]);
            i -= 1;
            j -= 1;
        } else if j == 0 && i == 0 {
            break;
        } else if i == 0 || idx[ui * M + uj - 1] > idx[(ui - 1) * M + uj] {
            j -= 1;
        } else {
            i -= 1;
        }
    }

    lcs.reverse();
    ((N + M - 2 * lcs.len()) as i32, lcs.join(split))
}

#[test]
fn test_lcs() {
    assert_eq!(lcs("test", "tost", ""), (2, "tst".to_string()));
    assert_eq!(lcs("test", "test", ""), (0, "test".to_string()));

    assert_eq!(lcs("test", "test", " "), (0, "test".to_string()));

    assert_eq!(
        lcs(
            "The quick brown fox jumps over the lazy dog",
            "The quick brown dog leaps over the lazy cat",
            "",
        ),
        (16, "The quick brown o ps over the lazy ".to_string())
    );
    assert_eq!(
        lcs(
            "The quick brown fox jumps over the lazy dog",
            "The quick brown dog leaps over the lazy cat",
            " ",
        ),
        (6, "The quick brown over the lazy".to_string())
    );

    assert_eq!(
        lcs(
            "The quick brown fox jumps over the lazy dog",
            "The quick brown dog leaps over the lazy cat",
            "\n",
        ),
        (2, "".to_string())
    );
    assert_eq!(
        lcs(
            "The quick brown fox jumps over the lazy dog",
            "The quick brown fox jumps over the lazy dog",
            "\n",
        ),
        (0, "The quick brown fox jumps over the lazy dog".to_string())
    );

    assert_eq!(
        lcs("a b : c", "b a : b : c", " "),
        (2, "a b : c".to_string())
    );

    assert_eq!(lcs("", "a b c", ""), (5, "".to_string()));

    assert_eq!(lcs("", " a", " "), (1, "".to_string()));
}