1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
// Copyright 2023 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use linux_uapi::{
    bpf_insn, sock_filter, BPF_A, BPF_ABS, BPF_ADD, BPF_ALU, BPF_AND, BPF_DIV, BPF_EXIT, BPF_IMM,
    BPF_JA, BPF_JMP, BPF_JMP32, BPF_K, BPF_LD, BPF_LDX, BPF_LSH, BPF_MEM, BPF_MISC, BPF_MOV,
    BPF_MUL, BPF_NEG, BPF_OR, BPF_RET, BPF_RSH, BPF_ST, BPF_SUB, BPF_TAX, BPF_TXA, BPF_X, BPF_XOR,
};
use std::collections::HashMap;

use crate::{EbpfError, EbpfError::*};

// These are accessors for bits in an BPF/EBPF instruction.
// Instructions are encoded in one byte.  The first 3 LSB represent
// the operation, and the other bits represent various modifiers.
// Brief comments are given to indicate what the functions broadly
// represent, but for the gory detail, consult a detailed guide to
// BPF, like the one at https://docs.kernel.org/bpf/instruction-set.html

/// The bpf_class is the instruction type.(e.g., load/store/jump/ALU).
pub fn bpf_class(filter: &sock_filter) -> u32 {
    (filter.code & 0x07).into()
}

/// The bpf_size is the 4th and 5th bit of load and store
/// instructions.  It indicates the bit width of the load / store
/// target (8, 16, 32, 64 bits).
fn bpf_size(filter: &sock_filter) -> u32 {
    (filter.code & 0x18).into()
}

/// The addressing mode is the most significant three bits of load and
/// store instructions.  They indicate whether the instrution accesses a
/// constant, accesses from memory, or accesses from memory atomically.
pub fn bpf_addressing_mode(filter: &sock_filter) -> u32 {
    (filter.code & 0xe0).into()
}

/// Modifiers for jumps and alu operations.  For example, a jump can
/// be jeq, jtl, etc.  An ALU operation can be plus, minus, divide,
/// etc.
fn bpf_op(filter: &sock_filter) -> u32 {
    (filter.code & 0xf0).into()
}

/// The source for the operation (either a register or an immediate).
fn bpf_src(filter: &sock_filter) -> u32 {
    (filter.code & 0x08).into()
}

/// Similar to bpf_src, but also allows BPF_A - used for RET.
fn bpf_rval(filter: &sock_filter) -> u32 {
    (filter.code & 0x18).into()
}

// For details on this function, see to_be_patched in the cbpf_to_ebpf converter.
fn prep_patch(to_be_patched: &mut HashMap<i16, Vec<usize>>, cbpf_target: i16, ebpf_source: usize) {
    to_be_patched.entry(cbpf_target).or_insert_with(std::vec::Vec::new);
    to_be_patched.get_mut(&cbpf_target).unwrap().push(ebpf_source);
}

/// Transforms a program in classic BPF (cbpf, as stored in struct
/// sock_filter) to extended BPF (as stored in struct bpf_insn).
/// The bpf_code parameter is kept as an array for easy transfer
/// via FFI.  This currently only allows the subset of BPF permitted
/// by seccomp(2).
pub(crate) fn cbpf_to_ebpf(bpf_code: &[sock_filter]) -> Result<Vec<bpf_insn>, EbpfError> {
    // There are only two BPF registers, A and X. There are 10
    // EBPF registers, numbered 0-9.  We map between the two as
    // follows:

    // r[0]: We map this to A, since it can be used as a return value.
    // r[1]: ebpf makes this the memory passed in,
    // r[2]: ebpf makes this the length of the memory passed in.
    // r[6]: We map this to X, to keep it away from being clobbered by call / return.
    // r[7]: Scratch register, replaces M[0]
    //       cbpf programs running on Linux have a 16 word scratch memory.  ebpf
    //       only has scratch registers and the data passed in.  For now, we use
    //       the extra registers and hope that no one needs more than 3 of them.
    //       Can be replaced by allocating the incoming data into a buffer that
    //       has an extra 16 words, and using those words.  However, this makes an
    //       awkward API, and we don't know of existing use cases for that much
    //       scratch memory.
    // r[8]: Scratch register, replaces M[1]
    // r[9]: Scratch register, replaces M[2]
    // r[10]: ebpf makes this a pointer to the end of the stack.

    const REG_A: u8 = 0;
    const REG_X: u8 = 6;

    // Map from jump targets in the cbpf to a list of jump
    // instructions in the epbf that target it.  When you figure
    // out what the offset of the target is in the ebpf, you need
    // to patch the jump instructions to target it correctly.
    let mut to_be_patched: HashMap<i16, Vec<usize>> = HashMap::new();

    let mut ebpf_code: Vec<bpf_insn> = vec![];
    for (i, bpf_instruction) in bpf_code.iter().enumerate() {
        match bpf_class(bpf_instruction) {
            BPF_ALU => match bpf_op(bpf_instruction) {
                BPF_ADD | BPF_SUB | BPF_MUL | BPF_DIV | BPF_AND | BPF_OR | BPF_XOR | BPF_LSH
                | BPF_RSH => {
                    let mut e_instr = bpf_insn {
                        code: (BPF_ALU | bpf_op(bpf_instruction) | bpf_src(bpf_instruction)) as u8,
                        ..Default::default()
                    };
                    e_instr.set_dst_reg(REG_A);
                    if bpf_src(bpf_instruction) == BPF_K {
                        e_instr.imm = bpf_instruction.k as i32;
                    } else {
                        e_instr.set_src_reg(REG_X);
                    }
                    ebpf_code.push(e_instr);
                }
                BPF_NEG => {
                    let mut e_instr =
                        bpf_insn { code: (BPF_ALU | BPF_NEG) as u8, ..Default::default() };
                    e_instr.set_src_reg(REG_A);
                    e_instr.set_dst_reg(REG_A);
                    ebpf_code.push(e_instr);
                }
                _ => {
                    return Err(UnrecognizedCbpfError {
                        element_type: "op".to_string(),
                        value: format!("{}", bpf_op(bpf_instruction)),
                        op: "alu".to_string(),
                    });
                }
            },
            BPF_LD => {
                match bpf_addressing_mode(bpf_instruction) {
                    BPF_ABS => {
                        // A load from a given address maps in a
                        // very straightforward way.
                        let mut e_instr = bpf_insn {
                            code: (BPF_LDX | BPF_MEM | bpf_size(bpf_instruction)) as u8,
                            ..Default::default()
                        };
                        e_instr.set_dst_reg(REG_A);
                        e_instr.set_src_reg(1);
                        e_instr.off = bpf_instruction.k as i16;
                        ebpf_code.push(e_instr);
                    }
                    BPF_IMM => {
                        let mut e_instr = bpf_insn {
                            code: (BPF_LDX | BPF_IMM | bpf_size(bpf_instruction)) as u8,
                            imm: bpf_instruction.k as i32,
                            ..Default::default()
                        };
                        e_instr.set_dst_reg(REG_A);
                        ebpf_code.push(e_instr);
                    }
                    BPF_MEM => {
                        // cbpf programs running on Linux have a 16 word scratch memory.  ebpf
                        // only has scratch registers and the data passed in.  For now, we use
                        // the extra registers and hope that no one needs more than 3 of them.
                        // Can be replaced by allocating the incoming data into a buffer that
                        // has an extra 16 words, and using those words.  However, this makes an
                        // awkward API, and we don't know of existing use cases for that much
                        // scratch memory.
                        if bpf_instruction.k > 2 {
                            return Err(ScratchBufferOverflow);
                        }
                        let mut e_instr = bpf_insn {
                            code: (BPF_ALU | BPF_MOV | BPF_X) as u8,
                            ..Default::default()
                        };
                        e_instr.set_dst_reg(REG_A);
                        // See comment on reg 7 above.
                        e_instr.set_src_reg((bpf_instruction.k + 7) as u8);
                        ebpf_code.push(e_instr);
                    }
                    _ => {
                        return Err(UnrecognizedCbpfError {
                            element_type: "mode".to_string(),
                            value: format!("{}", bpf_addressing_mode(bpf_instruction)),
                            op: "ld".to_string(),
                        });
                    }
                }
            }
            BPF_JMP => {
                match bpf_op(bpf_instruction) {
                    BPF_JA => {
                        let j_instr = bpf_insn {
                            code: (BPF_JMP | BPF_JA | bpf_src(bpf_instruction)) as u8,
                            off: bpf_instruction.k as i16,
                            ..Default::default()
                        };
                        prep_patch(&mut to_be_patched, (i as i16) + j_instr.off, ebpf_code.len());
                        ebpf_code.push(j_instr);
                    }
                    _ => {
                        // In cbpf, jmps have a jump-if-true and
                        // jump-if-false branch.  ebpf only has
                        // jump-if-true.  Every cbpf jmp therefore turns
                        // into two instructions: a jmp equivalent to the
                        // original jump-if-true; if the condition
                        // evaluates to false, which falls on failure to
                        // an unconditional jump to the original
                        // jump-if-false target.
                        let mut jt_instr = bpf_insn {
                            code: (BPF_JMP32 | bpf_op(bpf_instruction) | bpf_src(bpf_instruction))
                                as u8,
                            off: bpf_instruction.jt as i16,
                            ..Default::default()
                        };
                        if bpf_src(bpf_instruction) == BPF_K {
                            jt_instr.imm = bpf_instruction.k as i32;
                        } else {
                            jt_instr.set_src_reg(REG_X);
                        }
                        jt_instr.set_dst_reg(REG_A);
                        prep_patch(&mut to_be_patched, (i as i16) + jt_instr.off, ebpf_code.len());

                        ebpf_code.push(jt_instr);

                        // Jump if false
                        let jf_instr = bpf_insn {
                            code: (BPF_JMP | BPF_JA) as u8,
                            off: bpf_instruction.jf as i16,
                            ..Default::default()
                        };
                        prep_patch(&mut to_be_patched, (i as i16) + jf_instr.off, ebpf_code.len());

                        ebpf_code.push(jf_instr);
                    }
                }
            }
            BPF_MISC => {
                let mut e_instr =
                    bpf_insn { code: (BPF_ALU | BPF_MOV | BPF_X) as u8, ..Default::default() };

                match bpf_op(bpf_instruction) {
                    BPF_TAX => {
                        e_instr.set_src_reg(REG_A);
                        e_instr.set_dst_reg(REG_X);
                    }
                    BPF_TXA => {
                        e_instr.set_src_reg(REG_X);
                        e_instr.set_dst_reg(REG_A);
                    }
                    _ => {
                        return Err(UnrecognizedCbpfError {
                            element_type: "op".to_string(),
                            value: format!("{}", bpf_op(bpf_instruction)),
                            op: "misc".to_string(),
                        });
                    }
                }
                ebpf_code.push(e_instr);
            }

            BPF_ST => {
                match bpf_addressing_mode(bpf_instruction) {
                    BPF_IMM => {
                        // Only one addressing mode, because there is only one possible destination type.
                        if bpf_instruction.k > 2 {
                            return Err(ScratchBufferOverflow);
                        }
                        let mut e_instr = bpf_insn {
                            code: (BPF_ALU | BPF_MOV | BPF_X) as u8,
                            ..Default::default()
                        };
                        e_instr.set_dst_reg((bpf_instruction.k + 7) as u8); // See comment on reg 7 above.
                        e_instr.set_src_reg(REG_A);
                        ebpf_code.push(e_instr);
                    }
                    _ => {
                        return Err(UnrecognizedCbpfError {
                            element_type: "mode".to_string(),
                            value: format!("{}", bpf_addressing_mode(bpf_instruction)),
                            op: "st".to_string(),
                        });
                    }
                }
            }
            BPF_RET => {
                if bpf_rval(bpf_instruction) != BPF_K && bpf_rval(bpf_instruction) != BPF_A {
                    return Err(UnrecognizedCbpfError {
                        element_type: "mode".to_string(),
                        value: format!("{}", bpf_addressing_mode(bpf_instruction)),
                        op: "ret".to_string(),
                    });
                }
                if bpf_rval(bpf_instruction) == BPF_K {
                    // We're returning a particular value instead of the contents
                    // of the return register, so load that value into the return
                    // register
                    let mut ld_instr = bpf_insn {
                        code: (BPF_ALU | BPF_MOV | BPF_IMM) as u8,
                        imm: bpf_instruction.k as i32,
                        ..Default::default()
                    };
                    ld_instr.set_dst_reg(REG_A);
                    ebpf_code.push(ld_instr);
                }

                let ret_instr = bpf_insn { code: (BPF_JMP | BPF_EXIT) as u8, ..Default::default() };

                ebpf_code.push(ret_instr);
            }
            _ => {
                return Err(UnrecognizedCbpfError {
                    element_type: "class".to_string(),
                    value: format!("{}", bpf_class(bpf_instruction)),
                    op: "???".to_string(),
                });
            }
        }
        if to_be_patched.contains_key(&(i as i16)) {
            for idx in to_be_patched.get(&(i as i16)).unwrap() {
                ebpf_code[*idx].off = (ebpf_code.len() - *idx - 1) as i16;
            }
            to_be_patched.remove(&(i as i16));
        }
    }
    Ok(ebpf_code)
}