1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
use IResult;
use unicode_xid::UnicodeXID;

pub fn whitespace(input: &str) -> IResult<&str, ()> {
    if input.is_empty() {
        return IResult::Error;
    }

    let bytes = input.as_bytes();
    let mut i = 0;
    while i < bytes.len() {
        let s = &input[i..];
        if bytes[i] == b'/' {
            if s.starts_with("//") && (!s.starts_with("///") || s.starts_with("////")) &&
               !s.starts_with("//!") {
                if let Some(len) = s.find('\n') {
                    i += len + 1;
                    continue;
                }
                break;
            } else if s.starts_with("/*") && (!s.starts_with("/**") || s.starts_with("/***")) &&
                      !s.starts_with("/*!") {
                match block_comment(s) {
                    IResult::Done(_, com) => {
                        i += com.len();
                        continue;
                    }
                    IResult::Error => {
                        return IResult::Error;
                    }
                }
            }
        }
        match bytes[i] {
            b' ' | 0x09...0x0d => {
                i += 1;
                continue;
            }
            b if b <= 0x7f => {}
            _ => {
                let ch = s.chars().next().unwrap();
                if is_whitespace(ch) {
                    i += ch.len_utf8();
                    continue;
                }
            }
        }
        return if i > 0 {
            IResult::Done(s, ())
        } else {
            IResult::Error
        };
    }
    IResult::Done("", ())
}

pub fn block_comment(input: &str) -> IResult<&str, &str> {
    if !input.starts_with("/*") {
        return IResult::Error;
    }

    let mut depth = 0;
    let bytes = input.as_bytes();
    let mut i = 0;
    let upper = bytes.len() - 1;
    while i < upper {
        if bytes[i] == b'/' && bytes[i + 1] == b'*' {
            depth += 1;
            i += 1; // eat '*'
        } else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
            depth -= 1;
            if depth == 0 {
                return IResult::Done(&input[i + 2..], &input[..i + 2]);
            }
            i += 1; // eat '/'
        }
        i += 1;
    }
    IResult::Error
}

pub fn word_break(input: &str) -> IResult<&str, ()> {
    match input.chars().next() {
        Some(ch) if UnicodeXID::is_xid_continue(ch) => IResult::Error,
        Some(_) | None => IResult::Done(input, ()),
    }
}

pub fn skip_whitespace(input: &str) -> &str {
    match whitespace(input) {
        IResult::Done(rest, _) => rest,
        IResult::Error => input,
    }
}

fn is_whitespace(ch: char) -> bool {
    // Rust treats left-to-right mark and right-to-left mark as whitespace
    ch.is_whitespace() || ch == '\u{200e}' || ch == '\u{200f}'
}