1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
use IResult;
use unicode_xid::UnicodeXID;
/// Skips a leading run of whitespace and ordinary (non-doc) comments.
///
/// The following are consumed:
///
/// * ASCII space and the bytes `0x09` through `0x0d` (tab, line feed,
///   vertical tab, form feed, carriage return);
/// * non-ASCII characters accepted by `is_whitespace`;
/// * line comments starting with `//`, except the doc forms `///` and `//!`
///   (four or more slashes is an ordinary comment again);
/// * block comments starting with `/*`, except the doc forms `/**` and `/*!`
///   (`/***` is an ordinary comment again, and so is the empty comment
///   `/**/`).
///
/// Returns `IResult::Done(rest, ())` where `rest` is the input after the
/// skipped prefix. Returns `IResult::Error` when `input` is empty, when
/// nothing at all could be skipped, or when an ordinary block comment is
/// opened but `block_comment` cannot find its end.
pub fn whitespace(input: &str) -> IResult<&str, ()> {
    if input.is_empty() {
        return IResult::Error;
    }

    let bytes = input.as_bytes();
    let mut i = 0;
    while i < bytes.len() {
        let s = &input[i..];
        if bytes[i] == b'/' {
            if s.starts_with("//") && (!s.starts_with("///") || s.starts_with("////"))
                && !s.starts_with("//!")
            {
                // Ordinary line comment: skip through the newline.
                if let Some(len) = s.find('\n') {
                    i += len + 1;
                    continue;
                }
                // No newline left, so the comment runs to the end of input.
                break;
            } else if s.starts_with("/**/") {
                // `/**/` is an empty ordinary block comment, not the start of
                // a `/** ... */` doc comment, so it must be skipped here. The
                // doc-comment test in the next branch would otherwise reject
                // it because it begins with `/**` but not `/***`.
                i += 4;
                continue;
            } else if s.starts_with("/*") && (!s.starts_with("/**") || s.starts_with("/***"))
                && !s.starts_with("/*!")
            {
                match block_comment(s) {
                    IResult::Done(_, com) => {
                        i += com.len();
                        continue;
                    }
                    IResult::Error => {
                        // Unterminated block comment.
                        return IResult::Error;
                    }
                }
            }
        }

        match bytes[i] {
            // Space plus the contiguous range 0x09..=0x0d, spelled out so the
            // pattern is valid on every edition and toolchain.
            b' ' | b'\t' | b'\n' | 0x0b | 0x0c | b'\r' => {
                i += 1;
                continue;
            }
            // Any other ASCII byte ends the whitespace run.
            b if b <= 0x7f => {}
            // Lead byte of a multi-byte character: decode it and test it.
            _ => {
                let ch = s.chars().next().unwrap();
                if is_whitespace(ch) {
                    i += ch.len_utf8();
                    continue;
                }
            }
        }

        // First byte that is neither whitespace nor an ordinary comment.
        return if i > 0 {
            IResult::Done(s, ())
        } else {
            IResult::Error
        };
    }

    // The whole input was whitespace and/or comments.
    IResult::Done("", ())
}
/// Parses one `/* ... */` block comment, honouring nested comments.
///
/// `input` must begin with `/*`; otherwise `IResult::Error` is returned.
/// On success the result is `IResult::Done(rest, comment)`, where `comment`
/// is the full comment text including both delimiters and `rest` is
/// everything after it. If the input ends before the outermost comment is
/// closed, `IResult::Error` is returned.
pub fn block_comment(input: &str) -> IResult<&str, &str> {
    if !input.starts_with("/*") {
        return IResult::Error;
    }

    let raw = input.as_bytes();
    let mut nesting = 0;
    let mut pos = 0;

    // Examine the input two bytes at a time. A recognised delimiter consumes
    // both of its bytes, so `/*/` is never read as an open followed by a close.
    while pos + 1 < raw.len() {
        match (raw[pos], raw[pos + 1]) {
            (b'/', b'*') => {
                nesting += 1;
                pos += 2;
            }
            (b'*', b'/') => {
                nesting -= 1;
                pos += 2;
                if nesting == 0 {
                    // `pos` sits just past the closing `*/`.
                    let (comment, rest) = input.split_at(pos);
                    return IResult::Done(rest, comment);
                }
            }
            _ => pos += 1,
        }
    }

    IResult::Error
}
pub fn word_break(input: &str) -> IResult<&str, ()> {
match input.chars().next() {
Some(ch) if UnicodeXID::is_xid_continue(ch) => IResult::Error,
Some(_) | None => IResult::Done(input, ()),
}
}
/// Returns `input` with any leading whitespace and ordinary comments removed.
///
/// This is an infallible wrapper around `whitespace`: when `whitespace`
/// reports `IResult::Error`, `input` is returned unchanged.
pub fn skip_whitespace(input: &str) -> &str {
    if let IResult::Done(remaining, _) = whitespace(input) {
        remaining
    } else {
        input
    }
}
/// Reports whether `ch` counts as whitespace for this lexer.
///
/// That is everything `char::is_whitespace` accepts, plus U+200E
/// (left-to-right mark) and U+200F (right-to-left mark).
fn is_whitespace(ch: char) -> bool {
    match ch {
        '\u{200e}' | '\u{200f}' => true,
        other => other.is_whitespace(),
    }
}