1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
mod earley_parse;
mod cfg;
mod types;
mod nfa_array;
mod boyer_moore;
mod threadpool;
use crate::cfg::parse_regex;
use crate::nfa_array::{Simulation, create_simulation};
use crate::boyer_moore::BM;
use crate::threadpool::Threadpool;
use std::env;
use std::fs::File;
use std::io::{self, BufRead};
use std::path::Path;
use std::str::FromStr;
use std::cmp;
use std::sync::{Arc, Mutex};
type Match = (usize, Vec<String>);
fn main() {
let args: Vec<String> = env::args().collect();
let regex = &args[1];
let filename = &args[2];
let thread_n: usize = FromStr::from_str(&args[3]).unwrap();
let result= parse_regex(regex);
match result {
| None => println!("The string provided is not a regular expression"),
| Some(ast) =>
{
let new_ast = ast.collapse();
let mut sim = create_simulation(&new_ast);
let prefix = sim.enable_prefix();
let has_prefix = prefix != "";
// let mut line_n = 0;
let all_matches: Arc<Mutex<Vec<Match>>> = Arc::new(Mutex::new(Vec::new()));
if let Ok(lines) = read_lines(filename) {
let pool = Threadpool::new(thread_n);
let mut bm;
if has_prefix {
bm = BM::new(prefix);
}
else {
bm = BM::new(String::from(""));
}
for (line_n, line) in lines.flatten().enumerate() {
let mut sim_thread = sim.clone();
let line_vec: Vec<char> = line.chars().collect();
let all_matches = Arc::clone(&all_matches);
if has_prefix {
let mut bm_thread = bm.clone();
pool.execute(move || {
let results = check_line_with_prefix(&mut sim_thread, &line_vec, line_n, &mut bm_thread);
match results {
None => (),
Some(m) => {
let mut matches_arr = all_matches.lock().unwrap();
matches_arr.push((line_n, m));
}
}
});
}
else {
pool.execute(move || {
let results = check_line(&mut sim_thread, &line_vec, line_n);
match results {
None => (),
Some(m) => {
let mut matches_arr = all_matches.lock().unwrap();
matches_arr.push((line_n, m));
}
}
})
}
}
}
all_matches.lock().unwrap().sort_by(|a, b| compare_match(a, b));
for (line_n, line_matches) in &*all_matches.lock().unwrap() {
for m in line_matches {
println!("{line_n}:{m}");
}
}
},
};
}
fn check_to_end_prefix(sim: &mut Simulation, lm: &mut usize,
i: usize, line: &Vec<char>, line_len: usize) {
// finds the largest string from i to line_len that matches
let mut j = i;
while !sim.stuck() & (j < line_len) {
let c = line[j];
sim.step(c);
if sim.accepts() {
*lm = j;
}
j += 1;
}
}
fn check_to_end(sim: &mut Simulation, lm: &mut usize,
i: usize, line: &Vec<char>, line_len: usize) {
// finds the largest string from i to line_len that matches
let mut j = i;
while !sim.stuck() & (j < line_len) {
let c = line[j];
sim.step(c);
if sim.accepts() {
*lm = j + 1;
}
j += 1;
}
}
fn check_line(sim: &mut Simulation, line: &Vec<char>, line_n: usize) -> Option<Vec<String>> {
// finds matches in a line and prints out result
// LINE:MATCH
let mut lm = 0;
let line_len = line.len();
let mut i = 0;
let mut matches: Vec<String> = Vec::new();
while i < line_len {
sim.reset();
check_to_end(sim, &mut lm, i, &line, line_len);
if lm > i {
let m: String = line[i..lm].into_iter().collect();
matches.push(m);
// println!("{line_n}:{}", m);
i = lm;
}
else {
i += 1;
}
}
if matches.is_empty() {
None
}
else {
Some(matches)
}
}
fn check_line_with_prefix(sim: &mut Simulation, line: &Vec<char>,
line_n: usize, bm: &mut BM) -> Option<Vec<String>> {
// finds matches in a line and prints out result
// using Boyer-Moore Alrgoithm to find matching prefix
// LINE:MATCH
let mut lm = 0;
let line_len = line.len();
let mut i = 0;
let mut matches: Vec<String> = Vec::new();
while i < line_len {
sim.reset();
// prefix idx becomes idx to start search
let mut prefix_idx = 0;
let found_prefix = bm.find(&mut prefix_idx, &line[i..]);
i = i + prefix_idx;
if sim.accepts() {
lm = i - 1;
}
if !found_prefix {
// don't search if prefix not found in line
break;
}
check_to_end_prefix(sim, &mut lm, i, &line, line_len);
if lm >= i - 1{
let m_start = i - bm.pattern_len();
let m: String = line[m_start..=lm].into_iter().collect();
matches.push(m);
// println!("{line_n}:{}", m);
i = lm + 1;
}
else {
i += 1;
}
}
if matches.is_empty() {
None
}
else {
Some(matches)
}
}
fn compare_match((idx_a, _): & Match, (idx_b, _): & Match) -> cmp::Ordering {
(*idx_a).cmp(idx_b)
}
// From Rust by Example Docs:
// https://doc.rust-lang.org/rust-by-example/std_misc/file/read_lines.html
fn read_lines<P>(filename: P) -> io::Result<io::Lines<io::BufReader<File>>>
where P: AsRef<Path>, {
let file = File::open(filename)?;
// bufreader for efficiency
Ok(io::BufReader::new(file).lines())
}