Commit 45066a36 authored by Angel Avelar's avatar Angel Avelar
Browse files

adding files

parents
No related merge requests found
Showing with 3093 additions and 0 deletions
+3093 -0
This diff is collapsed.
File added
This diff is collapsed.
File added
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "type": "lldb",
            "request": "launch",
            "name": "Debug",
            // The Cargo package is named `grep-parallel`, so that is the binary name.
            "program": "${workspaceFolder}/target/debug/grep-parallel",
            // <regex> <file> <threads>: main() reads all three arguments.
            "args": ["aba", "./test.txt", "4"],
            "cwd": "${workspaceFolder}"
        }
    ]
}
\ No newline at end of file
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "grep-parallel"
version = "0.1.0"
[package]
name = "grep-parallel"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
\ No newline at end of file
# For test compatibility, your makefile should include 'bin/grep' as a target
# that builds with optimizations on. For C, Makefiles will vary.
# Here is a basic example of how to do this for a rust project:
bin/grep-parallel: $(shell find src)
mkdir -p bin
cargo build --release
cp target/release/grep-parallel $@
# A 'clean' target is expected as well
clean:
rm -f bin/*
.PHONY: clean
File added
/// Code point of the first character covered by the table, the space (`' '`).
const FIRST: usize = 0x20;
/// Table row reserved for the horizontal tab.
///
/// The printable characters `' '..='~'` occupy rows `0..=0x7E - FIRST`, so the
/// tab takes the next free row and must never share a row with `'~'`.
const LAST: usize = 0x7E - FIRST + 1;
/// Sentinel value. As a row index it means "character not covered by the
/// table"; as a table entry it means "no earlier occurrence in the pattern".
const NULL: usize = LAST + 1;

/// Bad-character (Boyer-Moore style) searcher for a fixed pattern.
#[derive(Clone)]
pub struct BM {
    /// `last_t[row][j]` is the last position left of `j` in the pattern that
    /// holds the character mapped to `row`, or `NULL` when there is none.
    last_t: Vec<Vec<usize>>,
    /// The pattern being searched for.
    pattern: Vec<char>,
    /// Pattern position currently being compared.
    pattern_idx: usize,
    /// Number of characters in the pattern.
    pattern_len: usize,
    /// Line position currently being compared.
    line_idx: usize,
    /// Length of the line currently being searched.
    line_len: usize,
}

/// Maps a character to its row in the last-occurrence table.
///
/// Returns `0..=94` for `' '..='~'`, `LAST` for the horizontal tab and `NULL`
/// for every other character.
pub fn to_idx(c: char) -> usize {
    match c {
        '\t' => LAST,
        ' '..='~' => (c as usize) - FIRST,
        _ => NULL,
    }
}
impl BM {
/// Builds a searcher for `pattern_str`.
///
/// For a non-empty pattern the last-occurrence table is filled right away.
/// An empty pattern produces a searcher with an empty table; calling
/// `find` on it panics.
pub fn new(pattern_str: String) -> Self {
let pattern: Vec<char> = pattern_str.chars().collect();
let l = pattern.len();
if l == 0 {
BM {last_t: Vec::new(),
pattern: pattern, pattern_idx: 0, pattern_len: 0,
line_idx: 0, line_len: 0
}
}
else {
// comparisons start at the last character of the pattern
let mut new = BM {last_t: Vec::new(),
pattern: pattern, pattern_idx: *&l-1, pattern_len: l,
line_idx: 0, line_len: 0
};
new.fill_last_table();
new
}
}
/// Populates `last_t`, appending one column per pattern position to every row.
///
/// Intended to be called once on an empty table (as `new` does); calling it
/// again would append further columns.
pub fn fill_last_table(&mut self) {
// creates a table where t[i, j] is the last occurrence
// of char i to the left of j on the pattern
// list of last idx for each char as we traverse the pattern
let mut last: Vec<usize> = Vec::new();
// one row (and one running "last seen" slot) per printable character
for _ in ' '..='~' {
self.last_t.push(Vec::new());
last.push(NULL);
}
// for horizontal tab
self.last_t.push(Vec::new());
last.push(NULL);
for (i, c) in self.pattern.iter().enumerate() {
for a in ' '..='~' {
// record the last occurrence seen strictly before position i ...
self.last_t[to_idx(a)].push(last[to_idx(a)]);
// ... and only then account for the character at position i itself
if a == *c {
last[to_idx(a)] = i;
}
}
self.last_t[to_idx('\t')].push(last[to_idx('\t')]);
if '\t' == *c {
last[to_idx('\t')] = i;
}
}
}
/// Performs one comparison step of the search.
///
/// Returns `None` once the line is exhausted, `Some(true)` when the whole
/// pattern matched (in which case `*match_idx` is set to the index one past
/// the end of the match), and `Some(false)` when the search must continue.
pub fn bad_compare(&mut self, match_idx: &mut usize, line: &[char]) -> Option<bool> {
if self.line_idx >= self.line_len {
return None
}
if line[self.line_idx] == self.pattern[self.pattern_idx] {
if self.pattern_idx == 0 {
// match: line_idx is the start of the match here
*match_idx = self.line_idx + self.pattern_len;
Some(true)
}
else {
// characters agree; keep comparing right-to-left
self.pattern_idx -= 1;
self.line_idx -= 1;
Some(false)
}
}
else {
let line_char_idx = to_idx(line[self.line_idx]);
if line_char_idx == NULL {
// the line character has no table row: move the whole pattern past it
self.line_idx = self.line_idx + self.pattern_len;
self.pattern_idx = self.pattern_len - 1;
if self.line_idx >= self.line_len {
// searched the entire line
return None;
}
else {
return Some(false);
}
}
if self.last_t[line_char_idx][self.pattern_idx] == NULL {
// there is no matching char to the left
self.line_idx = self.line_idx + self.pattern_len;
self.pattern_idx = self.pattern_len - 1;
if self.line_idx >= self.line_len {
// searched the entire line
None
}
else {
Some(false)
}
}
else {
// shift pattern to align next occurrence of line char
self.line_idx = self.line_idx
+ self.pattern_len - 1
- self.last_t[line_char_idx][self.pattern_idx];
self.pattern_idx = self.pattern_len - 1;
Some(false)
}
}
}
/// Searches `line` for the first occurrence of the pattern.
///
/// Returns `true` when found and stores in `*match_idx` the index one past
/// the last matched character; returns `false` otherwise.
///
/// # Panics
/// Panics with "empty prefix" when the pattern is empty.
pub fn find(&mut self, match_idx: &mut usize, line: &[char]) -> bool{
if self.pattern_len > 0 {
let l = line.len();
self.line_len = l;
// align the pattern with the start of the line
self.line_idx = self.pattern_len - 1;
// set pattern to be at last char
self.pattern_idx = self.pattern_len - 1;
while let Some(found_pattern) = self.bad_compare(match_idx, &line) {
if found_pattern {
return true;
};
}
false
}
else {
panic!("empty prefix");
}
}
/// Number of characters in the pattern.
pub fn pattern_len(&self) -> usize {
self.pattern_len
}
/// Prints the table row of each character in `char_lst` to stdout.
pub fn show_table(&self, char_lst: Vec<char>) {
for c in char_lst {
let a_row = &self.last_t[to_idx(c)];
print!("{}:", c);
for c in a_row {
print!(" {}", c);
}
println!("");
}
}
}
/// Computes the Z-array of `input`.
///
/// `z[i]` (for `i >= 1`) is the length of the longest substring starting at
/// `i` that is also a prefix of `input`; `z[0]` is `0` by convention. The
/// result always has exactly `input.len()` entries, so empty input yields an
/// empty vector.
pub fn z_algorithm(input: &[char]) -> Vec<usize> {
    let len = input.len();
    let mut z_arr = vec![0; len];
    // `[l, r)` is the right-most window known to match a prefix of `input`.
    let (mut l, mut r) = (0usize, 0usize);
    for idx in 1..len {
        // Inside the window the answer is at least what the mirrored
        // position says, capped at the window's right edge.
        let mut z = if idx < r {
            z_arr[idx - l].min(r - idx)
        } else {
            0
        };
        // Extend by explicit comparison.
        while idx + z < len && input[z] == input[idx + z] {
            z += 1;
        }
        if idx + z > r {
            l = idx;
            r = idx + z;
        }
        z_arr[idx] = z;
    }
    z_arr
}
/// Builds a suffix table for `input` from the Z-array of the reversed input.
///
/// The Z-array of the reversed input is itself reversed, so entry `j` holds
/// the length `z` of the longest substring ending at `j` that is also a
/// suffix of `input`. For every non-zero `z`, slot `len - z` of the result is
/// set to `j`; later positions overwrite earlier ones. Untouched slots are `0`.
pub fn suffix_t(input: &Vec<char>) -> Vec<usize> {
    let len = input.len();
    let reversed: Vec<char> = input.iter().rev().copied().collect();
    let mut table = vec![0; len];
    for (j, z) in z_algorithm(&reversed).into_iter().rev().enumerate() {
        if z != 0 {
            table[len - z] = j;
        }
    }
    table
}
#[cfg(test)]
mod test {
    use super::*;

    /// Splits a string literal into the `Vec<char>` form the algorithms take.
    fn chars_of(text: &str) -> Vec<char> {
        text.chars().collect()
    }

    /// Smoke test: searches for "aba" and prints where the match ends.
    #[test]
    fn bad_char_fill() {
        let mut bm = BM::new(String::from("aba"));
        let haystack = chars_of("abababa");
        let mut b = 0;
        if bm.find(&mut b, &haystack[..]) {
            println!("Found it at {b}");
        }
    }

    /// Checks the Z-array on known inputs and prints one suffix table.
    #[test]
    fn z_algo() {
        let cases: [(&str, Vec<usize>); 4] = [
            ("aabcaabxaaaz", vec![0, 1, 0, 0, 3, 1, 0, 0, 2, 2, 1, 0]),
            ("abababaabab", vec![0, 0, 5, 0, 3, 0, 1, 4, 0, 2, 0]),
            ("aaaaa", vec![0, 4, 3, 2, 1]),
            ("aabaabxaaz", vec![0, 1, 0, 3, 1, 0, 0, 2, 1, 0]),
        ];
        for (text, expected) in cases {
            assert_eq!(z_algorithm(&chars_of(text)), expected);
        }
        let z_arr5 = suffix_t(&chars_of("pnampnam"));
        println!("{:?}", z_arr5);
    }
}
\ No newline at end of file
use crate::earley_parse::{CFG, ASTNode, nt, tr};
/// Builds the context-free grammar of the supported regular-expression syntax.
///
/// Precedence, loosest to tightest: union (`|`), concatenation, `?`, `+`, `*`,
/// then parenthesised groups and single characters. A backslash escapes the
/// operator characters and `.`, and introduces the character classes
/// `\d`/`\D` (digit), `\w`/`\W` (letter) and `\s`/`\S` (whitespace).
fn regex_cfg() -> CFG {
    let mut g = CFG::new("UNION");
    // union
    g.add_rule("UNION", vec![nt("UNION"), tr('|'), nt("CONCAT")]);
    g.add_rule("UNION", vec![nt("CONCAT")]);
    // concatenation
    g.add_rule("CONCAT", vec![nt("CONCAT"), nt("Q")]);
    g.add_rule("CONCAT", vec![nt("Q")]);
    // one or zero
    g.add_rule("Q", vec![nt("PLUS"), tr('?')]);
    g.add_rule("Q", vec![nt("PLUS")]);
    // one or more
    g.add_rule("PLUS", vec![nt("STAR"), tr('+')]);
    g.add_rule("PLUS", vec![nt("STAR")]);
    // zero or more
    g.add_rule("STAR", vec![nt("EXPR"), tr('*')]);
    g.add_rule("STAR", vec![nt("EXPR")]);
    // parenthesis
    g.add_rule("EXPR", vec![tr('('), nt("UNION"), tr(')')]);
    g.add_rule("EXPR", vec![nt("CHAR")]);
    // literals
    g.add_rule("CHAR", vec![nt("DIGIT")]);
    g.add_rule("CHAR", vec![nt("LETTER")]);
    g.add_rule("CHAR", vec![nt("WHITESPACE")]);
    g.add_rule("CHAR", vec![nt("NON-DIGIT")]);
    g.add_rule("CHAR", vec![nt("NON-LETTER")]);
    g.add_rule("CHAR", vec![nt("NON-WHITESPACE")]);
    g.add_rule("CHAR", vec![nt("SPECIAL")]);
    g.add_rule("CHAR", vec![nt("LITERAL-DOT")]);
    g.add_rule("CHAR", vec![tr('.')]);
    for c in ' '..='~' {
        match c {
            // operator characters are only literals when escaped
            '|' | '*' | '(' | ')' | '+' | '?' | '\\' => {
                g.add_rule("SPECIAL", vec![tr('\\'), tr(c)])
            }
            // an unescaped '.' is the wildcard added above
            '.' => g.add_rule("LITERAL-DOT", vec![tr('\\'), tr(c)]),
            _ => g.add_rule("CHAR", vec![tr(c)]),
        };
    }
    g.add_rule("CHAR", vec![tr('\t')]);
    // Character classes, using the conventional escapes: `\d` digit,
    // `\w` letter, `\s` whitespace.
    g.add_rule("DIGIT", vec![tr('\\'), tr('d')]);
    g.add_rule("LETTER", vec![tr('\\'), tr('w')]);
    g.add_rule("WHITESPACE", vec![tr('\\'), tr('s')]);
    // negations of the above
    g.add_rule("NON-DIGIT", vec![tr('\\'), tr('D')]);
    g.add_rule("NON-LETTER", vec![tr('\\'), tr('W')]);
    g.add_rule("NON-WHITESPACE", vec![tr('\\'), tr('S')]);
    g
}
/// Parses `regex` against the regular-expression grammar.
///
/// Returns the parse tree, or `None` when `regex` is not derivable from the
/// grammar.
pub fn parse_regex(regex: &str) -> Option<ASTNode> {
    regex_cfg().parse(regex)
}
//! Earley Parsing for context-free-grammars.
#![cfg_attr(doctest, doc = "````no_test")] // highlight, but don't run the test (rust/issues/63193)
//! ```
//! let mut g = CFG::new("EXP");
//!
//! g.add_rule("EXP", vec![nt("EXP"), tr('-'), nt("EXP")]);
//! g.add_rule("EXP", vec![nt("TERM")]);
//!
//! g.add_rule("TERM", vec![nt("TERM"), tr('/'), nt("TERM")]);
//! g.add_rule("TERM", vec![nt("FACTOR")]);
//!
//! g.add_rule("FACTOR", vec![tr('('), nt("EXP"), tr(')')]);
//! for a in '0'..='9' {
//! g.add_rule("FACTOR", vec![tr(a)]);
//! }
//!
//! assert!(g.parse("5--5").is_none());
//! assert!(g.parse("5-5").is_some());
//!
//! let result = g.parse("(5-5)/(2-3/4)");
//! assert!(result.is_some());
//! println!("{:#?}", PrettyPrint(&result.unwrap().collapse()));
//! // TERM(FACTOR('(', EXP('5', '-', '5'), ')'), '/', FACTOR('(', EXP('2', '-', TERM('3', '/', '4')), ')'))
//! ````
use std::cmp;
use std::collections::{BTreeSet, HashMap, VecDeque};
use std::rc::Rc;
/// A terminal of the grammar: a single input character.
pub type Terminal = char;
/// A nonterminal of the grammar, identified by its name.
pub type NonTerminal = &'static str;

/// A sequence of `Symbol`s forms the right-hand-side of a CFG production.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum Symbol {
    Terminal(Terminal),
    NonTerminal(NonTerminal),
}

impl Symbol {
    /// Text of the symbol: the character itself or the nonterminal's name.
    fn strval(&self) -> String {
        match self {
            Symbol::Terminal(c) => c.to_string(),
            Symbol::NonTerminal(s) => s.to_string(),
        }
    }
}

/// Convenience function for creating a nonterminal `Symbol`
pub const fn nt(x: NonTerminal) -> Symbol {
    Symbol::NonTerminal(x)
}

/// Convenience function for creating a terminal `Symbol`
pub const fn tr(x: Terminal) -> Symbol {
    Symbol::Terminal(x)
}
/// A context-free grammar: a start symbol plus its production rules.
pub struct CFG {
    /// Nonterminal every derivation starts from.
    start: NonTerminal,
    /// Right-hand sides of every nonterminal, in insertion order.
    rule_map: HashMap<NonTerminal, Vec<Vec<Symbol>>>,
    /// Permanently empty rule list handed out for unknown nonterminals.
    dummy: Vec<Vec<Symbol>>,
}

impl CFG {
    /// Creates a grammar without rules whose start symbol is `start`.
    pub fn new(start: NonTerminal) -> Self {
        let rule_map = HashMap::new();
        let dummy = Vec::new();
        Self {
            start,
            rule_map,
            dummy,
        }
    }

    /// Appends the production `lhs -> rhs`.
    pub fn add_rule(&mut self, lhs: NonTerminal, rhs: Vec<Symbol>) {
        self.rule_map.entry(lhs).or_default().push(rhs);
    }

    /// All right-hand sides registered for `lhs`; empty when there are none.
    pub fn rules(&self, lhs: NonTerminal) -> &[Vec<Symbol>] {
        match self.rule_map.get(lhs) {
            Some(productions) => productions,
            None => &self.dummy,
        }
    }

    /// Runs the Earley parser on `input` with this grammar.
    pub fn parse(&self, input: &str) -> Option<ASTNode> {
        parse(self, input)
    }
}
/// A node of a parse tree.
#[derive(Debug, Clone)]
pub enum ASTNode {
    /// Leaf holding one input character.
    Terminal(Terminal),
    /// Inner node: the nonterminal that was derived and its children in order.
    NonTerminal {
        sym: NonTerminal,
        children: Vec<ASTNode>,
    },
}

impl ASTNode {
    /// Returns the character of a terminal node.
    ///
    /// # Panics
    /// Panics with "Not a terminal" when called on a nonterminal node.
    pub fn unwrap_terminal(&self) -> Terminal {
        match *self {
            Self::Terminal(c) => c,
            Self::NonTerminal { .. } => panic!("Not a terminal"),
        }
    }

    /// Removes every chain of single-child nonterminals from the tree.
    ///
    /// In the returned tree no nonterminal node has exactly one child: such a
    /// node is replaced by its (collapsed) child.
    pub fn collapse(self) -> Self {
        match self {
            leaf @ Self::Terminal(_) => leaf,
            Self::NonTerminal { sym, children } => {
                let mut collapsed: Vec<Self> =
                    children.into_iter().map(Self::collapse).collect();
                if collapsed.len() == 1 {
                    collapsed.remove(0)
                } else {
                    Self::NonTerminal {
                        sym,
                        children: collapsed,
                    }
                }
            }
        }
    }
}
/// One Earley item: a production, a position inside it, and where it started.
#[derive(Debug, Clone)]
struct EarleyState {
    /// Left-hand side of the production.
    lhs: NonTerminal,
    /// Right-hand side of the production.
    rhs: Vec<Symbol>,
    /// Number of right-hand-side symbols already recognised (the "dot").
    rhs_idx: usize,
    /// Input position at which recognition of this production began.
    start_idx: usize,
    /// State this one was derived from (same production, dot one step left,
    /// or the predicting state for a freshly predicted item).
    left_parent: Option<Rc<EarleyState>>,
    /// Completed state that supplied the nonterminal just left of the dot.
    right_parent: Option<Rc<EarleyState>>,
}

impl EarleyState {
    /// `true` once the dot has passed every right-hand-side symbol.
    fn done(&self) -> bool {
        self.rhs.len() == self.rhs_idx
    }

    /// Fresh item for `lhs -> rhs` with the dot at the far left and no parents.
    fn new(lhs: NonTerminal, rhs: Vec<Symbol>, start_idx: usize) -> Self {
        Self {
            lhs,
            rhs,
            rhs_idx: 0,
            start_idx,
            left_parent: None,
            right_parent: None,
        }
    }

    /// Copy of this item with the dot moved one symbol right; parents cleared.
    fn advance(&self) -> Self {
        let mut moved = Self::new(self.lhs, self.rhs.clone(), self.start_idx);
        moved.rhs_idx = self.rhs_idx + 1;
        moved
    }

    /// Symbol immediately right of the dot. Panics when the item is done.
    fn next_sym(&self) -> Symbol {
        self.rhs[self.rhs_idx]
    }
}
/// Renders the item as `LHS -> a . b c START`, with the dot printed before
/// the next expected symbol (no dot is printed for a finished item).
impl std::fmt::Display for EarleyState {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{} ->", self.lhs)?;
        for (position, symbol) in self.rhs.iter().enumerate() {
            if position == self.rhs_idx {
                f.write_str(" .")?;
            }
            write!(f, " {}", symbol.strval())?;
        }
        write!(f, " {}", self.start_idx)
    }
}
/// Total order over items. The parent links never take part in it.
impl cmp::Ord for EarleyState {
    fn cmp(&self, other: &EarleyState) -> cmp::Ordering {
        // Primary key: end position reached so far, then unfinished before
        // finished items.
        let mine = (self.start_idx + self.rhs_idx, self.done());
        let theirs = (other.start_idx + other.rhs_idx, other.done());
        mine.cmp(&theirs).then_with(|| {
            // The remaining fields only make the order total and consistent.
            (self.lhs, &self.rhs, self.rhs_idx, self.start_idx).cmp(&(
                other.lhs,
                &other.rhs,
                other.rhs_idx,
                other.start_idx,
            ))
        })
    }
}

impl cmp::PartialOrd for EarleyState {
    fn partial_cmp(&self, other: &EarleyState) -> Option<cmp::Ordering> {
        Some(self.cmp(other))
    }
}

/// Two items are equal exactly when `cmp` orders them as equal.
impl cmp::PartialEq for EarleyState {
    fn eq(&self, other: &EarleyState) -> bool {
        matches!(self.cmp(other), cmp::Ordering::Equal)
    }
}

impl cmp::Eq for EarleyState {}
/// Earley-parses `input` with `cfg`.
///
/// Returns one parse tree rooted at the start symbol when the whole input is
/// derivable, `None` otherwise. All chart positions are measured in
/// characters (not bytes), so non-ASCII input is handled correctly.
fn parse(cfg: &CFG, input: &str) -> Option<ASTNode> {
    let chars = input.chars().collect::<Vec<_>>();
    // `str::len` counts bytes; the chart is indexed per character.
    let n = chars.len();
    // mem[i] holds every item whose dot sits at input position i.
    let mut mem = vec![BTreeSet::new(); n + 1];
    for rhs in cfg.rules(cfg.start) {
        mem[0].insert(Rc::new(EarleyState::new(cfg.start, rhs.clone(), 0)));
    }
    for i in 0..=n {
        let mut q = mem[i].iter().cloned().collect::<VecDeque<_>>();
        while let Some(curr_state) = q.pop_front() {
            if !curr_state.done() {
                match curr_state.next_sym() {
                    Symbol::NonTerminal(nt) => {
                        // Predict: start every production of the expected nonterminal here.
                        for rhs in cfg.rules(nt) {
                            let mut new_state = Rc::new(EarleyState::new(nt, rhs.clone(), i));
                            if !mem[i].contains(&new_state) {
                                // The Rc was just created, so it is still unique.
                                Rc::get_mut(&mut new_state).unwrap().left_parent =
                                    Some(Rc::clone(&curr_state));
                                mem[i].insert(Rc::clone(&new_state));
                                q.push_back(new_state);
                            }
                        }
                    }
                    Symbol::Terminal(t) => {
                        // Scan: consume the next character when it is the expected one.
                        if i < n && t == chars[i] {
                            let mut new_state = Rc::new(curr_state.advance());
                            if !mem[i + 1].contains(&new_state) {
                                Rc::get_mut(&mut new_state).unwrap().left_parent =
                                    Some(Rc::clone(&curr_state));
                                mem[i + 1].insert(new_state);
                            }
                        }
                    }
                }
            } else {
                // Complete: advance every item that was waiting for this nonterminal.
                let iterlist = mem[curr_state.start_idx]
                    .iter()
                    .cloned()
                    .collect::<Vec<_>>();
                for state in iterlist {
                    if !state.done() && state.next_sym() == Symbol::NonTerminal(curr_state.lhs) {
                        let mut new_state = Rc::new(state.advance());
                        if !mem[i].contains(&new_state) {
                            let fresh = Rc::get_mut(&mut new_state).unwrap();
                            fresh.left_parent = Some(Rc::clone(&state));
                            fresh.right_parent = Some(Rc::clone(&curr_state));
                            mem[i].insert(Rc::clone(&new_state));
                            q.push_back(new_state);
                        }
                    }
                }
            }
        }
    }

    /// Rebuilds the tree for a finished item by walking its parent links
    /// from the last right-hand-side symbol back to the first.
    fn generate_parse_tree(state: Rc<EarleyState>) -> ASTNode {
        let mut iter = Rc::clone(&state);
        let mut children = Vec::with_capacity(state.rhs.len());
        for sym in state.rhs.iter().rev() {
            children.push(match *sym {
                Symbol::NonTerminal(_) => {
                    generate_parse_tree(Rc::clone(iter.right_parent.as_ref().unwrap()))
                }
                Symbol::Terminal(tt) => ASTNode::Terminal(tt),
            });
            iter = Rc::clone(iter.left_parent.as_ref().unwrap());
        }
        // Children were collected right-to-left.
        children.reverse();
        ASTNode::NonTerminal {
            sym: state.lhs,
            children,
        }
    }

    mem[n]
        .iter()
        .find(|s| s.lhs == cfg.start && s.start_idx == 0 && s.done())
        .map(|state| generate_parse_tree(Rc::clone(state)))
}
/// Wrapper whose `Debug` output shows an `ASTNode` as nested tuples:
/// terminals as quoted characters, nonterminals as `NAME(child, ...)`.
pub struct PrettyPrint<'a>(pub &'a ASTNode);

impl std::fmt::Debug for PrettyPrint<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (sym, children) = match self.0 {
            ASTNode::Terminal(c) => return write!(f, "'{c}'"),
            ASTNode::NonTerminal { sym, children } => (sym, children),
        };
        let mut tuple = f.debug_tuple(sym);
        children.iter().for_each(|child| {
            tuple.field(&PrettyPrint(child));
        });
        tuple.finish()
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// Parses a few arithmetic expressions with a small `-` / `/` grammar.
    #[test]
    fn test_arith() {
        let mut g = CFG::new("EXP");
        let productions: [(NonTerminal, Vec<Symbol>); 5] = [
            ("EXP", vec![nt("EXP"), tr('-'), nt("EXP")]),
            ("EXP", vec![nt("TERM")]),
            ("TERM", vec![nt("TERM"), tr('/'), nt("TERM")]),
            ("TERM", vec![nt("FACTOR")]),
            ("FACTOR", vec![tr('('), nt("EXP"), tr(')')]),
        ];
        for (lhs, rhs) in productions {
            g.add_rule(lhs, rhs);
        }
        for a in '0'..='9' {
            g.add_rule("FACTOR", vec![tr(a)]);
        }

        assert!(g.parse("5--5").is_none());
        assert!(g.parse("5-5").is_some());

        match g.parse("5-6").unwrap().collapse() {
            ASTNode::NonTerminal { children, .. } => {
                for (idx, expected) in ['5', '-', '6'].into_iter().enumerate() {
                    assert_eq!(expected, children[idx].unwrap_terminal());
                }
            }
            ASTNode::Terminal(_) => panic!("Expected nonterminal"),
        }

        let result = g.parse("(5-5)/(2-3/4)");
        assert!(result.is_some());
        println!("{:#?}", PrettyPrint(&result.unwrap().collapse()));
    }
}
mod earley_parse;
mod cfg;
mod types;
mod nfa_array;
mod boyer_moore;
mod threadpool;
use crate::cfg::parse_regex;
use crate::nfa_array::{Simulation, create_simulation};
use crate::boyer_moore::BM;
use crate::threadpool::Threadpool;
use std::env;
use std::fs::File;
use std::io::{self, BufRead};
use std::path::Path;
use std::str::FromStr;
use std::cmp;
use std::sync::{Arc, Mutex};
type Match = (usize, Vec<String>);
fn main() {
let args: Vec<String> = env::args().collect();
let regex = &args[1];
let filename = &args[2];
let thread_n: usize = FromStr::from_str(&args[3]).unwrap();
let result= parse_regex(regex);
match result {
| None => println!("The string provided is not a regular expression"),
| Some(ast) =>
{
let new_ast = ast.collapse();
let mut sim = create_simulation(&new_ast);
let prefix = sim.enable_prefix();
let has_prefix = prefix != "";
// let mut line_n = 0;
let all_matches: Arc<Mutex<Vec<Match>>> = Arc::new(Mutex::new(Vec::new()));
if let Ok(lines) = read_lines(filename) {
let pool = Threadpool::new(thread_n);
let mut bm;
if has_prefix {
bm = BM::new(prefix);
}
else {
bm = BM::new(String::from(""));
}
for (line_n, line) in lines.flatten().enumerate() {
let mut sim_thread = sim.clone();
let line_vec: Vec<char> = line.chars().collect();
let all_matches = Arc::clone(&all_matches);
if has_prefix {
let mut bm_thread = bm.clone();
pool.execute(move || {
let results = check_line_with_prefix(&mut sim_thread, &line_vec, line_n, &mut bm_thread);
match results {
None => (),
Some(m) => {
let mut matches_arr = all_matches.lock().unwrap();
matches_arr.push((line_n, m));
}
}
});
}
else {
pool.execute(move || {
let results = check_line(&mut sim_thread, &line_vec, line_n);
match results {
None => (),
Some(m) => {
let mut matches_arr = all_matches.lock().unwrap();
matches_arr.push((line_n, m));
}
}
})
}
}
}
all_matches.lock().unwrap().sort_by(|a, b| compare_match(a, b));
for (line_n, line_matches) in &*all_matches.lock().unwrap() {
for m in line_matches {
println!("{line_n}:{m}");
}
}
},
};
}
/// Feeds `line[i..line_len]` to `sim` until it gets stuck or the line ends.
///
/// Every time the simulation accepts, `*lm` is set to the index of the
/// character just consumed (an inclusive end); it is left untouched when the
/// simulation never accepts.
fn check_to_end_prefix(sim: &mut Simulation, lm: &mut usize,
                       i: usize, line: &Vec<char>, line_len: usize) {
    let mut pos = i;
    loop {
        if sim.stuck() || pos >= line_len {
            break;
        }
        sim.step(line[pos]);
        if sim.accepts() {
            *lm = pos;
        }
        pos += 1;
    }
}
/// Feeds `line[i..line_len]` to `sim` until it gets stuck or the line ends.
///
/// Every time the simulation accepts, `*lm` is set to one past the index of
/// the character just consumed (an exclusive end); it is left untouched when
/// the simulation never accepts.
fn check_to_end(sim: &mut Simulation, lm: &mut usize,
                i: usize, line: &Vec<char>, line_len: usize) {
    let mut pos = i;
    loop {
        if sim.stuck() || pos >= line_len {
            break;
        }
        sim.step(line[pos]);
        pos += 1;
        if sim.accepts() {
            *lm = pos;
        }
    }
}
/// Collects the non-overlapping matches in `line`, scanning left to right.
///
/// At each start position the longest match is taken and scanning resumes
/// right after it; without a match the start moves one character on. Empty
/// matches are never reported. Returns `None` when nothing matched.
/// `line_n` is currently unused.
fn check_line(sim: &mut Simulation, line: &Vec<char>, line_n: usize) -> Option<Vec<String>> {
    let line_len = line.len();
    let mut found: Vec<String> = Vec::new();
    // exclusive end of the latest match; never ahead of `start` when stale
    let mut match_end = 0;
    let mut start = 0;
    while start < line_len {
        sim.reset();
        check_to_end(sim, &mut match_end, start, line, line_len);
        if match_end <= start {
            start += 1;
            continue;
        }
        found.push(line[start..match_end].iter().collect());
        start = match_end;
    }
    match found.len() {
        0 => None,
        _ => Some(found),
    }
}
/// Collects the non-overlapping matches in `line`, using Boyer-Moore to jump
/// to each occurrence of the regex's literal prefix.
///
/// For every prefix occurrence the simulation is reset and fed the rest of
/// the line; the longest accepted extension is recorded. After a match the
/// scan resumes right behind it; after a failed candidate it resumes one
/// character past the candidate's start, so overlapping candidates are not
/// lost. Returns `None` when nothing matched. `line_n` is currently unused.
///
/// NOTE(review): assumes `sim.reset()` puts the simulation into the state
/// reached after consuming the prefix (set up by `enable_prefix`) -- confirm.
///
/// # Panics
/// Panics (inside `BM::find`) when `bm` was built from an empty pattern.
fn check_line_with_prefix(sim: &mut Simulation, line: &Vec<char>,
                          line_n: usize, bm: &mut BM) -> Option<Vec<String>> {
    let _ = line_n;
    let line_len = line.len();
    let prefix_len = bm.pattern_len();
    let mut matches: Vec<String> = Vec::new();
    let mut i = 0;
    while i < line_len {
        // `find` reports the index one past the prefix, relative to `i`.
        let mut prefix_end = 0;
        if !bm.find(&mut prefix_end, &line[i..]) {
            // no further prefix occurrence: nothing left to match
            break;
        }
        let after_prefix = i + prefix_end;
        let m_start = after_prefix - prefix_len;

        sim.reset();
        // The prefix on its own may already be a complete match.
        let prefix_accepts = sim.accepts();
        // Inclusive end of the match; starts at the prefix's last character
        // (the prefix is non-empty, so `after_prefix >= 1`).
        let mut lm = after_prefix - 1;
        check_to_end_prefix(sim, &mut lm, after_prefix, line, line_len);

        // `lm` only moves to `after_prefix` or beyond when an extension accepted.
        if prefix_accepts || lm >= after_prefix {
            matches.push(line[m_start..=lm].iter().collect());
            i = lm + 1;
        } else {
            i = m_start + 1;
        }
    }
    if matches.is_empty() {
        None
    } else {
        Some(matches)
    }
}
/// Orders two matches by their line number only.
fn compare_match(a: &Match, b: &Match) -> cmp::Ordering {
    a.0.cmp(&b.0)
}
// From Rust by Example Docs:
// https://doc.rust-lang.org/rust-by-example/std_misc/file/read_lines.html
/// Opens `filename` and returns a buffered iterator over its lines.
///
/// Fails with the error from `File::open` when the file cannot be opened.
fn read_lines<P>(filename: P) -> io::Result<io::Lines<io::BufReader<File>>>
where
    P: AsRef<Path>,
{
    // buffered reader for efficiency
    File::open(filename).map(|file| io::BufReader::new(file).lines())
}
\ No newline at end of file
use std::rc::Rc;
use std::cell::{Ref, RefMut, RefCell};
use crate::earley_parse::{ASTNode, Terminal, NonTerminal};
use crate::types::Elem;
use crate::types::Elem::{*};
/// Kind of an NFA state, which decides how many out links it uses.
pub enum Type {
// final state; `State::show` follows no out link from it
Accept,
// state with two out links (`out1` and `out2`)
Split,
// state with a single out link (`out1`)
Single,
}
/// A node of the pointer-based NFA.
pub struct State {
    /// Kind of the state (accept, split or single).
    t: Type,
    /// Element labelling the state.
    key: Elem,
    /// First out link.
    out1: Link,
    /// Second out link.
    out2: Link,
}

/// Optional shared, mutable reference to a state.
pub type Link = Option<Rc<RefCell<State>>>;

impl State {
    /// Allocates a state with no out links yet.
    pub fn new(key: Elem, t: Type) -> Rc<RefCell<Self>> {
        let state = State {
            t,
            key,
            out1: None,
            out2: None,
        };
        Rc::new(RefCell::new(state))
    }

    /// Stores `next` in the first free out link.
    ///
    /// # Panics
    /// Panics with "max capacity" when both links are already set.
    pub fn add(&mut self, next: Link) {
        let slot = if self.out1.is_none() {
            &mut self.out1
        } else if self.out2.is_none() {
            &mut self.out2
        } else {
            panic!("max capacity")
        };
        *slot = next;
    }

    /// Prints this state's key after `space`, then its successors one level
    /// deeper (both links for a split, the first link for a single state).
    pub fn show(&self, space: &str) {
        println!("{}{}", space, self.key.to_str());
        let deeper = format!("{} ", space);
        let visit = |link: &Link| {
            if let Some(successor) = link {
                successor.borrow_mut().show(&deeper);
            }
        };
        match self.t {
            Type::Split => {
                visit(&self.out1);
                visit(&self.out2);
            }
            Type::Single => visit(&self.out1),
            Type::Accept => {}
        }
    }

    /// Prints which kind of state this is.
    pub fn next(&self) {
        let description = match self.t {
            Type::Single => "single",
            Type::Split => "split",
            Type::Accept => "this is an accept, no next",
        };
        println!("{}", description);
    }
}
/// A partially built NFA: an entry state plus the states whose out links
/// still have to be connected to whatever follows the fragment.
pub struct Fragment {
// entry state of the fragment
head: Link,
// states whose out links are still open
outlst: Option<Vec<Link>>,
}
impl Fragment {
/// Initialises `outlst` to contain just the head, unless it is already set.
fn make_outlst(&mut self) {
match self.outlst {
| Some (_) => (),
| None =>
{
let mut newlst = Vec::new();
newlst.push(self.head.clone());
self.outlst = Some(newlst);
}
}
}
/// Creates a fragment holding one new state of kind `t`; `outlst` is not set yet.
fn new_from_type(key: Elem, t: Type) -> Self {
Fragment {head: Some(State::new(key, t)),
outlst: None}
}
/// One-state fragment of kind `Single` whose only open link is its head.
pub fn new(key: Elem) -> Self {
let mut new_frag = Self::new_from_type(key, Type::Single);
new_frag.make_outlst();
new_frag
}
/// One-state fragment of kind `Split` whose only open link is its head.
pub fn new_split(key: Elem) -> Self {
let mut new_frag = Self::new_from_type(key, Type::Split);
new_frag.make_outlst();
new_frag
}
/// One-state fragment holding an accept state.
pub fn new_accept() -> Self {
let mut new_frag = Self::new_from_type(Elem::Accept, Type::Accept);
new_frag.make_outlst();
new_frag
}
/// The entry state of the fragment.
pub fn head(&self) -> &Link {
&self.head
}
/// Borrows the first out link of the head state, if there is a head.
pub fn next(&self) -> Option<Ref<Link>> {
self.head.as_ref().map(|node| {
Ref::map(node.borrow(), |node| &node.out1)
})
}
/// Adds `state` as an out link of every state in `outlst`.
///
/// `outlst` itself is left unchanged. Panics when `outlst` is `None`, and
/// (through `State::add`) when a listed state already has two out links.
fn outlst_to_state(&mut self, state: Link) {
// connect each node in outlst to head of fragment
match self.outlst.take() {
| Some(outlst) =>
{
// iterates over a clone of the list; the Rc handles still point at the same states
for mut out in outlst.clone() {
match out.take() {
|Some(old_out) =>
{old_out.borrow_mut().add(state.clone());
out = Some(old_out);},
| None => (),
}
}
// put the list back, since `take` emptied the field
self.outlst = Some(outlst);
},
| None => panic!("empty outlist in outlst_to_state")
}
}
/// Appends `frag` after this fragment: every open link of `self` is pointed
/// at `frag`'s head and `frag`'s open links become the open links of `self`.
pub fn concatenate(&mut self, mut frag: Fragment) {
// NOTE(review): `outlst2` is never read
let outlst2 = frag.outlst.clone();
self.outlst_to_state(frag.head);
self.outlst = frag.outlst;
}
/// Makes this fragment branch into `frag1` and `frag2`; the open links of
/// the result are those of `frag1` followed by those of `frag2`.
pub fn alternate(&mut self, mut frag1: Fragment, mut frag2: Fragment) {
// append outlsts before fragments go out of scope
let mut outlst_final = frag1.outlst.clone();
match outlst_final.take() {
| Some(mut outlst) =>
{
outlst.append(&mut frag2.outlst.unwrap());
outlst_final = Some(outlst);
},
| None => panic!("empty outlist in alternate")
}
// concatenate current fragment to both fragments
self.outlst_to_state(frag1.head);
self.outlst_to_state(frag2.head);
self.outlst = outlst_final;
}
/// Zero-or-one: links this fragment to `frag`'s head and keeps both this
/// fragment's and `frag`'s open links open.
pub fn qm(&mut self, mut frag: Fragment) {
// question mark
let mut outlst_final = self.outlst.clone();
match outlst_final.take() {
| Some(mut outlst) =>
{
outlst.append(&mut frag.outlst.unwrap());
outlst_final = Some(outlst);
},
| None => panic!("empty outlist in qm")
}
self.outlst_to_state(frag.head);
self.outlst = outlst_final;
}
/// Zero-or-more: `frag`'s open links loop back to this fragment's head and
/// this fragment's open links point at `frag`'s head; `outlst` is unchanged.
pub fn star(&mut self, mut frag: Fragment) {
frag.outlst_to_state(self.head.clone());
self.outlst_to_state(frag.head);
}
/// One-or-more: `frag`'s open links point back at this fragment's head, this
/// fragment's open links point at `frag`'s head, and `frag`'s open links
/// become the open links of the result.
pub fn plus(&mut self, mut frag: Fragment) {
let outlst_final = frag.outlst.clone();
frag.outlst_to_state(self.head.clone());
self.outlst_to_state(frag.head);
self.outlst = outlst_final;
}
/// Prints the fragment starting from its head, if it has one.
pub fn show(&self) {
match &self.head {
| Some(node) => node.borrow_mut().show(""),
| None => (),
}
}
}
impl ASTNode {
/// Converts a parse tree node into a pointer-based NFA `Fragment`.
///
/// Terminals become single-state fragments (`.` becomes `Elem::Dot`);
/// nonterminals are dispatched on their symbol name. Hits `unimplemented!()`
/// for a symbol that is not listed below.
pub fn to_Fragment(&self) -> Fragment {
match self {
| Self::Terminal(c) => {
if *c != '.' {
Fragment::new(Elem::Unique(*c))
}
else {
Fragment::new(Elem::Dot)
}
},
| Self::NonTerminal {sym, children} =>
match *sym {
| "CONCAT" =>
{
let mut frag_A = children[0].to_Fragment();
frag_A.concatenate(children[1].to_Fragment());
frag_A
},
| "UNION" =>
{
// children[1] is skipped; only the two operands are converted
let mut new_frag = Fragment::new_split(Elem::Epsilon);
new_frag.alternate(children[0].to_Fragment(),
children[2].to_Fragment());
new_frag
},
| "Q" =>
{
let mut new_frag = Fragment::new_split(Elem::Epsilon);
let frag = children[0].to_Fragment();
new_frag.qm(frag);
new_frag
},
| "STAR" =>
{
let mut new_frag = Fragment::new_split(Elem::Epsilon);
let frag = children[0].to_Fragment();
new_frag.star(frag);
new_frag
},
| "PLUS" =>
{
// here the operand is the receiver and the split state is the argument
let frag = Fragment::new_split(Elem::Epsilon);
let mut new_frag = children[0].to_Fragment();
new_frag.plus(frag);
new_frag
},
| "DIGIT" => Fragment::new(Elem::Digit),
| "NON-DIGIT" => Fragment::new(Elem::NonDigit),
| "LETTER" => Fragment::new(Elem::Letter),
| "NON-LETTER" => Fragment::new(Elem::NonLetter),
| "WHITESPACE" => Fragment::new(Elem::Whitespace),
| "NON-WHITESPACE" => Fragment::new(Elem::NonWhitespace),
// only the middle child is converted; the first and last are dropped
| "SPECIAL" | "EXPR" => children[1].to_Fragment(),
| "LITERAL-DOT" => Fragment::new(Elem::Unique('.')),
| _ =>
{
unimplemented!();
}
}
}
}
}
\ No newline at end of file
use crate::earley_parse::ASTNode;
use crate::types::Elem;
use std::mem;
/// A state of the array-based NFA: its label and the indices of its successors.
#[derive(Clone)]
pub struct State {
    /// Element labelling the state.
    key: Elem,
    /// Indices of the states reachable from this one.
    out: Vec<usize>,
}

impl State {
    /// Prints the state as `KEY -> i j ...` on one line.
    pub fn show(&self) {
        let targets: String = self.out.iter().map(|idx| format!("{} ", idx)).collect();
        println!("{} -> {}", self.key.to_str(), targets);
    }

    /// Indices of the successor states.
    pub fn out(&self) -> &Vec<usize> {
        &self.out
    }
}
pub struct Fragment {
states: Vec<State>,
}
impl Fragment {
pub fn new() -> Self {
Fragment {states: Vec::new()}
}
pub fn _show_outlst(&self, outlst_array: &Vec<Vec<usize>>) {
println!("Printing outlst");
for (i, s_outs) in outlst_array.iter().enumerate() {
print!("Out States for {}: ", self.states[i].key.to_str());
for out in s_outs {
print!("{}, ", out);
}
println!("");
}
}
fn _add_terminal(&mut self, idx: usize, key: Elem,
outlst_array: &mut Vec<Vec<usize>>) {
// change key and outlst for state at idx
self.states.push(State {key: key, out: Vec::new()});
let mut row: Vec<usize> = Vec::new();
row.push(idx);
outlst_array.push(row);
}
fn fill(&mut self, node: &ASTNode, current_idx: &mut usize) -> usize {
// outlst_array[i] is the out list of state i
let mut outlst_array: Vec<Vec<usize>> = Vec::new();
let head_idx = self._fill(node, current_idx, &mut outlst_array);
// add start state
self.states.push(State {key: Elem::Accept, out: Vec::new()});
let accept_idx = self.states.len()-1;
for out_state in &outlst_array[head_idx] {
self.states[*out_state].out.push(accept_idx);
}
// self._show_outlst(&outlst_array);
head_idx
}
fn add_terminal(&mut self, current_idx: &mut usize, key: Elem,
outlst_array: &mut Vec<Vec<usize>>) -> usize {
// adds a Elem key to states, and changes idx
self._add_terminal(*current_idx, key, outlst_array);
let return_idx = *current_idx;
*current_idx += 1;
return_idx
}
fn update_state_out(&mut self, in_idx: usize, out_idx: usize,
outlst_array: &Vec<Vec<usize>>) {
for out_state in &outlst_array[in_idx] {
self.states[*out_state].out.push(out_idx);
}
}
fn append_vecs(outlst_array: &mut Vec<Vec<usize>>,
idx0: usize, idx1:usize) {
// combines two vectors of outlst_array at index id0
// empties the other
let mut new_outlst = mem::take(&mut outlst_array[idx1]);
new_outlst.append(&mut outlst_array[idx0]);
let _ = mem::replace(&mut outlst_array[idx0],new_outlst);
}
fn _fill(&mut self, node: &ASTNode, current_idx: &mut usize,
outlst_array: &mut Vec<Vec<usize>>) -> usize {
match node {
| ASTNode::Terminal(c) => {
if *c != '.' {
let val = self.add_terminal(current_idx, Elem::Unique(*c), outlst_array);
// self._show_outlst(&outlst_array);
val
}
else {
let val = self.add_terminal(current_idx, Elem::Dot, outlst_array);
// self._show_outlst(&outlst_array);
val
}
},
| ASTNode::NonTerminal {sym, children} =>
match *sym {
| "CONCAT" =>
{
let a_idx = self._fill(&children[0],
current_idx, outlst_array);
let b_idx = self._fill(&children[1],
current_idx, outlst_array);
self.update_state_out(a_idx, b_idx, outlst_array);
outlst_array[a_idx].clear();
outlst_array.swap(a_idx, b_idx);
// self._show_outlst(&outlst_array);
a_idx
},
| "UNION" =>
{
let ep_idx = self.add_terminal(current_idx,
Elem::Epsilon, outlst_array);
let a_idx = self._fill(&children[0],
current_idx, outlst_array);
let b_idx = self._fill(&children[2],
current_idx, outlst_array);
self.update_state_out(ep_idx, a_idx, outlst_array);
self.update_state_out(ep_idx, b_idx, outlst_array);
// new out list is the concatenation of child out lists
Self::append_vecs(outlst_array, a_idx, b_idx);
outlst_array.swap(a_idx, ep_idx);
outlst_array[a_idx].clear();
ep_idx
},
| "Q" =>
{
let ep_idx = self.add_terminal(current_idx,
Elem::Epsilon, outlst_array);
let a_idx = self._fill(&children[0],
current_idx, outlst_array);
self.update_state_out(ep_idx, a_idx, outlst_array);
Self::append_vecs(outlst_array, ep_idx, a_idx);
ep_idx
},
| "STAR" =>
{
let ep_idx = self.add_terminal(current_idx,
Elem::Epsilon, outlst_array);
let a_idx = self._fill(&children[0],
current_idx, outlst_array);
self.update_state_out(a_idx, ep_idx, outlst_array);
self.update_state_out(ep_idx, a_idx, outlst_array);
// new out list is just epsilon node
outlst_array[a_idx].clear();
ep_idx
},
| "PLUS" =>
{
let a_idx = self._fill(&children[0],
current_idx, outlst_array);
let ep_idx = self.add_terminal(current_idx,
Elem::Epsilon, outlst_array);
self.update_state_out(ep_idx, a_idx, outlst_array);
self.update_state_out(a_idx, ep_idx, outlst_array);
// new out list is epsilon node, from a_idx node
outlst_array[a_idx].clear();
outlst_array.swap(a_idx, ep_idx);
a_idx
},
| "DIGIT" => self.add_terminal(current_idx,
Elem::Digit, outlst_array),
| "NON-DIGIT" => self.add_terminal(current_idx,
Elem::NonDigit, outlst_array),
| "LETTER" => self.add_terminal(current_idx,
Elem::Letter, outlst_array),
| "NON-LETTER" => self.add_terminal(current_idx,
Elem::NonLetter, outlst_array),
| "WHITESPACE" => self.add_terminal(current_idx,
Elem::Whitespace, outlst_array),
| "NON-WHITESPACE" => self.add_terminal(current_idx,
Elem::NonWhitespace, outlst_array),
| "SPECIAL" | "EXPR" => self._fill(&children[1],
current_idx, outlst_array),
| "LITERAL-DOT" => self.add_terminal(current_idx,
Elem::Unique('.'), outlst_array),
| not_reachable =>
{
// unimplemented!();
unreachable!("{}", not_reachable);
}
}
}
}
/// Dumps the fragment for debugging by calling `show` on every stored
/// state, in storage order.
pub fn show(&self) {
    self.states.iter().for_each(|state| {
        state.show();
    });
}
/// Returns a shared reference to the vector of states held by this fragment.
pub fn states(&self) -> &Vec<State> {
&self.states
}
}
/// Step-by-step simulation of an NFA stored as a flat vector of states,
/// where states refer to each other by index into that vector.
#[derive(Clone)]
pub struct Simulation {
// Indices (into `nfa`) of the non-epsilon states that are currently active.
current: Vec<usize>,
// Scratch list filled by `next()` and swapped into `current` by `step()`.
next: Vec<usize>,
// One marker per state: -1 after `new`/`reset`; `add_to_next` stores
// `2*step` for a state it pushed and `2*step - 1` for an epsilon state it
// expanded, so a state is handled at most once per step.
lastlst: Vec<isize>,
// The state table being simulated.
nfa: Vec<State>,
// Index of the state the simulation starts from.
head: usize,
// Index of the accepting state (`nfa.len() - 1`, see `new`).
accept: usize,
// Step counter; starts at 1 and is incremented by `step()`.
step: isize,
// Start set recorded by `enable_prefix`; when present, `reset()` restores
// `current` from it instead of starting again from `head`.
prefix_start: Option<Vec<usize>>,
}
impl Simulation {
    /// Creates a simulation over `nfa` that starts at state index `head`.
    ///
    /// The last entry of `nfa` is used as the accepting state. The initial
    /// active set is the set of non-epsilon states reachable from `head`
    /// through epsilon states only.
    pub fn new(nfa: Vec<State>, head: usize) -> Self {
        let accept = nfa.len() - 1;
        let mut lastlst: Vec<isize> = vec![-1; accept + 1];
        let mut current: Vec<usize> = Vec::new();
        Self::add_to_next(&nfa, head, &mut current, &mut lastlst, 1);
        Simulation {
            current,
            next: Vec::new(),
            lastlst,
            nfa,
            accept,
            head,
            step: 1,
            prefix_start: None,
        }
    }

    /// Adds state `idx` to `lst`, expanding epsilon states recursively.
    ///
    /// `lastlst[idx]` records whether the state was already handled during
    /// `step`: `2*step` means "pushed to the list", `2*step - 1` means
    /// "epsilon state already expanded". Either mark makes the call a no-op,
    /// which is what keeps epsilon cycles from recursing forever.
    fn add_to_next(nfa: &Vec<State>, idx: usize,
                   lst: &mut Vec<usize>, lastlst: &mut Vec<isize>, step: isize) {
        let pushed_mark = 2 * step;
        let expanded_mark = 2 * step - 1;
        if lastlst[idx] == pushed_mark || lastlst[idx] == expanded_mark {
            return;
        }
        if nfa[idx].key == Elem::Epsilon {
            lastlst[idx] = expanded_mark;
            for out in nfa[idx].out() {
                Self::add_to_next(nfa, *out, lst, lastlst, step);
            }
        } else {
            lst.push(idx);
            lastlst[idx] = pushed_mark;
        }
    }

    /// Detects a literal prefix shared by every string of the language.
    ///
    /// Starting from a reset simulation, the NFA is advanced for as long as
    /// every active state is the same `Elem::Unique` character. The active
    /// set reached afterwards (if non-empty) is stored as the new start set
    /// used by `reset()`. Returns the prefix that was consumed, which may be
    /// empty.
    pub fn enable_prefix(&mut self) -> String {
        self.reset();
        let mut prefix = String::new();
        while let Some(&first) = self.current.first() {
            // The first active state decides which character is a candidate.
            let candidate = match self.nfa[first].key {
                Elem::Unique(c) => c,
                _ => break,
            };
            // Every other active state has to demand the very same character.
            let unanimous = self
                .current
                .iter()
                .all(|&idx| self.nfa[idx].key == Elem::Unique(candidate));
            if !unanimous {
                break;
            }
            prefix.push(candidate);
            self.step(candidate);
        }
        if !self.current.is_empty() {
            self.prefix_start = Some(self.current.clone());
        }
        prefix
    }

    /// Rewinds the simulation to its start set: the recorded prefix start if
    /// `enable_prefix` stored one, otherwise the epsilon closure of `head`.
    pub fn reset(&mut self) {
        self.current.clear();
        self.lastlst[..=self.accept].fill(-1);
        self.step = 1;
        if let Some(start) = &self.prefix_start {
            self.current.extend_from_slice(start);
        } else {
            Self::add_to_next(&self.nfa, self.head, &mut self.current,
                              &mut self.lastlst, self.step);
        }
        self.next.clear();
    }

    /// Collects into `self.next` the successors of every active state whose
    /// key matches `elem`.
    fn next(&mut self, elem: char) {
        for &idx in &self.current {
            let state = &self.nfa[idx];
            if !state.key.compare(elem) {
                continue;
            }
            for &out_idx in &state.out {
                Self::add_to_next(&self.nfa, out_idx, &mut self.next,
                                  &mut self.lastlst, self.step);
            }
        }
    }

    /// Consumes one input character and advances the active set.
    pub fn step(&mut self, c: char) {
        self.step += 1;
        self.next(c);
        // `next` becomes the active set; the cleared old list is recycled.
        self.current.clear();
        mem::swap(&mut self.current, &mut self.next);
    }

    /// Returns `true` when the accepting state is currently active.
    pub fn accepts(&self) -> bool {
        self.current.iter().any(|&idx| idx == self.accept)
    }

    /// Prints `message`, then the whole NFA, then the active state indices.
    pub fn show(&self, message: &str) {
        println!("{}", message);
        println!("NFA");
        self.nfa.iter().for_each(|state| {
            state.show();
        });
        println!("Current States");
        self.current.iter().for_each(|idx| println!("{idx}"));
    }

    /// Prints the step counter followed by the active state indices.
    pub fn show_current(&self) {
        println!("Step {}", self.step);
        for idx in &self.current {
            println!("{idx}");
        }
    }

    /// Returns `true` when no state is active, i.e. no further input can
    /// lead to acceptance.
    pub fn stuck(&self) -> bool {
        self.current.len() == 0
    }
}
/// Builds an NFA from the regex AST rooted at `node` and returns a
/// [`Simulation`] that starts at the head state reported by `Fragment::fill`.
pub fn create_simulation(node: &ASTNode) -> Simulation {
    let mut builder = Fragment::new();
    let mut next_free_idx = 0;
    let start = builder.fill(node, &mut next_free_idx);
    Simulation::new(builder.states, start)
}
#[cfg(test)]
mod test {
    // To run a single test and see its printed output:
    //   cargo test testname -- --nocapture
    use super::*;
    use crate::cfg::parse_regex;

    /// Parses `regex`, collapses its AST and builds a simulation from it.
    fn simulation_for(regex: &str) -> Simulation {
        let ast = parse_regex(regex).unwrap().collapse();
        create_simulation(&ast)
    }

    /// Steps `sim` once for every character of `input`.
    fn feed(sim: &mut Simulation, input: &str) {
        input.chars().for_each(|c| sim.step(c));
    }

    /// A literal pattern accepts itself, before and after a reset.
    #[test]
    fn test_array() {
        let regex = "rusty";
        let parsed = parse_regex(regex);
        assert!(parsed.is_some());
        let ast = parsed.unwrap().collapse();
        let mut frag = Fragment::new();
        let mut next_free_idx = 0;
        let head_idx = frag.fill(&ast, &mut next_free_idx);
        println!("Printing Result, head at {}", head_idx);
        frag.show();
        let mut sim = Simulation::new(frag.states, head_idx);

        feed(&mut sim, regex);
        assert!(sim.accepts());

        sim.reset();
        feed(&mut sim, regex);
        assert!(sim.accepts());
    }

    /// With the prefix enabled, only the remainder of the pattern is fed.
    #[test]
    fn test_prefix() {
        let mut sim = simulation_for("Cal.");
        let _prefix = sim.enable_prefix();

        feed(&mut sim, "a");
        assert!(sim.accepts());
        feed(&mut sim, "aa");
        assert!(!sim.accepts());

        // A reset must return to the post-prefix start set.
        sim.reset();
        feed(&mut sim, "a");
        assert!(sim.accepts());
        feed(&mut sim, "aa");
        assert!(!sim.accepts());
    }

    /// The shared prefix of both alternatives is extracted.
    #[test]
    fn test_prefix_or() {
        let mut sim = simulation_for("Caltech|California");
        let prefix = sim.enable_prefix();
        assert!(prefix == "Cal");

        feed(&mut sim, "tech");
        assert!(sim.accepts());

        sim.reset();
        feed(&mut sim, "ifornia");
        assert!(sim.accepts());

        sim.reset();
        feed(&mut sim, "California");
        assert!(!sim.accepts());
    }

    /// Patterns without a mandatory leading literal yield an empty prefix.
    #[test]
    fn test_prefix_empty() {
        let patterns = [
            "\\s", "\\S", "\\d", "\\D", "\\w", "\\W",
            ".", "a*", "a?", "Cal|(Cal)*", "Cal|(Cal)?",
            "\\s|a", "\\d|0", "\\w| ",
        ];
        for regex in patterns {
            let mut sim = simulation_for(regex);
            let prefix = sim.enable_prefix();
            assert!(prefix == "");
        }
    }

    /// Patterns with a mandatory leading literal yield a non-empty prefix.
    #[test]
    fn test_prefix_not_empty() {
        let patterns = [
            "(na)+ batman", "group|grep|great|gruesome",
            "\\.com|\\.com", "just a string", "(na)+|(narnai)+",
        ];
        for regex in patterns {
            let mut sim = simulation_for(regex);
            let prefix = sim.enable_prefix();
            assert!(prefix != "");
        }
    }
}
\ No newline at end of file
use crate::earley_parse::{ASTNode, Terminal, NonTerminal};
use std::collections::hash_map::HashMap;
/// Label of an NFA edge: either a structural marker (`Epsilon`, `Accept`)
/// or a predicate over a single input character.
#[derive(Eq, Hash, PartialEq)]
pub enum Elem {
    /// Transition that consumes no input; never matches a character.
    Epsilon,
    /// Wildcard; matches every character.
    Dot,
    /// ASCII digit `0`-`9`.
    Digit,
    /// ASCII letter `A`-`Z` or `a`-`z`.
    Letter,
    /// Space or horizontal tab.
    Whitespace,
    /// Anything that is not an ASCII digit.
    NonDigit,
    /// Anything that is not an ASCII letter.
    NonLetter,
    /// Anything that is neither space nor tab.
    NonWhitespace,
    /// Exactly the given character.
    Unique(char),
    /// Accepting marker; never matches a character.
    Accept,
}

impl Elem {
    /// Returns `true` when the character `c` satisfies this element.
    ///
    /// The character classes use the standard-library ASCII predicates,
    /// which cover exactly the ranges `0..=9`, `A..=Z` and `a..=z`.
    pub fn compare(&self, c: char) -> bool {
        match self {
            Elem::Epsilon | Elem::Accept => false,
            Elem::Dot => true,
            Elem::Digit => c.is_ascii_digit(),
            Elem::NonDigit => !c.is_ascii_digit(),
            Elem::Letter => c.is_ascii_alphabetic(),
            Elem::NonLetter => !c.is_ascii_alphabetic(),
            Elem::Whitespace => matches!(c, ' ' | '\t'),
            Elem::NonWhitespace => !matches!(c, ' ' | '\t'),
            Elem::Unique(a) => *a == c,
        }
    }

    /// Returns a human-readable name for the element, used by the debug
    /// printers. `Unique(c)` is rendered as the character itself.
    pub fn to_str(&self) -> String {
        let name = match self {
            Elem::Unique(a) => return a.to_string(),
            Elem::Epsilon => "epsilon",
            Elem::Dot => "Dot",
            Elem::Digit => "digit",
            Elem::NonDigit => "non-digit",
            Elem::Letter => "letter",
            Elem::NonLetter => "non-letter",
            Elem::Whitespace => "whitespace",
            Elem::NonWhitespace => "non-whitespace",
            Elem::Accept => "accept",
        };
        String::from(name)
    }
}
/// Node of an NFA graph whose outgoing edges are grouped by their `Elem` key.
pub struct Graph {
// `true` for accepting nodes (set by `new_accept` / `make_accept`).
accept: bool,
// Outgoing edges; `None` for a node that has no edge map yet (see `new`).
head: LinkMap,
}
// A possibly missing successor node; `None` is a dangling edge that
// `connect_out` can fill in later.
type Linky = Option<Graph>;
// Edge map of a node: for each key, the list of (possibly dangling) successors.
type LinkMap = Option<HashMap<Elem, Vec<Linky>>>;
// Design notes on what should be an `Option` and what should not:
// - There will be situations where a key exists but there is no node for it
//   to point to yet.
// - It is also possible to have multiple keys with no node to point to.
// - The map therefore holds, per key, a vector of optional nodes: the vector
//   can grow, while individual entries may still be `None`.
impl Graph {
pub fn new() -> Self {
Graph {accept: false, head: None}
}
pub fn new_accept() -> Self {
Graph {accept: true, head: None}
}
pub fn new_t(terminal: Elem) -> Self {
let mut graph = Graph::new();
graph.add(terminal, None);
graph
}
pub fn add(&mut self, k: Elem, next: Linky) {
match self.head {
| Some(ref mut children) =>
{
// graph not empty, has a map with at least one key
if children.contains_key(&k) {
let next_nodes = children.get_mut(&k).unwrap();
if next.is_some()
| !contains_none(next_nodes.as_ref()) {
next_nodes.push(next);
}
}
else {
let mut next_nodes = Vec::new();
if next.is_some()
| !contains_none(next_nodes.as_ref()) {
next_nodes.push(next);
}
children.insert(k, next_nodes);
}
}
| None =>
{
// graph empty, make new map
let mut children = HashMap::new();
let mut next_nodes = Vec::new();
if next.is_some()
| !contains_none(next_nodes.as_ref()) {
next_nodes.push(next);
}
children.insert(k, next_nodes);
self.head = Some(children);
}
}
}
pub fn remove(&mut self, k: Elem) -> Option<Self> {
match self.head {
| Some(ref mut children) =>
{
if children.contains_key(&k) {
let next_nodes = children.get_mut(&k).unwrap();
if next_nodes.is_empty() {
children.remove(&k);
None
}
else {
let removed = next_nodes.pop().unwrap();
removed
}
}
else {
None
}
}
| None => None
}
}
pub fn connect_out(&mut self, k: &Elem, out: Linky) {
// finds dangling graphs in set of nodes for key k
// replaces it with the input replacement
match self.head.as_mut() {
| Some(children) =>
{
if children.contains_key(k) {
let mut next_nodes = children.get_mut(k).unwrap();
let mut i = 0;
let mut empty = next_nodes.len();
while i < next_nodes.len() {
if next_nodes[i].is_none() {
empty = i;
break;
}
i += 1;
}
if empty < next_nodes.len() {
next_nodes[empty] = out;
}
}
},
| None => (),
}
}
pub fn make_accept(&mut self) {
self.accept = true;
}
pub fn get_head(self) -> LinkMap {
self.head
}
pub fn get_child(&self, k: &Elem, idx: usize) -> Option<&Self> {
match self.head.as_ref() {
| Some(children) =>
{
if children.contains_key(k) {
children.get(k).unwrap()[idx].as_ref()
}
else {
None
}
},
| None => None
}
}
pub fn get_child_mut(&mut self, k: &Elem, idx: usize) -> Option<&mut Self> {
match self.head.as_mut() {
| Some(children) =>
{
if children.contains_key(k) {
children.get_mut(k).unwrap()[idx].as_mut()
}
else {
None
}
},
| None => None
}
}
pub fn print_graph(&self, space: &str) {
match &self.head {
| None => println!(""),
| Some(children) => {
for (key, val) in children.iter() {
for v in val {
println!("{}{}", space, key.to_str());
if v.is_some() {
v.as_ref()
.unwrap()
.print_graph(&(space.to_owned() + " "));
}
}
}
}
}
}
}
/// Returns `true` when at least one entry of `vec` is `None`, i.e. the list
/// holds a dangling edge.
pub fn contains_none(vec: &Vec<Linky>) -> bool {
    vec.iter().any(|link| link.is_none())
}
/// Partially built piece of an NFA graph.
pub enum Fragment {
// A single graph node; it is both the head and the only out node
// (see `get_head` / `get_outs`).
Terminal(Graph),
// A composite fragment with an explicit entry node and a list of out nodes.
NonTerminal {
head: Graph,
outs: Vec<Graph>,
}
}
impl Fragment {
pub fn from_t(el: Elem) -> Self {
Self::Terminal(Graph::new_t(el))
}
pub fn get_head(self) -> Graph {
match self {
| Self::Terminal(graph) => graph,
| Self::NonTerminal{head, ..} => head
}
}
pub fn get_outs(self) -> Vec<Graph> {
match self {
| Self::Terminal(graph) =>
{
let mut v = Vec::new();
v.push(graph);
v
},
| Self::NonTerminal{head, outs} => outs
}
}
// pub fn connect_to_fragment(self, frag: Self) -> Self{
// let mut outs: Vec<Graph> = self.get_outs();
// for out in outs {
// match out.get_head() {
// | None => (),
// | Some(head) =>
// {
// for (k, _) in head.iter() {
// out.connect_out(k, Some(frag.get_head()));
// }
// }
// }
// }
// Fragment::from_t((Elem::Dot))
// }
}
impl ASTNode {
    /// Converts this AST node into a terminal [`Fragment`].
    ///
    /// Only terminals, the character-class symbols, `LITERAL-DOT` and the
    /// `SPECIAL` / `EXPR` wrappers (which delegate to their second child) are
    /// handled; any other symbol hits `unimplemented!()`.
    pub fn to_Fragment(&self) -> Fragment {
        let elem = match self {
            // An unescaped dot is the wildcard, everything else is literal.
            Self::Terminal('.') => Elem::Dot,
            Self::Terminal(c) => Elem::Unique(*c),
            Self::NonTerminal { sym, children } => match *sym {
                // | "UNION" =>
                // {
                // let frag_A = children[0].to_Fragment();
                // let frag_B = children[2].to_Fragment();
                // let mut union_graph = Graph::new();
                // union_graph.add(Elem::Epsilon, Some(frag_A.head));
                // union_graph.add(Elem::Epsilon, Some(frag_B.head));
                // frag_A.get_outs().append(&mut frag_B.get_outs());
                // Fragment::NonTerminal {head: union_graph, outs:frag_A.get_outs()}
                // },
                // | "Q" =>
                // {
                // let graph = children[0].to_Fragment();
                // let mut q_graph = Graph::new();
                // q_graph.add(Elem::Epsilon, Some(graph));
                // q_graph.add(Elem::Epsilon, None);
                // q_graph
                // },
                "SPECIAL" | "EXPR" => return children[1].to_Fragment(),
                "DIGIT" => Elem::Digit,
                "NON-DIGIT" => Elem::NonDigit,
                "LETTER" => Elem::Letter,
                "NON-LETTER" => Elem::NonLetter,
                "WHITESPACE" => Elem::Whitespace,
                "NON-WHITESPACE" => Elem::NonWhitespace,
                "LITERAL-DOT" => Elem::Unique('.'),
                _ => unimplemented!(),
            },
        };
        Fragment::from_t(elem)
    }
}
impl ASTNode {
    /// Converts this AST node into a [`Graph`].
    ///
    /// Terminals and character classes become single-edge graphs. `UNION`
    /// and `Q` become a fresh node with epsilon edges to their operands.
    /// `SPECIAL` / `EXPR` delegate to their second child. Any other symbol
    /// hits `unimplemented!()`.
    pub fn to_Graph(&self) -> Graph {
        let (sym, children) = match self {
            // An unescaped dot is the wildcard, everything else is literal.
            Self::Terminal('.') => return Graph::new_t(Elem::Dot),
            Self::Terminal(c) => return Graph::new_t(Elem::Unique(*c)),
            Self::NonTerminal { sym, children } => (*sym, children),
        };
        match sym {
            "UNION" => {
                // children[1] is the `|` token itself.
                let left = children[0].to_Graph();
                let right = children[2].to_Graph();
                let mut union_graph = Graph::new();
                for branch in [left, right] {
                    union_graph.add(Elem::Epsilon, Some(branch));
                }
                union_graph
            }
            "Q" => {
                let operand = children[0].to_Graph();
                let mut q_graph = Graph::new();
                q_graph.add(Elem::Epsilon, Some(operand));
                // The second epsilon edge is left dangling: the "skip" path.
                q_graph.add(Elem::Epsilon, None);
                q_graph
            }
            "SPECIAL" | "EXPR" => children[1].to_Graph(),
            "DIGIT" => Graph::new_t(Elem::Digit),
            "NON-DIGIT" => Graph::new_t(Elem::NonDigit),
            "LETTER" => Graph::new_t(Elem::Letter),
            "NON-LETTER" => Graph::new_t(Elem::NonLetter),
            "WHITESPACE" => Graph::new_t(Elem::Whitespace),
            "NON-WHITESPACE" => Graph::new_t(Elem::NonWhitespace),
            "LITERAL-DOT" => Graph::new_t(Elem::Unique('.')),
            _ => unimplemented!(),
        }
    }
}
use std::ptr;
use std::mem;
use crate::earley_parse::ASTNode;
use crate::types::Elem;
use crate::types::Elem::{*};
/// One NFA state with at most two outgoing transitions.
pub struct State {
// What this state matches (or `Epsilon` / `Accept`).
key: Elem,
// First outgoing transition; null while unconnected (see `Fragment::new`).
out1: Link,
// Second outgoing transition; null while unconnected.
out2: Link,
}
// Raw, possibly null pointer to a state. States are allocated with
// `Box::into_raw` in `Fragment::new` and released by `free`.
type Link = *mut State;
impl State {
    /// Prints this state's key prefixed by `space`, then its successors one
    /// level deeper.
    ///
    /// Recursion stops once `space` is 40 bytes long, which bounds the
    /// output when the graph contains cycles.
    pub fn show(&self, space: &str) {
        println!("{}{}", space, self.key.to_str());
        if space.len() >= 40 {
            return;
        }
        let deeper = format!("{} ", space);
        for link in [self.out1, self.out2] {
            if link.is_null() {
                continue;
            }
            // SAFETY: `link` is non-null; it is assumed to still point to a
            // live `State` that has not been passed to `free` -- TODO confirm
            // at call sites.
            unsafe {
                (*link).show(&deeper);
            }
        }
    }

    /// Returns the first outgoing link (null when unconnected).
    pub fn out1(&self) -> Link {
        self.out1
    }

    /// Returns the second outgoing link (null when unconnected).
    pub fn out2(&self) -> Link {
        self.out2
    }

    /// Returns the element this state matches.
    pub fn key(&self) -> &Elem {
        &self.key
    }
}
/// Appends to `states_lst` every state reachable from `head` that is not
/// already in the list.
///
/// States are visited depth-first, `out1` before `out2`, and each pointer is
/// pushed the first time it is met. A null `head` is ignored (it used to be
/// dereferenced). The walk uses an explicit stack instead of recursion, so
/// its depth does not grow with the length of the NFA.
///
/// Every non-null pointer reachable from `head` must point to a live
/// `State`; this function dereferences them.
pub fn get_states(head: Link, states_lst: &mut Vec<Link>) {
    let mut pending = vec![head];
    while let Some(link) = pending.pop() {
        if link.is_null() || states_lst.contains(&link) {
            continue;
        }
        states_lst.push(link);
        // SAFETY: `link` is non-null and, per the contract above, points to
        // a live `State`.
        let (first, second) = unsafe { ((*link).out1(), (*link).out2()) };
        // Pushed in reverse so that `out1` is explored before `out2`.
        pending.push(second);
        pending.push(first);
    }
}
/// Partially built NFA: an entry state plus the states whose outgoing
/// links still have to be connected.
pub struct Fragment {
// Entry state of the fragment.
head: Link,
// States that `outlst_to_state` will connect to whatever follows.
outlst: Vec<*mut State>,
}
impl Fragment {
    /// Allocates a single unconnected state labelled `key`; the state is
    /// both the head and the only entry of the out list.
    ///
    /// The state is leaked with `Box::into_raw` and has to be released with
    /// `free`.
    pub fn new(key: Elem) -> Self {
        let state = State {
            key,
            out1: ptr::null_mut(),
            out2: ptr::null_mut(),
        };
        let head = Box::into_raw(Box::new(state));
        Fragment { head, outlst: vec![head] }
    }

    /// Points the first free link (`out1`, then `out2`) of every state in
    /// the out list at `state`.
    ///
    /// Panics when a state in the out list has both links in use.
    fn outlst_to_state(&mut self, state: Link) {
        for &out in &self.outlst {
            // SAFETY: pointers in `outlst` come from `Fragment::new`
            // (`Box::into_raw`); they are assumed not to have been freed yet.
            unsafe {
                if (*out).out1.is_null() {
                    (*out).out1 = state;
                } else if (*out).out2.is_null() {
                    (*out).out2 = state;
                } else {
                    panic!("cannot add more than two states");
                }
            }
        }
    }

    /// Sequencing: connects this fragment's outs to `frag` and takes over
    /// `frag`'s out list.
    pub fn concatenate(&mut self, frag: Fragment) {
        let Fragment { head: next_head, outlst: next_outs } = frag;
        self.outlst_to_state(next_head);
        self.outlst = next_outs;
    }

    /// Alternation: connects this fragment to both `frag1` and `frag2`; the
    /// new out list is `frag1`'s outs followed by `frag2`'s.
    pub fn alternate(&mut self, frag1: Fragment, frag2: Fragment) {
        self.outlst_to_state(frag1.head);
        self.outlst_to_state(frag2.head);
        let mut merged = frag1.outlst;
        merged.extend(frag2.outlst);
        self.outlst = merged;
    }

    /// Question mark: connects this fragment to the optional `frag` and
    /// keeps its own outs in front of `frag`'s.
    pub fn qm(&mut self, frag: Fragment) {
        self.outlst_to_state(frag.head);
        self.outlst.extend(frag.outlst);
    }

    /// Star: `frag` is the loop body. Its outs are wired back to this
    /// fragment's head, then this fragment's outs are wired to the body.
    pub fn star(&mut self, mut frag: Fragment) {
        let loop_entry = self.head;
        frag.outlst_to_state(loop_entry);
        self.outlst_to_state(frag.head);
    }

    /// Plus: `self` is the loop body and `frag` the loop-back state, whose
    /// out list becomes the out list of the result.
    pub fn plus(&mut self, mut frag: Fragment) {
        let body_entry = self.head;
        frag.outlst_to_state(body_entry);
        self.outlst_to_state(frag.head);
        self.outlst = frag.outlst;
    }

    /// Returns the entry state.
    pub fn head(&self) -> Link {
        self.head
    }

    /// Consumes the fragment and returns its out list.
    pub fn outlst(self) -> Vec<*mut State> {
        self.outlst
    }

    /// Prints the state graph starting at the head.
    pub fn show(&self) {
        // SAFETY: `head` was produced by `Fragment::new`; assumed still live.
        unsafe {
            (*self.head).show("");
        }
    }

    /// Prints the key of every state in the out list.
    pub fn show_outlst(&self) {
        for &out in &self.outlst {
            // SAFETY: see `outlst_to_state`.
            let label = unsafe { (*out).key.to_str() };
            println!("{}", label);
        }
    }

    /// Returns the accept state when the out list consists of exactly one
    /// state and that state's key is `Accept`; otherwise `None`.
    pub fn accept_state(&self) -> Option<Link> {
        match self.outlst.as_slice() {
            // SAFETY: see `outlst_to_state`.
            &[only] if unsafe { (*only).key == Accept } => Some(only),
            _ => None,
        }
    }
}
impl ASTNode {
    /// Recursively translates this AST node into an NFA [`Fragment`].
    ///
    /// Panics through `unreachable!` with the symbol name when a
    /// non-terminal symbol is not one of the handled ones.
    pub fn to_fragment(&self) -> Fragment {
        let (sym, children) = match self {
            // An unescaped dot is the wildcard, everything else is literal.
            Self::Terminal('.') => return Fragment::new(Dot),
            Self::Terminal(c) => return Fragment::new(Unique(*c)),
            Self::NonTerminal { sym, children } => (*sym, children),
        };
        match sym {
            "CONCAT" => {
                let mut sequence = children[0].to_fragment();
                sequence.concatenate(children[1].to_fragment());
                sequence
            }
            "UNION" => {
                // children[1] is the `|` token itself.
                let mut fork = Fragment::new(Epsilon);
                fork.alternate(children[0].to_fragment(),
                               children[2].to_fragment());
                fork
            }
            "Q" => {
                let mut optional = Fragment::new(Epsilon);
                optional.qm(children[0].to_fragment());
                optional
            }
            "STAR" => {
                let mut repeat = Fragment::new(Epsilon);
                repeat.star(children[0].to_fragment());
                repeat
            }
            "PLUS" => {
                // Here the body is the receiver and the epsilon state loops back.
                let mut body = children[0].to_fragment();
                body.plus(Fragment::new(Epsilon));
                body
            }
            "SPECIAL" | "EXPR" => children[1].to_fragment(),
            "DIGIT" => Fragment::new(Digit),
            "NON-DIGIT" => Fragment::new(NonDigit),
            "LETTER" => Fragment::new(Letter),
            "NON-LETTER" => Fragment::new(NonLetter),
            "WHITESPACE" => Fragment::new(Whitespace),
            "NON-WHITESPACE" => Fragment::new(NonWhitespace),
            "LITERAL-DOT" => Fragment::new(Unique('.')),
            not_reachable => {
                // unimplemented!();
                unreachable!("{}", not_reachable);
            }
        }
    }
}
/// Simulation of a pointer-linked NFA.
pub struct Simulation {
// Non-epsilon states that are currently active.
current: Vec<Link>,
// Scratch list filled by `next()` and swapped into `current` by `step()`.
next: Vec<Link>,
// Entry state of the NFA; `reset()` restarts from here.
head: Link,
// Accepting state, taken from `Fragment::accept_state` in `new`.
accept: Link
}
impl Simulation {
    /// Creates a simulation positioned at the start of `head_frag`.
    ///
    /// Panics when `head_frag` does not end in a single `Accept` state
    /// (see `Fragment::accept_state`).
    pub fn new(head_frag: &Fragment) -> Self {
        let mut current = Vec::new();
        Simulation::add_to_next(head_frag.head(), &mut current);
        Simulation {
            current,
            next: Vec::new(),
            accept: head_frag
                .accept_state()
                .expect("fragment must end in exactly one Accept state"),
            head: head_frag.head(),
        }
    }

    /// Adds `node` to `next_lst`, following epsilon states transitively.
    ///
    /// Null links and states already in the list are skipped. Epsilon states
    /// are never stored in the list, so they are tracked separately in
    /// `seen_eps`. Without that record an epsilon cycle (as produced by
    /// nested repetitions such as `(a*)*`) would be followed forever. The
    /// traversal is depth-first, `out1` before `out2`, so the resulting list
    /// order matches a plain recursive walk.
    fn add_to_next(node: Link, next_lst: &mut Vec<Link>) {
        let mut seen_eps: Vec<Link> = Vec::new();
        let mut pending = vec![node];
        while let Some(link) = pending.pop() {
            if link.is_null() || next_lst.contains(&link) {
                continue;
            }
            // SAFETY: `link` is non-null; links originate from
            // `Fragment::new` and are assumed not to have been freed.
            let (is_epsilon, first, second) =
                unsafe { ((*link).key == Epsilon, (*link).out1, (*link).out2) };
            if !is_epsilon {
                next_lst.push(link);
            } else if !seen_eps.contains(&link) {
                seen_eps.push(link);
                // Pushed in reverse so that `out1` is explored first.
                pending.push(second);
                pending.push(first);
            }
        }
    }

    /// Rewinds the simulation to the start state set.
    pub fn reset(&mut self) {
        self.current.clear();
        Simulation::add_to_next(self.head, &mut self.current);
        self.next.clear();
    }

    /// Collects into `self.next` the successors of every active state whose
    /// key matches `elem`.
    pub fn next(&mut self, elem: char) {
        for &node in &self.current {
            // SAFETY: states in `current` were added by `add_to_next`, which
            // only stores non-null links.
            let (matched, first, second) =
                unsafe { ((*node).key.compare(elem), (*node).out1, (*node).out2) };
            if matched {
                Simulation::add_to_next(first, &mut self.next);
                Simulation::add_to_next(second, &mut self.next);
            }
        }
    }

    /// Consumes one input character and advances the active set.
    pub fn step(&mut self, c: char) {
        self.next(c);
        // `next` becomes the active set; the cleared old list is recycled.
        self.current.clear();
        mem::swap(&mut self.current, &mut self.next);
    }

    /// Returns the entry state of the simulated NFA.
    pub fn head(&self) -> Link {
        self.head
    }

    /// Returns `true` when the accepting state is currently active.
    pub fn accepts(&self) -> bool {
        self.current.contains(&self.accept)
    }

    /// Prints `message` followed by the keys of the active states.
    pub fn show(&self, message: &str) {
        println!("{}", message);
        println!("Current States");
        for &node in &self.current {
            // SAFETY: see `next`.
            let label = unsafe { (*node).key.to_str() };
            println!("{}", label);
        }
        println!("\n");
    }

    /// Prints the NFA graph starting at the head state.
    pub fn show_nfa(&self) {
        // SAFETY: `head` comes from the fragment passed to `new`; assumed
        // still live.
        unsafe { (*self.head).show("") };
    }

    /// Returns `true` when no state is active.
    pub fn stuck(&self) -> bool {
        self.current.is_empty()
    }
}
/// Releases every state reachable from `state`.
///
/// The pointers must have been produced by `Box::into_raw` (as done in
/// `Fragment::new`) and must not be used afterwards. A null `state` is a
/// no-op; it used to be handed on to `get_states`, which dereferenced it.
pub fn free(state: Link) {
    if state.is_null() {
        return;
    }
    // Collect first: freeing while walking would read released memory.
    let mut states_lst = Vec::new();
    get_states(state, &mut states_lst);
    for s in states_lst {
        // SAFETY: `get_states` lists each reachable state exactly once, and
        // each one came from `Box::into_raw`, so it is dropped exactly once.
        unsafe {
            drop(Box::from_raw(s));
        }
    }
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::cfg::parse_regex;

    /// A literal pattern, once terminated with an `Accept` state, accepts
    /// exactly its own text.
    #[test]
    fn literal_match() {
        let pattern = "rusty";
        let parsed = parse_regex(pattern);
        assert!(parsed.is_some());
        let ast = parsed.unwrap().collapse();

        let mut nfa = ast.to_fragment();
        nfa.concatenate(Fragment::new(Accept));
        assert!(!nfa.head().is_null());

        let mut sim = Simulation::new(&nfa);
        "rusty".chars().for_each(|c| sim.step(c));
        assert!(sim.accepts());

        // States are raw allocations and have to be released by hand.
        free(nfa.head());
    }
}
\ No newline at end of file
use std::thread;
use std::sync::{mpsc, Arc, Mutex};
use std::time::Duration;
/// Receiving end of the job channel, shared by all workers behind a mutex.
type ReceiverArc = Arc<Mutex<mpsc::Receiver<Job>>>;
/// A unit of work: a boxed closure that is run exactly once on a worker.
type Job = Box<dyn FnOnce() + Send + 'static>;

/// Fixed-size pool of worker threads fed through a channel.
///
/// Dropping the pool closes the channel and blocks until every queued job
/// has run and every worker has exited.
pub struct Threadpool {
    // Worker threads; each loops receiving and running jobs.
    workers: Vec<Worker>,
    // Sending end of the job channel; taken (set to `None`) in `Drop`.
    sender: Option<mpsc::Sender<Job>>,
}

impl Threadpool {
    /// Starts a pool with `n` worker threads.
    ///
    /// # Panics
    /// Panics when `n` is zero.
    pub fn new(n: usize) -> Self {
        assert!(n > 0);
        let (tx, rx) = mpsc::channel();
        let receiver: ReceiverArc = Arc::new(Mutex::new(rx));
        let workers = (0..n)
            .map(|id| Worker::new(id, Arc::clone(&receiver)))
            .collect();
        Threadpool { workers, sender: Some(tx) }
    }

    /// Queues `f` to be run once on some worker thread.
    ///
    /// # Panics
    /// Panics when no worker is left to receive the job, which happens only
    /// after every worker thread has been killed by a panicking job.
    pub fn execute<F>(&self, f: F)
    where
        F: FnOnce() + Send + 'static,
    {
        let job: Job = Box::new(f);
        self.sender
            .as_ref()
            .expect("sender is only removed while the pool is being dropped")
            .send(job)
            .expect("no worker thread is left to receive jobs");
    }
}

impl Drop for Threadpool {
    /// Closes the job channel and waits for every worker to finish.
    ///
    /// A worker that died because one of its jobs panicked is reported on
    /// stderr. Re-raising that panic from `drop` would abort the process if
    /// the pool is dropped during unwinding.
    fn drop(&mut self) {
        // Dropping the sender makes `recv` fail, which ends the worker loops.
        drop(self.sender.take());
        for worker in &mut self.workers {
            if let Some(thread) = worker.thread.take() {
                if thread.join().is_err() {
                    eprintln!(
                        "threadpool: worker {} was terminated by a panicking job",
                        worker.id
                    );
                }
            }
        }
    }
}

/// One worker thread of the pool.
struct Worker {
    // Index of the worker inside its pool; used in diagnostics.
    id: usize,
    // Handle of the thread; taken (set to `None`) when the pool joins it.
    thread: Option<thread::JoinHandle<()>>,
}

impl Worker {
    /// Spawns a thread that keeps taking jobs from `rx` and running them
    /// until the channel is closed.
    fn new(id: usize, rx: ReceiverArc) -> Self {
        let thread = thread::spawn(move || loop {
            // The guard is a temporary of this `let` statement, so the lock
            // is released before the job runs and other workers can receive
            // in the meantime. (A `while let` on the same expression would
            // keep the lock for the whole loop body.)
            let message = rx
                .lock()
                .expect("job receiver mutex poisoned")
                .recv();
            match message {
                Ok(job) => job(),
                // The sender was dropped: no more jobs will ever arrive.
                Err(_) => break,
            }
        });
        Worker { id, thread: Some(thread) }
    }
}
/// Label of an NFA state: either a structural marker (`Epsilon`, `Accept`,
/// `Empty`) or a predicate over a single input character.
#[derive(Eq, Hash, PartialEq, Clone)]
pub enum Elem {
    /// Transition that consumes no input; never matches a character.
    Epsilon,
    /// Wildcard over the supported alphabet: printable ASCII (`' '..='~'`)
    /// and horizontal tab.
    Dot,
    /// ASCII digit `0`-`9`.
    Digit,
    /// ASCII letter `A`-`Z` or `a`-`z`.
    Letter,
    /// Space or horizontal tab.
    Whitespace,
    /// Anything that is not an ASCII digit.
    NonDigit,
    /// Anything that is not an ASCII letter.
    NonLetter,
    /// Anything that is neither space nor tab.
    NonWhitespace,
    /// Exactly the given character.
    Unique(char),
    /// Accepting marker; never matches a character.
    Accept,
    /// Placeholder; never matches a character and prints as an empty string.
    Empty,
}

impl Elem {
    /// Returns `true` when the character `c` satisfies this element.
    ///
    /// The character classes use the standard-library ASCII predicates,
    /// which cover exactly the ranges `0..=9`, `A..=Z` and `a..=z`.
    pub fn compare(&self, c: char) -> bool {
        match self {
            Elem::Epsilon | Elem::Accept | Elem::Empty => false,
            Elem::Dot => matches!(c, ' '..='~' | '\t'),
            Elem::Digit => c.is_ascii_digit(),
            Elem::NonDigit => !c.is_ascii_digit(),
            Elem::Letter => c.is_ascii_alphabetic(),
            Elem::NonLetter => !c.is_ascii_alphabetic(),
            Elem::Whitespace => matches!(c, ' ' | '\t'),
            Elem::NonWhitespace => !matches!(c, ' ' | '\t'),
            Elem::Unique(a) => *a == c,
        }
    }

    /// Returns a human-readable name for the element, used by the debug
    /// printers. `Unique(c)` is rendered as the character itself.
    pub fn to_str(&self) -> String {
        let name = match self {
            Elem::Unique(a) => return a.to_string(),
            Elem::Epsilon => "epsilon",
            Elem::Dot => "Dot",
            Elem::Digit => "digit",
            Elem::NonDigit => "non-digit",
            Elem::Letter => "letter",
            Elem::NonLetter => "non-letter",
            Elem::Whitespace => "whitespace",
            Elem::NonWhitespace => "non-whitespace",
            Elem::Accept => "accept",
            Elem::Empty => "",
        };
        String::from(name)
    }
}
\ No newline at end of file
{"rustc_fingerprint":17910920460381246071,"outputs":{"15729799797837862367":{"success":true,"status":"","code":0,"stdout":"___\nlib___.rlib\nlib___.dylib\nlib___.dylib\nlib___.a\nlib___.dylib\n/Users/rodri/.rustup/toolchains/stable-aarch64-apple-darwin\noff\npacked\nunpacked\n___\ndebug_assertions\npanic=\"unwind\"\nproc_macro\ntarget_arch=\"aarch64\"\ntarget_endian=\"little\"\ntarget_env=\"\"\ntarget_family=\"unix\"\ntarget_feature=\"aes\"\ntarget_feature=\"crc\"\ntarget_feature=\"dit\"\ntarget_feature=\"dotprod\"\ntarget_feature=\"dpb\"\ntarget_feature=\"dpb2\"\ntarget_feature=\"fcma\"\ntarget_feature=\"fhm\"\ntarget_feature=\"flagm\"\ntarget_feature=\"fp16\"\ntarget_feature=\"frintts\"\ntarget_feature=\"jsconv\"\ntarget_feature=\"lor\"\ntarget_feature=\"lse\"\ntarget_feature=\"neon\"\ntarget_feature=\"paca\"\ntarget_feature=\"pacg\"\ntarget_feature=\"pan\"\ntarget_feature=\"pmuv3\"\ntarget_feature=\"ras\"\ntarget_feature=\"rcpc\"\ntarget_feature=\"rcpc2\"\ntarget_feature=\"rdm\"\ntarget_feature=\"sb\"\ntarget_feature=\"sha2\"\ntarget_feature=\"sha3\"\ntarget_feature=\"ssbs\"\ntarget_feature=\"vh\"\ntarget_has_atomic=\"128\"\ntarget_has_atomic=\"16\"\ntarget_has_atomic=\"32\"\ntarget_has_atomic=\"64\"\ntarget_has_atomic=\"8\"\ntarget_has_atomic=\"ptr\"\ntarget_os=\"macos\"\ntarget_pointer_width=\"64\"\ntarget_vendor=\"apple\"\nunix\n","stderr":""},"4614504638168534921":{"success":true,"status":"","code":0,"stdout":"rustc 1.77.2 (25ef9e3d8 2024-04-09)\nbinary: rustc\ncommit-hash: 25ef9e3d85d934b27d9dada2f9dd52b1dc63bb04\ncommit-date: 2024-04-09\nhost: aarch64-apple-darwin\nrelease: 1.77.2\nLLVM version: 17.0.6\n","stderr":""}},"successes":{}}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment