Commit df9dd0cd authored by Ethan Ordentlich's avatar Ethan Ordentlich
Browse files

Initial commit

parents
No related merge requests found
Showing with 373 additions and 0 deletions
+373 -0
package cfg;
import cfg.CFG.Symbol;
import java.util.Objects;
/**
 * A node of a parse tree.
 *
 * <p>A node wraps a grammar {@link Symbol}. Nodes built with the single-argument
 * constructor are flagged as terminals and carry no child array; the other
 * constructors take an explicit child array.
 */
public class ASTNode {
    private final boolean isTerminal;
    private final ASTNode[] children;
    private final Symbol symbol;

    /**
     * Create a node with every field given explicitly.
     *
     * @param symbol     grammar symbol represented by this node
     * @param children   child nodes; stored by reference, not copied
     * @param isTerminal whether this node is flagged as a terminal
     */
    public ASTNode(Symbol symbol, ASTNode[] children, boolean isTerminal) {
        this.isTerminal = isTerminal;
        this.symbol = symbol;
        this.children = children;
    }

    /**
     * Create a node that is not flagged as a terminal.
     *
     * @param symbol   grammar symbol represented by this node
     * @param children child nodes; stored by reference, not copied
     */
    public ASTNode(Symbol symbol, ASTNode[] children) {
        this(symbol, children, false);
    }

    /**
     * Create a terminal node; its child array is {@code null}.
     *
     * @param symbol grammar symbol represented by this node
     */
    public ASTNode(Symbol symbol) {
        this(symbol, null, true);
    }

    /**
     * Render this subtree as text.
     *
     * @return the symbol's text for a terminal node, otherwise the children's
     *         renderings concatenated in array order
     */
    @Override
    public String toString() {
        if (isTerminal) {
            return symbol.toString();
        }
        StringBuilder text = new StringBuilder();
        for (ASTNode kid : getChildren()) {
            text.append(kid.toString());
        }
        return text.toString();
    }

    /** @return whether this node is flagged as a terminal */
    public boolean isTerminal() {
        return isTerminal;
    }

    /** @return the child array itself (not a copy); {@code null} if none was supplied */
    public ASTNode[] getChildren() {
        return children;
    }

    /** @return the string form of this node's symbol */
    public String getValue() {
        return symbol.toString();
    }

    /** @return the length of the child array, or 0 when there is no child array */
    public int numChildren() {
        if (children == null) {
            return 0;
        }
        return children.length;
    }

    /** @return the first element of the child array */
    public ASTNode getLeftChild() {
        return children[0];
    }

    /** @return the last element of the child array */
    public ASTNode getRightChild() {
        int last = children.length - 1;
        return children[last];
    }

    /**
     * Collapse the parse tree.
     *
     * <p>A node with exactly one child is replaced by that child's collapsed
     * form. For any other non-terminal node the child array is rewritten in
     * place with the collapsed children and the node itself is returned.
     *
     * @return a tree where no nonterminal node has a single child
     */
    public ASTNode collapse() {
        // Terminals and nodes without a child array are already collapsed
        if (isTerminal || children == null) {
            return this;
        }
        // A single-child chain is replaced by whatever the child collapses to
        if (children.length == 1) {
            return children[0].collapse();
        }
        for (int idx = 0; idx < children.length; idx++) {
            ASTNode collapsed = children[idx].collapse();
            children[idx] = collapsed;
        }
        return this;
    }
}
package cfg;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
/**
 * A context-free grammar: a start nonterminal plus, for each nonterminal, the
 * list of right-hand sides it can be rewritten to.
 */
public class CFG {
    /**
     * Base type of grammar symbols. Two symbols are equal only when they have
     * the same runtime class and the same value.
     */
    public static class Symbol {
        protected String value;

        /** @return the text of this symbol */
        public String getValue() {
            return this.value;
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            // Subclasses are considered not equal even if the value is the same
            if (obj == null || this.getClass() != obj.getClass()) {
                return false;
            }
            // Objects.equals keeps this null-safe, matching hashCode below
            return Objects.equals(value, ((Symbol) obj).value);
        }

        @Override
        public int hashCode() {
            // Subclasses should hash differently
            return Objects.hash(value, getClass());
        }

        @Override
        public String toString() {
            return this.value;
        }
    }

    /** A terminal symbol; expected to be a single character. */
    public static class Terminal extends Symbol {
        /**
         * @param v text of the terminal; checked to have length 1 when
         *          assertions are enabled
         */
        public Terminal(String v) {
            // Use multiple terminals for longer strings
            assert v.length() == 1;
            this.value = v;
        }
    }

    /** A nonterminal symbol identified by its name. */
    public static class Nonterminal extends Symbol {
        /** @param v name of the nonterminal */
        public Nonterminal(String v) {
            this.value = v;
        }
    }

    /** Start symbol of the grammar. */
    public final Nonterminal start;
    private final Map<Nonterminal, List<Symbol[]>> ruleMap;

    /**
     * Create a grammar with no productions.
     *
     * @param nt start symbol
     */
    public CFG(Nonterminal nt) {
        this.start = nt;
        this.ruleMap = new HashMap<>();
    }

    /**
     * Create a grammar with no productions.
     *
     * @param string name of the start nonterminal
     */
    public CFG(String string) {
        this(nt(string));
    }

    /**
     * Look up the productions of a nonterminal.
     *
     * @param symbol nonterminal on the LHS
     * @return unmodifiable list of the RHS arrays registered for {@code symbol};
     *         empty if there are none
     */
    public List<Symbol[]> getRules(Nonterminal symbol) {
        return Collections.unmodifiableList(ruleMap.getOrDefault(symbol, List.of()));
    }

    /**
     * Add a production to the grammar of the form lhs -> rhs
     *
     * @param lhs nonterminal on the LHS of the production
     * @param rhs Symbols on the RHS of the production, may be either terminal or nonterminal
     * @throws NullPointerException if {@code rhs} is null
     */
    public void addRule(Nonterminal lhs, Symbol... rhs) {
        Objects.requireNonNull(rhs, "rhs");
        // Store a private copy so later changes to the caller's array cannot
        // silently rewrite a production that is already part of the grammar.
        this.ruleMap.computeIfAbsent(lhs, key -> new ArrayList<>()).add(rhs.clone());
    }

    // Various ergonomic ways of adding multiple rules for a single LHS

    /**
     * Add multiple productions for a single LHS to the grammar
     *
     * @param lhs  nonterminal on the LHS of the production
     * @param rhss Iterable of RHS's
     */
    public void addRules(Nonterminal lhs, Iterable<Symbol[]> rhss) {
        for (Symbol[] rhs : rhss) {
            addRule(lhs, rhs);
        }
    }

    /**
     * Add multiple productions for a single LHS to the grammar
     *
     * @param lhs  nonterminal on the LHS of the production
     * @param rhss array of RHS's
     */
    public void addRules(Nonterminal lhs, Symbol[]... rhss) {
        for (Symbol[] rhs : rhss) {
            addRule(lhs, rhs);
        }
    }

    // Shorthand constructors

    /** @return a new {@link Terminal} with text {@code v} */
    public static Terminal t(String v) {
        return new Terminal(v);
    }

    /** @return a new {@link Nonterminal} named {@code v} */
    public static Nonterminal nt(String v) {
        return new Nonterminal(v);
    }
}
package cfg;
import java.util.ArrayDeque;
import java.util.Optional;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.stream.Stream;
import cfg.CFG.Nonterminal;
import cfg.CFG.Symbol;
import cfg.CFG.Terminal;
/**
 * Earley recognizer that also records back-pointers so a parse tree can be
 * rebuilt for the first derivation found.
 */
public class EarleyParser {
    /**
     * Parse the given input string according to the given grammar
     *
     * @param input   string to parse
     * @param grammar CFG to parse with
     * @return an ASTNode representing the parse tree, or null if the input cannot be produced by the grammar
     */
    public static ASTNode parse(String input, CFG grammar) {
        // mem[i] holds every state reached after consuming the first i characters
        @SuppressWarnings("unchecked")
        SortedSet<EarleyState>[] mem = Stream.generate(TreeSet::new).limit(input.length() + 1)
                .toArray(TreeSet[]::new);
        for (Symbol[] rhs : grammar.getRules(grammar.start)) {
            mem[0].add(new EarleyState(grammar.start, rhs, 0));
        }
        for (int i = 0; i <= input.length(); i++) {
            // Work queue: seeded with the states scanned into mem[i], then fed
            // with every state newly added to mem[i] while processing it
            ArrayDeque<EarleyState> q = new ArrayDeque<>(mem[i]);
            while (!q.isEmpty()) {
                EarleyState currState = q.poll();
                if (currState.isDone()) {
                    // Complete. Iterate over a snapshot: when currState spans no
                    // input (startIdx == i) the set being read is the same set
                    // advanceOver adds to, and iterating the live TreeSet would
                    // throw ConcurrentModificationException.
                    for (EarleyState state : mem[currState.startIdx].toArray(new EarleyState[0])) {
                        if (!state.isDone() && state.nextSymbol().equals(currState.lhs)) {
                            advanceOver(mem[i], q, state, currState);
                        }
                    }
                    continue;
                }
                Symbol nextRHSSymbol = currState.nextSymbol();
                if (nextRHSSymbol instanceof Nonterminal) {
                    // Predict
                    Nonterminal nt = (Nonterminal) nextRHSSymbol;
                    for (Symbol[] rhs : grammar.getRules(nt)) {
                        EarleyState newState = new EarleyState(nt, rhs, i);
                        if (mem[i].add(newState)) {
                            newState.leftParent = currState;
                            q.add(newState);
                        }
                    }
                    // If nt has already been completed over the empty span at i,
                    // that completion ran (or will run) without seeing currState
                    // when currState was added later, so advance over it here.
                    // Without this, grammars with empty productions miss parses.
                    for (EarleyState candidate : mem[i].toArray(new EarleyState[0])) {
                        if (candidate.isDone() && candidate.startIdx == i && candidate.lhs.equals(nt)) {
                            advanceOver(mem[i], q, currState, candidate);
                        }
                    }
                } else {
                    // Scan
                    Terminal t = (Terminal) nextRHSSymbol;
                    if (i < input.length() && t.value.equals(String.valueOf(input.charAt(i)))) {
                        EarleyState newState = currState.advance();
                        if (mem[i + 1].add(newState)) {
                            newState.leftParent = currState;
                        }
                    }
                }
            }
        }
        // Accept if a finished start rule spans the whole input
        Optional<EarleyState> end = mem[input.length()].stream()
                .filter((s) -> s.lhs.equals(grammar.start) && s.startIdx == 0 && s.isDone()).findFirst();
        return end.map(EarleyParser::generateParseTree).orElse(null);
    }

    /**
     * Advance {@code waiting} over the nonterminal finished by {@code completed}
     * and, if the resulting state is new to {@code chart}, record its
     * back-pointers and queue it for processing.
     */
    private static void advanceOver(SortedSet<EarleyState> chart, ArrayDeque<EarleyState> queue,
            EarleyState waiting, EarleyState completed) {
        EarleyState advanced = waiting.advance();
        if (chart.add(advanced)) {
            advanced.leftParent = waiting;
            advanced.rightParent = completed;
            queue.add(advanced);
        }
    }

    /**
     * Rebuild the parse tree rooted at a completed state by walking its
     * back-pointers from the last RHS symbol to the first.
     *
     * @param state completed state whose back-pointers were set by {@link #parse}
     * @return node for {@code state.lhs} with one child per RHS symbol
     */
    public static ASTNode generateParseTree(EarleyState state) {
        EarleyState iter = state;
        ASTNode[] children = new ASTNode[state.rhs.length];
        for (int i = state.rhs.length - 1; i >= 0; i--) {
            Symbol s = state.rhs[i];
            if (s instanceof Nonterminal) {
                // rightParent is the completed state that derived this nonterminal
                children[i] = generateParseTree(iter.rightParent);
            } else {
                children[i] = new ASTNode(s);
            }
            // leftParent is the same rule with the dot one symbol further left
            iter = iter.leftParent;
        }
        return new ASTNode(state.lhs, children);
    }
}
package cfg;
import java.util.Arrays;
import java.util.Objects;
import cfg.CFG.Nonterminal;
import cfg.CFG.Symbol;
/**
 * One Earley item: a production {@code lhs -> rhs}, the position of the dot
 * within the RHS, and the input index at which the item started.
 *
 * <p>Equality and ordering are both defined by {@link #compareTo}, whose final
 * tie-break is the rendered text from {@link #toString()}; {@link #hashCode()}
 * hashes that same text so the three stay consistent. The two parent fields
 * are back-pointers and take no part in equality or ordering.
 */
public class EarleyState implements Comparable<EarleyState> {
    public Nonterminal lhs;
    public Symbol[] rhs;
    // Number of RHS symbols already matched (position of the dot)
    public int rhsIdx;
    // Input index at which this item started
    public int startIdx;
    // Back-pointers; assigned by code outside this class
    public EarleyState leftParent;
    public EarleyState rightParent;

    /**
     * @param lhs      nonterminal on the LHS of the production
     * @param rhs      symbols on the RHS of the production; stored by reference
     * @param rhsIdx   position of the dot within {@code rhs}
     * @param startIdx input index at which this item started
     */
    public EarleyState(Nonterminal lhs, Symbol[] rhs, int rhsIdx, int startIdx) {
        this.lhs = lhs;
        this.rhs = rhs;
        this.rhsIdx = rhsIdx;
        this.startIdx = startIdx;
    }

    /**
     * Create an item with the dot at the beginning of the RHS.
     *
     * @param lhs      nonterminal on the LHS of the production
     * @param rhs      symbols on the RHS of the production; stored by reference
     * @param startIdx input index at which this item started
     */
    public EarleyState(Nonterminal lhs, Symbol[] rhs, int startIdx) {
        this(lhs, rhs, 0, startIdx);
    }

    @Override
    public int hashCode() {
        // equals() holds only when the rendered text is identical, so hashing
        // that text guarantees equal states share a hash code. Hashing the
        // symbol objects instead would not: their hash includes their class,
        // which the rendered text does not show.
        return toString().hashCode();
    }

    @Override
    public boolean equals(Object obj) {
        if (!(obj instanceof EarleyState))
            return false;
        return compareTo((EarleyState) obj) == 0;
    }

    @Override
    public int compareTo(EarleyState x) {
        // Order rules by state first
        int byPosition = Integer.compare(startIdx + rhsIdx, x.startIdx + x.rhsIdx);
        if (byPosition != 0) {
            return byPosition;
        }
        // Completed rules come first
        if (isDone() != x.isDone()) {
            return Integer.compare(rhs.length - rhsIdx, x.rhs.length - x.rhsIdx);
        }
        // \shrug, give up and arbitrary-compare at this point
        return toString().compareTo(x.toString());
    }

    /** @return a new item for the same production with the dot moved one symbol right */
    public EarleyState advance() {
        return new EarleyState(this.lhs, this.rhs, this.rhsIdx + 1, this.startIdx);
    }

    /** @return whether the dot is at the end of the RHS */
    public boolean isDone() {
        return this.rhsIdx == this.rhs.length;
    }

    /** @return the RHS symbol immediately after the dot; must not be called when {@link #isDone()} */
    public Symbol nextSymbol() {
        return this.rhs[this.rhsIdx];
    }

    /**
     * @return text of the form {@code lhs -> a . b startIdx}; the dot is
     *         omitted when it sits at the end of the RHS
     */
    @Override
    public String toString() {
        StringBuilder res = new StringBuilder(lhs.toString());
        res.append(" ->");
        for (int i = 0; i < rhs.length; i++) {
            if (i == rhsIdx) {
                res.append(" .");
            }
            res.append(" ").append(rhs[i].toString());
        }
        res.append(" ").append(startIdx);
        return res.toString();
    }
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment