mirror of
https://gitlab.kit.edu/uskyk/typicalc.git
synced 2024-11-08 10:20:41 +00:00
Parser
This commit is contained in:
parent
8700a83390
commit
8b61d32838
114
src/main/java/edu/kit/typicalc/model/parser/LambdaLexer.java
Normal file
114
src/main/java/edu/kit/typicalc/model/parser/LambdaLexer.java
Normal file
@ -0,0 +1,114 @@
|
||||
package edu.kit.typicalc.model.parser;
|
||||
|
||||
import edu.kit.typicalc.model.parser.Token.TokenType;
|
||||
import edu.kit.typicalc.util.Result;
|
||||
|
||||
/**
|
||||
* This class lexes a term given as String into tokens.
|
||||
* Tokens are lexed one by one as requested by the parser.
|
||||
*/
|
||||
public class LambdaLexer {
|
||||
/**
|
||||
* The given term as a String
|
||||
*/
|
||||
private final String term;
|
||||
/**
|
||||
* current position in the term
|
||||
*/
|
||||
private int pos = 0;
|
||||
|
||||
/**
|
||||
* Constructs a lexer that lexes the given term
|
||||
* @param term the term to lex
|
||||
*/
|
||||
public LambdaLexer(String term) {
|
||||
this.term = term;
|
||||
}
|
||||
|
||||
/**
|
||||
* Advances the current char to the next char in the term.
|
||||
*/
|
||||
private void advance() {
|
||||
pos += 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Lexes and returns the next token.
|
||||
* @return the next token
|
||||
*/
|
||||
public Result<Token, ParseError> nextToken() {
|
||||
while (pos < term.length() && Character.isWhitespace(term.charAt(pos))) {
|
||||
advance();
|
||||
}
|
||||
if (pos >= term.length()) {
|
||||
// term ended, return EOF
|
||||
return new Result<>(new Token(TokenType.EOF, "", pos));
|
||||
}
|
||||
Token t;
|
||||
char c = term.charAt(pos);
|
||||
switch (c) {
|
||||
// bunch of single-character tokens
|
||||
case '.':
|
||||
t = new Token(TokenType.DOT, ".", pos);
|
||||
advance();
|
||||
return new Result<>(t);
|
||||
case '(':
|
||||
t = new Token(TokenType.LP, "(", pos);
|
||||
advance();
|
||||
return new Result<>(t);
|
||||
case ')':
|
||||
t = new Token(TokenType.RP, ")", pos);
|
||||
advance();
|
||||
return new Result<>(t);
|
||||
case '=':
|
||||
t = new Token(TokenType.EQ, "=", pos);
|
||||
advance();
|
||||
return new Result<>(t);
|
||||
case '\\':
|
||||
case 'λ':
|
||||
t = new Token(TokenType.LAMBDA, c+"", pos);
|
||||
advance();
|
||||
return new Result<>(t);
|
||||
default:
|
||||
if (Character.isLetter(c)) {
|
||||
// identifier
|
||||
StringBuilder sb = new StringBuilder();
|
||||
do {
|
||||
sb.append(term.charAt(pos));
|
||||
advance();
|
||||
} while (pos < term.length() && Character.isLetterOrDigit(term.charAt(pos)));
|
||||
String s = sb.toString();
|
||||
TokenType type;
|
||||
switch (s) {
|
||||
case "let":
|
||||
type = TokenType.LET;
|
||||
break;
|
||||
case "in":
|
||||
type = TokenType.IN;
|
||||
break;
|
||||
case "true":
|
||||
type = TokenType.TRUE;
|
||||
break;
|
||||
case "false":
|
||||
type = TokenType.FALSE;
|
||||
break;
|
||||
default:
|
||||
type = TokenType.VARIABLE;
|
||||
break;
|
||||
}
|
||||
return new Result<>(new Token(type, sb.toString(), pos));
|
||||
} else if (Character.isDigit(c)) {
|
||||
// number literal
|
||||
StringBuilder sb = new StringBuilder();
|
||||
do {
|
||||
sb.append(term.charAt(pos));
|
||||
advance();
|
||||
} while (pos < term.length() && Character.isDigit(term.charAt(pos)));
|
||||
return new Result<>(new Token(TokenType.NUMBER, sb.toString(), pos));
|
||||
} else {
|
||||
//throw new ParseException("Illegal character '" + term.charAt(pos) + "'");
|
||||
return new Result<>(null, ParseError.UNEXPECTED_CHARACTER);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
171
src/main/java/edu/kit/typicalc/model/parser/LambdaParser.java
Normal file
171
src/main/java/edu/kit/typicalc/model/parser/LambdaParser.java
Normal file
@ -0,0 +1,171 @@
|
||||
package edu.kit.typicalc.model.parser;
|
||||
|
||||
import edu.kit.typicalc.model.parser.Token.TokenType;
|
||||
import edu.kit.typicalc.model.term.AbsTerm;
|
||||
import edu.kit.typicalc.model.term.AppTerm;
|
||||
import edu.kit.typicalc.model.term.BooleanTerm;
|
||||
import edu.kit.typicalc.model.term.IntegerTerm;
|
||||
import edu.kit.typicalc.model.term.LambdaTerm;
|
||||
import edu.kit.typicalc.model.term.LetTerm;
|
||||
import edu.kit.typicalc.model.term.VarTerm;
|
||||
import edu.kit.typicalc.util.Result;
|
||||
|
||||
import java.util.EnumSet;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
|
||||
public class LambdaParser {
|
||||
/**
|
||||
* lexer to translate a String into tokens
|
||||
*/
|
||||
private final LambdaLexer lexer;
|
||||
/**
|
||||
* Next token to use while parsing.
|
||||
* The following invariant holds:
|
||||
* When calling a parseX method, token is the first token of X
|
||||
* (as opposed to the last token of the previous construct).
|
||||
*/
|
||||
private Token token;
|
||||
|
||||
private static final Set<TokenType> atomStartTokens
|
||||
= EnumSet.of(TokenType.VARIABLE, TokenType.NUMBER, TokenType.TRUE,
|
||||
TokenType.FALSE, TokenType.LP);
|
||||
|
||||
/**
|
||||
* Constructs a parser with the specified String
|
||||
* @param term String to parse
|
||||
*/
|
||||
public LambdaParser(String term) {
|
||||
this.lexer = new LambdaLexer(term);
|
||||
nextToken();
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets token to the next available token.
|
||||
*/
|
||||
private Optional<ParseError> nextToken() {
|
||||
Result<Token, ParseError> nextToken = lexer.nextToken();
|
||||
if (nextToken.isError()) {
|
||||
return Optional.of(nextToken.unwrapError());
|
||||
}
|
||||
token = nextToken.unwrap();
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks that the token type of current token matches the token type given as parameter.
|
||||
* If successful, returns that token and advances to the next token.
|
||||
* Returns false otherwise.
|
||||
* @param type the token type to compare the current token type to
|
||||
*/
|
||||
private boolean expect(TokenType type) {
|
||||
TokenType current = token.getType();
|
||||
nextToken(); // TODO: Fehlerbehandlung
|
||||
return current == type;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the String given in the constructor as a term.
|
||||
* @return the term given by the String
|
||||
*/
|
||||
public Result<LambdaTerm, ParseError> parse() {
|
||||
Result<LambdaTerm, ParseError> t = parseTerm();
|
||||
if (!expect(TokenType.EOF)) {
|
||||
return new Result<>(null, ParseError.TOO_MANY_TOKENS);
|
||||
}
|
||||
return t;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses a term.
|
||||
* @return the term, or an error
|
||||
*/
|
||||
private Result<LambdaTerm, ParseError> parseTerm() {
|
||||
switch (token.getType()) {
|
||||
case LAMBDA:
|
||||
Result<AbsTerm, ParseError> abs = parseAbstraction();
|
||||
return new Result<>(abs.unwrap(), abs.unwrapError());
|
||||
case LET:
|
||||
Result<LetTerm, ParseError> let = parseLet();
|
||||
return new Result<>(let.unwrap(), let.unwrapError());
|
||||
default:
|
||||
return parseApplication();
|
||||
}
|
||||
}
|
||||
|
||||
private Result<AbsTerm, ParseError> parseAbstraction() {
|
||||
nextToken();
|
||||
Result<VarTerm, ParseError> var = parseVar();
|
||||
if (!expect(TokenType.DOT)) {
|
||||
// TODO
|
||||
}
|
||||
Result<LambdaTerm, ParseError> body = parseTerm();
|
||||
// TODO: Fehlerbehandlung
|
||||
return new Result(new AbsTerm(var.unwrap(), body.unwrap()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses an application or constructs of higher precedence.
|
||||
* @return the term, or an error
|
||||
*/
|
||||
private Result<LambdaTerm, ParseError> parseApplication() {
|
||||
LambdaTerm left = parseAtom().unwrap(); // TODO: Fehlerbehandlung
|
||||
while (atomStartTokens.contains(token.getType())) {
|
||||
LambdaTerm atom = parseAtom().unwrap(); // TODO: Fehlerbehandlung
|
||||
left = new AppTerm(left, atom);
|
||||
}
|
||||
return new Result<>(left);
|
||||
}
|
||||
|
||||
private Result<LetTerm, ParseError> parseLet() {
|
||||
// TODO: Fehlerbehandlung
|
||||
expect(TokenType.LET);
|
||||
VarTerm var = parseVar().unwrap();
|
||||
expect(TokenType.EQ);
|
||||
LambdaTerm def = parseTerm().unwrap();
|
||||
expect(TokenType.IN);
|
||||
LambdaTerm body = parseTerm().unwrap();
|
||||
return new Result<>(new LetTerm(var, def, body));
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses an atom (variable or number) or a parenthesised expression.
|
||||
* @return the term
|
||||
*/
|
||||
private Result<LambdaTerm, ParseError> parseAtom() {
|
||||
switch (token.getType()) {
|
||||
case VARIABLE:
|
||||
Result<VarTerm, ParseError> var = parseVar();
|
||||
return new Result<>(var.unwrap(), var.unwrapError());
|
||||
case NUMBER:
|
||||
String number = token.getText();
|
||||
int n;
|
||||
try {
|
||||
n = Integer.parseInt(number);
|
||||
} catch (NumberFormatException e) {
|
||||
return new Result<>(null, ParseError.UNEXPECTED_CHARACTER);
|
||||
}
|
||||
nextToken();
|
||||
return new Result<>(new IntegerTerm(n));
|
||||
case TRUE:
|
||||
case FALSE:
|
||||
String boolText = token.getText();
|
||||
boolean b = Boolean.parseBoolean(boolText);
|
||||
nextToken();
|
||||
return new Result<>(new BooleanTerm(b));
|
||||
default:
|
||||
expect(TokenType.LP);
|
||||
Result<LambdaTerm, ParseError> term = parseTerm();
|
||||
expect(TokenType.RP);
|
||||
return term;
|
||||
}
|
||||
}
|
||||
|
||||
private Result<VarTerm, ParseError> parseVar() {
|
||||
String s = token.getText();
|
||||
if (!expect(TokenType.VARIABLE)) {
|
||||
return new Result<>(null, ParseError.UNEXPECTED_TOKEN);
|
||||
}
|
||||
return new Result<>(new VarTerm(s));
|
||||
}
|
||||
}
|
@ -0,0 +1,7 @@
|
||||
package edu.kit.typicalc.model.parser;
|
||||
|
||||
/**
 * Kinds of errors reported while lexing or parsing a term.
 */
public enum ParseError {
    /**
     * A token of a different type than the required one was found.
     */
    UNEXPECTED_TOKEN,

    /**
     * Tokens remained after a complete term had been parsed.
     */
    TOO_MANY_TOKENS,

    /**
     * A character that cannot start any token was found.
     */
    UNEXPECTED_CHARACTER;
}
|
78
src/main/java/edu/kit/typicalc/model/parser/Token.java
Normal file
78
src/main/java/edu/kit/typicalc/model/parser/Token.java
Normal file
@ -0,0 +1,78 @@
|
||||
package edu.kit.typicalc.model.parser;
|
||||
|
||||
/**
 * A token of the lambda term language: a token type, the text it was lexed from
 * and the position of that text in the term.
 */
public class Token {
    /**
     * Used to distinguish what kind of token we have.
     * Most of them stand for exactly one character.
     * VARIABLE and NUMBER stand for a whole family of texts.
     * EOF is a special token to indicate that the end of the input is reached.
     */
    enum TokenType {
        /** λ or a backslash */
        LAMBDA,
        /** a letter followed by letters or digits, except for the keywords and constants below */
        VARIABLE,
        /** the keyword let */
        LET,
        /** the keyword in */
        IN,
        /** the constant true */
        TRUE,
        /** the constant false */
        FALSE,
        /** one or more digits */
        NUMBER,
        /** ( */
        LP,
        /** ) */
        RP,
        /** . */
        DOT,
        /** = */
        EQ,
        /** pseudo token if the end of the input is reached */
        EOF
    }

    /**
     * token type of this Token
     */
    private final TokenType type;
    /**
     * the text of this token in the source code
     */
    private final String text;
    /**
     * the position associated with this token in the source code
     */
    private final int pos;

    /**
     * Constructs a token.
     *
     * @param type the token type
     * @param text text of this token in the source code
     * @param pos position this token begins
     */
    public Token(TokenType type, String text, int pos) {
        this.type = type;
        this.text = text;
        this.pos = pos;
    }

    /**
     * Returns the token type
     *
     * @return token type
     */
    public TokenType getType() {
        return this.type;
    }

    /**
     * Returns the text of this token in the source code
     *
     * @return text of this token in the source code
     */
    public String getText() {
        return this.text;
    }

    /**
     * Returns the position this token is in
     *
     * @return position this token is in
     */
    public int getPos() {
        return this.pos;
    }

    /**
     * Renders the token as its type followed by the quoted text in parentheses,
     * for example VARIABLE("x"). The position is not part of the output.
     *
     * @return textual representation of this token
     */
    @Override
    public String toString() {
        StringBuilder rendered = new StringBuilder();
        rendered.append(type);
        rendered.append("(\"");
        rendered.append(text);
        rendered.append("\")");
        return rendered.toString();
    }
}
|
7
src/main/java/edu/kit/typicalc/model/term/AbsTerm.java
Normal file
7
src/main/java/edu/kit/typicalc/model/term/AbsTerm.java
Normal file
@ -0,0 +1,7 @@
|
||||
package edu.kit.typicalc.model.term;
|
||||
|
||||
public class AbsTerm extends LambdaTerm {
|
||||
public AbsTerm(VarTerm var, LambdaTerm body) {
|
||||
// TODO
|
||||
}
|
||||
}
|
6
src/main/java/edu/kit/typicalc/model/term/AppTerm.java
Normal file
6
src/main/java/edu/kit/typicalc/model/term/AppTerm.java
Normal file
@ -0,0 +1,6 @@
|
||||
package edu.kit.typicalc.model.term;
|
||||
|
||||
public class AppTerm extends LambdaTerm {
|
||||
public AppTerm(LambdaTerm left, LambdaTerm atom) {
|
||||
}
|
||||
}
|
@ -0,0 +1,7 @@
|
||||
package edu.kit.typicalc.model.term;
|
||||
|
||||
public class BooleanTerm extends ConstTerm {
|
||||
public BooleanTerm(boolean value) {
|
||||
// TODO
|
||||
}
|
||||
}
|
4
src/main/java/edu/kit/typicalc/model/term/ConstTerm.java
Normal file
4
src/main/java/edu/kit/typicalc/model/term/ConstTerm.java
Normal file
@ -0,0 +1,4 @@
|
||||
package edu.kit.typicalc.model.term;
|
||||
|
||||
public class ConstTerm extends LambdaTerm {
|
||||
}
|
@ -0,0 +1,7 @@
|
||||
package edu.kit.typicalc.model.term;
|
||||
|
||||
public class IntegerTerm extends ConstTerm {
|
||||
public IntegerTerm(int value) {
|
||||
// TODO
|
||||
}
|
||||
}
|
6
src/main/java/edu/kit/typicalc/model/term/LetTerm.java
Normal file
6
src/main/java/edu/kit/typicalc/model/term/LetTerm.java
Normal file
@ -0,0 +1,6 @@
|
||||
package edu.kit.typicalc.model.term;
|
||||
|
||||
public class LetTerm extends LambdaTerm {
|
||||
public LetTerm(VarTerm var, LambdaTerm def, LambdaTerm body) {
|
||||
}
|
||||
}
|
@ -1,4 +1,7 @@
|
||||
package edu.kit.typicalc.model.term;
|
||||
|
||||
public class VarTerm {
|
||||
public class VarTerm extends LambdaTerm {
|
||||
public VarTerm(String s) {
|
||||
super();
|
||||
}
|
||||
}
|
||||
|
28
src/main/java/edu/kit/typicalc/util/Result.java
Normal file
28
src/main/java/edu/kit/typicalc/util/Result.java
Normal file
@ -0,0 +1,28 @@
|
||||
package edu.kit.typicalc.util;
|
||||
|
||||
/**
 * Outcome of an operation: a value, an error, or whatever combination of both
 * the two-argument constructor was given.
 *
 * @param <T> type of the value
 * @param <E> type of the error
 */
public class Result<T, E> {
    /**
     * the value, as passed to the constructor
     */
    private final T value;
    /**
     * the error, or null if this result is not an error
     */
    private final E error;

    /**
     * Creates a result that holds the given value and no error.
     *
     * @param value the value
     */
    public Result(T value) {
        this(value, null);
    }

    /**
     * Creates a result from a value and an error. Neither argument is checked.
     * A constructor taking only the error cannot exist next to the one taking
     * only the value, hence the value has to be passed here as well.
     *
     * @param value the value, usually null if there is an error
     * @param error the error, or null if there is none
     */
    public Result(T value, E error) {
        this.value = value;
        this.error = error;
    }

    /**
     * Tells whether this result holds an error.
     *
     * @return true if the error is not null
     */
    public boolean isError() {
        return !(error == null);
    }

    /**
     * Returns the stored value without checking for an error.
     *
     * @return the value, possibly null
     */
    public T unwrap() {
        return this.value;
    }

    /**
     * Returns the stored error.
     *
     * @return the error, or null if this result is not an error
     */
    public E unwrapError() {
        return this.error;
    }
}
|
Loading…
Reference in New Issue
Block a user