This commit is contained in:
Arne Keller 2021-01-27 10:03:03 +01:00
parent 8700a83390
commit 8b61d32838
12 changed files with 439 additions and 1 deletions

View File

@ -0,0 +1,114 @@
package edu.kit.typicalc.model.parser;
import edu.kit.typicalc.model.parser.Token.TokenType;
import edu.kit.typicalc.util.Result;
/**
 * Lexer that splits a lambda term given as a String into {@link Token}s.
 * Tokens are produced lazily, one per call to {@link #nextToken()}, as requested by the parser.
 */
public class LambdaLexer {
    /**
     * The complete term to lex.
     */
    private final String term;
    /**
     * Index of the next character of {@link #term} that has not been consumed yet.
     */
    private int pos = 0;

    /**
     * Constructs a lexer that lexes the given term.
     * @param term the term to lex
     */
    public LambdaLexer(String term) {
        this.term = term;
    }

    /**
     * Moves the current position to the next character of the term.
     */
    private void advance() {
        pos += 1;
    }

    /**
     * Lexes and returns the next token.
     * Leading whitespace is skipped. Once the end of the term is reached, every call
     * returns an EOF token. An illegal character is reported as
     * {@link ParseError#UNEXPECTED_CHARACTER} and is not consumed, so repeated calls
     * keep reporting the same error.
     * @return the next token, or an error if the next character cannot start a token
     */
    public Result<Token, ParseError> nextToken() {
        while (pos < term.length() && Character.isWhitespace(term.charAt(pos))) {
            advance();
        }
        if (pos >= term.length()) {
            // term ended, return EOF
            return new Result<>(new Token(TokenType.EOF, "", pos));
        }
        char c = term.charAt(pos);
        switch (c) {
            case '.':
                return singleCharToken(TokenType.DOT, c);
            case '(':
                return singleCharToken(TokenType.LP, c);
            case ')':
                return singleCharToken(TokenType.RP, c);
            case '=':
                return singleCharToken(TokenType.EQ, c);
            case '\\':
            case 'λ':
                return singleCharToken(TokenType.LAMBDA, c);
            default:
                break;
        }
        if (Character.isLetter(c)) {
            return lexIdentifier();
        }
        if (Character.isDigit(c)) {
            return lexNumber();
        }
        return new Result<>(null, ParseError.UNEXPECTED_CHARACTER);
    }

    /**
     * Builds a token for the single character at the current position and consumes it.
     */
    private Result<Token, ParseError> singleCharToken(TokenType type, char c) {
        Token t = new Token(type, String.valueOf(c), pos);
        advance();
        return new Result<>(t);
    }

    /**
     * Lexes a keyword, boolean constant or variable starting at the current position.
     */
    private Result<Token, ParseError> lexIdentifier() {
        // remember where the token begins: pos points past the token after the loop
        int start = pos;
        do {
            advance();
        } while (pos < term.length() && Character.isLetterOrDigit(term.charAt(pos)));
        String text = term.substring(start, pos);
        return new Result<>(new Token(keywordOrVariable(text), text, start));
    }

    /**
     * Lexes a number literal (a run of digits) starting at the current position.
     */
    private Result<Token, ParseError> lexNumber() {
        // remember where the token begins: pos points past the token after the loop
        int start = pos;
        do {
            advance();
        } while (pos < term.length() && Character.isDigit(term.charAt(pos)));
        return new Result<>(new Token(TokenType.NUMBER, term.substring(start, pos), start));
    }

    /**
     * Maps reserved words to their token type; every other identifier is a variable.
     */
    private static TokenType keywordOrVariable(String text) {
        switch (text) {
            case "let":
                return TokenType.LET;
            case "in":
                return TokenType.IN;
            case "true":
                return TokenType.TRUE;
            case "false":
                return TokenType.FALSE;
            default:
                return TokenType.VARIABLE;
        }
    }
}

View File

@ -0,0 +1,171 @@
package edu.kit.typicalc.model.parser;
import edu.kit.typicalc.model.parser.Token.TokenType;
import edu.kit.typicalc.model.term.AbsTerm;
import edu.kit.typicalc.model.term.AppTerm;
import edu.kit.typicalc.model.term.BooleanTerm;
import edu.kit.typicalc.model.term.IntegerTerm;
import edu.kit.typicalc.model.term.LambdaTerm;
import edu.kit.typicalc.model.term.LetTerm;
import edu.kit.typicalc.model.term.VarTerm;
import edu.kit.typicalc.util.Result;
import java.util.EnumSet;
import java.util.Optional;
import java.util.Set;
public class LambdaParser {
/**
* lexer to translate a String into tokens
*/
private final LambdaLexer lexer;
/**
* Next token to use while parsing.
* The following invariant holds:
* When calling a parseX method, token is the first token of X
* (as opposed to the last token of the previous construct).
*/
private Token token;
private static final Set<TokenType> atomStartTokens
= EnumSet.of(TokenType.VARIABLE, TokenType.NUMBER, TokenType.TRUE,
TokenType.FALSE, TokenType.LP);
/**
* Constructs a parser with the specified String
* @param term String to parse
*/
public LambdaParser(String term) {
this.lexer = new LambdaLexer(term);
nextToken();
}
/**
* Sets token to the next available token.
*/
private Optional<ParseError> nextToken() {
Result<Token, ParseError> nextToken = lexer.nextToken();
if (nextToken.isError()) {
return Optional.of(nextToken.unwrapError());
}
token = nextToken.unwrap();
return Optional.empty();
}
/**
* Checks that the token type of current token matches the token type given as parameter.
* If successful, returns that token and advances to the next token.
* Returns false otherwise.
* @param type the token type to compare the current token type to
*/
private boolean expect(TokenType type) {
TokenType current = token.getType();
nextToken(); // TODO: Fehlerbehandlung
return current == type;
}
/**
* Parses the String given in the constructor as a term.
* @return the term given by the String
*/
public Result<LambdaTerm, ParseError> parse() {
Result<LambdaTerm, ParseError> t = parseTerm();
if (!expect(TokenType.EOF)) {
return new Result<>(null, ParseError.TOO_MANY_TOKENS);
}
return t;
}
/**
* Parses a term.
* @return the term, or an error
*/
private Result<LambdaTerm, ParseError> parseTerm() {
switch (token.getType()) {
case LAMBDA:
Result<AbsTerm, ParseError> abs = parseAbstraction();
return new Result<>(abs.unwrap(), abs.unwrapError());
case LET:
Result<LetTerm, ParseError> let = parseLet();
return new Result<>(let.unwrap(), let.unwrapError());
default:
return parseApplication();
}
}
private Result<AbsTerm, ParseError> parseAbstraction() {
nextToken();
Result<VarTerm, ParseError> var = parseVar();
if (!expect(TokenType.DOT)) {
// TODO
}
Result<LambdaTerm, ParseError> body = parseTerm();
// TODO: Fehlerbehandlung
return new Result(new AbsTerm(var.unwrap(), body.unwrap()));
}
/**
* Parses an application or constructs of higher precedence.
* @return the term, or an error
*/
private Result<LambdaTerm, ParseError> parseApplication() {
LambdaTerm left = parseAtom().unwrap(); // TODO: Fehlerbehandlung
while (atomStartTokens.contains(token.getType())) {
LambdaTerm atom = parseAtom().unwrap(); // TODO: Fehlerbehandlung
left = new AppTerm(left, atom);
}
return new Result<>(left);
}
private Result<LetTerm, ParseError> parseLet() {
// TODO: Fehlerbehandlung
expect(TokenType.LET);
VarTerm var = parseVar().unwrap();
expect(TokenType.EQ);
LambdaTerm def = parseTerm().unwrap();
expect(TokenType.IN);
LambdaTerm body = parseTerm().unwrap();
return new Result<>(new LetTerm(var, def, body));
}
/**
* Parses an atom (variable or number) or a parenthesised expression.
* @return the term
*/
private Result<LambdaTerm, ParseError> parseAtom() {
switch (token.getType()) {
case VARIABLE:
Result<VarTerm, ParseError> var = parseVar();
return new Result<>(var.unwrap(), var.unwrapError());
case NUMBER:
String number = token.getText();
int n;
try {
n = Integer.parseInt(number);
} catch (NumberFormatException e) {
return new Result<>(null, ParseError.UNEXPECTED_CHARACTER);
}
nextToken();
return new Result<>(new IntegerTerm(n));
case TRUE:
case FALSE:
String boolText = token.getText();
boolean b = Boolean.parseBoolean(boolText);
nextToken();
return new Result<>(new BooleanTerm(b));
default:
expect(TokenType.LP);
Result<LambdaTerm, ParseError> term = parseTerm();
expect(TokenType.RP);
return term;
}
}
private Result<VarTerm, ParseError> parseVar() {
String s = token.getText();
if (!expect(TokenType.VARIABLE)) {
return new Result<>(null, ParseError.UNEXPECTED_TOKEN);
}
return new Result<>(new VarTerm(s));
}
}

View File

@ -0,0 +1,7 @@
package edu.kit.typicalc.model.parser;
/**
 * Kinds of errors reported while lexing and parsing a lambda term.
 */
public enum ParseError {
    /**
     * A token of a different type than the one required at this point was found.
     */
    UNEXPECTED_TOKEN,

    /**
     * The input continues after a complete term has been parsed.
     */
    TOO_MANY_TOKENS,

    /**
     * The input contains a character that cannot start any token.
     * Also reported for a number literal that cannot be converted to an int.
     */
    UNEXPECTED_CHARACTER
}

View File

@ -0,0 +1,78 @@
package edu.kit.typicalc.model.parser;
/**
 * A token of the lambda term language, as produced by the lexer.
 * Instances are immutable.
 */
public class Token {
    /**
     * The kind of a token.
     * Most kinds correspond to exactly one character or one keyword;
     * VARIABLE and NUMBER cover a whole family of texts.
     * EOF is a pseudo token that marks the end of the input.
     */
    enum TokenType {
        /** the character λ or a backslash */
        LAMBDA,
        /** a letter followed by letters or digits, unless it is a keyword or boolean constant */
        VARIABLE,
        /** the keyword "let" */
        LET,
        /** the keyword "in" */
        IN,
        /** the constant "true" */
        TRUE,
        /** the constant "false" */
        FALSE,
        /** a non-empty run of digits */
        NUMBER,
        /** left parenthesis "(" */
        LP,
        /** right parenthesis ")" */
        RP,
        /** the character "." */
        DOT,
        /** the character "=" */
        EQ,
        /** pseudo token, signals that the end of the input is reached */
        EOF
    }

    /**
     * kind of this token
     */
    private final TokenType type;
    /**
     * source text this token was lexed from
     */
    private final String text;
    /**
     * position in the source associated with this token
     */
    private final int pos;

    /**
     * Creates a token from its three components.
     * @param type the token type
     * @param text text of this token in the source code
     * @param pos position this token begins
     */
    public Token(TokenType type, String text, int pos) {
        this.pos = pos;
        this.text = text;
        this.type = type;
    }

    /**
     * @return the type of this token
     */
    public TokenType getType() {
        return this.type;
    }

    /**
     * @return the text of this token in the source code
     */
    public String getText() {
        return this.text;
    }

    /**
     * @return the position stored for this token
     */
    public int getPos() {
        return this.pos;
    }

    /**
     * Renders the token as its type name followed by the quoted text in parentheses,
     * e.g. {@code DOT(".")}. The position is not part of the representation.
     */
    @Override
    public String toString() {
        return String.format("%s(\"%s\")", type, text);
    }
}

View File

@ -0,0 +1,7 @@
package edu.kit.typicalc.model.term;
/**
* Lambda term node for an abstraction. The parser creates it from the bound
* variable and the body of the abstraction.
* NOTE(review): still a stub - the constructor does not store its arguments.
*/
public class AbsTerm extends LambdaTerm {
/**
* @param var the variable bound by the abstraction (currently discarded)
* @param body the body of the abstraction (currently discarded)
*/
public AbsTerm(VarTerm var, LambdaTerm body) {
// TODO
}
}

View File

@ -0,0 +1,6 @@
package edu.kit.typicalc.model.term;
/**
* Lambda term node for an application. The parser creates it from the term
* parsed so far (left) and the next atom.
* NOTE(review): still a stub - the constructor does not store its arguments.
*/
public class AppTerm extends LambdaTerm {
/**
* @param left the left-hand side of the application (currently discarded)
* @param atom the term the left-hand side is applied to (currently discarded)
*/
public AppTerm(LambdaTerm left, LambdaTerm atom) {
}
}

View File

@ -0,0 +1,7 @@
package edu.kit.typicalc.model.term;
/**
* Constant term for a boolean literal. The parser creates it for the
* TRUE and FALSE tokens.
* NOTE(review): still a stub - the constructor does not store the value.
*/
public class BooleanTerm extends ConstTerm {
/**
* @param value the boolean value of the literal (currently discarded)
*/
public BooleanTerm(boolean value) {
// TODO
}
}

View File

@ -0,0 +1,4 @@
package edu.kit.typicalc.model.term;
/**
* Common superclass of the constant terms; it declares no members of its own.
*/
public class ConstTerm extends LambdaTerm {
}

View File

@ -0,0 +1,7 @@
package edu.kit.typicalc.model.term;
/**
* Constant term for an integer literal. The parser creates it for NUMBER tokens.
* NOTE(review): still a stub - the constructor does not store the value.
*/
public class IntegerTerm extends ConstTerm {
/**
* @param value the int value of the literal (currently discarded)
*/
public IntegerTerm(int value) {
// TODO
}
}

View File

@ -0,0 +1,6 @@
package edu.kit.typicalc.model.term;
/**
* Lambda term node for a let expression. The parser creates it from the
* variable, its definition and the body following "in".
* NOTE(review): still a stub - the constructor does not store its arguments.
*/
public class LetTerm extends LambdaTerm {
/**
* @param var the variable introduced by the let expression (currently discarded)
* @param def the term the variable is defined as (currently discarded)
* @param body the term following "in" (currently discarded)
*/
public LetTerm(VarTerm var, LambdaTerm def, LambdaTerm body) {
}
}

View File

@ -1,4 +1,7 @@
package edu.kit.typicalc.model.term;
public class VarTerm {
/**
* Lambda term node for a variable. The parser creates it from the text of a
* VARIABLE token.
* NOTE(review): still a stub - the constructor does not store the name.
*/
public class VarTerm extends LambdaTerm {
/**
* @param s the text of the variable token (currently discarded)
*/
public VarTerm(String s) {
super();
}
}

View File

@ -0,0 +1,28 @@
package edu.kit.typicalc.util;
/**
 * Holds the outcome of an operation: a value, an error, or (via the two-argument
 * constructor) whatever combination the caller supplies.
 * A result counts as an error exactly when its error component is not null.
 *
 * @param <T> type of the value
 * @param <E> type of the error
 */
public class Result<T, E> {
    private final T value;
    private final E error;

    /**
     * Creates a result that carries only a value; its error component is null.
     * @param value the value
     */
    public Result(T value) {
        this(value, null);
    }

    /**
     * Creates a result from both components, stored exactly as given.
     * A separate one-argument constructor for the error case is not possible,
     * since Java does not allow it next to the one-argument value constructor.
     * @param value the value, usually null for an error result
     * @param error the error, null for a successful result
     */
    public Result(T value, E error) {
        this.error = error;
        this.value = value;
    }

    /**
     * @return true if an error is stored, false if the error component is null
     */
    public boolean isError() {
        return !(this.error == null);
    }

    /**
     * @return the stored value as is; no check against the error component is made
     */
    public T unwrap() {
        return this.value;
    }

    /**
     * @return the stored error, or null if there is none
     */
    public E unwrapError() {
        return this.error;
    }
}