From 8b61d32838ce2cf0ed130d2b3e8d7720e5a4f417 Mon Sep 17 00:00:00 2001
From: Arne Keller
Date: Wed, 27 Jan 2021 10:03:03 +0100
Subject: [PATCH] Parser

---
 .../typicalc/model/parser/LambdaLexer.java    | 114 +++++++++
 .../typicalc/model/parser/LambdaParser.java   | 235 ++++++++++++++++++
 .../kit/typicalc/model/parser/ParseError.java |  19 +
 .../edu/kit/typicalc/model/parser/Token.java  |  81 ++++++
 .../edu/kit/typicalc/model/term/AbsTerm.java  |   7 +
 .../edu/kit/typicalc/model/term/AppTerm.java  |   6 +
 .../kit/typicalc/model/term/BooleanTerm.java  |   7 +
 .../kit/typicalc/model/term/ConstTerm.java    |   4 +
 .../kit/typicalc/model/term/IntegerTerm.java  |   7 +
 .../edu/kit/typicalc/model/term/LetTerm.java  |   6 +
 .../edu/kit/typicalc/model/term/VarTerm.java  |   5 +-
 .../java/edu/kit/typicalc/util/Result.java    |  57 ++++
 12 files changed, 547 insertions(+), 1 deletion(-)
 create mode 100644 src/main/java/edu/kit/typicalc/model/parser/LambdaLexer.java
 create mode 100644 src/main/java/edu/kit/typicalc/model/parser/LambdaParser.java
 create mode 100644 src/main/java/edu/kit/typicalc/model/parser/ParseError.java
 create mode 100644 src/main/java/edu/kit/typicalc/model/parser/Token.java
 create mode 100644 src/main/java/edu/kit/typicalc/model/term/AbsTerm.java
 create mode 100644 src/main/java/edu/kit/typicalc/model/term/AppTerm.java
 create mode 100644 src/main/java/edu/kit/typicalc/model/term/BooleanTerm.java
 create mode 100644 src/main/java/edu/kit/typicalc/model/term/ConstTerm.java
 create mode 100644 src/main/java/edu/kit/typicalc/model/term/IntegerTerm.java
 create mode 100644 src/main/java/edu/kit/typicalc/model/term/LetTerm.java
 create mode 100644 src/main/java/edu/kit/typicalc/util/Result.java

diff --git a/src/main/java/edu/kit/typicalc/model/parser/LambdaLexer.java b/src/main/java/edu/kit/typicalc/model/parser/LambdaLexer.java
new file mode 100644
index 0000000..252c167
--- /dev/null
+++ b/src/main/java/edu/kit/typicalc/model/parser/LambdaLexer.java
@@ -0,0 +1,114 @@
+package edu.kit.typicalc.model.parser;
+
+import edu.kit.typicalc.model.parser.Token.TokenType;
+import edu.kit.typicalc.util.Result;
+
+/**
+ * This class lexes a term given as String into tokens.
+ * Tokens are lexed one by one as requested by the parser.
+ */
+public class LambdaLexer {
+    /**
+     * The given term as a String
+     */
+    private final String term;
+    /**
+     * current position in the term
+     */
+    private int pos = 0;
+
+    /**
+     * Constructs a lexer that lexes the given term
+     * @param term the term to lex
+     */
+    public LambdaLexer(String term) {
+        this.term = term;
+    }
+
+    /**
+     * Advances the current char to the next char in the term.
+     */
+    private void advance() {
+        pos += 1;
+    }
+
+    /**
+     * Lexes and returns the next token. At the end of the term, an EOF token is returned.
+     * @return the next token, or UNEXPECTED_CHARACTER if no token can start at the current char
+     */
+    public Result<Token, ParseError> nextToken() {
+        while (pos < term.length() && Character.isWhitespace(term.charAt(pos))) {
+            advance();
+        }
+        if (pos >= term.length()) {
+            // term ended, return EOF
+            return new Result<>(new Token(TokenType.EOF, "", pos));
+        }
+        Token t;
+        char c = term.charAt(pos);
+        switch (c) {
+            // bunch of single-character tokens
+            case '.':
+                t = new Token(TokenType.DOT, ".", pos);
+                advance();
+                return new Result<>(t);
+            case '(':
+                t = new Token(TokenType.LP, "(", pos);
+                advance();
+                return new Result<>(t);
+            case ')':
+                t = new Token(TokenType.RP, ")", pos);
+                advance();
+                return new Result<>(t);
+            case '=':
+                t = new Token(TokenType.EQ, "=", pos);
+                advance();
+                return new Result<>(t);
+            case '\\':
+            case 'λ':
+                t = new Token(TokenType.LAMBDA, String.valueOf(c), pos);
+                advance();
+                return new Result<>(t);
+            default:
+                if (Character.isLetter(c)) {
+                    // identifier or keyword; pos is behind the token after the loop
+                    StringBuilder sb = new StringBuilder();
+                    do {
+                        sb.append(term.charAt(pos));
+                        advance();
+                    } while (pos < term.length() && Character.isLetterOrDigit(term.charAt(pos)));
+                    String s = sb.toString();
+                    TokenType type;
+                    switch (s) {
+                        case "let":
+                            type = TokenType.LET;
+                            break;
+                        case "in":
+                            type = TokenType.IN;
+                            break;
+                        case "true":
+                            type = TokenType.TRUE;
+                            break;
+                        case "false":
+                            type = TokenType.FALSE;
+                            break;
+                        default:
+                            type = TokenType.VARIABLE;
+                            break;
+                    }
+                    return new Result<>(new Token(type, s, pos - s.length()));
+                } else if (Character.isDigit(c)) {
+                    // number literal; pos is behind the token after the loop
+                    StringBuilder sb = new StringBuilder();
+                    do {
+                        sb.append(term.charAt(pos));
+                        advance();
+                    } while (pos < term.length() && Character.isDigit(term.charAt(pos)));
+                    return new Result<>(new Token(TokenType.NUMBER, sb.toString(), pos - sb.length()));
+                } else {
+                    // illegal character: reported as an error value, pos is not advanced
+                    return new Result<>(null, ParseError.UNEXPECTED_CHARACTER);
+                }
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/edu/kit/typicalc/model/parser/LambdaParser.java b/src/main/java/edu/kit/typicalc/model/parser/LambdaParser.java
new file mode 100644
index 0000000..2625ec9
--- /dev/null
+++ b/src/main/java/edu/kit/typicalc/model/parser/LambdaParser.java
@@ -0,0 +1,235 @@
+package edu.kit.typicalc.model.parser;
+
+import edu.kit.typicalc.model.parser.Token.TokenType;
+import edu.kit.typicalc.model.term.AbsTerm;
+import edu.kit.typicalc.model.term.AppTerm;
+import edu.kit.typicalc.model.term.BooleanTerm;
+import edu.kit.typicalc.model.term.IntegerTerm;
+import edu.kit.typicalc.model.term.LambdaTerm;
+import edu.kit.typicalc.model.term.LetTerm;
+import edu.kit.typicalc.model.term.VarTerm;
+import edu.kit.typicalc.util.Result;
+
+import java.util.EnumSet;
+import java.util.Optional;
+import java.util.Set;
+
+/**
+ * Recursive descent parser that turns a lambda term given as String into a LambdaTerm.
+ * Parsing stops at the first error, which is returned as a ParseError.
+ */
+public class LambdaParser {
+    /**
+     * lexer to translate a String into tokens
+     */
+    private final LambdaLexer lexer;
+    /**
+     * Next token to use while parsing.
+     * The following invariant holds:
+     * When calling a parseX method, token is the first token of X
+     * (as opposed to the last token of the previous construct).
+     */
+    private Token token;
+    /**
+     * First error reported by the lexer, or null if there was none so far.
+     */
+    private ParseError lexerError;
+
+    private static final Set<TokenType> atomStartTokens
+            = EnumSet.of(TokenType.VARIABLE, TokenType.NUMBER, TokenType.TRUE,
+            TokenType.FALSE, TokenType.LP);
+
+    /**
+     * Constructs a parser with the specified String
+     * @param term String to parse
+     */
+    public LambdaParser(String term) {
+        this.lexer = new LambdaLexer(term);
+        nextToken();
+    }
+
+    /**
+     * Sets token to the next available token.
+     * If the lexer fails, the error is remembered and token is set to an EOF token,
+     * so that token is never null or stale and every parsing loop terminates.
+     * @return the lexer error, or an empty Optional if lexing succeeded
+     */
+    private Optional<ParseError> nextToken() {
+        Result<Token, ParseError> nextToken = lexer.nextToken();
+        if (nextToken.isError()) {
+            lexerError = nextToken.unwrapError();
+            token = new Token(TokenType.EOF, "", -1); // position is not known here
+            return Optional.of(lexerError);
+        }
+        token = nextToken.unwrap();
+        return Optional.empty();
+    }
+
+    /**
+     * Checks that the token type of current token matches the token type given as parameter.
+     * If successful, advances to the next token and returns true.
+     * Returns false otherwise, without consuming the current token.
+     * @param type the token type to compare the current token type to
+     * @return whether the current token had the given type
+     */
+    private boolean expect(TokenType type) {
+        if (token.getType() != type) {
+            return false;
+        }
+        nextToken(); // a lexer error is remembered and reported by parse()
+        return true;
+    }
+
+    /**
+     * Parses the String given in the constructor as a term.
+     * @return the term given by the String, or the first error encountered
+     */
+    public Result<LambdaTerm, ParseError> parse() {
+        Result<LambdaTerm, ParseError> t = parseTerm();
+        if (lexerError != null) {
+            // an illegal character takes precedence over errors caused by the EOF stand-in
+            return new Result<>(null, lexerError);
+        }
+        if (t.isError()) {
+            return t;
+        }
+        if (!expect(TokenType.EOF)) {
+            return new Result<>(null, ParseError.TOO_MANY_TOKENS);
+        }
+        return t;
+    }
+
+    /**
+     * Parses a term.
+     * @return the term, or an error
+     */
+    private Result<LambdaTerm, ParseError> parseTerm() {
+        switch (token.getType()) {
+            case LAMBDA:
+                Result<AbsTerm, ParseError> abs = parseAbstraction();
+                return new Result<>(abs.unwrap(), abs.unwrapError());
+            case LET:
+                Result<LetTerm, ParseError> let = parseLet();
+                return new Result<>(let.unwrap(), let.unwrapError());
+            default:
+                return parseApplication();
+        }
+    }
+
+    /**
+     * Parses an abstraction: a lambda, the bound variable, a dot and the body.
+     * @return the abstraction, or an error
+     */
+    private Result<AbsTerm, ParseError> parseAbstraction() {
+        if (!expect(TokenType.LAMBDA)) {
+            return new Result<>(null, ParseError.UNEXPECTED_TOKEN);
+        }
+        Result<VarTerm, ParseError> var = parseVar();
+        if (var.isError()) {
+            return new Result<>(null, var.unwrapError());
+        }
+        if (!expect(TokenType.DOT)) {
+            return new Result<>(null, ParseError.UNEXPECTED_TOKEN);
+        }
+        Result<LambdaTerm, ParseError> body = parseTerm();
+        if (body.isError()) {
+            return new Result<>(null, body.unwrapError());
+        }
+        return new Result<>(new AbsTerm(var.unwrap(), body.unwrap()));
+    }
+
+    /**
+     * Parses an application or constructs of higher precedence.
+     * @return the term, or an error
+     */
+    private Result<LambdaTerm, ParseError> parseApplication() {
+        Result<LambdaTerm, ParseError> left = parseAtom();
+        while (!left.isError() && atomStartTokens.contains(token.getType())) {
+            Result<LambdaTerm, ParseError> atom = parseAtom();
+            if (atom.isError()) {
+                return atom;
+            }
+            left = new Result<>(new AppTerm(left.unwrap(), atom.unwrap()));
+        }
+        return left;
+    }
+
+    /**
+     * Parses a let term: let, a variable, an equals sign, the definition, in and the body.
+     * @return the let term, or an error
+     */
+    private Result<LetTerm, ParseError> parseLet() {
+        if (!expect(TokenType.LET)) {
+            return new Result<>(null, ParseError.UNEXPECTED_TOKEN);
+        }
+        Result<VarTerm, ParseError> var = parseVar();
+        if (var.isError()) {
+            return new Result<>(null, var.unwrapError());
+        }
+        if (!expect(TokenType.EQ)) {
+            return new Result<>(null, ParseError.UNEXPECTED_TOKEN);
+        }
+        Result<LambdaTerm, ParseError> def = parseTerm();
+        if (def.isError()) {
+            return new Result<>(null, def.unwrapError());
+        }
+        if (!expect(TokenType.IN)) {
+            return new Result<>(null, ParseError.UNEXPECTED_TOKEN);
+        }
+        Result<LambdaTerm, ParseError> body = parseTerm();
+        if (body.isError()) {
+            return new Result<>(null, body.unwrapError());
+        }
+        return new Result<>(new LetTerm(var.unwrap(), def.unwrap(), body.unwrap()));
+    }
+
+    /**
+     * Parses an atom (variable, number or boolean) or a parenthesised expression.
+     * @return the term, or an error if the current token cannot start an atom
+     */
+    private Result<LambdaTerm, ParseError> parseAtom() {
+        switch (token.getType()) {
+            case VARIABLE:
+                Result<VarTerm, ParseError> var = parseVar();
+                return new Result<>(var.unwrap(), var.unwrapError());
+            case NUMBER:
+                String number = token.getText();
+                int n;
+                try {
+                    n = Integer.parseInt(number);
+                } catch (NumberFormatException e) {
+                    return new Result<>(null, ParseError.UNEXPECTED_CHARACTER);
+                }
+                nextToken();
+                return new Result<>(new IntegerTerm(n));
+            case TRUE:
+            case FALSE:
+                String boolText = token.getText();
+                boolean b = Boolean.parseBoolean(boolText);
+                nextToken();
+                return new Result<>(new BooleanTerm(b));
+            case LP:
+                nextToken();
+                Result<LambdaTerm, ParseError> term = parseTerm();
+                if (!term.isError() && !expect(TokenType.RP)) {
+                    return new Result<>(null, ParseError.UNEXPECTED_TOKEN);
+                }
+                return term;
+            default:
+                // without this case, EOF or a stray token would recurse into parseTerm forever
+                return new Result<>(null, ParseError.UNEXPECTED_TOKEN);
+        }
+    }
+
+    /**
+     * Parses a variable.
+     * @return the variable, or an error if the current token is not a variable
+     */
+    private Result<VarTerm, ParseError> parseVar() {
+        String s = token.getText();
+        if (!expect(TokenType.VARIABLE)) {
+            return new Result<>(null, ParseError.UNEXPECTED_TOKEN);
+        }
+        return new Result<>(new VarTerm(s));
+    }
+}
diff --git a/src/main/java/edu/kit/typicalc/model/parser/ParseError.java b/src/main/java/edu/kit/typicalc/model/parser/ParseError.java
new file mode 100644
index 0000000..bd75ef0
--- /dev/null
+++ b/src/main/java/edu/kit/typicalc/model/parser/ParseError.java
@@ -0,0 +1,19 @@
+package edu.kit.typicalc.model.parser;
+
+/**
+ * The kinds of errors that lexing and parsing a lambda term can produce.
+ */
+public enum ParseError {
+    /**
+     * the current token is not allowed at this position
+     */
+    UNEXPECTED_TOKEN,
+    /**
+     * a complete term was parsed, but the input continues
+     */
+    TOO_MANY_TOKENS,
+    /**
+     * a character cannot start any token; also reported for numbers that do not fit into an int
+     */
+    UNEXPECTED_CHARACTER
+}
diff --git a/src/main/java/edu/kit/typicalc/model/parser/Token.java b/src/main/java/edu/kit/typicalc/model/parser/Token.java
new file mode 100644
index 0000000..3394e28
--- /dev/null
+++ b/src/main/java/edu/kit/typicalc/model/parser/Token.java
@@ -0,0 +1,81 @@
+package edu.kit.typicalc.model.parser;
+
+/**
+ * A token of a lambda term.
+ */
+public class Token {
+    /**
+     * Used to distinguish what kind of token we have.
+     * Most of them stand for exactly one character.
+     * VARIABLE and NUMBER stand for a whole class of texts.
+     * EOF is a special token to indicate that the end of file is reached.
+     */
+    enum TokenType {
+        LAMBDA, // λ or a backslash
+        VARIABLE, // a letter followed by letters and digits, except keywords and constants
+        LET, // let
+        IN, // in
+        TRUE, // true
+        FALSE, // false
+        NUMBER, // one or more digits
+        LP, // (
+        RP, // )
+        DOT, // .
+        EQ, // =
+        EOF // pseudo token if end of file is reached
+    }
+
+    /**
+     * token type of this Token
+     */
+    private final TokenType type;
+    /**
+     * the text of this token in the source code
+     */
+    private final String text;
+    /**
+     * the position in the source code at which this token begins
+     */
+    private final int pos;
+
+    /**
+     * Constructs a token.
+     * @param type the token type
+     * @param text text of this token in the source code
+     * @param pos position this token begins
+     */
+    public Token(TokenType type, String text, int pos) {
+        this.type = type;
+        this.text = text;
+        this.pos = pos;
+    }
+
+    /**
+     * Returns the token type
+     * @return token type
+     */
+    public TokenType getType() {
+        return type;
+    }
+
+    /**
+     * Returns the text of this token in the source code
+     * @return text of this token in the source code
+     */
+    public String getText() {
+        return text;
+    }
+
+    /**
+     * Returns the position this token begins at
+     * @return position this token begins at
+     */
+    public int getPos() {
+        return pos;
+    }
+
+    @Override
+    public String toString() {
+        return type + "(\"" + text + "\")";
+    }
+}
diff --git a/src/main/java/edu/kit/typicalc/model/term/AbsTerm.java b/src/main/java/edu/kit/typicalc/model/term/AbsTerm.java
new file mode 100644
index 0000000..d14aa44
--- /dev/null
+++ b/src/main/java/edu/kit/typicalc/model/term/AbsTerm.java
@@ -0,0 +1,7 @@
+package edu.kit.typicalc.model.term;
+
+public class AbsTerm extends LambdaTerm {
+    public AbsTerm(VarTerm var, LambdaTerm body) {
+        // TODO
+    }
+}
diff --git a/src/main/java/edu/kit/typicalc/model/term/AppTerm.java b/src/main/java/edu/kit/typicalc/model/term/AppTerm.java
new file mode 100644
index 0000000..62fe32c
--- /dev/null
+++ b/src/main/java/edu/kit/typicalc/model/term/AppTerm.java
@@ -0,0 +1,6 @@
+package edu.kit.typicalc.model.term;
+
+public class AppTerm extends LambdaTerm {
+    public AppTerm(LambdaTerm left, LambdaTerm atom) {
+    }
+}
diff --git a/src/main/java/edu/kit/typicalc/model/term/BooleanTerm.java b/src/main/java/edu/kit/typicalc/model/term/BooleanTerm.java
new file mode 100644
index 0000000..358a722
--- /dev/null
+++ b/src/main/java/edu/kit/typicalc/model/term/BooleanTerm.java
@@ -0,0 +1,7 @@
+package edu.kit.typicalc.model.term;
+
+public class BooleanTerm extends ConstTerm {
+    public BooleanTerm(boolean value) {
+        // TODO
+    }
+}
diff --git a/src/main/java/edu/kit/typicalc/model/term/ConstTerm.java b/src/main/java/edu/kit/typicalc/model/term/ConstTerm.java
new file mode 100644
index 0000000..694f82f
--- /dev/null
+++ b/src/main/java/edu/kit/typicalc/model/term/ConstTerm.java
@@ -0,0 +1,4 @@
+package edu.kit.typicalc.model.term;
+
+public class ConstTerm extends LambdaTerm {
+}
diff --git a/src/main/java/edu/kit/typicalc/model/term/IntegerTerm.java b/src/main/java/edu/kit/typicalc/model/term/IntegerTerm.java
new file mode 100644
index 0000000..5cb35f8
--- /dev/null
+++ b/src/main/java/edu/kit/typicalc/model/term/IntegerTerm.java
@@ -0,0 +1,7 @@
+package edu.kit.typicalc.model.term;
+
+public class IntegerTerm extends ConstTerm {
+    public IntegerTerm(int value) {
+        // TODO
+    }
+}
diff --git a/src/main/java/edu/kit/typicalc/model/term/LetTerm.java b/src/main/java/edu/kit/typicalc/model/term/LetTerm.java
new file mode 100644
index 0000000..ad70177
--- /dev/null
+++ b/src/main/java/edu/kit/typicalc/model/term/LetTerm.java
@@ -0,0 +1,6 @@
+package edu.kit.typicalc.model.term;
+
+public class LetTerm extends LambdaTerm {
+    public LetTerm(VarTerm var, LambdaTerm def, LambdaTerm body) {
+    }
+}
diff --git a/src/main/java/edu/kit/typicalc/model/term/VarTerm.java b/src/main/java/edu/kit/typicalc/model/term/VarTerm.java
index d00ef06..f2254bf 100644
--- a/src/main/java/edu/kit/typicalc/model/term/VarTerm.java
+++ b/src/main/java/edu/kit/typicalc/model/term/VarTerm.java
@@ -1,4 +1,7 @@
 package edu.kit.typicalc.model.term;
 
-public class VarTerm {
+public class VarTerm extends LambdaTerm {
+    public VarTerm(String s) {
+        super();
+    }
 }
diff --git a/src/main/java/edu/kit/typicalc/util/Result.java b/src/main/java/edu/kit/typicalc/util/Result.java
new file mode 100644
index 0000000..8cc01c1
--- /dev/null
+++ b/src/main/java/edu/kit/typicalc/util/Result.java
@@ -0,0 +1,57 @@
+package edu.kit.typicalc.util;
+
+/**
+ * Holds either a value or an error.
+ * A result counts as an error exactly if its error is not null.
+ * @param <T> type of the value
+ * @param <E> type of the error
+ */
+public class Result<T, E> {
+    private final T value;
+    private final E error;
+
+    /**
+     * Creates a successful result.
+     * @param value the value
+     */
+    public Result(T value) {
+        this.value = value;
+        this.error = null;
+    }
+
+    /**
+     * Creates a result from a value and an error.
+     * TODO: a constructor taking only the error is not possible,
+     * it would have the same erasure as Result(T).
+     * @param value the value, null in case of an error
+     * @param error the error, null in case of success
+     */
+    public Result(T value, E error) {
+        this.value = value;
+        this.error = error;
+    }
+
+    /**
+     * Returns whether this result holds an error
+     * @return true if the error is not null
+     */
+    public boolean isError() {
+        return error != null;
+    }
+
+    /**
+     * Returns the value
+     * @return the value, which is null if none was given
+     */
+    public T unwrap() {
+        return value;
+    }
+
+    /**
+     * Returns the error
+     * @return the error, or null if this result is not an error
+     */
+    public E unwrapError() {
+        return error;
+    }
+}