use logos::{Lexer, Logos};

use crate::ParseResult;

#[derive(Logos, Debug, PartialEq, Copy, Clone)]
pub enum C1Token {
    #[token("bool")]
    KwBoolean,
    #[token("do")]
    KwDo,
    #[token("else")]
    KwElse,
    #[token("float")]
    KwFloat,
    #[token("for")]
    KwFor,
    #[token("if")]
    KwIf,
    #[token("int")]
    KwInt,
    #[token("printf")]
    KwPrintf,
    #[token("return")]
    KwReturn,
    #[token("void")]
    KwVoid,
    #[token("while")]
    KwWhile,

    #[token("+")]
    Plus,
    #[token("-")]
    Minus,
    #[token("*")]
    Asterisk,
    #[token("/")]
    Slash,

    /// =
    #[token("=")]
    Assign,
    /// ==
    #[token("==")]
    Equal,
    /// !=
    #[token("!=")]
    NotEqual,
    /// <
    #[token("<")]
    Less,
    /// >
    #[token(">")]
    Greater,
    /// <=
    #[token("<=")]
    LessEqual,
    /// >=
    #[token(">=")]
    GreaterEqual,
    /// &&
    #[token("&&")]
    And,
    /// ||
    #[token("||")]
    Or,

    #[token(",")]
    Comma,
    #[token(";")]
    Semicolon,

    /// (
    #[token("(")]
    LeftParenthesis,
    /// )
    #[token(")")]
    RightParenthesis,
    /// {
    #[token("{")]
    LeftBrace,
    /// }
    #[token("}")]
    RightBrace,

    #[regex("[0-9]+")]
    ConstInt,
    #[regex(r"(\d+\.\d+([eE]([-+])?\d+)?)|(\.\d+([eE]([-+])?\d+)?)|(\d+[eE]([-+])?\d+)")]
    ConstFloat,
    #[regex("true|false")]
    ConstBoolean,
    #[regex("\"[^\n\"]*\"")]
    ConstString,
    #[regex("[a-zA-Z]+[0-9a-zA-Z]*")]
    Identifier,

    #[regex(r"/\*[^*]*\*+([^/*][^*]*\*+)*/", logos::skip)]
    CComment,
    #[regex("//[^\n]*", logos::skip)]
    CPPComment,

    // We can also use this variant to define whitespace,
    // or any other matches we wish to skip.
    #[regex(r"[ \t\f]+", logos::skip)]
    Whitespace,
    #[regex(r"[\n]")]
    Linebreak,

    // Logos requires one token variant to handle errors;
    // it can be named anything you wish.
    #[error]
    Error,
}

/// # Overview
/// Extended lexer based on the logos crate. The lexer keeps track of the current token and the
/// next token in the lexed text. Furthermore, the lexer keeps track of the line number in which
/// each token is located and of the text associated with each token.
///
/// # Examples
/// ```
/// use cb_3::C1Lexer;
/// use cb_3::C1Token;
///
/// let mut lexer = C1Lexer::new("void main() {
///    x = 4;
/// }");
/// assert_eq!(lexer.current_token(), Some(C1Token::KwVoid));
/// assert_eq!(lexer.current_line_number(), Some(1));
/// assert_eq!(lexer.peek_token(), Some(C1Token::Identifier));
/// assert_eq!(lexer.peek_line_number(), Some(1));
///
/// lexer.eat();
/// // current token is 'main'
///
/// lexer.eat();
/// lexer.eat();
/// lexer.eat();
/// // current token is '{'
///
/// assert_eq!(lexer.current_token(), Some(C1Token::LeftBrace));
/// assert_eq!(lexer.current_line_number(), Some(1));
///
/// // next token is 'x'
/// assert_eq!(lexer.peek_token(), Some(C1Token::Identifier));
/// assert_eq!(lexer.peek_text(), Some("x"));
/// assert_eq!(lexer.peek_line_number(), Some(2));
/// ```
pub struct C1Lexer<'a> {
    logos_lexer: Lexer<'a, C1Token>,
    logos_line_number: usize,
    current_token: Option<TokenData<'a>>,
    peek_token: Option<TokenData<'a>>,
}

impl<'a> C1Lexer<'a> {
    /// Initialize a new C1Lexer for the given string slice.
    pub fn new(text: &'a str) -> C1Lexer {
        let mut lexer = C1Lexer {
            logos_lexer: C1Token::lexer(text),
            logos_line_number: 1,
            current_token: None,
            peek_token: None,
        };
        lexer.current_token = lexer.next_token();
        lexer.peek_token = lexer.next_token();
        lexer
    }

    /// Return the C1Token variant of the current token without consuming it.
    /// ```
    /// use cb_3::{C1Lexer, C1Token};
    /// let lexer = C1Lexer::new("current next");
    ///
    /// assert_eq!(lexer.current_token(), Some(C1Token::Identifier));
    /// assert_eq!(lexer.current_text(), Some("current"));
    ///
    /// assert_eq!(lexer.current_token(), Some(C1Token::Identifier));
    /// assert_eq!(lexer.current_text(), Some("current"));
    /// ```
    pub fn current_token(&self) -> Option<C1Token> {
        self.current_token.token_type()
    }

    /// Return the C1Token variant of the next token without consuming it.
    /// ```
    /// use cb_3::{C1Lexer, C1Token};
    /// let lexer = C1Lexer::new("current next");
    ///
    /// assert_eq!(lexer.peek_token(), Some(C1Token::Identifier));
    /// assert_eq!(lexer.peek_text(), Some("next"));
    ///
    /// assert_eq!(lexer.peek_token(), Some(C1Token::Identifier));
    /// assert_eq!(lexer.peek_text(), Some("next"));
    /// ```
    pub fn peek_token(&self) -> Option<C1Token> {
        self.peek_token.token_type()
    }

    /// Return the text of the current token.
    pub fn current_text(&self) -> Option<&str> {
        self.current_token.text()
    }

    /// Return the text of the next token.
    pub fn peek_text(&self) -> Option<&str> {
        self.peek_token.text()
    }

    /// Return the line number where the current token is located.
    pub fn current_line_number(&self) -> Option<usize> {
        self.current_token.line_number()
    }

    /// Return the line number where the next token is located.
    pub fn peek_line_number(&self) -> Option<usize> {
        self.peek_token.line_number()
    }

    /// Drop the current token and retrieve the next token from the text.
    /// ```
    /// use cb_3::{C1Lexer, C1Token};
    /// let mut lexer = C1Lexer::new("current next last");
    ///
    /// assert_eq!(lexer.current_text(), Some("current"));
    /// assert_eq!(lexer.peek_text(), Some("next"));
    ///
    /// lexer.eat();
    /// assert_eq!(lexer.current_text(), Some("next"));
    /// assert_eq!(lexer.peek_text(), Some("last"));
    ///
    /// lexer.eat();
    /// assert_eq!(lexer.current_text(), Some("last"));
    /// assert_eq!(lexer.peek_text(), None);
    ///
    /// lexer.eat();
    /// assert_eq!(lexer.current_text(), None);
    /// assert_eq!(lexer.peek_text(), None);
    /// ```
    pub fn eat(&mut self) {
        self.current_token = self.peek_token.take();
        self.peek_token = self.next_token();
    }

    /// Private method for reading the next token from the logos::Lexer and extracting the
    /// required data from it.
    fn next_token(&mut self) -> Option<TokenData<'a>> {
        // Retrieve the next token from the internal lexer
        if let Some(c1_token) = self.logos_lexer.next() {
            match c1_token {
                C1Token::Linebreak => {
                    // If the token is a linebreak, increase the line number and get the next token
                    self.logos_line_number += 1;
                    self.next_token()
                }
                // If the token is not a linebreak, initialize and return a TokenData instance
                _ => Some(TokenData {
                    token_type: c1_token,
                    token_text: self.logos_lexer.slice(),
                    token_line: self.logos_line_number,
                }),
            }
        } else {
            None
        }
    }
}

/// Hidden struct for encapsulating the data associated with a token.
struct TokenData<'a> {
    token_type: C1Token,
    token_text: &'a str,
    token_line: usize,
}

/// Hidden trait that makes it possible to implement the required getter functionality directly
/// for Option<TokenData>.
trait TokenDataProvider<'a> {
    /// Return the type of the token, i.e. its C1Token variant.
    fn token_type(&self) -> Option<C1Token>;
    /// Return the text of the token.
    fn text(&self) -> Option<&str>;
    /// Return the line number of the token.
    fn line_number(&self) -> Option<usize>;
}

impl<'a> TokenDataProvider<'a> for Option<TokenData<'a>> {
    fn token_type(&self) -> Option<C1Token> {
        self.as_ref().map(|data| data.token_type)
    }

    fn text(&self) -> Option<&'a str> {
        self.as_ref().map(|data| data.token_text)
    }

    fn line_number(&self) -> Option<usize> {
        self.as_ref().map(|data| data.token_line)
    }
}

pub struct C1Parser<'a> {
    lexer: C1Lexer<'a>,
}

impl<'a> C1Parser<'a> {
    /// Entry point of the parser: parse the given text and return a ParseResult.
    pub fn parse(text: &'a str) -> ParseResult {
        let mut parser = C1Parser::new(text);
        parser.programm();
        Ok(())
    }

    pub fn new(text: &'a str) -> C1Parser {
        C1Parser {
            lexer: C1Lexer::new(text),
        }
    }

    fn programm(&mut self) {
        match self.lexer.current_token() {
            Some(..) => {
                self.functiondefinition();
                self.programm();
            }
            None => {} // TODO: what should happen at the end of the input?
        };
    }

    fn functiondefinition(&mut self) {
        match self.lexer.current_token() {
            Some(C1Token::KwBoolean)
            | Some(C1Token::KwFloat)
            | Some(C1Token::KwInt)
            | Some(C1Token::KwVoid) => {
                self.r#type();
                self.check_and_eat(C1Token::Identifier);
                self.check_and_eat(C1Token::LeftParenthesis);
                self.check_and_eat(C1Token::RightParenthesis);
                self.check_and_eat(C1Token::LeftBrace);
                self.statementlist();
                self.check_and_eat(C1Token::RightBrace);
            }
            _ => {} // TODO: add error handling
        };
    }

    fn functioncall(&mut self) {
        match self.lexer.current_token() {
            Some(C1Token::Identifier) => {
                self.eat();
                self.check_and_eat(C1Token::LeftParenthesis);
                self.check_and_eat(C1Token::RightParenthesis);
            }
            _ => {} // TODO: add error handling
        };
    }

    fn statementlist(&mut self) {
        match self.lexer.current_token() {
            Some(C1Token::LeftBrace)
            | Some(C1Token::KwIf)
            | Some(C1Token::KwReturn)
            | Some(C1Token::KwPrintf)
            | Some(C1Token::Identifier) => {
                self.block();
                self.statementlist();
            }
            // TODO: the FOLLOW set is still missing here; statementlist is derivable to
            // epsilon, so probably just do nothing in this arm and leave the error
            // handling to the calling function.
            _ => {}
        };
    }

    fn block(&mut self) {
        match self.lexer.current_token() {
            Some(C1Token::LeftBrace) => {
                self.eat();
                self.statementlist();
                self.check_and_eat(C1Token::RightBrace);
            }
            Some(C1Token::KwIf)
            | Some(C1Token::KwReturn)
            | Some(C1Token::KwPrintf)
            | Some(C1Token::Identifier) => self.statement(),
            _ => {} // TODO: add error handling
        };
    }

    fn statement(&mut self) {
        match self.lexer.current_token() {
            Some(C1Token::KwIf) => {
                self.ifstatement();
                self.check_and_eat(C1Token::Semicolon);
            }
            Some(C1Token::KwReturn) => {
                self.returnstatement();
                self.check_and_eat(C1Token::Semicolon);
            }
            Some(C1Token::KwPrintf) => {
                self.printf();
                self.check_and_eat(C1Token::Semicolon);
            }
            Some(C1Token::Identifier) => {
                match self.lexer.peek_token() {
                    Some(C1Token::Assign) => self.statassignment(),
                    Some(C1Token::LeftParenthesis) => self.functioncall(),
                    _ => {} // TODO: add error handling
                };
                self.check_and_eat(C1Token::Semicolon);
            }
            _ => {} // TODO: add error handling
        };
    }

    fn ifstatement(&mut self) {
        match self.lexer.current_token() {
            Some(C1Token::KwIf) => {
                self.eat();
                self.check_and_eat(C1Token::LeftParenthesis);
                self.assignment();
                self.check_and_eat(C1Token::RightParenthesis);
                self.block();
            }
            _ => {} // TODO: add error handling
        };
    }

    fn returnstatement(&mut self) {
        match self.lexer.current_token() {
            Some(C1Token::KwReturn) => {
                // Consume "return", then parse the optional expression.
                self.eat();
                match self.lexer.current_token() {
                    Some(C1Token::Minus)
                    | Some(C1Token::ConstInt)
                    | Some(C1Token::ConstFloat)
                    | Some(C1Token::ConstBoolean)
                    | Some(C1Token::LeftParenthesis)
                    | Some(C1Token::Identifier) => self.assignment(),
                    // Is it correct that nothing is done here? The semicolon is already
                    // consumed in statement().
                    Some(C1Token::Semicolon) => {}
                    _ => {} // TODO: add error handling
                };
            }
            _ => {} // TODO: add error handling
        };
    }

    fn printf(&mut self) {
        match self.lexer.current_token() {
            Some(C1Token::KwPrintf) => {
                self.eat();
                self.check_and_eat(C1Token::LeftParenthesis);
                self.assignment();
                self.check_and_eat(C1Token::RightParenthesis);
            }
            _ => {} // TODO: add error handling
        };
    }

    fn r#type(&mut self) {
        match self.lexer.current_token() {
            Some(C1Token::KwBoolean)
            | Some(C1Token::KwFloat)
            | Some(C1Token::KwInt)
            | Some(C1Token::KwVoid) => self.eat(),
            _ => {} // TODO: add error handling
        };
    }

    fn statassignment(&mut self) {
        match self.lexer.current_token() {
            Some(C1Token::Identifier) => {
                self.eat();
                self.check_and_eat(C1Token::Assign);
                self.assignment();
            }
            _ => {} // TODO: add error handling
        };
    }

    fn assignment(&mut self) {
        match self.lexer.current_token() {
            Some(C1Token::Identifier) => {
                match self.lexer.peek_token() {
                    Some(C1Token::Assign) => {
                        self.eat();
                        self.eat();
                        self.assignment();
                    }
                    _ => self.expr(),
                };
            }
            Some(C1Token::Minus)
            | Some(C1Token::ConstInt)
            | Some(C1Token::ConstFloat)
            | Some(C1Token::ConstBoolean)
            | Some(C1Token::LeftParenthesis) => self.expr(),
            _ => {} // TODO: add error handling
        }
    }

    fn expr(&mut self) {
        match self.lexer.current_token() {
            Some(C1Token::Minus)
            | Some(C1Token::ConstInt)
            | Some(C1Token::ConstFloat)
            | Some(C1Token::ConstBoolean)
            | Some(C1Token::LeftParenthesis)
            | Some(C1Token::Identifier) => {
                self.simpexpr();
                self.helpexpr();
            }
            _ => {} // TODO: add error handling
        };
    }

    fn helpexpr(&mut self) {
        match self.lexer.current_token() {
            Some(C1Token::Equal)
            | Some(C1Token::NotEqual)
            | Some(C1Token::LessEqual)
            | Some(C1Token::GreaterEqual)
            | Some(C1Token::Less)
            | Some(C1Token::Greater) => {
                self.eat();
                self.simpexpr();
            }
            // TODO: the FOLLOW set is still missing here; helpexpr is derivable to
            // epsilon, so probably just do nothing in this arm and leave the error
            // handling to the calling function.
            _ => {}
        };
    }

    fn simpexpr(&mut self) {
        match self.lexer.current_token() {
            Some(C1Token::Minus) => {
                self.eat();
                self.term();
                self.helpsimpexpr();
            }
            Some(C1Token::ConstInt)
            | Some(C1Token::ConstFloat)
            | Some(C1Token::ConstBoolean)
            | Some(C1Token::LeftParenthesis)
            | Some(C1Token::Identifier) => {
                self.term();
                self.helpsimpexpr();
            }
            _ => {} // TODO: add error handling
        };
    }

    fn helpsimpexpr(&mut self) {
        match self.lexer.current_token() {
            Some(C1Token::Plus) | Some(C1Token::Minus) | Some(C1Token::Or) => {
                self.eat();
                self.term();
                self.helpsimpexpr();
            }
            // TODO: the FOLLOW set is still missing here; helpsimpexpr is derivable to
            // epsilon, so probably just do nothing in this arm and leave the error
            // handling to the calling function.
            _ => {}
        };
    }

    fn term(&mut self) {
        match self.lexer.current_token() {
            Some(C1Token::ConstInt)
            | Some(C1Token::ConstFloat)
            | Some(C1Token::ConstBoolean)
            | Some(C1Token::LeftParenthesis)
            | Some(C1Token::Identifier) => {
                self.factor();
                self.helpterm();
            }
            _ => {} // TODO: add error handling
        };
    }

    fn helpterm(&mut self) {
        match self.lexer.current_token() {
            Some(C1Token::Asterisk) | Some(C1Token::Slash) | Some(C1Token::And) => {
                self.eat();
                self.factor();
                self.helpterm();
            }
            // TODO: the FOLLOW set is still missing here; helpterm is derivable to
            // epsilon, so probably just do nothing in this arm and leave the error
            // handling to the calling function.
            _ => {}
        };
    }

    fn factor(&mut self) {
        match self.lexer.current_token() {
            Some(C1Token::ConstInt) | Some(C1Token::ConstFloat) | Some(C1Token::ConstBoolean) => {
                self.eat()
            }
            Some(C1Token::LeftParenthesis) => {
                self.eat();
                self.assignment();
                self.check_and_eat(C1Token::RightParenthesis);
            }
            Some(C1Token::Identifier) => {
                match self.lexer.peek_token() {
                    Some(C1Token::LeftParenthesis) => self.functioncall(),
                    _ => self.eat(),
                };
            }
            _ => {} // TODO: add error handling
        };
    }

    fn eat(&mut self) {
        self.lexer.eat();
    }

    /// Consume the current token if it matches the expected token.
    fn check_and_eat(&mut self, token: C1Token) {
        match self.lexer.current_token() {
            Some(value) => {
                if value == token {
                    self.eat();
                }
            }
            None => {} // TODO: add error handling
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::lexer::C1Lexer;
    use crate::C1Token;

    #[test]
    fn lines_are_counted() {
        let mut lexer1 = C1Lexer::new("Hello\nTest");
        assert_eq!(lexer1.current_line_number(), Some(1));
        assert_eq!(lexer1.peek_line_number(), Some(2));
        lexer1.eat();
        assert_eq!(lexer1.current_line_number(), Some(2));
        assert_eq!(lexer1.peek_line_number(), None);
        lexer1.eat();
        assert_eq!(lexer1.current_line_number(), None);
        assert_eq!(lexer1.peek_line_number(), None);
    }

    #[test]
    fn line_count_is_reset() {
        {
            let mut lexer1 = C1Lexer::new("Hello\nTest\nbla\nfoo");
            lexer1.eat();
            lexer1.eat();
            assert_eq!(lexer1.current_line_number(), Some(3));
            assert_eq!(lexer1.peek_line_number(), Some(4));
        }

        let lexer2 = C1Lexer::new("bool foo()");
        assert_eq!(lexer2.current_line_number(), Some(1));
        assert_eq!(lexer2.peek_line_number(), Some(1));
    }

    #[test]
    fn float_recognition() {
        let lexer = C1Lexer::new("1.2");
        assert_eq!(lexer.current_token(), Some(C1Token::ConstFloat));

        let lexer = C1Lexer::new("1.000");
        assert_eq!(lexer.current_token(), Some(C1Token::ConstFloat));

        let lexer = C1Lexer::new(".2");
        assert_eq!(lexer.current_token(), Some(C1Token::ConstFloat));

        let lexer = C1Lexer::new("1.2e4");
        assert_eq!(lexer.current_token(), Some(C1Token::ConstFloat));

        let lexer = C1Lexer::new("1.2e+4");
        assert_eq!(lexer.current_token(), Some(C1Token::ConstFloat));

        let lexer = C1Lexer::new("1.2e-10");
        assert_eq!(lexer.current_token(), Some(C1Token::ConstFloat));

        let lexer = C1Lexer::new("1.2E-10");
        assert_eq!(lexer.current_token(), Some(C1Token::ConstFloat));

        let lexer = C1Lexer::new("33E+2");
        assert_eq!(lexer.current_token(), Some(C1Token::ConstFloat));
    }
}
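
// Illustrative sketch, not part of the original test suite: it exercises the
// `logos::skip` rules attached to `CComment`, `CPPComment` and `Whitespace`,
// i.e. that comments and blanks never surface as tokens. It assumes the same
// crate layout as the `tests` module above (this file living in `crate::lexer`,
// with `C1Token` re-exported at the crate root).
#[cfg(test)]
mod lexer_skip_tests {
    use crate::lexer::C1Lexer;
    use crate::C1Token;

    #[test]
    fn comments_and_whitespace_are_skipped() {
        let mut lexer = C1Lexer::new("int /* comment */ x; // trailing comment");

        // The C-style comment between "int" and "x" is skipped entirely.
        assert_eq!(lexer.current_token(), Some(C1Token::KwInt));
        lexer.eat();
        assert_eq!(lexer.current_token(), Some(C1Token::Identifier));
        assert_eq!(lexer.current_text(), Some("x"));
        lexer.eat();
        assert_eq!(lexer.current_token(), Some(C1Token::Semicolon));

        // The trailing C++-style comment is skipped as well, so the input ends here.
        lexer.eat();
        assert_eq!(lexer.current_token(), None);
    }
}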
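
// Illustrative sketch, not part of the original test suite: it shows how the
// recursive-descent entry point `C1Parser::parse` is meant to be called,
// assuming the same module path as the `tests` module above. Since error
// handling is still marked TODO throughout the parser, `parse` currently
// returns `Ok(())` for any input, so the assertion below only documents the
// intended happy path.
#[cfg(test)]
mod parser_usage_tests {
    use crate::lexer::C1Parser;

    #[test]
    fn parse_accepts_a_minimal_function_definition() {
        // functiondefinition ::= type id "(" ")" "{" statementlist "}"
        let result = C1Parser::parse("void main() { x = 4; }");
        assert!(result.is_ok());
    }
}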