From c98550907d00bf5e3c7c24f71ed08dae53887a27 Mon Sep 17 00:00:00 2001
From: Nixon Enraght-Moony
Date: Wed, 29 Dec 2021 16:49:49 +0000
Subject: [PATCH] Basic parser

---
 .gitignore    |   5 ++
 Cargo.toml    |  11 +++++
 hello.scm     |  11 +++++
 src/main.rs   |  60 ++++++++++++++++++++++++
 src/parser.rs | 125 ++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 212 insertions(+)
 create mode 100644 Cargo.toml
 create mode 100644 hello.scm
 create mode 100644 src/main.rs
 create mode 100644 src/parser.rs

diff --git a/.gitignore b/.gitignore
index f5503f1..8c95310 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,3 +23,8 @@ Cargo.lock
 
 *.scm#*
 .#*.scm
+
+
+# Added by cargo
+
+/target
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..dee7742
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "handball"
+version = "0.1.0"
+edition = "2021"
+
+[features]
+
+[dependencies]
+anyhow = "1.0.52"
+fs-err = "2.6.0"
+logos = "0.12.0"
diff --git a/hello.scm b/hello.scm
new file mode 100644
index 0000000..11d8dd0
--- /dev/null
+++ b/hello.scm
@@ -0,0 +1,11 @@
+; https://github.com/munificent/craftinginterpreters/blob/master/test/closure/closed_closure_in_function.lox
+
+(define f #f)
+
+((lambda ()
+   (define local 1)
+   (define (f_)
+     (display local))
+   (set! f f_)))
+
+(f)
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..9b78de7
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,60 @@
+mod parser;
+
+use fs_err as fs;
+
+/// Token kinds recognised by the `logos`-generated lexer.
+#[derive(Debug, PartialEq, logos::Logos)]
+enum Kind {
+    /// Opening parenthesis.
+    #[token("(")]
+    Lparen,
+
+    /// Closing parenthesis.
+    #[token(")")]
+    Rparen,
+
+    /// Identifiers. Numeric literals also land here for now (digits are in the
+    /// class) and can be separated later.
+    // `-` is kept last in the class so it is a literal dash: written as `+-.`
+    // it formed a range from `+` to `.`, which also admitted `,`.
+    #[regex(r"[A-Za-z0-9!$%&*+./:<=>?@^_~-]+")]
+    Symbol,
+
+    /// `#f` or `#t`.
+    #[regex(r"#(f|t)")]
+    Boolean,
+
+    /// Double-quoted string; a backslash escapes the character after it.
+    #[regex(r#""(\\.|[^"\\])*""#)]
+    String,
+
+    /// Anything the lexer cannot match. Whitespace and `;` line comments are
+    /// skipped instead of being reported.
+    #[error]
+    #[regex(r"[ \t\n\f\r]+", logos::skip)]
+    #[regex(r";.*", logos::skip)]
+    Error,
+}
+
+/// Parses the file named by the first command-line argument and dumps the
+/// resulting trees and errors with `dbg!`.
+fn main() -> anyhow::Result<()> {
+    let arg = std::env::args()
+        .nth(1)
+        .ok_or_else(|| anyhow::anyhow!("Usage: handball sources.scm"))?;
+
+    let src = fs::read_to_string(&arg)?;
+
+    dbg!(parser::parse(&src));
+
+    Ok(())
+}
+
+/// A parsed expression; leaves borrow their text from the source string.
+#[derive(Debug)]
+enum Ast<'s> {
+    /// The contents of one parenthesised list.
+    Tree(Vec<Ast<'s>>),
+    /// The source text of a symbol, string or boolean token.
+    Leaf(&'s str),
+}
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..7fae83d
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,125 @@
+use std::iter::Peekable;
+
+use logos::Logos;
+
+use crate::{Ast, Kind};
+
+/// Human-readable parse diagnostic.
+type Error = String;
+
+/// Recursive-descent parser over a peekable token stream.
+struct Parser<'s> {
+    lex: Peekable<Lexer<'s>>,
+    /// Diagnostics collected so far; parsing carries on after each one.
+    errors: Vec<Error>,
+}
+
+/// Parses every top-level expression in `s`.
+///
+/// Returns the trees that parsed successfully together with all diagnostics.
+pub(crate) fn parse(s: &str) -> (Vec<Ast>, Vec<Error>) {
+    let lex = Lexer {
+        lex: Kind::lexer(s),
+    }
+    .peekable();
+
+    let mut parser = Parser {
+        lex,
+        errors: vec![],
+    };
+
+    let mut trees = Vec::new();
+
+    loop {
+        // Drop bad tokens first so trailing garbage is not reported as an
+        // unexpected EOF on top of the bad-token diagnostics.
+        parser.skip_bad_tokens();
+        if parser.lex.peek().is_none() {
+            break;
+        }
+        if let Some(t) = parser.parse() {
+            trees.push(t);
+        }
+    }
+
+    (trees, parser.errors)
+}
+
+impl<'s> Parser<'s> {
+    /// Parses one expression, or returns `None` after recording a diagnostic.
+    fn parse(&mut self) -> Option<Ast<'s>> {
+        let Token { kind, span, src } = self.next()?;
+
+        Some(match kind {
+            Kind::Lparen => {
+                let mut bits = Vec::new();
+
+                loop {
+                    // Bad tokens must go before peeking, otherwise one sitting
+                    // right before `)` hides the closing paren from this loop.
+                    self.skip_bad_tokens();
+                    if self.lex.next_if(|tok| tok.kind == Kind::Rparen).is_some() {
+                        break;
+                    }
+                    if self.lex.peek().is_none() {
+                        self.errors.push(format!("Unclosed `(` at {:?}", span));
+                        return None;
+                    }
+                    bits.push(self.parse()?);
+                }
+
+                Ast::Tree(bits)
+            }
+            Kind::Rparen => {
+                self.errors.push(format!("Unexpected `)` at {:?}", span));
+                // Lists consume their own `)`, so this only happens at the top
+                // level, where the caller keeps going from the next token.
+                return None;
+            }
+            Kind::Symbol | Kind::String | Kind::Boolean => Ast::Leaf(src),
+            Kind::Error => unreachable!("Removed by Parser::next"),
+        })
+    }
+
+    /// Returns the next real token, recording an error at end of input.
+    fn next(&mut self) -> Option<Token<'s>> {
+        self.skip_bad_tokens();
+        let tok = self.lex.next();
+        if tok.is_none() {
+            self.errors.push("Unexpected EOF".to_owned());
+        }
+        tok
+    }
+
+    /// Reports and discards the `Kind::Error` tokens at the front of the stream.
+    fn skip_bad_tokens(&mut self) {
+        while let Some(tok) = self.lex.next_if(|tok| tok.kind == Kind::Error) {
+            self.errors
+                .push(format!("Bad token at {:?}: `{}`", tok.span, tok.src));
+        }
+    }
+}
+
+/// Adapts the `logos` lexer into an iterator of [`Token`]s.
+struct Lexer<'s> {
+    lex: logos::Lexer<'s, crate::Kind>,
+}
+
+impl<'s> Iterator for Lexer<'s> {
+    type Item = Token<'s>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.lex.next().map(|kind| Token {
+            kind,
+            span: self.lex.span(),
+            src: self.lex.slice(),
+        })
+    }
+}
+
+/// One lexed token: its kind, span and source text.
+struct Token<'s> {
+    kind: Kind,
+    span: logos::Span,
+    src: &'s str,
+}