From bd6a57220d6643223952419a68faea6d9bc0725a Mon Sep 17 00:00:00 2001
From: Julian Hurst
Date: Tue, 17 Mar 2026 17:52:34 +0100
Subject: Initial parser/interp and test file

---
 cmd/hs/.hs.ha.swp                        | Bin 0 -> 12288 bytes
 cmd/hs/hs.ha                             |  78 ++++++++
 internal/interpreter/.interpreter.ha.swp | Bin 0 -> 12288 bytes
 internal/interpreter/interpreter.ha      |  76 ++++++++
 internal/parser/.parser.ha.swp           | Bin 0 -> 20480 bytes
 internal/parser/parser.ha                | 302 +++++++++++++++++++++++++++++++
 test.ra                                  |   2 +
 7 files changed, 458 insertions(+)
 create mode 100644 cmd/hs/.hs.ha.swp
 create mode 100644 cmd/hs/hs.ha
 create mode 100644 internal/interpreter/.interpreter.ha.swp
 create mode 100644 internal/interpreter/interpreter.ha
 create mode 100644 internal/parser/.parser.ha.swp
 create mode 100644 internal/parser/parser.ha
 create mode 100644 test.ra

diff --git a/cmd/hs/.hs.ha.swp b/cmd/hs/.hs.ha.swp
new file mode 100644
index 0000000..7e6d875
Binary files /dev/null and b/cmd/hs/.hs.ha.swp differ
diff --git a/cmd/hs/hs.ha b/cmd/hs/hs.ha
new file mode 100644
index 0000000..758ad62
--- /dev/null
+++ b/cmd/hs/hs.ha
@@ -0,0 +1,78 @@
+use fmt;
+use internal::parser;
+use internal::interpreter;
+use io;
+use os;
+
+// Parses and interprets the first two statements of "test.ra", dumping the
+// first syntax tree and finally every variable the interpreter recorded.
+export fn main() void = {
+	let f = os::open("test.ra")!;
+	defer io::close(f)!;
+	let p = parser::parser {
+		h = f,
+		state = parser::tokentype::START,
+		unreadbuf = ['0'...],
+		unreadcount = 0z,
+	};
+	let it = interpreter::interpreter {
+		vars = [],
+	};
+
+	const first = parsestatement(&p);
+	walkast(first);
+	interpreter::interpret(&it, first);
+
+	const second = parsestatement(&p);
+	interpreter::interpret(&it, second);
+
+	for (let var .. it.vars) {
+		fmt::printfln("{}: {}", var.0, var.1)!;
+	};
+};
+
+// Parses the next statement from p. A parse error is not recoverable here:
+// its message is handed to fmt::fatal.
+fn parsestatement(p: *parser::parser) parser::ast = {
+	return match (parser::parse(p)) {
+	case let a: parser::ast =>
+		yield a;
+	case let e: parser::error =>
+		fmt::fatal(parser::strerror(e));
+	};
+};
+
+// Prints ast and then each of its children, depth first, one node per line.
+fn walkast(ast: parser::ast) void = {
+	match (ast.value) {
+	case parser::assign =>
+		fmt::println("ASSIGN")!;
+	case let v: parser::varname =>
+		fmt::printfln("VARNAME: {}", v)!;
+	case let v: parser::operation =>
+		fmt::print("OPERATION: ")!;
+		for (let arg .. v.stack) {
+			match (arg) {
+			case let i: int =>
+				fmt::printf("{} ", i)!;
+			case let o: parser::operator =>
+				switch (o) {
+				case parser::operator::ADD =>
+					fmt::print("+ ")!;
+				case parser::operator::SUBTRACT =>
+					fmt::print("- ")!;
+				case parser::operator::MULTIPLY =>
+					fmt::print("* ")!;
+				case parser::operator::DIVIDE =>
+					fmt::print("/ ")!;
+				};
+			};
+		};
+		fmt::println()!;
+	case =>
+		abort();
+	};
+	for (let c .. ast.children) {
+		walkast(c);
+	};
+};
diff --git a/internal/interpreter/.interpreter.ha.swp b/internal/interpreter/.interpreter.ha.swp
new file mode 100644
index 0000000..76a76c9
Binary files /dev/null and b/internal/interpreter/.interpreter.ha.swp differ
diff --git a/internal/interpreter/interpreter.ha b/internal/interpreter/interpreter.ha
new file mode 100644
index 0000000..5e92461
--- /dev/null
+++ b/internal/interpreter/interpreter.ha
@@ -0,0 +1,76 @@
+use internal::parser;
+use strconv;
+use strings;
+
+// A variable binding: (name, value rendered as a decimal string).
+export type variable = (str, str);
+
+// Interpreter state: every variable assigned so far, in assignment order.
+export type interpreter = struct {
+	vars: []variable,
+};
+
+// Any error raised while interpreting a statement.
+export type error = !(interperror | nomem);
+
+// An evaluation error, carrying a human-readable message.
+export type interperror = !str;
+
+// Executes one parsed statement. Only assignment trees are handled; a failed
+// assignment aborts the program because of the error assertion.
+export fn interpret(it: *interpreter, tree: parser::ast) void = {
+	match (tree.value) {
+	case parser::assign =>
+		assign(it, tree.children)!;
+	};
+};
+
+// Evaluates the postfix expression in nodes[1] and appends the result to
+// it.vars under the name held in nodes[0].
+//
+// Each operator pops its right-hand operand first and its left-hand operand
+// second, so "5 3 -" is 5 - 3. An operator with fewer than two operands, an
+// expression that leaves no value, or a division by zero yields an
+// [[interperror]].
+fn assign(it: *interpreter, nodes: []parser::ast) (void | error) = {
+	const varname = nodes[0].value as parser::varname;
+	const operation = nodes[1].value as parser::operation;
+
+	let stack: []int = [];
+	defer free(stack);
+
+	for (let arg .. operation.stack) {
+		match (arg) {
+		case let operand: int =>
+			append(stack, operand)?;
+		case let op: parser::operator =>
+			if (len(stack) < 2) {
+				return "Invalid operation": interperror;
+			};
+			const rhs = stack[len(stack) - 1];
+			const lhs = stack[len(stack) - 2];
+			const result = switch (op) {
+			case parser::operator::ADD =>
+				yield lhs + rhs;
+			case parser::operator::SUBTRACT =>
+				yield lhs - rhs;
+			case parser::operator::MULTIPLY =>
+				yield lhs * rhs;
+			case parser::operator::DIVIDE =>
+				if (rhs == 0) {
+					return "Division by zero": interperror;
+				};
+				yield lhs / rhs;
+			};
+			delete(stack[len(stack) - 2..]);
+			append(stack, result)?;
+		};
+	};
+
+	if (len(stack) == 0) {
+		return "Invalid operation": interperror;
+	};
+	// strconv::itos hands back a reused buffer, so the text is copied.
+	const text = strings::dup(strconv::itos(stack[len(stack) - 1]))?;
+	append(it.vars, (varname, text))?;
+};
diff --git a/internal/parser/.parser.ha.swp b/internal/parser/.parser.ha.swp
new file mode 100644
index 0000000..e2d9062
Binary files /dev/null and b/internal/parser/.parser.ha.swp differ
diff --git a/internal/parser/parser.ha b/internal/parser/parser.ha
new file mode 100644
index 0000000..682cb35
--- /dev/null
+++ b/internal/parser/parser.ha
@@ -0,0 +1,302 @@
+use memio;
+use bufio;
+use encoding::utf8;
+use io;
+use strings;
+use strconv;
+use ascii;
+use fmt;
+
+// The value carried by a syntax tree node.
+export type token = (varname | assign | operation);
+
+// The assignment sign of a statement.
+export type assign = rune;
+
+// The name of the variable being assigned.
+export type varname = str;
+
+// One element of a postfix expression: an integer literal or an operator.
+export type argument = (int | operator);
+
+// The arithmetic operators understood by the parser.
+export type operator = enum {
+	ADD,
+	SUBTRACT,
+	MULTIPLY,
+	DIVIDE,
+};
+
+// A postfix expression, stored in the order it was written.
+export type operation = struct {
+	stack: []argument,
+};
+
+// Parser states. [[parsetoken]] only uses START, VARNAME and ASSIGN.
+export type tokentype = enum {
+	START,
+	VARNAME,
+	OPERAND,
+	OPERATOR,
+	OPERATION,
+	LITERAL,
+	ASSIGN,
+};
+
+// A syntax error: (column, line, message).
+export type parseerror = !(size, size, str);
+
+// Any error the parser can return.
+export type error = !(parseerror | io::error | utf8::invalid | strconv::invalid | strconv::overflow | nomem);
+
+// Returns the name of a parser state.
+export fn strtktype(tktype: tokentype) str = switch (tktype) {
+case tokentype::START => yield "START";
+case tokentype::VARNAME => yield "VARNAME";
+case tokentype::OPERAND => yield "OPERAND";
+case tokentype::OPERATOR => yield "OPERATOR";
+case tokentype::OPERATION => yield "OPERATION";
+case tokentype::LITERAL => yield "LITERAL";
+case tokentype::ASSIGN => yield "ASSIGN";
+};
+
+// Parser state: the input handle, the current state of the token state
+// machine, and a small stack of runes pushed back by [[unreadrune]].
+export type parser = struct {
+	h: io::handle,
+	state: tokentype,
+	unreadbuf: [2]rune,
+	unreadcount: size,
+};
+
+// Converts an error into a human-readable string. Parse errors are formatted
+// into a static buffer that the next call overwrites.
+export fn strerror(e: error) str = {
+	static let b: [256]u8 = @undefined;
+	return match (e) {
+	case let e: parseerror =>
+		yield fmt::bsprintf(b, "Error on col {} line {}: {}", e.0, e.1, e.2)!;
+	case let e: io::error =>
+		yield io::strerror(e);
+	case let e: utf8::invalid =>
+		yield utf8::strerror(e);
+	case let e: (strconv::invalid | strconv::overflow) =>
+		yield strconv::strerror(e);
+	case nomem =>
+		yield "No memory left";
+	};
+};
+
+// Pushes rn back so that the next [[read_rune]] returns it. At most
+// len(p.unreadbuf) runes can be pending at once.
+fn unreadrune(p: *parser, rn: rune) void = {
+	assert(p.unreadcount < len(p.unreadbuf), "unread buffer is full");
+	p.unreadbuf[p.unreadcount] = rn;
+	p.unreadcount += 1;
+};
+
+// Returns the most recently unread rune if there is one, otherwise reads the
+// next rune from the underlying handle.
+fn read_rune(p: *parser) (rune | io::EOF | error) = {
+	if (p.unreadcount > 0) {
+		p.unreadcount -= 1;
+		return p.unreadbuf[p.unreadcount];
+	} else {
+		return bufio::read_rune(p.h)?;
+	};
+};
+
+// Reads alphanumeric runes up to (but not including) the next '=' and returns
+// them as a newly allocated [[varname]]. Any other rune, or reaching the end
+// of input before '=', is a parse error.
+fn parsevarname(p: *parser, runecount: size, linenb: size = 0) (varname | error) = {
+	let m = memio::dynamic();
+	defer io::close(&m)!;
+
+	return for (let r => read_rune(p)?) {
+		runecount += 1;
+		if (r == '=') {
+			unreadrune(p, r);
+			let s = strings::trim(memio::string(&m)?);
+			break strings::dup(s)?: varname;
+		};
+		if (ascii::isalnum(r)) {
+			memio::appendrune(&m, r)?;
+		} else {
+			break (runecount, linenb, "Character is not alphanumeric"): error;
+		};
+	} else {
+		return (runecount, linenb, "Syntax error: variable not assigned or used"): error;
+	};
+};
+
+// Reads a postfix expression up to the end of the line or of the input.
+// Tokens are integers and the operators + - * /, separated by spaces; a '-'
+// immediately followed by a digit starts a negative literal. Parsing
+// progress is traced on stderr.
+fn parseoperation(p: *parser, runecount: size, linenb: size = 0) (operation | error) = {
+	static let buf: [256]u8 = @undefined;
+	let stack: []argument = [];
+
+	let m = memio::dynamic();
+	defer io::close(&m)!;
+
+	// Set after reading '-': only the next rune tells whether it is the
+	// sign of a literal or a subtraction.
+	let checkneg = false;
+
+	for (true) {
+		const r = match (read_rune(p)?) {
+		case let rn: rune =>
+			runecount += 1;
+			yield rn;
+		case io::EOF =>
+			// End of input ends the expression like a newline
+			// does, so a trailing number or '-' is not dropped.
+			yield '\n';
+		};
+		if (ascii::isdigit(r)) {
+			if (checkneg) {
+				memio::appendrune(&m, '-')?;
+				checkneg = false;
+			};
+			memio::appendrune(&m, r)?;
+		} else {
+			if (checkneg) {
+				append(stack, operator::SUBTRACT)?;
+				// The '-' is consumed; without this reset the next
+				// number would wrongly be negated as well.
+				checkneg = false;
+			};
+			let s = memio::string(&m)?;
+			if (s != "") {
+				fmt::errorfln("Try parse num: {}", s)!;
+				let i = strconv::stoi(s)?;
+				append(stack, i)?;
+				memio::reset(&m);
+				fmt::errorfln("Parse num: {}", i)!;
+			};
+			switch (r) {
+			case '+' =>
+				append(stack, operator::ADD)?;
+				fmt::errorln("Parse add")!;
+			case '-' =>
+				// could be negative number or subtraction
+				checkneg = true;
+			case '*' =>
+				append(stack, operator::MULTIPLY)?;
+				fmt::errorln("Parse mul")!;
+			case '/' =>
+				append(stack, operator::DIVIDE)?;
+				fmt::errorln("Parse div")!;
+			case ' ' =>
+				fmt::errorln("Parse space")!;
+			case '\n' =>
+				fmt::errorln("Parse newline")!;
+				break;
+			case =>
+				const msg = fmt::bsprintf(buf, "Unexpected char '{}'", r)?;
+				return (runecount, linenb, msg): error;
+			};
+		};
+	};
+	for (let arg .. stack) {
+		match (arg) {
+		case let i: int =>
+			fmt::errorf("{} ", i)!;
+		case let o: operator =>
+			switch (o) {
+			case operator::ADD =>
+				fmt::error("+ ")!;
+			case operator::SUBTRACT =>
+				fmt::error("- ")!;
+			case operator::MULTIPLY =>
+				fmt::error("* ")!;
+			case operator::DIVIDE =>
+				fmt::error("/ ")!;
+			};
+		};
+	};
+	fmt::errorln()!;
+	return operation {
+		stack = stack,
+	};
+};
+
+// A syntax tree node: its own token plus its child nodes.
+export type ast = struct {
+	value: token,
+	children: []ast,
+};
+
+// Parses one "name=expression" statement and returns its tree: the root
+// holds the assignment sign and its children are the variable name followed
+// by the expression.
+export fn parse(p: *parser, linenb: size = 0) (ast | error) = {
+	let stop = false;
+
+	let tree: []ast = [];
+	let root: ast = @undefined;
+
+	for (!stop) {
+		let t = parsetoken(p, linenb)?;
+		match (t) {
+		case varname =>
+			let node: ast = ast {
+				value = t,
+				children = [],
+			};
+			append(tree, node)?;
+		case assign =>
+			root.value = t;
+		case operation =>
+			let node: ast = ast {
+				value = t,
+				children = [],
+			};
+			append(tree, node)?;
+			stop = true;
+		};
+	};
+	root.children = tree;
+	return root;
+};
+
+// Reads the next token according to the parser state and advances the
+// state: START yields a variable name, VARNAME the '=' sign and ASSIGN the
+// expression, after which the parser is back at START. linenb is the line
+// number reported in parse errors.
+export fn parsetoken(p: *parser, linenb: size = 0) (token | error) = {
+	static let buf: [256]u8 = @undefined;
+	let runecount = 0z;
+
+	return switch (p.state) {
+	case tokentype::START =>
+		// VARNAME
+		let t = parsevarname(p, 0z, linenb)?;
+		p.state = tokentype::VARNAME;
+		yield t;
+	case tokentype::VARNAME =>
+		// look for ASSIGN
+		let r = read_rune(p)?;
+		yield match (r) {
+		case let r: rune =>
+			yield if (r == '=') {
+				p.state = tokentype::ASSIGN;
+				yield '=';
+			} else {
+				const msg = fmt::bsprintf(buf, "Missing '=', found '{}' instead", r)?;
+				yield (runecount, linenb, msg): error;
+			};
+		case io::EOF =>
+			yield (runecount, linenb, "Unexpected EOF"): error;
+		};
+	case tokentype::ASSIGN =>
+		// look for OPERATION
+		let t = parseoperation(p, 0z, linenb)?;
+		p.state = tokentype::START;
+		yield t;
+	case =>
+		yield (runecount, linenb, "Not implemented"): error;
+	};
+};
diff --git a/test.ra b/test.ra
new file mode 100644
index 0000000..93c6ebd
--- /dev/null
+++ b/test.ra
@@ -0,0 +1,2 @@
+a=2 1 + 6 *
+b=3 5 *
-- 
cgit v1.2.3