diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..91a723255 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +venv/ +*.pyc +test/db/db/*.js diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..932f5c0d8 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,37 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-ast + - id: check-yaml + - id: check-toml + - id: check-merge-conflict + - id: mixed-line-ending + - id: check-case-conflict + - repo: https://github.com/hadialqattan/pycln + rev: v2.1.5 + hooks: + - id: pycln + args: [--config=pyproject.toml, src] + types: [file] + types_or: [python, pyi] + - repo: https://github.com/psf/black + rev: 23.3.0 + hooks: + - id: black + language_version: python3.11 + - repo: https://github.com/pycqa/isort + rev: 5.12.0 + hooks: + - id: isort + name: isort (python) + args: ["--profile", "black", "--filter-files"] + +ci: + autofix_commit_msg: "[pre-commit.ci] auto fixes from pre-commit.com hooks" + autofix_prs: true + autoupdate_commit_msg: "[pre-commit.ci] pre-commit autoupdate" + autoupdate_schedule: weekly + submodules: false diff --git a/README.md b/README.md index 7eb743268..8f4ef7d66 100644 --- a/README.md +++ b/README.md @@ -426,6 +426,8 @@ import add! add(3, 2)! ``` +Note that, since it is impossible to export every function in a file to all other DreamBerd files in existence, there is __no__ DreamBerd standard library. + By the way, to see DreamBerd in action, check out [this page](https://github.com/TodePond/DreamBerd/blob/main/LICENSE.md). ## Classes diff --git a/built/helper.tsx b/built/helper.tsx new file mode 100644 index 000000000..c42664532 --- /dev/null +++ b/built/helper.tsx @@ -0,0 +1,211 @@ +export class VarState { + name: string; + variable: any; + allow_reassign: boolean; + priority: number; + history: Array<any>; + updateCount: number; + onUpdatePromises: Array<UpdatePromise>; + expiry: number; + + constructor(name: string, variable: any, allow_reassign: boolean, priority: number, lifetime: number = -1) { + this.name = name; + this.variable = variable; + this.allow_reassign = allow_reassign; + this.priority = priority; + this.history = [undefined]; + this.expiry = Infinity; + + // Lifetime stays at its default unless given as Infinity or in seconds + // Line-based lifetimes are handled manually with kill() + if (lifetime != -1) { + if (lifetime === Infinity) { + localStorage.setItem(name, variable); + this.expiry = -1; + } + else { + this.expiry = Date.now() + (1000 * lifetime); + } + } + + this.updateCount = 0; + this.onUpdatePromises = []; + } + + get() { + if (this.dead()) { + this.kill(); + } + return this.variable; + } + + dead() { + return this.expiry !== -1 && Date.now() >= this.expiry; + } + + kill() { + this.history.push(this.variable); + this.variable = undefined; + } + + assign(value: any, priority: number) { + if (this.dead()) { + this.kill(); + } + if (!this.allow_reassign || priority < this.priority) { + return false; + } else { + this.history.push(this.variable); + if (this.expiry === -1) { + localStorage.setItem(this.name, value); + } + this.variable = value; + this.updateCount++; + + // Check all `next` calls + const resolvedPromises: Array<UpdatePromise> = []; + for (const promise of this.onUpdatePromises) { + if (this.updateCount >= promise.targetCount) { + promise.resolve(this.variable); + resolvedPromises.push(promise); + } + } + + for (const promise of resolvedPromises) { + const index = this.onUpdatePromises.indexOf(promise); +
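// The resolved promises were collected above, so they can be spliced out here without mutating the list mid-iteration +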
if (index !== -1) { + this.onUpdatePromises.splice(index, 1); + } + } + + return true; + } + } + + previous(prev_iter = 1) { + if (prev_iter > this.history.length) { + console.log(`Soft Error: Attempting to access prehistoric value of ${this.name}.`) + return Math.random() * Number.MAX_VALUE // Approximation of unassigned memory + } + return this.history[this.history.length - prev_iter]; + } + + next(count: number): Promise<any> { + const targetCount = this.updateCount + count; + + if (targetCount <= this.updateCount) { + console.log(`Variable already updated ${count} times. Current value: ${this.get()}`); + return Promise.resolve(this.get()); + } + + return new Promise((resolve) => { + const promise: UpdatePromise = { + targetCount: targetCount, + resolve: resolve, + }; + this.onUpdatePromises.push(promise); + }); + } +} + +export interface UpdatePromise { + targetCount: number; + resolve: Function; +} +/// USED FOR WHEN BLOCKS +class ConditionBlockPair { + condition: () => boolean; + codeBlock: () => void; + + constructor(condition: () => boolean, codeBlock: () => void) { + this.condition = condition; + this.codeBlock = codeBlock; + } +} + +// Define a class to manage the conditions and code blocks +export class ConditionBlockManager { + pairs: ConditionBlockPair[]; + + constructor() { + this.pairs = []; + } + + addPair(condition: () => boolean, codeBlock: () => void) { + const pair = new ConditionBlockPair(condition, codeBlock); + this.pairs.push(pair); + } + + checkConditions() { + this.pairs = this.pairs.filter((pair) => { + if (pair.condition()) { + pair.codeBlock(); + return false; // Remove the pair from the list + } + return true; // Keep the pair in the list + }); + } + + startCheckingRegularly(interval: number) { + setInterval(() => { + this.checkConditions(); + }, interval); + } +} + +export class Scope { + parent?: Scope; + variables: Map<string, VarState>; + + constructor(parent?: Scope) { + this.variables = new Map(); + this.parent = parent; + } + assign(name: string, value: any, allow_reassign: boolean, priority: number, lifetime: number = -1) { + const varState = this.variables.get(name); + + if (varState !== undefined) { + // Update the existing object properties + varState.assign(value, priority); + } + else if (this.parent && this.parent.has_var(name)) { + // Only run if variable already exists + // RECURSION RECURSION RECURSION RECURSION RECURSION RECURSION RECURSION RECURSION RECURSION + this.parent.assign(name, value, allow_reassign, priority, lifetime) + } else { + // Create a new object and store it in the map + this.variables.set(name, new VarState(name, value, allow_reassign, priority, lifetime)); + } + } + + has_var(name: string): boolean { + if (this.variables.get(name) !== undefined) { + return true + } + else { + return this.parent ? this.parent.has_var(name) : false + } + } + + get_var(name: string) { + if (this.variables.get(name) !== undefined) { + return this.variables.get(name)!.get() + } + else if (this.parent) { + return this.parent.get_var(name) + } + else { // Will only go here if this scope is an orphan + // Check for infinite lifetime variables + let local_var = localStorage.getItem(name) + if (local_var != null) { + return local_var; + } + + // TODO: 3const server + + + // Return literal value only if all other possibilities are ruled out + return name + } + } +} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..157b6a7e2 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,70 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name =
"DreamBerd" +version = "0.9.9.9.9.9.9.9.9d" +description = "DreamBerd Compiler" +readme = {file = "README.md", content-type = "text/markdown"} +license = {file = "LICENSE"} +requires-python = ">=3.10" +classifiers = [ + "Development Status :: 5 - Alpha", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3", + "Operating System :: OS Independent", + "Topic :: Utilities", + "Typing :: Typed", +] +keywords = ["dream", "berd", "dreamberd", "compiler"] +dependencies = [ + "requests~=2.31.0", +] + +[project.optional-dependencies] +dev = [ + "mypy~=1.3.0", # must match .pre-commit-config.yaml + "pre-commit", + "pre-commit-hooks~=4.4.0", # must match .pre-commit-config.yaml + "black~=23.3.0", # must match .pre-commit-config.yaml + "isort~=5.12.0", # must match .pre-commit-config.yaml + "pycln~=2.1.3", # must match .pre-commit-config.yaml +] + +[project.urls] +"Homepage" = "https://github.com/TodePond/DreamBerd" +"Source" = "https://github.com/TodePond/DreamBerd" +"Bug Tracker" = "https://github.com/TodePond/DreamBerd/issues" + +[tool.mypy] +mypy_path = "src" +check_untyped_defs = true +disallow_any_generics = true +disallow_untyped_calls = true +disallow_untyped_defs = true +ignore_missing_imports = true +no_implicit_optional = true +no_implicit_reexport = true +show_column_numbers = true +show_error_codes = true +strict = true +strict_equality = true +warn_redundant_casts = true +warn_return_any = true +warn_unreachable = true +warn_unused_configs = true +warn_unused_ignores = true + +[tool.black] +target-version = ['py311'] + +[tool.isort] +profile = "black" +combine_as_imports = true +line_length = 79 +skip = [".git", ".github", ".venv"] + +[tool.pycln] +all = true +disable_all_dunder_policy = true diff --git a/src/README.md b/src/README.md new file mode 100644 index 000000000..c7444834b --- /dev/null +++ b/src/README.md @@ -0,0 +1,8 @@ +**Note:** This code is __under development__ and is not yet functional, let alone efficient. +# Comp-Trans-Piler +Running a perfect programming language requires a perfect compiler to run it. As such, the DreamBerd foundation has devised the next innovation in the field of compiling. The **Comp-Trans-Piler**. + +The Comp-Trans-Piler works by independently compiling DreamBerd while at the same time transpiling it to JavaScript, maximizing efficiency while staying true to DreamBerd. + +While the JavaScript compiler is extensive, it forgives barbaric practices such as loops, while at the same time lacking modern features such as Exact Equalities. As such DreamBerd first ensures that the code abides by the DreamBerd standard before passing it to lesser compilers. 
+ diff --git a/src/joker/tokenizer.py b/src/joker/tokenizer.py new file mode 100644 index 000000000..08f5e24a4 --- /dev/null +++ b/src/joker/tokenizer.py @@ -0,0 +1,350 @@ +import locale +import os +import re +from typing import Generator, Iterable + +operator_tokens = ["INC", "DEC", "ADD", "SUBTRACT", "NOT", "MULTIPLY", "EXPONENT", "DIVIDE", "AND", "OR", + "BIT_AND", "BIT_OR_EX", "BIT_OR_IN", "MODULO", "BIT_SHIFT_LEFT", "BIT_SHIFT_RIGHT_EX", "BIT_SHIFT_RIGHT_0", + "GTHAN", "LTHAN", "GEQUAL", "LEQUAL", "EQUAL", "LOOSE_EQUAL", "PRECISE_EQUAL", "LITERAL_EQUAL", + "ADD_ASSIGN", "SUBTRACT_ASSIGN", "MULT_ASSIGN", "DIV_ASSIGN", "MOD_ASSIGN", "EXP_ASSIGN", "ARROW", + '(', ')', '[', ']', '{', '}', 'NOT_EQUAL'] + +reserved_words = ["!", "IF", 'ELSE', 'TRUE', 'FALSE', 'CONST', 'VAR', 'INFINITY', 'FUNCTION', 'PREVIOUS', + 'NEXT', 'AWAIT', 'EXPORT', 'TO', 'CLASS', 'NEW', 'USE', 'DELETE', 'CURRENCY', 'WHEN', + 'RETURN', 'IMPORT', 'COMMA'] + +internal_tokens = ['INDENT', 'SPACE', 'EOF', 'NEWLINE', 'IDENTIFIER', 'NEW_FILE', 'ERROR', 'STRING'] + +misc_tokens = ['.', ":"] + +tokens = reserved_words + operator_tokens + internal_tokens + misc_tokens + +class Token: + __slots__ = ("token", "lexeme") + def __init__(self, token: str, lexeme: str | int | float) -> None: + global tokens + assert token.upper() in tokens + + self.token = token.upper() + self.lexeme = lexeme + + def __repr__(self) -> str: + return f'{self.token}({repr(self.lexeme)})' + + def __str__(self) -> str: + return f'{self.token}({repr(self.lexeme)})' + + +class SimpleStringCrawler: + __slots__ = ("raw", "cursor") + def __init__(self, raw: str) -> None: + self.raw = raw + self.cursor = 0 + + def pop(self) -> str: + if self.cursor == len(self.raw): + return '' + self.cursor += 1 + return self.raw[self.cursor - 1] + + def back(self, count: int = 1) -> None: + self.cursor -= count + + def peek(self, count: int = 1, ignore_space: bool = False) -> str: + if self.cursor == len(self.raw): + return '' + if ignore_space: + effective_cursor = self.cursor + while self.raw[effective_cursor] in ' \n\t\r': + effective_cursor += 1 + return self.raw[effective_cursor] + return self.raw[self.cursor:self.cursor + count] + + +class Tokenizer: + __slots__ = ("operators", "reserved_chars", "basic_mappings") + def __init__(self) -> None: + self.operators = '+-*/\\<>=()[] \t\n\r%^&|' + self.reserved_chars = '!;:.{},' + self.operators + + self.basic_mappings = { + ';': 'NOT', + '\\': 'DIVIDE', + '.': '.', + '(': '(', + ')': ')', + '[': '[', + ']': ']', + '{': '{', + '}': '}', + ":": ':', # bruh + "!": "!", + "^": "BIT_OR_EX", + '<': 'LTHAN', + '>': 'GTHAN', + '%': 'MODULO', + ',': 'COMMA' + } + + locale.setlocale(locale.LC_ALL, '') + regional_currency = str(locale.localeconv()['currency_symbol']) + if regional_currency == '': + # For maximum international accessibility, the generic currency sign is used if there is no currency sign for the given locale + regional_currency = '¤' + self.basic_mappings[regional_currency] = 'CURRENCY' + + def is_fn_subset(self, string: str) -> bool: + # to solve the function syntax I created this regex: + # if it doesn't get exactly one match then the word is invalid + function_regex = r"(?=.)(f{0,1}u{0,1}n{0,1}c{0,1}t{0,1}i{0,1}o{0,1}n{0,1})" + groups = re.findall(function_regex, string, re.IGNORECASE) + # the `and` is needed because, when there is no match, the resulting group is an empty string + return len(groups) == 1 and bool(groups[0]) + + def getNextToken(self, file: SimpleStringCrawler) -> Token: + def readchar(i: int = 1) -> str: + return ''.join(file.pop() for _ in range(i)) + + c = readchar() + if c == '': + # The file has ended + return Token('EOF', '') + + lexeme = '' + + if c == ' ': + if file.peek(2) == '  ': + c += file.pop() + c += file.pop() + # 3-space indent + return Token('INDENT', c) + else: + print(f" -2 {c}") + return Token('SPACE', c) + + elif c in '+-*/\\<>%;' and file.peek() == '=': + file.pop() + token_map = { + '+': 'ADD_ASSIGN', + '-': 'SUBTRACT_ASSIGN', + '*': 'MULT_ASSIGN', + '/': 'DIV_ASSIGN', + '\\': 'DIV_ASSIGN', + '<': 'LEQUAL', + '>': 'GEQUAL', + '%': 'MOD_ASSIGN', + ';': 'NOT_EQUAL' + } + return Token(token_map[c], c + '=') + # Let it continue if not followed by equal sign + + + elif c in '+-*&|<': + next_char = readchar() + if c == next_char: + token_map = { + '+': 'INC', + '-': 'DEC', + '*': 'EXPONENT', + '&': 'AND', + '|': 'OR', + '<': 'BIT_SHIFT_LEFT' + } + if c == '*' and file.peek() == '=': + file.pop() # consume the '=' as well + return Token('EXP_ASSIGN', '**=') + return Token(token_map[c], c*2) + else: + file.back() + token_map = { + '+': 'ADD', + '-': 'SUBTRACT', + '*': 'MULTIPLY', + '&': 'BIT_AND', + '|': 'BIT_OR_IN', + '<': 'LTHAN' + } + return Token(token_map[c], c) + + elif c == '=': + equals = 0 # while loop will count one over + while c == '=': + c = readchar() + equals += 1 + file.back() # Pushback + if equals == 1: + if c == ">": + # consume the ">" + readchar() + return Token('ARROW', '=>') + return Token('EQUAL', '=') + elif equals == 2: + return Token('LOOSE_EQUAL', '==') + elif equals == 3: + return Token('PRECISE_EQUAL', '===') + elif equals == 4: + return Token('LITERAL_EQUAL', '====') + else: # TODO: File splits (might have to be a preprocessor thing) + return Token('ERROR', 'Too much Equality (max is 4)') + + elif c == '>': + if file.peek() == '>': + file.pop() + if file.peek() == '>': + file.pop() + return Token('BIT_SHIFT_RIGHT_0', '>>>') + else: + return Token('BIT_SHIFT_RIGHT_EX', '>>') + else: + return Token('GTHAN', '>') + + elif c in '\"\'': + quote_format = '' + while c in '\"\'': + quote_format += c + c = file.pop() + + # leave c at the next char, it'll be added to the string + + quote = '' + while c not in '\"\'\n' and c != '': + quote += c + if c == '\\': + if file.peek() in '\"\'': + quote += file.pop() # Character already escaped + c = file.pop() + file.back() + + # check for end quotes + if c == '': + # EOF reached; User probably forgot a closing quote + # Due to ambiguity the rest of the file is now a string + # End quotes are presumed present, thus satisfying AI requirement + # Diagnosis: skill issue + return Token('STRING', quote) + elif c == '\n': + # Line breaks within strings are not allowed, so the string ends here + return Token('STRING', quote) + else: + # If there are end quotes, they must match the quote format exactly + for i in range(len(quote_format)): + c = file.pop() + if c != quote_format[-(i + 1)]: + # Mismatch + return Token('ERROR', 'String quote format mismatched') + + return Token('STRING', quote) + + # Comments or division with the wrong slash + elif c == '/': + if file.peek() == '/': + file.pop() # Get rid of the next slash + while c not in '\n\r': + c = file.pop() + file.back() + return self.getNextToken(file) # Should capture newline + else: + return Token('DIVIDE', c) + + + elif c in self.basic_mappings.keys(): + return Token(self.basic_mappings[c], c) + + # INT and REAL (really just IDENTIFIERS) + elif c.isdigit(): + while c.isdigit(): + lexeme += c + c = readchar() + file.back() # Pushback + + # c is one character beyond the end + if c == '.':
+ # REAL + lexeme += '.' + c = readchar() + if c.isdigit(): + while c.isdigit(): + lexeme += c + c = readchar() + elif c not in self.operators: + return Token('ERROR', 'Non-Operator immediately after real; letters are not real') + + file.back() + + return Token('IDENTIFIER', float(lexeme)) + + else: + # INT + return Token('IDENTIFIER', int(lexeme)) + + while not c.isspace() and c not in self.reserved_chars: + lexeme += c + + c = readchar() + + if len(lexeme) > 0: + file.back() + tok = lexeme.upper() + if tok in tokens: + return Token(lexeme, lexeme) + + # Case sensitive for maximum user disgruntlement + if lexeme == 'className': + return Token('CLASS', lexeme) + elif tok == 'CLASSNAME': + # Helpful error message to help insensitive users right their ways + return Token('ERROR', + 'The className keyword is Case-Sensitive, you\'re hurting its feelings you monster') + + # check for function + if self.is_fn_subset(tok): + return Token('FUNCTION', lexeme) + else: + return Token('IDENTIFIER', lexeme) + else: # c is not alpha- only remaining case are special characters that count as whitespace + if c in os.linesep: + if len(os.linesep) == 2 and readchar() != os.linesep[1]: + file.back() + return Token('NEWLINE', c) + elif c == '\t': + # Was very tempted to force you to only use the 3 spaces but this is complicated enough already + return Token('INDENT', c) + else: + print(f" -1 {c}") + return Token('SPACE', c) + + def tokenize_file(self, path: str) -> Generator[Token, None, None]: + crawler = None + with open(path, 'r') as reader: + crawler = SimpleStringCrawler(reader.read()) + reader.close() + + token = self.getNextToken(crawler) + while token.token != 'EOF': + yield token + token = self.getNextToken(crawler) + yield token # yield EOF + + +def catch_tokenizer_errors(tokens: Iterable[Token]) -> bool: + line = 1 + has_errors = False + for token in tokens: + if token.token == 'NEWLINE': + line += 1 + elif token.token == 'ERROR': + print(f'-Tokenizer: ParseError on Line {line}: {token.lexeme}') + has_errors = True + return has_errors + +if __name__ == "__main__": + tokens = list(Tokenizer().tokenize_file(os.path.join('test', 'db', 'db', 'functions.db'))) + out = "" + for token in tokens: + if token.token == 'NEWLINE': + out += '\n' + else: + out += token.token + ' ' + print(out) + \ No newline at end of file diff --git a/src/serious/compinterpret.py b/src/serious/compinterpret.py new file mode 100644 index 000000000..223c519b2 --- /dev/null +++ b/src/serious/compinterpret.py @@ -0,0 +1,651 @@ +import inspect +import locale +import os +import re +import sys +from typing import Generator, Iterable, Sequence + +import requests + +tokens = ["STRING", "NOT", "!", "IF", 'ELSE', '(', ')', '[', ']', 'TRUE', 'FALSE', 'CONST', 'VAR', '<', '>', 'INT', + 'REAL', 'INFINITY', 'FUNCTION', 'PREVIOUS', + 'NEXT', 'AWAIT', 'NEW_FILE', 'EXPORT', 'TO', 'CLASS', 'NEW', '.', 'USE', 'PLUS', 'MINUS', 'MULTIPLY', + 'DIVIDE', '=', 'IDENTIFIER', 'INDENT', + 'SPACE', 'DELETE', 'EOF', 'NEWLINE', '{', '}', 'INC', 'DEC', 'LOOSE_EQUALITY', 'PRECISE_EQUALITY', + 'LITERAL_EQUALITY', 'ERROR', 'CURRENCY', + 'WHEN', ":", "AND", 'OR', 'RETURN', "ARROW"] + + +class Token: + __slots__ = ("token", "lexeme") + def __init__(self, token: str, lexeme: str | int | float) -> None: + global tokens + assert token.upper() in tokens + + self.token = token.upper() + self.lexeme = lexeme + + def __repr__(self) -> str: + return f'{self.token}({repr(self.lexeme)})' + + def __str__(self) -> str: + return f'{self.token}({repr(self.lexeme)})' + + +class 
SimpleStringCrawler: + __slots__ = ("raw", "cursor") + def __init__(self, raw: str) -> None: + self.raw = raw + self.cursor = 0 + + def pop(self) -> str: + if self.cursor == len(self.raw): + return '' + self.cursor += 1 + return self.raw[self.cursor - 1] + + def back(self, count: int = 1) -> None: + self.cursor -= count + + def peek(self, count: int = 1, ignore_space: bool = False) -> str: + if self.cursor == len(self.raw): + return '' + if ignore_space: + effective_cursor = self.cursor + while self.raw[effective_cursor] in ' \n\t\r': + effective_cursor += 1 + return self.raw[effective_cursor] + return self.raw[self.cursor:self.cursor + count] + + +class Tokenizer: + __slots__ = ("operators", "reserved_chars", "basic_mappings") + def __init__(self) -> None: + self.operators = '+-*/<>=()[] ' + self.reserved_chars = '!;:.{}' + self.operators + + self.basic_mappings = { + ';': 'NOT', + '=': '=', + '*': 'MULTIPLY', + '/': 'DIVIDE', + '.': '.', + '(': '(', + ')': ')', + '[': '[', + ']': ']', + '<': '<', + '>': '>', + '{': '{', + '}': '}', + ":": ':', # bruh + "!": "!" + } + + locale.setlocale(locale.LC_ALL, '') + regional_currency = str(locale.localeconv()['currency_symbol']) + if regional_currency == '': + # For maximum international accessibility, the generic currency sign is used if there is no currency sign for the given locale + regional_currency = '¤' + self.basic_mappings[regional_currency] = 'CURRENCY' + + def is_fn_subset(self, string: str) -> bool: + # to solve the function syntax I created this regex: + # if it doesn't get exactly one match then the word is invalid + function_regex = r"(?=.)(f{0,1}u{0,1}n{0,1}c{0,1}t{0,1}i{0,1}o{0,1}n{0,1})" + groups = re.findall(function_regex, string, re.IGNORECASE) + # the `and` is needed because, when there is no match, the resulting group is an empty string + return len(groups) == 1 and bool(groups[0]) + + def getNextToken(self, file: SimpleStringCrawler) -> Token: + def readchar(i: int = 1) -> str: + return ''.join(file.pop() for _ in range(i)) + + c = readchar() + if c == '': + # The file has ended + return Token('EOF', '') + + lexeme = '' + + if c == ' ': + if file.peek(2) == '  ': + file.pop() + file.pop() + # 3-space indent + return Token('INDENT', '   ') + else: + return Token('SPACE', ' ') + + elif c in '+-': + next_char = readchar() + if c == next_char: + return Token('INC' if c == '+' else 'DEC', c * 2) + else: + file.back() + return Token('PLUS' if c == '+' else 'MINUS', c) + + elif c in '&|': + next_char = readchar() + if c == next_char: + return Token('AND' if c == '&' else 'OR', c * 2) + else: + # Let em cook + file.back() + + elif c == '=': + equals = 0 # while loop will count one over + while c == '=': + c = readchar() + equals += 1 + file.back() # Pushback + if equals == 1: + if c == ">": + # consume the ">" + readchar() + return Token('ARROW', '=>') + return Token('=', '=') + elif equals == 2: + return Token('LOOSE_EQUALITY', '==') + elif equals == 3: + return Token('PRECISE_EQUALITY', '===') + elif equals == 4: + return Token('LITERAL_EQUALITY', '====') + else: # TODO: File splits (might have to be a preprocessor thing) + return Token('ERROR', 'Too much Equality (max is 4)') + + elif c in '\"\'': + quote_format = '' + while c in '\"\'': + quote_format += c + c = file.pop() + + # leave c at the next char, it'll be added to the string + + quote = '' + while c not in '\"\'\n' and c != '': + quote += c + if c == '\\': + if file.peek() in '\"\'': + quote += file.pop() # Character already escaped + c = file.pop() + file.back() + + # check for end quotes + if c == '': + # EOF reached; User probably forgot a closing quote + # Due to ambiguity the rest of the file is now a string + # End quotes are presumed present, thus satisfying AI requirement + # Diagnosis: skill issue + return Token('STRING', quote) + elif c == '\n': + # Line breaks within strings are not allowed, so the string ends here + return Token('STRING', quote) + else: + # If there are end quotes, they must match the quote format exactly + for i in range(len(quote_format)): + c = file.pop() + if c != quote_format[-(i + 1)]: + # Mismatch + return Token('ERROR', 'String quote format mismatched') + + return Token('STRING', quote) + + elif c == '/' and file.peek() == '/': + file.pop() # Get rid of the next slash + while c not in '\n\r': + c = file.pop() + file.back() + return self.getNextToken(file) # Should capture newline + + elif c in self.basic_mappings.keys(): + return Token(self.basic_mappings[c], c) + + # INT and REAL + elif c.isdigit(): + while c.isdigit(): + lexeme += c + c = readchar() + file.back() # Pushback + + # c is one character beyond the end + if c == '.': + # REAL + lexeme += '.' + c = readchar() + if c.isdigit(): + while c.isdigit(): + lexeme += c + c = readchar() + elif c not in self.operators: + return Token('ERROR', 'Non-Operator immediately after real; letters are not real') + + file.back() + + return Token('REAL', float(lexeme)) + + else: + # INT + return Token('INT', int(lexeme)) + + while not c.isspace() and c not in self.reserved_chars: + lexeme += c + + c = readchar() + + if len(lexeme) > 0: + file.back() + tok = lexeme.upper() + if tok in tokens: + return Token(lexeme, lexeme) + + # Case sensitive for maximum user disgruntlement + if lexeme == 'className': + return Token('CLASS', lexeme) + elif tok == 'CLASSNAME': + # Helpful error message to help insensitive users right their ways + return Token('ERROR', + 'The className keyword is Case-Sensitive, you\'re hurting its feelings you monster') + + # check for function + if self.is_fn_subset(tok): + return Token('FUNCTION', lexeme) + else: + return Token('IDENTIFIER', lexeme) + else: # c is not alphanumeric; the only remaining cases are special characters that count as whitespace + if c == os.linesep[0]: + if len(os.linesep) == 2 and readchar() != os.linesep[1]: + file.back() + return Token('NEWLINE', os.linesep) + elif c == '\t': + # Was very tempted to force you to only use the 3 spaces but this is complicated enough already + return Token('INDENT', c) + else: + return Token('SPACE', c) + + def tokenize_file(self, path: str) -> Generator[Token, None, None]: + crawler = None + with open(path, 'r') as reader: + crawler = SimpleStringCrawler(reader.read()) + reader.close() + + token = self.getNextToken(crawler) + while token.token != 'EOF': + yield token + token = self.getNextToken(crawler) + yield token # yield EOF + + +def catch_tokenizer_errors(tokens: Iterable[Token]) -> bool: + line = 1 + has_errors = False + for token in tokens: + if token.token == 'NEWLINE': + line += 1 + elif token.token == 'ERROR': + print(f'-Tokenizer: ParseError on Line {line}: {token.lexeme}') + has_errors = True + return has_errors + + +class VarState: + __slots__ = ("reassign", "edit", "priority") + def __init__(self, allow_reassign: bool, allow_edit: bool, priority: int) -> None: + self.reassign = allow_reassign # Can set it to something else + self.edit = allow_edit # Can call methods on this + self.priority = priority # Amount of '!' after the declaration + + +class SimpleTokenCrawler: + __slots__ = ("raw", "cursor", "current_line") + def __init__(self, raw: Sequence[Token]) -> None: + self.raw = raw + self.cursor = 0 + self.current_line = 1 + + def pop(self, ignore_space: bool = True) -> Token: + if self.cursor == len(self.raw): + return Token('EOF', '') + self.cursor += 1 + + if ignore_space: + while self.raw[self.cursor - 1].token in ['SPACE', 'INDENT']: + self.cursor += 1 + + if self.raw[self.cursor - 1].token == 'NEWLINE': + self.current_line += 1 + + return self.raw[self.cursor - 1] + + def back(self, count: int = 1, ignore_space: bool = True) -> None: + self.cursor -= count + + if ignore_space: + while self.raw[self.cursor - 1].token in ['SPACE', 'INDENT']: + self.cursor -= 1 + + def peek(self, ignore_space: bool = True) -> Token: + if self.cursor >= len(self.raw): + return Token('EOF', '') + + if ignore_space: + offset = 0 + while self.raw[self.cursor + offset].token in ['SPACE', 'INDENT']: + offset += 1 + res = self.raw[self.cursor + offset] + else: + res = self.raw[self.cursor] + return res + + def peek_n(self, number: int, ignore_space: bool = True) -> Sequence[Token]: + token_list: list[Token] = [] + stop = False + original_cursor = self.cursor + while len(token_list) < number and not stop: + token = self.peek(ignore_space) + if token.token == 'EOF': + stop = True + token_list.append(token) + self.cursor += 1 + self.cursor = original_cursor + return token_list + + +# Running List of things that need to happen in runtime: +# Variable Lifetime checks +# `When` control flow +# Variable assignment priority + +class Parser: + __slots__ = ("tokens", "file", "js", "var_dict", "wanted_indent", "DEBUG") + def __init__(self, tokens: Sequence[Token]) -> None: + self.tokens = tokens + self.file = SimpleTokenCrawler(tokens) + self.js = "" + self.var_dict: dict[str | int | float, list[VarState]] = {} + # How much indent we are expecting to see at the moment + self.wanted_indent: dict[int, str] = {} + self.DEBUG = True + + def get_javascript(self) -> str: + return self.js + + def new_indent(self, source: str) -> None: + if not self.wanted_indent: + self.wanted_indent[0] = source + else: + index = len(self.wanted_indent) + self.wanted_indent[index] = source + + def expected_indent(self) -> int: + expected = 0 + for ind in self.wanted_indent: + if self.wanted_indent[ind]: + expected += 1 if self.wanted_indent[ind] == "+" else -1 + return expected + + def RaiseError(self, message: str) -> None: + cur_frame = inspect.currentframe() + assert cur_frame is not None + last_frame = cur_frame.f_back + assert last_frame is not None + last_code = last_frame.f_code + assert last_code is not None + caller_name = last_code.co_name + if self.DEBUG: + print(self.js) + print(f"Parser- ParseError on Line {self.file.current_line} from '{caller_name}': {message}") + + def parse(self) -> str: + self.StmtList() + return self.js + + ### Every Statement should first check if it is valid in the current location + ### If it is, it should be self-contained and output its valid JS to file.js + ### If it is not valid, it should raise an error + ### Functions should be committal: if you call a function, that means that it *should* be valid in that spot + ### Some Exceptions; For example EndStmt is non-committal + ### FUNCTION NAMES ARE PART OF THE USER DEBUG INFO + + def StmtList(self) -> bool: + while self.file.peek().token != 'EOF': + if not self.Stmt(): + self.RaiseError('Failed to parse statement') + return False + self.file.pop() # For completeness' sake + return True + + def Stmt(self) -> bool: + # Anything with a single equals sign: x = 5, const const x = 6 + if self.wanted_indent: + self.file.peek(ignore_space=False) + indent_check = self.Check_Indent_Stmt() + if not indent_check: + return indent_check + if self.file.peek().token in ['CONST', 'VAR']: + return self.Variable_Declaration_Stmt() + if self.file.peek().token == "IDENTIFIER" and self.file.peek_n(2)[1].token in ["INC", "DEC"]: + return self.Variable_Increase_Stmt() + + # Control Flow if ( ... ) { ... } else { ... } + + # Class declarations class x { ... }, className x { ... } + + # Function declarations: fn(x) => { ... } + if self.file.peek().token == "FUNCTION": + self.file.pop() + return self.Function_Declaration_Stmt() + + # Floating expressions: print(x), x, x == 5 + return False + + # Non-Committal + def EndStmt(self, format_template='') -> tuple[bool, int]: + i = 0 + end = False + while self.file.peek().token in '!?': # Allow any mix of ! and ? + self.file.pop() + i += 1 + end = True + + # Due to AI, new lines define a line if an ! is missing + # New line endings take the lowest priority, lower than a single ! + if self.file.peek().token == 'NEWLINE': + self.file.pop() + end = True + + if end: + self.js += f'{format_template.format(i)};' if format_template else ';' + + return end, i + + # Indent checks + def Check_Indent_Stmt(self) -> bool: + if self.file.peek().token == "}": + del self.wanted_indent[len(self.wanted_indent) - 1] + self.file.pop() + self.EndStmt() + return True + + while self.file.peek(ignore_space=False).token == "INDENT": + self.file.pop(ignore_space=False) + if self.file.peek(ignore_space=False).token == "SPACE": + self.RaiseError( + "Good try with the indentation but I think you did something wrong since it isn't a multiple of three.") + return False + return True + + # Declaration of a variable + def Variable_Declaration_Stmt(self) -> bool: + # Declaration + allow_reassign = self.file.pop().token == 'VAR' + + if self.file.peek().token not in ['CONST', 'VAR']: + self.RaiseError('Double or nothing; Need two const/var keywords to declare variable') + return False + + allow_edit = self.file.pop().token == 'VAR' + + # not sure about this check because we get here if var and const are at the beginning of the line + + if self.file.peek().token != 'IDENTIFIER': + self.RaiseError('Identify yourself NOW; Declaration requires variable to declare') + return False + + # Nothing in native JS allows you to prevent edits, so we only worry about reassignments here + # Bad reassignments will be caught by JS + if allow_reassign: + keyword = 'let' + else: + keyword = 'const' # javascript consts are cringe + + # We will handle bad edits in compile time + + var_name = self.file.pop().lexeme + + lifetime = None + + # lifetime detected + if self.file.peek().token == "<": + # remove opening lifetime identifier + self.file.pop() + + # Lifetime can either be an INT, or an INT followed by an IDENTIFIER (the only valid identifier + # after INT is 's') + # Alternatively, it can be INFINITY, which turns the variable into an environment variable + # With no specified lifetime, the variable will kill itself whenever normal variables would + + # If the lifetime is an INT, the variable lasts for that amount of lines + # If the lifetime is an INT followed by s, the variable lasts for that amount of seconds (or until + # the program dies) + # If the lifetime is INFINITY, it is an environment variable + + # To get the value of the Expr we allow it to dump to the JS, and remove it afterward to process
it properly + rollback_idx = len(self.js) + if not self.Expr(): + self.RaiseError('Lifetime must be an Expression') + return False + + lifetime = self.js[rollback_idx:] # INFINITY or Expression + self.js = self.js[:rollback_idx] + + extracted_token = self.file.pop().lexeme + # this can be improved by using valid lifetime characters + while extracted_token not in [">", os.linesep]: + lifetime += str(extracted_token) + extracted_token = self.file.pop().lexeme + if extracted_token != ">": + self.RaiseError("CLOSE YOUR LIFETIME DEFINITION") + + if self.file.peek().token != '=': + self.RaiseError('PUT AN EQUALS SIGN IN YOUR DECLARATION') + return False + self.file.pop() + # pop the value (this is temporary) + self.file.pop() + self.js += f'assign(\"{var_name}\", ' + if self.Expr(): # inserts expression + success, priority = self.EndStmt(f'{keyword == "let"}, {{}}, {lifetime})') + if not success: + self.RaiseError('Declaration statement didn\'t end when it should\'ve') + if self.var_dict.get(var_name) is None: + self.var_dict[var_name] = [] + self.var_dict[var_name].append(VarState(allow_reassign, allow_edit, priority)) + else: + self.RaiseError('Failed to parse expression in declaration') + return False + return True + + def Function_Declaration_Stmt(self) -> bool: + if self.file.peek().token != "IDENTIFIER": + self.RaiseError( + f'Something isn\'t right here after the function keyword there should be an identifier but I got a {self.file.peek().token}') + return False + + function_name = self.file.pop() + + if not self.file.pop().token == "(": + self.RaiseError("I think you tried to define a function but you forgot the parenthesis for the parameters") + return False + parameters = [] + while self.file.peek().token != ")": + param = self.file.pop() + if param.token == "IDENTIFIER": + parameters.append(param) + else: + self.RaiseError(f"I was expecting a parameter but {param.lexeme} doesn't look like a valid IDENTIFIER") + # consume the closing parenthesis + self.file.pop() + if self.file.pop().token != "ARROW": + self.RaiseError(f'You say that you want a function and you give me the parenthesis but where is the "=>" ?') + return False + # At this point there is either a { for a multi-line function or a single line function without { + if self.file.peek().lexeme == "{": + self.file.pop() + if self.file.peek().token != "NEWLINE": + self.RaiseError(f'I see that you are writing more stuff after the {"{"}, if it is a single line then ' + f'you don\'t need the {"{"} if there are multiple lines then you should send this other' + f' stuff to the new line') + return False + # from now until a } appears we should check that the code is indented + self.new_indent("function") + # consume exclamation marks or new_line + self.file.pop() + else: + # it should be handled by the other functions + pass + return True + + def Variable_Increase_Stmt(self) -> bool: + var_name = self.file.pop().lexeme + operation = self.file.pop().lexeme + self.js += f'{var_name}{operation}' + success, _ = self.EndStmt() + if not success: + self.RaiseError('Declaration statement didn\'t end when it should\'ve') + return False + return True + + def Expr(self) -> bool: + return True + + +def transpile(file_path: str) -> str: + try: + # TODO: Replace with DreamBerd 3const server + response = requests.head("http://www.google.com", timeout=5) + if response.status_code != 200: + print( + "-Meta: NetworkError: DreamBerd 3const services are down, or you do not have an internet connection. 
Please rectify either as soon as possible.") + exit(1) + except requests.ConnectionError: + print( + "-Meta: NetworkError: DreamBerd 3const services are down, or you do not have an internet connection. Please rectify either as soon as possible. ") + exit(1) + + tokens = tuple(Tokenizer().tokenize_file(file_path)) + + if catch_tokenizer_errors(tokens): + print('\n') + print("Tokenizer reports L code, fix your code or I won't compile this garbage") + exit(1) + + parser = Parser(tokens) + if parser.parse(): + # If succeeded parsing + return parser.get_javascript() + raise RuntimeError("Somehow token error was not caught") + + +def transpile_and_save(read_file_path: str, write_file_path: str | None = None) -> None: + if write_file_path is None: + directory, filename = os.path.split(read_file_path) + head, _ = filename.rsplit(".", 1) + write_file_path = os.path.join(directory, f'{head}.js') + javascript = transpile(read_file_path) + with open(write_file_path, "w", encoding="utf-8") as write_file: + write_file.write(javascript) + + +if __name__ == '__main__': + file_path = sys.argv[1] if len(sys.argv) > 1 else os.path.join('test', 'db', 'db', 'functions.db') + transpile_and_save(file_path) diff --git a/src/serious/regpiler.py b/src/serious/regpiler.py new file mode 100644 index 000000000..2168916e4 --- /dev/null +++ b/src/serious/regpiler.py @@ -0,0 +1,439 @@ +import os +import re +from typing import Sequence + +import requests + +from compinterpret import SimpleStringCrawler, Tokenizer + +reference_tokenizer = Tokenizer() + +# https://qph.cf2.quoracdn.net/main-qimg-8d58857bb87f14c8e1ce2f6686ef3e04 +operator_precedence = { + '(': 15, + ')': 15, + '.': 15, + '++': 14, + '--': 14, + ';': 14, + '**': 13, + '*': 12, + '/': 12, + '\\': 12, + '%': 12, + '+': 11, + '-': 11, + '<<': 10, # WHY DOES JAVASCRIPT HAVE THESE? 
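+ # Higher value = tighter binding; RawToken.compare() and the shunting-yard pass in process_expr() below both read this table.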
+ '>>': 10, + '>>>': 10, + '<=': 9, + '<': 9, + '>': 9, + '>=': 9, + '==': 8, + '===': 8, + '====': 8, + '&': 7, + '^': 6, + '|': 5, + '&&': 4, + '||': 3, +} + +class RawToken: + __slots__ = ("token", "lexeme", "priority") + def __init__(self, token: str, lexeme: str, priority: int = 0) -> None: + self.token = token + self.lexeme = lexeme + self.priority = priority + + def __repr__(self) -> str: + return f'{self.token}({repr(self.lexeme)})' + + def __str__(self) -> str: + return f'{self.token}({repr(self.lexeme)})' + + def compare(self, other: "RawToken") -> int: + # presume both are operators + if self.priority < other.priority: + return 1 + elif self.priority > other.priority: + return -1 + else: + if operator_precedence[self.lexeme] > operator_precedence[other.lexeme]: + return 1 + elif operator_precedence[self.lexeme] < operator_precedence[other.lexeme]: + return -1 + else: + return 0 + +class RawTokenCrawler: + def __init__(self, raw: Sequence[RawToken]) -> None: + self.raw = raw + self.cursor = 0 + + def pop(self) -> RawToken | None: + if self.cursor == len(self.raw): + return None + self.cursor += 1 + return self.raw[self.cursor - 1] + + def back(self, count: int = 1) -> None: + self.cursor -= count + + def peek(self) -> RawToken | None: + if self.cursor >= len(self.raw): + return None + return self.raw[self.cursor] + +def split_raw_file(path: str) -> list[str] | list[tuple[str, str]]: + with open(path, 'r', encoding="utf-8") as file: + content = file.read() + + # Split the file content using the regex pattern + split_content = re.split(r'={5,} *([^= ]*) *=+', content) + + # Get the capture groups from the regex pattern + capture_groups = re.findall(r'={5,} *([^= ]*) *=+', content) + + if len(split_content) == 1: + return split_content + else: + # re.split with a capturing group interleaves the captured names with the bodies, + # so pair each body with the name that heads it (any preamble before the first header is dropped) + result = list(zip(split_content[2::2], capture_groups)) + + return result + +def preprocess_line(line: str) -> str: + processed_line = line + # Convert ++ to += 1 and -- to -= 1 + processed_line = re.sub(r'^([^ +\\\-*\/<>=()\[\]!;:.{}\n]+)(\+|-)\2$', r'\1 \2= 1', processed_line) + + return processed_line + +def process_expr(expr: str) -> str: + expr_split = expr.split('{')[-1].split('}')[0] + if expr_split == '': + return expr # Wasn't an expression + else: + expr = expr_split + + tokens: list[RawToken] = [] + crawler = SimpleStringCrawler(expr.strip().replace('×', '*').replace('÷', '/').replace('^', '**')) + + # 0 = Identifier / Number (same thing) + # 1 = Parenthetical + # 2 = Operator (anything else) + # 3 = Redirect + + state = 3 + + if crawler.peek() in '+\\-*/<>%=)]!;:.{}': + raise ValueError('Who starts an Expression like that?
I just got here!') + + while crawler.peek() != '': + match state: + case 0: + # Remove leading spaces + while crawler.peek() in ' \n\r\t': + crawler.pop() + if crawler.peek() == '': + break + if crawler.peek() == '': + break + identifier = '' + while crawler.peek() not in '+\\-*/<>%=()[]!;&:{} \n\r\t': + identifier += crawler.pop() + tokens.append(RawToken('IDENTIFIER', identifier)) + state = 3 + case 1: + # Deprecated + pass + case 2: + spaces = 0 + operator = '' + trail = 0 + while crawler.peek() in '+\\-*/<>%=&()[] \t': + if crawler.peek() in ' \t': + # Indents are 3 space + spaces += 1 if crawler.pop() == ' ' else 3 + if operator != '': + trail += 1 + else: + # & & == && + if operator != '' and trail > 0: + break + if crawler.peek() in ')]}' and operator in '([{' and operator != '': + break + operator += crawler.pop() + if crawler.peek() == '': + break + tokens.append(RawToken('OPERATION', operator, spaces)) + crawler.back(trail) + if crawler.peek() == '': + break + state = 3 + case 3: + if crawler.peek() == '': + # Valid spot to stop + # We also have to stop + break + # if crawler.peek() in '([': + # state = 1 + # continue + if crawler.peek(ignore_space=True) in '+\\-*/<>%=& \t([])': + state = 2 + continue + elif crawler.peek(ignore_space=True) in '\n\r{}:': + raise ValueError('Malformed Expression') + else: + state = 0 + continue + + postfix_tokens = [] + operator_stack: list[RawToken] = [] + + for token in tokens: + if token.token == 'OPERATION': + if ')' in token.lexeme: + while '(' not in operator_stack[-1].lexeme: + postfix_tokens.append(operator_stack.pop()) + operator_stack.pop() # Remove extra parentheses + elif '(' in token.lexeme: + operator_stack.append(token) + elif len(operator_stack) == 0 or '(' in operator_stack[-1].lexeme or token.compare(operator_stack[-1]) == 1: + operator_stack.append(token) + else: + while len(operator_stack) > 0 and token.compare(operator_stack[-1]) <= 0: + postfix_tokens.append(operator_stack.pop()) + operator_stack.append(token) + else: # IDENTIFIER + postfix_tokens.append(token) + + while len(operator_stack) > 0: + postfix_tokens.append(operator_stack.pop()) + + reconstructed: list[RawToken] = [] + + def varify(identifier: str) -> str: + if re.match(r'^[0-9]+(?:\.[0-9]+)?', identifier): + return identifier + elif re.match(r'^\".*\"$', identifier): + # String object + # This is so stupid + return f"\\\"{identifier}\\\"" + else: + return f'\"{identifier}\"' + + for token in postfix_tokens: + if token.token == 'OPERATION': + op1 = reconstructed.pop() + op2 = reconstructed.pop() + + if token.lexeme == '====': + # We do a little compile-time evaluation + if op2.lexeme == op1.lexeme: + reconstructed.append(RawToken('SYSTEM', 'true')) + else: + reconstructed.append(RawToken('SYSTEM', 'false')) + else: + reconstructed.append(RawToken('SYSTEM', f'current_scope.get_var(current_scope.get_var({varify(op2.lexeme)}){token.lexeme.strip()}current_scope.get_var({varify(op1.lexeme)}))')) + else: + reconstructed.append(token) + + out_str = "" + + # This being a loop is only really a formality because it should always parse to a single token + for token in reconstructed: + out_str += token.lexeme + + return out_str + +def preprocess_subfile(subfile: str) -> str: + # Remove comments + subfile = re.sub(r'//[^\n\r]*', '', subfile) + + # Convert all functions to common syntax + subfile = re.sub(r'=> *([^\s{}][^\n\r!?{}]+)([!?]*)', r'=> {return \1}', subfile) + subfile = re.sub(r'([^ +\\\-*\/<>=()\[\]!;:.{}\n,]+) =>', r'(\1) =>', subfile) + subfile = 
re.sub(r'[functio]u?n?c?t?i?o?n? +\(([^ +\\\-*\/<>=()\[\]!;:.{}\n,]+(?:, *[^ +\\\-*\/<>=()\[\]!;:.{}\n,]+)*)?\) *(?:=>)?', r'(\1) =>', subfile) + subfile = re.sub(r'[functio]u?n?c?t?i?o?n? +([^ +\\\-*\/<>=()\[\]!;:.{}\n,]+)\(([^ +\\\-*\/<>=()\[\]!;:.{}\n,]+(?:, *[^ +\\\-*\/<>=()\[\]!;:.{}\n,]+)*)?\) *(?:=>)?', r'const const \1 = (\2) =>', subfile) + + # Regularize how code blocks are formatted for easier parsing + subfile = re.sub(r'[\n\r]*{', r'{\n', subfile, flags=re.DOTALL) + subfile = subfile.replace('}', '\n}') + + return subfile + +def transpile_subfile(subfile: str) -> str: + subfile = preprocess_subfile(subfile) + # Split the file content using the regex pattern + split_content = re.split(r'(!+|\n|\?)', subfile) + + # Get the capture groups from the regex pattern + capture_groups = re.findall(r'(!+|\n|\?)', subfile) + + result = "" + + futures: dict[int, list[str]] = {} + offset = 0 + for i in range(len(split_content)): + if i % 2 == 1: + # re.split alternates code pieces (even indices) with separator pieces (odd indices) + offset -= 1 + continue + + if i+offset in futures: + result += '\n'.join(futures[i+offset]) + '\n' + futures[i + offset] = [] + + if split_content[i].strip() == '': + continue + + result += f'// DB_DEBUG: {split_content[i]}{capture_groups[i + offset]}\n' + line, new_futures = transpile_line(preprocess_line(split_content[i]), len(capture_groups[i + offset]), + '?' in capture_groups[i + offset], i + offset) + + result += line + '\n' + + result += 'WHEN_BLOCK_MANAGER.checkConditions(); // Check all pending when statements in case the above line changed something\n' + + for k, v in new_futures.items(): + if k not in futures: + futures[k] = v + else: + futures[k].extend(v) + + return result + +def check_indentation(match: re.Match[str], line: str) -> str: + indentation = match.group("indentation") if match.group("indentation") else "" + if indentation and len(indentation) % 3 != 0: + raise ValueError("What a strange indentation scheme you use; this could confuse someone! Please use the " + "officially recognized 3-space indentation system, thank you. Error occurred in\n" + line) + return indentation + +def transpile_line(line: str, priority: int, debug: bool, line_num: int) -> tuple[str, dict[int, list[str]]]: + futures: dict[int, list[str]] = {} + + # Assignment + if match := re.match( + # With named groups it is possible to have "optional" groups and the regex is still cursed. + # I know that re.IGNORECASE is a thing but without it the regex looks more cursed. + r'^(?P<indentation> +)?(?:(?P<third_const>[Cc][Oo][Nn][Ss][Tt]) +(?=[Cc][Oo][Nn][Ss][Tt] +[Cc][Oo][Nn][Ss][Tt]))?(?P<invalid_mix>^[Cc][Oo][Nn][Ss][Tt] +(?= *([Vv][Aa][Rr]|[Cc][Oo][Nn][Ss][Tt]) +(?=[Vv][Aa][Rr]|[Cc][Oo][Nn][Ss][Tt])))?(?P<first_const>[Cc][Oo][Nn][Ss][Tt]|[Vv][Aa][Rr]) +(?P<second_const>[Cc][Oo][Nn][Ss][Tt]|[Vv][Aa][Rr]) +(?P<var_name>[^ +\\\-*\/<>=()\[\]!;:.{}\n]+)(?:<(?P<lifetime>.*)>)?(?: *: *[A-Za-z]+)? *(?P<assignment_operator>[+\-\/*]?)= *(?P<value>[^!\n?]+)', + line + ): + if match.group("invalid_mix"): + raise ValueError("You thought that having const or var three times without having all of them being const " + "was a good idea?
Well it isn't so fix it") + + indentation = check_indentation(match, line) # Convenience function because this is checked a lot + + allow_reassignment = match.group("first_const").lower() == 'var' + lifetime: str | int = -1 + if match.group("lifetime") is not None: + lifetime_match = match.group("lifetime") + if lifetime_match[-1] != 's' and lifetime_match.lower() != 'infinity': + futures[int(lifetime_match)+line_num] = [f'variables.get({lifetime_match})!.kill();'] + elif lifetime_match[-1] == 's': + lifetime = int(lifetime_match[:-1]) + else: + lifetime = 'infinity' + + if match.group("third_const"): + # TODO implement const const const + pass + + value = match.group("value") + if match.group("assignment_operator"): + value = f'{match.group("var_name")} {match.group("assignment_operator")} {match.group("value")}' + + name = match.group("var_name") + if not re.match(r'[0-9]*\.?[0-9]+', name): + # Not a number + name = f"\"{name}\"" + return f'{indentation}current_scope.assign({name}, {process_expr(value)}, {str(allow_reassignment).lower()}, {priority}, {lifetime});', futures + + # Reassignment + elif match := re.match( + r'^(?P<indentation> +)?(?P<prevs>(?:previous +)+)?(?P<variable>[^ +\\\-*\/<>=()\[\]!;:.{}\n]+) *(?P<assignment_operator>[+\-\/*]?)= *(?P<value>[^!\n?]+)', + line, + re.IGNORECASE + ): + indentation = check_indentation(match, line) # Convenience function because this is checked a lot + + if match.group('assignment_operator'): + value = f'{match.group("variable")} {match.group("assignment_operator")} ({match.group("value")})' + else: + value = match.group('value') + + if match.group('prevs'): + # TODO: Time travel + pass + else: + + name = match.group("variable") + if not re.match(r'[0-9]*\.?[0-9]+', name): + # Not a number + name = f"\"{name}\"" + + return f'{indentation}current_scope.assign({name}, {process_expr(value)}, undefined, {priority})', futures + + # single-line function; in the multi-line case the code group is just "{" + elif match := re.match( + r'(?= *[functio])((?P<indentation> +)?(?P<function>f?u?n?c?t?i?o?n?) )+(?P<name>.+?) *(?P<parameters>\(.*?\)) +=> +(?P<code>.+)', + line, + re.IGNORECASE + ): + func_keyword = match.group("function") + func_name = match.group("name") + parameters = match.group("parameters") + # code syntax should be checked + code = match.group("code") + line = line.replace(func_keyword, "function").replace("=>", "") + return line, futures + + # replace the previous keyword with the function call (not sure if this is right) + line = re.sub(r'previous +(?!=[()]*)([^?! ]*)', r'current_scope.get_var("\1").previous()', line, 1, re.IGNORECASE) + + # Only here for debugging; when completed this should return an error if execution reaches the end + return f"{line} // TODO", futures + + +def regpile() -> None: + try: + # TODO: Replace with DreamBerd 3const server + response = requests.head("http://www.google.com", timeout=5) + if response.status_code != 200: + print( + "-Meta: NetworkError: DreamBerd 3const services are down, or you do not have an internet connection. " + "Please rectify either as soon as possible.") + exit(1) + except requests.ConnectionError: + print( + "-Meta: NetworkError: DreamBerd 3const services are down, or you do not have an internet connection.
") + exit(1) + + files = split_raw_file(os.path.join('test', 'db', 'db', 'functions.db')) + + if not os.path.isdir('built'): + os.mkdir('built') + + for file in files: + result = "" + if isinstance(file, str): + result = transpile_subfile(file) + else: + result = transpile_subfile(file[0]) + + filename = str(len(os.listdir('built'))) + '.tsx' if isinstance(file, str) else file[1] + + with open(f'src{os.sep}template.tsx', 'r', encoding="utf-8") as reader: + template = reader.read() + result = template.replace('// USER CODE HERE //', result) + + with open(os.path.join('built', filename), 'w', encoding="utf-8") as writer: + writer.write(result) + + +if __name__ == '__main__': + regpile() diff --git a/src/template.tsx b/src/template.tsx new file mode 100644 index 000000000..c57213efc --- /dev/null +++ b/src/template.tsx @@ -0,0 +1,11 @@ +// Helper functions +import { VarState, ConditionBlockManager } from "../built/helper" + +// Format: var_name -> VarState +const current_scope = new Map() +const WHEN_BLOCK_MANAGER = new ConditionBlockManager(); + + + + +// USER CODE HERE // \ No newline at end of file diff --git a/test/db/README.md b/test/db/README.md new file mode 100644 index 000000000..06333c22d --- /dev/null +++ b/test/db/README.md @@ -0,0 +1,2 @@ +# INFOHAZARDS AHEAD +**Warning:** This folder contains potentially dangerous files that could pose information hazards. Proceed with caution. \ No newline at end of file diff --git a/test/db/db/basic.db b/test/db/db/basic.db new file mode 100644 index 000000000..feb8a8b7d --- /dev/null +++ b/test/db/db/basic.db @@ -0,0 +1,8 @@ +print("Hello world")! +print("Hello world")!!! +print("Hello world")? + +if (;false) { + print("Hello world")! +} + diff --git a/test/db/db/functions.db b/test/db/db/functions.db new file mode 100644 index 000000000..fcf6041a4 --- /dev/null +++ b/test/db/db/functions.db @@ -0,0 +1,46 @@ +function add(a, b) {return a + b!} + +const const sum = function (a, b) { + return a + b! +} +const const product = (a, b) => { + return a * b! +} +const const power = (a, b) => a ** b! + +const const square = a => a * a + +const const double = (a) => { + if (a % 2 === 0) { + if (a == 2) { + return a + } + return a / 2! + } + else { + return a * 2! + } +} + +const const a = 5! + +if (a > 5) +{ + a = 4!! +} else { + a = 8!! +} + +when (a === 4) { + a = 6! // No effect +} + +class PlayerMaker { + function makePlayer() => { + class Player { + const var health = 10! + } + const const player = new Player()! + return player! + } +} \ No newline at end of file diff --git a/test/db/db/time_recursion.db b/test/db/db/time_recursion.db new file mode 100644 index 000000000..ba29847f6 --- /dev/null +++ b/test/db/db/time_recursion.db @@ -0,0 +1,13 @@ +var var x<20> = 5! +x++! + +const const 3 = 6 +print(2 + 1) + +if (x > 5) { + previous x = previous x - 3! +} +else { + print(x)! +} + diff --git a/test/db/db/time_travel.db b/test/db/db/time_travel.db new file mode 100644 index 000000000..93fa12288 --- /dev/null +++ b/test/db/db/time_travel.db @@ -0,0 +1,16 @@ +const var x<20s> = 5! +x += 7!!! +x++! +print(x) + +"""6\n8\n""" + +x = 5! // No effect +x++! +print(x) +print(previous x) + +const const playerMaker = new PlayerMaker()! +const var player1 = playerMaker.makePlayer()! +const var player2 = playerMaker.makePlayer()! + diff --git a/test/db/db/tokens.db b/test/db/db/tokens.db new file mode 100644 index 000000000..e69de29bb