[DRAFT] Make an interpreter for this abomination #74

Open · wants to merge 78 commits into base: main

Commits (78)
2ffb75a  Groundwork (Bytestorm5, Jun 4, 2023)
5aa5b4a  typo (Bytestorm5, Jun 4, 2023)
7dc2119  tokenizer nearly done? (Bytestorm5, Jun 4, 2023)
d32846c  mobile coding??????? (Bytestorm5, Jun 4, 2023)
75ead99  added more yelling (Bytestorm5, Jun 4, 2023)
2a9fb21  newline and debugging (Bytestorm5, Jun 4, 2023)
f68141e  Merge pull request #1 from TodePond/main (Bytestorm5, Jun 5, 2023)
7aba489  very indecisive about the naming (Bytestorm5, Jun 5, 2023)
347110d  Merge branch 'main' of https://github.com/Bytestorm5/DreamBerd (Bytestorm5, Jun 5, 2023)
1d3a98d  forgot to save (Bytestorm5, Jun 5, 2023)
23407e7  tokenizer probably works (Bytestorm5, Jun 5, 2023)
50732c2  Put Tokenizer in a box (Bytestorm5, Jun 5, 2023)
e678549  Tokenizer error handling (Bytestorm5, Jun 5, 2023)
c52f87b  Generalize crawler to hopefully make parsing easier (Bytestorm5, Jun 5, 2023)
448aef7  Improved AI (Bytestorm5, Jun 5, 2023)
7a558c2  Implicit AI with newlines (Bytestorm5, Jun 5, 2023)
d3c5655  Added Americentrism (Bytestorm5, Jun 5, 2023)
25bd3ad  Removed Americentrism (Bytestorm5, Jun 5, 2023)
2be54ab  Merge branch 'TodePond:main' into main (Bytestorm5, Jun 5, 2023)
0ccea99  Added className and other misc changes (Bytestorm5, Jun 5, 2023)
d392846  Individual tokens for Exclamations (Bytestorm5, Jun 5, 2023)
d9320f5  totally didn't forget boolean operators who would do that not this gu… (Bytestorm5, Jun 5, 2023)
6ab9618  Merge branch 'TodePond:main' into main (Bytestorm5, Jun 5, 2023)
806a8b3  Check for internet connection (Bytestorm5, Jun 6, 2023)
2f0f120  Parser groundwork (Bytestorm5, Jun 6, 2023)
c074a2f  Improved "FUNCTION" keyword checks. (Odinmylord, Jun 6, 2023)
872ba48  Reformatted file. (Odinmylord, Jun 6, 2023)
62b2214  Update template.tsx (Bytestorm5, Jun 6, 2023)
64f17fe  REGEX REGEX REGEX REGEX REGEX REGEX REGEX REGEX REGEX REGEX REGEX REGEX (Bytestorm5, Jun 6, 2023)
dfede24  Made the regex even more cursed, now matches triple const (Odinmylord, Jun 7, 2023)
2865c10  Started implementing single line function declaration (Odinmylord, Jun 7, 2023)
0ae9440  Fixed the regex (splitting it in multiple lines wasn't a good idea) a… (Odinmylord, Jun 7, 2023)
11881fc  Fixed small issue with indentation (Odinmylord, Jun 7, 2023)
d82fd46  Fixed issues with variable assignment and function keyword (Odinmylord, Jun 7, 2023)
df530c0  Re-enable identification requirement && Lifetime functionality in com… (Bytestorm5, Jun 8, 2023)
7917404  it is way too late to figure out time travel (Bytestorm5, Jun 8, 2023)
5493a41  Preparing for chicanery (Bytestorm5, Jun 8, 2023)
5a0a0e8  Variables tracked in a map now for greater flexibility (Bytestorm5, Jun 8, 2023)
9036f89  This is a first try to check function syntax it isn't complete yet (Odinmylord, Jun 8, 2023)
ccaf1a4  some processor functions (Bytestorm5, Jun 8, 2023)
efac972  Wrap expr in get_var (Bytestorm5, Jun 8, 2023)
3445a8d  forgot to delete a keyword (Bytestorm5, Jun 8, 2023)
265a27f  Dynamically determine if quotes are needed (Bytestorm5, Jun 8, 2023)
645885a  expressions are hard (Bytestorm5, Jun 9, 2023)
b7c0580  Merge remote-tracking branch 'origin/main' into functions (Odinmylord, Jun 9, 2023)
babf1d2  Added indentation check and fixed issue with newlines on linux (Odinmylord, Jun 12, 2023)
9082e9c  Now the script works with python 3.8+ (Odinmylord, Jun 12, 2023)
945b278  almost finished parsing expressions (Bytestorm5, Jun 13, 2023)
e3d7fab  why was I parsing to begin with? (Bytestorm5, Jun 13, 2023)
4036885  Revert "why was I parsing to begin with?" (Bytestorm5, Jun 13, 2023)
336ccac  Figured out parsing but do exponents *really* have to be right-associ… (Bytestorm5, Jun 13, 2023)
04165bc  Finished expression parsing except for right associativity (Bytestorm5, Jun 13, 2023)
a3bc397  Line preprocessor now solves precise equalities (Bytestorm5, Jun 13, 2023)
08cae0c  Moved condition block manager to helper file (Bytestorm5, Jun 13, 2023)
91dd845  keeping equality check simple for now, might change (Bytestorm5, Jun 13, 2023)
ee6b452  Moved precise equality to expr processor (Bytestorm5, Jun 13, 2023)
f218614  When check conditions and advanced variable geting (Bytestorm5, Jun 13, 2023)
d88389b  We have some sort of function syntax check (Odinmylord, Jun 13, 2023)
92f60d7  Added const const var to invalid mix (Odinmylord, Jun 13, 2023)
aceda74  Merge pull request #2 from Bytestorm5/functions (Bytestorm5, Jun 13, 2023)
8155a62  Recursive scoping (Bytestorm5, Jun 13, 2023)
08c55da  Added type identifiers (they don't do anything) (Bytestorm5, Jun 13, 2023)
11e4722  All functions lead to arrows (Bytestorm5, Jun 13, 2023)
91a11b4  attempt at block handling (Bytestorm5, Jun 13, 2023)
c06b6df  fix(compiler): variable declaration statements (gabrielchl, Jun 22, 2023)
8f71680  Add slots and fix type annotations (CoolCat467, Jun 22, 2023)
3fd1c02  Add pyproject.toml for dependancies and mypy flags (CoolCat467, Jun 22, 2023)
793ef9b  Merge pull request #3 from gabrielchl/main (Bytestorm5, Jun 22, 2023)
fd84740  Merge branch 'pr/4' (Bytestorm5, Jun 22, 2023)
2fd43e7  Merge pull request #5 from TodePond/main (Bytestorm5, Jun 22, 2023)
03dc576  "serious" compiler boilerplate (Bytestorm5, Jun 23, 2023)
baa183a  Reduce code duplication (CoolCat467, Jun 23, 2023)
59701a6  Add code formatting (CoolCat467, Jun 23, 2023)
ff68d1d  Revert "Reduce code duplication", wrong branch (CoolCat467, Jun 23, 2023)
78255b8  refactor (Bytestorm5, Jun 23, 2023)
f1c62af  feat: use filepath from args (gabrielchl, Jun 23, 2023)
6dbda7d  Merge pull request #8 from gabrielchl/feat-filepath-from-args (Bytestorm5, Jun 23, 2023)
435eba3  Merge pull request #7 from CoolCat467/add-formatting (Bytestorm5, Jun 23, 2023)
Files changed · Changes from 17 commits
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
venv/
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -330,6 +330,8 @@ import add!
add(3, 2)!
```

Note that because it is impossible to export all functions of a file to all other DreamBerd files in existence, there is __no__ DreamBerd standard library.

By the way, to see DreamBerd in action, check out [this page](https://github.com/TodePond/DreamBerd/blob/main/LICENSE.md).

## Class
5 changes: 5 additions & 0 deletions src/README.md
@@ -0,0 +1,5 @@
**Note:** This code is __under development__ and is not yet functional, let alone efficient.
# Compinterpreting
Running a perfect programming language requires a perfect compiler. As such, the DreamBerd foundation has devised the next innovation in the field of compiling: the **Compinterpreter**.

The Compinterpreter interprets DreamBerd while simultaneously transpiling it to JavaScript, maximizing efficiency while staying true to DreamBerd.
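
The sketch below is not part of this PR; it is only a hypothetical illustration of that dual pipeline, with stand-in `evaluate` and `transpile` helpers that handle a single toy statement:

```python
# A minimal sketch of the compinterpret idea. `evaluate` and `transpile`
# are hypothetical stand-ins, not the real interpreter or transpiler.

def evaluate(stmt: str) -> None:
    # Stand-in interpreter: only understands print("...")! statements.
    if stmt.startswith('print("') and stmt.endswith('")!'):
        print(stmt[len('print("'):-len('")!')])

def transpile(stmt: str) -> str:
    # Stand-in transpiler: DreamBerd's '!' terminator becomes JS's ';'.
    return stmt[:-1] + ';' if stmt.endswith('!') else stmt

def compinterpret(source: str) -> str:
    js = []
    for stmt in source.splitlines():
        if stmt.strip():
            evaluate(stmt)              # run it now (the interpreter half)
            js.append(transpile(stmt))  # emit JavaScript (the compiler half)
    return '\n'.join(js)

print(compinterpret('print("Hello world")!'))  # prints the greeting, then the JS
```
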
283 changes: 283 additions & 0 deletions src/compinterpret.py
@@ -0,0 +1,283 @@
# NOTE: this file uses the match statement, so it requires Python 3.10+.
import locale
import os

tokens = ["STRING", "NOT", "!", "IF", 'ELSE', '(', ')', '[', ']', 'TRUE', 'FALSE', 'CONST', 'VAR', '<', '>', 'INT', 'REAL', 'INFINITY', 'FUNCTION', 'PREVIOUS',
          'NEXT', 'AWAIT', 'NEW_FILE', 'EXPORT', 'TO', 'CLASS', 'NEW', '.', 'USE', 'PLUS', 'MINUS', 'MULTIPLY', 'DIVIDE', '=', 'IDENTIFIER', 'INDENT',
          'SPACE', 'DELETE', 'EOF', 'NEWLINE', '{', '}', 'INC', 'DEC', 'LOOSE_EQUALITY', 'PRECISE_EQUALITY', 'LITERAL_EQUALITY', 'ERROR', 'CURRENCY']
locale.setlocale(locale.LC_ALL, '')


class Token():
    def __init__(self, token: str, lexeme: str) -> None:
        assert token.upper() in tokens

        self.token = token.upper()
        self.lexeme = lexeme

    def __repr__(self) -> str:
        return f'{self.token}({repr(self.lexeme)})'

    def __str__(self) -> str:
        return f'{self.token}({repr(self.lexeme)})'


class SimpleListCrawler():
    def __init__(self, raw) -> None:
        self.raw = raw
        self.cursor = 0

    def pop(self):
        # Keep advancing the cursor even past the end so that a back()
        # after an EOF read stays symmetric; otherwise pushing back at EOF
        # would re-read the last character forever.
        self.cursor += 1
        if self.cursor > len(self.raw):
            return ''
        return self.raw[self.cursor - 1]

    def back(self, count=1):
        self.cursor -= count

    def peek(self, count=1):
        # Slicing already yields '' (or a short read) at the end of input
        return self.raw[self.cursor:self.cursor + count]


class Tokenizer():
    def __init__(self) -> None:
        self.operators = '+-*/<>=()[] '
        self.reserved_chars = '!;.{}' + self.operators

        self.basic_mappings = {
            ';': 'NOT',
            # '=' is not mapped here; runs of '=' get their own branch below
            '*': 'MULTIPLY',
            '/': 'DIVIDE',
            '.': '.',
            '(': '(',
            ')': ')',
            '[': '[',
            ']': ']',
            '<': '<',
            '>': '>',
            '{': '{',
            '}': '}'
        }

        regional_currency = locale.localeconv()['currency_symbol']
        if regional_currency == '':
            # Americentrism, baby 😎🦅🔫🔫🦅🦅🦅🔫🦅 🦅🔫🔫🔫🦅🔫🦅🔫 🦅🔫🔫🔫🦅🦅🔫🔫 🦅🔫🔫🦅🔫🦅🦅🦅 🦅🦅🔫🦅🦅🦅🦅🦅 🦅🔫🔫🦅🦅🔫🦅🦅 🦅🔫🔫🦅🔫🦅🦅🔫 🦅🔫🔫🦅🦅🔫🦅🦅 🦅🦅🔫🦅🦅🦅🦅🦅 🦅🦅🔫🔫🔫🦅🦅🔫 🦅🦅🔫🦅🔫🔫🔫🔫 🦅🦅🔫🔫🦅🦅🦅🔫 🦅🦅🔫🔫🦅🦅🦅🔫😎
            regional_currency = '$'
        self.basic_mappings[regional_currency] = 'CURRENCY'

    def is_fn_subset(self, string):
        # DreamBerd accepts any ordered subset of FUNCTION as the function
        # keyword (FUNCTION, FUNC, FN, UNION, ...), so check that every
        # character of the candidate appears in "FUNCTION" in order.
        target = "FUNCTION"
        i = 0

        for char in string:
            i = target.find(char, i)
            if i == -1:
                return False
            i += 1

        return True

    def getNextToken(self, file: SimpleListCrawler):
        def readchar(i=1):
            return ''.join([file.pop() for _ in range(i)])

        c = readchar()

        if c == '':
            # The file has ended
            return Token('EOF', '')

        lexeme = ''

        if c == ' ':
            if file.peek(2) == '  ':
                file.pop()
                file.pop()
                # 3-space indent
                return Token('INDENT', '   ')
            else:
                return Token('SPACE', ' ')

        elif c == '!':
            marks = 0
            while c == '!':
                c = readchar()
                marks += 1
            # Push back the character that ended the run; this is safe even
            # when the file ends after a statement, because pop() keeps
            # advancing past the end.
            file.back()  # pushback
            return Token('!', '!' * marks)

        elif c in '+-':
            next_char = readchar()
            if c == next_char:
                return Token('INC' if c == '+' else 'DEC', c * 2)
            else:
                file.back()
                return Token('PLUS' if c == '+' else 'MINUS', c)

        elif c == '=':
            equals = 0
            while c == '=':
                c = readchar()
                equals += 1
            file.back()  # pushback
            match equals:
                case 1:
                    return Token('=', '=')
                case 2:
                    return Token('LOOSE_EQUALITY', '==')
                case 3:
                    return Token('PRECISE_EQUALITY', '===')
                case 4:
                    return Token('LITERAL_EQUALITY', '====')
                case _:  # TODO: File splits (might have to be a preprocessor thing)
                    return Token('ERROR', 'Too much Equality (max is 4)')

        elif c in '\"\'':
            quote_format = ''
            while c in '\"\'':
                quote_format += c
                c = file.pop()

            # leave c at the next char, it'll be added to the string

            quote = ''
            while c not in '\"\'\n' and c != '':
                quote += c
                if c == '\\':
                    if file.peek() in '\"\'':
                        quote += file.pop()  # character already escaped
                c = file.pop()
            file.back()

            # check for end quotes
            if c == '':
                # EOF reached; user probably forgot a closing quote.
                # Due to ambiguity, the rest of the file is now a string.
                # End quotes are presumed present, thus satisfying the AI requirement.
                # Diagnosis: skill issue
                return Token('STRING', quote)
            elif c == '\n':
                # Line breaks within strings are not allowed, so the string ends here
                return Token('STRING', quote)
            else:
                # If there are end quotes, they must match the opening quote
                # format exactly, in reverse order
                for i in range(len(quote_format)):
                    c = file.pop()
                    if c != quote_format[-(i + 1)]:
                        # Mismatch
                        return Token('ERROR', 'String quote format mismatched')

                return Token('STRING', quote)

        elif c == '/' and file.peek() == '/':
            file.pop()  # get rid of the next slash
            while c not in '\n\r':
                c = file.pop()
            file.back()
            return self.getNextToken(file)  # should capture the newline

        elif c in self.basic_mappings:
            return Token(self.basic_mappings[c], c)

        # INT and REAL
        elif c.isdigit():
            while c.isdigit():
                lexeme += c
                c = readchar()

            # c is now one character beyond the end of the integer part
            if c == '.':
                # REAL
                lexeme += '.'
                c = readchar()
                if c.isdigit():
                    while c.isdigit():
                        lexeme += c
                        c = readchar()
                elif c not in self.operators:
                    return Token('ERROR', 'Non-Operator immediately after real; letters are not real')

                file.back()  # pushback
                return Token('REAL', float(lexeme))
            else:
                # INT
                file.back()  # pushback
                return Token('INT', int(lexeme))

        while not c.isspace() and c not in self.reserved_chars:
            lexeme += c
            c = readchar()

        if len(lexeme) > 0:
            file.back()
            tok = lexeme.upper()
            if tok in tokens:
                return Token(tok, lexeme)

            # check for function
            if self.is_fn_subset(tok):
                return Token('FUNCTION', lexeme)
            else:
                return Token('IDENTIFIER', lexeme)
        else:
            # c is not alphanumeric; the only remaining cases are special
            # characters that count as whitespace
            if c == '\n':
                if readchar() != '\r':
                    file.back()
                return Token('!', c)  # newlines implicitly end statements
            elif c == '\r':
                if readchar() != '\n':
                    file.back()
                return Token('!', c)
            elif c == '\t':
                # Was very tempted to force you to only use the 3 spaces but this is complicated enough already
                return Token('INDENT', c)
            else:
                return Token('SPACE', c)

    def tokenize_file(self, path):
        with open(path, 'r') as reader:
            crawler = SimpleListCrawler(reader.read())

        token = self.getNextToken(crawler)
        while token.token != 'EOF':
            yield token
            token = self.getNextToken(crawler)
        yield token  # yield the EOF token too


def catch_tokenizer_errors(tokens: list[Token]):
    line = 1
    has_errors = False
    for token in tokens:
        # Newlines come through as '!' tokens whose lexeme is the raw character
        if token.lexeme in ('\n', '\r'):
            line += 1
        elif token.token == 'ERROR':
            print(f'-Tokenizer: ParseError on Line {line}: {token.lexeme}')
            has_errors = True
    return has_errors


class Parser():
    def __init__(self) -> None:
        pass


if __name__ == '__main__':
    # os.path.join keeps the test path working on Windows and Linux alike
    token_stream = list(Tokenizer().tokenize_file(os.path.join('test', 'db', 'db', 'time_travel.db')))

    if catch_tokenizer_errors(token_stream):
        print('\n')
        print("Tokenizer reports L code, fix your code or I won't compile this garbage")
        exit(1)
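
For a quick sanity check outside the bundled test files, a hypothetical usage sketch like this (assuming `src/compinterpret.py` is importable as `compinterpret`) tokenizes a single DreamBerd statement:

```python
# Hypothetical usage sketch, assuming src/compinterpret.py is on the path.
from compinterpret import SimpleListCrawler, Tokenizer

crawler = SimpleListCrawler('print("Hello world")!\n')
tokenizer = Tokenizer()
token = tokenizer.getNextToken(crawler)
while token.token != 'EOF':
    # Expected stream: IDENTIFIER, (, STRING, ), !, and a newline-as-! token
    print(token)
    token = tokenizer.getNextToken(crawler)
```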

2 changes: 2 additions & 0 deletions test/db/README.md
@@ -0,0 +1,2 @@
# INFOHAZARDS AHEAD
**Warning:** This folder contains potentially dangerous files that could pose information hazards. Proceed with caution.
8 changes: 8 additions & 0 deletions test/db/db/basic.db
@@ -0,0 +1,8 @@
print("Hello world")!
print("Hello world")!!!
print("Hello world")?

if (;false) {
print("Hello world")!
}

40 changes: 40 additions & 0 deletions test/db/db/time_travel.db
@@ -0,0 +1,40 @@
const var x = 5!
x++!
print(x)
previous x = 7!!!

"""6\n8\n"""

const var x = 5!
x++!
print(x)
print(previous x)

"""6\n5\n"""

fnc gaming() => {
if (x == 5) {
print(previous x * x)
}
}

class Player {
const var health = 10!
}

const var player1 = new Player()!
const var player2 = new Player()! //Error: Can't have more than one 'Player' instance!

class PlayerMaker {
function makePlayer() => {
class Player {
const var health = 10!
}
const const player = new Player()!
return player!
}
}

const const playerMaker = new PlayerMaker()!
const var player1 = playerMaker.makePlayer()!
const var player2 = playerMaker.makePlayer()!