TegraExplorer/source/script/lexer.c

312 lines
7.5 KiB
C

#include "lexer.h"
#include "types.h"
#include "args.h"
#include <mem/heap.h>
static inline int isValidWord(char c) {
char r = c | 0x20;
return ((r >= 'a' && r <= 'z') || c == '_');
}
static inline int isValidNum(char c) {
return (c >= '0' && c <= '9');
}
static inline int isValidVar(char c) {
return (isValidWord(c) || isValidNum(c));
}
static inline int isValidHexNum(char c) {
char r = c | 0x20;
return (isValidNum(r) || (r >= 'a' && r <= 'f'));
}
#define makeLexarToken(token, var) ((lexarToken_t) {token, var})
typedef struct {
u8 tokenC;
u8 tokenN;
} lexarTranslation_t;
lexarTranslation_t lexarTranslations[] = {
{'}', RCBracket},
{',', Seperator},
{'+', Plus},
{'-', Minus},
{'*', Multiply},
{'/', Division},
{'%', Mod},
{'!', Not},
{':', Selector},
{')', RBracket},
{']', RSBracket},
{'(', LBracket},
{'{', LCBracket},
{'=', Equal},
{'[', LSBracket},
{'<', Smaller},
{'>', Bigger},
{'\0', 0},
};
/*
Should we make vars with next char being '(' a function and vars with an equals (or [x] wait how are we gonna spot that) after it to be an assignmentVar
*/
char lexarDebugGetTokenC(u8 tokenN) {
for (int i = 0; lexarTranslations[i].tokenC; i++) {
if (lexarTranslations[i].tokenN == tokenN) {
return lexarTranslations[i].tokenC;
}
}
if (tokenN == EquationSeperator)
return ';';
return '?';
}
/*
* !! we need to remake this
void lexarVectorClear(lexarVector_t* vec) {
for (int i = 0; i < vec->stored; i++) {
if (vec->tokens[i].token == Variable || vec->tokens[i].token == StrLit)
if (vec->tokens[i].text != NULL)
free(vec->tokens[i].text);
}
free(vec->tokens);
}
*/
void lexarVectorClear(Vector_t *v){
vecPDefArray(lexarToken_t*, entries, v);
for (int i = 0; i < v->count; i++){
if (entries[i].token != IntLit && entries[i].text != NULL){
free(entries[i].text);
}
}
vecFreePtr(v);
}
#define ELIFC(c) else if (*in == c)
Vector_t runLexer(const char* in, u32 len) {
const char *start = in;
Vector_t vec = newVec(sizeof(lexarToken_t), 16);
// store last var for re-assignment
// var -> func if next obj is '('
// var -> assignment if next obj is '='
// var -> arrassignment if next obj is '[' and before '=' is ']'
// maybe measure len between ( ) and [ ], so this doesn't have to be done during runtime?
// We also have to support (()). maybe if '(' set indent level, then if ')' minus indent level, set len. indent level contains {u8 level, u16 token, u16 startoffset}
u32 lastAssignment = 0;
while ((in - start) < len) {
lexarToken_t* lx = vecGetArray(lexarToken_t*, vec);
if ((lx[vec.count - 2].token == StrLit || lx[vec.count - 2].token == IntLit || lx[vec.count - 2].token == Variable || lx[vec.count - 2].token == RSBracket || lx[vec.count - 2].token == RBracket)
&& (lx[vec.count - 1].token == Variable || lx[vec.count - 1].token == LCBracket || lx[vec.count - 1].token == RCBracket)) {
if (!(lx[lastAssignment].token == ArrayVariableAssignment && lx[vec.count - 1].token == Variable && lx[vec.count - 2].token == RSBracket)) {
lexarToken_t holder = lx[vec.count - 1];
lx[vec.count - 1] = makeLexarToken(EquationSeperator, 0);
vecAddElement(&vec, holder);
lx = vecGetArray(lexarToken_t*, vec);
}
}
if (isValidWord(*in)) {
char* startWord = in;
in++;
while (isValidVar(*in))
in++;
vecAddElement(&vec, (makeLexarToken(Variable, utils_copyStringSize(startWord, in - startWord))));
continue;
}
else if (isValidNum(*in) || (*in == '-' && isValidNum(in[1]))) {
int parse = 0;
u8 negative = (*in == '-');
if (negative)
in++;
if (*in == '0' && (in[1] | 0x20) == 'x') {
in += 2;
while (isValidHexNum(*in)) {
parse = parse * 16 + (*in & 0x0F) + (*in >= 'A' ? 9 : 0);
in++;
}
}
else while (isValidNum(*in)) {
parse = parse * 10 + *in++ - '0';
}
if (negative)
parse *= -1;
vecAddElement(&vec, makeLexarToken(IntLit, parse));
continue;
}
ELIFC('(') {
if (lx[vec.count - 1].token == Variable)
lx[vec.count - 1].token = Function;
vecAddElement(&vec, makeLexarToken(LBracket, 0));
}
ELIFC('[') {
if (lx[vec.count - 1].token == Variable)
lx[vec.count - 1].token = ArrayVariable;
vecAddElement(&vec, makeLexarToken(LSBracket, 0));
}
ELIFC('.'){
if (lx[vec.count - 1].token == Variable)
lx[vec.count - 1].token = MemberVariable;
}
ELIFC('=') { // Do we need to keep = if the vars are assignments anyway?
if (in[1] == '='){
vecAddElement(&vec, makeLexarToken(EqualEqual, 0));
in++;
continue;
}
if (lx[vec.count - 1].token == Variable)
lx[vec.count - 1].token = VariableAssignment;
else if (lx[vec.count - 1].token == RSBracket) {
int back = 1;
while (lx[vec.count - back].token != ArrayVariable) {
back++;
if (vec.count - back < 0)
break; // major error
}
if (lx[vec.count - back].token == ArrayVariable) {
lx[vec.count - back].token = ArrayVariableAssignment;
lastAssignment = vec.count - back;
in++;
continue;
}
}
lastAssignment = 0;
}
ELIFC('{') {
if (lx[vec.count - 1].token == VariableAssignment) {
lx[vec.count - 1].token = FunctionAssignment;
}
vecAddElement(&vec, makeLexarToken(LCBracket, 0));
}
ELIFC('"') {
char* startStr = ++in;
int len = 0;
while (*in != '"') {
in++;
}
len = in - startStr;
char* storage = malloc(len + 1);
int pos = 0;
for (int i = 0; i < len; i++) {
if (startStr[i] == '\\') {
if (startStr[i + 1] == 'n') {
storage[pos++] = '\n';
i++;
continue;
}
if (startStr[i + 1] == 'r') {
storage[pos++] = '\r';
i++;
continue;
}
}
storage[pos++] = startStr[i];
}
storage[pos] = '\0';
vecAddElement(&vec, makeLexarToken(StrLit, storage));
}
ELIFC('#') {
while (*in != '\n')
in++;
}
ELIFC('&') {
if (in[1] == '&') {
vecAddElement(&vec, makeLexarToken(LogicAND, 0));
in++;
}
else {
vecAddElement(&vec, makeLexarToken(AND, 0));
}
}
ELIFC('|') {
if (in[1] == '|') {
vecAddElement(&vec, makeLexarToken(LogicOR, 0));
in++;
}
else {
vecAddElement(&vec, makeLexarToken(OR, 0));
}
}
ELIFC('>'){
if (in[1] == '>'){
vecAddElement(&vec, makeLexarToken(BitShiftRight, 0));
in++;
}
else {
int a = (in[1] == '=') ? 1 : 0;
vecAddElement(&vec, makeLexarToken(Bigger, 0));
in += a;
}
}
ELIFC('<'){
if (in[1] == '<'){
vecAddElement(&vec, makeLexarToken(BitShiftLeft, 0));
in++;
}
else {
int a = (in[1] == '=') ? 1 : 0;
vecAddElement(&vec, makeLexarToken(Smaller + a, 0));
in += a;
}
}
else {
int val = 0;
for (int i = 0; lexarTranslations[i].tokenC; i++) {
if (lexarTranslations[i].tokenC == *in) {
val = lexarTranslations[i].tokenN;
break;
}
}
in++;
if (*in == '=' && val >= Smaller && val <= Not) {
val++;
in++;
}
if (val != Invalid)
vecAddElement(&vec, makeLexarToken(val, 0));
continue;
}
in++;
}
lexarToken_t* lx = vecGetArray(lexarToken_t*, vec);
if ((lx[vec.count - 2].token == StrLit || lx[vec.count - 2].token == IntLit || lx[vec.count - 2].token == Variable || lx[vec.count - 2].token == RSBracket || lx[vec.count - 2].token == RBracket)
&& (lx[vec.count - 1].token == Variable || lx[vec.count - 1].token == LCBracket || lx[vec.count - 1].token == RCBracket)) {
lexarToken_t holder = lx[vec.count - 1];
lx[vec.count - 1] = makeLexarToken(EquationSeperator, 0);
vecAddElement(&vec, holder);
}
vecAddElement(&vec, makeLexarToken(EquationSeperator, 0));
return vec;
}