149 lines
3.6 KiB
C
149 lines
3.6 KiB
C
#include "tokens.h"
|
|
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#define ERR_LEN 256
|
|
|
|
/*
|
|
* Grammar:
|
|
* token
|
|
* expr
|
|
* (op expr expr)
|
|
* (list expr expr ... )
|
|
*/
|
|
|
|
// Characters that always form a one-character token by themselves.
static const char singleTokens[] = "()+-*/='";

// Reports whether `c` is one of the standalone token characters.
// Returns the matching character (non-zero) when `c` appears in
// singleTokens and 0 otherwise; the table's terminating NUL never matches.
int isSingle(const char c)
{
    for (const char *p = singleTokens; *p != '\0'; p++) {
        if (*p == c) {
            return *p;
        }
    }
    return 0;
}
|
|
|
|
// Returns 1 when `c` is an ASCII decimal digit ('0'..'9'), otherwise 0.
int isDigit(const char c)
{
    if (c < '0') {
        return 0;
    }
    if (c > '9') {
        return 0;
    }
    return 1;
}
|
|
|
|
// Returns 1 when `c` is a hexadecimal digit, otherwise 0.
// Only '0'..'9' and lower-case 'a'..'f' are accepted; upper-case
// 'A'..'F' is rejected.
int isHex(const char c)
{
    const int decimal = c >= '0' && c <= '9';
    const int letter = c >= 'a' && c <= 'f';
    return decimal || letter;
}
|
|
|
|
// Returns 1 when `c` is a space, a horizontal tab or a newline, otherwise 0.
// No other characters (carriage return included) count as whitespace.
int isWhitespace(const char c)
{
    switch (c) {
    case ' ':
    case '\t':
    case '\n':
        return 1;
    default:
        return 0;
    }
}
|
|
|
|
// Logical complement of isWhitespace(): returns 1 when `c` is not
// whitespace and 0 when it is.
int notWhitespace(const char c)
{
    return isWhitespace(c) == 0;
}
|
|
|
|
// Return needs to be freed, if not null
|
|
struct Slice* nf_tokenize(const char* input, struct Error* err)
|
|
{
|
|
if (!input) {
|
|
err->context = malloc(sizeof(char) * ERR_LEN);
|
|
strcpy(err->context, "no input");
|
|
return NULL;
|
|
}
|
|
|
|
int token_count = MAX_TOK_CNT;
|
|
struct Slice* slices = malloc(sizeof(struct Slice) * token_count);
|
|
while (slices == NULL) {
|
|
token_count /= 2;
|
|
slices = malloc(sizeof(struct Slice) * token_count);
|
|
}
|
|
|
|
int i = 0;
|
|
int slice = 0;
|
|
|
|
int parens = 0;
|
|
while (input[i] != '\0') {
|
|
int l = 1;
|
|
// printd("input: '%c'\n", input[i]);
|
|
|
|
if (isWhitespace(input[i]) || input[i] == ';') {
|
|
i++;
|
|
continue;
|
|
}
|
|
|
|
if (input[i] == '(') {
|
|
parens++;
|
|
} else if (input[i] == ')') {
|
|
parens--;
|
|
if (parens < 0) {
|
|
err->context = malloc(sizeof(char) * ERR_LEN + 1);
|
|
err->code = MISMATCHED_PARENS;
|
|
int start = i > ERR_LEN ? i - ERR_LEN : 0;
|
|
strncpy(err->context, &input[start], ERR_LEN);
|
|
free(slices);
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
slices[slice].text = &input[i];
|
|
|
|
if (isSingle(input[i])) {
|
|
i++;
|
|
} else if (input[i] == '"') {
|
|
if (input[i + 1] == '"' && input[i + 2] == '"') {
|
|
// Triple-quoted block
|
|
i += 2;
|
|
slices[slice].text += 2;
|
|
for (;;) {
|
|
i++;
|
|
if (input[i] == '"' && input[i + 1] == '"' && input[i + 2] == '"') {
|
|
break;
|
|
}
|
|
l++;
|
|
if (input[i] == '\0' || input[i + 1] == '\0' || input[i + 2] == '\0') {
|
|
err->context = malloc(sizeof(char) * ERR_LEN + 1);
|
|
err->code = UNEXPECTED_EOF;
|
|
int start = i > ERR_LEN ? i - ERR_LEN : 0;
|
|
strncpy(err->context, &input[start], ERR_LEN);
|
|
free(slices);
|
|
return NULL;
|
|
}
|
|
}
|
|
} else {
|
|
// Simple string
|
|
while (input[++i] != '"' && input[i] != '\0') {
|
|
l++;
|
|
}
|
|
}
|
|
i++;
|
|
} else {
|
|
while (!isWhitespace(input[++i]) && !isSingle(input[i]) && input[i] != '"' && input[i] != '\0') {
|
|
l++;
|
|
}
|
|
}
|
|
|
|
slices[slice].length = l;
|
|
slice++;
|
|
}
|
|
|
|
if (parens != 0) {
|
|
err->context = malloc(sizeof(char) * ERR_LEN);
|
|
err->code = MISMATCHED_PARENS;
|
|
int start = i > ERR_LEN ? i - ERR_LEN : 0;
|
|
strncpy(err->context, &input[start], ERR_LEN);
|
|
free(slices);
|
|
return NULL;
|
|
}
|
|
|
|
slices[slice].text = NULL;
|
|
slices[slice].length = 0;
|
|
|
|
return slices;
|
|
}
|