2016-04-18 04:25:36 -04:00
|
|
|
#include "tokens.h"
|
2022-01-07 16:55:03 -05:00
|
|
|
|
2016-04-18 04:25:36 -04:00
|
|
|
#include <stdlib.h>
|
2021-07-21 11:26:04 -04:00
|
|
|
#include <string.h>
|
2016-04-18 04:25:36 -04:00
|
|
|
|
2021-07-21 11:26:04 -04:00
|
|
|
// Upper bound on the number of characters copied into an error's context
// string when the tokenizer reports a failure.
#define ERR_LEN 256
|
|
|
|
|
2016-04-18 04:25:36 -04:00
|
|
|
/*
 * Grammar:
 * token
 * expr
 * (op expr expr)
 * (list expr expr ... )
 */
|
|
|
|
|
|
|
|
// Characters that always form a one-character token of their own.
static const char singleTokens[] = "()+-*/='";

// Is the char a standalone token?
// Returns the character itself (non-zero) when `c` appears in singleTokens
// and 0 otherwise. The NUL terminator is never reported as a token.
int isSingle(const char c)
{
    // strchr would also match the string terminator, so rule it out first.
    if (c == '\0') {
        return 0;
    }
    return strchr(singleTokens, c) != NULL ? c : 0;
}
|
|
|
|
|
2022-01-07 16:55:03 -05:00
|
|
|
// Returns 1 when `c` is an ASCII decimal digit ('0'..'9'), 0 otherwise.
int isDigit(const char c)
{
    if (c < '0') {
        return 0;
    }
    return c <= '9';
}
|
|
|
|
|
2022-01-07 16:55:03 -05:00
|
|
|
// Returns 1 when `c` is a hexadecimal digit, 0 otherwise.
// Only the lowercase letters 'a'..'f' are accepted; 'A'..'F' are not.
int isHex(const char c)
{
    if (c >= 'a' && c <= 'f') {
        return 1;
    }
    return c >= '0' && c <= '9';
}
|
|
|
|
|
2022-01-07 16:55:03 -05:00
|
|
|
// Returns 1 when `c` is a space, a tab or a newline, 0 otherwise.
// No other character (carriage return included) counts as whitespace.
int isWhitespace(const char c)
{
    switch (c) {
    case ' ':
    case '\t':
    case '\n':
        return 1;
    default:
        return 0;
    }
}
|
|
|
|
|
2022-01-07 16:55:03 -05:00
|
|
|
// Logical inverse of isWhitespace: returns 1 when `c` is not whitespace
// and 0 when it is.
int notWhitespace(const char c)
{
    if (isWhitespace(c)) {
        return 0;
    }
    return 1;
}
|
|
|
|
|
2020-05-04 18:14:41 -04:00
|
|
|
// Return needs to be freed, if not null
|
2022-01-07 16:55:03 -05:00
|
|
|
struct Slice* nf_tokenize(const char* input, struct Error* err)
|
2016-04-18 04:25:36 -04:00
|
|
|
{
|
2022-01-07 16:55:03 -05:00
|
|
|
if (!input) {
|
2021-12-13 10:47:35 -05:00
|
|
|
err->context = malloc(sizeof(char) * ERR_LEN);
|
|
|
|
strcpy(err->context, "no input");
|
2016-04-18 04:25:36 -04:00
|
|
|
return NULL;
|
2021-07-21 11:26:04 -04:00
|
|
|
}
|
2016-04-18 04:25:36 -04:00
|
|
|
|
2021-07-21 11:26:04 -04:00
|
|
|
int token_count = MAX_TOK_CNT;
|
2022-01-07 16:55:03 -05:00
|
|
|
struct Slice* slices = malloc(sizeof(struct Slice) * token_count);
|
|
|
|
while (slices == NULL) {
|
2021-07-21 11:26:04 -04:00
|
|
|
token_count /= 2;
|
|
|
|
slices = malloc(sizeof(struct Slice) * token_count);
|
|
|
|
}
|
2016-04-18 04:25:36 -04:00
|
|
|
|
|
|
|
int i = 0;
|
|
|
|
int slice = 0;
|
|
|
|
|
|
|
|
int parens = 0;
|
2022-01-07 16:55:03 -05:00
|
|
|
while (input[i] != '\0') {
|
2020-05-10 13:51:33 -04:00
|
|
|
int l = 1;
|
|
|
|
// printd("input: '%c'\n", input[i]);
|
2020-05-04 10:03:35 -04:00
|
|
|
|
2022-01-07 16:55:03 -05:00
|
|
|
if (isWhitespace(input[i]) || input[i] == ';') {
|
2020-05-04 10:03:35 -04:00
|
|
|
i++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2022-01-07 16:55:03 -05:00
|
|
|
if (input[i] == '(') {
|
2016-04-18 04:25:36 -04:00
|
|
|
parens++;
|
|
|
|
} else if (input[i] == ')') {
|
|
|
|
parens--;
|
2022-01-07 16:55:03 -05:00
|
|
|
if (parens < 0) {
|
2021-12-13 10:47:35 -05:00
|
|
|
err->context = malloc(sizeof(char) * ERR_LEN + 1);
|
|
|
|
err->code = MISMATCHED_PARENS;
|
2021-07-21 11:26:04 -04:00
|
|
|
int start = i > ERR_LEN ? i - ERR_LEN : 0;
|
2021-12-13 10:47:35 -05:00
|
|
|
strncpy(err->context, &input[start], ERR_LEN);
|
2020-05-08 02:29:06 -04:00
|
|
|
free(slices);
|
|
|
|
return NULL;
|
|
|
|
}
|
2016-04-18 04:25:36 -04:00
|
|
|
}
|
|
|
|
|
2020-05-10 13:51:33 -04:00
|
|
|
slices[slice].text = &input[i];
|
|
|
|
|
2022-01-07 16:55:03 -05:00
|
|
|
if (isSingle(input[i])) {
|
2016-04-18 04:25:36 -04:00
|
|
|
i++;
|
2022-01-07 16:55:03 -05:00
|
|
|
} else if (input[i] == '"') {
|
|
|
|
if (input[i + 1] == '"' && input[i + 2] == '"') {
|
2021-12-13 10:47:35 -05:00
|
|
|
// Triple-quoted block
|
|
|
|
i += 2;
|
|
|
|
slices[slice].text += 2;
|
2022-01-07 16:55:03 -05:00
|
|
|
for (;;) {
|
2021-12-13 10:47:35 -05:00
|
|
|
i++;
|
2022-01-07 16:55:03 -05:00
|
|
|
if (input[i] == '"' && input[i + 1] == '"' &&
|
|
|
|
input[i + 2] == '"') {
|
2021-12-13 10:47:35 -05:00
|
|
|
break;
|
|
|
|
}
|
2022-01-07 16:55:03 -05:00
|
|
|
if (input[i] == '\0' || input[i + 1] == '\0' ||
|
|
|
|
input[i + 2] == '\0') {
|
2021-12-13 10:47:35 -05:00
|
|
|
err->context = malloc(sizeof(char) * ERR_LEN + 1);
|
|
|
|
err->code = UNEXPECTED_EOF;
|
|
|
|
int start = i > ERR_LEN ? i - ERR_LEN : 0;
|
|
|
|
strncpy(err->context, &input[start], ERR_LEN);
|
|
|
|
free(slices);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// Simple string
|
2022-01-07 16:55:03 -05:00
|
|
|
while (input[++i] != '"' && input[i] != '\0') {
|
2021-12-13 10:47:35 -05:00
|
|
|
l++;
|
|
|
|
}
|
2020-05-10 02:27:59 -04:00
|
|
|
}
|
|
|
|
i++;
|
2020-05-04 10:03:35 -04:00
|
|
|
} else {
|
2022-01-07 16:55:03 -05:00
|
|
|
while (!isWhitespace(input[++i]) && !isSingle(input[i]) &&
|
|
|
|
input[i] != '\0') {
|
2020-05-04 10:03:35 -04:00
|
|
|
l++;
|
|
|
|
}
|
|
|
|
}
|
2020-05-10 13:51:33 -04:00
|
|
|
|
|
|
|
slices[slice].length = l;
|
|
|
|
slice++;
|
2016-04-18 04:25:36 -04:00
|
|
|
}
|
|
|
|
|
2022-01-07 16:55:03 -05:00
|
|
|
if (parens != 0) {
|
2021-12-13 10:47:35 -05:00
|
|
|
err->context = malloc(sizeof(char) * ERR_LEN);
|
2021-12-15 14:25:09 -05:00
|
|
|
err->code = MISMATCHED_PARENS;
|
2021-07-21 11:26:04 -04:00
|
|
|
int start = i > ERR_LEN ? i - ERR_LEN : 0;
|
2021-12-13 10:47:35 -05:00
|
|
|
strncpy(err->context, &input[start], ERR_LEN);
|
2016-04-18 04:25:36 -04:00
|
|
|
free(slices);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
slices[slice].text = NULL;
|
|
|
|
slices[slice].length = 0;
|
|
|
|
|
|
|
|
return slices;
|
|
|
|
}
|