pebblisp/src/tokens.c

149 lines
3.6 KiB
C
Raw Normal View History

2016-04-18 04:25:36 -04:00
#include "tokens.h"
2016-04-18 04:25:36 -04:00
#include <stdlib.h>
#include <string.h>
2016-04-18 04:25:36 -04:00
#define ERR_LEN 256
2016-04-18 04:25:36 -04:00
/*
* Grammar:
* token
* expr
* (op expr expr)
* (list expr expr ... )
*/
// Characters that always form a one-character token on their own.
static const char singleTokens[] = "()+-*/='";

// Is the char a standalone token?
// Returns `c` itself (non-zero) when it is one of singleTokens, otherwise 0.
int isSingle(const char c)
{
    // strchr() also matches the terminating NUL, which is not a token.
    if (c == '\0') {
        return 0;
    }
    return strchr(singleTokens, c) != NULL ? c : 0;
}
// Is the char an ASCII decimal digit ('0' through '9')?
// Returns 1 when it is, 0 otherwise.
int isDigit(const char c)
{
    return c >= '0' && c <= '9';
}
// Is the char a hexadecimal digit?
// Only decimal digits and the lower-case letters 'a' through 'f' are
// accepted; upper-case 'A'-'F' yield 0. Returns 1 on a match, 0 otherwise.
int isHex(const char c)
{
    if (c >= 'a' && c <= 'f') {
        return 1;
    }
    return !(c < '0' || c > '9');
}
// Is the char a token separator: space, horizontal tab or newline?
// Returns 1 when it is, 0 otherwise. Carriage return is NOT treated as
// whitespace here.
int isWhitespace(const char c)
{
    switch (c) {
    case ' ':
    case '\t':
    case '\n':
        return 1;
    default:
        return 0;
    }
}
// Logical negation of isWhitespace(): returns 1 for any char that is not a
// space, tab or newline (including the NUL terminator), 0 otherwise.
int notWhitespace(const char c)
{
    return !isWhitespace(c);
}
2020-05-04 18:14:41 -04:00
// Return needs to be freed, if not null
struct Slice* nf_tokenize(const char* input, struct Error* err)
2016-04-18 04:25:36 -04:00
{
if (!input) {
err->context = malloc(sizeof(char) * ERR_LEN);
strcpy(err->context, "no input");
2016-04-18 04:25:36 -04:00
return NULL;
}
2016-04-18 04:25:36 -04:00
int token_count = MAX_TOK_CNT;
struct Slice* slices = malloc(sizeof(struct Slice) * token_count);
while (slices == NULL) {
token_count /= 2;
slices = malloc(sizeof(struct Slice) * token_count);
}
2016-04-18 04:25:36 -04:00
int i = 0;
int slice = 0;
int parens = 0;
while (input[i] != '\0') {
int l = 1;
// printd("input: '%c'\n", input[i]);
2020-05-04 10:03:35 -04:00
if (isWhitespace(input[i]) || input[i] == ';') {
2020-05-04 10:03:35 -04:00
i++;
continue;
}
if (input[i] == '(') {
2016-04-18 04:25:36 -04:00
parens++;
} else if (input[i] == ')') {
parens--;
if (parens < 0) {
err->context = malloc(sizeof(char) * ERR_LEN + 1);
err->code = MISMATCHED_PARENS;
int start = i > ERR_LEN ? i - ERR_LEN : 0;
strncpy(err->context, &input[start], ERR_LEN);
2020-05-08 02:29:06 -04:00
free(slices);
return NULL;
}
2016-04-18 04:25:36 -04:00
}
slices[slice].text = &input[i];
if (isSingle(input[i])) {
2016-04-18 04:25:36 -04:00
i++;
} else if (input[i] == '"') {
if (input[i + 1] == '"' && input[i + 2] == '"') {
// Triple-quoted block
i += 2;
slices[slice].text += 2;
for (;;) {
i++;
if (input[i] == '"' && input[i + 1] == '"' && input[i + 2] == '"') {
break;
}
l++;
if (input[i] == '\0' || input[i + 1] == '\0' || input[i + 2] == '\0') {
err->context = malloc(sizeof(char) * ERR_LEN + 1);
err->code = UNEXPECTED_EOF;
int start = i > ERR_LEN ? i - ERR_LEN : 0;
strncpy(err->context, &input[start], ERR_LEN);
free(slices);
return NULL;
}
}
} else {
// Simple string
while (input[++i] != '"' && input[i] != '\0') {
l++;
}
}
i++;
2020-05-04 10:03:35 -04:00
} else {
while (!isWhitespace(input[++i]) && !isSingle(input[i]) && input[i] != '"' && input[i] != '\0') {
2020-05-04 10:03:35 -04:00
l++;
}
}
slices[slice].length = l;
slice++;
2016-04-18 04:25:36 -04:00
}
if (parens != 0) {
err->context = malloc(sizeof(char) * ERR_LEN);
2021-12-15 14:25:09 -05:00
err->code = MISMATCHED_PARENS;
int start = i > ERR_LEN ? i - ERR_LEN : 0;
strncpy(err->context, &input[start], ERR_LEN);
2016-04-18 04:25:36 -04:00
free(slices);
return NULL;
}
slices[slice].text = NULL;
slices[slice].length = 0;
return slices;
}