/*
 * CS456 - Systems Programming
 * File: exercises/e6/solution/lex.c
 */
#include "lex.h"
// Lexer state shared by the functions in this file.
char *input; // Input string being scanned; assigned by startlex() and read up to its '\0' terminator
int pos = 0; // Position within the string: index of the next character to read
/**
 * Begin lexing a new input string.
 *
 * Rewinds the read position to the first character and points the lexer
 * at s. The string is not copied; only the pointer is kept.
 */
void startlex(char *s)
{
    pos = 0;
    input = s;
}
/**
 * Gets the next character and advances past it.
 *
 * Returns the character as an unsigned char value converted to int (the
 * same convention as getc()), or -1 on end of input. At end of input the
 * position stays on the terminating '\0', so repeated calls keep
 * returning -1.
 */
int get(void)
{
    if (input[pos] == '\0') {
        return -1;
    }
    // Convert through unsigned char so that bytes >= 0x80 are never negative:
    // where plain char is signed, the byte 0xFF would otherwise be returned
    // as -1 and be mistaken for end of input.
    return (unsigned char)input[pos++];
}
/**
 * Unget a character (as long as it is not the end of input).
 *
 * Only the read position is stepped back by one; c itself is not stored,
 * so it should be the character most recently returned by get().
 *
 * Returns c if the position was moved back, or -1 if nothing was ungot
 * (c is the end-of-input marker, or the position is already at the start).
 */
int unget(int c)
{
    if (c == -1 || pos <= 0) {
        return -1;
    }
    pos--;
    return c;
}
/**
 * If the next character is c, then move past it and return TRUE, otherwise
 * do nothing and return FALSE.
 *
 * A match against the terminating '\0' still returns TRUE but does not
 * advance, so the position can never move beyond the end of the input.
 */
bool next(int c)
{
    if (input[pos] != c) {
        return FALSE;
    }
    // Never step over the terminator: later reads would run off the string.
    if (input[pos] != '\0') {
        pos++;
    }
    return TRUE;
}
/**
 * Bare-word literals recognised by the lexer, mapping each spelling to the
 * token reported for it. The table is read-only and is terminated by an
 * entry whose name is NULL.
 */
static const struct keyword {
    const char *name;   // Spelling of the literal
    token_t tval;       // Token returned when the spelling matches
} keywords[] = {
    { "true",  T_TRUE    },
    { "false", T_FALSE   },
    { "null",  T_NULL    },
    { NULL,    T_UNKNOWN }  // Sentinel marking the end of the table
};
/**
* JSON strings are always double-quoted and may contain 0 or more characters
* with the following allowed for characters:
* char : any-Unicode-character-except-"-or-\-or-control-character
* | \" | \\ | \/ | \b | \f | \n | \r | \t| \u four-hex-digits
* For this assignment you do not need to support \u hex sequences, although
* you may.
*/
static token_t lexstring(char *word)
{
int c, wp = 0, escape = FALSE;
while ((c = get()) != '"') {
if (c == '\\' && !escape) {
escape = TRUE;
continue;
}
if (escape) {
switch(c) {
case '\"':
case '\\':
case '/':
case '\n':
break;
case 'b': c = '\b'; break;
case 'f': c = '\f'; break;
case 'n': c = '\n'; break;
case 'r': c = '\r'; break;
case 't': c = '\t'; break;
}
escape = FALSE;
}
// Add support for \x sequences
word[wp++] = c;
}
word[wp] = '\0';
return T_STRING;
}
/**
 * Returns the next token.
 *
 * Skips leading whitespace, then classifies what follows. For strings,
 * numbers, keywords and unknown words the token text is stored in word
 * (NUL-terminated); for punctuation and end of input word is left empty.
 * word must be large enough for the longest token; no bound is checked here.
 *
 * Returns T_EOI at end of input, and T_UNKNOWN (after printing a
 * diagnostic) for a malformed number or an unrecognised word.
 */
token_t lex(char *word)
{
    static const char stop[] = " \t\n[]{},:\"";
    int wp = 0;
    int c;      // int, not char: must be able to hold get()'s -1 end marker
    char *end;

    word[wp] = '\0';

    // Skip leading whitespace. The <ctype.h> functions require an
    // unsigned char value (or EOF), hence the cast.
    do {
        c = get();
    } while (c != EOF && isspace((unsigned char)c));

    switch (c) {
    case '\0':
    case EOF: return T_EOI;
    case '"': return lexstring(word);
    case '[': return T_OBRAC;
    case ']': return T_CBRAC;
    case '{': return T_OCBRACE;
    case '}': return T_CCBRACE;
    case ',': return T_COMMA;
    case ':': return T_COLON;
    default:
        // Bare word (number or keyword): collect characters up to the next
        // delimiter. Any whitespace ends the word, not only the characters
        // listed in stop, so "true\r\n" lexes as the keyword "true".
        do {
            word[wp++] = (char)c;
            c = get();
        } while (c != '\0' && c != EOF &&
                 !isspace((unsigned char)c) && strchr(stop, c) == NULL);
        unget(c);   // Leave the delimiter for the next call
        word[wp] = '\0';

        if (isdigit((unsigned char)word[0]) || word[0] == '-') {
            // Only validity matters here: the whole word must parse as a
            // number. The converted value itself is not needed.
            (void)strtod(word, &end);
            if (*end != '\0') {
                printf("Malformed number\n");
                return T_UNKNOWN;
            }
            return T_NUMBER;
        }

        for (int i = 0; keywords[i].name != NULL; i++) {
            if (strcasecmp(word, keywords[i].name) == 0) {
                return keywords[i].tval;
            }
        }
        printf("Lexed unknown token '%s'\n", word);
        return T_UNKNOWN;
    }
    // Not reached
    return T_EOI;
}
|