/*
 * CS456 - Systems Programming
 * exercises/e7/files/lex.c
 */
#include "main.h"
// The current (lookahead) token: its kind in .tok, its text in .buf and,
// for numbers, its value in .val. Filled in by lex() via startlex()/match().
struct cur_tok _cur;
// Push-back stack used by get()/unget(); get() pops from it before reading
// from the input stream.
int _cstack[100];
// Number of characters currently held in _cstack (index of the next free slot).
int _csp = 0;
/**
* Tracks which line of input the lexer is currently on. Use it to print more
* useful error messages and to help debug the parser.
*/
int _line = 1;
// Stream the lexer reads from; set by startlex().
FILE *input = NULL;
/**
 * Point the lexer at a new input stream and load the first token.
 *
 * After this call _cur holds the first token of fp, so match() and accept()
 * have a lookahead token to compare against.
 */
void startlex(FILE *fp)
{
    char *text = _cur.buf;
    double *number = &_cur.val;

    input = fp;
    _cur.tok = lex(text, number);
}
/**
 * Fetch the next input character.
 *
 * Characters pushed back with unget() are handed out first, most recent
 * push first; once that stack is empty the character is read from the input
 * stream. Each newline returned increments _line.
 *
 * Returns the character, or EOF on end of input.
 */
int get(void)
{
    int ch = (_csp != 0) ? _cstack[--_csp] : fgetc(input);

    if (ch == '\n') {
        _line++;
    }
    return ch;
}
/**
 * Push a character back so that the next get() returns it again.
 *
 * EOF is never pushed. Pushing back a newline also decrements _line, undoing
 * the increment get() performed when it first returned that newline.
 *
 * The push-back stack has a fixed capacity; exceeding it is treated as a
 * fatal error rather than writing past the end of _cstack.
 */
void unget(int c)
{
    if (c == EOF) {
        return;
    }
    // Refuse to overflow the fixed-size stack before touching any state.
    if (_csp >= (int)(sizeof _cstack / sizeof _cstack[0])) {
        printf("Unget stack overflow on line %d\n", _line);
        exit(1);
    }
    if (c == '\n') {
        _line--;
    }
    _cstack[_csp++] = c;
}
/**
 * Conditionally consume one character.
 *
 * Reads the next character; when it equals c it stays consumed and TRUE is
 * returned. Otherwise it is pushed back with unget() and FALSE is returned.
 */
bool next(int c)
{
    int peeked = get();

    if (peeked != c) {
        unget(peeked);
        return FALSE;
    }
    return TRUE;
}
/**
* JSON strings are always double-quoted and may contain 0 or more characters
* with the following allowed for characters:
* char : any-Unicode-character-except-"-or-\-or-control-character
* | \" | \\ | \/ | \b | \f | \n | \r | \t| \u four-hex-digits
* For this assignment you do not need to support \u hex sequences, although
* you may.
*/
token_t lexstring(char *word)
{
int c, wp = 0, escape = FALSE;
while ((c = get()) != EOF) {
if (!escape && c == '\"') break;
if (c == '\\' && !escape) {
escape = TRUE;
continue;
}
if (escape) {
switch(c) {
case '\"':
case '\\':
case '/':
case '\n':
break;
case 'b': c = '\b'; break;
case 'f': c = '\f'; break;
case 'n': c = '\n'; break;
case 'r': c = '\r'; break;
case 't': c = '\t'; break;
}
escape = FALSE;
}
// Add support for \x sequences
word[wp++] = c;
}
word[wp] = '\0';
return T_STRING;
}
/*
 * Reserved words and the token each one produces. lex() scans this table in
 * order and stops at the entry whose name is NULL.
 */
static struct keyword {
    char *name;
    token_t tval;
} keywords[] = {
    { .name = "true",  .tval = T_TRUE    },
    { .name = "false", .tval = T_FALSE   },
    { .name = "null",  .tval = T_NULL    },
    { .name = NULL,    .tval = T_UNKNOWN },
};
/**
 * Read the next token from the input.
 *
 * Leading whitespace is skipped. Punctuation yields its own token, a double
 * quote hands off to lexstring(), and anything else is collected into word
 * up to the next stop character and then classified as a number or keyword.
 *
 * word  receives the token text for strings, numbers, keywords and unknown
 *       tokens; it is set to the empty string for punctuation and end of
 *       input.
 * val   receives the numeric value when T_NUMBER is returned.
 *
 * Returns the token kind; T_EOI at end of input (or on a NUL byte), and
 * T_UNKNOWN for malformed numbers and unrecognised words.
 *
 * NOTE(review): no length limit is applied to word; the caller's buffer must
 * be large enough for the longest token in the input.
 */
static token_t lex(char *word, double *val)
{
    // Characters that end a bare word. '\r' is included so CRLF input does
    // not leave a carriage return glued to the end of a number or keyword.
    static const char stop[] = " \t\r\n[]{},:\"";
    int wp = 0;
    char *end;
    // Must be int, not char: get() returns EOF as an out-of-band value and
    // <ctype.h> functions require an unsigned-char value or EOF.
    int c = get();

    word[wp] = '\0';
    while (isspace(c)) {
        c = get();
    }

    switch (c) {
    case '\0':
    case EOF: return T_EOI;
    case '"': return lexstring(word);
    case '[': return T_OBRAC;
    case ']': return T_CBRAC;
    case '{': return T_OCBRACE;
    case '}': return T_CCBRACE;
    case ',': return T_COMMA;
    case ':': return T_COLON;
    default:
        break;
    }

    // Bare word: gather characters up to the next stop character.
    do {
        word[wp++] = (char)c;
        c = get();
    } while (c != '\0' && c != EOF && strchr(stop, c) == NULL);
    unget(c);   // the terminator belongs to the next token
    word[wp] = '\0';

    if (isdigit((unsigned char)word[0]) || word[0] == '-') {
        *val = strtod(word, &end);
        // Anything left over after the number means it was not well formed.
        if (*end != '\0') {
            printf("Malformed number\n");
            return T_UNKNOWN;
        }
        return T_NUMBER;
    }

    for (int i = 0; keywords[i].name != NULL; i++) {
        if (strcasecmp(word, keywords[i].name) == 0) {
            return keywords[i].tval;
        }
    }

    printf("Lexed unknown token '%s'\n", word);
    return T_UNKNOWN;
}
/**
 * Require the current token to be tok and advance to the next one.
 *
 * If the current token is not tok, prints a syntax error with the current
 * line number and terminates the program with exit status 1.
 */
void match(token_t tok)
{
    if (tok != _cur.tok) {
        printf("Syntax error on line %d\n", _line);
        exit(1);
    }
    _cur.tok = lex(_cur.buf, &_cur.val);
}
/**
 * Non-fatal counterpart of match().
 *
 * When the current token is tok, consumes it and returns 1. Otherwise the
 * current token is left untouched and 0 is returned.
 */
int accept(token_t tok)
{
    if (_cur.tok != tok) {
        return 0;
    }
    match(tok);
    return 1;
}
/* End of lex.c */