#include "lex.h"

// Lexer state shared by the functions in this file.
int pos = 0;	// Index into input of the next character to be read
char *input;	// String currently being lexed; assigned by startlex()

/**
 * Begins lexing a new input string.
 *
 * Rewinds the read position to the start and points the lexer at s.  Only
 * the pointer is stored (no copy is made), so the characters of s are read
 * directly by the other functions in this file.
 */
void startlex(char *s)
{
  pos = 0;
  input = s;
}

/**
 * Reads the next character of the input and advances past it.
 *
 * Returns the character as an unsigned char value (0..255) widened to int,
 * or -1 once the terminating '\0' has been reached.  The position is not
 * advanced at end of input, so repeated calls keep returning -1.
 */
int get(void)
{
  if (input[pos] == '\0') return -1;
  // Convert through unsigned char, as getc() does.  Where plain char is
  // signed, a byte >= 0x80 would otherwise come back negative, and the
  // byte 0xFF would be indistinguishable from the -1 end-of-input marker.
  return (unsigned char)input[pos++];
}

/**
 * Pushes the most recently read character back by moving the position one
 * place towards the start of the input.
 *
 * c is only inspected to see whether it is the -1 end-of-input marker:
 * get() does not advance when it returns -1, so in that case there is
 * nothing to undo.  The position is also left alone when it is already 0.
 *
 * Returns c if the position was moved back, -1 otherwise (the same
 * convention as ungetc()).
 */
int unget(int c)
{
  if (pos <= 0 || c == -1) return -1;
  pos--;
  return c;
}

/**
 * Conditionally consumes one character.
 *
 * If the character at the current position is c, moves past it and returns
 * TRUE; otherwise leaves the position untouched and returns FALSE.
 *
 * The string terminator is never matched, so at end of input the result is
 * always FALSE, even for c == '\0'.
 */
bool next(int c)
{
  // Never step over the terminator: doing so would leave pos beyond the end
  // of the string and make every later read run off the buffer.
  if (input[pos] == '\0' || input[pos] != c) return FALSE;
  pos++;
  return TRUE;
}

// Table mapping the spelling of each literal keyword to its token value.
// The entry whose name is NULL marks the end of the table.  Both the table
// and the strings it points to are read-only, hence the const qualifiers.
static const struct keyword {
  const char *name;	// Spelling of the keyword
  token_t tval;		// Token returned by lex() when a word matches name
} keywords[] = {
  {"true", T_TRUE},
  {"false", T_FALSE},
  {"null", T_NULL},
  {NULL, T_UNKNOWN}
};


/**
 * JSON strings are always double-quoted and may contain 0 or more characters
 * with the following allowed for characters:
 * char	  : any-Unicode-character-except-"-or-\-or-control-character
 *	  | \" | \\ | \/ | \b | \f | \n | \r | \t| \u four-hex-digits 
 * For this assignment you do not need to support \u hex sequences, although
 * you may.
 */

static token_t lexstring(char *word)
{
  int c, wp = 0, escape = FALSE;

  while ((c = get()) != '"') {
    if (c == '\\' && !escape) {
      escape = TRUE;
      continue;
    }
    if (escape) {
      switch(c) {
	case '\"':
	case '\\':
	case '/':
	case '\n':
	  break;
	case 'b': c = '\b'; break;
	case 'f': c = '\f'; break;
	case 'n': c = '\n'; break;
	case 'r': c = '\r'; break;
	case 't': c = '\t'; break;
      }
      escape = FALSE;
    }
    // Add support for \x sequences
    word[wp++] = c;
  }
  word[wp] = '\0';
  return T_STRING;
}

/**
 * Returns the next token from the input set up by startlex().
 *
 * Leading whitespace is skipped.  Punctuation ('[' ']' '{' '}' ',' ':')
 * yields its own token and leaves word empty.  A '"' hands over to
 * lexstring(), which fills word with the string contents.  Anything else
 * is collected into word up to (not including) the next whitespace or
 * punctuation character and is then classified:
 *   - a word starting with a digit or '-' must be consumed entirely by
 *     strtod() to be a T_NUMBER, otherwise T_UNKNOWN is returned
 *     (note strtod() accepts some forms JSON does not, e.g. hex);
 *   - a word matching an entry of keywords[] (compared without regard to
 *     case) yields that entry's token;
 *   - any other word yields T_UNKNOWN.
 * A diagnostic is printed to stdout for each T_UNKNOWN result.
 *
 * word receives the token text as a NUL-terminated string; it must be large
 * enough for the longest token, since its size is not passed in.
 *
 * Returns T_EOI at end of input.
 */
token_t lex(char *word)
{
  static const char stop[] = " \t\n[]{},:\"";
  int wp = 0;
  // int, not char: get() returns -1 at end of input, and a char could
  // either fail to compare equal to EOF (unsigned char platforms) or be
  // negative when handed to the <ctype.h> functions (signed char platforms).
  int c;
  char *end;

  word[wp] = '\0';

  do {
    c = get();
  } while (c != EOF && isspace((unsigned char)c));

  switch (c) {
    case '\0':
    case EOF: return T_EOI;
    case '"': return lexstring(word);
    case '[': return T_OBRAC;
    case ']': return T_CBRAC;
    case '{': return T_OCBRACE;
    case '}': return T_CCBRACE;
    case ',': return T_COMMA;
    case ':': return T_COLON;
    default:
      // Collect a bare word.  Besides the explicit stop set, any whitespace
      // ends the word so that e.g. the '\r' of a CRLF line ending does not
      // become part of the token.
      do {
        word[wp++] = (char)c;
        c = get();
      } while (c != '\0' && c != EOF && !isspace((unsigned char)c) &&
               strchr(stop, c) == NULL);
      // Give the terminating character back; it belongs to the next token.
      unget(c);
      word[wp] = '\0';

      if (isdigit((unsigned char)word[0]) || word[0] == '-') {
        // Only validity matters here; the numeric value is not used.
        (void)strtod(word, &end);
        if (*end != '\0') {
          printf("Malformed number\n");
          return T_UNKNOWN;
        }
        return T_NUMBER;
      }

      for (int i = 0; keywords[i].name != NULL; i++) {
        if (strcasecmp(word, keywords[i].name) == 0) return keywords[i].tval;
      }
      printf("Lexed unknown token '%s'\n", word);
      return T_UNKNOWN;
  }
  // Not reached
  return T_EOI;
}