#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-
// Categories of tokens produced by the tokenizer.
typedef enum {
  TK_PUNCT = 0, // Punctuator
  TK_NUM = 1,   // Numeric literal
  TK_EOF = 2,   // End-of-input marker
} TokenKind;
-
- // Token type
- typedef struct Token Token;
- struct Token {
- TokenKind kind; // Token kind
- Token *next; // Next token
- int val; // If kind is TK_NUM, its value
- char *loc; // Token location
- int len; // Token length
- };
-
- // The whole input string being compiled (set from argv[1] in main); tokenize() scans it and verror_at() prints it and measures error columns from it.
- static char *current_input;
-
// Prints a printf-style message followed by a newline to stderr and
// terminates the process with exit status 1. Does not return.
static void error(char *fmt, ...) {
  va_list args;
  va_start(args, fmt);

  vfprintf(stderr, fmt, args);
  fputs("\n", stderr);

  exit(1);
}
-
- // Reports an error location and exit.
- static void verror_at(char *loc, char *fmt, va_list ap) {
- int pos = loc - current_input;
- fprintf(stderr, "%s\n", current_input);
- fprintf(stderr, "%*s", pos, ""); // print pos spaces.
- fprintf(stderr, "^ ");
- vfprintf(stderr, fmt, ap);
- fprintf(stderr, "\n");
- exit(1);
- }
-
// Reports a printf-style error pointing at `loc` inside the input string
// and exits; the actual printing is done by verror_at().
static void error_at(char *loc, char *fmt, ...) {
  va_list args;

  va_start(args, fmt);
  verror_at(loc, fmt, args);
}
-
- static void error_tok(Token *tok, char *fmt, ...) {
- va_list ap;
- va_start(ap, fmt);
- verror_at(tok->loc, fmt, ap);
- }
-
- // Consumes the current token if it matches `s`.
- static bool equal(Token *tok, char *op) {
- return memcmp(tok->loc, op, tok->len) == 0 && op[tok->len] == '\0';
- }
-
- // Ensure that the current token is `s`.
- static Token *skip(Token *tok, char *s) {
- if (!equal(tok, s))
- error_tok(tok, "expected '%s'", s);
- return tok->next;
- }
-
- // Ensure that the current token is TK_NUM.
- static int get_number(Token *tok) {
- if (tok->kind != TK_NUM)
- error_tok(tok, "expected a number");
- return tok->val;
- }
-
- // Create a new token.
- static Token *new_token(TokenKind kind, char *start, char *end) {
- Token *tok = calloc(1, sizeof(Token));
- tok->kind = kind;
- tok->loc = start;
- tok->len = end - start;
- return tok;
- }
-
- // Tokenize `current_input` and returns new tokens.
- static Token *tokenize(void) {
- char *p = current_input;
- Token head = {};
- Token *cur = &head;
-
- while (*p) {
- // Skip whitespace characters.
- if (isspace(*p)) {
- p++;
- continue;
- }
-
- // Numeric literal
- if (isdigit(*p)) {
- cur = cur->next = new_token(TK_NUM, p, p);
- char *q = p;
- cur->val = strtoul(p, &p, 10);
- cur->len = p - q;
- continue;
- }
-
- // Punctuator
- if (*p == '+' || *p == '-') {
- cur = cur->next = new_token(TK_PUNCT, p, p + 1);
- p++;
- continue;
- }
-
- error_at(p, "invalid token");
- }
-
- cur = cur->next = new_token(TK_EOF, p, p);
- return head.next;
- }
-
- int main(int argc, char **argv) {
- if (argc != 2)
- error("%s: invalid number of arguments", argv[0]);
-
- current_input = argv[1];
- Token *tok = tokenize();
-
- printf(" .globl main\n");
- printf("main:\n");
-
- // The first token must be a number
- printf(" mov $%d, %%rax\n", get_number(tok));
- tok = tok->next;
-
- // ... followed by either `+
` or `- `. - while (tok->kind != TK_EOF) {
- if (equal(tok, "+")) {
- printf(" add $%d, %%rax\n", get_number(tok->next));
- tok = tok->next->next;
- continue;
- }
-
- tok = skip(tok, "-");
- printf(" sub $%d, %%rax\n", get_number(tok));
- tok = tok->next;
- }
-
- printf(" ret\n");
- return 0;
- }
- cc -o test test.c
- ./test "1 +2 +3" > tmp.s
- cc -o tmp tmp.s
- ./tmp
- echo $?
./test "1 + 2 * +3" > tmp.s
1 + 2 * +3
      ^ invalid token

