• Linux C编译器从零开发二


    自定义分词器
    test.c
    #include <ctype.h>
    #include <stdarg.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    // Categories of token produced by the tokenizer.
    enum TokenKind {
      // Punctuators
      TK_PUNCT,
      // Numeric literals
      TK_NUM,
      // End-of-file markers
      TK_EOF,
    };
    typedef enum TokenKind TokenKind;
    12. // Token type
    13. typedef struct Token Token;
    14. struct Token {
    15. TokenKind kind; // Token kind
    16. Token *next; // Next token
    17. int val; // If kind is TK_NUM, its value
    18. char *loc; // Token location
    19. int len; // Token length
    20. };
    // The string being compiled. main() points it at argv[1]; the tokenizer
    // scans it and verror_at() uses it to compute the error column.
    static char *current_input;
    // Prints a printf-style message followed by a newline to stderr, then
    // terminates the process with exit status 1. Never returns.
    static void error(char *fmt, ...) {
      va_list args;

      va_start(args, fmt);
      vfprintf(stderr, fmt, args);
      fprintf(stderr, "\n");
      exit(1);
    }
    31. // Reports an error location and exit.
    32. static void verror_at(char *loc, char *fmt, va_list ap) {
    33. int pos = loc - current_input;
    34. fprintf(stderr, "%s\n", current_input);
    35. fprintf(stderr, "%*s", pos, ""); // print pos spaces.
    36. fprintf(stderr, "^ ");
    37. vfprintf(stderr, fmt, ap);
    38. fprintf(stderr, "\n");
    39. exit(1);
    40. }
    // Reports a formatted error at input position `loc` and exits.
    // Variadic front end for verror_at(), which terminates the process.
    static void error_at(char *loc, char *fmt, ...) {
      va_list args;

      va_start(args, fmt);
      verror_at(loc, fmt, args);
    }
    46. static void error_tok(Token *tok, char *fmt, ...) {
    47. va_list ap;
    48. va_start(ap, fmt);
    49. verror_at(tok->loc, fmt, ap);
    50. }
    51. // Consumes the current token if it matches `s`.
    52. static bool equal(Token *tok, char *op) {
    53. return memcmp(tok->loc, op, tok->len) == 0 && op[tok->len] == '\0';
    54. }
    55. // Ensure that the current token is `s`.
    56. static Token *skip(Token *tok, char *s) {
    57. if (!equal(tok, s))
    58. error_tok(tok, "expected '%s'", s);
    59. return tok->next;
    60. }
    61. // Ensure that the current token is TK_NUM.
    62. static int get_number(Token *tok) {
    63. if (tok->kind != TK_NUM)
    64. error_tok(tok, "expected a number");
    65. return tok->val;
    66. }
    67. // Create a new token.
    68. static Token *new_token(TokenKind kind, char *start, char *end) {
    69. Token *tok = calloc(1, sizeof(Token));
    70. tok->kind = kind;
    71. tok->loc = start;
    72. tok->len = end - start;
    73. return tok;
    74. }
    75. // Tokenize `current_input` and returns new tokens.
    76. static Token *tokenize(void) {
    77. char *p = current_input;
    78. Token head = {};
    79. Token *cur = &head;
    80. while (*p) {
    81. // Skip whitespace characters.
    82. if (isspace(*p)) {
    83. p++;
    84. continue;
    85. }
    86. // Numeric literal
    87. if (isdigit(*p)) {
    88. cur = cur->next = new_token(TK_NUM, p, p);
    89. char *q = p;
    90. cur->val = strtoul(p, &p, 10);
    91. cur->len = p - q;
    92. continue;
    93. }
    94. // Punctuator
    95. if (*p == '+' || *p == '-') {
    96. cur = cur->next = new_token(TK_PUNCT, p, p + 1);
    97. p++;
    98. continue;
    99. }
    100. error_at(p, "invalid token");
    101. }
    102. cur = cur->next = new_token(TK_EOF, p, p);
    103. return head.next;
    104. }
    105. int main(int argc, char **argv) {
    106. if (argc != 2)
    107. error("%s: invalid number of arguments", argv[0]);
    108. current_input = argv[1];
    109. Token *tok = tokenize();
    110. printf(" .globl main\n");
    111. printf("main:\n");
    112. // The first token must be a number
    113. printf(" mov $%d, %%rax\n", get_number(tok));
    114. tok = tok->next;
    115. // ... followed by either `+ ` or `- `.
    116. while (tok->kind != TK_EOF) {
    117. if (equal(tok, "+")) {
    118. printf(" add $%d, %%rax\n", get_number(tok->next));
    119. tok = tok->next->next;
    120. continue;
    121. }
    122. tok = skip(tok, "-");
    123. printf(" sub $%d, %%rax\n", get_number(tok));
    124. tok = tok->next;
    125. }
    126. printf(" ret\n");
    127. return 0;
    128. }
     编译
    1. cc -o test test.c
    2. ./test "1 +2 +3" > tmp.s
    3. cc -o tmp tmp.s
    4. ./tmp
    5. echo $?
    错误处理

    ./test "1 + 2 * +3"  > tmp.s
    1 + 2 * +3
          ^ invalid token


    创作不易,小小的支持一下吧!

  • 相关阅读:
    JVM-Java字节码的组成部分
    《数据结构、算法与应用C++语言描述》-栈的应用-开关盒布线问题
    HashMap部分八股文
    为保证软件开发质量,如何提高检查效率?
    如何完善文件传输审批流程,降低企业文件安全风险?
    AUTOSAR从入门到精通-汽车CAN网络信息安全研究
    高斯消元
    nginx 的进程建通信机制-共享内存/channel/信号
    IDEA最实用的设置
    用于独立系统应用的光伏MPPT铅酸电池充电控制器建模(Simulink实现)
  • 原文地址:https://blog.csdn.net/qq_30220519/article/details/139706646