• readv、io_uring、liburing and command cat


    使用多种方式实现 cat 命令的等效程序。

    readv version of cat

    readv 单次调用可接受的 iovec 数量受内核限制(IOV_MAX,Linux 上为 1024)。本例每个块大小为 4K,因此一次最多可读取 4K × 1024 = 4M 的数据;文件超过该大小时所需块数超出上限,readv 调用失败,程序报错退出。

    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>
    #include <fcntl.h>
    #include <sys/uio.h>
    #include <sys/stat.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>
    
    #define BLOCK_SZ    4096
    
    /*
     * Look up how many bytes the object behind an open descriptor holds.
     *
     * Regular files report st_size from fstat(); block devices are asked
     * through the BLKGETSIZE64 ioctl. Returns -1 when fstat() or ioctl()
     * fails (after a perror() message) and, silently, for any other file type.
     */
    off_t get_file_size(int fd) {
        struct stat info;

        if (fstat(fd, &info) < 0) {
            perror("fstat");
            return -1;
        }

        /* Plain files: fstat() already told us the length */
        if (S_ISREG(info.st_mode)) {
            return info.st_size;
        }

        /* Neither a regular file nor a block device: no size to report */
        if (!S_ISBLK(info.st_mode)) {
            return -1;
        }

        unsigned long long device_bytes;
        if (ioctl(fd, BLKGETSIZE64, &device_bytes) != 0) {
            perror("ioctl");
            return -1;
        }
        return device_bytes;
    }
    
    /*
     * Write len bytes starting at buf to stdout.
     *
     * stdout is a buffered stdio stream, so one fwrite() of the whole span
     * emits the same bytes as the former per-character fputc() loop.
     * A NULL buffer or a non-positive length writes nothing: the old
     * `while (len--)` loop kept running past the buffer when len was negative.
     */
    void output_to_console(char *buf, int len) {
        if (buf == NULL || len <= 0)
            return;

        fwrite(buf, 1, (size_t) len, stdout);
    }
    
    /*
     * Print the contents of file_name to stdout using readv().
     *
     * The file is read into BLOCK_SZ-sized, BLOCK_SZ-aligned buffers that are
     * described by an array of iovecs. At most IOV_MAX iovecs are handed to a
     * single readv() call; larger files are read in several rounds that reuse
     * the same buffers. Only the bytes readv() reports as read are printed.
     *
     * Returns 0 on success and 1 on failure. The descriptor and every buffer
     * are released on all paths.
     */
    int read_and_print_file(char *file_name) {
        int status = 1;
        struct iovec *iovecs = NULL;
        int allocated = 0;      /* number of iovecs that own a buffer */
        int blocks = 0;
        off_t file_sz, total_blocks, bytes_remaining;
        long iov_max;

        int file_fd = open(file_name, O_RDONLY);
        if (file_fd < 0) {
            perror("open");
            return 1;
        }

        /* get_file_size() returns -1 on error and for unsupported file types */
        file_sz = get_file_size(file_fd);
        if (file_sz < 0)
            goto out;

        /* Divide in off_t: casting file_sz to int first would truncate the
         * size of any file larger than INT_MAX bytes. */
        total_blocks = file_sz / BLOCK_SZ;
        if (file_sz % BLOCK_SZ)
            total_blocks++;

        /* readv() rejects more than IOV_MAX iovecs per call, so never build a
         * longer array. 16 is POSIX's guaranteed minimum (_XOPEN_IOV_MAX). */
        iov_max = sysconf(_SC_IOV_MAX);
        if (iov_max <= 0)
            iov_max = 16;
        blocks = total_blocks < iov_max ? (int) total_blocks : (int) iov_max;

        if (blocks == 0) {
            /* Empty file: nothing to read or print */
            status = 0;
            goto out;
        }

        iovecs = malloc(sizeof(*iovecs) * blocks);
        if (!iovecs) {
            perror("malloc");
            goto out;
        }

        for (allocated = 0; allocated < blocks; allocated++) {
            void *buf;
            /* posix_memalign() returns the error number; it does not set errno */
            int err = posix_memalign(&buf, BLOCK_SZ, BLOCK_SZ);
            if (err) {
                fprintf(stderr, "posix_memalign failed (error %d)\n", err);
                goto out;
            }
            iovecs[allocated].iov_base = buf;
            iovecs[allocated].iov_len = BLOCK_SZ;
        }

        bytes_remaining = file_sz;
        while (bytes_remaining > 0) {
            /* Size this round's iovecs to cover what is left of the file */
            int used = 0;
            off_t planned = 0;
            while (used < blocks && planned < bytes_remaining) {
                off_t chunk = bytes_remaining - planned;
                if (chunk > BLOCK_SZ)
                    chunk = BLOCK_SZ;
                iovecs[used].iov_len = (size_t) chunk;
                planned += chunk;
                used++;
            }

            ssize_t got = readv(file_fd, iovecs, used);
            if (got < 0) {
                perror("readv");
                goto out;
            }
            if (got == 0)
                break;  /* end of file reached earlier than the size promised */

            /* readv() fills the buffers in order; print only what it filled */
            ssize_t left = got;
            for (int i = 0; i < used && left > 0; i++) {
                size_t len = iovecs[i].iov_len;
                if ((ssize_t) len > left)
                    len = (size_t) left;
                output_to_console(iovecs[i].iov_base, (int) len);
                left -= (ssize_t) len;
            }

            bytes_remaining -= got;
        }
        status = 0;

    out:
        for (int i = 0; i < allocated; i++)
            free(iovecs[i].iov_base);
        free(iovecs);
        close(file_fd);
        return status;
    }
    
    /*
     * Entry point of the readv-based cat: prints every file named on the
     * command line, in order, and stops at the first one that fails.
     *
     * Returns 0 when all files were printed, 1 on a usage error or when
     * read_and_print_file() reports a failure.
     */
    int main(int argc, char *argv[]) {
        if (argc < 2) {
            /* The <...> placeholders had been stripped from this message */
            fprintf(stderr, "Usage: %s <filename1> [<filename2> ...]\n",
                    argv[0]);
            return 1;
        }

        /* Maximum number of iovecs per readv() call. Reported on stderr so that
         * stdout carries nothing but the file contents. */
        fprintf(stderr, "MAX blocks: %ld\n", sysconf(_SC_IOV_MAX));

        for (int i = 1; i < argc; i++) {
            if (read_and_print_file(argv[i])) {
                fprintf(stderr, "Error reading file\n");
                return 1;
            }
        }

        return 0;
    }
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84
    • 85
    • 86
    • 87
    • 88
    • 89
    • 90
    • 91
    • 92
    • 93
    • 94
    • 95
    • 96
    • 97
    • 98
    • 99
    • 100
    • 101
    • 102
    • 103
    • 104
    • 105
    • 106
    • 107
    • 108
    • 109
    • 110
    • 111
    • 112
    • 113
    • 114
    • 115
    • 116
    • 117
    • 118
    • 119
    • 120
    • 121
    • 122
    • 123
    • 124

    io_uring version of cat

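    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>
    #include <fcntl.h>
    #include <sys/stat.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <sys/mman.h>
    #include <sys/uio.h>
    #include <linux/fs.h>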
    
    /* If your compilation fails because the header file below is missing,
     * your kernel is probably too old to support io_uring.
     * */
    #include <linux/io_uring.h>
    
    #define QUEUE_DEPTH 1
    #define BLOCK_SZ    1024
    
    /* This is x86 specific */
    #define read_barrier()  __asm__ __volatile__("":::"memory")
    #define write_barrier() __asm__ __volatile__("":::"memory")
    
    /*
     * Userspace view of the submission queue (SQ) ring.
     * Every member points into the mmap()ed SQ ring region; app_setup_uring()
     * fills them in from the offsets reported in io_uring_params.sq_off.
     */
    struct app_io_sq_ring {
        unsigned *head;
        unsigned *tail;             /* read and advanced by submit_to_sq() */
        unsigned *ring_mask;        /* ANDed with the tail to get a slot index */
        unsigned *ring_entries;
        unsigned *flags;
        unsigned *array;            /* indirection array: ring slot -> index into the SQE array */
    };
    
    /*
     * Userspace view of the completion queue (CQ) ring.
     * Every member points into the mmap()ed CQ ring region; app_setup_uring()
     * fills them in from the offsets reported in io_uring_params.cq_off.
     */
    struct app_io_cq_ring {
        unsigned *head;             /* read_from_cq() consumes from here and writes it back */
        unsigned *tail;             /* compared against head to detect an empty ring */
        unsigned *ring_mask;        /* ANDed with the head to get a slot index */
        unsigned *ring_entries;
        struct io_uring_cqe *cqes;  /* the completion entries themselves */
    };
    
    /*
     * Everything the program needs to talk to one io_uring instance.
     */
    struct submitter {
        int ring_fd;                    /* descriptor returned by io_uring_setup() */
        struct app_io_sq_ring sq_ring;  /* pointers into the mapped submission ring */
        struct io_uring_sqe *sqes;      /* mapped array of submission queue entries */
        struct app_io_cq_ring cq_ring;  /* pointers into the mapped completion ring */
    };
    
    /*
     * Per-request bookkeeping. submit_to_sq() stores a pointer to it in the
     * SQE's user_data and read_from_cq() gets it back from the CQE's user_data.
     */
    struct file_info {
        off_t file_sz;              /* size of the file being read, in bytes */
        struct iovec iovecs[];      /* Referred by readv/writev */
    };
    
    /*
     * This code dates from a time when the io_uring system calls had no
     * wrappers in the standard C library, so it provides its own.
     *
     * io_uring_setup: forwards entries and p to the io_uring_setup system call
     * through syscall() and returns the result narrowed to int.
     */
    int io_uring_setup(unsigned entries, struct io_uring_params *p)
    {
        long rc = syscall(__NR_io_uring_setup, entries, p);

        return (int) rc;
    }
    
    /*
     * io_uring_enter: forwards its arguments to the io_uring_enter system call
     * through syscall(), always passing a NULL signal mask of size 0, and
     * returns the result narrowed to int.
     */
    int io_uring_enter(int ring_fd, unsigned int to_submit,
                              unsigned int min_complete, unsigned int flags)
    {
        long rc = syscall(__NR_io_uring_enter,
                          ring_fd, to_submit, min_complete, flags,
                          NULL, 0);

        return (int) rc;
    }
    
    /*
     * Size, in bytes, of whatever fd refers to.
     *
     * Block devices are measured with the BLKGETSIZE64 ioctl and regular files
     * with st_size. The result is -1 if fstat() or ioctl() fails (the failing
     * call is reported with perror()) or if fd is any other kind of file.
     */
    off_t get_file_size(int fd) {
        struct stat sb;
        off_t size = -1;

        if (fstat(fd, &sb) < 0) {
            perror("fstat");
        } else if (S_ISBLK(sb.st_mode)) {
            unsigned long long nbytes;

            if (ioctl(fd, BLKGETSIZE64, &nbytes) == 0)
                size = nbytes;
            else
                perror("ioctl");
        } else if (S_ISREG(sb.st_mode)) {
            size = sb.st_size;
        }

        return size;
    }
    
    /*
     * Create an io_uring instance and map its rings into this process.
     *
     * io_uring needs a fair amount of setup. liburing exists to hide this
     * boilerplate, but it is worth reading through once to see how the pieces
     * fit together.
     *
     * The function calls io_uring_setup() with QUEUE_DEPTH entries and zeroed
     * parameters, mmap()s the submission ring, the completion ring and the SQE
     * array, and stores pointers to the ring fields in s->sq_ring, s->cq_ring
     * and s->sqes.
     *
     * Returns 0 on success. On failure it reports the failing call with
     * perror(), unmaps whatever had been mapped, closes the ring descriptor
     * (s->ring_fd is set to -1) and returns 1.
     */
    int app_setup_uring(struct submitter *s) {
        struct app_io_sq_ring *sring = &s->sq_ring;
        struct app_io_cq_ring *cring = &s->cq_ring;
        struct io_uring_params p;
        void *sq_map, *cq_map, *sqe_map;
        size_t sring_sz, cring_sz, sqes_sz;
        char *sq_base, *cq_base;
        int single_mmap;

        /*
         * io_uring_setup() expects a zeroed io_uring_params unless flags are
         * wanted; this example sets none. The call fills the structure in.
         */
        memset(&p, 0, sizeof(p));
        s->ring_fd = io_uring_setup(QUEUE_DEPTH, &p);
        if (s->ring_fd < 0) {
            perror("io_uring_setup");
            return 1;
        }

        /*
         * Communication happens through two ring buffers shared between kernel
         * and user space. The completion queue is manipulated directly, while
         * the submission queue has an indirection array in between, which is
         * part of the SQ ring mapping.
         */
        sring_sz = p.sq_off.array + p.sq_entries * sizeof(unsigned);
        cring_sz = p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe);
        sqes_sz = p.sq_entries * sizeof(struct io_uring_sqe);

        /*
         * Kernels 5.4 and later can map both rings with a single mmap() call.
         * Rather than checking the kernel version, test the features bit mask:
         * when IORING_FEAT_SINGLE_MMAP is set, one mapping large enough for
         * the bigger of the two rings serves both.
         */
        single_mmap = (p.features & IORING_FEAT_SINGLE_MMAP) != 0;
        if (single_mmap) {
            if (cring_sz > sring_sz)
                sring_sz = cring_sz;
            cring_sz = sring_sz;
        }

        /* Map the submission ring (and, with single-mmap, the completion ring) */
        sq_map = mmap(0, sring_sz, PROT_READ | PROT_WRITE,
                MAP_SHARED | MAP_POPULATE,
                s->ring_fd, IORING_OFF_SQ_RING);
        if (sq_map == MAP_FAILED) {
            perror("mmap");
            goto fail_close;
        }

        if (single_mmap) {
            cq_map = sq_map;
        } else {
            /* Older kernels: the completion ring needs its own mapping */
            cq_map = mmap(0, cring_sz, PROT_READ | PROT_WRITE,
                    MAP_SHARED | MAP_POPULATE,
                    s->ring_fd, IORING_OFF_CQ_RING);
            if (cq_map == MAP_FAILED) {
                perror("mmap");
                goto fail_unmap_sq;
            }
        }

        /* Map the array of submission queue entries */
        sqe_map = mmap(0, sqes_sz, PROT_READ | PROT_WRITE,
                MAP_SHARED | MAP_POPULATE,
                s->ring_fd, IORING_OFF_SQES);
        if (sqe_map == MAP_FAILED) {
            perror("mmap");
            goto fail_unmap_cq;
        }
        s->sqes = sqe_map;

        /*
         * Record where each ring field lives. The offsets are in bytes, so
         * the arithmetic is done on char pointers (arithmetic on void * is a
         * compiler extension, not standard C).
         */
        sq_base = sq_map;
        sring->head = (unsigned *) (sq_base + p.sq_off.head);
        sring->tail = (unsigned *) (sq_base + p.sq_off.tail);
        sring->ring_mask = (unsigned *) (sq_base + p.sq_off.ring_mask);
        sring->ring_entries = (unsigned *) (sq_base + p.sq_off.ring_entries);
        sring->flags = (unsigned *) (sq_base + p.sq_off.flags);
        sring->array = (unsigned *) (sq_base + p.sq_off.array);

        cq_base = cq_map;
        cring->head = (unsigned *) (cq_base + p.cq_off.head);
        cring->tail = (unsigned *) (cq_base + p.cq_off.tail);
        cring->ring_mask = (unsigned *) (cq_base + p.cq_off.ring_mask);
        cring->ring_entries = (unsigned *) (cq_base + p.cq_off.ring_entries);
        cring->cqes = (struct io_uring_cqe *) (cq_base + p.cq_off.cqes);

        return 0;

    fail_unmap_cq:
        if (cq_map != sq_map)
            munmap(cq_map, cring_sz);
    fail_unmap_sq:
        munmap(sq_map, sring_sz);
    fail_close:
        close(s->ring_fd);
        s->ring_fd = -1;
        return 1;
    }
    
    /*
     * Send len bytes from buf to stdout through stdio's buffered stream.
     *
     * A single fwrite() replaces the former fputc() loop and produces the same
     * bytes. Nothing is written when buf is NULL or len is not positive; the
     * old `while (len--)` loop ran past the buffer for a negative len.
     */
    void output_to_console(char *buf, int len) {
        if (len > 0 && buf != NULL) {
            fwrite(buf, 1, (size_t) len, stdout);
        }
    }
    
    /*
     * Drain the completion queue.
     *
     * For every completion entry between the ring's head and tail, recover the
     * file_info that submit_to_sq() attached as user_data, then either report
     * the error carried in cqe->res or print the bytes that were read.
     * Afterwards the file_info and its buffers are freed, since nothing else
     * references them once the completion has been handled. The new head is
     * written back to the ring when the queue is empty.
     */
    void read_from_cq(struct submitter *s) {
        struct app_io_cq_ring *cring = &s->cq_ring;
        unsigned head = *cring->head;

        for (;;) {
            read_barrier();

            /* This is a ring buffer: head == tail means it is empty */
            if (head == *cring->tail)
                break;

            struct io_uring_cqe *cqe = &cring->cqes[head & *cring->ring_mask];
            struct file_info *fi = (struct file_info *) cqe->user_data;

            /* Divide in off_t; casting file_sz to int first would truncate
             * sizes above INT_MAX. */
            off_t blocks = fi->file_sz / BLOCK_SZ;
            if (fi->file_sz % BLOCK_SZ)
                blocks++;

            if (cqe->res < 0) {
                /* The read failed: the buffers hold no file data, so do not
                 * print them. */
                fprintf(stderr, "Error: %s\n", strerror(abs(cqe->res)));
            } else {
                /* cqe->res is the number of bytes read; print exactly that
                 * many, walking the buffers in order. */
                long long left = cqe->res;
                for (off_t i = 0; i < blocks && left > 0; i++) {
                    size_t len = fi->iovecs[i].iov_len;
                    if ((long long) len > left)
                        len = (size_t) left;
                    output_to_console(fi->iovecs[i].iov_base, (int) len);
                    left -= (long long) len;
                }
            }

            for (off_t i = 0; i < blocks; i++)
                free(fi->iovecs[i].iov_base);
            free(fi);

            head++;
        }

        /* Publish how far we have consumed */
        *cring->head = head;
        write_barrier();
    }
    /*
     * Queue one IORING_OP_READV request that reads all of file_path, submit it
     * and wait for one completion.
     *
     * The file is split into BLOCK_SZ-sized pieces. Each piece gets its own
     * aligned buffer, described by one iovec in a freshly allocated file_info;
     * that iovec array is what the request hands to the kernel. If readv() and
     * writev() are unfamiliar, look them up first. The file_info pointer
     * travels in the SQE's user_data so that read_from_cq() can find the
     * buffers again.
     *
     * Returns 0 once io_uring_enter() has accepted the request, 1 on failure.
     * On success the file_info and its buffers are left for read_from_cq().
     */
    int submit_to_sq(char *file_path, struct submitter *s) {
        struct app_io_sq_ring *sring = &s->sq_ring;
        struct file_info *fi = NULL;
        struct io_uring_sqe *sqe;
        off_t allocated = 0;    /* number of iovecs that own a buffer */
        off_t file_sz, nblocks, bytes_remaining;
        unsigned tail, index;
        long iov_max;

        int file_fd = open(file_path, O_RDONLY);
        if (file_fd < 0 ) {
            perror("open");
            return 1;
        }

        file_sz = get_file_size(file_fd);
        if (file_sz < 0)
            goto fail;

        /* Divide in off_t; casting file_sz to int first would truncate sizes
         * above INT_MAX. */
        nblocks = file_sz / BLOCK_SZ;
        if (file_sz % BLOCK_SZ)
            nblocks++;

        /* A vectored read takes at most IOV_MAX iovecs, so refuse up front
         * instead of allocating the whole file only to have the read fail. */
        iov_max = sysconf(_SC_IOV_MAX);
        if (iov_max > 0 && nblocks > iov_max) {
            fprintf(stderr,
                    "%s: needs %lld blocks, but one readv request takes at most %ld\n",
                    file_path, (long long) nblocks, iov_max);
            goto fail;
        }

        fi = malloc(sizeof(*fi) + sizeof(struct iovec) * (size_t) nblocks);
        if (!fi) {
            fprintf(stderr, "Unable to allocate memory\n");
            goto fail;
        }
        fi->file_sz = file_sz;

        bytes_remaining = file_sz;
        while (bytes_remaining) {
            off_t bytes_to_read = bytes_remaining;
            if (bytes_to_read > BLOCK_SZ)
                bytes_to_read = BLOCK_SZ;

            void *buf;
            /* posix_memalign() returns the error number; it does not set errno */
            int err = posix_memalign(&buf, BLOCK_SZ, BLOCK_SZ);
            if (err) {
                fprintf(stderr, "posix_memalign: %s\n", strerror(err));
                goto fail;
            }
            fi->iovecs[allocated].iov_base = buf;
            fi->iovecs[allocated].iov_len = bytes_to_read;

            allocated++;
            bytes_remaining -= bytes_to_read;
        }

        /* Fill in the SQE at the tail of the submission ring */
        tail = *sring->tail;
        read_barrier();
        index = tail & *sring->ring_mask;
        sqe = &s->sqes[index];
        /* The slot is reused for every request: clear fields left over from
         * the previous one. This also leaves flags and off at 0. */
        memset(sqe, 0, sizeof(*sqe));
        sqe->opcode = IORING_OP_READV;
        sqe->fd = file_fd;
        sqe->addr = (unsigned long) fi->iovecs;
        sqe->len = (unsigned) nblocks;
        sqe->user_data = (unsigned long long) fi;
        sring->array[index] = index;

        /* Keep the compiler from moving the SQE stores past the tail update,
         * then publish the new tail. */
        write_barrier();
        *sring->tail = tail + 1;
        write_barrier();

        /*
         * Tell the kernel about the new entry. IORING_ENTER_GETEVENTS makes
         * io_uring_enter() wait until min_complete (the third argument)
         * completions are available.
         */
        int ret = io_uring_enter(s->ring_fd, 1,1,
                IORING_ENTER_GETEVENTS);
        if(ret < 0) {
            perror("io_uring_enter");
            /* The SQE is still queued in the ring and refers to fi and
             * file_fd, so they are deliberately not released here. */
            return 1;
        }

        /* The request has been handed to the kernel and we waited for a
         * completion, so our copy of the descriptor is no longer needed. */
        close(file_fd);
        return 0;

    fail:
        if (fi) {
            for (off_t i = 0; i < allocated; i++)
                free(fi->iovecs[i].iov_base);
            free(fi);
        }
        close(file_fd);
        return 1;
    }
    
    int main(int argc, char *argv[]) {
        struct submitter *s;
    
        if (argc < 2) {
            fprintf(stderr, "Usage: %s \n", argv[0]);
            return 1;
        }
    
        s = malloc(sizeof(*s));
        if (!s) {
            perror("malloc");
            return 1;
        }
        memset(s, 0, sizeof(*s));
    
        if(app_setup_uring(s)) {
            fprintf(stderr, "Unable to setup uring!\n");
            return 1;
        }
    
        for (int i = 1; i < argc; i++) {
            if(submit_to_sq(argv[i], s)) {
                fprintf(stderr, "Error reading file\n");
                return 1;
            }
            read_from_cq(s);
        }
    
        return 0;
    }
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84
    • 85
    • 86
    • 87
    • 88
    • 89
    • 90
    • 91
    • 92
    • 93
    • 94
    • 95
    • 96
    • 97
    • 98
    • 99
    • 100
    • 101
    • 102
    • 103
    • 104
    • 105
    • 106
    • 107
    • 108
    • 109
    • 110
    • 111
    • 112
    • 113
    • 114
    • 115
    • 116
    • 117
    • 118
    • 119
    • 120
    • 121
    • 122
    • 123
    • 124
    • 125
    • 126
    • 127
    • 128
    • 129
    • 130
    • 131
    • 132
    • 133
    • 134
    • 135
    • 136
    • 137
    • 138
    • 139
    • 140
    • 141
    • 142
    • 143
    • 144
    • 145
    • 146
    • 147
    • 148
    • 149
    • 150
    • 151
    • 152
    • 153
    • 154
    • 155
    • 156
    • 157
    • 158
    • 159
    • 160
    • 161
    • 162
    • 163
    • 164
    • 165
    • 166
    • 167
    • 168
    • 169
    • 170
    • 171
    • 172
    • 173
    • 174
    • 175
    • 176
    • 177
    • 178
    • 179
    • 180
    • 181
    • 182
    • 183
    • 184
    • 185
    • 186
    • 187
    • 188
    • 189
    • 190
    • 191
    • 192
    • 193
    • 194
    • 195
    • 196
    • 197
    • 198
    • 199
    • 200
    • 201
    • 202
    • 203
    • 204
    • 205
    • 206
    • 207
    • 208
    • 209
    • 210
    • 211
    • 212
    • 213
    • 214
    • 215
    • 216
    • 217
    • 218
    • 219
    • 220
    • 221
    • 222
    • 223
    • 224
    • 225
    • 226
    • 227
    • 228
    • 229
    • 230
    • 231
    • 232
    • 233
    • 234
    • 235
    • 236
    • 237
    • 238
    • 239
    • 240
    • 241
    • 242
    • 243
    • 244
    • 245
    • 246
    • 247
    • 248
    • 249
    • 250
    • 251
    • 252
    • 253
    • 254
    • 255
    • 256
    • 257
    • 258
    • 259
    • 260
    • 261
    • 262
    • 263
    • 264
    • 265
    • 266
    • 267
    • 268
    • 269
    • 270
    • 271
    • 272
    • 273
    • 274
    • 275
    • 276
    • 277
    • 278
    • 279
    • 280
    • 281
    • 282
    • 283
    • 284
    • 285
    • 286
    • 287
    • 288
    • 289
    • 290
    • 291
    • 292
    • 293
    • 294
    • 295
    • 296
    • 297
    • 298
    • 299
    • 300
    • 301
    • 302
    • 303
    • 304
    • 305
    • 306
    • 307
    • 308
    • 309
    • 310
    • 311
    • 312
    • 313
    • 314
    • 315
    • 316
    • 317
    • 318
    • 319
    • 320
    • 321
    • 322
    • 323
    • 324
    • 325
    • 326
    • 327
    • 328
    • 329
    • 330
    • 331
    • 332
    • 333
    • 334
    • 335
    • 336
    • 337
    • 338
    • 339
    • 340
    • 341
    • 342
    • 343
    • 344
    • 345
    • 346
    • 347
    • 348
    • 349
    • 350
    • 351
    • 352
    • 353
    • 354
    • 355
    • 356
    • 357
    • 358
    • 359
    • 360
    • 361
    • 362
    • 363
    • 364
    • 365
    • 366
    • 367
    • 368
    • 369
    • 370
    • 371
    • 372
    • 373
    • 374
    • 375
    • 376
    • 377
    • 378
    • 379
    • 380
    • 381
    • 382
    • 383
    • 384
    • 385
    • 386
    • 387
    • 388
    • 389
    • 390
    • 391

    liburing version of cat

    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/stat.h>
    #include <sys/ioctl.h>
    #include <liburing.h>
    
    #define QUEUE_DEPTH 1
    #define BLOCK_SZ    1024
    
    /*
     * Per-request bookkeeping. submit_read_request() attaches a pointer to it
     * to the SQE with io_uring_sqe_set_data(), and get_completion_and_print()
     * gets it back with io_uring_cqe_get_data().
     */
    struct file_info {
        off_t file_sz;              /* size of the file being read, in bytes */
        struct iovec iovecs[];      /* Referred by readv/writev */
    };
    
    /*
     * Byte size of the file or block device open on fd.
     *
     * fstat() decides the file type: block devices are queried with the
     * BLKGETSIZE64 ioctl, regular files use st_size, and every other type
     * yields -1. A failing fstat() or ioctl() is reported with perror() and
     * also yields -1.
     */
    off_t get_file_size(int fd) {
        struct stat file_stat;
        unsigned long long blk_bytes;

        if (fstat(fd, &file_stat) < 0) {
            perror("fstat");
            return -1;
        }

        if (!S_ISBLK(file_stat.st_mode))
            return S_ISREG(file_stat.st_mode) ? file_stat.st_size : -1;

        if (ioctl(fd, BLKGETSIZE64, &blk_bytes) == 0)
            return blk_bytes;

        perror("ioctl");
        return -1;
    }
    
    /*
     * Copy len bytes from buf to stdout.
     *
     * Output goes through the buffered stdout stream in one fwrite() call,
     * which emits the same bytes as the former fputc() loop. Calls with a NULL
     * buffer or a non-positive length are ignored; with the old
     * `while (len--)` loop a negative len walked far beyond the buffer.
     */
    void output_to_console(char *buf, int len) {
        if (!buf || len < 1)
            return;

        size_t count = (size_t) len;
        fwrite(buf, 1, count, stdout);
    }
    
    /*
     * Wait for one completion, print the data the readv request produced and
     * release the request's memory.
     *
     * The file_info attached by submit_read_request() is recovered from the
     * CQE. The CQE is marked as seen on every path, and the file_info with its
     * buffers is freed whether the read succeeded or not.
     *
     * Returns 0 when the read succeeded and its data was printed, 1 when
     * waiting failed or the read itself reported an error.
     */
    int get_completion_and_print(struct io_uring *ring) {
        struct io_uring_cqe *cqe;
        int status = 0;

        /* liburing returns a negative errno value and does not set errno, so
         * perror() would print an unrelated message */
        int ret = io_uring_wait_cqe(ring, &cqe);
        if (ret < 0) {
            fprintf(stderr, "io_uring_wait_cqe: %s\n", strerror(-ret));
            return 1;
        }

        /* Copy what we need, then hand the CQE slot back right away */
        struct file_info *fi = io_uring_cqe_get_data(cqe);
        int res = cqe->res;
        io_uring_cqe_seen(ring, cqe);

        /* Divide in off_t; casting file_sz to int first would truncate sizes
         * above INT_MAX. */
        off_t blocks = fi->file_sz / BLOCK_SZ;
        if (fi->file_sz % BLOCK_SZ)
            blocks++;

        if (res < 0) {
            fprintf(stderr, "Async readv failed: %s\n", strerror(-res));
            status = 1;
        } else {
            /* res is the number of bytes read; print exactly that many,
             * walking the buffers in order */
            long long left = res;
            for (off_t i = 0; i < blocks && left > 0; i++) {
                size_t len = fi->iovecs[i].iov_len;
                if ((long long) len > left)
                    len = (size_t) left;
                output_to_console(fi->iovecs[i].iov_base, (int) len);
                left -= (long long) len;
            }
        }

        for (off_t i = 0; i < blocks; i++)
            free(fi->iovecs[i].iov_base);
        free(fi);

        return status;
    }
    
    /*
     * Submit one readv request covering all of file_path through liburing.
     *
     * The file is split into BLOCK_SZ-sized pieces. Each piece gets its own
     * aligned buffer, described by one iovec in a freshly allocated file_info;
     * that iovec array is passed to the request. The file_info pointer is
     * attached to the SQE so that get_completion_and_print() can find the
     * buffers again.
     *
     * Returns 0 once the request has been submitted, 1 on failure. On success
     * the file_info and its buffers are left for get_completion_and_print().
     */
    int submit_read_request(char *file_path, struct io_uring *ring) {
        struct file_info *fi = NULL;
        struct io_uring_sqe *sqe;
        off_t allocated = 0;    /* number of iovecs that own a buffer */
        off_t file_sz, nblocks, bytes_remaining;
        long iov_max;
        int ret;

        int file_fd = open(file_path, O_RDONLY);
        if (file_fd < 0) {
            perror("open");
            return 1;
        }

        /* get_file_size() returns -1 on error and for unsupported file types */
        file_sz = get_file_size(file_fd);
        if (file_sz < 0)
            goto fail;

        /* Divide in off_t; casting file_sz to int first would truncate sizes
         * above INT_MAX. */
        nblocks = file_sz / BLOCK_SZ;
        if (file_sz % BLOCK_SZ)
            nblocks++;

        /* A vectored read takes at most IOV_MAX iovecs, so refuse up front
         * instead of allocating the whole file only to have the read fail. */
        iov_max = sysconf(_SC_IOV_MAX);
        if (iov_max > 0 && nblocks > iov_max) {
            fprintf(stderr,
                    "%s: needs %lld blocks, but one readv request takes at most %ld\n",
                    file_path, (long long) nblocks, iov_max);
            goto fail;
        }

        fi = malloc(sizeof(*fi) + (sizeof(struct iovec) * (size_t) nblocks));
        if (!fi) {
            perror("malloc");
            goto fail;
        }
        fi->file_sz = file_sz;

        bytes_remaining = file_sz;
        while (bytes_remaining) {
            off_t bytes_to_read = bytes_remaining;
            if (bytes_to_read > BLOCK_SZ)
                bytes_to_read = BLOCK_SZ;

            void *buf;
            /* posix_memalign() returns the error number; it does not set errno */
            int err = posix_memalign(&buf, BLOCK_SZ, BLOCK_SZ);
            if (err) {
                fprintf(stderr, "posix_memalign: %s\n", strerror(err));
                goto fail;
            }
            fi->iovecs[allocated].iov_base = buf;
            fi->iovecs[allocated].iov_len = bytes_to_read;

            allocated++;
            bytes_remaining -= bytes_to_read;
        }

        /* Get an SQE; NULL means the submission queue has no free slot */
        sqe = io_uring_get_sqe(ring);
        if (!sqe) {
            fprintf(stderr, "io_uring_get_sqe: submission queue is full\n");
            goto fail;
        }
        /* Set up a readv operation starting at file offset 0 */
        io_uring_prep_readv(sqe, file_fd, fi->iovecs, (unsigned) nblocks, 0);
        /* Attach the bookkeeping so the completion can find the buffers */
        io_uring_sqe_set_data(sqe, fi);

        /* Finally, submit the request (liburing returns -errno on failure) */
        ret = io_uring_submit(ring);
        if (ret < 0) {
            fprintf(stderr, "io_uring_submit: %s\n", strerror(-ret));
            /* The prepared SQE is still in the ring and refers to fi and
             * file_fd, so they are deliberately not released here. */
            return 1;
        }

        /* NOTE(review): assumes the kernel keeps its own reference to the file
         * once a non-linked request has been submitted, so this descriptor
         * can be closed before completion; confirm against io_uring docs. */
        close(file_fd);
        return 0;

    fail:
        if (fi) {
            for (off_t i = 0; i < allocated; i++)
                free(fi->iovecs[i].iov_base);
            free(fi);
        }
        close(file_fd);
        return 1;
    }
    
    /*
     * Entry point of the liburing cat: initializes one ring, then for each
     * file named on the command line submits a read request and prints its
     * completion. Processing stops at the first failure.
     *
     * Returns 0 when every file was printed, 1 on a usage error, ring
     * initialization failure, or a failed submission or read.
     */
    int main(int argc, char *argv[]) {
        struct io_uring ring;
        int status = 0;

        if (argc < 2) {
            fprintf(stderr, "Usage: %s [file name] <[file name] ...>\n",
                    argv[0]);
            return 1;
        }

        /* Initialize io_uring (liburing returns -errno on failure) */
        int ret = io_uring_queue_init(QUEUE_DEPTH, &ring, 0);
        if (ret < 0) {
            fprintf(stderr, "io_uring_queue_init: %s\n", strerror(-ret));
            return 1;
        }

        for (int i = 1; i < argc; i++) {
            if (submit_read_request(argv[i], &ring) ||
                get_completion_and_print(&ring)) {
                fprintf(stderr, "Error reading file: %s\n", argv[i]);
                status = 1;
                break;
            }
        }

        /* Call the clean-up function on every path once the ring exists */
        io_uring_queue_exit(&ring);
        return status;
    }
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84
    • 85
    • 86
    • 87
    • 88
    • 89
    • 90
    • 91
    • 92
    • 93
    • 94
    • 95
    • 96
    • 97
    • 98
    • 99
    • 100
    • 101
    • 102
    • 103
    • 104
    • 105
    • 106
    • 107
    • 108
    • 109
    • 110
    • 111
    • 112
    • 113
    • 114
    • 115
    • 116
    • 117
    • 118
    • 119
    • 120
    • 121
    • 122
    • 123
    • 124
    • 125
    • 126
    • 127
    • 128
    • 129
    • 130
    • 131
    • 132
    • 133
    • 134
    • 135
    • 136
    • 137
    • 138
    • 139
    • 140
    • 141
    • 142
    • 143
    • 144
    • 145
    • 146
    • 147
    • 148
    • 149
    • 150
    • 151
    • 152
    • 153
    • 154
    • 155
    • 156
    • 157
    • 158
    • 159
    • 160
  • 相关阅读:
    杀戮空间2游戏开服架设好后怎么查找自己服务器
    TSN标准化与虹科组网测试方案:赋能多领域以太网新发展
    QT子线程或自定义类操作访问主界面UI控件的几种方法
    Leetcode75颜色分类
    9.4 RetLibc实战之利用VirtualAlloc
    kubernetes部署rocketmq集群
    应急响应-web
    拓扑排序及其衍生
    AI 能多强「GitHub 热点速览」
    好的代码是优质资产、莫让代码成为负债
  • 原文地址:https://blog.csdn.net/weixin_39541632/article/details/127812541