FIO是一款方便IO性能测试的工具,以统计全面、模式灵活深得用户欢心。当前支持libaio、sync等IO引擎。在存储系统开发中,如何快速、全面地评测系统IO性能?给FIO添加一个调用存储系统自身IO接口的插件(ioengine),是一个好办法。那么,该怎么添加一个插件呢?
首先,得熟悉几个基本的公共数据结构。
fio 插件的公共接口
单任务IO latency/IOPS/BW 统计信息
fio/fio.h:
struct thread_data {
struct flist_head opt_list;
unsigned long flags;
struct thread_options o;
void *eo;
pthread_t thread;
unsigned int thread_number;
unsigned int subjob_number;
unsigned int groupid;
struct thread_stat ts __attribute__ ((aligned(8)));
int client_type;
struct io_log *slat_log;
struct io_log *clat_log;
struct io_log *clat_hist_log;
struct io_log *lat_log;
struct io_log *bw_log;
struct io_log *iops_log;
struct workqueue log_compress_wq;
struct thread_data *parent;
uint64_t stat_io_bytes[DDIR_RWDIR_CNT];
struct timespec bw_sample_time;
uint64_t stat_io_blocks[DDIR_RWDIR_CNT];
struct timespec iops_sample_time;
volatile int update_rusage;
struct fio_sem *rusage_sem;
struct rusage ru_start;
struct rusage ru_end;
struct fio_file **files;
unsigned char *file_locks;
unsigned int files_size;
unsigned int files_index;
unsigned int nr_open_files;
unsigned int nr_done_files;
union {
unsigned int next_file;
struct frand_state next_file_state;
.....
int error;
int sig;
int done;
int stop_io;
pid_t pid;
char *orig_buffer;
size_t orig_buffer_size;
volatile int runstate;
volatile bool terminate;
bool last_was_sync;
enum fio_ddir last_ddir;
int mmapfd;
void *iolog_buf;
FILE *iolog_f;
unsigned long rand_seeds[FIO_RAND_NR_OFFS];
struct frand_state bsrange_state[DDIR_RWDIR_CNT];
struct frand_state verify_state;
struct frand_state trim_state;
struct frand_state delay_state;
struct frand_state buf_state;
struct frand_state buf_state_prev;
struct frand_state dedupe_state;
struct frand_state zone_state;
struct zone_split_index **zone_state_index;
unsigned int verify_batch;
unsigned int trim_batch;
struct thread_io_list *vstate;
int shm_id;
/*
* IO engine hooks, contains everything needed to submit an io_u
* to any of the available IO engines.
*/
struct ioengine_ops *io_ops;
int io_ops_init;
/*
* IO engine private data and dlhandle.
*/
void *io_ops_data;
.....
基本的IO操作单元
接受fio 打出的IO请求,分发到具体的IO引擎。代码位置: fio/io_u.h
struct io_u {
struct timespec start_time;
struct timespec issue_time;
**struct fio_file *file;**
unsigned int flags;
enum fio_ddir ddir;
/*
* For replay workloads, we may want to account as a different
* IO type than what is being submitted.
*/
enum fio_ddir acct_ddir;
/*
* Write generation
*/
unsigned short numberio;
/*
* Allocated/set buffer and length
*/
unsigned long buflen;
unsigned long long offset;
void *buf;
.......
struct io_piece *ipo;
unsigned int resid;
unsigned int error;
/*
* io engine private data
*/
union {
unsigned int index;
unsigned int seen;
void *engine_data;
};
union {
struct flist_head verify_list;
struct workqueue_work work;
};
/*
* Callback for io completion
*/
int (*end_io)(struct thread_data *, struct io_u **);
union {
#ifdef CONFIG_LIBAIO
** struct iocb iocb;**
#endif
#ifdef CONFIG_POSIXAIO
os_aiocb_t aiocb;
#endif
#ifdef FIO_HAVE_SGIO
struct sg_io_hdr hdr;
#endif
#ifdef CONFIG_GUASI
guasi_req_t greq;
#endif
#ifdef CONFIG_SOLARISAIO
aio_result_t resultp;
#endif
#ifdef FIO_HAVE_BINJECT
struct b_user_cmd buc;
#endif
#ifdef CONFIG_RDMA
struct ibv_mr *mr;
#endif
void *mmap_data;
};
io_u 提供的基本操作:
/*
 * Helper routines that operate on an io_u together with the thread_data
 * that owns it. Only the prototypes are listed here.
 */
extern struct io_u *__get_io_u(struct thread_data *);
extern struct io_u *get_io_u(struct thread_data *);
extern void put_io_u(struct thread_data *, struct io_u *);
extern void clear_io_u(struct thread_data *, struct io_u *);
extern void requeue_io_u(struct thread_data *, struct io_u **);
extern int __must_check io_u_sync_complete(struct thread_data *, struct io_u *);
extern int __must_check io_u_queued_complete(struct thread_data *, int);
extern void io_u_queued(struct thread_data *, struct io_u *);
extern int io_u_quiesce(struct thread_data *);
extern void io_u_log_error(struct thread_data *, struct io_u *);
extern void io_u_mark_depth(struct thread_data *, unsigned int);
extern void fill_io_buffer(struct thread_data *, void *, unsigned int, unsigned int);
extern void io_u_fill_buffer(struct thread_data *td, struct io_u *, unsigned int, unsigned int);
void io_u_mark_complete(struct thread_data *, unsigned int);
/* The libaio engine calls this from its commit hook with the count io_submit() accepted. */
void io_u_mark_submit(struct thread_data *, unsigned int);
bool queue_full(const struct thread_data *);
/* The libaio engine's queue hook calls this to service sync requests inline. */
int do_io_u_sync(const struct thread_data *, struct io_u *);
/* The libaio engine's queue hook calls this to service DDIR_TRIM requests inline. */
int do_io_u_trim(const struct thread_data *, struct io_u *);
代码位置: fio/ioengines.h
定义并实现ioengine ops
/*
 * Callback table plus metadata describing one I/O engine.
 * An engine fills in the hooks it needs with designated initializers;
 * the libaio engine, for instance, leaves several of them unset.
 */
struct ioengine_ops {
struct flist_head list;
/* Engine name, e.g. "libaio". */
const char *name;
/* Version field; the libaio engine sets it to FIO_IOOPS_VERSION. */
int version;
int flags;
int (*setup)(struct thread_data *);
/*
 * Per-thread initialisation. The libaio engine allocates its private
 * state here and stores it in td->io_ops_data; it returns 0 on success.
 */
int (*init)(struct thread_data *);
/* Prepare one io_u for submission (libaio fills in the embedded iocb). */
int (*prep)(struct thread_data *, struct io_u *);
/*
 * Accept one io_u. The libaio engine returns FIO_Q_BUSY, FIO_Q_COMPLETED
 * or FIO_Q_QUEUED from this hook.
 */
int (*queue)(struct thread_data *, struct io_u *);
/* Submit whatever queue() has accumulated (libaio calls io_submit()). */
int (*commit)(struct thread_data *);
/*
 * Reap completions. The libaio engine names the arguments (td, min, max,
 * timeout) and returns the number of events reaped or a negative error.
 */
int (*getevents)(struct thread_data *, unsigned int, unsigned int, const struct timespec *);
/* Map an event index (into what getevents() collected) back to its io_u. */
struct io_u *(*event)(struct thread_data *, int);
char *(*errdetails)(struct io_u *);
/* Cancel a submitted io_u (libaio forwards to io_cancel()). */
int (*cancel)(struct thread_data *, struct io_u *);
/* Release engine-private resources (libaio frees what init() allocated). */
void (*cleanup)(struct thread_data *);
/* File hooks; the libaio engine plugs in the generic_* implementations. */
int (*open_file)(struct thread_data *, struct fio_file *);
int (*close_file)(struct thread_data *, struct fio_file *);
int (*invalidate)(struct thread_data *, struct fio_file *);
int (*unlink_file)(struct thread_data *, struct fio_file *);
int (*get_file_size)(struct thread_data *, struct fio_file *);
void (*terminate)(struct thread_data *);
int (*iomem_alloc)(struct thread_data *, size_t);
void (*iomem_free)(struct thread_data *);
int (*io_u_init)(struct thread_data *, struct io_u *);
void (*io_u_free)(struct thread_data *, struct io_u *);
/* Size of the engine's private option struct (libaio: sizeof(struct libaio_options)). */
int option_struct_size;
/* Engine-specific option table; the libaio table ends with a .name == NULL entry. */
struct fio_option *options;
};
注册ioengine ops
/*
 * Engine lifecycle entry points. register_ioengine() and
 * unregister_ioengine() take a pointer to the engine's ioengine_ops table.
 */
extern struct ioengine_ops *load_ioengine(struct thread_data *);
extern void register_ioengine(struct ioengine_ops *);
extern void unregister_ioengine(struct ioengine_ops *);
extern void free_ioengine(struct thread_data *);
extern void close_ioengine(struct thread_data *);
fio libaio 插件的实现和分析
下面,以libaio的支持为例,进行分析:
代码位置: fio/engines/libaio.c
/*
 * Ops table for the libaio engine. Hooks that are not listed stay
 * zero-initialised.
 */
static struct ioengine_ops ioengine = {
	/* identification */
	.name			= "libaio",
	.version		= FIO_IOOPS_VERSION,

	/* per-thread lifecycle */
	.init			= fio_libaio_init,
	.cleanup		= fio_libaio_cleanup,

	/* submission path */
	.prep			= fio_libaio_prep,
	.queue			= fio_libaio_queue,
	.commit			= fio_libaio_commit,

	/* completion path */
	.getevents		= fio_libaio_getevents,
	.event			= fio_libaio_event,
	.cancel			= fio_libaio_cancel,

	/* file handling is delegated to the generic helpers */
	.open_file		= generic_open_file,
	.close_file		= generic_close_file,
	.get_file_size		= generic_get_file_size,

	/* engine-specific options */
	.option_struct_size	= sizeof(struct libaio_options),
	.options		= options,
};
name
定义支持fio的IO引擎的名称
version
定义当前该实现的版本
init
调用libaio的初始化函数(io_queue_init);申请引擎所需的内存;并把引擎私有数据挂到 thread_data 的 io_ops_data 成员上。
/*
 * Per-thread initialisation for the libaio engine.
 *
 * Allocates the engine-private libaio_data plus its three ring arrays
 * (events, iocb pointers, io_u pointers), each sized to td->o.iodepth,
 * creates the kernel AIO context and publishes the result through
 * td->io_ops_data.
 *
 * Returns 0 on success and 1 on failure. On failure nothing is left
 * allocated and td->io_ops_data is not touched.
 */
static int fio_libaio_init(struct thread_data *td)
{
	struct libaio_options *o = td->eo;
	struct libaio_data *ld;
	int err = 0;

	ld = calloc(1, sizeof(*ld));
	if (!ld) {
		td_verror(td, ENOMEM, "calloc");
		return 1;
	}

	/*
	 * Allocate the rings before creating the AIO context so that every
	 * failure path below only has plain memory to release.
	 */
	ld->entries = td->o.iodepth;
	ld->is_pow2 = is_power_of_2(ld->entries);
	ld->aio_events = calloc(ld->entries, sizeof(struct io_event));
	ld->iocbs = calloc(ld->entries, sizeof(struct iocb *));
	ld->io_us = calloc(ld->entries, sizeof(struct io_u *));

	/* calloc(0, ...) may legitimately return NULL, so only test real sizes */
	if (ld->entries && (!ld->aio_events || !ld->iocbs || !ld->io_us)) {
		td_verror(td, ENOMEM, "calloc");
		goto err_free;
	}

	/*
	 * First try passing in 0 for queue depth, since we don't
	 * care about the user ring. If that fails, the kernel is too old
	 * and we need the right depth.
	 */
	if (!o->userspace_reap)
		err = io_queue_init(INT_MAX, &ld->aio_ctx);
	if (o->userspace_reap || err == -EINVAL)
		err = io_queue_init(td->o.iodepth, &ld->aio_ctx);
	if (err) {
		td_verror(td, -err, "io_queue_init");
		log_err("fio: check /proc/sys/fs/aio-max-nr\n");
		goto err_free;
	}

	td->io_ops_data = ld;
	return 0;

err_free:
	/* free(NULL) is a no-op, so partially allocated state is fine here */
	free(ld->aio_events);
	free(ld->iocbs);
	free(ld->io_us);
	free(ld);
	return 1;
}
prep
处理从FIO传过来的IO请求:根据 io_u 的方向(读/写/sync),取出 xfer_buf、xfer_buflen、offset,作为 io_prep_pread / io_prep_pwrite 的参数(sync 请求则调用 io_prep_fsync),用它来初始化libaio的IO request(iocb)。
/*
 * Fill in the iocb embedded in io_u according to the I/O direction:
 * reads and writes use the transfer buffer, length and offset carried by
 * the io_u; sync directions become an fsync request. Other directions
 * leave the iocb untouched. Always returns 0.
 */
static int fio_libaio_prep(struct thread_data fio_unused *td, struct io_u *io_u)
{
	struct fio_file *file = io_u->file;
	struct iocb *cb = &io_u->iocb;

	if (io_u->ddir == DDIR_READ) {
		io_prep_pread(cb, file->fd, io_u->xfer_buf,
			      io_u->xfer_buflen, io_u->offset);
		return 0;
	}

	if (io_u->ddir == DDIR_WRITE) {
		io_prep_pwrite(cb, file->fd, io_u->xfer_buf,
			       io_u->xfer_buflen, io_u->offset);
		return 0;
	}

	if (ddir_sync(io_u->ddir))
		io_prep_fsync(cb, file->fd);

	return 0;
}
请求入队
把fio套件打出来的请求加入libaio引擎自己维护的待提交队列(环形数组 iocbs/io_us),此时还没有真正提交给内核:
/*
 * Accept one io_u from fio.
 *
 * Sync and trim requests are executed inline, and only when nothing else
 * is pending. Everything else is placed at the head of the engine's ring
 * of not-yet-submitted iocbs; the commit hook submits it later.
 *
 * Returns:
 *   FIO_Q_BUSY      - the ring is full, or a sync/trim has to wait for
 *                     pending I/O to drain first
 *   FIO_Q_COMPLETED - a sync/trim was carried out synchronously
 *   FIO_Q_QUEUED    - the io_u was added to the ring
 */
static int fio_libaio_queue(struct thread_data *td, struct io_u *io_u)
{
	struct libaio_data *ld = td->io_ops_data;

	fio_ro_check(td, io_u);

	if (ld->queued == td->o.iodepth)
		return FIO_Q_BUSY;

	/*
	 * fsync is tricky, since it can fail and we need to do it
	 * serialized with other io. the reason is that linux doesn't
	 * support aio fsync yet. So return busy for the case where we
	 * have pending io, to let fio complete those first.
	 */
	if (ddir_sync(io_u->ddir)) {
		if (ld->queued)
			return FIO_Q_BUSY;

		do_io_u_sync(td, io_u);
		return FIO_Q_COMPLETED;
	}

	if (io_u->ddir == DDIR_TRIM) {
		if (ld->queued)
			return FIO_Q_BUSY;

		do_io_u_trim(td, io_u);
		return FIO_Q_COMPLETED;
	}

	ld->iocbs[ld->head] = &io_u->iocb;
	ld->io_us[ld->head] = io_u;
	ring_inc(ld, &ld->head, 1);
	/*
	 * Account for the new entry: the ring-full check above and the
	 * commit hook both rely on ld->queued, and commit subtracts what
	 * io_submit() accepted. Without this increment nothing is ever
	 * submitted.
	 */
	ld->queued++;
	return FIO_Q_QUEUED;
}
提交IO请求
把上面queue中的请求通过 libaio io_submit API 真正提交:
static int fio_libaio_commit(struct thread_data *td)
{
struct libaio_data *ld = td->io_ops_data;
struct iocb **iocbs;
struct io_u **io_us;
struct timespec ts;
int ret, wait_start = 0;
if (!ld->queued)
return 0;
do {
long nr = ld->queued;
nr = min((unsigned int) nr, ld->entries - ld->tail);
io_us = ld->io_us + ld->tail;
iocbs = ld->iocbs + ld->tail;
ret = io_submit(ld->aio_ctx, nr, iocbs);
if (ret > 0) {
fio_libaio_queued(td, io_us, ret);
io_u_mark_submit(td, ret);
ld->queued -= ret;
ring_inc(ld, &ld->tail, ret);
ret = 0;
wait_start = 0;
取消执行
取消已提交的IO请求。同样是从统一的 thread_data 和 io_u 请求里拿相关参数,并直接返回 io_cancel 的返回值(成功为0,失败为负的错误码),而不是已完成的IO请求数量。
/*
 * Ask the kernel to cancel the iocb embedded in io_u. The engine's event
 * array is passed as the result slot; the io_cancel() return value is
 * handed back unchanged.
 */
static int fio_libaio_cancel(struct thread_data *td, struct io_u *io_u)
{
	struct libaio_data *priv = td->io_ops_data;
	struct iocb *target = &io_u->iocb;

	return io_cancel(priv->aio_ctx, target, priv->aio_events);
}
抓取已经完成的IO的请求
上面提交的IO请求,多少已经完成,需要统计。同样调用libaio 对应的getevents接口:
/*
 * Reap completed I/O into ld->aio_events.
 *
 * td:  thread whose engine-private data holds the AIO context
 * min: keep looping until at least this many events were collected
 * max: upper bound handed to each reap call
 * t:   optional timeout; a private copy is passed to io_getevents()
 *
 * When the userspace_reap option is set, no minimum is required and the
 * context carries the AIO ring magic, events are read through
 * user_io_getevents(); otherwise io_getevents() is used. A zero or
 * -EAGAIN result triggers a commit of pending I/O and a short sleep
 * before retrying; -EINTR simply retries.
 *
 * Returns the number of events collected, or the negative error from the
 * last reap call.
 */
static int fio_libaio_getevents(struct thread_data *td, unsigned int min,
				unsigned int max, const struct timespec *t)
{
	struct libaio_data *ld = td->io_ops_data;
	struct libaio_options *o = td->eo;
	struct timespec timeout_copy;
	struct timespec *timeout = NULL;
	unsigned actual_min = min;
	int reaped = 0;
	int rc;

	if (td->o.iodepth_batch_complete_min == 0)
		actual_min = 0;

	if (t) {
		timeout_copy = *t;
		timeout = &timeout_copy;
	}

	for (;;) {
		struct io_event *slot = ld->aio_events + reaped;

		if (o->userspace_reap == 1 && actual_min == 0 &&
		    ((struct aio_ring *)(ld->aio_ctx))->magic == AIO_RING_MAGIC)
			rc = user_io_getevents(ld->aio_ctx, max, slot);
		else
			rc = io_getevents(ld->aio_ctx, actual_min, max, slot,
					  timeout);

		if (rc > 0) {
			reaped += rc;
		} else if ((min && rc == 0) || rc == -EAGAIN) {
			/* nothing ready yet: push pending I/O out and retry */
			fio_libaio_commit(td);
			usleep(100);
		} else if (rc != -EINTR) {
			break;
		}

		if (reaped >= min)
			break;
	}

	return rc < 0 ? rc : reaped;
}
从IO 完成的请求的event中找到对应的IO请求
统计时需要,因为libaio可能是一批发射请求的,需要知道哪些IO请求已经完成:
/*
 * Translate the event at index 'event' in ld->aio_events back into its
 * io_u, recovered from the iocb pointer stored in the event.
 *
 * The event result is compared with the requested transfer length:
 *   equal   -> io_u->error is cleared
 *   larger  -> io_u->error is set to the negated result
 *   smaller -> io_u->resid records the bytes that were not transferred
 */
static struct io_u *fio_libaio_event(struct thread_data *td, int event)
{
	struct libaio_data *ld = td->io_ops_data;
	struct io_event *ev = ld->aio_events + event;
	struct io_u *io_u = container_of(ev->obj, struct io_u, iocb);

	if (ev->res == io_u->xfer_buflen)
		io_u->error = 0;
	else if (ev->res > io_u->xfer_buflen)
		io_u->error = -ev->res;
	else
		io_u->resid = io_u->xfer_buflen - ev->res;

	return io_u;
}
清理环境
测试完成之后,需要释放之前申请的内存。
/*
 * Release everything held in the engine-private data: the AIO context
 * (unless this thread is flagged TD_F_CHILD), the three ring arrays and
 * the libaio_data itself. Does nothing when no private data is attached.
 */
static void fio_libaio_cleanup(struct thread_data *td)
{
	struct libaio_data *priv = td->io_ops_data;

	if (!priv)
		return;

	/*
	 * Work-around to avoid huge RCU stalls at exit time. If we
	 * don't do this here, then it'll be torn down by exit_aio().
	 * But for that case we can parallellize the freeing, thus
	 * speeding it up a lot.
	 */
	if ((td->flags & TD_F_CHILD) == 0)
		io_destroy(priv->aio_ctx);

	free(priv->aio_events);
	free(priv->iocbs);
	free(priv->io_us);
	free(priv);
}
打开文件
测试前需要打开文件。对于通用的块设备、文件系统中的文件,直接调用generic_open_file就可以,它主要内容如下:
int generic_open_file(struct thread_data *td, struct fio_file *f)
{
int is_std = 0;
int flags = 0;
int from_hash = 0;
....
if (td_trim(td))
goto skip_flags;
if (td->o.odirect)
flags |= OS_O_DIRECT;
if (td->o.oatomic) {
if (!FIO_O_ATOMIC) {
td_verror(td, EINVAL, "OS does not support atomic IO");
return 1;
}
flags |= OS_O_DIRECT | FIO_O_ATOMIC;
}
if (td->o.sync_io)
flags |= O_SYNC;
if (td->o.create_on_open && td->o.allow_create)
flags |= O_CREAT;
skip_flags:
if (f->filetype != FIO_TYPE_FILE)
flags |= FIO_O_NOATIME;
open_again:
if (td_write(td)) {
if (!read_only)
flags |= O_RDWR;
if (f->filetype == FIO_TYPE_FILE && td->o.allow_create)
flags |= O_CREAT;
if (is_std)
f->fd = dup(STDOUT_FILENO);
else
from_hash = file_lookup_open(f, flags);
} else if (td_read(td)) {
if (f->filetype == FIO_TYPE_CHAR && !read_only)
flags |= O_RDWR;
else
flags |= O_RDONLY;
if (is_std)
f->fd = dup(STDIN_FILENO);
else
from_hash = file_lookup_open(f, flags);
} else if (td_trim(td)) {
assert(!td_rw(td)); /* should have matched above */
flags |= O_RDWR;
from_hash = file_lookup_open(f, flags);
}
....
}
关闭文件
同上,关闭测试完成的文件,对于块设备和文件系统中的文件,调用close系统调用就可以generic_close_file:
/*
 * Generic close hook: drops the file from the file hash, closes its
 * descriptor (and the shadow descriptor if one is open), and resets the
 * descriptor fields and engine_pos.
 *
 * Returns 0 on success, or the errno left by a failing close() of the
 * main descriptor. A failure closing the shadow descriptor is not
 * reported.
 */
int generic_close_file(struct thread_data fio_unused *td, struct fio_file *f)
{
	int rc;

	dprint(FD_FILE, "fd close %s\n", f->file_name);

	remove_file_hash(f);

	/* capture errno right away, before any later call can overwrite it */
	rc = (close(f->fd) < 0) ? errno : 0;
	f->fd = -1;

	if (f->shadow_fd != -1) {
		close(f->shadow_fd);
		f->shadow_fd = -1;
	}

	f->engine_pos = 0;
	return rc;
}
获取文件大小
对于libaio支持的块设备或文件,也是调用通用的接口去stat 文件大小:
/*
 * Default .get_file_size hook used by most I/O engines: it simply
 * forwards to get_file_size() and hands back that function's result.
 */
int generic_get_file_size(struct thread_data *td, struct fio_file *f)
{
	int rc = get_file_size(td, f);

	return rc;
}
选项描述
对于libaio的option,具体描述如下:
/*
 * Option table for the libaio engine. It holds one entry,
 * "userspace_reap", stored in struct libaio_options, followed by a
 * terminating entry whose .name is NULL.
 */
static struct fio_option options[] = {
	{
		.name		= "userspace_reap",
		.lname		= "Libaio userspace reaping",
		.help		= "Use alternative user-space reap implementation",
		.type		= FIO_OPT_STR_SET,
		.off1		= offsetof(struct libaio_options, userspace_reap),
		.category	= FIO_OPT_C_ENGINE,
		.group		= FIO_OPT_G_LIBAIO,
	},
	/* terminator */
	{ .name = NULL },
};
选项大小
告诉fio 测试套件,libaio引擎的option需占用的空间大小:
.option_struct_size = sizeof(struct libaio_options),
libaio引擎的注册和注销
调用engine通用的注册和注销接口如下:
/*
 * Hands the libaio ops table to fio's engine registry.
 * NOTE(review): fio_init presumably marks this to run automatically at
 * program start-up (constructor attribute) - confirm against its definition.
 */
static void fio_init fio_libaio_register(void)
{
register_ioengine(&ioengine);
}
/*
 * Removes the libaio ops table from fio's engine registry.
 * NOTE(review): fio_exit presumably marks this to run automatically at
 * program exit (destructor attribute) - confirm against its definition.
 */
static void fio_exit fio_libaio_unregister(void)
{
unregister_ioengine(&ioengine);
}