等待队列在内核中很有用途,尤其用在中断处理、进程同步及定时。

  等待队列实现了在事件上的条件等待:希望等待特定事件的进程把自己放进合适的等待队列,并放弃控制权。因此,等待队列表示一组睡眠的进程,当某一条件变为真时,由内核唤醒它们。

1.每个等待队列都有一个等待队列头(wait queue head),等待队列头是一个类型为wait_queue_head_t的数据结构:

/* Head of a wait queue: a spinlock plus the list of queued entries. */
struct __wait_queue_head {
    /* Held by the add/remove/wake-up helpers while they touch task_list. */
    spinlock_t lock;
    /* List head onto which each wait_queue_t is linked via its task_list. */
    struct list_head task_list;
};
typedef struct __wait_queue_head wait_queue_head_t;

  因为等待队列是由中断处理程序和主要内核函数修改的,因此必须对其双向链表进行保护以免对其进行同时访问,因为同时访问会导致不可预测的后果。同步是通过等待队列头中的lock自旋锁达到的。task_list字段是等待进程链表的头。

 

2.等待队列链表中的元素类型为wait_queue_t:

/* One entry (waiter) that can be linked onto a wait_queue_head_t. */
struct __wait_queue {
    /* Bit mask; the only bit defined here is WQ_FLAG_EXCLUSIVE. */
    unsigned int flags;
#define WQ_FLAG_EXCLUSIVE    0x01
    /* Opaque pointer; the stock initializers store the waiting task here. */
    void *private;
    /* Callback invoked by __wake_up_common() to wake this entry. */
    wait_queue_func_t func;
    /* Node linking this entry into the head's task_list. */
    struct list_head task_list;
};
typedef struct __wait_queue wait_queue_t;

  flags为1表示睡眠进程是互斥进程,由内核有选择的唤醒。

  flags为0表示睡眠进程是非互斥进程,由内核在等待条件成立时唤醒。

  WQ_FLAG_EXCLUSIVE用于赋给flags,表示该睡眠进程是一个互斥进程。

  private是进程描述符的指针,用于指向struct task_struct类型的进程描述符p/current。

  func表示等待队列中睡眠进程应该用什么方式唤醒。

  task_list表示要插入等待队列头的链表指针。

 

3.DECLARE_WAIT_QUEUE_HEAD

/*
 * Static initializer for a wait_queue_head_t called @name: the lock starts
 * unlocked and task_list points at itself in both directions, i.e. the
 * list is empty.
 */
#define __WAIT_QUEUE_HEAD_INITIALIZER(name) {                \
    .lock        = __SPIN_LOCK_UNLOCKED(name.lock),        \
    .task_list    = { &(name).task_list, &(name).task_list } }

/* Define a wait_queue_head_t variable @name and initialize it in place. */
#define DECLARE_WAIT_QUEUE_HEAD(name) \
    wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
DECLARE_WAIT_QUEUE_HEAD(name)用来声明并初始化一个等待队列头,如:
/* Example: define and statically initialize a queue head named wq_head. */
DECLARE_WAIT_QUEUE_HEAD(wq_head);

 

4.DECLARE_WAITQUEUE

/*
 * Static initializer for a wait_queue_t: @tsk is stored in .private, the
 * wake callback is default_wake_function and the list node is not linked
 * anywhere (both pointers NULL).  .flags is not named, so it is
 * zero-initialized, leaving WQ_FLAG_EXCLUSIVE clear.
 */
#define __WAITQUEUE_INITIALIZER(name, tsk) {                \
    .private    = tsk,                        \
    .func        = default_wake_function,            \
    .task_list    = { NULL, NULL } }

/* Define a wait_queue_t variable @name bound to task @tsk. */
#define DECLARE_WAITQUEUE(name, tsk)                    \
    wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk)
DECLARE_WAITQUEUE(name, tsk)用来声明并初始化一个等待队列项,如:
/* Example: define a wait queue entry named wq for the current task. */
DECLARE_WAITQUEUE(wq, current);

 

5.init_waitqueue_head

/*
 * __init_waitqueue_head - runtime initialization of a wait queue head
 * @q:   head to initialize
 * @key: lock class key handed to lockdep_set_class() for @q->lock
 *
 * Initializes the spinlock, assigns it the given lock class and makes
 * task_list an empty list.  Normally reached through the
 * init_waitqueue_head() macro, which supplies @key.
 */
void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *key)
{
    spin_lock_init(&q->lock);
    lockdep_set_class(&q->lock, key);
    INIT_LIST_HEAD(&q->task_list);
}


/*
 * init_waitqueue_head - initialize a wait queue head at run time
 * @q: pointer to the wait_queue_head_t to initialize
 *
 * Each expansion declares its own static lock_class_key, so every call
 * site passes a distinct key to __init_waitqueue_head().
 */
#define init_waitqueue_head(q)                \
    do {                        \
        static struct lock_class_key __key;    \
                            \
        __init_waitqueue_head((q), &__key);    \
    } while (0)

init_waitqueue_head(q)可以用来初始化动态分配的等待队列头变量,如:

wait_queue_head_t wq_head;
init_waitqueue_head(wq_head);

 

6.init_waitqueue_entry

/*
 * init_waitqueue_entry - bind a wait queue entry to a task
 * @q: entry to set up
 * @p: task stored in the entry's private field
 *
 * Installs default_wake_function as the wake callback and clears flags,
 * so WQ_FLAG_EXCLUSIVE is not set.  task_list is left untouched.
 */
static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
{
    q->func = default_wake_function;
    q->private = p;
    q->flags = 0;
}
init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)用于初始化一个等待队列项,即把一个进程传递给一个wait_queue_t类型的变量,作用同DECLARE_WAITQUEUE(name, tsk)一样,只不过前者是初始化动态分配的变量,后者只需传递一个等待队列项的名字和进程即可,使用如下:
/* Example: declare an entry, then bind it to the current task at run time. */
wait_queue_t wq;
    init_waitqueue_entry(&wq, current);

 

7.init_waitqueue_func_entry

/*
 * init_waitqueue_func_entry - set up an entry with a custom wake callback
 * @q:    entry to set up
 * @func: callback to install as the entry's wake function
 *
 * No task is attached (private becomes NULL) and flags is cleared.
 * task_list is left untouched.
 */
static inline void init_waitqueue_func_entry(wait_queue_t *q,
                    wait_queue_func_t func)
{
    q->func = func;
    q->private = NULL;
    q->flags = 0;
}

   init_waitqueue_func_entry用于初始化等待队列项的自定义唤醒函数。

 

8.add_wait_queue

/*
 * __add_wait_queue - link @new right after the list head of @head
 *
 * Takes no lock itself; the callers shown in this file hold head->lock
 * around the call.
 */
static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
{
    list_add(&new->task_list, &head->task_list);
}

/*
 * add_wait_queue - queue a non-exclusive waiter
 * @q:    wait queue head to add to
 * @wait: entry to enqueue
 *
 * Clears WQ_FLAG_EXCLUSIVE on @wait, then links it in through
 * __add_wait_queue() while holding @q->lock
 * (spin_lock_irqsave / spin_unlock_irqrestore).
 */
void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{
    unsigned long irq_state;

    wait->flags &= ~WQ_FLAG_EXCLUSIVE;

    spin_lock_irqsave(&q->lock, irq_state);
    __add_wait_queue(q, wait);
    spin_unlock_irqrestore(&q->lock, irq_state);
}

  add_wait_queue的作用是将一个等待队列项加入到等待队列头中。如将上面3,4或5,6的wq加入到wq_head:

/* Example: enqueue entry wq on queue head wq_head as a non-exclusive waiter. */
add_wait_queue(&wq_head, &wq);

 

9.add_wait_queue_exclusive

/*
 * add_wait_queue_exclusive - queue an exclusive waiter
 * @q:    wait queue head to add to
 * @wait: entry to enqueue
 *
 * Sets WQ_FLAG_EXCLUSIVE on @wait, then links it in through
 * __add_wait_queue_tail() while holding @q->lock
 * (spin_lock_irqsave / spin_unlock_irqrestore).
 */
void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
{
    unsigned long irq_state;

    wait->flags |= WQ_FLAG_EXCLUSIVE;

    spin_lock_irqsave(&q->lock, irq_state);
    __add_wait_queue_tail(q, wait);
    spin_unlock_irqrestore(&q->lock, irq_state);
}

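  add_wait_queue_exclusive的作用是将一个互斥进程的等待队列项加入等待队列头,可以看出其与add_wait_queue函数体的不同之处有两点:一是它执行wait->flags |= WQ_FLAG_EXCLUSIVE;将flags的最低位置1,而add_wait_queue中是将flags的最低位清0;二是它调用__add_wait_queue_tail把等待队列项插入队尾,而add_wait_queue调用__add_wait_queue把等待队列项插入队首。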

 

10.remove_wait_queue

/*
 * __remove_wait_queue - unlink @old from whatever list it is on
 *
 * @head is not used by the body.  Takes no lock itself; remove_wait_queue()
 * calls it with the head's lock held.
 */
static inline void __remove_wait_queue(wait_queue_head_t *head,
                            wait_queue_t *old)
{
    list_del(&old->task_list);
}


/*
 * remove_wait_queue - take an entry off a wait queue
 * @q:    wait queue head the entry is queued on
 * @wait: entry to unlink
 *
 * Unlinks @wait through __remove_wait_queue() while holding @q->lock
 * (spin_lock_irqsave / spin_unlock_irqrestore).
 */
void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{
    unsigned long irq_state;

    spin_lock_irqsave(&q->lock, irq_state);
    __remove_wait_queue(q, wait);
    spin_unlock_irqrestore(&q->lock, irq_state);
}

  remove_wait_queue的作用是从等待队列中删除一个链表节点。

 

11.__wake_up_common与__wake_up

/*
 * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
 * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve
 * number) then we wake all the non-exclusive tasks and one exclusive task.
 *
 * There are circumstances in which we can try to wake a task which has already
 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
 * zero in this (rare) case, and we handle it by continuing to scan the queue.
 *
 * @q:            wait queue whose entries are walked from the front
 * @mode:         passed through to each entry's wake callback
 * @nr_exclusive: number of successful exclusive wakeups after which the
 *                walk stops; 0 means never stop early
 * @wake_flags:   passed through to each entry's wake callback
 * @key:          passed through to each entry's wake callback
 *
 * Takes no lock itself; __wake_up() calls it with q->lock held.
 */
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
            int nr_exclusive, int wake_flags, void *key)
{
    wait_queue_t *curr, *next;

    list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
        /*
         * Snapshot flags before invoking the callback.
         * NOTE(review): presumably because the callback may unlink curr
         * (hence the _safe iterator) - confirm against the wake functions.
         */
        unsigned flags = curr->flags;

        /*
         * Break only when all three hold, evaluated in this order: the
         * callback returned non-zero, the entry is exclusive, and the
         * pre-decremented nr_exclusive has reached zero.  Starting from
         * nr_exclusive == 0 the decrement yields -1, so the walk does not
         * stop early and every entry's callback is invoked.
         */
        if (curr->func(curr, mode, wake_flags, key) &&
                (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
            break;
    }
}

/**
 * __wake_up - wake up threads blocked on a waitqueue.
 * @q: the waitqueue
 * @mode: which threads
 * @nr_exclusive: how many wake-one or wake-many threads to wake up
 * @key: is directly passed to the wakeup function
 *
 * Locked wrapper around __wake_up_common(): the queue is walked with
 * @q->lock held and a wake_flags value of 0.
 *
 * It may be assumed that this function implies a write memory barrier before
 * changing the task state if and only if any tasks are woken up.
 */
void __wake_up(wait_queue_head_t *q, unsigned int mode,
            int nr_exclusive, void *key)
{
    unsigned long irq_state;

    spin_lock_irqsave(&q->lock, irq_state);
    __wake_up_common(q, mode, nr_exclusive, 0, key);
    spin_unlock_irqrestore(&q->lock, irq_state);
}

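  通过核心函数__wake_up_common我们可以看出,当指定唤醒的互斥进程数为0(nr_exclusive == 0)时,此函数会唤醒处在同一等待队列上的所有节点。当指定唤醒的互斥进程数为非0时,此函数会从队首开始依次唤醒,直到成功唤醒了nr_exclusive个互斥进程为止;由于add_wait_queue把非互斥进程插在队首、add_wait_queue_exclusive把互斥进程插在队尾,其效果就是唤醒同一等待队列上的所有的非互斥进程和nr_exclusive个互斥进程(nr_exclusive为1时即一个互斥进程)。我们通常将等待相同条件的等待进程加入到同一个等待队列上。如果等待队列上有互斥进程,那么依据互斥对临界区的访问规则,我们只需唤醒其中一个互斥进程即可,唤醒多个互斥进程的结果还是会让它们因抢占临界区而进入竞态。如果等待队列上没有互斥进程,那么在执行wake_up系列函数时,我们全唤醒即可。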

 

12.wake_up系列函数

/*
 * Convenience wrappers.  The __wake_up() based ones differ in two arguments:
 *   mode         - TASK_NORMAL or TASK_INTERRUPTIBLE, handed on to each
 *                  entry's wake callback
 *   nr_exclusive - 1 (stop after one exclusive waiter), nr (caller supplied)
 *                  or 0 (no limit, every entry is woken)
 * The key argument is always NULL.  wake_up_locked() and
 * wake_up_interruptible_sync() forward to __wake_up_locked() and
 * __wake_up_sync(), whose definitions are not part of this listing.
 */
#define wake_up(x)            __wake_up(x, TASK_NORMAL, 1, NULL)
#define wake_up_nr(x, nr)        __wake_up(x, TASK_NORMAL, nr, NULL)
#define wake_up_all(x)            __wake_up(x, TASK_NORMAL, 0, NULL)
#define wake_up_locked(x)        __wake_up_locked((x), TASK_NORMAL)

#define wake_up_interruptible(x)    __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
#define wake_up_interruptible_nr(x, nr)    __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL)
#define wake_up_interruptible_all(x)    __wake_up(x, TASK_INTERRUPTIBLE, 0, NULL)
#define wake_up_interruptible_sync(x)    __wake_up_sync((x), TASK_INTERRUPTIBLE, 1)

  

13.wait_event

/**
 * finish_wait - clean up after waiting in a queue
 * @q: waitqueue waited on
 * @wait: wait descriptor
 *
 * Sets current thread back to running state and removes
 * the wait descriptor from the given waitqueue if still
 * queued.
 */
void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
{
    unsigned long flags;

    __set_current_state(TASK_RUNNING);
    /*
     * We can check for list emptiness outside the lock
     * IFF:
     *  - we use the "careful" check that verifies both
     *    the next and prev pointers, so that there cannot
     *    be any half-pending updates in progress on other
     *    CPU's that we haven't seen yet (and that might
     *    still change the stack area).
     * and
     *  - all other users take the lock (ie we can only
     *    have _one_ other CPU that looks at or modifies
     *    the list).
     */
    if (!list_empty_careful(&wait->task_list)) {
        spin_lock_irqsave(&q->lock, flags);
        /*
         * Unlink and re-initialize the node (list_del_init rather than
         * list_del), so that a later list_empty() test on it - as done
         * in prepare_to_wait() - sees the entry as not queued.
         */
        list_del_init(&wait->task_list);
        spin_unlock_irqrestore(&q->lock, flags);
    }
}

/*
 * Define a wait_queue_t variable @name for the current task with @function
 * as its wake callback.  task_list is initialized with LIST_HEAD_INIT on
 * itself (an empty, unqueued node); flags is not named and is therefore
 * zero-initialized.
 */
#define DEFINE_WAIT_FUNC(name, function) \
wait_queue_t name = { \
.private = current, \
.func = function, \
.task_list = LIST_HEAD_INIT((name).task_list), \
}

/* Same as DEFINE_WAIT_FUNC() with autoremove_wake_function as the callback. */
#define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function)

/*
 * prepare_to_wait - queue a non-exclusive waiter and set the task state
 * @q:     wait queue head to wait on
 * @wait:  wait descriptor of the caller
 * @state: task state to put the current task into
 *
 * Note: we use "set_current_state()" _after_ the wait-queue add,
 * because we need a memory barrier there on SMP, so that any
 * wake-function that tests for the wait-queue being active
 * will be guaranteed to see waitqueue addition _or_ subsequent
 * tests in this thread will see the wakeup having taken place.
 *
 * The spin_unlock() itself is semi-permeable and only protects
 * one way (it only protects stuff inside the critical region and
 * stops them from bleeding out - it would still allow subsequent
 * loads to move into the critical region).
 */
void
prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
    unsigned long flags;

    wait->flags &= ~WQ_FLAG_EXCLUSIVE;
    spin_lock_irqsave(&q->lock, flags);
    /*
     * Only enqueue if the descriptor's own node is empty, i.e. it is not
     * already on a list; this lets callers invoke prepare_to_wait()
     * repeatedly in a loop without queuing the same entry twice.
     */
    if (list_empty(&wait->task_list))
        __add_wait_queue(q, wait);
    set_current_state(state);
    spin_unlock_irqrestore(&q->lock, flags);
}


/*
 * __wait_event - slow path of wait_event()
 * @wq:        the waitqueue to wait on (the macro takes its address)
 * @condition: C expression re-evaluated on every loop iteration
 *
 * Each iteration first (re)queues the local descriptor and sets the task
 * state to TASK_UNINTERRUPTIBLE, and only then tests @condition, calling
 * schedule() if it is still false.  finish_wait() runs once the loop
 * exits.
 */
#define __wait_event(wq, condition)                     \
do {                                    \
    DEFINE_WAIT(__wait);                        \
                                    \
    for (;;) {                            \
        prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE);    \
        if (condition)                        \
            break;                        \
        schedule();                        \
    }                                \
    finish_wait(&wq, &__wait);                    \
} while (0)


/**
 * wait_event - sleep until a condition gets true
 * @wq: the waitqueue to wait on
 * @condition: a C expression for the event to wait for
 *
 * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
 * @condition evaluates to true. The @condition is checked each time
 * the waitqueue @wq is woken up.
 *
 * wake_up() has to be called after changing any variable that could
 * change the result of the wait condition.
 *
 * @condition is tested once up front; if it is already true the macro
 * finishes without touching @wq.  Because @condition is expanded into
 * several tests it may be evaluated more than once.
 */
#define wait_event(wq, condition)                     \
do {                                    \
    if (condition)                             \
        break;                            \
    __wait_event(wq, condition);                    \
} while (0)

   在使用wait_event时,我们只需传入一个wait_queue_head_t类型的等待队列头wq和一个等待条件condition.

  __wait_event中,我们先通过DEFINE_WAIT声明定义一个wait_queue_t类型的变量__wait,并将当前进程current赋给等待队列项__wait的成员.private。

  prepare_to_wait先判断等待队列项__wait的task_list是否为空(即__wait尚未被加入等待队列),为空则将__wait加入到wq,然后将当前进程设为不可中断的等待状态TASK_UNINTERRUPTIBLE。

  回到__wait_event中判断condition是否成立,不成立则让CPU调度执行其他的进程,否则退出for(;;)循环执行finish_wait,finish_wait的作用是将当前进程设为TASK_RUNNING状态,并将等待队列项从等待队列中删除。

   

14.wait_event系列函数

/*
 * __wait_event_timeout - slow path of wait_event_timeout()
 * @wq:        the waitqueue to wait on (the macro takes its address)
 * @condition: C expression re-evaluated on every loop iteration
 * @ret:       lvalue holding the timeout on entry; overwritten with the
 *             value returned by schedule_timeout()
 *
 * Same loop as __wait_event(), but sleeping through schedule_timeout().
 * The loop ends when @condition is true or when schedule_timeout()
 * returns 0; in the latter case @condition is not tested again.
 */
#define __wait_event_timeout(wq, condition, ret)            \
do {                                    \
    DEFINE_WAIT(__wait);                        \
                                    \
    for (;;) {                            \
        prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE);    \
        if (condition)                        \
            break;                        \
        ret = schedule_timeout(ret);                \
        if (!ret)                        \
            break;                        \
    }                                \
    finish_wait(&wq, &__wait);                    \
} while (0)

/**
 * wait_event_timeout - sleep until a condition gets true or a timeout elapses
 * @wq: the waitqueue to wait on
 * @condition: a C expression for the event to wait for
 * @timeout: timeout, in jiffies
 *
 * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
 * @condition evaluates to true. The @condition is checked each time
 * the waitqueue @wq is woken up.
 *
 * wake_up() has to be called after changing any variable that could
 * change the result of the wait condition.
 *
 * The function returns 0 if the @timeout elapsed, and the remaining
 * jiffies if the condition evaluated to true before the timeout elapsed.
 *
 * If @condition is already true on entry, @timeout itself is returned
 * and nothing is queued.  @timeout is evaluated once, into a long.
 */
#define wait_event_timeout(wq, condition, timeout)            \
({                                    \
    long __ret = timeout;                        \
    if (!(condition))                         \
        __wait_event_timeout(wq, condition, __ret);        \
    __ret;                                \
})

/*
 * __wait_event_interruptible - slow path of wait_event_interruptible()
 * @wq:        the waitqueue to wait on (the macro takes its address)
 * @condition: C expression re-evaluated on every loop iteration
 * @ret:       lvalue set to -ERESTARTSYS when a signal ends the wait;
 *             left unchanged otherwise
 *
 * Each iteration queues the descriptor with TASK_INTERRUPTIBLE and tests
 * @condition first.  If it is false and no signal is pending for the
 * current task, schedule() is called and the loop repeats; if a signal
 * is pending the loop ends with @ret = -ERESTARTSYS.
 */
#define __wait_event_interruptible(wq, condition, ret)            \
do {                                    \
    DEFINE_WAIT(__wait);                        \
                                    \
    for (;;) {                            \
        prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);    \
        if (condition)                        \
            break;                        \
        if (!signal_pending(current)) {                \
            schedule();                    \
            continue;                    \
        }                            \
        ret = -ERESTARTSYS;                    \
        break;                            \
    }                                \
    finish_wait(&wq, &__wait);                    \
} while (0)

/**
 * wait_event_interruptible - sleep until a condition gets true
 * @wq: the waitqueue to wait on
 * @condition: a C expression for the event to wait for
 *
 * The process is put to sleep (TASK_INTERRUPTIBLE) until the
 * @condition evaluates to true or a signal is received.
 * The @condition is checked each time the waitqueue @wq is woken up.
 *
 * wake_up() has to be called after changing any variable that could
 * change the result of the wait condition.
 *
 * The function will return -ERESTARTSYS if it was interrupted by a
 * signal and 0 if @condition evaluated to true.
 *
 * If @condition is already true on entry, 0 is returned and nothing is
 * queued.
 */
#define wait_event_interruptible(wq, condition)                \
({                                    \
    int __ret = 0;                            \
    if (!(condition))                        \
        __wait_event_interruptible(wq, condition, __ret);    \
    __ret;                                \
})

/*
 * __wait_event_interruptible_timeout - slow path of
 * wait_event_interruptible_timeout()
 * @wq:        the waitqueue to wait on (the macro takes its address)
 * @condition: C expression re-evaluated on every loop iteration
 * @ret:       lvalue holding the timeout on entry; overwritten with the
 *             value returned by schedule_timeout(), or with -ERESTARTSYS
 *             when a signal ends the wait
 *
 * Combines the two variants above: the descriptor is queued with
 * TASK_INTERRUPTIBLE, @condition is tested first, then a pending signal
 * ends the loop with -ERESTARTSYS; otherwise the task sleeps through
 * schedule_timeout() and the loop ends if that returns 0 (without
 * testing @condition again).
 */
#define __wait_event_interruptible_timeout(wq, condition, ret)        \
do {                                    \
    DEFINE_WAIT(__wait);                        \
                                    \
    for (;;) {                            \
        prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);    \
        if (condition)                        \
            break;                        \
        if (!signal_pending(current)) {                \
            ret = schedule_timeout(ret);            \
            if (!ret)                    \
                break;                    \
            continue;                    \
        }                            \
        ret = -ERESTARTSYS;                    \
        break;                            \
    }                                \
    finish_wait(&wq, &__wait);                    \
} while (0)

/**
 * wait_event_interruptible_timeout - sleep until a condition gets true or a timeout elapses
 * @wq: the waitqueue to wait on
 * @condition: a C expression for the event to wait for
 * @timeout: timeout, in jiffies
 *
 * The process is put to sleep (TASK_INTERRUPTIBLE) until the
 * @condition evaluates to true or a signal is received.
 * The @condition is checked each time the waitqueue @wq is woken up.
 *
 * wake_up() has to be called after changing any variable that could
 * change the result of the wait condition.
 *
 * The function returns 0 if the @timeout elapsed, -ERESTARTSYS if it
 * was interrupted by a signal, and the remaining jiffies otherwise
 * if the condition evaluated to true before the timeout elapsed.
 *
 * If @condition is already true on entry, @timeout itself is returned
 * and nothing is queued.  @timeout is evaluated once, into a long.
 */
#define wait_event_interruptible_timeout(wq, condition, timeout)    \
({                                    \
    long __ret = timeout;                        \
    if (!(condition))                        \
        __wait_event_interruptible_timeout(wq, condition, __ret); \
    __ret;                                \
})

 wait_event_timeout把wait_event的schedule()改成了schedule_timeout()。

 wait_event_interruptible把wait_event的TASK_UNINTERRUPTIBLE改成了TASK_INTERRUPTIBLE,并在睡眠前检查signal_pending(current),若有信号待处理则返回-ERESTARTSYS。

 wait_event_interruptible_timeout把wait_event的schedule()与TASK_UNINTERRUPTIBLE改成了schedule_timeout()和TASK_INTERRUPTIBLE,同样在有信号待处理时返回-ERESTARTSYS。