等待队列在内核中很有用途,尤其用在中断处理、进程同步及定时。
等待队列实现了在事件上的条件等待:希望等待特定事件的进程把自己放进合适的等待队列,并放弃控制权。因为,等待队列表示一组睡眠的进程,当某一条件变为真时,由内核唤醒他们。
1.每个等待队列都有一个等待队列头(wait queue head),等待队列头是一个类型为wait_queue_head_t的数据结构:
struct __wait_queue_head {
spinlock_t lock;
struct list_head task_list;
};
typedef struct __wait_queue_head wait_queue_head_t;
因为等待队列是由中断处理程序和主要内核函数修改的,因此必须对其双向链表进行保护以免对其进行同时访问,因为同时访问会导致不可预测的后果。同步是通过等待队列头中的lock自旋锁达到的。task_list字段是等待进程链表的头。
2.等待队列链表中的元素类型为wait_queue_t:
struct __wait_queue {
unsigned int flags;
#define WQ_FLAG_EXCLUSIVE 0x01
void *private;
wait_queue_func_t func;
struct list_head task_list;
};
typedef struct __wait_queue wait_queue_t;
flags为1表示睡眠进程是互斥进程,由内核有选择的唤醒。
flags为0表示睡眠进程是非互斥进程,由内核在等待条件成立时唤醒。
WQ_FLAG_EXCLUSIVE用于赋给flgas,表示该睡眠进程是一个互斥进程。
进程描述符的指针,用于指向struct task_struct类型的进程描述符p/current。
func表示等待队列中睡眠进程应该用什么方式唤醒。
task_list表示要插入等待队列头的链表指针。
3.DECLARE_WAIT_QUEUE_HEAD
#define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \
.lock = __SPIN_LOCK_UNLOCKED(name.lock), \
.task_list = { &(name).task_list, &(name).task_list } }
#define DECLARE_WAIT_QUEUE_HEAD(name) \
wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
DECLARE_WAIT_QUEUE_HEAD(name)用来声明并初始化一个等待队列头,如:
DECLARE_WAIT_QUEUE_HEAD(wq_head);
4.DECLARE_WAITQUEUE
#define __WAITQUEUE_INITIALIZER(name, tsk) { \
.private = tsk, \
.func = default_wake_function, \
.task_list = { NULL, NULL } }
#define DECLARE_WAITQUEUE(name, tsk) \
wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk)
DECLARE_WAITQUEUE(name, tsk)用来声明并初始化一个等待队列项,如:
DECLARE_WAITQUEUE(wq, current);
5.init_waitqueue_head
void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *key)
{
spin_lock_init(&q->lock);
lockdep_set_class(&q->lock, key);
INIT_LIST_HEAD(&q->task_list);
}
#define init_waitqueue_head(q) \
do { \
static struct lock_class_key __key; \
\
__init_waitqueue_head((q), &__key); \
} while (0)
init_waitqueue_head(q)可以用来初始化动态分配的等待队列头变量,如:
wait_queue_head_t wq_head;
init_waitqueue_head(wq_head);
6.init_waitqueue_entry
static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
{
q->flags = 0;
q->private = p;
q->func = default_wake_function;
}
init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)用于初始化一个等待队列项,即把一个进程传递给一个wait_queue_t类型的变量,作用同
DECLARE_WAITQUEUE(name, tsk)一样,只不过前者是初始化动态分配的变量,后者只需传递一个等待队列项的名字和进程即可,使用如下:
wait_queue_t wq;
init_waitqueue_entry(&wq, current);
7.init_waitqueue_func_entry
static inline void init_waitqueue_func_entry(wait_queue_t *q,
wait_queue_func_t func)
{
q->flags = 0;
q->private = NULL;
q->func = func;
}
init_waitqueue_func_entry用于初始化等待队列项的自定义唤醒函数。
8.add_wait_queue
static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
{
list_add(&new->task_list, &head->task_list);
}
void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{
unsigned long flags;
wait->flags &= ~WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
__add_wait_queue(q, wait);
spin_unlock_irqrestore(&q->lock, flags);
}
add_wait_queue的作用是将一个等待队列项加入到等待队列头中。如将上面3,4或5,6的wq加入到wq_head:
add_wait_queue(&wq_head, &wq);
9.add_wait_queue_exclusive
void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
{
unsigned long flags;
wait->flags |= WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
__add_wait_queue_tail(q, wait);
spin_unlock_irqrestore(&q->lock, flags);
}
add_wait_queue_exclusive的作用是将一个互斥进程的等待队列项加入等待队列头,可以看出其与add_wait_queue函数体的不同之处在于
wait->flags |= WQ_FLAG_EXCLUSIVE;add_wait_queue中是将flags的最低位清0.
10.remove_wait_queue
static inline void __remove_wait_queue(wait_queue_head_t *head,
wait_queue_t *old)
{
list_del(&old->task_list);
}
void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{
unsigned long flags;
spin_lock_irqsave(&q->lock, flags);
__remove_wait_queue(q, wait);
spin_unlock_irqrestore(&q->lock, flags);
}
remove_wait_queue的作用是从等待队列中删除一个链表节点。
11.__wake_up_common与__wake_up
/*
* The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
* wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve
* number) then we wake all the non-exclusive tasks and one exclusive task.
*
* There are circumstances in which we can try to wake a task which has already
* started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
* zero in this (rare) case, and we handle it by continuing to scan the queue.
*/
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, int wake_flags, void *key)
{
wait_queue_t *curr, *next;
list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
unsigned flags = curr->flags;
if (curr->func(curr, mode, wake_flags, key) &&
(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
break;
}
}
/**
* __wake_up - wake up threads blocked on a waitqueue.
* @q: the waitqueue
* @mode: which threads
* @nr_exclusive: how many wake-one or wake-many threads to wake up
* @key: is directly passed to the wakeup function
*
* It may be assumed that this function implies a write memory barrier before
* changing the task state if and only if any tasks are woken up.
*/
void __wake_up(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, void *key)
{
unsigned long flags;
spin_lock_irqsave(&q->lock, flags);
__wake_up_common(q, mode, nr_exclusive, 0, key);
spin_unlock_irqrestore(&q->lock, flags);
}
通过核心函数__wake_up_common我们可以看出,当指定唤醒的互斥进程数为0(nr_exclusive == 0)时,此函数会唤醒处在同一等待队列上的所有节点。当指定唤醒的互斥进程数为非0时,此函数会唤醒同一等待队列上的所有的非互斥进程和一个互斥进程。我们通常将等待相同条件的等待进程加入到同一个等待队列上。如果等待队列上有互斥进程,那么依据互斥对临界区的访问规则,我们只需唤醒其中一个互斥进程即可,唤醒多个互斥进程的结果还是会让他们因抢占临界区而进入竞态,如果等待队列上没有互斥进程,那么在执行wake_up系列函数时,我们全唤醒即可。
12.wake_up系列函数
#define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL)
#define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL)
#define wake_up_all(x) __wake_up(x, TASK_NORMAL, 0, NULL)
#define wake_up_locked(x) __wake_up_locked((x), TASK_NORMAL)
#define wake_up_interruptible(x) __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
#define wake_up_interruptible_nr(x, nr) __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL)
#define wake_up_interruptible_all(x) __wake_up(x, TASK_INTERRUPTIBLE, 0, NULL)
#define wake_up_interruptible_sync(x) __wake_up_sync((x), TASK_INTERRUPTIBLE, 1)
13.wait_event
/**
* finish_wait - clean up after waiting in a queue
* @q: waitqueue waited on
* @wait: wait descriptor
*
* Sets current thread back to running state and removes
* the wait descriptor from the given waitqueue if still
* queued.
*/
void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
{
unsigned long flags;
__set_current_state(TASK_RUNNING);
/*
* We can check for list emptiness outside the lock
* IFF:
* - we use the "careful" check that verifies both
* the next and prev pointers, so that there cannot
* be any half-pending updates in progress on other
* CPU's that we haven't seen yet (and that might
* still change the stack area.
* and
* - all other users take the lock (ie we can only
* have _one_ other CPU that looks at or modifies
* the list).
*/
if (!list_empty_careful(&wait->task_list)) {
spin_lock_irqsave(&q->lock, flags);
list_del_init(&wait->task_list);
spin_unlock_irqrestore(&q->lock, flags);
}
}
#define DEFINE_WAIT_FUNC(name, function) \
wait_queue_t name = { \
.private = current, \
.func = function, \
.task_list = LIST_HEAD_INIT((name).task_list), \
}
#define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function)
/*
* Note: we use "set_current_state()" _after_ the wait-queue add,
* because we need a memory barrier there on SMP, so that any
* wake-function that tests for the wait-queue being active
* will be guaranteed to see waitqueue addition _or_ subsequent
* tests in this thread will see the wakeup having taken place.
*
* The spin_unlock() itself is semi-permeable and only protects
* one way (it only protects stuff inside the critical region and
* stops them from bleeding out - it would still allow subsequent
* loads to move into the critical region).
*/
void
prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
unsigned long flags;
wait->flags &= ~WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
if (list_empty(&wait->task_list))
__add_wait_queue(q, wait);
set_current_state(state);
spin_unlock_irqrestore(&q->lock, flags);
}
#define __wait_event(wq, condition) \
do { \
DEFINE_WAIT(__wait); \
\
for (;;) { \
prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
if (condition) \
break; \
schedule(); \
} \
finish_wait(&wq, &__wait); \
} while (0)
/**
* wait_event - sleep until a condition gets true
* @wq: the waitqueue to wait on
* @condition: a C expression for the event to wait for
*
* The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
* @condition evaluates to true. The @condition is checked each time
* the waitqueue @wq is woken up.
*
* wake_up() has to be called after changing any variable that could
* change the result of the wait condition.
*/
#define wait_event(wq, condition) \
do { \
if (condition) \
break; \
__wait_event(wq, condition); \
} while (0)
在使用wait_event时,我们只需传入一个wait_queue_head_t类型的等待队列头wq和一个等待条件condition.
__wait_event中,我们先通过DEFINE_WAIT声明定义一个wait_queue_t类型的变量__wait,然后将当前进程current赋给等待队列项__wait变量成员的.
private.
prepare_to_wait先判断等待队列头wq是否为空,为空则将__wait加入到wq,然后将当前进程设为不可中断的等待状态TASK_UNINTERRUPTIBLE。
回到__wait_event中判断condition是否成立,不成立则让CPU调度执行其他的进程,否则退出for(;;)循环执行finish_wait,finish_wait的作用是将当前进程设为TASK_RUNNING状态,并将等待队列项从等待队列中删除。
14.wait_event系列函数
#define __wait_event_timeout(wq, condition, ret) \
do { \
DEFINE_WAIT(__wait); \
\
for (;;) { \
prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
if (condition) \
break; \
ret = schedule_timeout(ret); \
if (!ret) \
break; \
} \
finish_wait(&wq, &__wait); \
} while (0)
/**
* wait_event_timeout - sleep until a condition gets true or a timeout elapses
* @wq: the waitqueue to wait on
* @condition: a C expression for the event to wait for
* @timeout: timeout, in jiffies
*
* The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
* @condition evaluates to true. The @condition is checked each time
* the waitqueue @wq is woken up.
*
* wake_up() has to be called after changing any variable that could
* change the result of the wait condition.
*
* The function returns 0 if the @timeout elapsed, and the remaining
* jiffies if the condition evaluated to true before the timeout elapsed.
*/
#define wait_event_timeout(wq, condition, timeout) \
({ \
long __ret = timeout; \
if (!(condition)) \
__wait_event_timeout(wq, condition, __ret); \
__ret; \
})
#define __wait_event_interruptible(wq, condition, ret) \
do { \
DEFINE_WAIT(__wait); \
\
for (;;) { \
prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE); \
if (condition) \
break; \
if (!signal_pending(current)) { \
schedule(); \
continue; \
} \
ret = -ERESTARTSYS; \
break; \
} \
finish_wait(&wq, &__wait); \
} while (0)
/**
* wait_event_interruptible - sleep until a condition gets true
* @wq: the waitqueue to wait on
* @condition: a C expression for the event to wait for
*
* The process is put to sleep (TASK_INTERRUPTIBLE) until the
* @condition evaluates to true or a signal is received.
* The @condition is checked each time the waitqueue @wq is woken up.
*
* wake_up() has to be called after changing any variable that could
* change the result of the wait condition.
*
* The function will return -ERESTARTSYS if it was interrupted by a
* signal and 0 if @condition evaluated to true.
*/
#define wait_event_interruptible(wq, condition) \
({ \
int __ret = 0; \
if (!(condition)) \
__wait_event_interruptible(wq, condition, __ret); \
__ret; \
})
#define __wait_event_interruptible_timeout(wq, condition, ret) \
do { \
DEFINE_WAIT(__wait); \
\
for (;;) { \
prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE); \
if (condition) \
break; \
if (!signal_pending(current)) { \
ret = schedule_timeout(ret); \
if (!ret) \
break; \
continue; \
} \
ret = -ERESTARTSYS; \
break; \
} \
finish_wait(&wq, &__wait); \
} while (0)
/**
* wait_event_interruptible_timeout - sleep until a condition gets true or a timeout elapses
* @wq: the waitqueue to wait on
* @condition: a C expression for the event to wait for
* @timeout: timeout, in jiffies
*
* The process is put to sleep (TASK_INTERRUPTIBLE) until the
* @condition evaluates to true or a signal is received.
* The @condition is checked each time the waitqueue @wq is woken up.
*
* wake_up() has to be called after changing any variable that could
* change the result of the wait condition.
*
* The function returns 0 if the @timeout elapsed, -ERESTARTSYS if it
* was interrupted by a signal, and the remaining jiffies otherwise
* if the condition evaluated to true before the timeout elapsed.
*/
#define wait_event_interruptible_timeout(wq, condition, timeout) \
({ \
long __ret = timeout; \
if (!(condition)) \
__wait_event_interruptible_timeout(wq, condition, __ret); \
__ret; \
})
wait_event_timeout把wait_event的schedule()改成了schedule_timeout()。
wait_event_interruptible把wait_event的TASK_UNINTERRUPTIBLE改成了TASK_INTERRUPTIBLE。
wait_event_interruptible_timeout把wait_event的schedule()与TASK_UNINTERRUPTIBLE改成了schedule_timeout()和TASK_INTERRUPTIBLE。