Reputation: 168
I have written a sample linux device driver code which will create two kernel threads and each will increment a single global variable. I have used wait-queues to perform the task of incrementing the variable, and each thread will wait on the wait queue until a timer expires and each thread is woken up at random.
But problem is when I inserted this module, the whole system is just freezing up, and I have to restart the machine. This is happening every time I inserted the module. I tried debugging the kthread code to see if I am entering dead-lock situation by mistake but I am unable to figure out anything wrong with the code.
Can anyone please tell me what I am doing wrong in the code to get the hang-up situation?
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/semaphore.h>
#include <linux/wait.h>
#include <linux/timer.h>
#include <linux/sched.h>
#include <linux/kthread.h>
spinlock_t my_si_lock;
pid_t kthread_pid1;
pid_t kthread_pid2 ;
static DECLARE_WAIT_QUEUE_HEAD(wqueue);
static struct timer_list my_timer;
int kthread_num;
/* the timer callback */
void my_timer_callback( unsigned long data ){
printk(KERN_INFO "my_timer_callback called (%ld).\n", jiffies );
if (waitqueue_active(&wqueue)) {
wake_up_interruptible(&wqueue);
}
}
/*Routine for the first thread */
static int kthread_routine_1(void *kthread_num)
{
//int num=(int)(*(int*)kthread_num);
int *num=(int *)kthread_num;
char kthread_name[15];
unsigned long flags;
DECLARE_WAITQUEUE(wait, current);
printk(KERN_INFO "Inside daemon_routine() %ld\n",current->pid);
allow_signal(SIGKILL);
allow_signal(SIGTERM);
do{
set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue(&wqueue, &wait);
spin_lock_irqsave(&my_si_lock, flags);
printk(KERN_INFO "kernel_daemon [%d] incrementing the shared data=%d\n",current->pid,(*num)++);
spin_unlock_irqrestore(&my_si_lock, flags);
remove_wait_queue(&wqueue, &wait);
if (kthread_should_stop()) {
break;
}
}while(!signal_pending(current));
set_current_state(TASK_RUNNING);
return 0;
}
/*Routine for the second thread */
static int kthread_routine_2(void *kthread_num)
{
//int num=(int)(*(int*)kthread_num);
int *num=(int *)kthread_num;
char kthread_name[15];
unsigned long flags;
DECLARE_WAITQUEUE(wait, current);
printk(KERN_INFO "Inside daemon_routine() %ld\n",current->pid);
allow_signal(SIGKILL);
allow_signal(SIGTERM);
do{
set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue(&wqueue, &wait);
spin_lock_irqsave(&my_si_lock, flags);
printk(KERN_INFO "kernel_daemon [%d] incrementing the shared data=%d\n",current->pid,(*num)++);
spin_unlock_irqrestore(&my_si_lock, flags);
remove_wait_queue(&wqueue, &wait);
if (kthread_should_stop()) {
break;
}
}while(!signal_pending(current));
set_current_state(TASK_RUNNING);
return 0;
}
static int __init signalexample_module_init(void)
{
int ret;
spin_lock_init(&my_si_lock);
init_waitqueue_head(&wqueue);
kthread_num=1;
printk(KERN_INFO "starting the first kernel thread with id ");
kthread_pid1 = kthread_run(kthread_routine_1,&kthread_num,"first_kthread");
printk(KERN_INFO "%ld \n",(long)kthread_pid1);
if(kthread_pid1< 0 ){
printk(KERN_ALERT "Kernel thread [1] creation failed\n");
return -1;
}
printk(KERN_INFO "starting the second kernel thread with id");
kthread_pid2 = kthread_run(kthread_routine_2,&kthread_num,"second_kthread");
printk(KERN_INFO "%ld \n",(long)kthread_pid2);
if(kthread_pid2 < 0 ){
printk(KERN_ALERT "Kernel thread [2] creation failed\n");
return -1;
}
setup_timer( &my_timer, my_timer_callback, 0 );
ret = mod_timer( &my_timer, jiffies + msecs_to_jiffies(2000) );
if (ret) {
printk("Error in mod_timer\n");
return -EINVAL;
}
return 0;
}
static void __exit signalexample_module_exit(void)
{
del_timer(&my_timer);
}
module_init(signalexample_module_init);
module_exit(signalexample_module_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Demonstrates use of kthread");
Upvotes: 2
Views: 1964
Reputation: 9633
You need a call to schedule()
in both of your thread functions:
/* In kernel thread function... */
set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue(&wqueue, &wait);
schedule(); /* Add this call here */
spin_lock_irqsave(&my_si_lock, flags);
/* etc... */
Calling set_current_state(TASK_INTERRUPTIBLE)
sets the state in the current process' task structure, which allows the scheduler to move the process off of the run queue once it sleeps. But then you have to tell the scheduler, "Okay, I've set a new state. Reschedule me now." You're missing this second step, so the changed flag won't take effect until the next time the scheduler decides to suspend your thread, and there's no way to know how soon that will happen, or which line of your code it's executing when it happens (except in the locked code - that shouldn't be interrupted).
I'm not really sure why it's causing your whole system to lock up, because your system's state is pretty unpredictable. Since the kernel threads weren't waiting for the timer to expire before grabbing locks and looping, I have no idea when you could expect the scheduler to actually take action on the new task struct states, and a lot of things could be happening in the meantime. Your threads are repeatedly calling add_wait_queue(&wqueue, &wait);
and remove_wait_queue(&wqueue, &wait);
, so who knows what state the wait queue is in by the time your timer callback fires. In fact, since the kernel threads are spinning, this code has a race condition:
if (waitqueue_active(&wqueue)) {
wake_up_interruptible(&wqueue);
}
It's possible that you have active tasks on the waitqueue when the if statement is executed, only to have them emptied out by the time wake_up_interruptible(&wqueue);
is called.
By the way, I'm assuming your current goal of incrementing a global variable is just an exercise to learn waitqueues and sleep states. If you ever want to actually implement a shared counter, look at atomic operations instead, and you'll be able to dump the spinlock.
If you decide to keep the spinlock, you should switch to using the DEFINE_SPINLOCK()
macro instead.
Also, as I mentioned in my comment, you should change your two kthread_pid
variables to be of task_struct *
type. You also need a call to kthread_stop(kthread_pid);
in your __exit routine for each of the threads you start. kthread_should_stop()
will never return true if you don't ever tell them to stop.
Upvotes: 3