Peter
Peter

Reputation: 1883

suspicious RCU usage?

I'm running custom Fedora 17 Kernel 3.3.0-0.rc5.git3.1.yfkm2.fc17.x86_64, and the warning was shown on dmesg:

[  858.634304]
[  858.634324] ===============================
[  858.634350] [ INFO: suspicious RCU usage. ]
[  858.634375] 3.3.0-0.rc5.git3.1.yfkm2.fc17.x86_64 #1 Not tainted
[  858.634409] -------------------------------
[  858.634435] kernel/pid.c:425 find_task_by_pid_ns() needs rcu_read_lock() protection!
[  858.634478]
[  858.634479] other info that might help us debug this:
[  858.634480]
[  858.634528]
[  858.634529] rcu_scheduler_active = 1, debug_locks = 0
[  858.634567] no locks held by monitor/10550.
[  858.634591]
[  858.634592] stack backtrace:
[  858.634620] Pid: 10550, comm: monitor Not tainted 3.3.0-0.rc5.git3.1.yfkm2.fc17.x86_64 #1
[  858.634666] Call Trace:
[  858.634688]  [<ffffffff810c8c55>] lockdep_rcu_suspicious+0xe5/0x100
[  858.634727]  [<ffffffff81086921>] find_task_by_pid_ns+0x81/0xa0
[  858.634762]  [<ffffffff81086962>] find_task_by_vpid+0x22/0x30
[  858.634798]  [<ffffffff8131ccd5>] yfkm2_is_pid_running+0x15/0x40
[  858.634835]  [<ffffffff8131ce54>] sys_yfkm2_monitor+0x14/0x80
[  858.634870]  [<ffffffff816a6ba9>] system_call_fastpath+0x16/0x1b

monitor is user application that call sys_yfkm2_monitor syscall passing a pid to it. The custom code worked as expected but I'm curious with the warning message shown on dmesg. What am I doing wrong?

The user application monitor.c:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#define SYS_yfkm2_monitor   __NR_yfkm2_monitor
#define SYS_yfkm2_notifyme  __NR_yfkm2_notifyme

int main (int argc, char *argv[])
{

    if (argc < 2) {
        printf("Error. Use %s <PID>\n", argv[0]);
        return 1;
    }
        pid_t pid = atoi(argv[1]);
        long ret;


        ret = syscall(SYS_yfkm2_monitor, pid);

    if (ret == 0){
        printf("Sucess on adding %d!\n", pid);
        return 0;
    } else {
        printf("Failure! Is %s a valid PID?\n", argv[1]);
        return 1;
    }
}

The Kernel code:

#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/kthread.h>

#define YFKM2_KT_TIMEOUT (1*HZ) /* 1 second */

struct yfkm2 {
    pid_t monitor;      /* PID to monitor */
    pid_t notifyme;     /* PID to notify */
    struct list_head list;  /* Linked List struct */
};

/* How many Kernel Threads are running? */
atomic_t yfkm2_kthread_run_count = ATOMIC_INIT(0);

/* Define and initialize yfkm2_(linked)list */
LIST_HEAD(yfkm2_list);

/* Define and initialize yfkm2_(read&write)lock */
DEFINE_RWLOCK(yfkm2_lock);

/*
 * yfkm2_is_pid_running(pid_t pid)
 *
 * Check if pid is running
 *
 * return 0 if pid is running
 * return 1 if pid is not running
 */
int yfkm2_is_pid_running(pid_t pid)
{
    struct task_struct *q;

    q = find_task_by_vpid(pid);

    if (q != NULL && q->pid == pid)
        return 0;

    return 1;
}

/*
 * yfkm2_kill(pid_t pid)
 *
 * Kills pid
 *
 * return 0 if pid was running and send SIGKILL to pid
 * return 1 if pid is not running
 */
int yfkm2_kill(pid_t pid)
{
    struct task_struct *q;

    q = find_task_by_vpid(pid);

    if (q != NULL) {
        force_sig(SIGKILL, q);
        return 0;
    }
    return 1;
}

/*
 * int yfkm2_kthread(void *data)
 *
 * The Kernel Thread
 *
 * Traverse the yfkm2_list looking for yfkm2->notifyme that are not 0.
 * If any found, check if correspondent yfkm2->monitor is still running. If not
 * kill yfkm2->notifyme. After traversing the list, check if the list is empty.
 * If so return 0. If not sleep one second and start again.
 *
 * return 0 if yfkm2_list is empty
 * should never return 1
 */
int yfkm2_kthread(void *data) /* data is NEVER used */
{
    struct yfkm2 *yfkm2_tmp, *yfkm2_tmp2;
    bool empty;

    while (true) {
        /* Needs write protection due possible item removal from list */
        write_lock(&yfkm2_lock); /* Write lock */
        list_for_each_entry_safe(yfkm2_tmp, yfkm2_tmp2,
                        &yfkm2_list, list) {
            if (yfkm2_tmp->notifyme != 0) {
                if (yfkm2_is_pid_running(yfkm2_tmp->monitor) != 0) {
                    yfkm2_kill(yfkm2_tmp->notifyme);
                    list_del(&yfkm2_tmp->list);
                    kfree(yfkm2_tmp);
                }
            }
        }
        write_unlock(&yfkm2_lock); /* Write unlock */

        read_lock(&yfkm2_lock); /* Read lock */
        empty = list_empty(&yfkm2_list);
        read_unlock(&yfkm2_lock); /* Read unlock */

        if (empty) {
            /* The counter is increased at sys_yfkm2_notifyme()
             * Before exit, decrease atomic run counter */
            atomic_dec(&yfkm2_kthread_run_count);
            return 0;
        }

        set_current_state(TASK_INTERRUPTIBLE);
        schedule_timeout(YFKM2_KT_TIMEOUT);
    }
    /* Before exit, decrease atomic run counter */
    atomic_dec(&yfkm2_kthread_run_count);
    return 1;
}

/*
 * asmlinkage long sys_yfkm2_monitor(pid_t monitor)
 *
 * The system call that check if monitor correspond to a running pid and stores
 * monitor at yfkm2_list->monitor
 *
 * return 0 if pid is running
 * return 1 if pid is not running
 */
asmlinkage long sys_yfkm2_monitor(pid_t monitor)
{
    struct yfkm2 *yfkm2_tmp;

    if (yfkm2_is_pid_running(monitor) == 0) {

        yfkm2_tmp = kmalloc(sizeof(*yfkm2_tmp), GFP_KERNEL);
        yfkm2_tmp->monitor = monitor;
        yfkm2_tmp->notifyme = 0;

        write_lock(&yfkm2_lock);
        list_add(&yfkm2_tmp->list, &yfkm2_list);
        write_unlock(&yfkm2_lock);

        return 0;
    }


    return 1;
}

/*
 * asmlinkage long sys_yfkm2_notifyme(pid_t monitor, pid_t notifyme)
 *
 * The system call that looks for monitor at yfkm2_list->monitor. If found
 * store notifyme at yfkm2_list->notifyme. It also starts the kernel thread
 * if it is not running.
 *
 * return 0 if pid is running
 * return 1 if pid is not running
 */
asmlinkage long sys_yfkm2_notifyme(pid_t monitor, pid_t notifyme)
{
    struct yfkm2 *yfkm2_tmp;
    bool found_monitored_pid = false;

    write_lock(&yfkm2_lock); /* Write lock */
    list_for_each_entry(yfkm2_tmp, &yfkm2_list, list) {
        if (yfkm2_tmp->monitor == monitor) {
            yfkm2_tmp->notifyme = notifyme;

            found_monitored_pid = true;

            break;
        }
    }
    write_unlock(&yfkm2_lock); /* Write unlock */

    if (found_monitored_pid) {
        if (atomic_read(&yfkm2_kthread_run_count) < 1) {
            /* The counter is decreased at yfkm2_kthread()
             * Before start, increase atomic run counter */
            atomic_inc(&yfkm2_kthread_run_count);
            kthread_run(&yfkm2_kthread, NULL, "yfkm2_kthread");
        }

        return 0;
    } else {
        return 1;
    }
}

Upvotes: 0

Views: 1722

Answers (2)

caf
caf

Reputation: 239121

You are not performing correct locking on the task list. For example, your yfkm2_kill() function should be:

int yfkm2_kill(pid_t pid)
{
    struct task_struct *q;

    rcu_read_lock();
    q = find_task_by_vpid(pid);
    if (q)
        get_task_struct(q);
    rcu_read_unlock();

    if (q == NULL)
        return 1;

    force_sig(SIGKILL, q);
    put_task_struct(q);
    return 0;
}

...but your whole design appears to be severely racy. For example, one of the ->monitor tasks could exit and be replaced with a new, different task with the same PID before your kernel thread notices.

Upvotes: 3

ugoren
ugoren

Reputation: 16441

You seem to be running code without the required locks.
Such things tend to work, except that they crash once in a while (possibly a long while).

I don't know these functions so much, but it seems like find_task_by_vpid should be called under some RCU lock (probably the one that protects the process list), in read mode.

Upvotes: 1

Related Questions