[PATCH 3] utrace core

This adds the utrace facility, a new modular interface in the kernel
for implementing user thread tracing and debugging.  This fits on top
of the tracehook_* layer, so the new code is well-isolated.

The new interface is in <linux/utrace.h>, and Documentation/utrace.txt
describes it.  It allows for multiple separate tracing engines to work
in parallel without interfering with each other.  Higher-level tracing
facilities can be implemented as loadable kernel modules using this
layer.

The new facility is made optional under CONFIG_UTRACE.  Normal
configurations will always want to enable it.  It's optional to
emphasize the clean separation of the code, and because some
stripped-down embedded configurations might want to omit it to save
space (when ptrace and the like can never be used).

Signed-off-by: Roland McGrath <roland@redhat.com>

---
 Documentation/DocBook/Makefile    |    2 
 Documentation/DocBook/utrace.tmpl |   23 
 Documentation/utrace.txt          |  579 +++++++++
 include/linux/sched.h             |    5 
 include/linux/tracehook.h         |   85 +
 include/linux/utrace.h            |  544 +++++++++
 init/Kconfig                      |   18 
 kernel/Makefile                   |    1 
 kernel/utrace.c                   | 2263 ++++++++++++++++++++++++++++++++++++++
 9 files changed, 3502 insertions(+), 18 deletions(-)
 create kernel/utrace.c
 create Documentation/utrace.txt
 create Documentation/DocBook/utrace.tmpl
 create include/linux/utrace.h

Index: b/kernel/Makefile
===================================================================
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -50,6 +50,7 @@ obj-$(CONFIG_RELAY) += relay.o
 obj-$(CONFIG_UTS_NS) += utsname.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
+obj-$(CONFIG_UTRACE) += utrace.o
 
 ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is

Index: b/kernel/utrace.c
===================================================================
--- /dev/null
+++ b/kernel/utrace.c
@@ -0,0 +1,2263 @@
+/*
+ * utrace infrastructure interface for debugging user processes
+ *
+ * Copyright (C) 2006, 2007 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * Red Hat Author: Roland McGrath.
+ */
+
+#include <linux/utrace.h>
+#include <linux/tracehook.h>
+#include <linux/err.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <asm/tracehook.h>
+
+
+#define UTRACE_DEBUG 1
+#ifdef UTRACE_DEBUG
+#define CHECK_INIT(p)	atomic_set(&(p)->check_dead, 1)
+#define CHECK_DEAD(p)	BUG_ON(!atomic_dec_and_test(&(p)->check_dead))
+#else
+#define CHECK_INIT(p)	do { } while (0)
+#define CHECK_DEAD(p)	do { } while (0)
+#endif
+
+/*
+ * Per-thread structure task_struct.utrace points to.
+ *
+ * The task itself never has to worry about this going away after
+ * some event is found set in task_struct.utrace_flags.
+ * Once created, this pointer is changed only when the task is quiescent
+ * (TASK_TRACED or TASK_STOPPED with the siglock held, or dead).
+ *
+ * For other parties, the pointer to this is protected by RCU and
+ * task_lock.  Since call_rcu is never used while the thread is alive and
+ * using this struct utrace, we can overlay the RCU data structure used
+ * only for a dead struct with some local state used only for a live utrace
+ * on an active thread.
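+ *
+ * As an illustration of the read side (a sketch mirroring the
+ * utrace_attach path below, not additional API):
+ *
+ *	rcu_read_lock();
+ *	utrace = rcu_dereference(task->utrace);
+ *	if (utrace != NULL) {
+ *		... inspect utrace->engines ...
+ *	}
+ *	rcu_read_unlock();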
+ */ +struct utrace +{ + union { + struct rcu_head dead; + struct { + struct task_struct *cloning; + struct utrace_signal *signal; + } live; + struct { + unsigned long flags; + } exit; + } u; + + struct list_head engines; + spinlock_t lock; +#ifdef UTRACE_DEBUG + atomic_t check_dead; +#endif +}; + +static struct kmem_cache *utrace_cachep; +static struct kmem_cache *utrace_engine_cachep; + +static int __init +utrace_init(void) +{ + utrace_cachep = + kmem_cache_create("utrace_cache", + sizeof(struct utrace), 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + utrace_engine_cachep = + kmem_cache_create("utrace_engine_cache", + sizeof(struct utrace_attached_engine), 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + return 0; +} +subsys_initcall(utrace_init); + + +/* + * Make sure target->utrace is allocated, and return with it locked on + * success. This function mediates startup races. The creating parent + * task has priority, and other callers will delay here to let its call + * succeed and take the new utrace lock first. + */ +static struct utrace * +utrace_first_engine(struct task_struct *target, + struct utrace_attached_engine *engine) + __acquires(utrace->lock) +{ + struct utrace *utrace; + + /* + * If this is a newborn thread and we are not the creator, + * we have to wait for it. The creator gets the first chance + * to attach. The PF_STARTING flag is cleared after its + * report_clone hook has had a chance to run. + */ + if (target->flags & PF_STARTING) { + utrace = current->utrace; + if (utrace == NULL || utrace->u.live.cloning != target) { + yield(); + return (signal_pending(current) + ? ERR_PTR(-ERESTARTNOINTR) : NULL); + } + } + + utrace = kmem_cache_alloc(utrace_cachep, GFP_KERNEL); + if (unlikely(utrace == NULL)) + return ERR_PTR(-ENOMEM); + + utrace->u.live.cloning = NULL; + utrace->u.live.signal = NULL; + INIT_LIST_HEAD(&utrace->engines); + list_add(&engine->entry, &utrace->engines); + spin_lock_init(&utrace->lock); + CHECK_INIT(utrace); + + spin_lock(&utrace->lock); + task_lock(target); + if (likely(target->utrace == NULL)) { + rcu_assign_pointer(target->utrace, utrace); + + /* + * The task_lock protects us against another thread doing + * the same thing. We might still be racing against + * tracehook_release_task. It's called with ->exit_state + * set to EXIT_DEAD and then checks ->utrace with an + * smp_mb() in between. If EXIT_DEAD is set, then + * release_task might have checked ->utrace already and saw + * it NULL; we can't attach. If we see EXIT_DEAD not yet + * set after our barrier, then we know release_task will + * see our target->utrace pointer. + */ + smp_mb(); + if (likely(target->exit_state != EXIT_DEAD)) { + task_unlock(target); + return utrace; + } + + /* + * The target has already been through release_task. + * Our caller will restart and notice it's too late now. + */ + target->utrace = NULL; + } + + /* + * Another engine attached first, so there is a struct already. + * A null return says to restart looking for the existing one. + */ + task_unlock(target); + spin_unlock(&utrace->lock); + kmem_cache_free(utrace_cachep, utrace); + + return NULL; +} + +static void +utrace_free(struct rcu_head *rhead) +{ + struct utrace *utrace = container_of(rhead, struct utrace, u.dead); + kmem_cache_free(utrace_cachep, utrace); +} + +/* + * Called with utrace locked. Clean it up and free it via RCU. 
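+ * (The lock is released here, and the actual kmem_cache_free is
+ * deferred through call_rcu, so readers still traversing the engine
+ * list under rcu_read_lock remain safe.)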
+ */ +static void +rcu_utrace_free(struct utrace *utrace) + __releases(utrace->lock) +{ + CHECK_DEAD(utrace); + spin_unlock(&utrace->lock); + INIT_RCU_HEAD(&utrace->u.dead); + call_rcu(&utrace->u.dead, utrace_free); +} + +static void +utrace_engine_free(struct rcu_head *rhead) +{ + struct utrace_attached_engine *engine = + container_of(rhead, struct utrace_attached_engine, rhead); + kmem_cache_free(utrace_engine_cachep, engine); +} + +static inline void +rcu_engine_free(struct utrace_attached_engine *engine) +{ + CHECK_DEAD(engine); + call_rcu(&engine->rhead, utrace_engine_free); +} + + +/* + * Remove the utrace pointer from the task, unless there is a pending + * forced signal (or it's quiescent in utrace_get_signal). We know it's + * quiescent now, and so are guaranteed it will have to take utrace->lock + * before it can set ->exit_state if it's not set now. + */ +static inline void +utrace_clear_tsk(struct task_struct *tsk, struct utrace *utrace) +{ + if (tsk->exit_state || utrace->u.live.signal == NULL) { + task_lock(tsk); + if (likely(tsk->utrace != NULL)) { + rcu_assign_pointer(tsk->utrace, NULL); + tsk->utrace_flags &= UTRACE_ACTION_NOREAP; + } + task_unlock(tsk); + } +} + +/* + * Called with utrace locked and the target quiescent (maybe current). + * If this was the last engine and there is no parting forced signal + * pending, utrace is left locked and not freed, but is removed from the task. + */ +static void +remove_engine(struct utrace_attached_engine *engine, + struct task_struct *tsk, struct utrace *utrace) +{ + list_del_rcu(&engine->entry); + if (list_empty(&utrace->engines)) + utrace_clear_tsk(tsk, utrace); + rcu_engine_free(engine); +} + + +#define DEATH_EVENTS (UTRACE_EVENT(DEATH) | UTRACE_EVENT(QUIESCE)) + +/* + * Called with utrace locked, after remove_engine may have run. + * Passed the flags from all remaining engines, i.e. zero if none + * left. Install the flags in tsk->utrace_flags and return with + * utrace unlocked. If no engines are left and there is no parting + * forced signal pending, utrace is freed. + */ +static void +check_dead_utrace(struct task_struct *tsk, struct utrace *utrace, + unsigned long flags) + __releases(utrace->lock) +{ + long exit_state = 0; + + if (!tsk->exit_state && utrace->u.live.signal != NULL) + /* + * There is a pending forced signal. It may have been + * left by an engine now detached. The empty utrace + * remains attached until it can be processed. + */ + flags |= UTRACE_ACTION_QUIESCE; + + /* + * If tracing was preventing a SIGCHLD or self-reaping + * and is no longer, we'll do that report or reaping now. + */ + if (((tsk->utrace_flags &~ flags) & UTRACE_ACTION_NOREAP) + && tsk->exit_state) { + /* + * While holding the utrace lock, mark that it's been done. + * For self-reaping, we need to change tsk->exit_state + * before clearing tsk->utrace_flags, so that the real + * parent can't see it in EXIT_ZOMBIE momentarily and reap + * it. If tsk was the group_leader, an exec by another + * thread can release_task it despite our NOREAP. Holding + * tasklist_lock for reading excludes de_thread until we + * decide what to do. + */ + read_lock(&tasklist_lock); + if (tsk->exit_signal == -1) { /* Self-reaping thread. */ + exit_state = xchg(&tsk->exit_state, EXIT_DEAD); + read_unlock(&tasklist_lock); + + BUG_ON(exit_state != EXIT_ZOMBIE); + exit_state = EXIT_DEAD; /* Reap it below. */ + + /* + * Now that we've changed its state to DEAD, + * it's safe to install the new tsk->utrace_flags + * value without the UTRACE_ACTION_NOREAP bit set. 
+ */ + } + else if (thread_group_empty(tsk)) /* Normal solo zombie. */ + /* + * We need to prevent the real parent from reaping + * until after we've called do_notify_parent, below. + * It can get into wait_task_zombie any time after + * the UTRACE_ACTION_NOREAP bit is cleared. It's + * safe for that to do everything it does until its + * release_task call starts tearing things down. + * Holding tasklist_lock for reading prevents + * release_task from proceeding until we've done + * everything we need to do. + */ + exit_state = EXIT_ZOMBIE; + else + /* + * Delayed group leader, nothing to do yet. + * This is also the situation with the old + * group leader in an exec by another thread, + * which will call release_task itself. + */ + read_unlock(&tasklist_lock); + } + + /* + * When it's in TASK_STOPPED state, do not set UTRACE_EVENT(JCTL). + * That bit indicates utrace_report_jctl has not run yet, but it + * may have. Set UTRACE_ACTION_QUIESCE instead to be sure that + * once it resumes it will recompute its flags in utrace_quiescent. + */ + if (((flags &~ tsk->utrace_flags) & UTRACE_EVENT(JCTL)) + && tsk->state == TASK_STOPPED) { + flags &= ~UTRACE_EVENT(JCTL); + flags |= UTRACE_ACTION_QUIESCE; + } + + tsk->utrace_flags = flags; + if (flags) + spin_unlock(&utrace->lock); + else { + BUG_ON(tsk->utrace == utrace); + rcu_utrace_free(utrace); + } + + /* + * Now we're finished updating the utrace state. + * Do a pending self-reaping or parent notification. + */ + if (exit_state == EXIT_ZOMBIE) { + do_notify_parent(tsk, tsk->exit_signal); + + /* + * If SIGCHLD was ignored, that set tsk->exit_signal = -1 + * to tell us to reap it immediately. + */ + if (tsk->exit_signal == -1) { + exit_state = xchg(&tsk->exit_state, EXIT_DEAD); + BUG_ON(exit_state != EXIT_ZOMBIE); + exit_state = EXIT_DEAD; /* Reap it below. */ + } + read_unlock(&tasklist_lock); /* See comment above. */ + } + if (exit_state == EXIT_DEAD) + /* + * Note this can wind up in utrace_reap and do more callbacks. + * Our callers must be in places where that is OK. + */ + release_task(tsk); +} + +/* + * Get the target thread to quiesce. Return nonzero if it's already quiescent. + * Return zero if it will report a QUIESCE event soon. + * If interrupt is nonzero, wake it like a signal would so it quiesces ASAP. + * If interrupt is zero, just make sure it quiesces before going to user mode. + */ +static int +quiesce(struct task_struct *target, int interrupt) +{ + int ret; + + target->utrace_flags |= UTRACE_ACTION_QUIESCE; + read_barrier_depends(); + + if (target->exit_state) + goto dead; + + /* + * First a quick check without the siglock. If it's in TASK_TRACED + * or TASK_STOPPED already, we know it is going to go through + * utrace_get_signal before it resumes. + */ + ret = 1; + switch (target->state) { + case TASK_TRACED: + break; + + case TASK_STOPPED: + /* + * If it will call utrace_report_jctl but has not gotten + * through it yet, then don't consider it quiescent yet. + * utrace_report_jctl will take target->utrace->lock and + * clear UTRACE_EVENT(JCTL) once it finishes. After that, + * it is considered quiescent; when it wakes up, it will go + * through utrace_get_signal before doing anything else. + */ + if (!(target->utrace_flags & UTRACE_EVENT(JCTL))) + break; + + default: + /* + * Now get the siglock and check again. 
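+		 * (The quick checks above ran without the siglock and
+		 * can race with wakeups; under the siglock, the ->state
+		 * and exit_state tests below are stable.)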
+ */ + spin_lock_irq(&target->sighand->siglock); + if (unlikely(target->exit_state)) { + spin_unlock_irq(&target->sighand->siglock); + goto dead; + } + switch (target->state) { + case TASK_TRACED: + break; + + case TASK_STOPPED: + ret = !(target->utrace_flags & UTRACE_EVENT(JCTL)); + break; + + default: + /* + * It is not stopped, so tell it to stop soon. + */ + ret = 0; + if (interrupt) + signal_wake_up(target, 0); + else { + set_tsk_thread_flag(target, TIF_SIGPENDING); + kick_process(target); + } + break; + } + spin_unlock_irq(&target->sighand->siglock); + } + + return ret; + +dead: + /* + * On the exit path, it's only truly quiescent if it has + * already been through utrace_report_death, or never will. + */ + return !(target->utrace_flags & DEATH_EVENTS); +} + + +static struct utrace_attached_engine * +matching_engine(struct utrace *utrace, int flags, + const struct utrace_engine_ops *ops, void *data) +{ + struct utrace_attached_engine *engine; + list_for_each_entry_rcu(engine, &utrace->engines, entry) { + if ((flags & UTRACE_ATTACH_MATCH_OPS) + && engine->ops != ops) + continue; + if ((flags & UTRACE_ATTACH_MATCH_DATA) + && engine->data != data) + continue; + return engine; + } + return ERR_PTR(-ENOENT); +} + + +/** + * utrace_attach - Attach new engine to a thread, or look up attached engines. + * @target: thread to attach to + * @flags: %UTRACE_ATTACH_* flags + * @ops: callback table for new engine + * @data: engine private data pointer + * + * The caller must ensure that the @target thread does not get freed, + * i.e. hold a ref or be its parent. + * + * If %UTRACE_ATTACH_CREATE is not specified, you only look up an existing + * engine already attached to the thread. If %UTRACE_ATTACH_MATCH_* bits + * are set, only consider matching engines. If %UTRACE_ATTACH_EXCLUSIVE is + * set, attempting to attach a second (matching) engine fails with -%EEXIST. + */ +struct utrace_attached_engine * +utrace_attach(struct task_struct *target, int flags, + const struct utrace_engine_ops *ops, void *data) +{ + struct utrace *utrace; + struct utrace_attached_engine *engine; + +restart: + rcu_read_lock(); + utrace = rcu_dereference(target->utrace); + smp_rmb(); + if (unlikely(target->exit_state == EXIT_DEAD)) { + /* + * The target has already been reaped. + * Check this first; a race with reaping may lead to restart. + */ + rcu_read_unlock(); + if (!(flags & UTRACE_ATTACH_CREATE)) + return ERR_PTR(-ENOENT); + return ERR_PTR(-ESRCH); + } + + if (utrace == NULL) { + rcu_read_unlock(); + + if (!(flags & UTRACE_ATTACH_CREATE)) + return ERR_PTR(-ENOENT); + + engine = kmem_cache_alloc(utrace_engine_cachep, GFP_KERNEL); + if (unlikely(engine == NULL)) + return ERR_PTR(-ENOMEM); + engine->flags = 0; + CHECK_INIT(engine); + + goto first; + } + + if (!(flags & UTRACE_ATTACH_CREATE)) { + engine = matching_engine(utrace, flags, ops, data); + rcu_read_unlock(); + return engine; + } + rcu_read_unlock(); + + engine = kmem_cache_alloc(utrace_engine_cachep, GFP_KERNEL); + if (unlikely(engine == NULL)) + return ERR_PTR(-ENOMEM); + engine->flags = 0; + CHECK_INIT(engine); + + rcu_read_lock(); + utrace = rcu_dereference(target->utrace); + if (unlikely(utrace == NULL)) { /* Race with detach. 
*/ + rcu_read_unlock(); + goto first; + } + spin_lock(&utrace->lock); + + if (flags & UTRACE_ATTACH_EXCLUSIVE) { + struct utrace_attached_engine *old; + old = matching_engine(utrace, flags, ops, data); + if (!IS_ERR(old)) { + spin_unlock(&utrace->lock); + rcu_read_unlock(); + kmem_cache_free(utrace_engine_cachep, engine); + return ERR_PTR(-EEXIST); + } + } + + if (unlikely(rcu_dereference(target->utrace) != utrace)) { + /* + * We lost a race with other CPUs doing a sequence + * of detach and attach before we got in. + */ + spin_unlock(&utrace->lock); + rcu_read_unlock(); + kmem_cache_free(utrace_engine_cachep, engine); + goto restart; + } + rcu_read_unlock(); + + list_add_tail_rcu(&engine->entry, &utrace->engines); + goto finish; + +first: + utrace = utrace_first_engine(target, engine); + if (IS_ERR(utrace) || unlikely(utrace == NULL)) { + kmem_cache_free(utrace_engine_cachep, engine); + if (unlikely(utrace == NULL)) /* Race condition. */ + goto restart; + return ERR_PTR(PTR_ERR(utrace)); + } + +finish: + engine->ops = ops; + engine->data = data; + + spin_unlock(&utrace->lock); + + return engine; +} +EXPORT_SYMBOL_GPL(utrace_attach); + +/* + * When an engine is detached, the target thread may still see it and make + * callbacks until it quiesces. We reset its event flags to just QUIESCE + * and install a special ops vector whose callback is dead_engine_delete. + * When the target thread quiesces, it can safely free the engine itself. + */ +static u32 +dead_engine_delete(struct utrace_attached_engine *engine, + struct task_struct *tsk) +{ + return UTRACE_ACTION_DETACH; +} + +static const struct utrace_engine_ops dead_engine_ops = +{ + .report_quiesce = &dead_engine_delete +}; + + +/* + * Called with utrace locked. Recompute the union of engines' flags. + */ +static inline unsigned long +rescan_flags(struct utrace *utrace) +{ + struct utrace_attached_engine *engine; + unsigned long flags = 0; + list_for_each_entry(engine, &utrace->engines, entry) + flags |= engine->flags | UTRACE_EVENT(REAP); + return flags; +} + +/* + * Only these flags matter any more for a dead task (exit_state set). + * We use this mask on flags installed in ->utrace_flags after + * exit_notify (and possibly utrace_report_death) has run. + * This ensures that utrace_release_task knows positively that + * utrace_report_death will not run later. + */ +#define DEAD_FLAGS_MASK (UTRACE_EVENT(REAP) | UTRACE_ACTION_NOREAP) + +/* + * Flags bits in utrace->u.exit.flags word. These are private + * communication among utrace_report_death, utrace_release_task, + * utrace_detach, and utrace_set_flags. + */ +#define EXIT_FLAG_DEATH 1 /* utrace_report_death running */ +#define EXIT_FLAG_DELAYED_GROUP_LEADER 2 /* utrace_delayed_group_leader ran */ +#define EXIT_FLAG_REAP 4 /* release_task ran */ + + +/* + * We may have been the one keeping the target thread quiescent. + * Check if it should wake up now. + * Called with utrace locked, and unlocks it on return. + * If we were keeping it stopped, resume it. + * If we were keeping its zombie from reporting/self-reap, do it now. + */ +static void +wake_quiescent(unsigned long old_flags, + struct utrace *utrace, struct task_struct *target) + __releases(utrace->lock) +{ + unsigned long flags; + + /* + * Update the set of events of interest from the union + * of the interests of the remaining tracing engines. 
+ */ + flags = rescan_flags(utrace); + if (target->exit_state) { + BUG_ON(utrace->u.exit.flags & EXIT_FLAG_DEATH); + flags &= DEAD_FLAGS_MASK; + } + check_dead_utrace(target, utrace, flags); + + if (target->exit_state || (flags & UTRACE_ACTION_QUIESCE)) + return; + + read_lock(&tasklist_lock); + if (!unlikely(target->exit_state)) { + /* + * The target is not dead and should not be in tracing stop + * any more. Wake it unless it's in job control stop. + */ + spin_lock_irq(&target->sighand->siglock); + if (target->signal->flags & SIGNAL_STOP_STOPPED) { + int stop_count = target->signal->group_stop_count; + target->state = TASK_STOPPED; + spin_unlock_irq(&target->sighand->siglock); + + /* + * If tracing was preventing a CLD_STOPPED report + * and is no longer, do that report right now. + */ + if (stop_count == 0 + && ((old_flags &~ flags) & UTRACE_ACTION_NOREAP)) + do_notify_parent_cldstop(target, CLD_STOPPED); + } + else { + /* + * Wake the task up. + */ + recalc_sigpending_and_wake(target); + wake_up_state(target, TASK_STOPPED | TASK_TRACED); + spin_unlock_irq(&target->sighand->siglock); + } + } + read_unlock(&tasklist_lock); +} + +/* + * The engine is supposed to be attached. The caller really needs + * rcu_read_lock if it wants to look at the engine struct + * (e.g. engine->data), to be sure it hasn't been freed by utrace_reap + * asynchronously--unless he has synchronized with his report_reap + * callback, which would have happened before then. A simultaneous + * utrace_detach call or UTRACE_ACTION_DETACH return from a callback can + * also free the engine if rcu_read_lock is not held, but that is in the + * tracing engine's power to avoid. + * + * Get the utrace lock for the target task. + * Returns the struct if locked, or ERR_PTR(-errno). + * + * This has to be robust against races with: + * utrace_detach calls + * UTRACE_ACTION_DETACH after reports + * utrace_report_death + * utrace_release_task + */ +static struct utrace * +get_utrace_lock_attached(struct task_struct *target, + struct utrace_attached_engine *engine) + __acquires(utrace->lock) +{ + struct utrace *utrace; + + rcu_read_lock(); + utrace = rcu_dereference(target->utrace); + smp_rmb(); + if (unlikely(utrace == NULL) + || unlikely(target->exit_state == EXIT_DEAD)) + /* + * If all engines detached already, utrace is clear. + * Otherwise, we're called after utrace_release_task might + * have started. A call to this engine's report_reap + * callback might already be in progress or engine might + * even have been freed already. + */ + utrace = ERR_PTR(-ESRCH); + else { + spin_lock(&utrace->lock); + if (unlikely(rcu_dereference(target->utrace) != utrace) + || unlikely(rcu_dereference(engine->ops) + == &dead_engine_ops)) { + /* + * By the time we got the utrace lock, + * it had been reaped or detached already. + */ + spin_unlock(&utrace->lock); + utrace = ERR_PTR(-ESRCH); + } + } + rcu_read_unlock(); + + return utrace; +} + +/** + * utrace_detach - Detach a tracing engine from a thread. + * @target: thread to detach from + * @engine: engine attached to @target + * + * After this, the engine data structure is no longer accessible, and the + * thread might be reaped. The thread will start running again if it was + * being kept quiescent and no longer has any attached engines asserting + * %UTRACE_ACTION_QUIESCE. + * + * If the target thread is not already quiescent, then a callback to this + * engine might be in progress or about to start on another CPU. 
If it's + * quiescent when utrace_detach() is called, then after successful return + * it's guaranteed that no more callbacks to the ops vector will be done. + * The only exception is %SIGKILL (and exec by another thread in the group), + * which breaks quiescence and can cause asynchronous %DEATH and/or %REAP + * callbacks even when %UTRACE_ACTION_QUIESCE is set. In that event, + * utrace_detach() fails with -%ESRCH or -%EALREADY to indicate that the + * report_reap() or report_death() callbacks have begun or will run imminently. + */ +int +utrace_detach(struct task_struct *target, + struct utrace_attached_engine *engine) +{ + struct utrace *utrace; + unsigned long flags; + + utrace = get_utrace_lock_attached(target, engine); + if (unlikely(IS_ERR(utrace))) + return PTR_ERR(utrace); + + /* + * On the exit path, DEATH and QUIESCE event bits are set only + * before utrace_report_death has taken the lock. At that point, + * the death report will come soon, so disallow detach until it's + * done. This prevents us from racing with it detaching itself. + */ + if (target->exit_state + && (unlikely(target->utrace_flags & DEATH_EVENTS) + || unlikely(utrace->u.exit.flags & (EXIT_FLAG_DEATH + | EXIT_FLAG_REAP)))) { + /* + * We have already started the death report, or + * even entered release_task. We can't prevent + * the report_death and report_reap callbacks, + * so tell the caller they will happen. + */ + int ret = ((utrace->u.exit.flags & EXIT_FLAG_REAP) + ? -ESRCH : -EALREADY); + spin_unlock(&utrace->lock); + return ret; + } + + flags = engine->flags; + engine->flags = UTRACE_EVENT(QUIESCE) | UTRACE_ACTION_QUIESCE; + rcu_assign_pointer(engine->ops, &dead_engine_ops); + + if (quiesce(target, 1)) { + remove_engine(engine, target, utrace); + wake_quiescent(flags, utrace, target); + } + else + spin_unlock(&utrace->lock); + + + return 0; +} +EXPORT_SYMBOL_GPL(utrace_detach); + + +/* + * Called with utrace->lock held. + * Notify and clean up all engines, then free utrace. + */ +static void +utrace_reap(struct task_struct *target, struct utrace *utrace) + __releases(utrace->lock) +{ + struct utrace_attached_engine *engine, *next; + const struct utrace_engine_ops *ops; + +restart: + list_for_each_entry_safe(engine, next, &utrace->engines, entry) { + list_del_rcu(&engine->entry); + + /* + * Now nothing else refers to this engine. + */ + if (engine->flags & UTRACE_EVENT(REAP)) { + ops = rcu_dereference(engine->ops); + if (ops != &dead_engine_ops) { + spin_unlock(&utrace->lock); + (*ops->report_reap)(engine, target); + rcu_engine_free(engine); + spin_lock(&utrace->lock); + goto restart; + } + } + rcu_engine_free(engine); + } + + rcu_utrace_free(utrace); +} + +/* + * Called by release_task. After this, target->utrace must be cleared. + */ +void +utrace_release_task(struct task_struct *target) +{ + struct utrace *utrace; + + task_lock(target); + utrace = rcu_dereference(target->utrace); + rcu_assign_pointer(target->utrace, NULL); + task_unlock(target); + + if (unlikely(utrace == NULL)) + return; + + spin_lock(&utrace->lock); + /* + * If the list is empty, utrace is already on its way to be freed. + * We raced with detach and we won the task_lock race but lost the + * utrace->lock race. All we have to do is let RCU run. + */ + if (!unlikely(list_empty(&utrace->engines))) { + utrace->u.exit.flags |= EXIT_FLAG_REAP; + + if (!(target->utrace_flags & DEATH_EVENTS)) { + utrace_reap(target, utrace); /* Unlocks and frees. 
*/ + return; + } + + /* + * The target will do some final callbacks but hasn't + * finished them yet. We know because it clears these + * event bits after it's done. Instead of cleaning up here + * and requiring utrace_report_death to cope with it, we + * delay the REAP report and the teardown until after the + * target finishes its death reports. + */ + } + spin_unlock(&utrace->lock); +} + +/** + * utrace_set_flags - Change the flags for a tracing engine. + * @target: thread to affect + * @engine: attached engine to affect + * @flags: new flags value + * + * This resets the event flags and the action state flags. + * If %UTRACE_ACTION_QUIESCE and %UTRACE_EVENT(%QUIESCE) are set, + * this will cause a report_quiesce() callback soon, maybe immediately. + * If %UTRACE_ACTION_QUIESCE was set before and is no longer set by + * any engine, this will wake the thread up. + * + * This fails with -%EALREADY and does nothing if you try to clear + * %UTRACE_EVENT(%DEATH) when the report_death() callback may already have + * begun, if you try to clear %UTRACE_EVENT(%REAP) when the report_reap() + * callback may already have begun, if you try to newly set + * %UTRACE_ACTION_NOREAP when the target may already have sent its + * parent %SIGCHLD, or if you try to newly set %UTRACE_EVENT(%DEATH), + * %UTRACE_EVENT(%QUIESCE), or %UTRACE_ACTION_QUIESCE, when the target is + * already dead or dying. It can fail with -%ESRCH when the target has + * already been detached (including forcible detach on reaping). If + * the target was quiescent before the call, then after a successful + * call, no event callbacks not requested in the new flags will be + * made, and a report_quiesce() callback will always be made if + * requested. These rules provide for coherent synchronization based + * on quiescence, even when %SIGKILL is breaking quiescence. + */ +int +utrace_set_flags(struct task_struct *target, + struct utrace_attached_engine *engine, + unsigned long flags) +{ + struct utrace *utrace; + int report; + unsigned long old_flags, old_utrace_flags; + int ret = -EALREADY; + +#ifdef ARCH_HAS_SINGLE_STEP + if (! ARCH_HAS_SINGLE_STEP) +#endif + WARN_ON(flags & UTRACE_ACTION_SINGLESTEP); +#ifdef ARCH_HAS_BLOCK_STEP + if (! ARCH_HAS_BLOCK_STEP) +#endif + WARN_ON(flags & UTRACE_ACTION_BLOCKSTEP); + + utrace = get_utrace_lock_attached(target, engine); + if (unlikely(IS_ERR(utrace))) + return PTR_ERR(utrace); + +restart: /* See below. */ + + old_utrace_flags = target->utrace_flags; + old_flags = engine->flags; + + if (target->exit_state + && (((flags &~ old_flags) & (UTRACE_ACTION_QUIESCE + | UTRACE_ACTION_NOREAP + | DEATH_EVENTS)) + || ((utrace->u.exit.flags & EXIT_FLAG_DEATH) + && ((old_flags &~ flags) & DEATH_EVENTS)) + || ((utrace->u.exit.flags & EXIT_FLAG_REAP) + && ((old_flags &~ flags) & UTRACE_EVENT(REAP))))) { + spin_unlock(&utrace->lock); + return ret; + } + + /* + * When setting these flags, it's essential that we really + * synchronize with exit_notify. They cannot be set after + * exit_notify takes the tasklist_lock. By holding the read + * lock here while setting the flags, we ensure that the calls + * to tracehook_notify_death and tracehook_report_death will + * see the new flags. This ensures that utrace_release_task + * knows positively that utrace_report_death will be called or + * that it won't. 
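	 *
	 * Schematically, the ordering relied on here is (a sketch of
	 * the argument, not additional locking):  either we take the
	 * read lock before exit_notify takes the write lock, in which
	 * case the exit path sees the new flags; or we take it after,
	 * in which case we see exit_state set and fail with -EALREADY.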
+ */ + if ((flags &~ old_utrace_flags) & (UTRACE_ACTION_NOREAP + | DEATH_EVENTS)) { + read_lock(&tasklist_lock); + if (unlikely(target->exit_state)) { + read_unlock(&tasklist_lock); + spin_unlock(&utrace->lock); + return ret; + } + target->utrace_flags |= flags; + read_unlock(&tasklist_lock); + } + + engine->flags = flags; + target->utrace_flags |= flags; + ret = 0; + + report = 0; + if ((old_flags ^ flags) & UTRACE_ACTION_QUIESCE) { + if (flags & UTRACE_ACTION_QUIESCE) { + report = (quiesce(target, 1) + && (flags & UTRACE_EVENT(QUIESCE))); + spin_unlock(&utrace->lock); + } + else + goto wake; + } + else if (((old_flags &~ flags) & UTRACE_ACTION_NOREAP) + && target->exit_state) + goto wake; + else { + /* + * If we're asking for single-stepping or syscall tracing, + * we need to pass through utrace_quiescent before resuming + * in user mode to get those effects, even if the target is + * not going to be quiescent right now. + */ + if (!(target->utrace_flags & UTRACE_ACTION_QUIESCE) + && !target->exit_state + && ((flags &~ old_utrace_flags) + & (UTRACE_ACTION_SINGLESTEP | UTRACE_ACTION_BLOCKSTEP + | UTRACE_EVENT_SYSCALL))) + quiesce(target, 0); + spin_unlock(&utrace->lock); + } + + if (report) { /* Already quiescent, won't report itself. */ + u32 action = (*engine->ops->report_quiesce)(engine, target); + if (action & UTRACE_ACTION_DETACH) + utrace_detach(target, engine); + else if (action & UTRACE_ACTION_NEWSTATE) { + /* + * The callback has us changing the flags yet + * again. Since we released the lock, they + * could have changed asynchronously just now. + * We must refetch the current flags to change + * the %UTRACE_ACTION_STATE_MASK bits. If the + * target thread started dying, then there is + * nothing we can do--but that failure is due + * to the report_quiesce() callback after the + * original utrace_set_flags has already + * succeeded, so we don't want to return + * failure here (hence leave ret = 0). + */ + utrace = get_utrace_lock_attached(target, engine); + if (!unlikely(IS_ERR(utrace))) { + flags = action & UTRACE_ACTION_STATE_MASK; + flags |= (engine->flags + &~ UTRACE_ACTION_STATE_MASK); + goto restart; + } + } + } + + return ret; + +wake: + /* + * It's quiescent now and needs to wake up. + * + * On the exit path, it's only truly quiescent if it has + * already been through utrace_report_death, or never will. + */ + if (unlikely(target->exit_state) + && unlikely(target->utrace_flags & DEATH_EVENTS)) + spin_unlock(&utrace->lock); + else + wake_quiescent(old_flags, utrace, target); + + return ret; +} +EXPORT_SYMBOL_GPL(utrace_set_flags); + +/* + * While running an engine callback, no locks are held. + * If a callback updates its engine's action state, then + * we need to take the utrace lock to install the flags update. + */ +static inline u32 +update_action(struct task_struct *tsk, struct utrace *utrace, + struct utrace_attached_engine *engine, + u32 ret) +{ + if (ret & UTRACE_ACTION_DETACH) + rcu_assign_pointer(engine->ops, &dead_engine_ops); + else if ((ret & UTRACE_ACTION_NEWSTATE) + && ((ret ^ engine->flags) & UTRACE_ACTION_STATE_MASK)) { +#ifdef ARCH_HAS_SINGLE_STEP + if (! ARCH_HAS_SINGLE_STEP) +#endif + WARN_ON(ret & UTRACE_ACTION_SINGLESTEP); +#ifdef ARCH_HAS_BLOCK_STEP + if (! ARCH_HAS_BLOCK_STEP) +#endif + WARN_ON(ret & UTRACE_ACTION_BLOCKSTEP); + spin_lock(&utrace->lock); + /* + * If we're changing something other than just QUIESCE, + * make sure we pass through utrace_quiescent before + * resuming even if we aren't going to stay quiescent. 
+ * That's where we get the correct union of all engines' + * flags after they've finished changing, and apply changes. + */ + if (((ret ^ engine->flags) & (UTRACE_ACTION_STATE_MASK + & ~UTRACE_ACTION_QUIESCE))) + tsk->utrace_flags |= UTRACE_ACTION_QUIESCE; + engine->flags &= ~UTRACE_ACTION_STATE_MASK; + engine->flags |= ret & UTRACE_ACTION_STATE_MASK; + tsk->utrace_flags |= engine->flags; + spin_unlock(&utrace->lock); + } + else + ret |= engine->flags & UTRACE_ACTION_STATE_MASK; + return ret; +} + +#define REPORT(callback, ...) do { \ + u32 ret = (*rcu_dereference(engine->ops)->callback) \ + (engine, tsk, ##__VA_ARGS__); \ + action = update_action(tsk, utrace, engine, ret); \ + } while (0) + + +/* + * Called with utrace->lock held, returns with it released. + */ +static u32 +remove_detached(struct task_struct *tsk, struct utrace *utrace, + u32 action, unsigned long mask) + __releases(utrace->lock) +{ + struct utrace_attached_engine *engine, *next; + unsigned long flags = 0; + + list_for_each_entry_safe(engine, next, &utrace->engines, entry) { + if (engine->ops == &dead_engine_ops) + remove_engine(engine, tsk, utrace); + else + flags |= engine->flags | UTRACE_EVENT(REAP); + } + check_dead_utrace(tsk, utrace, flags & mask); + + flags &= UTRACE_ACTION_STATE_MASK; + return flags | (action & UTRACE_ACTION_OP_MASK); +} + +/* + * Called after an event report loop. Remove any engines marked for detach. + */ +static inline u32 +check_detach(struct task_struct *tsk, u32 action) +{ + if (action & UTRACE_ACTION_DETACH) { + /* + * This must be current to be sure it's not possibly + * getting into utrace_report_death. + */ + struct utrace *utrace; + BUG_ON(tsk != current); + utrace = tsk->utrace; + spin_lock(&utrace->lock); + action = remove_detached(tsk, utrace, action, ~0UL); + } + return action; +} + +static inline int +check_quiescent(struct task_struct *tsk, u32 action) +{ + if (action & UTRACE_ACTION_STATE_MASK) + return utrace_quiescent(tsk, NULL); + return 0; +} + +/* + * Called iff UTRACE_EVENT(CLONE) flag is set. + * This notification call blocks the wake_up_new_task call on the child. + * So we must not quiesce here. tracehook_report_clone_complete will do + * a quiescence check momentarily. + */ +void +utrace_report_clone(unsigned long clone_flags, struct task_struct *child) +{ + struct task_struct *tsk = current; + struct utrace *utrace = tsk->utrace; + struct list_head *pos, *next; + struct utrace_attached_engine *engine; + unsigned long action; + + utrace->u.live.cloning = child; + + /* XXX must change for sharing */ + action = UTRACE_ACTION_RESUME; + list_for_each_safe_rcu(pos, next, &utrace->engines) { + engine = list_entry(pos, struct utrace_attached_engine, entry); + if (engine->flags & UTRACE_EVENT(CLONE)) + REPORT(report_clone, clone_flags, child); + if (action & UTRACE_ACTION_HIDE) + break; + } + + utrace->u.live.cloning = NULL; + + check_detach(tsk, action); +} + +static unsigned long +report_quiescent(struct task_struct *tsk, struct utrace *utrace, u32 action) +{ + struct list_head *pos, *next; + struct utrace_attached_engine *engine; + + list_for_each_safe_rcu(pos, next, &utrace->engines) { + engine = list_entry(pos, struct utrace_attached_engine, entry); + if (engine->flags & UTRACE_EVENT(QUIESCE)) + REPORT(report_quiesce); + action |= engine->flags & UTRACE_ACTION_STATE_MASK; + } + + return check_detach(tsk, action); +} + +/* + * Called iff UTRACE_EVENT(JCTL) flag is set. 
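+ *
+ * The what argument is a CLD_* job control code (CLD_STOPPED gets
+ * special handling below).  A nonzero return, UTRACE_JCTL_NOSIGCHLD,
+ * tells the caller to suppress the parent's SIGCHLD notification.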
+ */ +int +utrace_report_jctl(int what) +{ + struct task_struct *tsk = current; + struct utrace *utrace = tsk->utrace; + struct list_head *pos, *next; + struct utrace_attached_engine *engine; + unsigned long action; + + /* XXX must change for sharing */ + action = UTRACE_ACTION_RESUME; + list_for_each_safe_rcu(pos, next, &utrace->engines) { + engine = list_entry(pos, struct utrace_attached_engine, entry); + if (engine->flags & UTRACE_EVENT(JCTL)) + REPORT(report_jctl, what); + if (action & UTRACE_ACTION_HIDE) + break; + } + + /* + * We are becoming quiescent, so report it now. + * We don't block in utrace_quiescent because we are stopping anyway. + * We know that upon resuming we'll go through tracehook_induce_signal, + * which will keep us quiescent or set us up to resume with tracing. + */ + action = report_quiescent(tsk, utrace, action); + + if (what == CLD_STOPPED && tsk->state != TASK_STOPPED) { + /* + * The event report hooks could have blocked, though + * it should have been briefly. Make sure we're in + * TASK_STOPPED state again to block properly, unless + * we've just come back out of job control stop. + */ + spin_lock_irq(&tsk->sighand->siglock); + if (tsk->signal->flags & SIGNAL_STOP_STOPPED) + set_current_state(TASK_STOPPED); + spin_unlock_irq(&tsk->sighand->siglock); + } + + /* + * We clear the UTRACE_EVENT(JCTL) bit to indicate that we are now + * in a truly quiescent TASK_STOPPED state. After this, we can be + * detached by another thread. Setting UTRACE_ACTION_QUIESCE + * ensures that we will go through utrace_quiescent and recompute + * flags after we resume. + */ + spin_lock(&utrace->lock); + tsk->utrace_flags &= ~UTRACE_EVENT(JCTL); + tsk->utrace_flags |= UTRACE_ACTION_QUIESCE; + spin_unlock(&utrace->lock); + + return action & UTRACE_JCTL_NOSIGCHLD; +} + + +/* + * Return nonzero if there is a SIGKILL that should be waking us up. + * Called with the siglock held. + */ +static inline int +sigkill_pending(struct task_struct *tsk) +{ + return ((sigismember(&tsk->pending.signal, SIGKILL) + || sigismember(&tsk->signal->shared_pending.signal, SIGKILL)) + && !unlikely(sigismember(&tsk->blocked, SIGKILL))); +} + +/* + * Called if UTRACE_EVENT(QUIESCE) or UTRACE_ACTION_QUIESCE flag is set. + * Also called after other event reports. + * It is a good time to block. + * Returns nonzero if we woke up prematurely due to SIGKILL. + * + * The signal pointer is nonzero when called from utrace_get_signal, + * where a pending forced signal can be processed right away. Otherwise, + * we keep UTRACE_ACTION_QUIESCE set after resuming so that utrace_get_signal + * will be entered before user mode. + */ +int +utrace_quiescent(struct task_struct *tsk, struct utrace_signal *signal) +{ + struct utrace *utrace = tsk->utrace; + unsigned long action; + +restart: + /* XXX must change for sharing */ + + action = report_quiescent(tsk, utrace, UTRACE_ACTION_RESUME); + + /* + * If some engines want us quiescent, we block here. + */ + if (action & UTRACE_ACTION_QUIESCE) { + int killed; + + if (signal != NULL) { + BUG_ON(utrace->u.live.signal != NULL); + utrace->u.live.signal = signal; + } + + spin_lock_irq(&tsk->sighand->siglock); + /* + * If wake_quiescent is trying to wake us up now, it will + * have cleared the QUIESCE flag before trying to take the + * siglock. Now we have the siglock, so either it has + * already cleared the flag, or it will wake us up after we + * release the siglock it's waiting for. + * Never stop when there is a SIGKILL bringing us down. 
+ */ + killed = sigkill_pending(tsk); + if (!killed && (tsk->utrace_flags & UTRACE_ACTION_QUIESCE)) { + set_current_state(TASK_TRACED); + /* + * If there is a group stop in progress, + * we must participate in the bookkeeping. + */ + if (tsk->signal->group_stop_count > 0) + --tsk->signal->group_stop_count; + spin_unlock_irq(&tsk->sighand->siglock); + schedule(); + } + else + spin_unlock_irq(&tsk->sighand->siglock); + + if (signal != NULL) { + /* + * We know the struct stays in place when its + * u.live.signal is set, see check_dead_utrace. + * This makes it safe to clear its pointer here. + */ + BUG_ON(tsk->utrace != utrace); + BUG_ON(utrace->u.live.signal != signal); + utrace->u.live.signal = NULL; + } + + if (killed) /* Game over, man! */ + return 1; + + /* + * We've woken up. One engine could be waking us up while + * another has asked us to quiesce. So check afresh. We + * could have been detached while quiescent. Now we are no + * longer quiescent, so don't need to do any RCU locking. + * But we do need to check our utrace pointer anew. + */ + utrace = tsk->utrace; + if (tsk->utrace_flags + & (UTRACE_EVENT(QUIESCE) | UTRACE_ACTION_STATE_MASK)) + goto restart; + } + else if (tsk->utrace_flags & UTRACE_ACTION_QUIESCE) { + /* + * Our flags are out of date. + * Update the set of events of interest from the union + * of the interests of the remaining tracing engines. + * This may notice that there are no engines left + * and clean up the struct utrace. It's left in place + * and the QUIESCE flag set as long as utrace_get_signal + * still needs to process a pending forced signal. + */ + unsigned long flags; + utrace = rcu_dereference(tsk->utrace); + spin_lock(&utrace->lock); + flags = rescan_flags(utrace); + if (flags == 0) + utrace_clear_tsk(tsk, utrace); + check_dead_utrace(tsk, utrace, flags); + } + + /* + * We're resuming. Update the machine layer tracing state and then go. + */ +#ifdef ARCH_HAS_SINGLE_STEP + if (action & UTRACE_ACTION_SINGLESTEP) + tracehook_enable_single_step(tsk); + else + tracehook_disable_single_step(tsk); +#endif +#ifdef ARCH_HAS_BLOCK_STEP + if ((action & (UTRACE_ACTION_BLOCKSTEP|UTRACE_ACTION_SINGLESTEP)) + == UTRACE_ACTION_BLOCKSTEP) + tracehook_enable_block_step(tsk); + else + tracehook_disable_block_step(tsk); +#endif + if (tsk->utrace_flags & UTRACE_EVENT_SYSCALL) + tracehook_enable_syscall_trace(tsk); + else + tracehook_disable_syscall_trace(tsk); + + return 0; +} + + +/* + * Called iff UTRACE_EVENT(EXIT) flag is set. + */ +void +utrace_report_exit(long *exit_code) +{ + struct task_struct *tsk = current; + struct utrace *utrace = tsk->utrace; + struct list_head *pos, *next; + struct utrace_attached_engine *engine; + unsigned long action; + long orig_code = *exit_code; + + /* XXX must change for sharing */ + action = UTRACE_ACTION_RESUME; + list_for_each_safe_rcu(pos, next, &utrace->engines) { + engine = list_entry(pos, struct utrace_attached_engine, entry); + if (engine->flags & UTRACE_EVENT(EXIT)) + REPORT(report_exit, orig_code, exit_code); + } + action = check_detach(tsk, action); + check_quiescent(tsk, action); +} + +/* + * Called with utrace locked, unlocks it on return. Unconditionally + * recompute the flags after report_death is finished. This may notice + * that there are no engines left and free the utrace struct. 
+ */ +static void +finish_report_death(struct task_struct *tsk, struct utrace *utrace) + __releases(utrace->lock) +{ + /* + * After we unlock (possibly inside utrace_reap for callbacks) with + * this flag clear, competing utrace_detach/utrace_set_flags calls + * know that we've finished our callbacks and any detach bookkeeping. + */ + utrace->u.exit.flags &= EXIT_FLAG_REAP; + + if (utrace->u.exit.flags & EXIT_FLAG_REAP) + /* + * utrace_release_task was already called in parallel. + * We must complete its work now. + */ + utrace_reap(tsk, utrace); + else + /* + * Clear out any detached engines and in the process + * recompute the flags. Mask off event bits we can't + * see any more. This tells utrace_release_task we + * have already finished, if it comes along later. + * Note this all happens on the already-locked utrace, + * which might already be removed from the task. + */ + remove_detached(tsk, utrace, 0, DEAD_FLAGS_MASK); +} + +/* + * Called with utrace locked, unlocks it on return. + * EXIT_FLAG_DELAYED_GROUP_LEADER is set. + * Do second report_death callbacks for engines using NOREAP. + */ +static void +report_delayed_group_leader(struct task_struct *tsk, struct utrace *utrace) + __releases(utrace->lock) +{ + struct list_head *pos, *next; + struct utrace_attached_engine *engine; + u32 action; + + utrace->u.exit.flags |= EXIT_FLAG_DEATH; + spin_unlock(&utrace->lock); + + /* XXX must change for sharing */ + list_for_each_safe_rcu(pos, next, &utrace->engines) { + engine = list_entry(pos, struct utrace_attached_engine, entry); +#define NOREAP_DEATH (UTRACE_EVENT(DEATH) | UTRACE_ACTION_NOREAP) + if ((engine->flags & NOREAP_DEATH) == NOREAP_DEATH) + REPORT(report_death); + } + + spin_lock(&utrace->lock); + finish_report_death(tsk, utrace); +} + +/* + * Called iff UTRACE_EVENT(DEATH) or UTRACE_ACTION_QUIESCE flag is set. + * + * It is always possible that we are racing with utrace_release_task here, + * if UTRACE_ACTION_NOREAP is not set, or in the case of non-leader exec + * where the old leader will get released regardless of NOREAP. For this + * reason, utrace_release_task checks for the event bits that get us here, + * and delays its cleanup for us to do. + */ +void +utrace_report_death(struct task_struct *tsk, struct utrace *utrace) +{ + struct list_head *pos, *next; + struct utrace_attached_engine *engine; + u32 action; + + BUG_ON(!tsk->exit_state); + + /* + * We are presently considered "quiescent"--which is accurate + * inasmuch as we won't run any more user instructions ever again. + * But for utrace_detach and utrace_set_flags to be robust, they + * must be sure whether or not we will run any more callbacks. If + * a call comes in before we do, taking the lock here synchronizes + * us so we don't run any callbacks just disabled. Calls that come + * in while we're running the callbacks will see the report_death + * flag and know that we are not yet fully quiescent for purposes + * of detach bookkeeping. 
+ */ + spin_lock(&utrace->lock); + BUG_ON(utrace->u.exit.flags & EXIT_FLAG_DEATH); + utrace->u.exit.flags &= EXIT_FLAG_REAP; + utrace->u.exit.flags |= EXIT_FLAG_DEATH; + spin_unlock(&utrace->lock); + + /* XXX must change for sharing */ + list_for_each_safe_rcu(pos, next, &utrace->engines) { + engine = list_entry(pos, struct utrace_attached_engine, entry); + if (engine->flags & UTRACE_EVENT(DEATH)) + REPORT(report_death); + if (engine->flags & UTRACE_EVENT(QUIESCE)) + REPORT(report_quiesce); + } + + spin_lock(&utrace->lock); + if (unlikely(utrace->u.exit.flags & EXIT_FLAG_DELAYED_GROUP_LEADER)) + /* + * Another thread's release_task came along and + * removed the delayed_group_leader condition, + * but after we might have started callbacks. + * Do the second report_death callback right now. + */ + report_delayed_group_leader(tsk, utrace); + else + finish_report_death(tsk, utrace); +} + +/* + * We're called from release_task when delayed_group_leader(tsk) was + * previously true and is no longer true, and NOREAP was set. + * This means no parent notifications have happened for this zombie. + */ +void +utrace_report_delayed_group_leader(struct task_struct *tsk) +{ + struct utrace *utrace; + + rcu_read_lock(); + utrace = rcu_dereference(tsk->utrace); + if (unlikely(utrace == NULL)) { + rcu_read_unlock(); + return; + } + spin_lock(&utrace->lock); + rcu_read_unlock(); + + utrace->u.exit.flags |= EXIT_FLAG_DELAYED_GROUP_LEADER; + + /* + * If utrace_report_death is still running, or release_task has + * started already, there is nothing more to do now. + */ + if ((utrace->u.exit.flags & (EXIT_FLAG_DEATH | EXIT_FLAG_REAP)) + || !likely(tsk->utrace_flags & UTRACE_ACTION_NOREAP)) + spin_unlock(&utrace->lock); + else + report_delayed_group_leader(tsk, utrace); +} + +/* + * Called iff UTRACE_EVENT(VFORK_DONE) flag is set. + */ +void +utrace_report_vfork_done(pid_t child_pid) +{ + struct task_struct *tsk = current; + struct utrace *utrace = tsk->utrace; + struct list_head *pos, *next; + struct utrace_attached_engine *engine; + unsigned long action; + + /* XXX must change for sharing */ + action = UTRACE_ACTION_RESUME; + list_for_each_safe_rcu(pos, next, &utrace->engines) { + engine = list_entry(pos, struct utrace_attached_engine, entry); + if (engine->flags & UTRACE_EVENT(VFORK_DONE)) + REPORT(report_vfork_done, child_pid); + if (action & UTRACE_ACTION_HIDE) + break; + } + action = check_detach(tsk, action); + check_quiescent(tsk, action); +} + +/* + * Called iff UTRACE_EVENT(EXEC) flag is set. + */ +void +utrace_report_exec(struct linux_binprm *bprm, struct pt_regs *regs) +{ + struct task_struct *tsk = current; + struct utrace *utrace = tsk->utrace; + struct list_head *pos, *next; + struct utrace_attached_engine *engine; + unsigned long action; + + /* XXX must change for sharing */ + action = UTRACE_ACTION_RESUME; + list_for_each_safe_rcu(pos, next, &utrace->engines) { + engine = list_entry(pos, struct utrace_attached_engine, entry); + if (engine->flags & UTRACE_EVENT(EXEC)) + REPORT(report_exec, bprm, regs); + if (action & UTRACE_ACTION_HIDE) + break; + } + action = check_detach(tsk, action); + check_quiescent(tsk, action); +} + +/* + * Called iff UTRACE_EVENT(SYSCALL_{ENTRY,EXIT}) flag is set. 
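+ *
+ * For reference, the shape of an engine callback for these events,
+ * as implied by the REPORT() invocations below (my_syscall_entry is
+ * an illustrative name, not part of the interface):
+ *
+ *	static u32 my_syscall_entry(struct utrace_attached_engine *engine,
+ *				    struct task_struct *tsk,
+ *				    struct pt_regs *regs)
+ *	{
+ *		... examine regs ...
+ *		return UTRACE_ACTION_RESUME;
+ *	}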
+ */ +void +utrace_report_syscall(struct pt_regs *regs, int is_exit) +{ + struct task_struct *tsk = current; + struct utrace *utrace = tsk->utrace; + struct list_head *pos, *next; + struct utrace_attached_engine *engine; + unsigned long action, ev; + int killed; + +/* + XXX pass syscall # to engine hook directly, let it return inhibit-action + to reset to -1 + long syscall = tracehook_syscall_number(regs, is_exit); +*/ + + ev = is_exit ? UTRACE_EVENT(SYSCALL_EXIT) : UTRACE_EVENT(SYSCALL_ENTRY); + + /* XXX must change for sharing */ + action = UTRACE_ACTION_RESUME; + list_for_each_safe_rcu(pos, next, &utrace->engines) { + engine = list_entry(pos, struct utrace_attached_engine, entry); + if (engine->flags & ev) { + if (is_exit) + REPORT(report_syscall_exit, regs); + else + REPORT(report_syscall_entry, regs); + } + if (action & UTRACE_ACTION_HIDE) + break; + } + action = check_detach(tsk, action); + killed = check_quiescent(tsk, action); + + if (!is_exit) { + if (unlikely(killed)) + /* + * We are continuing despite QUIESCE because of a + * SIGKILL. Don't let the system call actually + * proceed. + */ + tracehook_abort_syscall(regs); + + /* + * Clear TIF_SIGPENDING if it no longer needs to be set. + * It may have been set as part of quiescence, and won't + * ever have been cleared by another thread. For other + * reports, we can just leave it set and will go through + * utrace_get_signal to reset things. But here we are + * about to enter a syscall, which might bail out with an + * -ERESTART* error if it's set now. + */ + if (signal_pending(tsk)) { + spin_lock_irq(&tsk->sighand->siglock); + recalc_sigpending(); + spin_unlock_irq(&tsk->sighand->siglock); + } + } +} + + +/* + * This is pointed to by the utrace struct, but it's really a private + * structure between utrace_get_signal and utrace_inject_signal. + */ +struct utrace_signal +{ + siginfo_t *const info; + struct k_sigaction *return_ka; + int signr; +}; + + +/* + * Call each interested tracing engine's report_signal callback. + */ +static u32 +report_signal(struct task_struct *tsk, struct pt_regs *regs, + struct utrace *utrace, u32 action, + unsigned long flags1, unsigned long flags2, siginfo_t *info, + const struct k_sigaction *ka, struct k_sigaction *return_ka) +{ + struct list_head *pos, *next; + struct utrace_attached_engine *engine; + + /* XXX must change for sharing */ + list_for_each_safe_rcu(pos, next, &utrace->engines) { + engine = list_entry(pos, struct utrace_attached_engine, entry); + if ((engine->flags & flags1) && (engine->flags & flags2)) { + u32 disp = action & UTRACE_ACTION_OP_MASK; + action &= ~UTRACE_ACTION_OP_MASK; + REPORT(report_signal, regs, disp, info, ka, return_ka); + if ((action & UTRACE_ACTION_OP_MASK) == 0) + action |= disp; + if (action & UTRACE_ACTION_HIDE) + break; + } + } + + return action; +} + +void +utrace_signal_handler_singlestep(struct task_struct *tsk, struct pt_regs *regs) +{ + u32 action; + action = report_signal(tsk, regs, tsk->utrace, UTRACE_SIGNAL_HANDLER, + UTRACE_EVENT_SIGNAL_ALL, + UTRACE_ACTION_SINGLESTEP|UTRACE_ACTION_BLOCKSTEP, + NULL, NULL, NULL); + action = check_detach(tsk, action); + check_quiescent(tsk, action); +} + + +/* + * This is the hook from the signals code, called with the siglock held. + * Here is the ideal place to quiesce. We also dequeue and intercept signals. 
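+ *
+ * Return convention, matching the code below: a positive return is a
+ * signal number to deliver with *return_ka filled in; 0 means dequeue
+ * normally; -1 means reacquire the siglock and restart the check.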
+ */ +int +utrace_get_signal(struct task_struct *tsk, struct pt_regs *regs, + siginfo_t *info, struct k_sigaction *return_ka) + __releases(tsk->sighand->siglock) + __acquires(tsk->sighand->siglock) +{ + struct utrace *utrace; + struct utrace_signal signal = { info, return_ka, 0 }; + struct k_sigaction *ka; + unsigned long action, event; + + /* + * We could have been considered quiescent while we were in + * TASK_STOPPED, and detached asynchronously. If we woke up + * and checked tsk->utrace_flags before that was finished, + * we might be here with utrace already removed or in the + * middle of being removed. + */ + rcu_read_lock(); + utrace = rcu_dereference(tsk->utrace); + if (unlikely(utrace == NULL)) { + rcu_read_unlock(); + return 0; + } + if (!(tsk->utrace_flags & UTRACE_EVENT(JCTL))) { + /* + * It's possible we might have just been in TASK_STOPPED + * and subject to the aforementioned race. + * + * RCU makes it safe to get the utrace->lock even if it's + * being freed. Once we have that lock, either an external + * detach has finished and this struct has been freed, or + * else we know we are excluding any other detach attempt. + * Since we are no longer in TASK_STOPPED now, all we + * needed the lock for was to order any quiesce() call after us. + */ + spin_unlock_irq(&tsk->sighand->siglock); + spin_lock(&utrace->lock); + if (unlikely(tsk->utrace != utrace)) { + spin_unlock(&utrace->lock); + rcu_read_unlock(); + cond_resched(); + return -1; + } + spin_unlock(&utrace->lock); + spin_lock_irq(&tsk->sighand->siglock); + } + rcu_read_unlock(); + + /* + * If a signal was injected previously, it could not use our + * stack space directly. It had to allocate a data structure, + * which we can now copy out of and free. + * + * We don't have to lock access to u.live.signal because it's only + * touched by utrace_inject_signal when we're quiescent. + */ + if (utrace->u.live.signal != NULL) { + signal.signr = utrace->u.live.signal->signr; + copy_siginfo(info, utrace->u.live.signal->info); + if (utrace->u.live.signal->return_ka) + *return_ka = *utrace->u.live.signal->return_ka; + else + signal.return_ka = NULL; + kfree(utrace->u.live.signal); + utrace->u.live.signal = NULL; + } + + /* + * If we should quiesce, now is the time. + * First stash a pointer to the state on our stack, + * so that utrace_inject_signal can tell us what to do. + */ + if (tsk->utrace_flags & UTRACE_ACTION_QUIESCE) { + int killed = sigkill_pending(tsk); + if (!killed) { + spin_unlock_irq(&tsk->sighand->siglock); + + killed = utrace_quiescent(tsk, &signal); + + /* + * Noone wants us quiescent any more, we can take + * signals. Unless we have a forced signal to take, + * back out to the signal code to resynchronize after + * releasing the siglock. + */ + if (signal.signr == 0 && !killed) + /* + * This return value says to reacquire the + * siglock and check again. This will check + * for a pending group stop and process it + * before coming back here. + */ + return -1; + + spin_lock_irq(&tsk->sighand->siglock); + } + if (killed) { + /* + * The only reason we woke up now was because of a + * SIGKILL. Don't do normal dequeuing in case it + * might get a signal other than SIGKILL. That would + * perturb the death state so it might differ from + * what the debugger would have allowed to happen. + * Instead, pluck out just the SIGKILL to be sure + * we'll die immediately with nothing else different + * from the quiescent state the debugger wanted us in. 
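+			 * (Passing a mask with every signal but SIGKILL
+			 * set to dequeue_signal means only SIGKILL is
+			 * eligible to be dequeued here.)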
+			 */
+			sigset_t sigkill_only;
+			sigfillset(&sigkill_only);
+			sigdelset(&sigkill_only, SIGKILL);
+			killed = dequeue_signal(tsk, &sigkill_only, info);
+			BUG_ON(killed != SIGKILL);
+			*return_ka = tsk->sighand->action[killed - 1];
+			return killed;
+		}
+	}
+
+	/*
+	 * If a signal was injected, everything is in place now. Go do it.
+	 */
+	if (signal.signr != 0) {
+		if (signal.return_ka == NULL) {
+			ka = &tsk->sighand->action[signal.signr - 1];
+			if (ka->sa.sa_flags & SA_ONESHOT)
+				ka->sa.sa_handler = SIG_DFL;
+			*return_ka = *ka;
+		}
+		else
+			BUG_ON(signal.return_ka != return_ka);
+		return signal.signr;
+	}
+
+	/*
+	 * If no one is interested in intercepting signals, let the caller
+	 * just dequeue them normally.
+	 */
+	if ((tsk->utrace_flags & UTRACE_EVENT_SIGNAL_ALL) == 0)
+		return 0;
+
+	/*
+	 * Steal the next signal so we can let tracing engines examine it.
+	 * From the signal number and sigaction, determine what normal
+	 * delivery would do. If no engine perturbs it, we'll do that
+	 * by returning the signal number after setting *return_ka.
+	 */
+	signal.signr = dequeue_signal(tsk, &tsk->blocked, info);
+	if (signal.signr == 0)
+		return 0;
+
+	BUG_ON(signal.signr != info->si_signo);
+
+	ka = &tsk->sighand->action[signal.signr - 1];
+	*return_ka = *ka;
+
+	/*
+	 * We are never allowed to interfere with SIGKILL,
+	 * just punt after filling in *return_ka for our caller.
+	 */
+	if (signal.signr == SIGKILL)
+		return signal.signr;
+
+	if (ka->sa.sa_handler == SIG_IGN) {
+		event = UTRACE_EVENT(SIGNAL_IGN);
+		action = UTRACE_SIGNAL_IGN;
+	}
+	else if (ka->sa.sa_handler != SIG_DFL) {
+		event = UTRACE_EVENT(SIGNAL);
+		action = UTRACE_ACTION_RESUME;
+	}
+	else if (sig_kernel_coredump(signal.signr)) {
+		event = UTRACE_EVENT(SIGNAL_CORE);
+		action = UTRACE_SIGNAL_CORE;
+	}
+	else if (sig_kernel_ignore(signal.signr)) {
+		event = UTRACE_EVENT(SIGNAL_IGN);
+		action = UTRACE_SIGNAL_IGN;
+	}
+	else if (sig_kernel_stop(signal.signr)) {
+		event = UTRACE_EVENT(SIGNAL_STOP);
+		action = (signal.signr == SIGSTOP
+			  ? UTRACE_SIGNAL_STOP : UTRACE_SIGNAL_TSTP);
+	}
+	else {
+		event = UTRACE_EVENT(SIGNAL_TERM);
+		action = UTRACE_SIGNAL_TERM;
+	}
+
+	if (tsk->utrace_flags & event) {
+		/*
+		 * We have some interested engines, so tell them about the
+		 * signal and let them change its disposition.
+		 */
+
+		spin_unlock_irq(&tsk->sighand->siglock);
+
+		action = report_signal(tsk, regs, utrace, action, event, event,
+				       info, ka, return_ka);
+		action &= UTRACE_ACTION_OP_MASK;
+
+		if (action & UTRACE_SIGNAL_HOLD) {
+			struct sigqueue *q = sigqueue_alloc();
+			if (likely(q != NULL)) {
+				q->flags = 0;
+				copy_siginfo(&q->info, info);
+			}
+			action &= ~UTRACE_SIGNAL_HOLD;
+			spin_lock_irq(&tsk->sighand->siglock);
+			sigaddset(&tsk->pending.signal, info->si_signo);
+			if (likely(q != NULL))
+				list_add(&q->list, &tsk->pending.list);
+		}
+		else
+			spin_lock_irq(&tsk->sighand->siglock);
+
+		recalc_sigpending();
+	}
+
+	/*
+	 * We express the chosen action to the signals code in terms
+	 * of a representative signal whose default action does it.
+	 */
+	switch (action) {
+	case UTRACE_SIGNAL_IGN:
+		/*
+		 * We've eaten the signal. That's all we do.
+		 * Tell the caller to restart.
+		 */
+		spin_unlock_irq(&tsk->sighand->siglock);
+		return -1;
+
+	case UTRACE_ACTION_RESUME:
+	case UTRACE_SIGNAL_DELIVER:
+		/*
+		 * The handler will run. We do the SA_ONESHOT work here
+		 * since the normal path will only touch *return_ka now.
+ */ + if (return_ka->sa.sa_flags & SA_ONESHOT) + ka->sa.sa_handler = SIG_DFL; + break; + + case UTRACE_SIGNAL_TSTP: + signal.signr = SIGTSTP; + tsk->signal->flags |= SIGNAL_STOP_DEQUEUED; + return_ka->sa.sa_handler = SIG_DFL; + break; + + case UTRACE_SIGNAL_STOP: + signal.signr = SIGSTOP; + tsk->signal->flags |= SIGNAL_STOP_DEQUEUED; + return_ka->sa.sa_handler = SIG_DFL; + break; + + case UTRACE_SIGNAL_TERM: + signal.signr = SIGTERM; + return_ka->sa.sa_handler = SIG_DFL; + break; + + case UTRACE_SIGNAL_CORE: + signal.signr = SIGQUIT; + return_ka->sa.sa_handler = SIG_DFL; + break; + + default: + BUG(); + } + + return signal.signr; +} + + +/** + * utrace_inject_signal - Cause a specified signal delivery. + * @target: thread to process the signal + * @engine: engine attached to @target + * @action: signal disposition + * @info: signal number and details + * @ka: sigaction() settings to follow when @action is %UTRACE_SIGNAL_DELIVER + * + * The @target thread must be quiescent (or the current thread). + * The @action has %UTRACE_SIGNAL_* bits as returned from a report_signal() + * callback. If @ka is non-null, it gives the sigaction to follow for + * %UTRACE_SIGNAL_DELIVER; otherwise, the installed sigaction at the time + * of delivery is used. + */ +int +utrace_inject_signal(struct task_struct *target, + struct utrace_attached_engine *engine, + u32 action, siginfo_t *info, + const struct k_sigaction *ka) +{ + struct utrace *utrace; + struct utrace_signal *signal; + int ret; + + if (info->si_signo == 0 || !valid_signal(info->si_signo)) + return -EINVAL; + + utrace = get_utrace_lock_attached(target, engine); + if (unlikely(IS_ERR(utrace))) + return PTR_ERR(utrace); + + ret = 0; + signal = utrace->u.live.signal; + if (unlikely(target->exit_state)) + ret = -ESRCH; + else if (signal == NULL) { + ret = -ENOSYS; /* XXX */ + } + else if (signal->signr != 0) + ret = -EAGAIN; + else { + if (info != signal->info) + copy_siginfo(signal->info, info); + + switch (action) { + default: + ret = -EINVAL; + break; + + case UTRACE_SIGNAL_IGN: + break; + + case UTRACE_ACTION_RESUME: + case UTRACE_SIGNAL_DELIVER: + /* + * The handler will run. We do the SA_ONESHOT work + * here since the normal path will not touch the + * real sigaction when using an injected signal. 
+			 */
+			if (ka == NULL)
+				signal->return_ka = NULL;
+			else if (ka != signal->return_ka)
+				*signal->return_ka = *ka;
+			if (ka && ka->sa.sa_flags & SA_ONESHOT) {
+				struct k_sigaction *a;
+				a = &target->sighand->action[info->si_signo-1];
+				spin_lock_irq(&target->sighand->siglock);
+				a->sa.sa_handler = SIG_DFL;
+				spin_unlock_irq(&target->sighand->siglock);
+			}
+			signal->signr = info->si_signo;
+			break;
+
+		case UTRACE_SIGNAL_TSTP:
+			signal->signr = SIGTSTP;
+			spin_lock_irq(&target->sighand->siglock);
+			target->signal->flags |= SIGNAL_STOP_DEQUEUED;
+			spin_unlock_irq(&target->sighand->siglock);
+			signal->return_ka->sa.sa_handler = SIG_DFL;
+			break;
+
+		case UTRACE_SIGNAL_STOP:
+			signal->signr = SIGSTOP;
+			spin_lock_irq(&target->sighand->siglock);
+			target->signal->flags |= SIGNAL_STOP_DEQUEUED;
+			spin_unlock_irq(&target->sighand->siglock);
+			signal->return_ka->sa.sa_handler = SIG_DFL;
+			break;
+
+		case UTRACE_SIGNAL_TERM:
+			signal->signr = SIGTERM;
+			signal->return_ka->sa.sa_handler = SIG_DFL;
+			break;
+
+		case UTRACE_SIGNAL_CORE:
+			signal->signr = SIGQUIT;
+			signal->return_ka->sa.sa_handler = SIG_DFL;
+			break;
+		}
+	}
+
+	spin_unlock(&utrace->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(utrace_inject_signal);
+
+/**
+ * utrace_regset - Prepare to access a thread's machine state.
+ * @target: thread to examine
+ * @engine: engine attached to @target
+ * @view: &struct utrace_regset_view providing machine state description
+ * @which: index into regsets provided by @view
+ *
+ * Prepare to access a thread's machine state,
+ * see &struct utrace_regset in <linux/tracehook.h>.
+ * The given thread must be quiescent (or the current thread). When this
+ * returns, the &struct utrace_regset calls may be used to interrogate or
+ * change the thread's state. Do not cache the returned pointer when the
+ * thread can resume. You must call utrace_regset() to ensure that
+ * context switching has completed and consistent state is available.
+ */
+const struct utrace_regset *
+utrace_regset(struct task_struct *target,
+	      struct utrace_attached_engine *engine,
+	      const struct utrace_regset_view *view, int which)
+{
+	if (unlikely((unsigned) which >= view->n))
+		return NULL;
+
+	if (target != current)
+		wait_task_inactive(target);
+
+	return &view->regsets[which];
+}
+EXPORT_SYMBOL_GPL(utrace_regset);
+
+/*
+ * This is declared in linux/tracehook.h and defined in machine-dependent
+ * code. We put the export here to ensure no machine forgets it.
+ */
+EXPORT_SYMBOL_GPL(utrace_native_view);
+
+
+/**
+ * utrace_tracer_task - Find the task using ptrace on this one.
+ * @target: task in question
+ *
+ * Return the &struct task_struct for the task using ptrace on this one,
+ * or %NULL. Must be called with rcu_read_lock() held to keep the returned
+ * struct alive.
+ *
+ * At exec time, this may be called with task_lock() still held from when
+ * tracehook_unsafe_exec() was just called. In that case it must give
+ * results consistent with those unsafe_exec() results, i.e. non-%NULL if
+ * any %LSM_UNSAFE_PTRACE_* bits were set.
+ *
+ * The value is also used to display after "TracerPid:" in /proc/PID/status,
+ * where it is called with only rcu_read_lock() held.
+ */
+struct task_struct *
+utrace_tracer_task(struct task_struct *target)
+{
+	struct utrace *utrace;
+	struct task_struct *tracer = NULL;
+
+	utrace = rcu_dereference(target->utrace);
+	if (utrace != NULL) {
+		struct list_head *pos, *next;
+		struct utrace_attached_engine *engine;
+		const struct utrace_engine_ops *ops;
+		list_for_each_safe_rcu(pos, next, &utrace->engines) {
+			engine = list_entry(pos, struct utrace_attached_engine,
+					    entry);
+			ops = rcu_dereference(engine->ops);
+			if (ops->tracer_task) {
+				tracer = (*ops->tracer_task)(engine, target);
+				if (tracer != NULL)
+					break;
+			}
+		}
+	}
+
+	return tracer;
+}
+
+int
+utrace_allow_access_process_vm(struct task_struct *target)
+{
+	struct utrace *utrace;
+	int ret = 0;
+
+	rcu_read_lock();
+	utrace = rcu_dereference(target->utrace);
+	if (utrace != NULL) {
+		struct list_head *pos, *next;
+		struct utrace_attached_engine *engine;
+		const struct utrace_engine_ops *ops;
+		list_for_each_safe_rcu(pos, next, &utrace->engines) {
+			engine = list_entry(pos, struct utrace_attached_engine,
+					    entry);
+			ops = rcu_dereference(engine->ops);
+			if (ops->allow_access_process_vm) {
+				ret = (*ops->allow_access_process_vm)(engine,
+								      target,
+								      current);
+				if (ret)
+					break;
+			}
+		}
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+
+/*
+ * Called on the current task to return LSM_UNSAFE_* bits implied by tracing.
+ * Called with task_lock() held.
+ */
+int
+utrace_unsafe_exec(struct task_struct *tsk)
+{
+	struct utrace *utrace = tsk->utrace;
+	struct list_head *pos, *next;
+	struct utrace_attached_engine *engine;
+	const struct utrace_engine_ops *ops;
+	int unsafe = 0;
+
+	/* XXX must change for sharing */
+	list_for_each_safe_rcu(pos, next, &utrace->engines) {
+		engine = list_entry(pos, struct utrace_attached_engine, entry);
+		ops = rcu_dereference(engine->ops);
+		if (ops->unsafe_exec)
+			unsafe |= (*ops->unsafe_exec)(engine, tsk);
+	}
+
+	return unsafe;
+}
Index: b/Documentation/utrace.txt
===================================================================
--- /dev/null
+++ b/Documentation/utrace.txt
@@ -0,0 +1,579 @@
+DRAFT DRAFT DRAFT	WORK IN PROGRESS	DRAFT DRAFT DRAFT
+
+This is work in progress and likely to change.
+
+
+				Roland McGrath
+
+---
+
+		User Debugging Data & Event Rendezvous
+		---- --------- ---- - ----- ----------
+
+See linux/utrace.h for all the declarations used here.
+See also linux/tracehook.h for the utrace_regset declarations.
+
+utrace is infrastructure code for tracing and controlling user
+threads. This is the foundation for writing tracing engines, which
+can be loadable kernel modules. The utrace interfaces provide three
+basic facilities:
+
+* Thread event reporting
+
+  Tracing engines can request callbacks for events of interest in
+  the thread: signals, system calls, exit, exec, clone, etc.
+
+* Core thread control
+
+  Tracing engines can prevent a thread from running (keeping it in
+  TASK_TRACED state), or make it single-step or block-step (when
+  hardware supports it). Engines can cause a thread to abort system
+  calls, change the behavior of signals, and inject signal-style
+  actions at will.
+
+* Thread machine state access
+
+  Tracing engines can read and write a thread's registers and
+  similar per-thread CPU state.
+
+
+	Tracing engines
+	------- -------
+
+The basic actors in utrace are the thread and the tracing engine.
+A tracing engine is some body of code that calls into the utrace_*
+interfaces, represented by a struct utrace_engine_ops. (Usually it's
+a kernel module, though the legacy ptrace support is a tracing engine
+that is not in a kernel module.) The utrace interface operates on
+individual threads (struct task_struct). If an engine wants to
+treat several threads as a group, that is up to its higher-level
+code. Using utrace starts out by attaching an engine to a thread.
+
+	struct utrace_attached_engine *
+	utrace_attach(struct task_struct *target, int flags,
+		      const struct utrace_engine_ops *ops, void *data);
+
+Calling utrace_attach is what sets up a tracing engine to trace a
+thread. Use UTRACE_ATTACH_CREATE in flags, and pass your engine's ops.
+Check the return value with IS_ERR. If successful, it returns a
+struct pointer that is the handle used in all other utrace_* calls.
+The data argument is stored in the utrace_attached_engine structure,
+for your code to use however it wants.
+
+	int utrace_detach(struct task_struct *target,
+			  struct utrace_attached_engine *engine);
+
+The utrace_detach call removes an engine from a thread.
+No more callbacks will be made after this returns success.
+
+
+An attached engine does nothing by default.
+An engine makes something happen by setting its flags.
+
+	int utrace_set_flags(struct task_struct *target,
+			     struct utrace_attached_engine *engine,
+			     unsigned long flags);
+
+The synchronization issues related to utrace_detach and utrace_set_flags
+are discussed further below in "Teardown Races".
+
+
+	Action Flags
+	------ -----
+
+There are two kinds of flags that an attached engine can set: event
+flags, and action flags. Event flags register interest in particular
+events; when an event happens and an engine has the right event flag
+set, it gets a callback. Action flags change the normal behavior of
+the thread. The action flags available are:
+
+	UTRACE_ACTION_QUIESCE
+
+		The thread will stay quiescent (see below). As long as
+		any engine asserts the QUIESCE action flag, the thread
+		will not resume running in user mode. (Usually it will
+		be in TASK_TRACED state.) Nothing will wake the thread
+		up except for SIGKILL (and implicit SIGKILLs such as a
+		core dump in another thread sharing the same address
+		space, or a group exit, fatal signal, or exec in another
+		thread in the same thread group).
+
+	UTRACE_ACTION_SINGLESTEP
+
+		When the thread runs, it will run one instruction and
+		then trap. (Exiting a system call or entering a signal
+		handler is considered "an instruction" for this.) This
+		is available on most machines. This can be used only if
+		ARCH_HAS_SINGLE_STEP is #define'd by <asm/tracehook.h>
+		and evaluates to nonzero.
+
+	UTRACE_ACTION_BLOCKSTEP
+
+		When the thread runs, it will run until the next branch
+		taken, and then trap. (Exiting a system call or
+		entering a signal handler is considered taking a branch
+		for this.) When the SINGLESTEP flag is set, BLOCKSTEP
+		has no effect. This is only available on some machines.
+		This can be used only if ARCH_HAS_BLOCK_STEP is
+		#define'd by <asm/tracehook.h> and evaluates to nonzero.
+
+	UTRACE_ACTION_NOREAP
+
+		When the thread exits or stops for job control, its
+		parent process will not receive a SIGCHLD and the
+		parent's wait calls will not wake up or report the child
+		as dead. Even a self-reaping thread will remain a
+		zombie. Note that this cannot prevent the reaping done
+		when an exec is done by another thread in the same
+		thread group; in that event, a REAP event (and callback
+		if requested) will happen regardless of this flag.
+		A well-behaved tracing engine does not want to interfere
+		with the parent's normal notifications. This is
+		provided mainly for the ptrace compatibility code to
+		implement the traditional behavior.
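+
+For illustration, here is roughly what a minimal attach might look
+like. This is only a sketch: the my_* names are examples, the ops
+vector's callbacks are assumed to be filled in elsewhere, and real
+code would do more error checking. (The UTRACE_EVENT macro is
+described just below.)
+
+	static const struct utrace_engine_ops my_ops;	/* callbacks */
+
+	static int my_attach(struct task_struct *target)
+	{
+		struct utrace_attached_engine *engine;
+
+		engine = utrace_attach(target, UTRACE_ATTACH_CREATE,
+				       &my_ops, NULL);
+		if (IS_ERR(engine))
+			return PTR_ERR(engine);
+
+		/*
+		 * An engine does nothing until its flags are set;
+		 * ask for quiescence and a callback when it's reached.
+		 */
+		return utrace_set_flags(target, engine,
+					UTRACE_ACTION_QUIESCE
+					| UTRACE_EVENT(QUIESCE));
+	}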
+Event flags are specified using the macro UTRACE_EVENT(TYPE).
+Each event type is associated with a report_* callback in struct
+utrace_engine_ops. A tracing engine can leave unused callbacks NULL.
+The only callbacks required are those used by the event flags it sets.
+
+Many engines can be attached to each thread. When a thread has an
+event, each engine gets a report_* callback if it has set the event flag
+for that event type. Engines are called in the order they attached.
+
+Each callback takes arguments giving the details of the particular
+event. The first two arguments to every callback are the struct
+utrace_attached_engine and struct task_struct pointers for the engine
+and the thread producing the event. Usually this will be the current
+thread that is running the callback functions.
+
+The return value of report_* callbacks is a bitmask. Some bits are
+common to all callbacks, and some are particular to that callback and
+event type. The value zero (UTRACE_ACTION_RESUME) always means the
+simplest thing: do what would have happened with no tracing engine here.
+These are the flags that can be set in any report_* return value:
+
+	UTRACE_ACTION_NEWSTATE
+
+		Update the action state flags, described above. Those
+		bits from the return value (UTRACE_ACTION_STATE_MASK)
+		replace those bits in the engine's flags. This has the
+		same effect as calling utrace_set_flags, but is a more
+		efficient short-cut. To change the event flags, you must
+		call utrace_set_flags.
+
+	UTRACE_ACTION_DETACH
+
+		Detach this engine. This has the effect of calling
+		utrace_detach, but is a more efficient short-cut.
+
+	UTRACE_ACTION_HIDE
+
+		Hide this event from other tracing engines. This is
+		only appropriate to do when the event was induced by
+		some action of this engine, such as a breakpoint trap.
+		Some events cannot be hidden, since every engine has to
+		know about them: exit, death, reap.
+
+The return value bits in UTRACE_ACTION_OP_MASK indicate a change to the
+normal behavior of the event taking place. If zero, the thread does
+whatever that event normally means. For report_signal, other values
+control the disposition of the signal.
+
+
+	Quiescence
+	----------
+
+To control another thread and access its state, it must be "quiescent".
+This means that it is stopped and won't start running again while we access
+it. A quiescent thread is stopped in a place close to user mode, where the
+user state can be accessed safely; either it's about to return to user
+mode, or it's just entered the kernel from user mode, or it has already
+finished exiting (EXIT_ZOMBIE). Setting the UTRACE_ACTION_QUIESCE action
+flag will force the attached thread to become quiescent soon. After
+setting the flag, an engine must wait for an event callback when the thread
+becomes quiescent. The thread may be running on another CPU, or may be in
+an uninterruptible wait. When it is ready to be examined, it will make
+callbacks to engines that set the UTRACE_EVENT(QUIESCE) event flag.
+
+As long as some engine has UTRACE_ACTION_QUIESCE set, then the thread will
+remain stopped. SIGKILL will wake it up, but it will not run user code.
+When the flag is cleared via utrace_set_flags or a callback return value,
+the thread starts running again. (See also "Teardown Races", below.)
+
+During the event callbacks (report_*), the thread in question makes the
+callback from a safe place. It is not quiescent, but it can safely access
+its own state. Callbacks can access thread state directly without setting
+the QUIESCE action flag. If a callback does want to prevent the thread
+from resuming normal execution, it *must* use the QUIESCE action state
+rather than simply blocking; see "Core Events & Callbacks", below.
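+
+A sketch of the resulting stop/examine/resume pattern (the my_* names
+are illustrative; error checking, and the synchronization a real
+engine needs if this can happen more than once, are elided):
+
+	static DECLARE_COMPLETION(my_quiesced);
+
+	static u32 my_report_quiesce(struct utrace_attached_engine *engine,
+				     struct task_struct *tsk)
+	{
+		complete(&my_quiesced);
+		/* QUIESCE is still set, so the thread stays stopped. */
+		return UTRACE_ACTION_RESUME;
+	}
+
+	static void my_examine(struct task_struct *target,
+			       struct utrace_attached_engine *engine)
+	{
+		utrace_set_flags(target, engine, UTRACE_ACTION_QUIESCE
+				 | UTRACE_EVENT(QUIESCE));
+		wait_for_completion(&my_quiesced);
+		/* ... access target's state here ... */
+		utrace_set_flags(target, engine, UTRACE_EVENT(QUIESCE));
+	}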
+
+
+	Thread control
+	------ -------
+
+These calls must be made on a quiescent thread (or the current thread):
+
+	int utrace_inject_signal(struct task_struct *target,
+				 struct utrace_attached_engine *engine,
+				 u32 action, siginfo_t *info,
+				 const struct k_sigaction *ka);
+
+Cause a specified signal delivery in the target thread. This is not
+like kill, which generates a signal to be dequeued and delivered later.
+Injection directs the thread to deliver a signal now, before it next
+resumes in user mode or dequeues any other pending signal. It's as if
+the tracing engine intercepted a signal event and its report_signal
+callback returned the action argument as its value (see below). The
+info and ka arguments serve the same purposes as their counterparts in
+a report_signal callback.
+
+	const struct utrace_regset *
+	utrace_regset(struct task_struct *target,
+		      struct utrace_attached_engine *engine,
+		      const struct utrace_regset_view *view,
+		      int which);
+
+Get access to machine state for the thread. The struct utrace_regset_view
+indicates a view of machine state, corresponding to a user mode
+architecture personality (such as 32-bit or 64-bit versions of a machine).
+The which argument selects one of the register sets available in that view.
+The utrace_regset call must be made before accessing any machine state,
+each time the thread has been running and has then become quiescent.
+It ensures that the thread's state is ready to be accessed, and returns
+the struct utrace_regset giving its accessor functions.
+
+XXX needs front ends for argument checks, export utrace_native_view
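+
+For instance, making a quiescent thread deliver a SIGUSR1 as if a
+report_signal callback had returned UTRACE_SIGNAL_DELIVER might look
+like this sketch (error handling elided; the UTRACE_SIGNAL_* values
+are described with the signal events below):
+
+	siginfo_t info;
+	int err;
+
+	memset(&info, 0, sizeof(info));
+	info.si_signo = SIGUSR1;
+	info.si_code = SI_KERNEL;
+
+	/* A null ka says to use the sigaction installed at delivery time. */
+	err = utrace_inject_signal(target, engine,
+				   UTRACE_SIGNAL_DELIVER, &info, NULL);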
+
+
+	Core Events & Callbacks
+	---- ------ - ---------
+
+Event reporting callbacks have details particular to the event type, but
+are all called in similar environments and have the same constraints.
+Callbacks are made from safe spots, where no locks are held, no special
+resources are pinned, and the user-mode state of the thread is accessible.
+So, callback code has a pretty free hand. But to be a good citizen,
+callback code should never block for long periods. It is fine to block in
+kmalloc and the like, but never wait for i/o or for user mode to do
+something. If you need the thread to wait, set UTRACE_ACTION_QUIESCE and
+return from the callback quickly. When your i/o finishes or whatever, you
+can use utrace_set_flags to resume the thread.
+
+Well-behaved callbacks are important to maintain two essential properties
+of the interface. The first of these is that unrelated tracing engines not
+interfere with each other. If your engine's event callback does not return
+quickly, then another engine won't get the event notification in a timely
+manner. The second important property is that tracing be as noninvasive as
+possible to the normal operation of the system overall and of the traced
+thread in particular. That is, attached tracing engines should not perturb
+a thread's behavior, except to the extent that changing its user-visible
+state is explicitly what you want to do. (Obviously some perturbation is
+unavoidable, primarily timing changes, ranging from small delays due to the
+overhead of tracing, to arbitrary pauses in user code execution when a user
+stops a thread with a debugger for examination. When doing asynchronous
+utrace_attach to a thread doing a system call, more troublesome side
+effects are possible.) Even when you explicitly want the perturbation of
+making the traced thread block, just blocking directly in your callback has
+more unwanted effects. For example, the CLONE event callbacks are called
+when the new child thread has been created but not yet started running; the
+child can never be scheduled until the CLONE tracing callbacks return.
+(This allows engines tracing the parent to attach to the child.) If a
+CLONE event callback blocks the parent thread, it also prevents the child
+thread from running (even to process a SIGKILL). If what you want is to
+make both the parent and child block, then use utrace_attach on the child
+and then set the QUIESCE action state flag on both threads. A more crucial
+problem with blocking in callbacks is that it can prevent SIGKILL from
+working. A thread that is blocking due to UTRACE_ACTION_QUIESCE will still
+wake up and die immediately when sent a SIGKILL, as all threads should.
+Relying on the utrace infrastructure rather than on private synchronization
+calls in event callbacks is an important way to help keep tracing robustly
+noninvasive.
+
+
+UTRACE_EVENT(REAP)		Dead thread has been reaped
+Callback:
+	void (*report_reap)(struct utrace_attached_engine *engine,
+			    struct task_struct *tsk);
+
+This means the parent called wait, or else this was a detached thread or
+a process whose parent ignores SIGCHLD. This cannot happen while the
+UTRACE_ACTION_NOREAP flag is set. This is the only callback you are
+guaranteed to get (if you set the flag; but see "Teardown Races", below).
+
+Unlike other callbacks, this can be called from the parent's context
+rather than from the traced thread itself--it must not delay the parent by
+blocking. This callback is different from all others: it returns void.
+Once you get this callback, your engine is automatically detached and you
+cannot access this thread or use this struct utrace_attached_engine handle
+any longer. This is the place to clean up your data structures and
+synchronize with your code that might try to make utrace_* calls using this
+engine data structure. The struct is still valid during this callback,
+but will be freed soon after it returns (via RCU).
+
+In all other callbacks, the return value is as described above.
+The common UTRACE_ACTION_* flags in the return value are always observed.
+Unless otherwise specified below, other bits in the return value are ignored.
+
+
+UTRACE_EVENT(QUIESCE)		Thread is quiescent
+Callback:
+	u32 (*report_quiesce)(struct utrace_attached_engine *engine,
+			      struct task_struct *tsk);
+
+This is the least interesting callback. It happens at any safe spot,
+including after any other event callback. This lets the tracing engine
+know that it is safe to access the thread's state, or to report to users
+that it has stopped running user code.
+
+UTRACE_EVENT(CLONE)		Thread is creating a child
+Callback:
+	u32 (*report_clone)(struct utrace_attached_engine *engine,
+			    struct task_struct *parent,
+			    unsigned long clone_flags,
+			    struct task_struct *child);
+
+A clone/clone2/fork/vfork system call has succeeded in creating a new
+thread or child process. The new process is fully formed, but not yet
+running. During this callback, other tracing engines are prevented from
+using utrace_attach asynchronously on the child, so that engines tracing
+the parent get the first opportunity to attach. After this callback
+returns, the child will start and the parent's system call will return.
+If CLONE_VFORK is set, the parent will block before returning.
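+
+For example, an engine following an entire process tree might attach
+itself to each new child from its report_clone callback (a sketch;
+my_ops is the engine's own ops vector and error checking is elided):
+
+	static u32 my_report_clone(struct utrace_attached_engine *engine,
+				   struct task_struct *parent,
+				   unsigned long clone_flags,
+				   struct task_struct *child)
+	{
+		struct utrace_attached_engine *child_engine;
+
+		child_engine = utrace_attach(child, UTRACE_ATTACH_CREATE,
+					     &my_ops, engine->data);
+		if (!IS_ERR(child_engine))
+			/* Trace the child the same way as the parent. */
+			utrace_set_flags(child, child_engine, engine->flags);
+		return UTRACE_ACTION_RESUME;
+	}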
+
+UTRACE_EVENT(VFORK_DONE)	Finished waiting for CLONE_VFORK child
+Callback:
+	u32 (*report_vfork_done)(struct utrace_attached_engine *engine,
+				 struct task_struct *parent, pid_t child_pid);
+
+Event reported for parent using CLONE_VFORK or vfork system call.
+The child has died or exec'd, so the vfork parent has unblocked
+and is about to return child_pid.
+
+UTRACE_EVENT(EXEC)		Completed exec
+Callback:
+	u32 (*report_exec)(struct utrace_attached_engine *engine,
+			   struct task_struct *tsk,
+			   const struct linux_binprm *bprm,
+			   struct pt_regs *regs);
+
+An execve system call has succeeded and the new program is about to
+start running. The initial user register state is handy to be tweaked
+directly, or utrace_regset can be used for full machine state access.
+
+UTRACE_EVENT(EXIT)		Thread is exiting
+Callback:
+	u32 (*report_exit)(struct utrace_attached_engine *engine,
+			   struct task_struct *tsk,
+			   long orig_code, long *code);
+
+The thread is exiting and cannot be prevented from doing so, but all its
+state is still live. The *code value will be the wait result seen by
+the parent, and can be changed by this engine or others. The orig_code
+value is the real status, not changed by any tracing engine.
+
+UTRACE_EVENT(DEATH)		Thread has finished exiting
+Callback:
+	u32 (*report_death)(struct utrace_attached_engine *engine,
+			    struct task_struct *tsk);
+
+The thread is really dead now. If the UTRACE_ACTION_NOREAP flag remains
+set after this callback, it remains an unreported zombie; if the flag was
+not set already, then it is too late to set it now--its parent has already
+been sent SIGCHLD. Otherwise, it might be reaped by its parent, or
+self-reap immediately. Though the actual reaping may happen in parallel, a
+report_reap callback will always be ordered after a report_death callback.
+
+UTRACE_EVENT(SYSCALL_ENTRY)	Thread has entered kernel for a system call
+Callback:
+	u32 (*report_syscall_entry)(struct utrace_attached_engine *engine,
+				    struct task_struct *tsk,
+				    struct pt_regs *regs);
+
+The system call number and arguments can be seen and modified in the
+registers. The return value register has -ENOSYS, which will be
+returned for an invalid system call. The macro tracehook_abort_syscall(regs)
+will abort the system call so that we go immediately to syscall exit,
+and return -ENOSYS (or whatever the register state is changed to). If
+tracing engines keep the thread quiescent here, the system call will
+not be performed until it resumes.
+
+UTRACE_EVENT(SYSCALL_EXIT)	Thread is leaving kernel after a system call
+Callback:
+	u32 (*report_syscall_exit)(struct utrace_attached_engine *engine,
+				   struct task_struct *tsk,
+				   struct pt_regs *regs);
+
+The return value can be seen and modified in the registers. If the
+thread is allowed to resume, it will see any pending signals and then
+return to user mode.
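+
+For example, an engine could hold the thread at every system call
+entry so that it can examine or modify the arguments before resuming
+it (a sketch; the my_ name is illustrative and the engine must have
+set UTRACE_EVENT(SYSCALL_ENTRY) to get this callback):
+
+	static u32 my_report_syscall_entry(
+		struct utrace_attached_engine *engine,
+		struct task_struct *tsk, struct pt_regs *regs)
+	{
+		/*
+		 * UTRACE_ACTION_NEWSTATE makes the state bits in this
+		 * return value replace the engine's current ones, so
+		 * this sets QUIESCE: the system call will not be
+		 * performed until the thread is resumed.
+		 */
+		return UTRACE_ACTION_NEWSTATE | UTRACE_ACTION_QUIESCE;
+	}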
+
+UTRACE_EVENT(SIGNAL)		Signal caught by user handler
+UTRACE_EVENT(SIGNAL_IGN)	Signal with no effect (SIG_IGN or default)
+UTRACE_EVENT(SIGNAL_STOP)	Job control stop signal
+UTRACE_EVENT(SIGNAL_TERM)	Fatal termination signal
+UTRACE_EVENT(SIGNAL_CORE)	Fatal core-dump signal
+UTRACE_EVENT_SIGNAL_ALL		All of the above (bitmask)
+Callback:
+	u32 (*report_signal)(struct utrace_attached_engine *engine,
+			     struct task_struct *tsk,
+			     struct pt_regs *regs,
+			     u32 action, siginfo_t *info,
+			     const struct k_sigaction *orig_ka,
+			     struct k_sigaction *return_ka);
+
+There are five types of signal events, but all use the same callback.
+These happen when a thread is dequeuing a signal to be delivered.
+(Not immediately when the signal is sent, and not when the signal is
+blocked.) No signal event is reported for SIGKILL; no tracing engine
+can prevent it from killing the thread immediately. The specific
+event types allow an engine to trace signals based on what they do.
+UTRACE_EVENT_SIGNAL_ALL is all of them OR'd together, to trace all
+signals (except SIGKILL). A subset of these event flags can be used
+e.g. to catch only fatal signals, not handled ones, or to catch only
+core-dump signals, not normal termination signals.
+
+The action argument says what the signal's default disposition is:
+
+	UTRACE_SIGNAL_DELIVER	Run the user handler from sigaction.
+	UTRACE_SIGNAL_IGN	Do nothing, ignore the signal.
+	UTRACE_SIGNAL_TERM	Terminate the process.
+	UTRACE_SIGNAL_CORE	Terminate the process and write a core dump.
+	UTRACE_SIGNAL_STOP	Absolutely stop the process, a la SIGSTOP.
+	UTRACE_SIGNAL_TSTP	Job control stop (no stop if orphaned).
+
+This selection is made from consulting the process's sigaction and the
+default action for the signal number, but may already have been changed by
+an earlier tracing engine (in which case you see its override). A return
+value of UTRACE_ACTION_RESUME means to carry out this action. If instead
+UTRACE_SIGNAL_* bits are in the return value, that overrides the normal
+behavior of the signal.
+
+The signal number and other details of the signal are in info, and
+this data can be changed to make the thread see a different signal.
+A return value of UTRACE_SIGNAL_DELIVER says to follow the sigaction in
+return_ka, which can specify a user handler or SIG_IGN to ignore the
+signal or SIG_DFL to follow the default action for info->si_signo.
+The orig_ka parameter shows the process's sigaction at the time the
+signal was dequeued, and return_ka initially contains this. Tracing
+engines can modify return_ka to change the effects of delivery.
+For other UTRACE_SIGNAL_* return values, return_ka is ignored.
+
+UTRACE_SIGNAL_HOLD is a flag bit that can be OR'd into the return
+value. It says to push the signal back on the thread's queue, with
+the signal number and details possibly changed in info. When the
+thread is allowed to resume, it will dequeue and report it again.
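+
+As an illustration, a report_signal callback that swallows SIGUSR1 and
+leaves every other signal alone might look like this (sketch only; the
+engine would set UTRACE_EVENT_SIGNAL_ALL, or just the event flags for
+the dispositions it cares about, to get these callbacks):
+
+	static u32 my_report_signal(struct utrace_attached_engine *engine,
+				    struct task_struct *tsk,
+				    struct pt_regs *regs,
+				    u32 action, siginfo_t *info,
+				    const struct k_sigaction *orig_ka,
+				    struct k_sigaction *return_ka)
+	{
+		if (info->si_signo == SIGUSR1)
+			return UTRACE_SIGNAL_IGN;	/* eat the signal */
+		return UTRACE_ACTION_RESUME;	/* normal disposition */
+	}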
+
+
+	Teardown Races
+	-------- -----
+
+Ordinarily synchronization issues for tracing engines are kept fairly
+straightforward by using quiescence (see above): you make a thread
+quiescent and then once it makes the report_quiesce callback it cannot
+do anything else that would result in another callback, until you let
+it. This simple arrangement avoids complex and error-prone code in
+each one of a tracing engine's event callbacks to keep them serialized
+with the engine's other operations done on that thread from another
+thread of control. However, giving tracing engines complete power to
+keep a traced thread stuck in place runs afoul of a more important
+kind of simplicity that the kernel overall guarantees: nothing can
+prevent or delay SIGKILL from making a thread die and release its
+resources. To preserve this important property of SIGKILL, it can,
+as a special case, break quiescence like nothing else normally can.
+This includes both explicit SIGKILL signals and the implicit SIGKILL
+sent to each other thread in the same thread group by a thread doing
+an exec, or processing a fatal signal, or making an exit_group system
+call. A tracing engine can prevent a thread from beginning the exit
+or exec or dying by signal (other than SIGKILL) if it is attached to
+that thread, but once the operation begins, no tracing engine can
+prevent or delay all other threads in the same thread group dying.
+
+As described above, the report_reap callback is always the final event
+in the life cycle of a traced thread. Tracing engines can use this as
+the trigger to clean up their own data structures. The report_death
+callback is always the penultimate event a tracing engine might see,
+except when the thread was already in the midst of dying when the
+engine attached. Many tracing engines will have no interest in when a
+parent reaps a dead process, and nothing they want to do with a zombie
+thread once it dies; for them, the report_death callback is the
+natural place to clean up data structures and detach. To facilitate
+writing such engines robustly, given the asynchrony of SIGKILL, and
+without error-prone manual implementation of synchronization schemes,
+the utrace infrastructure provides some special guarantees about the
+report_death and report_reap callbacks. It still takes some care to
+be sure your tracing engine is robust to teardown races, but these
+rules make it reasonably straightforward and concise to handle a lot
+of corner cases correctly.
+
+The first sort of guarantee concerns the core data structures
+themselves. struct utrace_attached_engine is allocated using RCU, as
+is task_struct. If you call utrace_attach under rcu_read_lock, then
+the pointer it returns will always be valid while in the RCU critical
+section. (Note that utrace_attach can block doing memory allocation,
+so you must consider the real critical section to start when
+utrace_attach returns; utrace_attach can never block when not given
+the UTRACE_ATTACH_CREATE flag bit.) Conversely, you can call
+utrace_attach outside of rcu_read_lock and though the pointer can
+become stale asynchronously if the thread dies and is reaped, you can
+safely pass it to a subsequent utrace_set_flags or utrace_detach call
+and will just get an -ESRCH error return. However, you must be sure
+the task_struct remains valid, either via get_task_struct or via RCU.
+The utrace infrastructure never holds task_struct references of its
+own. Though neither rcu_read_lock nor any other lock is held while
+making a callback, it's always guaranteed that the task_struct and
+the struct utrace_attached_engine passed as arguments remain valid
+until the callback function returns.
+
+The second guarantee is the serialization of death and reap event
+callbacks for a given thread. The actual reaping by the parent
+(release_task call) can occur simultaneously while the thread is
+still doing the final steps of dying, including the report_death
+callback.
+If a tracing engine has requested both DEATH and REAP
+event reports, it's guaranteed that the report_reap callback will not
+be made until after the report_death callback has returned. If the
+report_death callback itself detaches from the thread (with
+utrace_detach or with UTRACE_ACTION_DETACH in its return value), then
+the report_reap callback will never be made. Thus it is safe for a
+report_death callback to clean up data structures and detach.
+
+The final sort of guarantee is that a tracing engine will know for
+sure whether or not the report_death and/or report_reap callbacks
+will be made for a certain thread. These teardown races are
+disambiguated by the error return values of utrace_set_flags and
+utrace_detach. Normally utrace_detach returns zero, and this means
+that no more callbacks will be made. If the thread is in the midst
+of dying, utrace_detach returns -EALREADY to indicate that the
+report_death callback may already be in progress; when you get this
+error, you know that any cleanup your report_death callback does is
+about to happen or has just happened--note that if the report_death
+callback does not detach, the engine remains attached until the
+thread gets reaped. If the thread is in the midst of being reaped,
+utrace_detach returns -ESRCH to indicate that the report_reap
+callback may already be in progress; this means the engine is
+implicitly detached when the callback completes. This makes it
+possible for a tracing engine that has decided asynchronously to
+detach from a thread to safely clean up its data structures, knowing
+that no report_death or report_reap callback will try to do the
+same. utrace_detach returns -ESRCH when the struct
+utrace_attached_engine has already been detached, but is still a
+valid pointer because of rcu_read_lock. If RCU is used properly, a
+tracing engine can use this to safely synchronize its own
+independent multiple threads of control with each other and with its
+event callbacks that detach.
+
+In the same vein, utrace_set_flags normally returns zero; if the
+target thread was quiescent before the call, then after a successful
+call, no event callbacks not requested in the new flags will be made,
+and a report_quiesce callback will always be made if requested. It
+fails with -EALREADY if you try to clear UTRACE_EVENT(DEATH) when the
+report_death callback may already have begun, if you try to clear
+UTRACE_EVENT(REAP) when the report_reap callback may already have
+begun, if you try to newly set UTRACE_ACTION_NOREAP when the target
+may already have sent its parent SIGCHLD, or if you try to newly set
+UTRACE_EVENT(DEATH), UTRACE_EVENT(QUIESCE), or UTRACE_ACTION_QUIESCE,
+when the target is already dead or dying. Like utrace_detach, it
+returns -ESRCH when the thread has already been detached (including
+forcible detach on reaping). This lets the tracing engine know for
+sure which event callbacks it will or won't see after utrace_set_flags
+has returned. By checking for errors, it can know whether to clean up
+its data structures immediately or to let its callbacks do the work.
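+
+To make the rules concrete, here is a sketch of how an engine that
+decides asynchronously to detach might handle these return values
+(my_cleanup is an illustrative name for the engine's own teardown
+code):
+
+	static void my_detach(struct task_struct *target,
+			      struct utrace_attached_engine *engine)
+	{
+		switch (utrace_detach(target, engine)) {
+		case 0:
+			/* Detached; no more callbacks will be made. */
+			my_cleanup(engine);
+			break;
+		case -EALREADY:
+			/*
+			 * report_death may already be running; let the
+			 * cleanup in that callback do the work.
+			 */
+			break;
+		case -ESRCH:
+			/*
+			 * Already detached, or report_reap in progress;
+			 * the report_reap callback cleans up.
+			 */
+			break;
+		}
+	}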
Index: b/Documentation/DocBook/Makefile
===================================================================
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@@ -9,7 +9,7 @@
 DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \
 	    kernel-hacking.xml kernel-locking.xml deviceiobook.xml \
 	    procfs-guide.xml writing_usb_driver.xml \
-	    kernel-api.xml filesystems.xml lsm.xml usb.xml \
+	    kernel-api.xml filesystems.xml lsm.xml utrace.xml usb.xml \
 	    gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
 	    genericirq.xml
Index: b/Documentation/DocBook/utrace.tmpl
===================================================================
--- /dev/null
+++ b/Documentation/DocBook/utrace.tmpl
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="utrace">
+  <bookinfo>
+    <title>The utrace User Debugging Infrastructure</title>
+  </bookinfo>
+
+  <toc></toc>
+
+  <chapter id="core"><title>The utrace core API</title>
+!Iinclude/linux/utrace.h
+!Ekernel/utrace.c
+  </chapter>
+
+  <chapter id="machine"><title>Machine state access via utrace</title>
+!Finclude/linux/tracehook.h struct utrace_regset
+!Finclude/linux/tracehook.h struct utrace_regset_view
+!Finclude/linux/tracehook.h utrace_native_view
+  </chapter>
+
+</book>
Index: b/include/linux/tracehook.h
===================================================================
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -28,6 +28,7 @@
 #include
 #include
+#include <linux/utrace.h>
 
 struct linux_binprm;
 struct pt_regs;
@@ -342,6 +343,7 @@ utrace_regset_copyin_ignore(unsigned int
  */
 static inline void tracehook_init_task(struct task_struct *child)
 {
+	utrace_init_task(child);
 }
 
 /*
@@ -350,6 +352,9 @@ static inline void tracehook_init_task(s
  */
 static inline void tracehook_release_task(struct task_struct *p)
 {
+	smp_mb();
+	if (tsk_utrace_struct(p) != NULL)
+		utrace_release_task(p);
 }
 
 /*
@@ -360,7 +365,20 @@ static inline void tracehook_release_tas
  */
 static inline int tracehook_check_released(struct task_struct *p)
 {
-	return 0;
+	int bad = 0;
+	BUG_ON(p->exit_state != EXIT_DEAD);
+	if (unlikely(tsk_utrace_struct(p) != NULL)) {
+		/*
+		 * In a race condition, utrace_attach will temporarily set
+		 * it, but then check p->exit_state and clear it. It does
+		 * all this under task_lock, so we take the lock to check
+		 * that there is really a bug and not just that known race.
+ */ + task_lock(p); + bad = unlikely(tsk_utrace_struct(p) != NULL); + task_unlock(p); + } + return bad; } /* @@ -371,7 +389,7 @@ static inline int tracehook_check_releas static inline int tracehook_notify_cldstop(struct task_struct *tsk, const siginfo_t *info) { - return 0; + return (tsk_utrace_flags(tsk) & UTRACE_ACTION_NOREAP); } /* @@ -385,7 +403,11 @@ static inline int tracehook_notify_cldst static inline int tracehook_notify_death(struct task_struct *tsk, int *noreap, void **death_cookie) { - *death_cookie = NULL; + *death_cookie = tsk_utrace_struct(tsk); + if (tsk_utrace_flags(tsk) & UTRACE_ACTION_NOREAP) { + *noreap = 1; + return 1; + } *noreap = 0; return 0; } @@ -398,7 +420,8 @@ static inline int tracehook_notify_death static inline int tracehook_consider_fatal_signal(struct task_struct *tsk, int sig) { - return 0; + return (tsk_utrace_flags(tsk) & (UTRACE_EVENT(SIGNAL_TERM) + | UTRACE_EVENT(SIGNAL_CORE))); } /* @@ -411,7 +434,7 @@ static inline int tracehook_consider_ign int sig, void __user *handler) { - return 0; + return (tsk_utrace_flags(tsk) & UTRACE_EVENT(SIGNAL_IGN)); } @@ -422,7 +445,7 @@ static inline int tracehook_consider_ign */ static inline int tracehook_induce_sigpending(struct task_struct *tsk) { - return 0; + return unlikely(tsk_utrace_flags(tsk) & UTRACE_ACTION_QUIESCE); } /* @@ -437,6 +460,8 @@ static inline int tracehook_get_signal(s siginfo_t *info, struct k_sigaction *return_ka) { + if (unlikely(tsk_utrace_flags(tsk))) + return utrace_get_signal(tsk, regs, info, return_ka); return 0; } @@ -449,6 +474,8 @@ static inline int tracehook_get_signal(s */ static inline int tracehook_finish_stop(int last_one) { + if (tsk_utrace_flags(current) & UTRACE_EVENT(JCTL)) + return utrace_report_jctl(CLD_STOPPED); return 0; } @@ -460,7 +487,7 @@ static inline int tracehook_finish_stop( */ static inline int tracehook_inhibit_wait_stopped(struct task_struct *child) { - return 0; + return (tsk_utrace_flags(child) & UTRACE_ACTION_NOREAP); } /* @@ -470,7 +497,7 @@ static inline int tracehook_inhibit_wait */ static inline int tracehook_inhibit_wait_zombie(struct task_struct *child) { - return 0; + return (tsk_utrace_flags(child) & UTRACE_ACTION_NOREAP); } /* @@ -480,7 +507,7 @@ static inline int tracehook_inhibit_wait */ static inline int tracehook_inhibit_wait_continued(struct task_struct *child) { - return 0; + return (tsk_utrace_flags(child) & UTRACE_ACTION_NOREAP); } @@ -490,13 +517,9 @@ static inline int tracehook_inhibit_wait */ static inline int tracehook_unsafe_exec(struct task_struct *tsk) { + if (tsk_utrace_flags(tsk)) + return utrace_unsafe_exec(tsk); return 0; -// if (p->ptrace & PT_PTRACED) { -// if (p->ptrace & PT_PTRACE_CAP) -// unsafe |= LSM_UNSAFE_PTRACE_CAP; -// else -// unsafe |= LSM_UNSAFE_PTRACE; -// } } /* @@ -511,6 +534,8 @@ static inline int tracehook_unsafe_exec( */ static inline struct task_struct *tracehook_tracer_task(struct task_struct *p) { + if (tsk_utrace_flags(p)) + return utrace_tracer_task(p); return NULL; } @@ -522,6 +547,8 @@ static inline int tracehook_allow_access { if (tsk == current) return 1; + if (tsk_utrace_flags(tsk)) + return utrace_allow_access_process_vm(tsk); return 0; } @@ -533,7 +560,7 @@ static inline int tracehook_allow_access */ static inline int tracehook_expect_breakpoints(struct task_struct *tsk) { - return 0; + return (tsk_utrace_flags(tsk) & UTRACE_EVENT(SIGNAL_CORE)); } @@ -556,6 +583,10 @@ static inline int tracehook_expect_break static inline void tracehook_report_death(struct task_struct *tsk, int exit_state, void 
*death_cookie) { + smp_mb(); + if (tsk_utrace_flags(tsk) & (UTRACE_EVENT(DEATH) + | UTRACE_EVENT(QUIESCE))) + utrace_report_death(tsk, death_cookie); } /* @@ -565,14 +596,18 @@ static inline void tracehook_report_deat */ static inline void tracehook_report_delayed_group_leader(struct task_struct *p) { + utrace_report_delayed_group_leader(p); } /* - * exec completed + * exec completed, we are shortly going to return to user mode. + * The freshly initialized register state can be seen and changed here. */ static inline void tracehook_report_exec(struct linux_binprm *bprm, struct pt_regs *regs) { + if (tsk_utrace_flags(current) & UTRACE_EVENT(EXEC)) + utrace_report_exec(bprm, regs); } /* @@ -581,6 +616,8 @@ static inline void tracehook_report_exec */ static inline void tracehook_report_exit(long *exit_code) { + if (tsk_utrace_flags(current) & UTRACE_EVENT(EXIT)) + utrace_report_exit(exit_code); } /* @@ -595,6 +632,8 @@ static inline void tracehook_report_exit static inline void tracehook_report_clone(unsigned long clone_flags, struct task_struct *child) { + if (tsk_utrace_flags(current) & UTRACE_EVENT(CLONE)) + utrace_report_clone(clone_flags, child); } /* @@ -608,6 +647,8 @@ static inline void tracehook_report_clon pid_t pid, struct task_struct *child) { + if (tsk_utrace_flags(current) & UTRACE_ACTION_QUIESCE) + utrace_quiescent(current, NULL); } /* @@ -619,6 +660,8 @@ static inline void tracehook_report_clon static inline void tracehook_report_vfork_done(struct task_struct *child, pid_t child_pid) { + if (tsk_utrace_flags(current) & UTRACE_EVENT(VFORK_DONE)) + utrace_report_vfork_done(child_pid); } /* @@ -626,6 +669,9 @@ static inline void tracehook_report_vfor */ static inline void tracehook_report_syscall(struct pt_regs *regs, int is_exit) { + if (tsk_utrace_flags(current) & (is_exit ? UTRACE_EVENT(SYSCALL_EXIT) + : UTRACE_EVENT(SYSCALL_ENTRY))) + utrace_report_syscall(regs, is_exit); } /* @@ -645,6 +691,11 @@ static inline void tracehook_report_hand const sigset_t *oldset, struct pt_regs *regs) { + struct task_struct *tsk = current; + if ((tsk_utrace_flags(tsk) & UTRACE_EVENT_SIGNAL_ALL) + && (tsk_utrace_flags(tsk) & (UTRACE_ACTION_SINGLESTEP + | UTRACE_ACTION_BLOCKSTEP))) + utrace_signal_handler_singlestep(tsk, regs); } Index: b/include/linux/utrace.h =================================================================== --- /dev/null +++ b/include/linux/utrace.h @@ -0,0 +1,544 @@ +/* + * utrace infrastructure interface for debugging user processes + * + * Copyright (C) 2006, 2007 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + * + * Red Hat Author: Roland McGrath. + * + * This interface allows for notification of interesting events in a thread. + * It also mediates access to thread state such as registers. + * Multiple unrelated users can be associated with a single thread. + * We call each of these a tracing engine. + * + * A tracing engine starts by calling utrace_attach() on the chosen thread, + * passing in a set of hooks (&struct utrace_engine_ops), and some + * associated data. This produces a &struct utrace_attached_engine, which + * is the handle used for all other operations. An attached engine has its + * ops vector, its data, and a flags word controlled by utrace_set_flags(). + * + * Each engine's flags word contains two kinds of flags: events of + * interest, and action state flags. 
+ *
+ * For each event flag that is set, that engine will get the
+ * appropriate ops->report_*() callback when the event occurs. The
+ * &struct utrace_engine_ops need not provide callbacks for an event
+ * unless the engine sets one of the associated event flags.
+ *
+ * Action state flags change the normal behavior of the thread.
+ * These bits are in %UTRACE_ACTION_STATE_MASK; these can be OR'd into
+ * flags set with utrace_set_flags(). Also, every callback that returns
+ * an action value can reset these bits for the engine (see below).
+ *
+ * The bits %UTRACE_ACTION_STATE_MASK of all attached engines are OR'd
+ * together, so each action is in force as long as any engine requests it.
+ * As long as some engine sets the %UTRACE_ACTION_QUIESCE flag, the thread
+ * will block and not resume running user code. When the last engine
+ * clears its %UTRACE_ACTION_QUIESCE flag, the thread will resume running.
+ */
+
+#ifndef _LINUX_UTRACE_H
+#define _LINUX_UTRACE_H 1
+
+#include
+#include
+#include
+#include
+
+struct linux_binprm;
+struct pt_regs;
+struct utrace;
+struct utrace_signal;
+struct utrace_regset;
+struct utrace_regset_view;
+
+
+/*
+ * Flags in &struct task_struct.utrace_flags and
+ * &struct utrace_attached_engine.flags.
+ * Low four bits are %UTRACE_ACTION_STATE_MASK bits (below).
+ * Higher bits are events of interest.
+ */
+#define UTRACE_FIRST_EVENT	4
+#define UTRACE_EVENT_BITS	(BITS_PER_LONG - UTRACE_FIRST_EVENT)
+#define UTRACE_EVENT_MASK	(-1UL &~ UTRACE_ACTION_STATE_MASK)
+
+enum utrace_events {
+	_UTRACE_EVENT_QUIESCE,	/* Tracing requests stop. */
+	_UTRACE_EVENT_REAP,	/* Zombie reaped, no more tracing possible. */
+	_UTRACE_EVENT_CLONE,	/* Successful clone/fork/vfork just done. */
+	_UTRACE_EVENT_VFORK_DONE, /* vfork woke from waiting for child. */
+	_UTRACE_EVENT_EXEC,	/* Successful execve just completed. */
+	_UTRACE_EVENT_EXIT,	/* Thread exit in progress. */
+	_UTRACE_EVENT_DEATH,	/* Thread has died. */
+	_UTRACE_EVENT_SYSCALL_ENTRY, /* User entered kernel for system call. */
+	_UTRACE_EVENT_SYSCALL_EXIT, /* Returning to user after system call. */
+	_UTRACE_EVENT_SIGNAL,	/* Signal delivery will run a user handler. */
+	_UTRACE_EVENT_SIGNAL_IGN, /* No-op signal to be delivered. */
+	_UTRACE_EVENT_SIGNAL_STOP, /* Signal delivery will suspend. */
+	_UTRACE_EVENT_SIGNAL_TERM, /* Signal delivery will terminate. */
+	_UTRACE_EVENT_SIGNAL_CORE, /* Signal delivery will dump core. */
+	_UTRACE_EVENT_JCTL,	/* Job control stop or continue completed. */
+	_UTRACE_NEVENTS
+};
+#define UTRACE_EVENT_BIT(type)	(UTRACE_FIRST_EVENT + _UTRACE_EVENT_##type)
+#define UTRACE_EVENT(type)	(1UL << UTRACE_EVENT_BIT(type))
+
+/*
+ * All the kinds of signal events. These all use the report_signal callback.
+ */
+#define UTRACE_EVENT_SIGNAL_ALL	(UTRACE_EVENT(SIGNAL) \
+				 | UTRACE_EVENT(SIGNAL_IGN) \
+				 | UTRACE_EVENT(SIGNAL_STOP) \
+				 | UTRACE_EVENT(SIGNAL_TERM) \
+				 | UTRACE_EVENT(SIGNAL_CORE))
+/*
+ * Both kinds of syscall events; these call the report_syscall_entry and
+ * report_syscall_exit callbacks, respectively.
+ */
+#define UTRACE_EVENT_SYSCALL	\
+	(UTRACE_EVENT(SYSCALL_ENTRY) | UTRACE_EVENT(SYSCALL_EXIT))
+
+
+/*
+ * Action flags, in return value of callbacks.
+ *
+ * %UTRACE_ACTION_RESUME (zero) is the return value to do nothing special.
+ * For each particular callback, some bits in %UTRACE_ACTION_OP_MASK can
+ * be set in the return value to change the thread's behavior (see below).
+ *
+ * If %UTRACE_ACTION_NEWSTATE is set, then the %UTRACE_ACTION_STATE_MASK
+ * bits in the return value replace the engine's flags as in utrace_set_flags
+ * (but the event flags remain unchanged).
+ *
+ * If %UTRACE_ACTION_HIDE is set, then the callbacks to other engines
+ * should be suppressed for this event. This is appropriate only when
+ * the event was artificially provoked by something this engine did,
+ * such as setting a breakpoint.
+ *
+ * If %UTRACE_ACTION_DETACH is set, this engine is detached as by
+ * utrace_detach(). The action bits in %UTRACE_ACTION_OP_MASK work as
+ * normal, but the engine's %UTRACE_ACTION_STATE_MASK bits will no longer
+ * affect the thread.
+ */
+#define UTRACE_ACTION_RESUME	0x0000 /* Continue normally after event. */
+#define UTRACE_ACTION_HIDE	0x0010 /* Hide event from other tracing. */
+#define UTRACE_ACTION_DETACH	0x0020 /* Detach me, state flags ignored. */
+#define UTRACE_ACTION_NEWSTATE	0x0040 /* Replace state bits. */
+
+/*
+ * These flags affect the state of the thread until they are changed via
+ * utrace_set_flags() or by the next callback to the same engine that uses
+ * %UTRACE_ACTION_NEWSTATE.
+ */
+#define UTRACE_ACTION_QUIESCE	0x0001 /* Stay quiescent after callbacks. */
+#define UTRACE_ACTION_SINGLESTEP 0x0002 /* Resume for one instruction. */
+#define UTRACE_ACTION_BLOCKSTEP	0x0004 /* Resume until next branch. */
+#define UTRACE_ACTION_NOREAP	0x0008 /* Inhibit parent SIGCHLD and wait. */
+#define UTRACE_ACTION_STATE_MASK 0x000f /* Lasting state bits. */
+
+/*
+ * These flags have meanings specific to the particular event report hook.
+ */
+#define UTRACE_ACTION_OP_MASK	0xff00
+
+/*
+ * Action flags in return value and argument of report_signal() callback.
+ */
+#define UTRACE_SIGNAL_DELIVER	0x0100 /* Deliver according to sigaction. */
+#define UTRACE_SIGNAL_IGN	0x0200 /* Ignore the signal. */
+#define UTRACE_SIGNAL_TERM	0x0300 /* Terminate the process. */
+#define UTRACE_SIGNAL_CORE	0x0400 /* Terminate with core dump. */
+#define UTRACE_SIGNAL_STOP	0x0500 /* Deliver as absolute stop. */
+#define UTRACE_SIGNAL_TSTP	0x0600 /* Deliver as job control stop. */
+#define UTRACE_SIGNAL_HOLD	0x1000 /* Flag, push signal back on queue. */
+/*
+ * This value is passed to a report_signal() callback after a signal
+ * handler is entered while %UTRACE_ACTION_SINGLESTEP is in force.
+ * For this callback, no signal will ever actually be delivered regardless
+ * of the return value, and the other callback parameters are null.
+ */
+#define UTRACE_SIGNAL_HANDLER	0x0700
+
+/*
+ * Action flag in return value of report_jctl().
+ */
+#define UTRACE_JCTL_NOSIGCHLD	0x0100 /* Do not notify the parent. */
+
+
+/*
+ * Flags for utrace_attach().
+ */
+#define UTRACE_ATTACH_CREATE	0x0010 /* Attach a new engine. */
+#define UTRACE_ATTACH_EXCLUSIVE	0x0020 /* Refuse if existing match. */
+#define UTRACE_ATTACH_MATCH_OPS	0x0001 /* Match engines on ops. */
+#define UTRACE_ATTACH_MATCH_DATA 0x0002 /* Match engines on data. */
+#define UTRACE_ATTACH_MATCH_MASK 0x000f
+
+
+#ifdef CONFIG_UTRACE
+/**
+ * struct utrace_attached_engine - Per-engine per-thread structure.
+ * @ops:	&struct utrace_engine_ops pointer passed to utrace_attach()
+ * @data:	engine-private void * passed to utrace_attach()
+ * @flags:	current flags set by utrace_set_flags()
+ *
+ * The task itself never has to worry about engines detaching while
+ * it's doing event callbacks. These structures are freed only when
+ * the task is quiescent. For other parties, the list is protected
+ * by RCU and utrace->lock.
+ */
+struct utrace_attached_engine
+{
+/* private: */
+	struct list_head entry;	/* Entry on thread's utrace.engines list. */
+	struct rcu_head rhead;
+	atomic_t check_dead;
+
+/* public: */
+	const struct utrace_engine_ops *ops;
+	void *data;
+
+	unsigned long flags;
+};
+
+
+struct utrace_engine_ops
+{
+	/*
+	 * Event reporting hooks.
+	 *
+	 * Return values contain %UTRACE_ACTION_* flag bits.
+	 * The %UTRACE_ACTION_OP_MASK bits are specific to each kind of event.
+	 *
+	 * All report_*() hooks are called with no locks held, in a generally
+	 * safe environment when we will be returning to user mode soon.
+	 * It is fine to block for memory allocation and the like, but all
+	 * hooks are *asynchronous* and must not block on external events.
+	 * If you want the thread to block, request %UTRACE_ACTION_QUIESCE in
+	 * your hook; then later wake it up with utrace_set_flags().
+	 */
+
+	/*
+	 * Event reported for parent, before child might run.
+	 * The %PF_STARTING flag prevents other engines from attaching
+	 * before this one has its chance.
+	 */
+	u32 (*report_clone)(struct utrace_attached_engine *engine,
+			    struct task_struct *parent,
+			    unsigned long clone_flags,
+			    struct task_struct *child);
+
+	/*
+	 * Event reported for parent using %CLONE_VFORK or vfork() system call.
+	 * The child has died or exec'd, so the vfork parent has unblocked
+	 * and is about to return @child_pid.
+	 */
+	u32 (*report_vfork_done)(struct utrace_attached_engine *engine,
+				 struct task_struct *parent, pid_t child_pid);
+
+	/*
+	 * Event reported after %UTRACE_ACTION_QUIESCE is set, when the target
+	 * thread is quiescent. Either it's the current thread, or it's in
+	 * %TASK_TRACED or %TASK_STOPPED and will not resume running until the
+	 * %UTRACE_ACTION_QUIESCE flag is no longer asserted by any engine.
+	 */
+	u32 (*report_quiesce)(struct utrace_attached_engine *engine,
+			      struct task_struct *tsk);
+
+	/*
+	 * Thread dequeuing a signal to be delivered.
+	 * The @action and @return_ka values say what %UTRACE_ACTION_RESUME
+	 * will do (possibly already influenced by another tracing engine).
+	 * An %UTRACE_SIGNAL_* return value overrides the signal disposition.
+	 * The @info data (including @info->si_signo) can be changed at will.
+	 * Changing @return_ka affects the sigaction that will be used.
+	 * The @orig_ka value is the one in force before other tracing
+	 * engines intervened.
+	 */
+	u32 (*report_signal)(struct utrace_attached_engine *engine,
+			     struct task_struct *tsk,
+			     struct pt_regs *regs,
+			     u32 action, siginfo_t *info,
+			     const struct k_sigaction *orig_ka,
+			     struct k_sigaction *return_ka);
+
+	/*
+	 * Job control event completing, about to send %SIGCHLD to parent
+	 * with %CLD_STOPPED or %CLD_CONTINUED as given in type.
+	 * %UTRACE_JCTL_NOSIGCHLD in the return value inhibits that.
+	 */
+	u32 (*report_jctl)(struct utrace_attached_engine *engine,
+			   struct task_struct *tsk,
+			   int type);
+
+	/*
+	 * Thread has just completed an exec.
+	 * The initial user register state is handy to be tweaked directly.
+	 */
+	u32 (*report_exec)(struct utrace_attached_engine *engine,
+			   struct task_struct *tsk,
+			   const struct linux_binprm *bprm,
+			   struct pt_regs *regs);
+
+	/*
+	 * Thread has entered the kernel to request a system call.
+	 * The user register state is handy to be tweaked directly.
+	 */
+	u32 (*report_syscall_entry)(struct utrace_attached_engine *engine,
+				    struct task_struct *tsk,
+				    struct pt_regs *regs);
+
+	/*
+	 * Thread is about to leave the kernel after a system call request.
+	 * The user register state can be examined and changed directly.
+	 */
+	u32 (*report_syscall_exit)(struct utrace_attached_engine *engine,
+				   struct task_struct *tsk,
+				   struct pt_regs *regs);
+
+	/*
+	 * Thread is exiting and cannot be prevented from doing so,
+	 * but all its state is still live. The @code value will be
+	 * the wait result seen by the parent, and can be changed by
+	 * this engine or others. The @orig_code value is the real
+	 * status, not changed by any tracing engine.
+	 */
+	u32 (*report_exit)(struct utrace_attached_engine *engine,
+			   struct task_struct *tsk,
+			   long orig_code, long *code);
+
+	/*
+	 * Thread is really dead now. If %UTRACE_ACTION_NOREAP is in force,
+	 * it remains an unreported zombie. Otherwise, it might be reaped
+	 * by its parent, or self-reap immediately. Though the actual
+	 * reaping may happen in parallel, a report_reap() callback will
+	 * always be ordered after a report_death() callback.
+	 *
+	 * If %UTRACE_ACTION_NOREAP is in force and this was a group_leader
+	 * dying with threads still in the group (delayed_group_leader()),
+	 * then there can be a second report_death() callback later when
+	 * the group_leader is no longer delayed. This second callback can
+	 * be made from another thread's context, but it will always be
+	 * serialized after the first report_death() callback and before
+	 * the report_reap() callback. It's possible that
+	 * delayed_group_leader() will already be true by the time it can
+	 * be checked inside the first report_death() callback made at the
+	 * time of death, and that a second callback will be made almost
+	 * immediately thereafter.
+	 */
+	u32 (*report_death)(struct utrace_attached_engine *engine,
+			    struct task_struct *tsk);
+
+	/*
+	 * Called when someone reaps the dead task (parent, init, or self).
+	 * No more callbacks are made after this one.
+	 * The engine is always detached.
+	 * There is nothing more a tracing engine can do about this thread.
+	 */
+	void (*report_reap)(struct utrace_attached_engine *engine,
+			    struct task_struct *tsk);
+
+	/*
+	 * Miscellaneous hooks. These are not associated with event reports.
+	 * Any of these may be null if the engine has nothing to say.
+	 * These hooks are called in more constrained environments and should
+	 * not block or do very much.
+	 */
+
+	/*
+	 * Return nonzero iff the @caller task should be allowed to access
+	 * the memory of the target task via /proc/PID/mem and so forth,
+	 * by dint of this engine's attachment to the target.
+	 */
+	int (*allow_access_process_vm)(struct utrace_attached_engine *engine,
+				       struct task_struct *target,
+				       struct task_struct *caller);
+
+	/*
+	 * Return %LSM_UNSAFE_* bits that apply to the exec in progress
+	 * due to tracing done by this engine. These bits indicate that
+	 * someone is able to examine the process and so a set-UID or similar
+	 * privilege escalation may not be safe to permit.
+	 *
+	 * Called with task_lock() held.
+	 */
+	int (*unsafe_exec)(struct utrace_attached_engine *engine,
+			   struct task_struct *target);
+
+	/*
+	 * Return the &struct task_struct for the task using ptrace on this
+	 * one, or %NULL. Always called with rcu_read_lock() held to keep the
+	 * returned struct alive.
+	 *
+	 * At exec time, this may be called with task_lock(target) still
+	 * held from when unsafe_exec() was just called.
+	 * In that case it
+	 * must give results consistent with those unsafe_exec() results,
+	 * i.e. non-%NULL if any %LSM_UNSAFE_PTRACE_* bits were set.
+	 *
+	 * The value is also displayed after "TracerPid:" in
+	 * /proc/PID/status, where it is called with only rcu_read_lock() held.
+	 *
+	 * If this engine returns %NULL, another engine may supply the result.
+	 */
+	struct task_struct *(*tracer_task)(struct utrace_attached_engine *,
+					   struct task_struct *target);
+};
+
+
+/*
+ * These are the exported entry points for tracing engines to use.
+ */
+struct utrace_attached_engine *utrace_attach(struct task_struct *target,
+					     int flags,
+					     const struct utrace_engine_ops *,
+					     void *data);
+int utrace_detach(struct task_struct *target,
+		  struct utrace_attached_engine *engine);
+int utrace_set_flags(struct task_struct *target,
+		     struct utrace_attached_engine *engine,
+		     unsigned long flags);
+int utrace_inject_signal(struct task_struct *target,
+			 struct utrace_attached_engine *engine,
+			 u32 action, siginfo_t *info,
+			 const struct k_sigaction *ka);
+const struct utrace_regset *utrace_regset(struct task_struct *target,
+					  struct utrace_attached_engine *,
+					  const struct utrace_regset_view *,
+					  int which);
+
+
+/*
+ * Hooks in <linux/tracehook.h> call these entry points to the utrace dispatch.
+ */
+int utrace_quiescent(struct task_struct *, struct utrace_signal *);
+void utrace_release_task(struct task_struct *);
+int utrace_get_signal(struct task_struct *, struct pt_regs *,
+		      siginfo_t *, struct k_sigaction *);
+void utrace_report_clone(unsigned long clone_flags, struct task_struct *child);
+void utrace_report_vfork_done(pid_t child_pid);
+void utrace_report_exit(long *exit_code);
+void utrace_report_death(struct task_struct *, struct utrace *);
+void utrace_report_delayed_group_leader(struct task_struct *);
+int utrace_report_jctl(int type);
+void utrace_report_exec(struct linux_binprm *bprm, struct pt_regs *regs);
+void utrace_report_syscall(struct pt_regs *regs, int is_exit);
+struct task_struct *utrace_tracer_task(struct task_struct *);
+int utrace_allow_access_process_vm(struct task_struct *);
+int utrace_unsafe_exec(struct task_struct *);
+void utrace_signal_handler_singlestep(struct task_struct *, struct pt_regs *);
+
+/*
+ * <linux/tracehook.h> uses these accessors to avoid #ifdef CONFIG_UTRACE.
+ */
+static inline unsigned long tsk_utrace_flags(struct task_struct *tsk)
+{
+	return tsk->utrace_flags;
+}
+static inline struct utrace *tsk_utrace_struct(struct task_struct *tsk)
+{
+	return tsk->utrace;
+}
+static inline void utrace_init_task(struct task_struct *child)
+{
+	child->utrace_flags = 0;
+	child->utrace = NULL;
+}
+
+#else  /* !CONFIG_UTRACE */
+
+static inline unsigned long tsk_utrace_flags(struct task_struct *tsk)
+{
+	return 0;
+}
+static inline struct utrace *tsk_utrace_struct(struct task_struct *tsk)
+{
+	return NULL;
+}
+static inline void utrace_init_task(struct task_struct *child)
+{
+}
+
+/*
+ * The calls to these should all be in if (0) and optimized out entirely.
+ * We have stubs here only so tracehook.h doesn't need to #ifdef them
+ * to avoid external references in case of unoptimized compilation.
+ */
+static inline int utrace_quiescent(struct task_struct *tsk, void *ignored)
+{
+	BUG();
+	return 0;
+}
+static inline void utrace_release_task(struct task_struct *tsk)
+{
+	BUG();
+}
+static inline int utrace_get_signal(struct task_struct *tsk,
+				    struct pt_regs *regs,
+				    siginfo_t *info, struct k_sigaction *ka)
+{
+	BUG();
+	return 0;
+}
+static inline void utrace_report_clone(unsigned long clone_flags,
+				       struct task_struct *child)
+{
+	BUG();
+}
+static inline void utrace_report_vfork_done(pid_t child_pid)
+{
+	BUG();
+}
+static inline void utrace_report_exit(long *exit_code)
+{
+	BUG();
+}
+static inline void utrace_report_death(struct task_struct *tsk, void *ignored)
+{
+	BUG();
+}
+static inline void utrace_report_delayed_group_leader(struct task_struct *tsk)
+{
+	BUG();
+}
+static inline int utrace_report_jctl(int type)
+{
+	BUG();
+	return 0;
+}
+static inline void utrace_report_exec(struct linux_binprm *bprm,
+				      struct pt_regs *regs)
+{
+	BUG();
+}
+static inline void utrace_report_syscall(struct pt_regs *regs, int is_exit)
+{
+	BUG();
+}
+static inline struct task_struct *utrace_tracer_task(struct task_struct *tsk)
+{
+	BUG();
+	return NULL;
+}
+static inline int utrace_allow_access_process_vm(struct task_struct *tsk)
+{
+	BUG();
+	return 0;
+}
+static inline int utrace_unsafe_exec(struct task_struct *tsk)
+{
+	BUG();
+	return 0;
+}
+static inline void utrace_signal_handler_singlestep(struct task_struct *tsk,
+						    struct pt_regs *regs)
+{
+	BUG();
+}
+
+#endif /* CONFIG_UTRACE */
+
+#endif /* linux/utrace.h */
Index: b/include/linux/sched.h
===================================================================
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -942,6 +942,11 @@ struct task_struct {
 	struct audit_context *audit_context;
 	seccomp_t seccomp;
 
+#ifdef CONFIG_UTRACE
+	struct utrace *utrace;
+	unsigned long utrace_flags;
+#endif
+
 /* Thread group tracking */
 	u32 parent_exec_id;
 	u32 self_exec_id;
Index: b/init/Kconfig
===================================================================
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -569,6 +569,24 @@ config STOP_MACHINE
 	  Need stop_machine() primitive.
 endmenu
 
+menu "Process debugging support"
+
+config UTRACE
+	bool "Infrastructure for tracing and debugging user processes"
+	default y
+	depends on MODULES
+	help
+	  Enable the utrace process tracing interface.
+	  This is an internal kernel interface to track events in user
+	  threads, and to extract and change user thread state. The
+	  interface is exported to kernel modules, and is also used to
+	  implement ptrace. If you disable this, no facilities for
+	  debugging user processes will be available, nor will the
+	  facilities used by UML and other applications. Unless you are
+	  making a specially stripped-down kernel and are very sure you
+	  don't need these facilities, say Y.
+endmenu
+
 menu "Block layer"
 source "block/Kconfig"
 endmenu
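
As a usage illustration for reviewers (not part of the patch itself): a
minimal tracing engine built on the entry points above could look like the
sketch below. The module name, the target_pid parameter, and the
UTRACE_EVENT(EXEC) event-mask macro are assumptions for illustration (the
event-mask macros live elsewhere in the header and are not shown in this
excerpt), and utrace_attach() is assumed to return an ERR_PTR() value on
failure, matching the convention utrace.c uses internally.

/*
 * Illustrative sketch only -- not part of this patch.  A minimal engine
 * that attaches to one task by PID and logs its exec events.
 */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/err.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/binfmts.h>
#include <linux/utrace.h>

static int target_pid;
module_param(target_pid, int, 0);

static struct task_struct *demo_task;
static struct utrace_attached_engine *demo_engine;

static u32 demo_report_exec(struct utrace_attached_engine *engine,
			    struct task_struct *tsk,
			    const struct linux_binprm *bprm,
			    struct pt_regs *regs)
{
	printk(KERN_INFO "demo: %d exec'd %s\n", tsk->pid, bprm->filename);
	return UTRACE_ACTION_RESUME;	/* let the thread continue */
}

static const struct utrace_engine_ops demo_ops = {
	.report_exec = demo_report_exec,
};

static int __init demo_init(void)
{
	rcu_read_lock();
	demo_task = find_task_by_pid(target_pid);
	if (demo_task)
		get_task_struct(demo_task);
	rcu_read_unlock();
	if (!demo_task)
		return -ESRCH;

	/* Assumed to return an ERR_PTR() value on failure. */
	demo_engine = utrace_attach(demo_task,
				    UTRACE_ATTACH_CREATE |
				    UTRACE_ATTACH_EXCLUSIVE |
				    UTRACE_ATTACH_MATCH_OPS,
				    &demo_ops, NULL);
	if (IS_ERR(demo_engine)) {
		put_task_struct(demo_task);
		return PTR_ERR(demo_engine);
	}

	/* Ask for exec reports; UTRACE_EVENT() is an assumed macro. */
	return utrace_set_flags(demo_task, demo_engine, UTRACE_EVENT(EXEC));
}

static void __exit demo_exit(void)
{
	/* May fail harmlessly if the task already died and was reaped. */
	utrace_detach(demo_task, demo_engine);
	put_task_struct(demo_task);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

Attaching with UTRACE_ATTACH_EXCLUSIVE | UTRACE_ATTACH_MATCH_OPS makes a
second insertion of the same module fail rather than silently attach a
duplicate engine to the same thread.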
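
The way an %UTRACE_SIGNAL_* op code composes with the lasting state bits in
one return value may also be easier to see in code than in prose. Here is a
hypothetical report_signal() callback for the same sketch engine; the
SIGUSR1 policy is invented, and the callback only runs if the engine also
requested the signal events (event-mask bits not shown in this excerpt).

/*
 * Illustrative sketch only.  Swallow SIGUSR1 and leave the thread
 * quiescent for this engine; let every other signal take its normal
 * course.
 */
static u32 demo_report_signal(struct utrace_attached_engine *engine,
			      struct task_struct *tsk,
			      struct pt_regs *regs,
			      u32 action, siginfo_t *info,
			      const struct k_sigaction *orig_ka,
			      struct k_sigaction *return_ka)
{
	if (info->si_signo == SIGUSR1)
		/*
		 * UTRACE_SIGNAL_IGN is the OP_MASK code: discard the signal.
		 * UTRACE_ACTION_NEWSTATE | UTRACE_ACTION_QUIESCE replaces
		 * this engine's state bits so the thread stays quiescent
		 * until a later utrace_set_flags() call resumes it.
		 */
		return UTRACE_SIGNAL_IGN |
			UTRACE_ACTION_NEWSTATE | UTRACE_ACTION_QUIESCE;

	/* Resume normally; the incoming @action disposition stands. */
	return UTRACE_ACTION_RESUME;
}

Returning plain UTRACE_ACTION_RESUME leaves the pending @action and
@return_ka disposition in force, as described in the callback comments
above.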