[PATCH 3] utrace core

This adds the utrace facility, a new modular interface in the kernel
for implementing user thread tracing and debugging.  This fits on top
of the tracehook_* layer, so the new code is well-isolated.

The new interface is in <linux/utrace.h>, and Documentation/utrace.txt
describes it.  It allows for multiple separate tracing engines to work
in parallel without interfering with each other.  Higher-level tracing
facilities can be implemented as loadable kernel modules using this
layer.

The new facility is made optional under CONFIG_UTRACE.  Normal
configurations will always want to enable it.  It's optional to
emphasize the clean separation of the code, and because some
stripped-down embedded configurations might want to omit it to save
space (when ptrace and the like can never be used).

Signed-off-by: Roland McGrath <roland@redhat.com>

---
 Documentation/DocBook/Makefile    |    2 
 Documentation/DocBook/utrace.tmpl |   23 
 Documentation/utrace.txt          |  579 +++++++++
 include/linux/sched.h             |    5 
 include/linux/tracehook.h         |   85 +
 include/linux/utrace.h            |  544 +++++++++
 init/Kconfig                      |   18 
 kernel/Makefile                   |    1 
 kernel/utrace.c                   | 2263 ++++++++++++++++++++++++++++++++++++++
 9 files changed, 3502 insertions(+), 18 deletions(-)
 create kernel/utrace.c
 create Documentation/utrace.txt
 create Documentation/DocBook/utrace.tmpl
 create include/linux/utrace.h

Index: b/kernel/Makefile
===================================================================
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -50,6 +50,7 @@ obj-$(CONFIG_RELAY) += relay.o
 obj-$(CONFIG_UTS_NS) += utsname.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
+obj-$(CONFIG_UTRACE) += utrace.o
 
 ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is

Index: b/kernel/utrace.c
===================================================================
--- /dev/null
+++ b/kernel/utrace.c
@@ -0,0 +1,2263 @@
+/*
+ * utrace infrastructure interface for debugging user processes
+ *
+ * Copyright (C) 2006, 2007 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * Red Hat Author: Roland McGrath.
+ */
+
+#include <linux/utrace.h>
+#include <linux/tracehook.h>
+#include <linux/err.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <asm/tracehook.h>
+
+
+#define UTRACE_DEBUG 1
+#ifdef UTRACE_DEBUG
+#define CHECK_INIT(p)	atomic_set(&(p)->check_dead, 1)
+#define CHECK_DEAD(p)	BUG_ON(!atomic_dec_and_test(&(p)->check_dead))
+#else
+#define CHECK_INIT(p)	do { } while (0)
+#define CHECK_DEAD(p)	do { } while (0)
+#endif
+
+/*
+ * Per-thread structure task_struct.utrace points to.
+ *
+ * The task itself never has to worry about this going away after
+ * some event is found set in task_struct.utrace_flags.
+ * Once created, this pointer is changed only when the task is quiescent
+ * (TASK_TRACED or TASK_STOPPED with the siglock held, or dead).
+ *
+ * For other parties, the pointer to this is protected by RCU and
+ * task_lock.  Since call_rcu is never used while the thread is alive and
+ * using this struct utrace, we can overlay the RCU data structure used
+ * only for a dead struct with some local state used only for a live utrace
+ * on an active thread.
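+ *
+ * As an illustration of the read side (a sketch mirroring the
+ * utrace_attach path below, not additional API):
+ *
+ *	rcu_read_lock();
+ *	utrace = rcu_dereference(task->utrace);
+ *	if (utrace != NULL) {
+ *		... inspect utrace->engines ...
+ *	}
+ *	rcu_read_unlock();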
+ */ +struct utrace +{ + union { + struct rcu_head dead; + struct { + struct task_struct *cloning; + struct utrace_signal *signal; + } live; + struct { + unsigned long flags; + } exit; + } u; + + struct list_head engines; + spinlock_t lock; +#ifdef UTRACE_DEBUG + atomic_t check_dead; +#endif +}; + +static struct kmem_cache *utrace_cachep; +static struct kmem_cache *utrace_engine_cachep; + +static int __init +utrace_init(void) +{ + utrace_cachep = + kmem_cache_create("utrace_cache", + sizeof(struct utrace), 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + utrace_engine_cachep = + kmem_cache_create("utrace_engine_cache", + sizeof(struct utrace_attached_engine), 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + return 0; +} +subsys_initcall(utrace_init); + + +/* + * Make sure target->utrace is allocated, and return with it locked on + * success. This function mediates startup races. The creating parent + * task has priority, and other callers will delay here to let its call + * succeed and take the new utrace lock first. + */ +static struct utrace * +utrace_first_engine(struct task_struct *target, + struct utrace_attached_engine *engine) + __acquires(utrace->lock) +{ + struct utrace *utrace; + + /* + * If this is a newborn thread and we are not the creator, + * we have to wait for it. The creator gets the first chance + * to attach. The PF_STARTING flag is cleared after its + * report_clone hook has had a chance to run. + */ + if (target->flags & PF_STARTING) { + utrace = current->utrace; + if (utrace == NULL || utrace->u.live.cloning != target) { + yield(); + return (signal_pending(current) + ? ERR_PTR(-ERESTARTNOINTR) : NULL); + } + } + + utrace = kmem_cache_alloc(utrace_cachep, GFP_KERNEL); + if (unlikely(utrace == NULL)) + return ERR_PTR(-ENOMEM); + + utrace->u.live.cloning = NULL; + utrace->u.live.signal = NULL; + INIT_LIST_HEAD(&utrace->engines); + list_add(&engine->entry, &utrace->engines); + spin_lock_init(&utrace->lock); + CHECK_INIT(utrace); + + spin_lock(&utrace->lock); + task_lock(target); + if (likely(target->utrace == NULL)) { + rcu_assign_pointer(target->utrace, utrace); + + /* + * The task_lock protects us against another thread doing + * the same thing. We might still be racing against + * tracehook_release_task. It's called with ->exit_state + * set to EXIT_DEAD and then checks ->utrace with an + * smp_mb() in between. If EXIT_DEAD is set, then + * release_task might have checked ->utrace already and saw + * it NULL; we can't attach. If we see EXIT_DEAD not yet + * set after our barrier, then we know release_task will + * see our target->utrace pointer. + */ + smp_mb(); + if (likely(target->exit_state != EXIT_DEAD)) { + task_unlock(target); + return utrace; + } + + /* + * The target has already been through release_task. + * Our caller will restart and notice it's too late now. + */ + target->utrace = NULL; + } + + /* + * Another engine attached first, so there is a struct already. + * A null return says to restart looking for the existing one. + */ + task_unlock(target); + spin_unlock(&utrace->lock); + kmem_cache_free(utrace_cachep, utrace); + + return NULL; +} + +static void +utrace_free(struct rcu_head *rhead) +{ + struct utrace *utrace = container_of(rhead, struct utrace, u.dead); + kmem_cache_free(utrace_cachep, utrace); +} + +/* + * Called with utrace locked. Clean it up and free it via RCU. 
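+ * (The lock is released here, and the actual kmem_cache_free is
+ * deferred through call_rcu, so readers still traversing the engine
+ * list under rcu_read_lock remain safe.)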
+ */ +static void +rcu_utrace_free(struct utrace *utrace) + __releases(utrace->lock) +{ + CHECK_DEAD(utrace); + spin_unlock(&utrace->lock); + INIT_RCU_HEAD(&utrace->u.dead); + call_rcu(&utrace->u.dead, utrace_free); +} + +static void +utrace_engine_free(struct rcu_head *rhead) +{ + struct utrace_attached_engine *engine = + container_of(rhead, struct utrace_attached_engine, rhead); + kmem_cache_free(utrace_engine_cachep, engine); +} + +static inline void +rcu_engine_free(struct utrace_attached_engine *engine) +{ + CHECK_DEAD(engine); + call_rcu(&engine->rhead, utrace_engine_free); +} + + +/* + * Remove the utrace pointer from the task, unless there is a pending + * forced signal (or it's quiescent in utrace_get_signal). We know it's + * quiescent now, and so are guaranteed it will have to take utrace->lock + * before it can set ->exit_state if it's not set now. + */ +static inline void +utrace_clear_tsk(struct task_struct *tsk, struct utrace *utrace) +{ + if (tsk->exit_state || utrace->u.live.signal == NULL) { + task_lock(tsk); + if (likely(tsk->utrace != NULL)) { + rcu_assign_pointer(tsk->utrace, NULL); + tsk->utrace_flags &= UTRACE_ACTION_NOREAP; + } + task_unlock(tsk); + } +} + +/* + * Called with utrace locked and the target quiescent (maybe current). + * If this was the last engine and there is no parting forced signal + * pending, utrace is left locked and not freed, but is removed from the task. + */ +static void +remove_engine(struct utrace_attached_engine *engine, + struct task_struct *tsk, struct utrace *utrace) +{ + list_del_rcu(&engine->entry); + if (list_empty(&utrace->engines)) + utrace_clear_tsk(tsk, utrace); + rcu_engine_free(engine); +} + + +#define DEATH_EVENTS (UTRACE_EVENT(DEATH) | UTRACE_EVENT(QUIESCE)) + +/* + * Called with utrace locked, after remove_engine may have run. + * Passed the flags from all remaining engines, i.e. zero if none + * left. Install the flags in tsk->utrace_flags and return with + * utrace unlocked. If no engines are left and there is no parting + * forced signal pending, utrace is freed. + */ +static void +check_dead_utrace(struct task_struct *tsk, struct utrace *utrace, + unsigned long flags) + __releases(utrace->lock) +{ + long exit_state = 0; + + if (!tsk->exit_state && utrace->u.live.signal != NULL) + /* + * There is a pending forced signal. It may have been + * left by an engine now detached. The empty utrace + * remains attached until it can be processed. + */ + flags |= UTRACE_ACTION_QUIESCE; + + /* + * If tracing was preventing a SIGCHLD or self-reaping + * and is no longer, we'll do that report or reaping now. + */ + if (((tsk->utrace_flags &~ flags) & UTRACE_ACTION_NOREAP) + && tsk->exit_state) { + /* + * While holding the utrace lock, mark that it's been done. + * For self-reaping, we need to change tsk->exit_state + * before clearing tsk->utrace_flags, so that the real + * parent can't see it in EXIT_ZOMBIE momentarily and reap + * it. If tsk was the group_leader, an exec by another + * thread can release_task it despite our NOREAP. Holding + * tasklist_lock for reading excludes de_thread until we + * decide what to do. + */ + read_lock(&tasklist_lock); + if (tsk->exit_signal == -1) { /* Self-reaping thread. */ + exit_state = xchg(&tsk->exit_state, EXIT_DEAD); + read_unlock(&tasklist_lock); + + BUG_ON(exit_state != EXIT_ZOMBIE); + exit_state = EXIT_DEAD; /* Reap it below. */ + + /* + * Now that we've changed its state to DEAD, + * it's safe to install the new tsk->utrace_flags + * value without the UTRACE_ACTION_NOREAP bit set. 
+ */ + } + else if (thread_group_empty(tsk)) /* Normal solo zombie. */ + /* + * We need to prevent the real parent from reaping + * until after we've called do_notify_parent, below. + * It can get into wait_task_zombie any time after + * the UTRACE_ACTION_NOREAP bit is cleared. It's + * safe for that to do everything it does until its + * release_task call starts tearing things down. + * Holding tasklist_lock for reading prevents + * release_task from proceeding until we've done + * everything we need to do. + */ + exit_state = EXIT_ZOMBIE; + else + /* + * Delayed group leader, nothing to do yet. + * This is also the situation with the old + * group leader in an exec by another thread, + * which will call release_task itself. + */ + read_unlock(&tasklist_lock); + } + + /* + * When it's in TASK_STOPPED state, do not set UTRACE_EVENT(JCTL). + * That bit indicates utrace_report_jctl has not run yet, but it + * may have. Set UTRACE_ACTION_QUIESCE instead to be sure that + * once it resumes it will recompute its flags in utrace_quiescent. + */ + if (((flags &~ tsk->utrace_flags) & UTRACE_EVENT(JCTL)) + && tsk->state == TASK_STOPPED) { + flags &= ~UTRACE_EVENT(JCTL); + flags |= UTRACE_ACTION_QUIESCE; + } + + tsk->utrace_flags = flags; + if (flags) + spin_unlock(&utrace->lock); + else { + BUG_ON(tsk->utrace == utrace); + rcu_utrace_free(utrace); + } + + /* + * Now we're finished updating the utrace state. + * Do a pending self-reaping or parent notification. + */ + if (exit_state == EXIT_ZOMBIE) { + do_notify_parent(tsk, tsk->exit_signal); + + /* + * If SIGCHLD was ignored, that set tsk->exit_signal = -1 + * to tell us to reap it immediately. + */ + if (tsk->exit_signal == -1) { + exit_state = xchg(&tsk->exit_state, EXIT_DEAD); + BUG_ON(exit_state != EXIT_ZOMBIE); + exit_state = EXIT_DEAD; /* Reap it below. */ + } + read_unlock(&tasklist_lock); /* See comment above. */ + } + if (exit_state == EXIT_DEAD) + /* + * Note this can wind up in utrace_reap and do more callbacks. + * Our callers must be in places where that is OK. + */ + release_task(tsk); +} + +/* + * Get the target thread to quiesce. Return nonzero if it's already quiescent. + * Return zero if it will report a QUIESCE event soon. + * If interrupt is nonzero, wake it like a signal would so it quiesces ASAP. + * If interrupt is zero, just make sure it quiesces before going to user mode. + */ +static int +quiesce(struct task_struct *target, int interrupt) +{ + int ret; + + target->utrace_flags |= UTRACE_ACTION_QUIESCE; + read_barrier_depends(); + + if (target->exit_state) + goto dead; + + /* + * First a quick check without the siglock. If it's in TASK_TRACED + * or TASK_STOPPED already, we know it is going to go through + * utrace_get_signal before it resumes. + */ + ret = 1; + switch (target->state) { + case TASK_TRACED: + break; + + case TASK_STOPPED: + /* + * If it will call utrace_report_jctl but has not gotten + * through it yet, then don't consider it quiescent yet. + * utrace_report_jctl will take target->utrace->lock and + * clear UTRACE_EVENT(JCTL) once it finishes. After that, + * it is considered quiescent; when it wakes up, it will go + * through utrace_get_signal before doing anything else. + */ + if (!(target->utrace_flags & UTRACE_EVENT(JCTL))) + break; + + default: + /* + * Now get the siglock and check again. 
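+		 * (The quick checks above ran without the siglock and
+		 * can race with wakeups; under the siglock, the ->state
+		 * and exit_state tests below are stable.)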
+ */ + spin_lock_irq(&target->sighand->siglock); + if (unlikely(target->exit_state)) { + spin_unlock_irq(&target->sighand->siglock); + goto dead; + } + switch (target->state) { + case TASK_TRACED: + break; + + case TASK_STOPPED: + ret = !(target->utrace_flags & UTRACE_EVENT(JCTL)); + break; + + default: + /* + * It is not stopped, so tell it to stop soon. + */ + ret = 0; + if (interrupt) + signal_wake_up(target, 0); + else { + set_tsk_thread_flag(target, TIF_SIGPENDING); + kick_process(target); + } + break; + } + spin_unlock_irq(&target->sighand->siglock); + } + + return ret; + +dead: + /* + * On the exit path, it's only truly quiescent if it has + * already been through utrace_report_death, or never will. + */ + return !(target->utrace_flags & DEATH_EVENTS); +} + + +static struct utrace_attached_engine * +matching_engine(struct utrace *utrace, int flags, + const struct utrace_engine_ops *ops, void *data) +{ + struct utrace_attached_engine *engine; + list_for_each_entry_rcu(engine, &utrace->engines, entry) { + if ((flags & UTRACE_ATTACH_MATCH_OPS) + && engine->ops != ops) + continue; + if ((flags & UTRACE_ATTACH_MATCH_DATA) + && engine->data != data) + continue; + return engine; + } + return ERR_PTR(-ENOENT); +} + + +/** + * utrace_attach - Attach new engine to a thread, or look up attached engines. + * @target: thread to attach to + * @flags: %UTRACE_ATTACH_* flags + * @ops: callback table for new engine + * @data: engine private data pointer + * + * The caller must ensure that the @target thread does not get freed, + * i.e. hold a ref or be its parent. + * + * If %UTRACE_ATTACH_CREATE is not specified, you only look up an existing + * engine already attached to the thread. If %UTRACE_ATTACH_MATCH_* bits + * are set, only consider matching engines. If %UTRACE_ATTACH_EXCLUSIVE is + * set, attempting to attach a second (matching) engine fails with -%EEXIST. + */ +struct utrace_attached_engine * +utrace_attach(struct task_struct *target, int flags, + const struct utrace_engine_ops *ops, void *data) +{ + struct utrace *utrace; + struct utrace_attached_engine *engine; + +restart: + rcu_read_lock(); + utrace = rcu_dereference(target->utrace); + smp_rmb(); + if (unlikely(target->exit_state == EXIT_DEAD)) { + /* + * The target has already been reaped. + * Check this first; a race with reaping may lead to restart. + */ + rcu_read_unlock(); + if (!(flags & UTRACE_ATTACH_CREATE)) + return ERR_PTR(-ENOENT); + return ERR_PTR(-ESRCH); + } + + if (utrace == NULL) { + rcu_read_unlock(); + + if (!(flags & UTRACE_ATTACH_CREATE)) + return ERR_PTR(-ENOENT); + + engine = kmem_cache_alloc(utrace_engine_cachep, GFP_KERNEL); + if (unlikely(engine == NULL)) + return ERR_PTR(-ENOMEM); + engine->flags = 0; + CHECK_INIT(engine); + + goto first; + } + + if (!(flags & UTRACE_ATTACH_CREATE)) { + engine = matching_engine(utrace, flags, ops, data); + rcu_read_unlock(); + return engine; + } + rcu_read_unlock(); + + engine = kmem_cache_alloc(utrace_engine_cachep, GFP_KERNEL); + if (unlikely(engine == NULL)) + return ERR_PTR(-ENOMEM); + engine->flags = 0; + CHECK_INIT(engine); + + rcu_read_lock(); + utrace = rcu_dereference(target->utrace); + if (unlikely(utrace == NULL)) { /* Race with detach. 
*/ + rcu_read_unlock(); + goto first; + } + spin_lock(&utrace->lock); + + if (flags & UTRACE_ATTACH_EXCLUSIVE) { + struct utrace_attached_engine *old; + old = matching_engine(utrace, flags, ops, data); + if (!IS_ERR(old)) { + spin_unlock(&utrace->lock); + rcu_read_unlock(); + kmem_cache_free(utrace_engine_cachep, engine); + return ERR_PTR(-EEXIST); + } + } + + if (unlikely(rcu_dereference(target->utrace) != utrace)) { + /* + * We lost a race with other CPUs doing a sequence + * of detach and attach before we got in. + */ + spin_unlock(&utrace->lock); + rcu_read_unlock(); + kmem_cache_free(utrace_engine_cachep, engine); + goto restart; + } + rcu_read_unlock(); + + list_add_tail_rcu(&engine->entry, &utrace->engines); + goto finish; + +first: + utrace = utrace_first_engine(target, engine); + if (IS_ERR(utrace) || unlikely(utrace == NULL)) { + kmem_cache_free(utrace_engine_cachep, engine); + if (unlikely(utrace == NULL)) /* Race condition. */ + goto restart; + return ERR_PTR(PTR_ERR(utrace)); + } + +finish: + engine->ops = ops; + engine->data = data; + + spin_unlock(&utrace->lock); + + return engine; +} +EXPORT_SYMBOL_GPL(utrace_attach); + +/* + * When an engine is detached, the target thread may still see it and make + * callbacks until it quiesces. We reset its event flags to just QUIESCE + * and install a special ops vector whose callback is dead_engine_delete. + * When the target thread quiesces, it can safely free the engine itself. + */ +static u32 +dead_engine_delete(struct utrace_attached_engine *engine, + struct task_struct *tsk) +{ + return UTRACE_ACTION_DETACH; +} + +static const struct utrace_engine_ops dead_engine_ops = +{ + .report_quiesce = &dead_engine_delete +}; + + +/* + * Called with utrace locked. Recompute the union of engines' flags. + */ +static inline unsigned long +rescan_flags(struct utrace *utrace) +{ + struct utrace_attached_engine *engine; + unsigned long flags = 0; + list_for_each_entry(engine, &utrace->engines, entry) + flags |= engine->flags | UTRACE_EVENT(REAP); + return flags; +} + +/* + * Only these flags matter any more for a dead task (exit_state set). + * We use this mask on flags installed in ->utrace_flags after + * exit_notify (and possibly utrace_report_death) has run. + * This ensures that utrace_release_task knows positively that + * utrace_report_death will not run later. + */ +#define DEAD_FLAGS_MASK (UTRACE_EVENT(REAP) | UTRACE_ACTION_NOREAP) + +/* + * Flags bits in utrace->u.exit.flags word. These are private + * communication among utrace_report_death, utrace_release_task, + * utrace_detach, and utrace_set_flags. + */ +#define EXIT_FLAG_DEATH 1 /* utrace_report_death running */ +#define EXIT_FLAG_DELAYED_GROUP_LEADER 2 /* utrace_delayed_group_leader ran */ +#define EXIT_FLAG_REAP 4 /* release_task ran */ + + +/* + * We may have been the one keeping the target thread quiescent. + * Check if it should wake up now. + * Called with utrace locked, and unlocks it on return. + * If we were keeping it stopped, resume it. + * If we were keeping its zombie from reporting/self-reap, do it now. + */ +static void +wake_quiescent(unsigned long old_flags, + struct utrace *utrace, struct task_struct *target) + __releases(utrace->lock) +{ + unsigned long flags; + + /* + * Update the set of events of interest from the union + * of the interests of the remaining tracing engines. 
+ */ + flags = rescan_flags(utrace); + if (target->exit_state) { + BUG_ON(utrace->u.exit.flags & EXIT_FLAG_DEATH); + flags &= DEAD_FLAGS_MASK; + } + check_dead_utrace(target, utrace, flags); + + if (target->exit_state || (flags & UTRACE_ACTION_QUIESCE)) + return; + + read_lock(&tasklist_lock); + if (!unlikely(target->exit_state)) { + /* + * The target is not dead and should not be in tracing stop + * any more. Wake it unless it's in job control stop. + */ + spin_lock_irq(&target->sighand->siglock); + if (target->signal->flags & SIGNAL_STOP_STOPPED) { + int stop_count = target->signal->group_stop_count; + target->state = TASK_STOPPED; + spin_unlock_irq(&target->sighand->siglock); + + /* + * If tracing was preventing a CLD_STOPPED report + * and is no longer, do that report right now. + */ + if (stop_count == 0 + && ((old_flags &~ flags) & UTRACE_ACTION_NOREAP)) + do_notify_parent_cldstop(target, CLD_STOPPED); + } + else { + /* + * Wake the task up. + */ + recalc_sigpending_and_wake(target); + wake_up_state(target, TASK_STOPPED | TASK_TRACED); + spin_unlock_irq(&target->sighand->siglock); + } + } + read_unlock(&tasklist_lock); +} + +/* + * The engine is supposed to be attached. The caller really needs + * rcu_read_lock if it wants to look at the engine struct + * (e.g. engine->data), to be sure it hasn't been freed by utrace_reap + * asynchronously--unless he has synchronized with his report_reap + * callback, which would have happened before then. A simultaneous + * utrace_detach call or UTRACE_ACTION_DETACH return from a callback can + * also free the engine if rcu_read_lock is not held, but that is in the + * tracing engine's power to avoid. + * + * Get the utrace lock for the target task. + * Returns the struct if locked, or ERR_PTR(-errno). + * + * This has to be robust against races with: + * utrace_detach calls + * UTRACE_ACTION_DETACH after reports + * utrace_report_death + * utrace_release_task + */ +static struct utrace * +get_utrace_lock_attached(struct task_struct *target, + struct utrace_attached_engine *engine) + __acquires(utrace->lock) +{ + struct utrace *utrace; + + rcu_read_lock(); + utrace = rcu_dereference(target->utrace); + smp_rmb(); + if (unlikely(utrace == NULL) + || unlikely(target->exit_state == EXIT_DEAD)) + /* + * If all engines detached already, utrace is clear. + * Otherwise, we're called after utrace_release_task might + * have started. A call to this engine's report_reap + * callback might already be in progress or engine might + * even have been freed already. + */ + utrace = ERR_PTR(-ESRCH); + else { + spin_lock(&utrace->lock); + if (unlikely(rcu_dereference(target->utrace) != utrace) + || unlikely(rcu_dereference(engine->ops) + == &dead_engine_ops)) { + /* + * By the time we got the utrace lock, + * it had been reaped or detached already. + */ + spin_unlock(&utrace->lock); + utrace = ERR_PTR(-ESRCH); + } + } + rcu_read_unlock(); + + return utrace; +} + +/** + * utrace_detach - Detach a tracing engine from a thread. + * @target: thread to detach from + * @engine: engine attached to @target + * + * After this, the engine data structure is no longer accessible, and the + * thread might be reaped. The thread will start running again if it was + * being kept quiescent and no longer has any attached engines asserting + * %UTRACE_ACTION_QUIESCE. + * + * If the target thread is not already quiescent, then a callback to this + * engine might be in progress or about to start on another CPU. 
If it's + * quiescent when utrace_detach() is called, then after successful return + * it's guaranteed that no more callbacks to the ops vector will be done. + * The only exception is %SIGKILL (and exec by another thread in the group), + * which breaks quiescence and can cause asynchronous %DEATH and/or %REAP + * callbacks even when %UTRACE_ACTION_QUIESCE is set. In that event, + * utrace_detach() fails with -%ESRCH or -%EALREADY to indicate that the + * report_reap() or report_death() callbacks have begun or will run imminently. + */ +int +utrace_detach(struct task_struct *target, + struct utrace_attached_engine *engine) +{ + struct utrace *utrace; + unsigned long flags; + + utrace = get_utrace_lock_attached(target, engine); + if (unlikely(IS_ERR(utrace))) + return PTR_ERR(utrace); + + /* + * On the exit path, DEATH and QUIESCE event bits are set only + * before utrace_report_death has taken the lock. At that point, + * the death report will come soon, so disallow detach until it's + * done. This prevents us from racing with it detaching itself. + */ + if (target->exit_state + && (unlikely(target->utrace_flags & DEATH_EVENTS) + || unlikely(utrace->u.exit.flags & (EXIT_FLAG_DEATH + | EXIT_FLAG_REAP)))) { + /* + * We have already started the death report, or + * even entered release_task. We can't prevent + * the report_death and report_reap callbacks, + * so tell the caller they will happen. + */ + int ret = ((utrace->u.exit.flags & EXIT_FLAG_REAP) + ? -ESRCH : -EALREADY); + spin_unlock(&utrace->lock); + return ret; + } + + flags = engine->flags; + engine->flags = UTRACE_EVENT(QUIESCE) | UTRACE_ACTION_QUIESCE; + rcu_assign_pointer(engine->ops, &dead_engine_ops); + + if (quiesce(target, 1)) { + remove_engine(engine, target, utrace); + wake_quiescent(flags, utrace, target); + } + else + spin_unlock(&utrace->lock); + + + return 0; +} +EXPORT_SYMBOL_GPL(utrace_detach); + + +/* + * Called with utrace->lock held. + * Notify and clean up all engines, then free utrace. + */ +static void +utrace_reap(struct task_struct *target, struct utrace *utrace) + __releases(utrace->lock) +{ + struct utrace_attached_engine *engine, *next; + const struct utrace_engine_ops *ops; + +restart: + list_for_each_entry_safe(engine, next, &utrace->engines, entry) { + list_del_rcu(&engine->entry); + + /* + * Now nothing else refers to this engine. + */ + if (engine->flags & UTRACE_EVENT(REAP)) { + ops = rcu_dereference(engine->ops); + if (ops != &dead_engine_ops) { + spin_unlock(&utrace->lock); + (*ops->report_reap)(engine, target); + rcu_engine_free(engine); + spin_lock(&utrace->lock); + goto restart; + } + } + rcu_engine_free(engine); + } + + rcu_utrace_free(utrace); +} + +/* + * Called by release_task. After this, target->utrace must be cleared. + */ +void +utrace_release_task(struct task_struct *target) +{ + struct utrace *utrace; + + task_lock(target); + utrace = rcu_dereference(target->utrace); + rcu_assign_pointer(target->utrace, NULL); + task_unlock(target); + + if (unlikely(utrace == NULL)) + return; + + spin_lock(&utrace->lock); + /* + * If the list is empty, utrace is already on its way to be freed. + * We raced with detach and we won the task_lock race but lost the + * utrace->lock race. All we have to do is let RCU run. + */ + if (!unlikely(list_empty(&utrace->engines))) { + utrace->u.exit.flags |= EXIT_FLAG_REAP; + + if (!(target->utrace_flags & DEATH_EVENTS)) { + utrace_reap(target, utrace); /* Unlocks and frees. 
*/ + return; + } + + /* + * The target will do some final callbacks but hasn't + * finished them yet. We know because it clears these + * event bits after it's done. Instead of cleaning up here + * and requiring utrace_report_death to cope with it, we + * delay the REAP report and the teardown until after the + * target finishes its death reports. + */ + } + spin_unlock(&utrace->lock); +} + +/** + * utrace_set_flags - Change the flags for a tracing engine. + * @target: thread to affect + * @engine: attached engine to affect + * @flags: new flags value + * + * This resets the event flags and the action state flags. + * If %UTRACE_ACTION_QUIESCE and %UTRACE_EVENT(%QUIESCE) are set, + * this will cause a report_quiesce() callback soon, maybe immediately. + * If %UTRACE_ACTION_QUIESCE was set before and is no longer set by + * any engine, this will wake the thread up. + * + * This fails with -%EALREADY and does nothing if you try to clear + * %UTRACE_EVENT(%DEATH) when the report_death() callback may already have + * begun, if you try to clear %UTRACE_EVENT(%REAP) when the report_reap() + * callback may already have begun, if you try to newly set + * %UTRACE_ACTION_NOREAP when the target may already have sent its + * parent %SIGCHLD, or if you try to newly set %UTRACE_EVENT(%DEATH), + * %UTRACE_EVENT(%QUIESCE), or %UTRACE_ACTION_QUIESCE, when the target is + * already dead or dying. It can fail with -%ESRCH when the target has + * already been detached (including forcible detach on reaping). If + * the target was quiescent before the call, then after a successful + * call, no event callbacks not requested in the new flags will be + * made, and a report_quiesce() callback will always be made if + * requested. These rules provide for coherent synchronization based + * on quiescence, even when %SIGKILL is breaking quiescence. + */ +int +utrace_set_flags(struct task_struct *target, + struct utrace_attached_engine *engine, + unsigned long flags) +{ + struct utrace *utrace; + int report; + unsigned long old_flags, old_utrace_flags; + int ret = -EALREADY; + +#ifdef ARCH_HAS_SINGLE_STEP + if (! ARCH_HAS_SINGLE_STEP) +#endif + WARN_ON(flags & UTRACE_ACTION_SINGLESTEP); +#ifdef ARCH_HAS_BLOCK_STEP + if (! ARCH_HAS_BLOCK_STEP) +#endif + WARN_ON(flags & UTRACE_ACTION_BLOCKSTEP); + + utrace = get_utrace_lock_attached(target, engine); + if (unlikely(IS_ERR(utrace))) + return PTR_ERR(utrace); + +restart: /* See below. */ + + old_utrace_flags = target->utrace_flags; + old_flags = engine->flags; + + if (target->exit_state + && (((flags &~ old_flags) & (UTRACE_ACTION_QUIESCE + | UTRACE_ACTION_NOREAP + | DEATH_EVENTS)) + || ((utrace->u.exit.flags & EXIT_FLAG_DEATH) + && ((old_flags &~ flags) & DEATH_EVENTS)) + || ((utrace->u.exit.flags & EXIT_FLAG_REAP) + && ((old_flags &~ flags) & UTRACE_EVENT(REAP))))) { + spin_unlock(&utrace->lock); + return ret; + } + + /* + * When setting these flags, it's essential that we really + * synchronize with exit_notify. They cannot be set after + * exit_notify takes the tasklist_lock. By holding the read + * lock here while setting the flags, we ensure that the calls + * to tracehook_notify_death and tracehook_report_death will + * see the new flags. This ensures that utrace_release_task + * knows positively that utrace_report_death will be called or + * that it won't. 
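	 *
	 * Schematically, the ordering relied on here is (a sketch of
	 * the argument, not additional locking):  either we take the
	 * read lock before exit_notify takes the write lock, in which
	 * case the exit path sees the new flags; or we take it after,
	 * in which case we see exit_state set and fail with -EALREADY.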
+ */ + if ((flags &~ old_utrace_flags) & (UTRACE_ACTION_NOREAP + | DEATH_EVENTS)) { + read_lock(&tasklist_lock); + if (unlikely(target->exit_state)) { + read_unlock(&tasklist_lock); + spin_unlock(&utrace->lock); + return ret; + } + target->utrace_flags |= flags; + read_unlock(&tasklist_lock); + } + + engine->flags = flags; + target->utrace_flags |= flags; + ret = 0; + + report = 0; + if ((old_flags ^ flags) & UTRACE_ACTION_QUIESCE) { + if (flags & UTRACE_ACTION_QUIESCE) { + report = (quiesce(target, 1) + && (flags & UTRACE_EVENT(QUIESCE))); + spin_unlock(&utrace->lock); + } + else + goto wake; + } + else if (((old_flags &~ flags) & UTRACE_ACTION_NOREAP) + && target->exit_state) + goto wake; + else { + /* + * If we're asking for single-stepping or syscall tracing, + * we need to pass through utrace_quiescent before resuming + * in user mode to get those effects, even if the target is + * not going to be quiescent right now. + */ + if (!(target->utrace_flags & UTRACE_ACTION_QUIESCE) + && !target->exit_state + && ((flags &~ old_utrace_flags) + & (UTRACE_ACTION_SINGLESTEP | UTRACE_ACTION_BLOCKSTEP + | UTRACE_EVENT_SYSCALL))) + quiesce(target, 0); + spin_unlock(&utrace->lock); + } + + if (report) { /* Already quiescent, won't report itself. */ + u32 action = (*engine->ops->report_quiesce)(engine, target); + if (action & UTRACE_ACTION_DETACH) + utrace_detach(target, engine); + else if (action & UTRACE_ACTION_NEWSTATE) { + /* + * The callback has us changing the flags yet + * again. Since we released the lock, they + * could have changed asynchronously just now. + * We must refetch the current flags to change + * the %UTRACE_ACTION_STATE_MASK bits. If the + * target thread started dying, then there is + * nothing we can do--but that failure is due + * to the report_quiesce() callback after the + * original utrace_set_flags has already + * succeeded, so we don't want to return + * failure here (hence leave ret = 0). + */ + utrace = get_utrace_lock_attached(target, engine); + if (!unlikely(IS_ERR(utrace))) { + flags = action & UTRACE_ACTION_STATE_MASK; + flags |= (engine->flags + &~ UTRACE_ACTION_STATE_MASK); + goto restart; + } + } + } + + return ret; + +wake: + /* + * It's quiescent now and needs to wake up. + * + * On the exit path, it's only truly quiescent if it has + * already been through utrace_report_death, or never will. + */ + if (unlikely(target->exit_state) + && unlikely(target->utrace_flags & DEATH_EVENTS)) + spin_unlock(&utrace->lock); + else + wake_quiescent(old_flags, utrace, target); + + return ret; +} +EXPORT_SYMBOL_GPL(utrace_set_flags); + +/* + * While running an engine callback, no locks are held. + * If a callback updates its engine's action state, then + * we need to take the utrace lock to install the flags update. + */ +static inline u32 +update_action(struct task_struct *tsk, struct utrace *utrace, + struct utrace_attached_engine *engine, + u32 ret) +{ + if (ret & UTRACE_ACTION_DETACH) + rcu_assign_pointer(engine->ops, &dead_engine_ops); + else if ((ret & UTRACE_ACTION_NEWSTATE) + && ((ret ^ engine->flags) & UTRACE_ACTION_STATE_MASK)) { +#ifdef ARCH_HAS_SINGLE_STEP + if (! ARCH_HAS_SINGLE_STEP) +#endif + WARN_ON(ret & UTRACE_ACTION_SINGLESTEP); +#ifdef ARCH_HAS_BLOCK_STEP + if (! ARCH_HAS_BLOCK_STEP) +#endif + WARN_ON(ret & UTRACE_ACTION_BLOCKSTEP); + spin_lock(&utrace->lock); + /* + * If we're changing something other than just QUIESCE, + * make sure we pass through utrace_quiescent before + * resuming even if we aren't going to stay quiescent. 
+ * That's where we get the correct union of all engines' + * flags after they've finished changing, and apply changes. + */ + if (((ret ^ engine->flags) & (UTRACE_ACTION_STATE_MASK + & ~UTRACE_ACTION_QUIESCE))) + tsk->utrace_flags |= UTRACE_ACTION_QUIESCE; + engine->flags &= ~UTRACE_ACTION_STATE_MASK; + engine->flags |= ret & UTRACE_ACTION_STATE_MASK; + tsk->utrace_flags |= engine->flags; + spin_unlock(&utrace->lock); + } + else + ret |= engine->flags & UTRACE_ACTION_STATE_MASK; + return ret; +} + +#define REPORT(callback, ...) do { \ + u32 ret = (*rcu_dereference(engine->ops)->callback) \ + (engine, tsk, ##__VA_ARGS__); \ + action = update_action(tsk, utrace, engine, ret); \ + } while (0) + + +/* + * Called with utrace->lock held, returns with it released. + */ +static u32 +remove_detached(struct task_struct *tsk, struct utrace *utrace, + u32 action, unsigned long mask) + __releases(utrace->lock) +{ + struct utrace_attached_engine *engine, *next; + unsigned long flags = 0; + + list_for_each_entry_safe(engine, next, &utrace->engines, entry) { + if (engine->ops == &dead_engine_ops) + remove_engine(engine, tsk, utrace); + else + flags |= engine->flags | UTRACE_EVENT(REAP); + } + check_dead_utrace(tsk, utrace, flags & mask); + + flags &= UTRACE_ACTION_STATE_MASK; + return flags | (action & UTRACE_ACTION_OP_MASK); +} + +/* + * Called after an event report loop. Remove any engines marked for detach. + */ +static inline u32 +check_detach(struct task_struct *tsk, u32 action) +{ + if (action & UTRACE_ACTION_DETACH) { + /* + * This must be current to be sure it's not possibly + * getting into utrace_report_death. + */ + struct utrace *utrace; + BUG_ON(tsk != current); + utrace = tsk->utrace; + spin_lock(&utrace->lock); + action = remove_detached(tsk, utrace, action, ~0UL); + } + return action; +} + +static inline int +check_quiescent(struct task_struct *tsk, u32 action) +{ + if (action & UTRACE_ACTION_STATE_MASK) + return utrace_quiescent(tsk, NULL); + return 0; +} + +/* + * Called iff UTRACE_EVENT(CLONE) flag is set. + * This notification call blocks the wake_up_new_task call on the child. + * So we must not quiesce here. tracehook_report_clone_complete will do + * a quiescence check momentarily. + */ +void +utrace_report_clone(unsigned long clone_flags, struct task_struct *child) +{ + struct task_struct *tsk = current; + struct utrace *utrace = tsk->utrace; + struct list_head *pos, *next; + struct utrace_attached_engine *engine; + unsigned long action; + + utrace->u.live.cloning = child; + + /* XXX must change for sharing */ + action = UTRACE_ACTION_RESUME; + list_for_each_safe_rcu(pos, next, &utrace->engines) { + engine = list_entry(pos, struct utrace_attached_engine, entry); + if (engine->flags & UTRACE_EVENT(CLONE)) + REPORT(report_clone, clone_flags, child); + if (action & UTRACE_ACTION_HIDE) + break; + } + + utrace->u.live.cloning = NULL; + + check_detach(tsk, action); +} + +static unsigned long +report_quiescent(struct task_struct *tsk, struct utrace *utrace, u32 action) +{ + struct list_head *pos, *next; + struct utrace_attached_engine *engine; + + list_for_each_safe_rcu(pos, next, &utrace->engines) { + engine = list_entry(pos, struct utrace_attached_engine, entry); + if (engine->flags & UTRACE_EVENT(QUIESCE)) + REPORT(report_quiesce); + action |= engine->flags & UTRACE_ACTION_STATE_MASK; + } + + return check_detach(tsk, action); +} + +/* + * Called iff UTRACE_EVENT(JCTL) flag is set. 
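+ *
+ * The what argument is a CLD_* job control code (CLD_STOPPED gets
+ * special handling below).  A nonzero return, UTRACE_JCTL_NOSIGCHLD,
+ * tells the caller to suppress the parent's SIGCHLD notification.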
+ */ +int +utrace_report_jctl(int what) +{ + struct task_struct *tsk = current; + struct utrace *utrace = tsk->utrace; + struct list_head *pos, *next; + struct utrace_attached_engine *engine; + unsigned long action; + + /* XXX must change for sharing */ + action = UTRACE_ACTION_RESUME; + list_for_each_safe_rcu(pos, next, &utrace->engines) { + engine = list_entry(pos, struct utrace_attached_engine, entry); + if (engine->flags & UTRACE_EVENT(JCTL)) + REPORT(report_jctl, what); + if (action & UTRACE_ACTION_HIDE) + break; + } + + /* + * We are becoming quiescent, so report it now. + * We don't block in utrace_quiescent because we are stopping anyway. + * We know that upon resuming we'll go through tracehook_induce_signal, + * which will keep us quiescent or set us up to resume with tracing. + */ + action = report_quiescent(tsk, utrace, action); + + if (what == CLD_STOPPED && tsk->state != TASK_STOPPED) { + /* + * The event report hooks could have blocked, though + * it should have been briefly. Make sure we're in + * TASK_STOPPED state again to block properly, unless + * we've just come back out of job control stop. + */ + spin_lock_irq(&tsk->sighand->siglock); + if (tsk->signal->flags & SIGNAL_STOP_STOPPED) + set_current_state(TASK_STOPPED); + spin_unlock_irq(&tsk->sighand->siglock); + } + + /* + * We clear the UTRACE_EVENT(JCTL) bit to indicate that we are now + * in a truly quiescent TASK_STOPPED state. After this, we can be + * detached by another thread. Setting UTRACE_ACTION_QUIESCE + * ensures that we will go through utrace_quiescent and recompute + * flags after we resume. + */ + spin_lock(&utrace->lock); + tsk->utrace_flags &= ~UTRACE_EVENT(JCTL); + tsk->utrace_flags |= UTRACE_ACTION_QUIESCE; + spin_unlock(&utrace->lock); + + return action & UTRACE_JCTL_NOSIGCHLD; +} + + +/* + * Return nonzero if there is a SIGKILL that should be waking us up. + * Called with the siglock held. + */ +static inline int +sigkill_pending(struct task_struct *tsk) +{ + return ((sigismember(&tsk->pending.signal, SIGKILL) + || sigismember(&tsk->signal->shared_pending.signal, SIGKILL)) + && !unlikely(sigismember(&tsk->blocked, SIGKILL))); +} + +/* + * Called if UTRACE_EVENT(QUIESCE) or UTRACE_ACTION_QUIESCE flag is set. + * Also called after other event reports. + * It is a good time to block. + * Returns nonzero if we woke up prematurely due to SIGKILL. + * + * The signal pointer is nonzero when called from utrace_get_signal, + * where a pending forced signal can be processed right away. Otherwise, + * we keep UTRACE_ACTION_QUIESCE set after resuming so that utrace_get_signal + * will be entered before user mode. + */ +int +utrace_quiescent(struct task_struct *tsk, struct utrace_signal *signal) +{ + struct utrace *utrace = tsk->utrace; + unsigned long action; + +restart: + /* XXX must change for sharing */ + + action = report_quiescent(tsk, utrace, UTRACE_ACTION_RESUME); + + /* + * If some engines want us quiescent, we block here. + */ + if (action & UTRACE_ACTION_QUIESCE) { + int killed; + + if (signal != NULL) { + BUG_ON(utrace->u.live.signal != NULL); + utrace->u.live.signal = signal; + } + + spin_lock_irq(&tsk->sighand->siglock); + /* + * If wake_quiescent is trying to wake us up now, it will + * have cleared the QUIESCE flag before trying to take the + * siglock. Now we have the siglock, so either it has + * already cleared the flag, or it will wake us up after we + * release the siglock it's waiting for. + * Never stop when there is a SIGKILL bringing us down. 
+ */ + killed = sigkill_pending(tsk); + if (!killed && (tsk->utrace_flags & UTRACE_ACTION_QUIESCE)) { + set_current_state(TASK_TRACED); + /* + * If there is a group stop in progress, + * we must participate in the bookkeeping. + */ + if (tsk->signal->group_stop_count > 0) + --tsk->signal->group_stop_count; + spin_unlock_irq(&tsk->sighand->siglock); + schedule(); + } + else + spin_unlock_irq(&tsk->sighand->siglock); + + if (signal != NULL) { + /* + * We know the struct stays in place when its + * u.live.signal is set, see check_dead_utrace. + * This makes it safe to clear its pointer here. + */ + BUG_ON(tsk->utrace != utrace); + BUG_ON(utrace->u.live.signal != signal); + utrace->u.live.signal = NULL; + } + + if (killed) /* Game over, man! */ + return 1; + + /* + * We've woken up. One engine could be waking us up while + * another has asked us to quiesce. So check afresh. We + * could have been detached while quiescent. Now we are no + * longer quiescent, so don't need to do any RCU locking. + * But we do need to check our utrace pointer anew. + */ + utrace = tsk->utrace; + if (tsk->utrace_flags + & (UTRACE_EVENT(QUIESCE) | UTRACE_ACTION_STATE_MASK)) + goto restart; + } + else if (tsk->utrace_flags & UTRACE_ACTION_QUIESCE) { + /* + * Our flags are out of date. + * Update the set of events of interest from the union + * of the interests of the remaining tracing engines. + * This may notice that there are no engines left + * and clean up the struct utrace. It's left in place + * and the QUIESCE flag set as long as utrace_get_signal + * still needs to process a pending forced signal. + */ + unsigned long flags; + utrace = rcu_dereference(tsk->utrace); + spin_lock(&utrace->lock); + flags = rescan_flags(utrace); + if (flags == 0) + utrace_clear_tsk(tsk, utrace); + check_dead_utrace(tsk, utrace, flags); + } + + /* + * We're resuming. Update the machine layer tracing state and then go. + */ +#ifdef ARCH_HAS_SINGLE_STEP + if (action & UTRACE_ACTION_SINGLESTEP) + tracehook_enable_single_step(tsk); + else + tracehook_disable_single_step(tsk); +#endif +#ifdef ARCH_HAS_BLOCK_STEP + if ((action & (UTRACE_ACTION_BLOCKSTEP|UTRACE_ACTION_SINGLESTEP)) + == UTRACE_ACTION_BLOCKSTEP) + tracehook_enable_block_step(tsk); + else + tracehook_disable_block_step(tsk); +#endif + if (tsk->utrace_flags & UTRACE_EVENT_SYSCALL) + tracehook_enable_syscall_trace(tsk); + else + tracehook_disable_syscall_trace(tsk); + + return 0; +} + + +/* + * Called iff UTRACE_EVENT(EXIT) flag is set. + */ +void +utrace_report_exit(long *exit_code) +{ + struct task_struct *tsk = current; + struct utrace *utrace = tsk->utrace; + struct list_head *pos, *next; + struct utrace_attached_engine *engine; + unsigned long action; + long orig_code = *exit_code; + + /* XXX must change for sharing */ + action = UTRACE_ACTION_RESUME; + list_for_each_safe_rcu(pos, next, &utrace->engines) { + engine = list_entry(pos, struct utrace_attached_engine, entry); + if (engine->flags & UTRACE_EVENT(EXIT)) + REPORT(report_exit, orig_code, exit_code); + } + action = check_detach(tsk, action); + check_quiescent(tsk, action); +} + +/* + * Called with utrace locked, unlocks it on return. Unconditionally + * recompute the flags after report_death is finished. This may notice + * that there are no engines left and free the utrace struct. 
+ */ +static void +finish_report_death(struct task_struct *tsk, struct utrace *utrace) + __releases(utrace->lock) +{ + /* + * After we unlock (possibly inside utrace_reap for callbacks) with + * this flag clear, competing utrace_detach/utrace_set_flags calls + * know that we've finished our callbacks and any detach bookkeeping. + */ + utrace->u.exit.flags &= EXIT_FLAG_REAP; + + if (utrace->u.exit.flags & EXIT_FLAG_REAP) + /* + * utrace_release_task was already called in parallel. + * We must complete its work now. + */ + utrace_reap(tsk, utrace); + else + /* + * Clear out any detached engines and in the process + * recompute the flags. Mask off event bits we can't + * see any more. This tells utrace_release_task we + * have already finished, if it comes along later. + * Note this all happens on the already-locked utrace, + * which might already be removed from the task. + */ + remove_detached(tsk, utrace, 0, DEAD_FLAGS_MASK); +} + +/* + * Called with utrace locked, unlocks it on return. + * EXIT_FLAG_DELAYED_GROUP_LEADER is set. + * Do second report_death callbacks for engines using NOREAP. + */ +static void +report_delayed_group_leader(struct task_struct *tsk, struct utrace *utrace) + __releases(utrace->lock) +{ + struct list_head *pos, *next; + struct utrace_attached_engine *engine; + u32 action; + + utrace->u.exit.flags |= EXIT_FLAG_DEATH; + spin_unlock(&utrace->lock); + + /* XXX must change for sharing */ + list_for_each_safe_rcu(pos, next, &utrace->engines) { + engine = list_entry(pos, struct utrace_attached_engine, entry); +#define NOREAP_DEATH (UTRACE_EVENT(DEATH) | UTRACE_ACTION_NOREAP) + if ((engine->flags & NOREAP_DEATH) == NOREAP_DEATH) + REPORT(report_death); + } + + spin_lock(&utrace->lock); + finish_report_death(tsk, utrace); +} + +/* + * Called iff UTRACE_EVENT(DEATH) or UTRACE_ACTION_QUIESCE flag is set. + * + * It is always possible that we are racing with utrace_release_task here, + * if UTRACE_ACTION_NOREAP is not set, or in the case of non-leader exec + * where the old leader will get released regardless of NOREAP. For this + * reason, utrace_release_task checks for the event bits that get us here, + * and delays its cleanup for us to do. + */ +void +utrace_report_death(struct task_struct *tsk, struct utrace *utrace) +{ + struct list_head *pos, *next; + struct utrace_attached_engine *engine; + u32 action; + + BUG_ON(!tsk->exit_state); + + /* + * We are presently considered "quiescent"--which is accurate + * inasmuch as we won't run any more user instructions ever again. + * But for utrace_detach and utrace_set_flags to be robust, they + * must be sure whether or not we will run any more callbacks. If + * a call comes in before we do, taking the lock here synchronizes + * us so we don't run any callbacks just disabled. Calls that come + * in while we're running the callbacks will see the report_death + * flag and know that we are not yet fully quiescent for purposes + * of detach bookkeeping. 
+ */ + spin_lock(&utrace->lock); + BUG_ON(utrace->u.exit.flags & EXIT_FLAG_DEATH); + utrace->u.exit.flags &= EXIT_FLAG_REAP; + utrace->u.exit.flags |= EXIT_FLAG_DEATH; + spin_unlock(&utrace->lock); + + /* XXX must change for sharing */ + list_for_each_safe_rcu(pos, next, &utrace->engines) { + engine = list_entry(pos, struct utrace_attached_engine, entry); + if (engine->flags & UTRACE_EVENT(DEATH)) + REPORT(report_death); + if (engine->flags & UTRACE_EVENT(QUIESCE)) + REPORT(report_quiesce); + } + + spin_lock(&utrace->lock); + if (unlikely(utrace->u.exit.flags & EXIT_FLAG_DELAYED_GROUP_LEADER)) + /* + * Another thread's release_task came along and + * removed the delayed_group_leader condition, + * but after we might have started callbacks. + * Do the second report_death callback right now. + */ + report_delayed_group_leader(tsk, utrace); + else + finish_report_death(tsk, utrace); +} + +/* + * We're called from release_task when delayed_group_leader(tsk) was + * previously true and is no longer true, and NOREAP was set. + * This means no parent notifications have happened for this zombie. + */ +void +utrace_report_delayed_group_leader(struct task_struct *tsk) +{ + struct utrace *utrace; + + rcu_read_lock(); + utrace = rcu_dereference(tsk->utrace); + if (unlikely(utrace == NULL)) { + rcu_read_unlock(); + return; + } + spin_lock(&utrace->lock); + rcu_read_unlock(); + + utrace->u.exit.flags |= EXIT_FLAG_DELAYED_GROUP_LEADER; + + /* + * If utrace_report_death is still running, or release_task has + * started already, there is nothing more to do now. + */ + if ((utrace->u.exit.flags & (EXIT_FLAG_DEATH | EXIT_FLAG_REAP)) + || !likely(tsk->utrace_flags & UTRACE_ACTION_NOREAP)) + spin_unlock(&utrace->lock); + else + report_delayed_group_leader(tsk, utrace); +} + +/* + * Called iff UTRACE_EVENT(VFORK_DONE) flag is set. + */ +void +utrace_report_vfork_done(pid_t child_pid) +{ + struct task_struct *tsk = current; + struct utrace *utrace = tsk->utrace; + struct list_head *pos, *next; + struct utrace_attached_engine *engine; + unsigned long action; + + /* XXX must change for sharing */ + action = UTRACE_ACTION_RESUME; + list_for_each_safe_rcu(pos, next, &utrace->engines) { + engine = list_entry(pos, struct utrace_attached_engine, entry); + if (engine->flags & UTRACE_EVENT(VFORK_DONE)) + REPORT(report_vfork_done, child_pid); + if (action & UTRACE_ACTION_HIDE) + break; + } + action = check_detach(tsk, action); + check_quiescent(tsk, action); +} + +/* + * Called iff UTRACE_EVENT(EXEC) flag is set. + */ +void +utrace_report_exec(struct linux_binprm *bprm, struct pt_regs *regs) +{ + struct task_struct *tsk = current; + struct utrace *utrace = tsk->utrace; + struct list_head *pos, *next; + struct utrace_attached_engine *engine; + unsigned long action; + + /* XXX must change for sharing */ + action = UTRACE_ACTION_RESUME; + list_for_each_safe_rcu(pos, next, &utrace->engines) { + engine = list_entry(pos, struct utrace_attached_engine, entry); + if (engine->flags & UTRACE_EVENT(EXEC)) + REPORT(report_exec, bprm, regs); + if (action & UTRACE_ACTION_HIDE) + break; + } + action = check_detach(tsk, action); + check_quiescent(tsk, action); +} + +/* + * Called iff UTRACE_EVENT(SYSCALL_{ENTRY,EXIT}) flag is set. 
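+ *
+ * For reference, the shape of an engine callback for these events,
+ * as implied by the REPORT() invocations below (my_syscall_entry is
+ * an illustrative name, not part of the interface):
+ *
+ *	static u32 my_syscall_entry(struct utrace_attached_engine *engine,
+ *				    struct task_struct *tsk,
+ *				    struct pt_regs *regs)
+ *	{
+ *		... examine regs ...
+ *		return UTRACE_ACTION_RESUME;
+ *	}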
+ */ +void +utrace_report_syscall(struct pt_regs *regs, int is_exit) +{ + struct task_struct *tsk = current; + struct utrace *utrace = tsk->utrace; + struct list_head *pos, *next; + struct utrace_attached_engine *engine; + unsigned long action, ev; + int killed; + +/* + XXX pass syscall # to engine hook directly, let it return inhibit-action + to reset to -1 + long syscall = tracehook_syscall_number(regs, is_exit); +*/ + + ev = is_exit ? UTRACE_EVENT(SYSCALL_EXIT) : UTRACE_EVENT(SYSCALL_ENTRY); + + /* XXX must change for sharing */ + action = UTRACE_ACTION_RESUME; + list_for_each_safe_rcu(pos, next, &utrace->engines) { + engine = list_entry(pos, struct utrace_attached_engine, entry); + if (engine->flags & ev) { + if (is_exit) + REPORT(report_syscall_exit, regs); + else + REPORT(report_syscall_entry, regs); + } + if (action & UTRACE_ACTION_HIDE) + break; + } + action = check_detach(tsk, action); + killed = check_quiescent(tsk, action); + + if (!is_exit) { + if (unlikely(killed)) + /* + * We are continuing despite QUIESCE because of a + * SIGKILL. Don't let the system call actually + * proceed. + */ + tracehook_abort_syscall(regs); + + /* + * Clear TIF_SIGPENDING if it no longer needs to be set. + * It may have been set as part of quiescence, and won't + * ever have been cleared by another thread. For other + * reports, we can just leave it set and will go through + * utrace_get_signal to reset things. But here we are + * about to enter a syscall, which might bail out with an + * -ERESTART* error if it's set now. + */ + if (signal_pending(tsk)) { + spin_lock_irq(&tsk->sighand->siglock); + recalc_sigpending(); + spin_unlock_irq(&tsk->sighand->siglock); + } + } +} + + +/* + * This is pointed to by the utrace struct, but it's really a private + * structure between utrace_get_signal and utrace_inject_signal. + */ +struct utrace_signal +{ + siginfo_t *const info; + struct k_sigaction *return_ka; + int signr; +}; + + +/* + * Call each interested tracing engine's report_signal callback. + */ +static u32 +report_signal(struct task_struct *tsk, struct pt_regs *regs, + struct utrace *utrace, u32 action, + unsigned long flags1, unsigned long flags2, siginfo_t *info, + const struct k_sigaction *ka, struct k_sigaction *return_ka) +{ + struct list_head *pos, *next; + struct utrace_attached_engine *engine; + + /* XXX must change for sharing */ + list_for_each_safe_rcu(pos, next, &utrace->engines) { + engine = list_entry(pos, struct utrace_attached_engine, entry); + if ((engine->flags & flags1) && (engine->flags & flags2)) { + u32 disp = action & UTRACE_ACTION_OP_MASK; + action &= ~UTRACE_ACTION_OP_MASK; + REPORT(report_signal, regs, disp, info, ka, return_ka); + if ((action & UTRACE_ACTION_OP_MASK) == 0) + action |= disp; + if (action & UTRACE_ACTION_HIDE) + break; + } + } + + return action; +} + +void +utrace_signal_handler_singlestep(struct task_struct *tsk, struct pt_regs *regs) +{ + u32 action; + action = report_signal(tsk, regs, tsk->utrace, UTRACE_SIGNAL_HANDLER, + UTRACE_EVENT_SIGNAL_ALL, + UTRACE_ACTION_SINGLESTEP|UTRACE_ACTION_BLOCKSTEP, + NULL, NULL, NULL); + action = check_detach(tsk, action); + check_quiescent(tsk, action); +} + + +/* + * This is the hook from the signals code, called with the siglock held. + * Here is the ideal place to quiesce. We also dequeue and intercept signals. 
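+ *
+ * Return convention, matching the code below: a positive return is a
+ * signal number to deliver with *return_ka filled in; 0 means dequeue
+ * normally; -1 means reacquire the siglock and restart the check.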
+ */ +int +utrace_get_signal(struct task_struct *tsk, struct pt_regs *regs, + siginfo_t *info, struct k_sigaction *return_ka) + __releases(tsk->sighand->siglock) + __acquires(tsk->sighand->siglock) +{ + struct utrace *utrace; + struct utrace_signal signal = { info, return_ka, 0 }; + struct k_sigaction *ka; + unsigned long action, event; + + /* + * We could have been considered quiescent while we were in + * TASK_STOPPED, and detached asynchronously. If we woke up + * and checked tsk->utrace_flags before that was finished, + * we might be here with utrace already removed or in the + * middle of being removed. + */ + rcu_read_lock(); + utrace = rcu_dereference(tsk->utrace); + if (unlikely(utrace == NULL)) { + rcu_read_unlock(); + return 0; + } + if (!(tsk->utrace_flags & UTRACE_EVENT(JCTL))) { + /* + * It's possible we might have just been in TASK_STOPPED + * and subject to the aforementioned race. + * + * RCU makes it safe to get the utrace->lock even if it's + * being freed. Once we have that lock, either an external + * detach has finished and this struct has been freed, or + * else we know we are excluding any other detach attempt. + * Since we are no longer in TASK_STOPPED now, all we + * needed the lock for was to order any quiesce() call after us. + */ + spin_unlock_irq(&tsk->sighand->siglock); + spin_lock(&utrace->lock); + if (unlikely(tsk->utrace != utrace)) { + spin_unlock(&utrace->lock); + rcu_read_unlock(); + cond_resched(); + return -1; + } + spin_unlock(&utrace->lock); + spin_lock_irq(&tsk->sighand->siglock); + } + rcu_read_unlock(); + + /* + * If a signal was injected previously, it could not use our + * stack space directly. It had to allocate a data structure, + * which we can now copy out of and free. + * + * We don't have to lock access to u.live.signal because it's only + * touched by utrace_inject_signal when we're quiescent. + */ + if (utrace->u.live.signal != NULL) { + signal.signr = utrace->u.live.signal->signr; + copy_siginfo(info, utrace->u.live.signal->info); + if (utrace->u.live.signal->return_ka) + *return_ka = *utrace->u.live.signal->return_ka; + else + signal.return_ka = NULL; + kfree(utrace->u.live.signal); + utrace->u.live.signal = NULL; + } + + /* + * If we should quiesce, now is the time. + * First stash a pointer to the state on our stack, + * so that utrace_inject_signal can tell us what to do. + */ + if (tsk->utrace_flags & UTRACE_ACTION_QUIESCE) { + int killed = sigkill_pending(tsk); + if (!killed) { + spin_unlock_irq(&tsk->sighand->siglock); + + killed = utrace_quiescent(tsk, &signal); + + /* + * Noone wants us quiescent any more, we can take + * signals. Unless we have a forced signal to take, + * back out to the signal code to resynchronize after + * releasing the siglock. + */ + if (signal.signr == 0 && !killed) + /* + * This return value says to reacquire the + * siglock and check again. This will check + * for a pending group stop and process it + * before coming back here. + */ + return -1; + + spin_lock_irq(&tsk->sighand->siglock); + } + if (killed) { + /* + * The only reason we woke up now was because of a + * SIGKILL. Don't do normal dequeuing in case it + * might get a signal other than SIGKILL. That would + * perturb the death state so it might differ from + * what the debugger would have allowed to happen. + * Instead, pluck out just the SIGKILL to be sure + * we'll die immediately with nothing else different + * from the quiescent state the debugger wanted us in. 
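+			 * (Passing a mask with every signal but SIGKILL
+			 * set to dequeue_signal means only SIGKILL is
+			 * eligible to be dequeued here.)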
+			 */
+			sigset_t sigkill_only;
+			sigfillset(&sigkill_only);
+			sigdelset(&sigkill_only, SIGKILL);
+			killed = dequeue_signal(tsk, &sigkill_only, info);
+			BUG_ON(killed != SIGKILL);
+			*return_ka = tsk->sighand->action[killed - 1];
+			return killed;
+		}
+	}
+
+	/*
+	 * If a signal was injected, everything is in place now. Go do it.
+	 */
+	if (signal.signr != 0) {
+		if (signal.return_ka == NULL) {
+			ka = &tsk->sighand->action[signal.signr - 1];
+			if (ka->sa.sa_flags & SA_ONESHOT)
+				ka->sa.sa_handler = SIG_DFL;
+			*return_ka = *ka;
+		}
+		else
+			BUG_ON(signal.return_ka != return_ka);
+		return signal.signr;
+	}
+
+	/*
+	 * If no one is interested in intercepting signals, let the caller
+	 * just dequeue them normally.
+	 */
+	if ((tsk->utrace_flags & UTRACE_EVENT_SIGNAL_ALL) == 0)
+		return 0;
+
+	/*
+	 * Steal the next signal so we can let tracing engines examine it.
+	 * From the signal number and sigaction, determine what normal
+	 * delivery would do. If no engine perturbs it, we'll do that
+	 * by returning the signal number after setting *return_ka.
+	 */
+	signal.signr = dequeue_signal(tsk, &tsk->blocked, info);
+	if (signal.signr == 0)
+		return 0;
+
+	BUG_ON(signal.signr != info->si_signo);
+
+	ka = &tsk->sighand->action[signal.signr - 1];
+	*return_ka = *ka;
+
+	/*
+	 * We are never allowed to interfere with SIGKILL,
+	 * just punt after filling in *return_ka for our caller.
+	 */
+	if (signal.signr == SIGKILL)
+		return signal.signr;
+
+	if (ka->sa.sa_handler == SIG_IGN) {
+		event = UTRACE_EVENT(SIGNAL_IGN);
+		action = UTRACE_SIGNAL_IGN;
+	}
+	else if (ka->sa.sa_handler != SIG_DFL) {
+		event = UTRACE_EVENT(SIGNAL);
+		action = UTRACE_ACTION_RESUME;
+	}
+	else if (sig_kernel_coredump(signal.signr)) {
+		event = UTRACE_EVENT(SIGNAL_CORE);
+		action = UTRACE_SIGNAL_CORE;
+	}
+	else if (sig_kernel_ignore(signal.signr)) {
+		event = UTRACE_EVENT(SIGNAL_IGN);
+		action = UTRACE_SIGNAL_IGN;
+	}
+	else if (sig_kernel_stop(signal.signr)) {
+		event = UTRACE_EVENT(SIGNAL_STOP);
+		action = (signal.signr == SIGSTOP
+			  ? UTRACE_SIGNAL_STOP : UTRACE_SIGNAL_TSTP);
+	}
+	else {
+		event = UTRACE_EVENT(SIGNAL_TERM);
+		action = UTRACE_SIGNAL_TERM;
+	}
+
+	if (tsk->utrace_flags & event) {
+		/*
+		 * We have some interested engines, so tell them about the
+		 * signal and let them change its disposition.
+		 */
+
+		spin_unlock_irq(&tsk->sighand->siglock);
+
+		action = report_signal(tsk, regs, utrace, action, event, event,
+				       info, ka, return_ka);
+		action &= UTRACE_ACTION_OP_MASK;
+
+		if (action & UTRACE_SIGNAL_HOLD) {
+			struct sigqueue *q = sigqueue_alloc();
+			if (likely(q != NULL)) {
+				q->flags = 0;
+				copy_siginfo(&q->info, info);
+			}
+			action &= ~UTRACE_SIGNAL_HOLD;
+			spin_lock_irq(&tsk->sighand->siglock);
+			sigaddset(&tsk->pending.signal, info->si_signo);
+			if (likely(q != NULL))
+				list_add(&q->list, &tsk->pending.list);
+		}
+		else
+			spin_lock_irq(&tsk->sighand->siglock);
+
+		recalc_sigpending();
+	}
+
+	/*
+	 * We express the chosen action to the signals code in terms
+	 * of a representative signal whose default action does it.
+	 */
+	switch (action) {
+	case UTRACE_SIGNAL_IGN:
+		/*
+		 * We've eaten the signal. That's all we do.
+		 * Tell the caller to restart.
+		 */
+		spin_unlock_irq(&tsk->sighand->siglock);
+		return -1;
+
+	case UTRACE_ACTION_RESUME:
+	case UTRACE_SIGNAL_DELIVER:
+		/*
+		 * The handler will run. We do the SA_ONESHOT work here
+		 * since the normal path will only touch *return_ka now.
+ */ + if (return_ka->sa.sa_flags & SA_ONESHOT) + ka->sa.sa_handler = SIG_DFL; + break; + + case UTRACE_SIGNAL_TSTP: + signal.signr = SIGTSTP; + tsk->signal->flags |= SIGNAL_STOP_DEQUEUED; + return_ka->sa.sa_handler = SIG_DFL; + break; + + case UTRACE_SIGNAL_STOP: + signal.signr = SIGSTOP; + tsk->signal->flags |= SIGNAL_STOP_DEQUEUED; + return_ka->sa.sa_handler = SIG_DFL; + break; + + case UTRACE_SIGNAL_TERM: + signal.signr = SIGTERM; + return_ka->sa.sa_handler = SIG_DFL; + break; + + case UTRACE_SIGNAL_CORE: + signal.signr = SIGQUIT; + return_ka->sa.sa_handler = SIG_DFL; + break; + + default: + BUG(); + } + + return signal.signr; +} + + +/** + * utrace_inject_signal - Cause a specified signal delivery. + * @target: thread to process the signal + * @engine: engine attached to @target + * @action: signal disposition + * @info: signal number and details + * @ka: sigaction() settings to follow when @action is %UTRACE_SIGNAL_DELIVER + * + * The @target thread must be quiescent (or the current thread). + * The @action has %UTRACE_SIGNAL_* bits as returned from a report_signal() + * callback. If @ka is non-null, it gives the sigaction to follow for + * %UTRACE_SIGNAL_DELIVER; otherwise, the installed sigaction at the time + * of delivery is used. + */ +int +utrace_inject_signal(struct task_struct *target, + struct utrace_attached_engine *engine, + u32 action, siginfo_t *info, + const struct k_sigaction *ka) +{ + struct utrace *utrace; + struct utrace_signal *signal; + int ret; + + if (info->si_signo == 0 || !valid_signal(info->si_signo)) + return -EINVAL; + + utrace = get_utrace_lock_attached(target, engine); + if (unlikely(IS_ERR(utrace))) + return PTR_ERR(utrace); + + ret = 0; + signal = utrace->u.live.signal; + if (unlikely(target->exit_state)) + ret = -ESRCH; + else if (signal == NULL) { + ret = -ENOSYS; /* XXX */ + } + else if (signal->signr != 0) + ret = -EAGAIN; + else { + if (info != signal->info) + copy_siginfo(signal->info, info); + + switch (action) { + default: + ret = -EINVAL; + break; + + case UTRACE_SIGNAL_IGN: + break; + + case UTRACE_ACTION_RESUME: + case UTRACE_SIGNAL_DELIVER: + /* + * The handler will run. We do the SA_ONESHOT work + * here since the normal path will not touch the + * real sigaction when using an injected signal. 
+			 */
+			if (ka == NULL)
+				signal->return_ka = NULL;
+			else if (ka != signal->return_ka)
+				*signal->return_ka = *ka;
+			if (ka && ka->sa.sa_flags & SA_ONESHOT) {
+				struct k_sigaction *a;
+				a = &target->sighand->action[info->si_signo-1];
+				spin_lock_irq(&target->sighand->siglock);
+				a->sa.sa_handler = SIG_DFL;
+				spin_unlock_irq(&target->sighand->siglock);
+			}
+			signal->signr = info->si_signo;
+			break;
+
+		case UTRACE_SIGNAL_TSTP:
+			signal->signr = SIGTSTP;
+			spin_lock_irq(&target->sighand->siglock);
+			target->signal->flags |= SIGNAL_STOP_DEQUEUED;
+			spin_unlock_irq(&target->sighand->siglock);
+			signal->return_ka->sa.sa_handler = SIG_DFL;
+			break;
+
+		case UTRACE_SIGNAL_STOP:
+			signal->signr = SIGSTOP;
+			spin_lock_irq(&target->sighand->siglock);
+			target->signal->flags |= SIGNAL_STOP_DEQUEUED;
+			spin_unlock_irq(&target->sighand->siglock);
+			signal->return_ka->sa.sa_handler = SIG_DFL;
+			break;
+
+		case UTRACE_SIGNAL_TERM:
+			signal->signr = SIGTERM;
+			signal->return_ka->sa.sa_handler = SIG_DFL;
+			break;
+
+		case UTRACE_SIGNAL_CORE:
+			signal->signr = SIGQUIT;
+			signal->return_ka->sa.sa_handler = SIG_DFL;
+			break;
+		}
+	}
+
+	spin_unlock(&utrace->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(utrace_inject_signal);
+
+/**
+ * utrace_regset - Prepare to access a thread's machine state.
+ * @target: thread to examine
+ * @engine: engine attached to @target
+ * @view: &struct utrace_regset_view providing machine state description
+ * @which: index into regsets provided by @view
+ *
+ * Prepare to access a thread's machine state,
+ * see &struct utrace_regset in <linux/tracehook.h>.
+ * The given thread must be quiescent (or the current thread). When this
+ * returns, the &struct utrace_regset calls may be used to interrogate or
+ * change the thread's state. Do not cache the returned pointer when the
+ * thread can resume. You must call utrace_regset() to ensure that
+ * context switching has completed and consistent state is available.
+ */
+const struct utrace_regset *
+utrace_regset(struct task_struct *target,
+	      struct utrace_attached_engine *engine,
+	      const struct utrace_regset_view *view, int which)
+{
+	if (unlikely((unsigned) which >= view->n))
+		return NULL;
+
+	if (target != current)
+		wait_task_inactive(target);
+
+	return &view->regsets[which];
+}
+EXPORT_SYMBOL_GPL(utrace_regset);
+
+/*
+ * This is declared in linux/tracehook.h and defined in machine-dependent
+ * code. We put the export here to ensure no machine forgets it.
+ */
+EXPORT_SYMBOL_GPL(utrace_native_view);
+
+
+/**
+ * utrace_tracer_task - Find the task using ptrace on this one.
+ * @target: task in question
+ *
+ * Return the &struct task_struct for the task using ptrace on this one,
+ * or %NULL. Must be called with rcu_read_lock() held to keep the returned
+ * struct alive.
+ *
+ * At exec time, this may be called with task_lock() still held from when
+ * tracehook_unsafe_exec() was just called. In that case it must give
+ * results consistent with those unsafe_exec() results, i.e. non-%NULL if
+ * any %LSM_UNSAFE_PTRACE_* bits were set.
+ *
+ * The value is also used to display after "TracerPid:" in /proc/PID/status,
+ * where it is called with only rcu_read_lock() held.
+ */
+struct task_struct *
+utrace_tracer_task(struct task_struct *target)
+{
+	struct utrace *utrace;
+	struct task_struct *tracer = NULL;
+
+	utrace = rcu_dereference(target->utrace);
+	if (utrace != NULL) {
+		struct list_head *pos, *next;
+		struct utrace_attached_engine *engine;
+		const struct utrace_engine_ops *ops;
+		list_for_each_safe_rcu(pos, next, &utrace->engines) {
+			engine = list_entry(pos, struct utrace_attached_engine,
+					    entry);
+			ops = rcu_dereference(engine->ops);
+			if (ops->tracer_task) {
+				tracer = (*ops->tracer_task)(engine, target);
+				if (tracer != NULL)
+					break;
+			}
+		}
+	}
+
+	return tracer;
+}
+
+int
+utrace_allow_access_process_vm(struct task_struct *target)
+{
+	struct utrace *utrace;
+	int ret = 0;
+
+	rcu_read_lock();
+	utrace = rcu_dereference(target->utrace);
+	if (utrace != NULL) {
+		struct list_head *pos, *next;
+		struct utrace_attached_engine *engine;
+		const struct utrace_engine_ops *ops;
+		list_for_each_safe_rcu(pos, next, &utrace->engines) {
+			engine = list_entry(pos, struct utrace_attached_engine,
+					    entry);
+			ops = rcu_dereference(engine->ops);
+			if (ops->allow_access_process_vm) {
+				ret = (*ops->allow_access_process_vm)(engine,
+								      target,
+								      current);
+				if (ret)
+					break;
+			}
+		}
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+
+/*
+ * Called on the current task to return LSM_UNSAFE_* bits implied by tracing.
+ * Called with task_lock() held.
+ */
+int
+utrace_unsafe_exec(struct task_struct *tsk)
+{
+	struct utrace *utrace = tsk->utrace;
+	struct list_head *pos, *next;
+	struct utrace_attached_engine *engine;
+	const struct utrace_engine_ops *ops;
+	int unsafe = 0;
+
+	/* XXX must change for sharing */
+	list_for_each_safe_rcu(pos, next, &utrace->engines) {
+		engine = list_entry(pos, struct utrace_attached_engine, entry);
+		ops = rcu_dereference(engine->ops);
+		if (ops->unsafe_exec)
+			unsafe |= (*ops->unsafe_exec)(engine, tsk);
+	}
+
+	return unsafe;
+}
Index: b/Documentation/utrace.txt
===================================================================
--- /dev/null
+++ b/Documentation/utrace.txt
@@ -0,0 +1,579 @@
+DRAFT DRAFT DRAFT	WORK IN PROGRESS	DRAFT DRAFT DRAFT
+
+This is work in progress and likely to change.
+
+
+				Roland McGrath
+
+---
+
+		User Debugging Data & Event Rendezvous
+		---- --------- ---- - ----- ----------
+
+See linux/utrace.h for all the declarations used here.
+See also linux/tracehook.h for the utrace_regset declarations.
+
+utrace is infrastructure code for tracing and controlling user
+threads. This is the foundation for writing tracing engines, which
+can be loadable kernel modules. The utrace interfaces provide three
+basic facilities:
+
+* Thread event reporting
+
+  Tracing engines can request callbacks for events of interest in
+  the thread: signals, system calls, exit, exec, clone, etc.
+
+* Core thread control
+
+  Tracing engines can prevent a thread from running (keeping it in
+  TASK_TRACED state), or make it single-step or block-step (when
+  hardware supports it). Engines can cause a thread to abort system
+  calls, change the behavior of signals, and inject signal-style
+  actions at will.
+
+* Thread machine state access
+
+  Tracing engines can read and write a thread's registers and
+  similar per-thread CPU state.
+
+
+	Tracing engines
+	------- -------
+
+The basic actors in utrace are the thread and the tracing engine.
+A tracing engine is some body of code that calls into the utrace_*
+interfaces, represented by a struct utrace_engine_ops. (Usually it's
+a kernel module, though the legacy ptrace support is a tracing engine
+that is not in a kernel module.) The utrace interface operates on
+individual threads (struct task_struct). If an engine wants to
+treat several threads as a group, that is up to its higher-level
+code. Using utrace starts out by attaching an engine to a thread.
+
+	struct utrace_attached_engine *
+	utrace_attach(struct task_struct *target, int flags,
+		      const struct utrace_engine_ops *ops, void *data);
+
+Calling utrace_attach is what sets up a tracing engine to trace a
+thread. Use UTRACE_ATTACH_CREATE in flags, and pass your engine's ops.
+Check the return value with IS_ERR. If successful, it returns a
+struct pointer that is the handle used in all other utrace_* calls.
+The data argument is stored in the utrace_attached_engine structure,
+for your code to use however it wants.
+
+	int utrace_detach(struct task_struct *target,
+			  struct utrace_attached_engine *engine);
+
+The utrace_detach call removes an engine from a thread.
+No more callbacks will be made after this returns success.
+
+
+An attached engine does nothing by default.
+An engine makes something happen by setting its flags.
+
+	int utrace_set_flags(struct task_struct *target,
+			     struct utrace_attached_engine *engine,
+			     unsigned long flags);
+
+The synchronization issues related to utrace_detach and utrace_set_flags
+are discussed further below in "Teardown Races".
+
+
+	Action Flags
+	------ -----
+
+There are two kinds of flags that an attached engine can set: event
+flags, and action flags. Event flags register interest in particular
+events; when an event happens and an engine has the right event flag
+set, it gets a callback. Action flags change the normal behavior of
+the thread. The action flags available are:
+
+	UTRACE_ACTION_QUIESCE
+
+		The thread will stay quiescent (see below). As long as
+		any engine asserts the QUIESCE action flag, the thread
+		will not resume running in user mode. (Usually it will
+		be in TASK_TRACED state.) Nothing will wake the thread
+		up except for SIGKILL (and implicit SIGKILLs such as a
+		core dump in another thread sharing the same address
+		space, or a group exit, fatal signal, or exec in another
+		thread in the same thread group).
+
+	UTRACE_ACTION_SINGLESTEP
+
+		When the thread runs, it will run one instruction and
+		then trap. (Exiting a system call or entering a signal
+		handler is considered "an instruction" for this.) This
+		is available on most machines. This can be used only if
+		ARCH_HAS_SINGLE_STEP is #define'd by <asm/tracehook.h>
+		and evaluates to nonzero.
+
+	UTRACE_ACTION_BLOCKSTEP
+
+		When the thread runs, it will run until the next branch
+		taken, and then trap. (Exiting a system call or
+		entering a signal handler is considered taking a branch
+		for this.) When the SINGLESTEP flag is set, BLOCKSTEP
+		has no effect. This is only available on some machines.
+		This can be used only if ARCH_HAS_BLOCK_STEP is
+		#define'd by <asm/tracehook.h> and evaluates to nonzero.
+
+	UTRACE_ACTION_NOREAP
+
+		When the thread exits or stops for job control, its
+		parent process will not receive a SIGCHLD and the
+		parent's wait calls will not wake up or report the child
+		as dead. Even a self-reaping thread will remain a
+		zombie. Note that this cannot prevent the reaping done
+		when an exec is done by another thread in the same
+		thread group; in that event, a REAP event (and callback
+		if requested) will happen regardless of this flag.
+		A well-behaved tracing engine does not want to interfere
+		with the parent's normal notifications. This is
+		provided mainly for the ptrace compatibility code to
+		implement the traditional behavior.
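+
+For illustration, here is roughly what a minimal attach might look
+like. This is only a sketch: the my_* names are examples, the ops
+vector's callbacks are assumed to be filled in elsewhere, and real
+code would do more error checking. (The UTRACE_EVENT macro is
+described just below.)
+
+	static const struct utrace_engine_ops my_ops;	/* callbacks */
+
+	static int my_attach(struct task_struct *target)
+	{
+		struct utrace_attached_engine *engine;
+
+		engine = utrace_attach(target, UTRACE_ATTACH_CREATE,
+				       &my_ops, NULL);
+		if (IS_ERR(engine))
+			return PTR_ERR(engine);
+
+		/*
+		 * An engine does nothing until its flags are set;
+		 * ask for quiescence and a callback when it's reached.
+		 */
+		return utrace_set_flags(target, engine,
+					UTRACE_ACTION_QUIESCE
+					| UTRACE_EVENT(QUIESCE));
+	}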
+Event flags are specified using the macro UTRACE_EVENT(TYPE).
+Each event type is associated with a report_* callback in struct
+utrace_engine_ops. A tracing engine can leave unused callbacks NULL.
+The only callbacks required are those used by the event flags it sets.
+
+Many engines can be attached to each thread. When a thread has an
+event, each engine gets a report_* callback if it has set the event flag
+for that event type. Engines are called in the order they attached.
+
+Each callback takes arguments giving the details of the particular
+event. The first two arguments to every callback are the struct
+utrace_attached_engine and struct task_struct pointers for the engine
+and the thread producing the event. Usually this will be the current
+thread that is running the callback functions.
+
+The return value of report_* callbacks is a bitmask. Some bits are
+common to all callbacks, and some are particular to that callback and
+event type. The value zero (UTRACE_ACTION_RESUME) always means the
+simplest thing: do what would have happened with no tracing engine here.
+These are the flags that can be set in any report_* return value:
+
+	UTRACE_ACTION_NEWSTATE
+
+		Update the action state flags, described above. Those
+		bits from the return value (UTRACE_ACTION_STATE_MASK)
+		replace those bits in the engine's flags. This has the
+		same effect as calling utrace_set_flags, but is a more
+		efficient short-cut. To change the event flags, you must
+		call utrace_set_flags.
+
+	UTRACE_ACTION_DETACH
+
+		Detach this engine. This has the effect of calling
+		utrace_detach, but is a more efficient short-cut.
+
+	UTRACE_ACTION_HIDE
+
+		Hide this event from other tracing engines. This is
+		only appropriate to do when the event was induced by
+		some action of this engine, such as a breakpoint trap.
+		Some events cannot be hidden, since every engine has to
+		know about them: exit, death, reap.
+
+The return value bits in UTRACE_ACTION_OP_MASK indicate a change to the
+normal behavior of the event taking place. If zero, the thread does
+whatever that event normally means. For report_signal, other values
+control the disposition of the signal.
+
+
+	Quiescence
+	----------
+
+To control another thread and access its state, it must be "quiescent".
+This means that it is stopped and won't start running again while we access
+it. A quiescent thread is stopped in a place close to user mode, where the
+user state can be accessed safely; either it's about to return to user
+mode, or it's just entered the kernel from user mode, or it has already
+finished exiting (EXIT_ZOMBIE). Setting the UTRACE_ACTION_QUIESCE action
+flag will force the attached thread to become quiescent soon. After
+setting the flag, an engine must wait for an event callback when the thread
+becomes quiescent. The thread may be running on another CPU, or may be in
+an uninterruptible wait. When it is ready to be examined, it will make
+callbacks to engines that set the UTRACE_EVENT(QUIESCE) event flag.
+
+As long as some engine has UTRACE_ACTION_QUIESCE set, then the thread will
+remain stopped. SIGKILL will wake it up, but it will not run user code.
+When the flag is cleared via utrace_set_flags or a callback return value,
+the thread starts running again. (See also "Teardown Races", below.)
+
+During the event callbacks (report_*), the thread in question makes the
+callback from a safe place. It is not quiescent, but it can safely access
+its own state. Callbacks can access thread state directly without setting
+the QUIESCE action flag. If a callback does want to prevent the thread
+from resuming normal execution, it *must* use the QUIESCE action state
+rather than simply blocking; see "Core Events & Callbacks", below.
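+
+A sketch of the resulting stop/examine/resume pattern (the my_* names
+are illustrative; error checking, and the synchronization a real
+engine needs if this can happen more than once, are elided):
+
+	static DECLARE_COMPLETION(my_quiesced);
+
+	static u32 my_report_quiesce(struct utrace_attached_engine *engine,
+				     struct task_struct *tsk)
+	{
+		complete(&my_quiesced);
+		/* QUIESCE is still set, so the thread stays stopped. */
+		return UTRACE_ACTION_RESUME;
+	}
+
+	static void my_examine(struct task_struct *target,
+			       struct utrace_attached_engine *engine)
+	{
+		utrace_set_flags(target, engine, UTRACE_ACTION_QUIESCE
+				 | UTRACE_EVENT(QUIESCE));
+		wait_for_completion(&my_quiesced);
+		/* ... access target's state here ... */
+		utrace_set_flags(target, engine, UTRACE_EVENT(QUIESCE));
+	}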
+
+
+	Thread control
+	------ -------
+
+These calls must be made on a quiescent thread (or the current thread):
+
+	int utrace_inject_signal(struct task_struct *target,
+				 struct utrace_attached_engine *engine,
+				 u32 action, siginfo_t *info,
+				 const struct k_sigaction *ka);
+
+Cause a specified signal delivery in the target thread. This is not
+like kill, which generates a signal to be dequeued and delivered later.
+Injection directs the thread to deliver a signal now, before it next
+resumes in user mode or dequeues any other pending signal. It's as if
+the tracing engine intercepted a signal event and its report_signal
+callback returned the action argument as its value (see below). The
+info and ka arguments serve the same purposes as their counterparts in
+a report_signal callback.
+
+	const struct utrace_regset *
+	utrace_regset(struct task_struct *target,
+		      struct utrace_attached_engine *engine,
+		      const struct utrace_regset_view *view,
+		      int which);
+
+Get access to machine state for the thread. The struct utrace_regset_view
+indicates a view of machine state, corresponding to a user mode
+architecture personality (such as 32-bit or 64-bit versions of a machine).
+The which argument selects one of the register sets available in that view.
+The utrace_regset call must be made before accessing any machine state,
+each time the thread has been running and has then become quiescent.
+It ensures that the thread's state is ready to be accessed, and returns
+the struct utrace_regset giving its accessor functions.
+
+XXX needs front ends for argument checks, export utrace_native_view
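+
+For instance, making a quiescent thread deliver a SIGUSR1 as if a
+report_signal callback had returned UTRACE_SIGNAL_DELIVER might look
+like this sketch (error handling elided; the UTRACE_SIGNAL_* values
+are described with the signal events below):
+
+	siginfo_t info;
+	int err;
+
+	memset(&info, 0, sizeof(info));
+	info.si_signo = SIGUSR1;
+	info.si_code = SI_KERNEL;
+
+	/* A null ka says to use the sigaction installed at delivery time. */
+	err = utrace_inject_signal(target, engine,
+				   UTRACE_SIGNAL_DELIVER, &info, NULL);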
+
+
+	Core Events & Callbacks
+	---- ------ - ---------
+
+Event reporting callbacks have details particular to the event type, but
+are all called in similar environments and have the same constraints.
+Callbacks are made from safe spots, where no locks are held, no special
+resources are pinned, and the user-mode state of the thread is accessible.
+So, callback code has a pretty free hand. But to be a good citizen,
+callback code should never block for long periods. It is fine to block in
+kmalloc and the like, but never wait for i/o or for user mode to do
+something. If you need the thread to wait, set UTRACE_ACTION_QUIESCE and
+return from the callback quickly. When your i/o finishes or whatever, you
+can use utrace_set_flags to resume the thread.
+
+Well-behaved callbacks are important to maintain two essential properties
+of the interface. The first of these is that unrelated tracing engines not
+interfere with each other. If your engine's event callback does not return
+quickly, then another engine won't get the event notification in a timely
+manner. The second important property is that tracing be as noninvasive as
+possible to the normal operation of the system overall and of the traced
+thread in particular. That is, attached tracing engines should not perturb
+a thread's behavior, except to the extent that changing its user-visible
+state is explicitly what you want to do. (Obviously some perturbation is
+unavoidable, primarily timing changes, ranging from small delays due to the
+overhead of tracing, to arbitrary pauses in user code execution when a user
+stops a thread with a debugger for examination. When doing asynchronous
+utrace_attach to a thread doing a system call, more troublesome side
+effects are possible.) Even when you explicitly want the perturbation of
+making the traced thread block, just blocking directly in your callback has
+more unwanted effects. For example, the CLONE event callbacks are called
+when the new child thread has been created but not yet started running; the
+child can never be scheduled until the CLONE tracing callbacks return.
+(This allows engines tracing the parent to attach to the child.) If a
+CLONE event callback blocks the parent thread, it also prevents the child
+thread from running (even to process a SIGKILL). If what you want is to
+make both the parent and child block, then use utrace_attach on the child
+and then set the QUIESCE action state flag on both threads. A more crucial
+problem with blocking in callbacks is that it can prevent SIGKILL from
+working. A thread that is blocking due to UTRACE_ACTION_QUIESCE will still
+wake up and die immediately when sent a SIGKILL, as all threads should.
+Relying on the utrace infrastructure rather than on private synchronization
+calls in event callbacks is an important way to help keep tracing robustly
+noninvasive.
+
+
+UTRACE_EVENT(REAP)		Dead thread has been reaped
+Callback:
+	void (*report_reap)(struct utrace_attached_engine *engine,
+			    struct task_struct *tsk);
+
+This means the parent called wait, or else this was a detached thread or
+a process whose parent ignores SIGCHLD. This cannot happen while the
+UTRACE_ACTION_NOREAP flag is set. This is the only callback you are
+guaranteed to get (if you set the flag; but see "Teardown Races", below).
+
+Unlike other callbacks, this can be called from the parent's context
+rather than from the traced thread itself--it must not delay the parent by
+blocking. This callback is different from all others: it returns void.
+Once you get this callback, your engine is automatically detached and you
+cannot access this thread or use this struct utrace_attached_engine handle
+any longer. This is the place to clean up your data structures and
+synchronize with your code that might try to make utrace_* calls using this
+engine data structure. The struct is still valid during this callback,
+but will be freed soon after it returns (via RCU).
+
+In all other callbacks, the return value is as described above.
+The common UTRACE_ACTION_* flags in the return value are always observed.
+Unless otherwise specified below, other bits in the return value are ignored.
+
+
+UTRACE_EVENT(QUIESCE)		Thread is quiescent
+Callback:
+	u32 (*report_quiesce)(struct utrace_attached_engine *engine,
+			      struct task_struct *tsk);
+
+This is the least interesting callback. It happens at any safe spot,
+including after any other event callback. This lets the tracing engine
+know that it is safe to access the thread's state, or to report to users
+that it has stopped running user code.
+
+UTRACE_EVENT(CLONE)		Thread is creating a child
+Callback:
+	u32 (*report_clone)(struct utrace_attached_engine *engine,
+			    struct task_struct *parent,
+			    unsigned long clone_flags,
+			    struct task_struct *child);
+
+A clone/clone2/fork/vfork system call has succeeded in creating a new
+thread or child process. The new process is fully formed, but not yet
+running. During this callback, other tracing engines are prevented from
+using utrace_attach asynchronously on the child, so that engines tracing
+the parent get the first opportunity to attach. After this callback
+returns, the child will start and the parent's system call will return.
+If CLONE_VFORK is set, the parent will block before returning.
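+
+For example, an engine following an entire process tree might attach
+itself to each new child from its report_clone callback (a sketch;
+my_ops is the engine's own ops vector and error checking is elided):
+
+	static u32 my_report_clone(struct utrace_attached_engine *engine,
+				   struct task_struct *parent,
+				   unsigned long clone_flags,
+				   struct task_struct *child)
+	{
+		struct utrace_attached_engine *child_engine;
+
+		child_engine = utrace_attach(child, UTRACE_ATTACH_CREATE,
+					     &my_ops, engine->data);
+		if (!IS_ERR(child_engine))
+			/* Trace the child the same way as the parent. */
+			utrace_set_flags(child, child_engine, engine->flags);
+		return UTRACE_ACTION_RESUME;
+	}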
+
+UTRACE_EVENT(VFORK_DONE)	Finished waiting for CLONE_VFORK child
+Callback:
+	u32 (*report_vfork_done)(struct utrace_attached_engine *engine,
+				 struct task_struct *parent, pid_t child_pid);
+
+Event reported for parent using CLONE_VFORK or vfork system call.
+The child has died or exec'd, so the vfork parent has unblocked
+and is about to return child_pid.
+
+UTRACE_EVENT(EXEC)		Completed exec
+Callback:
+	u32 (*report_exec)(struct utrace_attached_engine *engine,
+			   struct task_struct *tsk,
+			   const struct linux_binprm *bprm,
+			   struct pt_regs *regs);
+
+An execve system call has succeeded and the new program is about to
+start running. The initial user register state is handy to be tweaked
+directly, or utrace_regset can be used for full machine state access.
+
+UTRACE_EVENT(EXIT)		Thread is exiting
+Callback:
+	u32 (*report_exit)(struct utrace_attached_engine *engine,
+			   struct task_struct *tsk,
+			   long orig_code, long *code);
+
+The thread is exiting and cannot be prevented from doing so, but all its
+state is still live. The *code value will be the wait result seen by
+the parent, and can be changed by this engine or others. The orig_code
+value is the real status, not changed by any tracing engine.
+
+UTRACE_EVENT(DEATH)		Thread has finished exiting
+Callback:
+	u32 (*report_death)(struct utrace_attached_engine *engine,
+			    struct task_struct *tsk);
+
+The thread is really dead now. If the UTRACE_ACTION_NOREAP flag remains
+set after this callback, it remains an unreported zombie; if the flag was
+not set already, then it is too late to set it now--its parent has already
+been sent SIGCHLD. Otherwise, it might be reaped by its parent, or
+self-reap immediately. Though the actual reaping may happen in parallel, a
+report_reap callback will always be ordered after a report_death callback.
+
+UTRACE_EVENT(SYSCALL_ENTRY)	Thread has entered kernel for a system call
+Callback:
+	u32 (*report_syscall_entry)(struct utrace_attached_engine *engine,
+				    struct task_struct *tsk,
+				    struct pt_regs *regs);
+
+The system call number and arguments can be seen and modified in the
+registers. The return value register has -ENOSYS, which will be
+returned for an invalid system call. The macro tracehook_abort_syscall(regs)
+will abort the system call so that we go immediately to syscall exit,
+and return -ENOSYS (or whatever the register state is changed to). If
+tracing engines keep the thread quiescent here, the system call will
+not be performed until it resumes.
+
+UTRACE_EVENT(SYSCALL_EXIT)	Thread is leaving kernel after a system call
+Callback:
+	u32 (*report_syscall_exit)(struct utrace_attached_engine *engine,
+				   struct task_struct *tsk,
+				   struct pt_regs *regs);
+
+The return value can be seen and modified in the registers. If the
+thread is allowed to resume, it will see any pending signals and then
+return to user mode.
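+
+For example, an engine could hold the thread at every system call
+entry so that it can examine or modify the arguments before resuming
+it (a sketch; the my_ name is illustrative and the engine must have
+set UTRACE_EVENT(SYSCALL_ENTRY) to get this callback):
+
+	static u32 my_report_syscall_entry(
+		struct utrace_attached_engine *engine,
+		struct task_struct *tsk, struct pt_regs *regs)
+	{
+		/*
+		 * UTRACE_ACTION_NEWSTATE makes the state bits in this
+		 * return value replace the engine's current ones, so
+		 * this sets QUIESCE: the system call will not be
+		 * performed until the thread is resumed.
+		 */
+		return UTRACE_ACTION_NEWSTATE | UTRACE_ACTION_QUIESCE;
+	}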
+
+UTRACE_EVENT(SIGNAL)		Signal caught by user handler
+UTRACE_EVENT(SIGNAL_IGN)	Signal with no effect (SIG_IGN or default)
+UTRACE_EVENT(SIGNAL_STOP)	Job control stop signal
+UTRACE_EVENT(SIGNAL_TERM)	Fatal termination signal
+UTRACE_EVENT(SIGNAL_CORE)	Fatal core-dump signal
+UTRACE_EVENT_SIGNAL_ALL		All of the above (bitmask)
+Callback:
+	u32 (*report_signal)(struct utrace_attached_engine *engine,
+			     struct task_struct *tsk,
+			     struct pt_regs *regs,
+			     u32 action, siginfo_t *info,
+			     const struct k_sigaction *orig_ka,
+			     struct k_sigaction *return_ka);
+
+There are five types of signal events, but all use the same callback.
+These happen when a thread is dequeuing a signal to be delivered.
+(Not immediately when the signal is sent, and not when the signal is
+blocked.) No signal event is reported for SIGKILL; no tracing engine
+can prevent it from killing the thread immediately. The specific
+event types allow an engine to trace signals based on what they do.
+UTRACE_EVENT_SIGNAL_ALL is all of them OR'd together, to trace all
+signals (except SIGKILL). A subset of these event flags can be used
+e.g. to catch only fatal signals, not handled ones, or to catch only
+core-dump signals, not normal termination signals.
+
+The action argument says what the signal's default disposition is:
+
+	UTRACE_SIGNAL_DELIVER	Run the user handler from sigaction.
+	UTRACE_SIGNAL_IGN	Do nothing, ignore the signal.
+	UTRACE_SIGNAL_TERM	Terminate the process.
+	UTRACE_SIGNAL_CORE	Terminate the process and write a core dump.
+	UTRACE_SIGNAL_STOP	Absolutely stop the process, a la SIGSTOP.
+	UTRACE_SIGNAL_TSTP	Job control stop (no stop if orphaned).
+
+This selection is made from consulting the process's sigaction and the
+default action for the signal number, but may already have been changed by
+an earlier tracing engine (in which case you see its override). A return
+value of UTRACE_ACTION_RESUME means to carry out this action. If instead
+UTRACE_SIGNAL_* bits are in the return value, that overrides the normal
+behavior of the signal.
+
+The signal number and other details of the signal are in info, and
+this data can be changed to make the thread see a different signal.
+A return value of UTRACE_SIGNAL_DELIVER says to follow the sigaction in
+return_ka, which can specify a user handler or SIG_IGN to ignore the
+signal or SIG_DFL to follow the default action for info->si_signo.
+The orig_ka parameter shows the process's sigaction at the time the
+signal was dequeued, and return_ka initially contains this. Tracing
+engines can modify return_ka to change the effects of delivery.
+For other UTRACE_SIGNAL_* return values, return_ka is ignored.
+
+UTRACE_SIGNAL_HOLD is a flag bit that can be OR'd into the return
+value. It says to push the signal back on the thread's queue, with
+the signal number and details possibly changed in info. When the
+thread is allowed to resume, it will dequeue and report it again.
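+
+As an illustration, a report_signal callback that swallows SIGUSR1 and
+leaves every other signal alone might look like this (sketch only; the
+engine would set UTRACE_EVENT_SIGNAL_ALL, or just the event flags for
+the dispositions it cares about, to get these callbacks):
+
+	static u32 my_report_signal(struct utrace_attached_engine *engine,
+				    struct task_struct *tsk,
+				    struct pt_regs *regs,
+				    u32 action, siginfo_t *info,
+				    const struct k_sigaction *orig_ka,
+				    struct k_sigaction *return_ka)
+	{
+		if (info->si_signo == SIGUSR1)
+			return UTRACE_SIGNAL_IGN;	/* eat the signal */
+		return UTRACE_ACTION_RESUME;	/* normal disposition */
+	}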
+
+
+	Teardown Races
+	-------- -----
+
+Ordinarily synchronization issues for tracing engines are kept fairly
+straightforward by using quiescence (see above): you make a thread
+quiescent and then once it makes the report_quiesce callback it cannot
+do anything else that would result in another callback, until you let
+it. This simple arrangement avoids complex and error-prone code in
+each one of a tracing engine's event callbacks to keep them serialized
+with the engine's other operations done on that thread from another
+thread of control. However, giving tracing engines complete power to
+keep a traced thread stuck in place runs afoul of a more important
+kind of simplicity that the kernel overall guarantees: nothing can
+prevent or delay SIGKILL from making a thread die and release its
+resources. To preserve this important property of SIGKILL, it can,
+as a special case, break quiescence like nothing else normally can.
+This includes both explicit SIGKILL signals and the implicit SIGKILL
+sent to each other thread in the same thread group by a thread doing
+an exec, or processing a fatal signal, or making an exit_group system
+call. A tracing engine can prevent a thread from beginning the exit
+or exec or dying by signal (other than SIGKILL) if it is attached to
+that thread, but once the operation begins, no tracing engine can
+prevent or delay all other threads in the same thread group dying.
+
+As described above, the report_reap callback is always the final event
+in the life cycle of a traced thread. Tracing engines can use this as
+the trigger to clean up their own data structures. The report_death
+callback is always the penultimate event a tracing engine might see,
+except when the thread was already in the midst of dying when the
+engine attached. Many tracing engines will have no interest in when a
+parent reaps a dead process, and nothing they want to do with a zombie
+thread once it dies; for them, the report_death callback is the
+natural place to clean up data structures and detach. To facilitate
+writing such engines robustly, given the asynchrony of SIGKILL, and
+without error-prone manual implementation of synchronization schemes,
+the utrace infrastructure provides some special guarantees about the
+report_death and report_reap callbacks. It still takes some care to
+be sure your tracing engine is robust to teardown races, but these
+rules make it reasonably straightforward and concise to handle a lot
+of corner cases correctly.
+
+The first sort of guarantee concerns the core data structures
+themselves. struct utrace_attached_engine is allocated using RCU, as
+is task_struct. If you call utrace_attach under rcu_read_lock, then
+the pointer it returns will always be valid while in the RCU critical
+section. (Note that utrace_attach can block doing memory allocation,
+so you must consider the real critical section to start when
+utrace_attach returns; utrace_attach can never block when not given
+the UTRACE_ATTACH_CREATE flag bit.) Conversely, you can call
+utrace_attach outside of rcu_read_lock and though the pointer can
+become stale asynchronously if the thread dies and is reaped, you can
+safely pass it to a subsequent utrace_set_flags or utrace_detach call
+and will just get an -ESRCH error return. However, you must be sure
+the task_struct remains valid, either via get_task_struct or via RCU.
+The utrace infrastructure never holds task_struct references of its
+own. Though neither rcu_read_lock nor any other lock is held while
+making a callback, it's always guaranteed that the task_struct and
+the struct utrace_attached_engine passed as arguments remain valid
+until the callback function returns.
+
+The second guarantee is the serialization of death and reap event
+callbacks for a given thread. The actual reaping by the parent
+(release_task call) can occur simultaneously while the thread is
+still doing the final steps of dying, including the report_death
+callback.
+If a tracing engine has requested both DEATH and REAP
+event reports, it's guaranteed that the report_reap callback will not
+be made until after the report_death callback has returned. If the
+report_death callback itself detaches from the thread (with
+utrace_detach or with UTRACE_ACTION_DETACH in its return value), then
+the report_reap callback will never be made. Thus it is safe for a
+report_death callback to clean up data structures and detach.
+
+The final sort of guarantee is that a tracing engine will know for
+sure whether or not the report_death and/or report_reap callbacks
+will be made for a certain thread. These teardown races are
+disambiguated by the error return values of utrace_set_flags and
+utrace_detach. Normally utrace_detach returns zero, and this means
+that no more callbacks will be made. If the thread is in the midst
+of dying, utrace_detach returns -EALREADY to indicate that the
+report_death callback may already be in progress; when you get this
+error, you know that any cleanup your report_death callback does is
+about to happen or has just happened--note that if the report_death
+callback does not detach, the engine remains attached until the
+thread gets reaped. If the thread is in the midst of being reaped,
+utrace_detach returns -ESRCH to indicate that the report_reap
+callback may already be in progress; this means the engine is
+implicitly detached when the callback completes. This makes it
+possible for a tracing engine that has decided asynchronously to
+detach from a thread to safely clean up its data structures, knowing
+that no report_death or report_reap callback will try to do the
+same. utrace_detach returns -ESRCH when the struct
+utrace_attached_engine has already been detached, but is still a
+valid pointer because of rcu_read_lock. If RCU is used properly, a
+tracing engine can use this to safely synchronize its own
+independent multiple threads of control with each other and with its
+event callbacks that detach.
+
+In the same vein, utrace_set_flags normally returns zero; if the
+target thread was quiescent before the call, then after a successful
+call, no event callbacks not requested in the new flags will be made,
+and a report_quiesce callback will always be made if requested. It
+fails with -EALREADY if you try to clear UTRACE_EVENT(DEATH) when the
+report_death callback may already have begun, if you try to clear
+UTRACE_EVENT(REAP) when the report_reap callback may already have
+begun, if you try to newly set UTRACE_ACTION_NOREAP when the target
+may already have sent its parent SIGCHLD, or if you try to newly set
+UTRACE_EVENT(DEATH), UTRACE_EVENT(QUIESCE), or UTRACE_ACTION_QUIESCE,
+when the target is already dead or dying. Like utrace_detach, it
+returns -ESRCH when the thread has already been detached (including
+forcible detach on reaping). This lets the tracing engine know for
+sure which event callbacks it will or won't see after utrace_set_flags
+has returned. By checking for errors, it can know whether to clean up
+its data structures immediately or to let its callbacks do the work.
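+
+To make the rules concrete, here is a sketch of how an engine that
+decides asynchronously to detach might handle these return values
+(my_cleanup is an illustrative name for the engine's own teardown
+code):
+
+	static void my_detach(struct task_struct *target,
+			      struct utrace_attached_engine *engine)
+	{
+		switch (utrace_detach(target, engine)) {
+		case 0:
+			/* Detached; no more callbacks will be made. */
+			my_cleanup(engine);
+			break;
+		case -EALREADY:
+			/*
+			 * report_death may already be running; let the
+			 * cleanup in that callback do the work.
+			 */
+			break;
+		case -ESRCH:
+			/*
+			 * Already detached, or report_reap in progress;
+			 * the report_reap callback cleans up.
+			 */
+			break;
+		}
+	}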
Index: b/Documentation/DocBook/Makefile
===================================================================
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@@ -9,7 +9,7 @@
 DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \
 	    kernel-hacking.xml kernel-locking.xml deviceiobook.xml \
 	    procfs-guide.xml writing_usb_driver.xml \
-	    kernel-api.xml filesystems.xml lsm.xml usb.xml \
+	    kernel-api.xml filesystems.xml lsm.xml utrace.xml usb.xml \
 	    gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
 	    genericirq.xml
Index: b/Documentation/DocBook/utrace.tmpl
===================================================================
--- /dev/null
+++ b/Documentation/DocBook/utrace.tmpl
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="utrace">
+  <bookinfo>
+    <title>The utrace User Debugging Infrastructure</title>
+  </bookinfo>
+
+  <toc></toc>
+
+  <chapter id="core"><title>The utrace core API</title>
+!Iinclude/linux/utrace.h
+!Ekernel/utrace.c
+  </chapter>
+
+  <chapter id="machine"><title>Machine state access via utrace</title>
+!Finclude/linux/tracehook.h struct utrace_regset
+!Finclude/linux/tracehook.h struct utrace_regset_view
+!Finclude/linux/tracehook.h utrace_native_view
+  </chapter>
+
+</book>
Index: b/include/linux/tracehook.h
===================================================================
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -28,6 +28,7 @@
 #include
 #include
+#include <linux/utrace.h>
 
 struct linux_binprm;
 struct pt_regs;
@@ -342,6 +343,7 @@ utrace_regset_copyin_ignore(unsigned int
  */
 static inline void tracehook_init_task(struct task_struct *child)
 {
+	utrace_init_task(child);
 }
 
 /*
@@ -350,6 +352,9 @@ static inline void tracehook_init_task(s
  */
 static inline void tracehook_release_task(struct task_struct *p)
 {
+	smp_mb();
+	if (tsk_utrace_struct(p) != NULL)
+		utrace_release_task(p);
 }
 
 /*
@@ -360,7 +365,20 @@ static inline void tracehook_release_tas
  */
 static inline int tracehook_check_released(struct task_struct *p)
 {
-	return 0;
+	int bad = 0;
+	BUG_ON(p->exit_state != EXIT_DEAD);
+	if (unlikely(tsk_utrace_struct(p) != NULL)) {
+		/*
+		 * In a race condition, utrace_attach will temporarily set
+		 * it, but then check p->exit_state and clear it. It does
+		 * all this under task_lock, so we take the lock to check
+		 * that there is really a bug and not just that known race.
+ */ + task_lock(p); + bad = unlikely(tsk_utrace_struct(p) != NULL); + task_unlock(p); + } + return bad; } /* @@ -371,7 +389,7 @@ static inline int tracehook_check_releas static inline int tracehook_notify_cldstop(struct task_struct *tsk, const siginfo_t *info) { - return 0; + return (tsk_utrace_flags(tsk) & UTRACE_ACTION_NOREAP); } /* @@ -385,7 +403,11 @@ static inline int tracehook_notify_cldst static inline int tracehook_notify_death(struct task_struct *tsk, int *noreap, void **death_cookie) { - *death_cookie = NULL; + *death_cookie = tsk_utrace_struct(tsk); + if (tsk_utrace_flags(tsk) & UTRACE_ACTION_NOREAP) { + *noreap = 1; + return 1; + } *noreap = 0; return 0; } @@ -398,7 +420,8 @@ static inline int tracehook_notify_death static inline int tracehook_consider_fatal_signal(struct task_struct *tsk, int sig) { - return 0; + return (tsk_utrace_flags(tsk) & (UTRACE_EVENT(SIGNAL_TERM) + | UTRACE_EVENT(SIGNAL_CORE))); } /* @@ -411,7 +434,7 @@ static inline int tracehook_consider_ign int sig, void __user *handler) { - return 0; + return (tsk_utrace_flags(tsk) & UTRACE_EVENT(SIGNAL_IGN)); } @@ -422,7 +445,7 @@ static inline int tracehook_consider_ign */ static inline int tracehook_induce_sigpending(struct task_struct *tsk) { - return 0; + return unlikely(tsk_utrace_flags(tsk) & UTRACE_ACTION_QUIESCE); } /* @@ -437,6 +460,8 @@ static inline int tracehook_get_signal(s siginfo_t *info, struct k_sigaction *return_ka) { + if (unlikely(tsk_utrace_flags(tsk))) + return utrace_get_signal(tsk, regs, info, return_ka); return 0; } @@ -449,6 +474,8 @@ static inline int tracehook_get_signal(s */ static inline int tracehook_finish_stop(int last_one) { + if (tsk_utrace_flags(current) & UTRACE_EVENT(JCTL)) + return utrace_report_jctl(CLD_STOPPED); return 0; } @@ -460,7 +487,7 @@ static inline int tracehook_finish_stop( */ static inline int tracehook_inhibit_wait_stopped(struct task_struct *child) { - return 0; + return (tsk_utrace_flags(child) & UTRACE_ACTION_NOREAP); } /* @@ -470,7 +497,7 @@ static inline int tracehook_inhibit_wait */ static inline int tracehook_inhibit_wait_zombie(struct task_struct *child) { - return 0; + return (tsk_utrace_flags(child) & UTRACE_ACTION_NOREAP); } /* @@ -480,7 +507,7 @@ static inline int tracehook_inhibit_wait */ static inline int tracehook_inhibit_wait_continued(struct task_struct *child) { - return 0; + return (tsk_utrace_flags(child) & UTRACE_ACTION_NOREAP); } @@ -490,13 +517,9 @@ static inline int tracehook_inhibit_wait */ static inline int tracehook_unsafe_exec(struct task_struct *tsk) { + if (tsk_utrace_flags(tsk)) + return utrace_unsafe_exec(tsk); return 0; -// if (p->ptrace & PT_PTRACED) { -// if (p->ptrace & PT_PTRACE_CAP) -// unsafe |= LSM_UNSAFE_PTRACE_CAP; -// else -// unsafe |= LSM_UNSAFE_PTRACE; -// } } /* @@ -511,6 +534,8 @@ static inline int tracehook_unsafe_exec( */ static inline struct task_struct *tracehook_tracer_task(struct task_struct *p) { + if (tsk_utrace_flags(p)) + return utrace_tracer_task(p); return NULL; } @@ -522,6 +547,8 @@ static inline int tracehook_allow_access { if (tsk == current) return 1; + if (tsk_utrace_flags(tsk)) + return utrace_allow_access_process_vm(tsk); return 0; } @@ -533,7 +560,7 @@ static inline int tracehook_allow_access */ static inline int tracehook_expect_breakpoints(struct task_struct *tsk) { - return 0; + return (tsk_utrace_flags(tsk) & UTRACE_EVENT(SIGNAL_CORE)); } @@ -556,6 +583,10 @@ static inline int tracehook_expect_break static inline void tracehook_report_death(struct task_struct *tsk, int exit_state, void 
*death_cookie) { + smp_mb(); + if (tsk_utrace_flags(tsk) & (UTRACE_EVENT(DEATH) + | UTRACE_EVENT(QUIESCE))) + utrace_report_death(tsk, death_cookie); } /* @@ -565,14 +596,18 @@ static inline void tracehook_report_deat */ static inline void tracehook_report_delayed_group_leader(struct task_struct *p) { + utrace_report_delayed_group_leader(p); } /* - * exec completed + * exec completed, we are shortly going to return to user mode. + * The freshly initialized register state can be seen and changed here. */ static inline void tracehook_report_exec(struct linux_binprm *bprm, struct pt_regs *regs) { + if (tsk_utrace_flags(current) & UTRACE_EVENT(EXEC)) + utrace_report_exec(bprm, regs); } /* @@ -581,6 +616,8 @@ static inline void tracehook_report_exec */ static inline void tracehook_report_exit(long *exit_code) { + if (tsk_utrace_flags(current) & UTRACE_EVENT(EXIT)) + utrace_report_exit(exit_code); } /* @@ -595,6 +632,8 @@ static inline void tracehook_report_exit static inline void tracehook_report_clone(unsigned long clone_flags, struct task_struct *child) { + if (tsk_utrace_flags(current) & UTRACE_EVENT(CLONE)) + utrace_report_clone(clone_flags, child); } /* @@ -608,6 +647,8 @@ static inline void tracehook_report_clon pid_t pid, struct task_struct *child) { + if (tsk_utrace_flags(current) & UTRACE_ACTION_QUIESCE) + utrace_quiescent(current, NULL); } /* @@ -619,6 +660,8 @@ static inline void tracehook_report_clon static inline void tracehook_report_vfork_done(struct task_struct *child, pid_t child_pid) { + if (tsk_utrace_flags(current) & UTRACE_EVENT(VFORK_DONE)) + utrace_report_vfork_done(child_pid); } /* @@ -626,6 +669,9 @@ static inline void tracehook_report_vfor */ static inline void tracehook_report_syscall(struct pt_regs *regs, int is_exit) { + if (tsk_utrace_flags(current) & (is_exit ? UTRACE_EVENT(SYSCALL_EXIT) + : UTRACE_EVENT(SYSCALL_ENTRY))) + utrace_report_syscall(regs, is_exit); } /* @@ -645,6 +691,11 @@ static inline void tracehook_report_hand const sigset_t *oldset, struct pt_regs *regs) { + struct task_struct *tsk = current; + if ((tsk_utrace_flags(tsk) & UTRACE_EVENT_SIGNAL_ALL) + && (tsk_utrace_flags(tsk) & (UTRACE_ACTION_SINGLESTEP + | UTRACE_ACTION_BLOCKSTEP))) + utrace_signal_handler_singlestep(tsk, regs); } Index: b/include/linux/utrace.h =================================================================== --- /dev/null +++ b/include/linux/utrace.h @@ -0,0 +1,544 @@ +/* + * utrace infrastructure interface for debugging user processes + * + * Copyright (C) 2006, 2007 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + * + * Red Hat Author: Roland McGrath. + * + * This interface allows for notification of interesting events in a thread. + * It also mediates access to thread state such as registers. + * Multiple unrelated users can be associated with a single thread. + * We call each of these a tracing engine. + * + * A tracing engine starts by calling utrace_attach() on the chosen thread, + * passing in a set of hooks (&struct utrace_engine_ops), and some + * associated data. This produces a &struct utrace_attached_engine, which + * is the handle used for all other operations. An attached engine has its + * ops vector, its data, and a flags word controlled by utrace_set_flags(). + * + * Each engine's flags word contains two kinds of flags: events of + * interest, and action state flags. 
+ *
+ * For each event flag that is set, that engine will get the
+ * appropriate ops->report_*() callback when the event occurs. The
+ * &struct utrace_engine_ops need not provide callbacks for an event
+ * unless the engine sets one of the associated event flags.
+ *
+ * Action state flags change the normal behavior of the thread.
+ * These bits are in %UTRACE_ACTION_STATE_MASK; these can be OR'd into
+ * flags set with utrace_set_flags(). Also, every callback that returns
+ * an action value can reset these bits for the engine (see below).
+ *
+ * The bits %UTRACE_ACTION_STATE_MASK of all attached engines are OR'd
+ * together, so each action is in force as long as any engine requests it.
+ * As long as some engine sets the %UTRACE_ACTION_QUIESCE flag, the thread
+ * will block and not resume running user code. When the last engine
+ * clears its %UTRACE_ACTION_QUIESCE flag, the thread will resume running.
+ */
+
+#ifndef _LINUX_UTRACE_H
+#define _LINUX_UTRACE_H 1
+
+#include
+#include
+#include
+#include
+
+struct linux_binprm;
+struct pt_regs;
+struct utrace;
+struct utrace_signal;
+struct utrace_regset;
+struct utrace_regset_view;
+
+
+/*
+ * Flags in &struct task_struct.utrace_flags and
+ * &struct utrace_attached_engine.flags.
+ * Low four bits are %UTRACE_ACTION_STATE_MASK bits (below).
+ * Higher bits are events of interest.
+ */
+#define UTRACE_FIRST_EVENT	4
+#define UTRACE_EVENT_BITS	(BITS_PER_LONG - UTRACE_FIRST_EVENT)
+#define UTRACE_EVENT_MASK	(-1UL &~ UTRACE_ACTION_STATE_MASK)
+
+enum utrace_events {
+	_UTRACE_EVENT_QUIESCE,	/* Tracing requests stop. */
+	_UTRACE_EVENT_REAP,	/* Zombie reaped, no more tracing possible. */
+	_UTRACE_EVENT_CLONE,	/* Successful clone/fork/vfork just done. */
+	_UTRACE_EVENT_VFORK_DONE, /* vfork woke from waiting for child. */
+	_UTRACE_EVENT_EXEC,	/* Successful execve just completed. */
+	_UTRACE_EVENT_EXIT,	/* Thread exit in progress. */
+	_UTRACE_EVENT_DEATH,	/* Thread has died. */
+	_UTRACE_EVENT_SYSCALL_ENTRY, /* User entered kernel for system call. */
+	_UTRACE_EVENT_SYSCALL_EXIT, /* Returning to user after system call. */
+	_UTRACE_EVENT_SIGNAL,	/* Signal delivery will run a user handler. */
+	_UTRACE_EVENT_SIGNAL_IGN, /* No-op signal to be delivered. */
+	_UTRACE_EVENT_SIGNAL_STOP, /* Signal delivery will suspend. */
+	_UTRACE_EVENT_SIGNAL_TERM, /* Signal delivery will terminate. */
+	_UTRACE_EVENT_SIGNAL_CORE, /* Signal delivery will dump core. */
+	_UTRACE_EVENT_JCTL,	/* Job control stop or continue completed. */
+	_UTRACE_NEVENTS
+};
+#define UTRACE_EVENT_BIT(type)	(UTRACE_FIRST_EVENT + _UTRACE_EVENT_##type)
+#define UTRACE_EVENT(type)	(1UL << UTRACE_EVENT_BIT(type))
+
+/*
+ * All the kinds of signal events. These all use the report_signal callback.
+ */
+#define UTRACE_EVENT_SIGNAL_ALL	(UTRACE_EVENT(SIGNAL) \
+				 | UTRACE_EVENT(SIGNAL_IGN) \
+				 | UTRACE_EVENT(SIGNAL_STOP) \
+				 | UTRACE_EVENT(SIGNAL_TERM) \
+				 | UTRACE_EVENT(SIGNAL_CORE))
+/*
+ * Both kinds of syscall events; these call the report_syscall_entry and
+ * report_syscall_exit callbacks, respectively.
+ */
+#define UTRACE_EVENT_SYSCALL	\
+	(UTRACE_EVENT(SYSCALL_ENTRY) | UTRACE_EVENT(SYSCALL_EXIT))
+
+
+/*
+ * Action flags, in return value of callbacks.
+ *
+ * %UTRACE_ACTION_RESUME (zero) is the return value to do nothing special.
+ * For each particular callback, some bits in %UTRACE_ACTION_OP_MASK can
+ * be set in the return value to change the thread's behavior (see below).
+ *
+ * If %UTRACE_ACTION_NEWSTATE is set, then the %UTRACE_ACTION_STATE_MASK
+ * bits in the return value replace the engine's flags as in utrace_set_flags
+ * (but the event flags remain unchanged).
+ *
+ * If %UTRACE_ACTION_HIDE is set, then the callbacks to other engines
+ * should be suppressed for this event. This is appropriate only when
+ * the event was artificially provoked by something this engine did,
+ * such as setting a breakpoint.
+ *
+ * If %UTRACE_ACTION_DETACH is set, this engine is detached as by
+ * utrace_detach(). The action bits in %UTRACE_ACTION_OP_MASK work as
+ * normal, but the engine's %UTRACE_ACTION_STATE_MASK bits will no longer
+ * affect the thread.
+ */
+#define UTRACE_ACTION_RESUME	0x0000 /* Continue normally after event. */
+#define UTRACE_ACTION_HIDE	0x0010 /* Hide event from other tracing. */
+#define UTRACE_ACTION_DETACH	0x0020 /* Detach me, state flags ignored. */
+#define UTRACE_ACTION_NEWSTATE	0x0040 /* Replace state bits. */
+
+/*
+ * These flags affect the state of the thread until they are changed via
+ * utrace_set_flags() or by the next callback to the same engine that uses
+ * %UTRACE_ACTION_NEWSTATE.
+ */
+#define UTRACE_ACTION_QUIESCE	0x0001 /* Stay quiescent after callbacks. */
+#define UTRACE_ACTION_SINGLESTEP 0x0002 /* Resume for one instruction. */
+#define UTRACE_ACTION_BLOCKSTEP	0x0004 /* Resume until next branch. */
+#define UTRACE_ACTION_NOREAP	0x0008 /* Inhibit parent SIGCHLD and wait. */
+#define UTRACE_ACTION_STATE_MASK 0x000f /* Lasting state bits. */
+
+/*
+ * These flags have meanings specific to the particular event report hook.
+ */
+#define UTRACE_ACTION_OP_MASK	0xff00
+
+/*
+ * Action flags in return value and argument of report_signal() callback.
+ */
+#define UTRACE_SIGNAL_DELIVER	0x0100 /* Deliver according to sigaction. */
+#define UTRACE_SIGNAL_IGN	0x0200 /* Ignore the signal. */
+#define UTRACE_SIGNAL_TERM	0x0300 /* Terminate the process. */
+#define UTRACE_SIGNAL_CORE	0x0400 /* Terminate with core dump. */
+#define UTRACE_SIGNAL_STOP	0x0500 /* Deliver as absolute stop. */
+#define UTRACE_SIGNAL_TSTP	0x0600 /* Deliver as job control stop. */
+#define UTRACE_SIGNAL_HOLD	0x1000 /* Flag, push signal back on queue. */
+/*
+ * This value is passed to a report_signal() callback after a signal
+ * handler is entered while %UTRACE_ACTION_SINGLESTEP is in force.
+ * For this callback, no signal will ever actually be delivered regardless
+ * of the return value, and the other callback parameters are null.
+ */
+#define UTRACE_SIGNAL_HANDLER	0x0700
+
+/*
+ * Action flag in return value of report_jctl().
+ */
+#define UTRACE_JCTL_NOSIGCHLD	0x0100 /* Do not notify the parent. */
+
+
+/*
+ * Flags for utrace_attach().
+ */
+#define UTRACE_ATTACH_CREATE	0x0010 /* Attach a new engine. */
+#define UTRACE_ATTACH_EXCLUSIVE	0x0020 /* Refuse if existing match. */
+#define UTRACE_ATTACH_MATCH_OPS	0x0001 /* Match engines on ops. */
+#define UTRACE_ATTACH_MATCH_DATA 0x0002 /* Match engines on data. */
+#define UTRACE_ATTACH_MATCH_MASK 0x000f
+
+
+#ifdef CONFIG_UTRACE
+/**
+ * struct utrace_attached_engine - Per-engine per-thread structure.
+ * @ops:	&struct utrace_engine_ops pointer passed to utrace_attach()
+ * @data:	engine-private void * passed to utrace_attach()
+ * @flags:	current flags set by utrace_set_flags()
+ *
+ * The task itself never has to worry about engines detaching while
+ * it's doing event callbacks. These structures are freed only when
+ * the task is quiescent. For other parties, the list is protected
+ * by RCU and utrace->lock.
+ */
+struct utrace_attached_engine
+{
+/* private: */
+	struct list_head entry;	/* Entry on thread's utrace.engines list. */
+	struct rcu_head rhead;
+	atomic_t check_dead;
+
+/* public: */
+	const struct utrace_engine_ops *ops;
+	void *data;
+
+	unsigned long flags;
+};
+
+
+struct utrace_engine_ops
+{
+	/*
+	 * Event reporting hooks.
+	 *
+	 * Return values contain %UTRACE_ACTION_* flag bits.
+	 * The %UTRACE_ACTION_OP_MASK bits are specific to each kind of event.
+	 *
+	 * All report_*() hooks are called with no locks held, in a generally
+	 * safe environment when we will be returning to user mode soon.
+	 * It is fine to block for memory allocation and the like, but all
+	 * hooks are *asynchronous* and must not block on external events.
+	 * If you want the thread to block, request %UTRACE_ACTION_QUIESCE in
+	 * your hook; then later wake it up with utrace_set_flags().
+	 */
+
+	/*
+	 * Event reported for parent, before child might run.
+	 * The %PF_STARTING flag prevents other engines from attaching
+	 * before this one has its chance.
+	 */
+	u32 (*report_clone)(struct utrace_attached_engine *engine,
+			    struct task_struct *parent,
+			    unsigned long clone_flags,
+			    struct task_struct *child);
+
+	/*
+	 * Event reported for parent using %CLONE_VFORK or vfork() system call.
+	 * The child has died or exec'd, so the vfork parent has unblocked
+	 * and is about to return @child_pid.
+	 */
+	u32 (*report_vfork_done)(struct utrace_attached_engine *engine,
+				 struct task_struct *parent, pid_t child_pid);
+
+	/*
+	 * Event reported after %UTRACE_ACTION_QUIESCE is set, when the target
+	 * thread is quiescent. Either it's the current thread, or it's in
+	 * %TASK_TRACED or %TASK_STOPPED and will not resume running until the
+	 * %UTRACE_ACTION_QUIESCE flag is no longer asserted by any engine.
+	 */
+	u32 (*report_quiesce)(struct utrace_attached_engine *engine,
+			      struct task_struct *tsk);
+
+	/*
+	 * Thread dequeuing a signal to be delivered.
+	 * The @action and @return_ka values say what %UTRACE_ACTION_RESUME
+	 * will do (possibly already influenced by another tracing engine).
+	 * An %UTRACE_SIGNAL_* return value overrides the signal disposition.
+	 * The @info data (including @info->si_signo) can be changed at will.
+	 * Changing @return_ka affects the sigaction that will be used.
+	 * The @orig_ka value is the one in force before other tracing
+	 * engines intervened.
+	 */
+	u32 (*report_signal)(struct utrace_attached_engine *engine,
+			     struct task_struct *tsk,
+			     struct pt_regs *regs,
+			     u32 action, siginfo_t *info,
+			     const struct k_sigaction *orig_ka,
+			     struct k_sigaction *return_ka);
+
+	/*
+	 * Job control event completing, about to send %SIGCHLD to parent
+	 * with %CLD_STOPPED or %CLD_CONTINUED as given in type.
+	 * %UTRACE_JCTL_NOSIGCHLD in the return value inhibits that.
+	 */
+	u32 (*report_jctl)(struct utrace_attached_engine *engine,
+			   struct task_struct *tsk,
+			   int type);
+
+	/*
+	 * Thread has just completed an exec.
+	 * The initial user register state is handy to be tweaked directly.
+	 */
+	u32 (*report_exec)(struct utrace_attached_engine *engine,
+			   struct task_struct *tsk,
+			   const struct linux_binprm *bprm,
+			   struct pt_regs *regs);
+
+	/*
+	 * Thread has entered the kernel to request a system call.
+	 * The user register state is handy to be tweaked directly.
+	 */
+	u32 (*report_syscall_entry)(struct utrace_attached_engine *engine,
+				    struct task_struct *tsk,
+				    struct pt_regs *regs);
+
+	/*
+	 * Thread is about to leave the kernel after a system call request.
+	 * The user register state can be examined and changed directly.
+	 */
+	u32 (*report_syscall_exit)(struct utrace_attached_engine *engine,
+				   struct task_struct *tsk,
+				   struct pt_regs *regs);
+
+	/*
+	 * Thread is exiting and cannot be prevented from doing so,
+	 * but all its state is still live. The @code value will be
+	 * the wait result seen by the parent, and can be changed by
+	 * this engine or others. The @orig_code value is the real
+	 * status, not changed by any tracing engine.
+	 */
+	u32 (*report_exit)(struct utrace_attached_engine *engine,
+			   struct task_struct *tsk,
+			   long orig_code, long *code);
+
+	/*
+	 * Thread is really dead now. If %UTRACE_ACTION_NOREAP is in force,
+	 * it remains an unreported zombie. Otherwise, it might be reaped
+	 * by its parent, or self-reap immediately. Though the actual
+	 * reaping may happen in parallel, a report_reap() callback will
+	 * always be ordered after a report_death() callback.
+	 *
+	 * If %UTRACE_ACTION_NOREAP is in force and this was a group_leader
+	 * dying with threads still in the group (delayed_group_leader()),
+	 * then there can be a second report_death() callback later when
+	 * the group_leader is no longer delayed. This second callback can
+	 * be made from another thread's context, but it will always be
+	 * serialized after the first report_death() callback and before
+	 * the report_reap() callback. It's possible that
+	 * delayed_group_leader() will already be true by the time it can
+	 * be checked inside the first report_death() callback made at the
+	 * time of death, and that a second callback will be made almost
+	 * immediately thereafter.
+	 */
+	u32 (*report_death)(struct utrace_attached_engine *engine,
+			    struct task_struct *tsk);
+
+	/*
+	 * Called when someone reaps the dead task (parent, init, or self).
+	 * No more callbacks are made after this one.
+	 * The engine is always detached.
+	 * There is nothing more a tracing engine can do about this thread.
+	 */
+	void (*report_reap)(struct utrace_attached_engine *engine,
+			    struct task_struct *tsk);
+
+	/*
+	 * Miscellaneous hooks. These are not associated with event reports.
+	 * Any of these may be null if the engine has nothing to say.
+	 * These hooks are called in more constrained environments and should
+	 * not block or do very much.
+	 */
+
+	/*
+	 * Return nonzero iff the @caller task should be allowed to access
+	 * the memory of the target task via /proc/PID/mem and so forth,
+	 * by dint of this engine's attachment to the target.
+	 */
+	int (*allow_access_process_vm)(struct utrace_attached_engine *engine,
+				       struct task_struct *target,
+				       struct task_struct *caller);
+
+	/*
+	 * Return %LSM_UNSAFE_* bits that apply to the exec in progress
+	 * due to tracing done by this engine. These bits indicate that
+	 * someone is able to examine the process and so a set-UID or similar
+	 * privilege escalation may not be safe to permit.
+	 *
+	 * Called with task_lock() held.
+	 */
+	int (*unsafe_exec)(struct utrace_attached_engine *engine,
+			   struct task_struct *target);
+
+	/*
+	 * Return the &struct task_struct for the task using ptrace on this
+	 * one, or %NULL. Always called with rcu_read_lock() held to keep the
+	 * returned struct alive.
+	 *
+	 * At exec time, this may be called with task_lock(target) still
+	 * held from when unsafe_exec() was just called.
+	 * In that case it
+	 * must give results consistent with those unsafe_exec() results,
+	 * i.e. non-%NULL if any %LSM_UNSAFE_PTRACE_* bits were set.
+	 *
+	 * The value is also displayed after "TracerPid:" in
+	 * /proc/PID/status, where it is called with only rcu_read_lock() held.
+	 *
+	 * If this engine returns %NULL, another engine may supply the result.
+	 */
+	struct task_struct *(*tracer_task)(struct utrace_attached_engine *,
+					   struct task_struct *target);
+};
+
+
+/*
+ * These are the exported entry points for tracing engines to use.
+ */
+struct utrace_attached_engine *utrace_attach(struct task_struct *target,
+					     int flags,
+					     const struct utrace_engine_ops *,
+					     void *data);
+int utrace_detach(struct task_struct *target,
+		  struct utrace_attached_engine *engine);
+int utrace_set_flags(struct task_struct *target,
+		     struct utrace_attached_engine *engine,
+		     unsigned long flags);
+int utrace_inject_signal(struct task_struct *target,
+			 struct utrace_attached_engine *engine,
+			 u32 action, siginfo_t *info,
+			 const struct k_sigaction *ka);
+const struct utrace_regset *utrace_regset(struct task_struct *target,
+					  struct utrace_attached_engine *,
+					  const struct utrace_regset_view *,
+					  int which);
+
+
+/*
+ * Hooks in <linux/tracehook.h> call these entry points to the utrace dispatch.
+ */
+int utrace_quiescent(struct task_struct *, struct utrace_signal *);
+void utrace_release_task(struct task_struct *);
+int utrace_get_signal(struct task_struct *, struct pt_regs *,
+		      siginfo_t *, struct k_sigaction *);
+void utrace_report_clone(unsigned long clone_flags, struct task_struct *child);
+void utrace_report_vfork_done(pid_t child_pid);
+void utrace_report_exit(long *exit_code);
+void utrace_report_death(struct task_struct *, struct utrace *);
+void utrace_report_delayed_group_leader(struct task_struct *);
+int utrace_report_jctl(int type);
+void utrace_report_exec(struct linux_binprm *bprm, struct pt_regs *regs);
+void utrace_report_syscall(struct pt_regs *regs, int is_exit);
+struct task_struct *utrace_tracer_task(struct task_struct *);
+int utrace_allow_access_process_vm(struct task_struct *);
+int utrace_unsafe_exec(struct task_struct *);
+void utrace_signal_handler_singlestep(struct task_struct *, struct pt_regs *);
+
+/*
+ * <linux/tracehook.h> uses these accessors to avoid #ifdef CONFIG_UTRACE.
+ */
+static inline unsigned long tsk_utrace_flags(struct task_struct *tsk)
+{
+	return tsk->utrace_flags;
+}
+static inline struct utrace *tsk_utrace_struct(struct task_struct *tsk)
+{
+	return tsk->utrace;
+}
+static inline void utrace_init_task(struct task_struct *child)
+{
+	child->utrace_flags = 0;
+	child->utrace = NULL;
+}
+
+#else  /* !CONFIG_UTRACE */
+
+static inline unsigned long tsk_utrace_flags(struct task_struct *tsk)
+{
+	return 0;
+}
+static inline struct utrace *tsk_utrace_struct(struct task_struct *tsk)
+{
+	return NULL;
+}
+static inline void utrace_init_task(struct task_struct *child)
+{
+}
+
+/*
+ * The calls to these should all be in if (0) and optimized out entirely.
+ * We have stubs here only so tracehook.h doesn't need to #ifdef them
+ * to avoid external references in case of unoptimized compilation.
+ */
+static inline int utrace_quiescent(struct task_struct *tsk, void *ignored)
+{
+	BUG();
+	return 0;
+}
+static inline void utrace_release_task(struct task_struct *tsk)
+{
+	BUG();
+}
+static inline int utrace_get_signal(struct task_struct *tsk,
+				    struct pt_regs *regs,
+				    siginfo_t *info, struct k_sigaction *ka)
+{
+	BUG();
+	return 0;
+}
+static inline void utrace_report_clone(unsigned long clone_flags,
+				       struct task_struct *child)
+{
+	BUG();
+}
+static inline void utrace_report_vfork_done(pid_t child_pid)
+{
+	BUG();
+}
+static inline void utrace_report_exit(long *exit_code)
+{
+	BUG();
+}
+static inline void utrace_report_death(struct task_struct *tsk, void *ignored)
+{
+	BUG();
+}
+static inline void utrace_report_delayed_group_leader(struct task_struct *tsk)
+{
+	BUG();
+}
+static inline int utrace_report_jctl(int type)
+{
+	BUG();
+	return 0;
+}
+static inline void utrace_report_exec(struct linux_binprm *bprm,
+				      struct pt_regs *regs)
+{
+	BUG();
+}
+static inline void utrace_report_syscall(struct pt_regs *regs, int is_exit)
+{
+	BUG();
+}
+static inline struct task_struct *utrace_tracer_task(struct task_struct *tsk)
+{
+	BUG();
+	return NULL;
+}
+static inline int utrace_allow_access_process_vm(struct task_struct *tsk)
+{
+	BUG();
+	return 0;
+}
+static inline int utrace_unsafe_exec(struct task_struct *tsk)
+{
+	BUG();
+	return 0;
+}
+static inline void utrace_signal_handler_singlestep(struct task_struct *tsk,
+						    struct pt_regs *regs)
+{
+	BUG();
+}
+
+#endif /* CONFIG_UTRACE */
+
+#endif /* linux/utrace.h */
Index: b/include/linux/sched.h
===================================================================
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -942,6 +942,11 @@ struct task_struct {
 	struct audit_context *audit_context;
 	seccomp_t seccomp;
 
+#ifdef CONFIG_UTRACE
+	struct utrace *utrace;
+	unsigned long utrace_flags;
+#endif
+
 /* Thread group tracking */
 	u32 parent_exec_id;
 	u32 self_exec_id;
Index: b/init/Kconfig
===================================================================
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -569,6 +569,24 @@ config STOP_MACHINE
 	  Need stop_machine() primitive.
 endmenu
 
+menu "Process debugging support"
+
+config UTRACE
+	bool "Infrastructure for tracing and debugging user processes"
+	default y
+	depends on MODULES
+	help
+	  Enable the utrace process tracing interface.
+	  This is an internal kernel interface to track events in user
+	  threads, and to extract and change user thread state. The
+	  interface is exported to kernel modules, and is also used to
+	  implement ptrace. If you disable this, no facilities for
+	  debugging user processes will be available, nor will the
+	  facilities used by UML and other applications. Unless you are
+	  making a specially stripped-down kernel and are very sure you
+	  don't need these facilities, say Y.
+endmenu
+
 menu "Block layer"
 source "block/Kconfig"
 endmenu
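
As a usage illustration for reviewers (not part of the patch itself): a
minimal tracing engine built on the entry points above could look like the
sketch below. The module name, the target_pid parameter, and the
UTRACE_EVENT(EXEC) event-mask macro are assumptions for illustration (the
event-mask macros live elsewhere in the header and are not shown in this
excerpt), and utrace_attach() is assumed to return an ERR_PTR() value on
failure, matching the convention utrace.c uses internally.

/*
 * Illustrative sketch only -- not part of this patch.  A minimal engine
 * that attaches to one task by PID and logs its exec events.
 */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/err.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/binfmts.h>
#include <linux/utrace.h>

static int target_pid;
module_param(target_pid, int, 0);

static struct task_struct *demo_task;
static struct utrace_attached_engine *demo_engine;

static u32 demo_report_exec(struct utrace_attached_engine *engine,
			    struct task_struct *tsk,
			    const struct linux_binprm *bprm,
			    struct pt_regs *regs)
{
	printk(KERN_INFO "demo: %d exec'd %s\n", tsk->pid, bprm->filename);
	return UTRACE_ACTION_RESUME;	/* let the thread continue */
}

static const struct utrace_engine_ops demo_ops = {
	.report_exec = demo_report_exec,
};

static int __init demo_init(void)
{
	rcu_read_lock();
	demo_task = find_task_by_pid(target_pid);
	if (demo_task)
		get_task_struct(demo_task);
	rcu_read_unlock();
	if (!demo_task)
		return -ESRCH;

	/* Assumed to return an ERR_PTR() value on failure. */
	demo_engine = utrace_attach(demo_task,
				    UTRACE_ATTACH_CREATE |
				    UTRACE_ATTACH_EXCLUSIVE |
				    UTRACE_ATTACH_MATCH_OPS,
				    &demo_ops, NULL);
	if (IS_ERR(demo_engine)) {
		put_task_struct(demo_task);
		return PTR_ERR(demo_engine);
	}

	/* Ask for exec reports; UTRACE_EVENT() is an assumed macro. */
	return utrace_set_flags(demo_task, demo_engine, UTRACE_EVENT(EXEC));
}

static void __exit demo_exit(void)
{
	/* May fail harmlessly if the task already died and was reaped. */
	utrace_detach(demo_task, demo_engine);
	put_task_struct(demo_task);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

Attaching with UTRACE_ATTACH_EXCLUSIVE | UTRACE_ATTACH_MATCH_OPS makes a
second insertion of the same module fail rather than silently attach a
duplicate engine to the same thread.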
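
The way an %UTRACE_SIGNAL_* op code composes with the lasting state bits in
one return value may also be easier to see in code than in prose. Here is a
hypothetical report_signal() callback for the same sketch engine; the
SIGUSR1 policy is invented, and the callback only runs if the engine also
requested the signal events (event-mask bits not shown in this excerpt).

/*
 * Illustrative sketch only.  Swallow SIGUSR1 and leave the thread
 * quiescent for this engine; let every other signal take its normal
 * course.
 */
static u32 demo_report_signal(struct utrace_attached_engine *engine,
			      struct task_struct *tsk,
			      struct pt_regs *regs,
			      u32 action, siginfo_t *info,
			      const struct k_sigaction *orig_ka,
			      struct k_sigaction *return_ka)
{
	if (info->si_signo == SIGUSR1)
		/*
		 * UTRACE_SIGNAL_IGN is the OP_MASK code: discard the signal.
		 * UTRACE_ACTION_NEWSTATE | UTRACE_ACTION_QUIESCE replaces
		 * this engine's state bits so the thread stays quiescent
		 * until a later utrace_set_flags() call resumes it.
		 */
		return UTRACE_SIGNAL_IGN |
			UTRACE_ACTION_NEWSTATE | UTRACE_ACTION_QUIESCE;

	/* Resume normally; the incoming @action disposition stands. */
	return UTRACE_ACTION_RESUME;
}

Returning plain UTRACE_ACTION_RESUME leaves the pending @action and
@return_ka disposition in force, as described in the callback comments
above.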