OpenBSD manual page server

Manual Page Search Parameters
SYSTRACE(4) Device Drivers Manual SYSTRACE(4)

systrace — enforce and generate policies for system calls

pseudo-device systrace [count]

systrace attaches to processes and enforces policies for system calls. A pseudo-device, /dev/systrace, allows userland processes to control the behavior of systrace through an ioctl(2) interface.

systrace can assign the following policies to system calls:

SYSTR_POLICY_ASK
Send a message of the type SYSTR_MSG_ASK, and put the process to sleep until a STRIOCANSWER ioctl(2) is made.
SYSTR_POLICY_PERMIT
Immediately allow the system call.
SYSTR_POLICY_NEVER
Immediately return an error code.
SYSTR_POLICY_KILL
Send SIGKILL to the traced process.

A read(2) operation on the systrace pseudo-device will block if there are no pending messages, or return the following structure:

/*
 * Message returned by read(2) on the systrace pseudo-device and delivered
 * to the userland control process.
 */
struct str_message {
	int msg_type;		/* one of the SYSTR_MSG_* values below */
#define SYSTR_MSG_ASK		1	/* no cached simple policy for the syscall */
#define SYSTR_MSG_RES		2	/* syscall was flagged SYSTR_FLAGS_RESULT */
#define SYSTR_MSG_EMUL		3	/* emulation of the process changed */
#define SYSTR_MSG_CHILD		4	/* process gained or lost a child */
#define SYSTR_MSG_UGID		5	/* effective UID or GID changed */
#define SYSTR_MSG_POLICYFREE	6	/* kernel freed policy msg_policy */
#define SYSTR_MSG_EXECVE	7	/* privileges dropped across execve(2) */
	pid_t msg_pid;		/* process the message refers to */
	u_int16_t msg_seqnr;	/* answer has to match seqnr */
	short msg_policy;	/* policy number (e.g. the one being freed) */
	/*
	 * Type-specific payload.  NOTE(review): which member is valid
	 * presumably follows msg_type -- confirm against the kernel source.
	 */
	union {
		struct str_msg_emul msg_emul;
		struct str_msg_ugid msg_ugid;
		struct str_msg_ask msg_ask;
		struct str_msg_child msg_child;
		struct str_msg_execve msg_execve;
	} msg_data;
};

/*
 * Emulation report; used to report the emulation a process is using.
 */
struct str_msg_emul {
	char emul[SYSTR_EMULEN];	/* presumably the emulation name -- confirm */
};

/*
 * User/group ID payload, associated with the notification that the
 * effective UID or GID changed during a system call.
 */
struct str_msg_ugid {
	uid_t uid;	/* presumably the new effective UID -- confirm */
	gid_t gid;	/* presumably the new effective GID -- confirm */
};

/*
 * Payload of SYSTR_MSG_EXECVE.
 */
struct str_msg_execve {
	char path[MAXPATHLEN];	/* name of the new image */
};

/*
 * Description of a system call the control process is being asked about.
 */
struct str_msg_ask {
	int code;	/* system call number within the current emulation */
	int argsize;	/* presumably the size in bytes of args -- confirm */
	register_t args[SYSTR_MAXARGS];	/* system call arguments */
	register_t rval[2];	/* NOTE(review): likely syscall return values -- confirm */
	int result;	/* NOTE(review): likely the syscall's error result -- confirm */
};

/*
 * Payload describing a child gained or lost by the traced process.  A lost
 * child is reported when it exits, not when it is reaped.
 */
struct str_msg_child {
	pid_t new_pid;		/* -1 if child exited */
};

These messages are all sent to the userland control process.

SYSTR_MSG_ASK
This message is sent whenever the kernel does not have a cached simple policy for system call number code within the currently set emulation.
SYSTR_MSG_RES
This message is sent whenever a system call is flagged with SYSTR_FLAGS_RESULT.
SYSTR_MSG_EMUL
This message is sent whenever the emulation of a process changes.
SYSTR_MSG_CHILD
This message is sent whenever a process gains or loses a child. In the latter case, the event is raised when the child exits, but not when it is reaped.
SYSTR_MSG_UGID
This message is sent whenever the effective UID or GID has changed during the execution of a system call.
SYSTR_MSG_POLICYFREE
This is sent whenever the kernel frees the policy identified by msg_policy.
SYSTR_MSG_EXECVE
This message is sent whenever a process is privileged before a call to execve(2) (technically, the process has the PS_SUGID or PS_SUGIDEXEC flag set), but those privileges have been dropped after the call. The new image name is specified in the path argument.

systrace supports the following ioctl(2) commands:

STRIOCCLONE int *
Return a systrace file descriptor for further ioctl(2) operations. The returned systrace file descriptor is not inherited by a child created with fork(2). Similarly, it cannot be passed across UNIX-domain sockets.
STRIOCATTACH pid_t *
Attach to a process, unless:
  1. It's the process that's doing the attaching.
  2. It's a system process.
  3. It's being traced already.
  4. You do not own the process and you're not root.
  5. It's init(8), and the kernel was not compiled with option INSECURE.
STRIOCDETACH pid_t *
Wake up a process if it is waiting for an answer, and detach from it.
STRIOCANSWER struct systrace_answer *
Tell systrace what to do with a system call that was assigned a policy of SYSTR_POLICY_ASK.
/*
 * Answer from the control process telling systrace what to do with a
 * system call that was assigned a policy of SYSTR_POLICY_ASK.
 */
struct systrace_answer {
	pid_t stra_pid;	    /* PID of process being traced */
	u_int16_t stra_seqnr;	/* sequence number of the message being answered */
	short reserved;
	uid_t stra_seteuid; /* Elevated privileges for syscall */
	uid_t stra_setegid;
	int stra_policy;    /* Policy to assign */
	int stra_error;	    /* Return value of denied syscall
			       (will return EPERM if zero) */
	int stra_flags;	    /* bitwise OR of the SYSTR_FLAGS_* values below */
#define	SYSTR_FLAGS_RESULT  0x001    /* Report syscall result */
#define SYSTR_FLAGS_SETEUID 0x002    /* presumably: apply stra_seteuid -- confirm */
#define SYSTR_FLAGS_SETEGID 0x004    /* presumably: apply stra_setegid -- confirm */
};
STRIOCREPORT pid_t *
Report the current emulation a process is using inside the msg_emul structure.
STRIOCREPLACE struct systrace_replace *
Arrange for system call arguments to be replaced by arguments supplied by the monitoring process.
/*
 * Request to replace system call arguments with arguments supplied by the
 * monitoring process.
 */
struct systrace_replace {
	pid_t strr_pid;		/* presumably the traced process -- confirm */
	u_int16_t strr_seqnr;	/* presumably matches the message's seqnr -- confirm */
	int16_t reserved;
	int strr_nrepl;		/* # of arguments to replace */
	caddr_t	strr_base;		/* Base user memory */
	size_t strr_len;		/* Length of memory */
	int strr_argind[SYSTR_MAXARGS];	/* Argument indexes */
	size_t strr_off[SYSTR_MAXARGS];	/* Argument offsets */
	size_t strr_offlen[SYSTR_MAXARGS]; /* Argument sizes */
	int32_t strr_flags[SYSTR_MAXARGS]; /* NOTE(review): per-argument flags; meaning not shown here */
};
STRIOCIO struct systrace_io *
Copy data in/out of the process being traced.
/*
 * Request to copy data in or out of the process being traced.
 */
struct systrace_io {
	pid_t strio_pid;    /* PID of process being traced */
	int strio_op;	    /* SYSTR_READ or SYSTR_WRITE */
#define	SYSTR_READ	1
#define	SYSTR_WRITE	2
	void *strio_offs;   /* address inside the traced process */
	void *strio_addr;   /* buffer in the calling (monitoring) process */
	size_t strio_len;   /* number of bytes to copy */
};
STRIOCPOLICY struct systrace_policy *
Manipulate the set of policies.
/*
 * Request to manipulate the set of policies.  strp_op selects the
 * operation and thereby which member of strp_data is meaningful.
 */
struct systrace_policy {
	int strp_op;
#define	SYSTR_POLICY_NEW	1  /* Allocate a new policy */
#define	SYSTR_POLICY_ASSIGN	2  /* Assign policy to process */
#define	SYSTR_POLICY_MODIFY	3  /* Modify an entry */
	int strp_num;		/* policy number; filled in by SYSTR_POLICY_NEW */
	union {
		struct {
			short code;	/* index of the entry to modify */
#define SYSTR_POLICY_ASK	0	/* ask the control process */
#define SYSTR_POLICY_PERMIT	1	/* allow the system call */
#define SYSTR_POLICY_NEVER	2	/* return an error code */
#define SYSTR_POLICY_KILL	3	/* send SIGKILL to the process */
			short policy;	/* new SYSTR_POLICY_* value for the entry */
		} assign;
		pid_t pid;	/* process the policy is attached to (ASSIGN) */
		int maxents;	/* number of entries to allocate (NEW) */
	} strp_data;
/* Shorthand accessors for the union members above. */
#define strp_pid	strp_data.pid
#define strp_maxents	strp_data.maxents
#define strp_code	strp_data.assign.code
#define strp_policy	strp_data.assign.policy
};

The SYSTR_POLICY_NEW operation allocates a new policy of strp_maxents entries with each initialized to SYSTR_POLICY_ASK, and returns the new policy number into strp_num.

The SYSTR_POLICY_ASSIGN operation attaches the policy identified by strp_num to strp_pid, with a maximum of strp_maxents entries.

The SYSTR_POLICY_MODIFY operation changes the entry indexed by strp_code to strp_policy.

STRIOCGETCWD struct systrace_getcwd *
Set the working directory of the calling process to the directory associated with file descriptor strgd_atfd in the process named by strgd_pid. If strgd_atfd is set to the special value AT_FDCWD, then the current working directory of the named process is used instead.
/*
 * Names the directory that the calling process's working directory is
 * to be set to.
 */
struct systrace_getcwd {
	pid_t strgd_pid;	/* process whose directory is used */
	int   strgd_atfd;	/* descriptor in that process, or AT_FDCWD for its cwd */
};
STRIOCRESCWD
Restore the working directory of the current process.
STRIOCINJECT struct systrace_inject *
Inject a buffer into the stackgap of the traced process. This allows non-scalar arguments to be manipulated. The actual replacement is not done until system call time, and its presence in the stackgap is only guaranteed for the duration of that system call.
/*
 * Request to inject a buffer into the stackgap of the traced process.
 */
struct systrace_inject {
	/* On return, this contains the stackgap address. */
	caddr_t stri_addr;
	size_t  stri_len;	/* presumably the length of the buffer -- confirm */
	pid_t   stri_pid;	/* presumably the traced process -- confirm */
};
STRIOCSCRIPTNAME struct systrace_scriptname *
Set the path of executed scripts to sn_scriptname.
/*
 * Request to set the path of executed scripts.
 */
struct systrace_scriptname {
	pid_t sn_pid;	/* presumably the traced process -- confirm */
	char  sn_scriptname[MAXPATHLEN];	/* script path to set */
};

/dev/systrace
system call tracing facility

The following is an example program that traces another process, printing out the path to any open(2) system calls it performs.

#include <sys/param.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <dev/systrace.h>

#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * Number of system calls that will be covered in our policy.
 * It is used both as strp_maxents when the policy is created and as the
 * bound of the loop that marks every entry SYSTR_POLICY_PERMIT.  If it is
 * too small to cover the system calls needed for fundamental process
 * operation, the traced process will block forever.
 */
#define NSYSCALLS 512

/*
 * Trace another process and print the path argument of every open(2)
 * system call it performs.
 *
 * The program clones a systrace descriptor from /dev/systrace, attaches
 * to the target, installs a policy that permits every system call except
 * open(2), and then answers each SYSTR_MSG_ASK message by printing the
 * path and permitting the call.
 *
 * Exits via err(3) with status 1 on any failed system call; returns 0
 * once read(2) stops delivering complete messages.
 */
int
main(int argc, char *argv[])
{
	struct systrace_policy strpol;
	struct systrace_answer strans;
	struct systrace_io strio;
	struct str_message strmsg;
	int fd, cfd, i;
	pid_t pid;
	ssize_t n;
	char *p;
	char c;

	if ((fd = open("/dev/systrace", O_RDONLY)) == -1)
		err(1, "/dev/systrace");

	/*
	 * Get a systrace descriptor.
	 */
	if (ioctl(fd, STRIOCCLONE, &cfd) == -1)
		err(1, "STRIOCCLONE");
	close(fd);

	/*
	 * Gather the PID of a process to systrace from somewhere.
	 * pid must be assigned here, before it is handed to STRIOCATTACH.
	 */
	/* ... */

	if (ioctl(cfd, STRIOCATTACH, &pid) == -1)
		err(1, "STRIOCATTACH");

	/* Install one policy. */
	memset(&strpol, 0, sizeof(strpol));
	strpol.strp_op = SYSTR_POLICY_NEW;
	strpol.strp_maxents = NSYSCALLS;

	if (ioctl(cfd, STRIOCPOLICY, &strpol) == -1)
		err(1, "STRIOCPOLICY NEW");

	/* strp_num now names the new policy and is reused below. */
	strpol.strp_op = SYSTR_POLICY_ASSIGN;
	strpol.strp_pid = pid;

	if (ioctl(cfd, STRIOCPOLICY, &strpol) == -1)
		err(1, "STRIOCPOLICY ASSIGN");

	/* Permit all system calls. */
	for (i = 0; i < NSYSCALLS; i++) {
		strpol.strp_op = SYSTR_POLICY_MODIFY;
		strpol.strp_code = i;
		strpol.strp_policy = SYSTR_POLICY_PERMIT;

		if (ioctl(cfd, STRIOCPOLICY, &strpol) == -1)
			err(1, "STRIOCPOLICY MODIFY");
	}

	/* Ask us about open(2) system calls. */
	strpol.strp_op = SYSTR_POLICY_MODIFY;
	strpol.strp_code = SYS_open;
	strpol.strp_policy = SYSTR_POLICY_ASK;

	if (ioctl(cfd, STRIOCPOLICY, &strpol) == -1)
		err(1, "STRIOCPOLICY MODIFY");

	/*
	 * Now this process just answers requests for the operations the
	 * traced process performs that we have requested systrace to ask
	 * us about.
	 */
	while ((n = read(cfd, &strmsg, sizeof(strmsg))) ==
	    sizeof(strmsg)) {
		switch (strmsg.msg_type) {
		case SYSTR_MSG_ASK:
			/*
			 * Print out the path argument to open(2).  The first
			 * argument register holds the address of the path in
			 * the traced process; copy it into a pointer rather
			 * than casting the register value.
			 */
			memcpy(&p, &strmsg.msg_data.msg_ask.args[0],
			    sizeof(p));
			printf("open(");
			for (;;) {
				/* Fetch one byte from the traced process. */
				memset(&strio, 0, sizeof(strio));
				strio.strio_pid = strmsg.msg_pid;
				strio.strio_op = SYSTR_READ;
				strio.strio_offs = p;
				strio.strio_addr = &c;
				strio.strio_len = 1;

				if (ioctl(cfd, STRIOCIO, &strio) == -1)
					err(1, "STRIOCIO");
				/* Stop at the terminator without printing it. */
				if (c == '\0')
					break;
				putchar(c);
				p++;
			}
			printf(")\n");

			/* The answer must carry the message's seqnr. */
			memset(&strans, 0, sizeof(strans));
			strans.stra_pid = strmsg.msg_pid;
			strans.stra_seqnr = strmsg.msg_seqnr;
			strans.stra_policy = SYSTR_POLICY_PERMIT;

			if (ioctl(cfd, STRIOCANSWER, &strans) == -1)
				err(1, "STRIOCANSWER");
			break;
		default:
			/* Other message types need no answer here. */
			break;
		}
	}
	if (n == -1)
		err(1, "read");
	close(cfd);
	return 0;
}

systrace(1), ioctl(2), read(2), options(4), securelevel(7)

The systrace facility first appeared in OpenBSD 3.2.

When creating new policies, if strp_maxents is not large enough to accommodate any system calls needed for fundamental process operations, the traced process will block forever.

November 3, 2015 OpenBSD-5.9