Add lwip: a new lwIP-based TCP/IP service

This commit adds a new TCP/IP service to MINIX 3.  At its core, the
service uses the lwIP TCP/IP stack, chosen for maintainability
reasons.  The service aims to be compatible with NetBSD userland,
including its low-level network management utilities.  It also aims
to support modern features such as IPv6.  In summary, the new LWIP
service supports the following main features:

- TCP, UDP, RAW sockets with mostly standard BSD API semantics;
- IPv6 support: host mode (complete) and router mode (partial);
- most of the standard BSD API socket options (SO_);
- all of the standard BSD API message flags (MSG_);
- the most used protocol-specific socket and control options;
- a default loopback interface and the ability to create one more;
- configuration-free ethernet interfaces and driver tracking;
- queuing and multiple concurrent requests to each ethernet driver;
- standard ioctl(2)-based BSD interface management;
- radix tree backed, destination-based routing;
- routing sockets for standard BSD route reporting and management;
- multicast traffic and multicast group membership tracking;
- Berkeley Packet Filter (BPF) devices;
- standard and custom sysctl(7) nodes for many internals;
- a slab allocation based, hybrid static/dynamic memory pool model.

Many of its modules come with fairly elaborate comments that cover
their design and operation.  The service is primarily a socket
driver built on top of the libsockdriver library, but for BPF devices
it is at the same time also a character driver.

Change-Id: Ib0c02736234b21143915e5fcc0fda8fe408f046f
Author: David van Moolenbroek, 2016-09-29 23:07:07 +00:00
commit ef8d499e2d (parent 0f03189a6a)
60 changed files with 25803 additions and 11 deletions


@@ -194,7 +194,7 @@
./etc/system.conf.d/hello minix-base
./etc/system.conf.d/inet minix-base obsolete
./etc/system.conf.d/ipc minix-base
./etc/system.conf.d/lwip minix-base obsolete
./etc/system.conf.d/lwip minix-base
./etc/system.conf.d/random minix-base
./etc/system.conf.d/uds minix-base
./etc/system.conf.d/usb_hub minix-base
@@ -277,7 +277,7 @@
./service/is minix-base
./service/isofs minix-base
./service/log minix-base
./service/lwip minix-base obsolete
./service/lwip minix-base
./service/memory minix-base
./service/mfs minix-base
./service/mib minix-base


@@ -1182,6 +1182,7 @@
./usr/include/minix/blockdriver.h minix-comp
./usr/include/minix/blockdriver_mt.h minix-comp
./usr/include/minix/board.h minix-comp
./usr/include/minix/bpf.h minix-comp
./usr/include/minix/btrace.h minix-comp
./usr/include/minix/callnr.h minix-comp
./usr/include/minix/chardriver.h minix-comp
@@ -1208,6 +1209,7 @@
./usr/include/minix/hgfs.h minix-comp
./usr/include/minix/i2c.h minix-comp
./usr/include/minix/i2cdriver.h minix-comp
./usr/include/minix/if.h minix-comp
./usr/include/minix/input.h minix-comp
./usr/include/minix/inputdriver.h minix-comp
./usr/include/minix/ioctl.h minix-comp


@@ -200,7 +200,7 @@
./usr/libdata/debug/service/is.debug minix-debug debug
./usr/libdata/debug/service/isofs.debug minix-debug debug
./usr/libdata/debug/service/log.debug minix-debug debug
./usr/libdata/debug/service/lwip.debug minix-debug debug,obsolete
./usr/libdata/debug/service/lwip.debug minix-debug debug
./usr/libdata/debug/service/memory.debug minix-debug debug
./usr/libdata/debug/service/mfs.debug minix-debug debug
./usr/libdata/debug/service/mib.debug minix-debug debug


@@ -130,6 +130,9 @@ do
;;
6,0) des="line printer, parallel port" dev=lp
;;
7,0)
des="Berkeley Packet Filter device" dev=bpf
;;
9,0)
des="unix98 pseudoterminal master" dev=ptmx
;;


@@ -33,6 +33,7 @@ RAMDISK_DEVICES="
STD_DEVICES="
${RAMDISK_DEVICES}
bmp085b1s77 bmp085b2s77 bmp085b3s77
bpf
eepromb1s50 eepromb1s51 eepromb1s52 eepromb1s53
eepromb1s54 eepromb1s55 eepromb1s56 eepromb1s57
eepromb2s50 eepromb2s51 eepromb2s52 eepromb2s53
@@ -128,6 +129,7 @@ Where key is one of the following:
tty00 ... tty03 # Make serial lines
ttyp0 ... ttyq0 ... # Make tty, pty pairs
audio mixer # Make audio devices
bpf # Make /dev/bpf
klog # Make /dev/klog
ptmx # Make /dev/ptmx
random # Make /dev/random, /dev/urandom
@@ -215,6 +217,13 @@ do
makedev bmp085b${bus}s77 c ${major} 0 ${uname} ${gname} 444
;;
bpf)
# Berkeley Packet Filter device, for the LWIP service
# This is a cloning device, but some programs (e.g., dhclient)
# assume individual devices are numbered, so also create bpf0.
makedev ${dev} c 7 0 ${uname} ${gname} 600
makedev ${dev}0 c 7 0 ${uname} ${gname} 600
;;
c[0-3]d[0-7])
# Whole disk devices.
disk=`expr ${dev} : '...\\(.\\)'`


@@ -125,7 +125,7 @@ service_get_policies(struct policies * pol, index_t slot)
{ .label = "ptyfs", .policy_str = "" },
{ .label = "vbfs", .policy_str = "" },
/* net */
{ .label = "lwip", .policy_str = "" },
{ .label = "lwip", .policy_str = "reset" },
/* servers */
{ .label = "devman", .policy_str = "restart" },
{ .label = "ds", .policy_str = "restart" },


@@ -5,14 +5,14 @@ INCSDIR= /usr/include/minix
INCS+= paths.h param.h
INCS+= acpi.h audio_fw.h bitmap.h \
bdev.h blockdriver.h blockdriver_mt.h \
board.h btrace.h \
board.h bpf.h btrace.h \
callnr.h chardriver.h clkconf.h com.h \
config.h const.h cpufeature.h \
debug.h devio.h devman.h dmap.h \
driver.h drivers.h drvlib.h ds.h \
endpoint.h fb.h fsdriver.h fslib.h gpio.h gcov.h hash.h \
hgfs.h i2c.h i2cdriver.h ioctl.h input.h \
inputdriver.h ipc.h ipc_filter.h ipcconst.h \
hgfs.h i2c.h i2cdriver.h if.h input.h inputdriver.h \
ioctl.h ipc.h ipc_filter.h ipcconst.h \
keymap.h log.h mmio.h mthread.h minlib.h \
netdriver.h optset.h padconf.h partition.h portio.h \
priv.h procfs.h profile.h \

minix/include/minix/bpf.h Normal file (42 lines added)

@@ -0,0 +1,42 @@
#ifndef _MINIX_BPF_H
#define _MINIX_BPF_H
#include <net/bpf.h>
/*
* MINIX3-specific extensions to the NetBSD Berkeley Packet Filter header.
* These extensions are necessary because NetBSD BPF uses a few ioctl(2)
* structure formats that contain pointers--something that MINIX3 has to avoid,
* due to its memory granting mechanisms. Thus, those ioctl(2) calls have to
* be converted from NetBSD to MINIX3 format. We currently do that in libc.
* This header specifies the numbers and formats for the MINIX3 versions.
*
* See <minix/if.h> for details on how things work here.
*/
/* BIOCSETF: set BPF filter program. */
/*
* This ioctl is an exception, as it is write-only, so we do not need the
* original structure. Also, the size of this structure is currently slightly
* over 4KB, which makes it too big for a regular ioctl call. Thus, we have to
* use a big ioctl call. Note that future changes of BPF_MAXINSNS will
* unfortunately (necessarily) change the ioctl call number.
*/
struct minix_bpf_program {
u_int mbf_len;
struct bpf_insn mbf_insns[BPF_MAXINSNS];
};
#define MINIX_BIOCSETF _IOW_BIG(2, struct minix_bpf_program)
/* BIOCGDLTLIST: retrieve list of possible data link types. */
#define MINIX_BPF_MAXDLT 256
struct minix_bpf_dltlist {
struct bpf_dltlist mbfl_dltlist; /* MUST be first */
u_int mbfl_list[MINIX_BPF_MAXDLT];
};
#define MINIX_BIOCGDLTLIST _IOWR('B', 119, struct minix_bpf_dltlist)
#endif /* !_MINIX_BPF_H */
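
The intent is that userland keeps using the standard NetBSD BPF API
unchanged.  A minimal sketch (the descriptor and filter program are
hypothetical; the libc ioctl(2) wrapper shown later in this commit
rewrites the request transparently):

#include <sys/types.h>
#include <sys/ioctl.h>
#include <net/bpf.h>

/* Attach a filter program to an open /dev/bpf descriptor 'fd'. */
static int
attach_filter(int fd, struct bpf_insn * insns, u_int ninsns)
{
	struct bpf_program bf;

	bf.bf_len = ninsns;
	bf.bf_insns = insns;

	/* libc rewrites this call to MINIX_BIOCSETF behind the scenes. */
	return ioctl(fd, BIOCSETF, &bf);
}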


@@ -25,7 +25,7 @@
#define TTY_MAJOR 4 /* 4 = /dev/tty00 (ttys) */
#define CTTY_MAJOR 5 /* 5 = /dev/tty */
#define PRINTER_MAJOR 6 /* 6 = /dev/lp (printer driver) */
/* 7 = (unused) */
#define TCPIP_MAJOR 7 /* 7 = /dev/bpf (TCP/IP service) */
/* 8 = /dev/c1 */
#define PTY_MAJOR 9 /* 9 = /dev/ptyp0 (pty driver) */
/* 10 = /dev/c2 */

minix/include/minix/if.h Normal file (51 lines added)

@@ -0,0 +1,51 @@
#ifndef _MINIX_IF_H
#define _MINIX_IF_H
#include <net/if.h>
#include <net/if_media.h>
/*
* MINIX3-specific extensions to the network interface headers. These
* extensions are necessary because NetBSD IF uses a few ioctl(2) structure
* formats that contain pointers--something that MINIX3 has to avoid, due to
* its memory granting mechanisms. Thus, those ioctl(2) calls have to be
* converted from NetBSD to MINIX3 format. We currently do that in libc.
* This header specifies the numbers and formats for the MINIX3 versions.
*
* The general idea is that we rewrite the ioctl request data to include both
* the original structure and a buffer for the array of values to which the
* original structure uses a pointer. Important: in those cases, the original
* structure is expected to be the first element of the replacement structure.
*
* There is typically no configured upper bound for the maximum number of
* values in the array, and so we pick size values that are hopefully always
* oversized and yet keep the ioctl sizes within the range of regular ioctls
* (4095 bytes, as per sys/ioccom.h). If there may be larger amounts of data,
* we have to use "big" ioctls.
*
* For the replacement ioctl codes, we use the original ioctl class and number
* with a different size. That should virtually eliminate the possibility of
* accidental collisions.
*/
/* SIOCGIFMEDIA: retrieve interface media status and types. */
#define MINIX_IF_MAXMEDIA 256
struct minix_ifmediareq {
struct ifmediareq mifm_ifm; /* MUST be first */
int mifm_list[MINIX_IF_MAXMEDIA];
};
#define MINIX_SIOCGIFMEDIA _IOWR('i', 54, struct minix_ifmediareq)
/* SIOCIFGCLONERS: retrieve interface "cloners" (virtual types). */
#define MINIX_IF_MAXCLONERS 128
struct minix_if_clonereq {
struct if_clonereq mifcr_ifcr; /* MUST be first */
char mifcr_buffer[MINIX_IF_MAXCLONERS * IFNAMSIZ];
};
#define MINIX_SIOCIFGCLONERS _IOWR('i', 120, struct minix_if_clonereq)
#endif /* !_MINIX_IF_H */
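
For example, a standard NetBSD-style cloner query keeps working as-is;
only the request code and data layout change under the hood.  A sketch,
assuming 's' is any open socket descriptor:

#include <sys/ioctl.h>
#include <string.h>
#include <minix/if.h>

/* Retrieve up to 'count' cloner names into 'buf' (count * IFNAMSIZ bytes). */
static int
get_cloners(int s, char * buf, int count)
{
	struct if_clonereq ifcr;

	memset(&ifcr, 0, sizeof(ifcr));
	ifcr.ifcr_count = count;
	ifcr.ifcr_buffer = buf;

	/* libc rewrites this call to MINIX_SIOCIFGCLONERS. */
	if (ioctl(s, SIOCIFGCLONERS, &ifcr) != 0)
		return -1;
	return ifcr.ifcr_total;	/* total number of cloners available */
}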


@@ -28,6 +28,7 @@
#define MINIX_TEST 0
#define MINIX_MIB 1
#define MINIX_PROC 2
#define MINIX_LWIP 3
/*
* These identifiers, under MINIX_TEST, are used by test87 to test the MIB


@@ -9,6 +9,10 @@
#include <sys/ioccom.h>
#include <stdarg.h>
#include <fcntl.h>
#include <stdlib.h>
#include <minix/if.h>
#include <minix/bpf.h>
#include <assert.h>
static void rewrite_i2c_netbsd_to_minix(minix_i2c_ioctl_exec_t *out,
i2c_ioctl_exec_t *in);
@@ -45,6 +49,199 @@ static void rewrite_i2c_minix_to_netbsd(i2c_ioctl_exec_t *out,
}
}
/*
* Convert a network interface related IOCTL with pointers to a flat format
* suitable for MINIX3. Return a pointer to the new data on success, or zero
* (with errno set) on failure. The original request code is given in
* 'request' and must be replaced by the new request code to be used.
*/
static vir_bytes
ioctl_convert_if_to_minix(void * data, unsigned long * request)
{
struct minix_ifmediareq *mifm;
struct ifmediareq *ifm;
struct minix_if_clonereq *mifcr;
struct if_clonereq *ifcr;
switch (*request) {
case SIOCGIFMEDIA:
ifm = (struct ifmediareq *)data;
mifm = (struct minix_ifmediareq *)malloc(sizeof(*mifm));
if (mifm != NULL) {
/*
* The count may exceed MINIX_IF_MAXMEDIA, and should
* be truncated as needed by the IF implementation.
*/
memcpy(&mifm->mifm_ifm, ifm, sizeof(*ifm));
*request = MINIX_SIOCGIFMEDIA;
} else
errno = ENOMEM;
return (vir_bytes)mifm;
case SIOCIFGCLONERS:
ifcr = (struct if_clonereq *)data;
mifcr = (struct minix_if_clonereq *)malloc(sizeof(*mifcr));
if (mifcr != NULL) {
/*
* The count may exceed MINIX_IF_MAXCLONERS, and should
* be truncated as needed by the IF implementation.
*/
memcpy(&mifcr->mifcr_ifcr, ifcr, sizeof(*ifcr));
*request = MINIX_SIOCIFGCLONERS;
} else
errno = ENOMEM;
return (vir_bytes)mifcr;
default:
assert(0);
errno = ENOTTY;
return 0;
}
}
/*
Convert the result of a network interface related IOCTL with pointers from
* the flat format used to make the call to MINIX3. Called on success only.
* The given request code is that of the (NetBSD-type) original.
*/
static void
ioctl_convert_if_from_minix(vir_bytes addr, void * data, unsigned long request)
{
struct minix_ifmediareq *mifm;
struct ifmediareq *ifm;
struct minix_if_clonereq *mifcr;
struct if_clonereq *ifcr;
int count;
switch (request) {
case SIOCGIFMEDIA:
mifm = (struct minix_ifmediareq *)addr;
ifm = (struct ifmediareq *)data;
memcpy(ifm, &mifm->mifm_ifm, sizeof(*ifm));
if (ifm->ifm_ulist != NULL && ifm->ifm_count > 0)
memcpy(ifm->ifm_ulist, mifm->mifm_list,
ifm->ifm_count * sizeof(ifm->ifm_ulist[0]));
break;
case SIOCIFGCLONERS:
mifcr = (struct minix_if_clonereq *)addr;
ifcr = (struct if_clonereq *)data;
memcpy(ifcr, &mifcr->mifcr_ifcr, sizeof(*ifcr));
count = (ifcr->ifcr_count < ifcr->ifcr_total) ?
ifcr->ifcr_count : ifcr->ifcr_total;
if (ifcr->ifcr_buffer != NULL && count > 0)
memcpy(ifcr->ifcr_buffer, mifcr->mifcr_buffer,
count * IFNAMSIZ);
break;
default:
assert(0);
}
}
/*
* Convert a BPF (Berkeley Packet Filter) related IOCTL with pointers to a flat
* format suitable for MINIX3. Return a pointer to the new data on success, or
* zero (with errno set) on failure. The original request code is given in
* 'request' and must be replaced by the new request code to be used.
*/
static vir_bytes
ioctl_convert_bpf_to_minix(void * data, unsigned long * request)
{
struct minix_bpf_program *mbf;
struct bpf_program *bf;
struct minix_bpf_dltlist *mbfl;
struct bpf_dltlist *bfl;
switch (*request) {
case BIOCSETF:
bf = (struct bpf_program *)data;
if (bf->bf_len > __arraycount(mbf->mbf_insns)) {
errno = EINVAL;
return 0;
}
mbf = (struct minix_bpf_program *)malloc(sizeof(*mbf));
if (mbf != NULL) {
mbf->mbf_len = bf->bf_len;
memcpy(mbf->mbf_insns, bf->bf_insns,
bf->bf_len * sizeof(mbf->mbf_insns[0]));
*request = MINIX_BIOCSETF;
} else
errno = ENOMEM;
return (vir_bytes)mbf;
case BIOCGDLTLIST:
bfl = (struct bpf_dltlist *)data;
mbfl = (struct minix_bpf_dltlist *)malloc(sizeof(*mbfl));
if (mbfl != NULL) {
/*
* The length may exceed MINIX_BPF_MAXDLT, and should
* be truncated as needed by the BPF implementation.
*/
memcpy(&mbfl->mbfl_dltlist, bfl, sizeof(*bfl));
*request = MINIX_BIOCGDLTLIST;
} else
errno = ENOMEM;
return (vir_bytes)mbfl;
default:
assert(0);
errno = ENOTTY;
return 0;
}
}
/*
Convert the result of a BPF (Berkeley Packet Filter) related IOCTL with
* pointers from the flat format used to make the call to MINIX3. Called on
* success only. The given request code is that of the (NetBSD-type) original.
*/
static void
ioctl_convert_bpf_from_minix(vir_bytes addr, void * data,
unsigned long request)
{
struct minix_bpf_dltlist *mbfl;
struct bpf_dltlist *bfl;
switch (request) {
case BIOCGDLTLIST:
mbfl = (struct minix_bpf_dltlist *)addr;
bfl = (struct bpf_dltlist *)data;
memcpy(bfl, &mbfl->mbfl_dltlist, sizeof(*bfl));
if (bfl->bfl_list != NULL && bfl->bfl_len > 0)
memcpy(bfl->bfl_list, mbfl->mbfl_list,
bfl->bfl_len * sizeof(bfl->bfl_list[0]));
break;
default:
assert(0);
}
}
/*
* Library implementation of FIOCLEX and FIONCLEX.
*/
@@ -110,6 +307,7 @@ ioctl_to_fcntl(int fd, unsigned long request, void * data)
int ioctl(int fd, unsigned long request, ...)
{
minix_i2c_ioctl_exec_t i2c;
int r, request_save;
message m;
vir_bytes addr;
@@ -124,8 +322,6 @@ int ioctl(int fd, unsigned long request, ...)
* To support compatibility with interfaces on other systems, certain
* requests are re-written to flat structures (i.e. without pointers).
*/
minix_i2c_ioctl_exec_t i2c;
request_save = request;
switch (request) {
@@ -142,6 +338,19 @@
addr = (vir_bytes) &i2c;
request = MINIX_I2C_IOCTL_EXEC;
break;
case SIOCGIFMEDIA:
case SIOCIFGCLONERS:
if ((addr = ioctl_convert_if_to_minix(data, &request)) == 0)
return -1; /* errno has already been set */
break;
case BIOCSETF:
case BIOCGDLTLIST:
if ((addr = ioctl_convert_bpf_to_minix(data, &request)) == 0)
return -1; /* errno has already been set */
break;
default:
/* Keep original as-is */
addr = (vir_bytes)data;
@@ -155,11 +364,30 @@
r = _syscall(VFS_PROC_NR, VFS_IOCTL, &m);
/* Translate back to original form */
/*
* Translate back to original form. Do this on failure as well, as
* temporarily allocated resources may have to be freed up again.
*/
switch (request_save) {
case I2C_IOCTL_EXEC:
rewrite_i2c_minix_to_netbsd(data, &i2c);
break;
case SIOCGIFMEDIA:
case SIOCIFGCLONERS:
if (r == 0)
ioctl_convert_if_from_minix(addr, data, request_save);
free((void *)addr);
break;
case BIOCGDLTLIST:
if (r == 0)
ioctl_convert_bpf_from_minix(addr, data, request_save);
/* FALLTHROUGH */
case BIOCSETF:
free((void *)addr);
break;
default:
/* Nothing to do */
break;
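
Taken together, the conversions are invisible to callers: a
conventional media query such as the sketch below (hypothetical
helper) passes through ioctl_convert_if_to_minix() on the way in and
ioctl_convert_if_from_minix() on the way back out.

#include <sys/ioctl.h>
#include <string.h>
#include <net/if.h>
#include <net/if_media.h>

/* Query the media status of interface 'name' through socket 's'. */
static int
query_media(int s, const char * name, int * media, int count)
{
	struct ifmediareq ifm;

	memset(&ifm, 0, sizeof(ifm));
	strlcpy(ifm.ifm_name, name, sizeof(ifm.ifm_name));
	ifm.ifm_ulist = media;			/* result array */
	ifm.ifm_count = count;			/* its capacity, in entries */

	return ioctl(s, SIOCGIFMEDIA, &ifm);	/* rewritten by libc */
}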


@@ -1,6 +1,7 @@
.include <bsd.own.mk>
.if ${MKIMAGEONLY} == "no"
SUBDIR+= lwip
SUBDIR+= uds
.endif # ${MKIMAGEONLY} == "no"

minix/net/lwip/Makefile Normal file (34 lines added)

@@ -0,0 +1,34 @@
# Makefile for the lwIP TCP/IP socket driver service (LWIP)
.include <bsd.own.mk>
PROG= lwip
SRCS= lwip.c mempool.c pchain.c addr.c addrpol.c tcpisn.c mcast.c ipsock.c \
pktsock.c tcpsock.c udpsock.c rawsock.c ifdev.c ifaddr.c loopif.c \
ethif.c ndev.c rttree.c route.c rtsock.c lnksock.c lldata.c mibtree.c \
ifconf.c bpfdev.c bpf_filter.c util.c
FILES=${PROG}.conf
FILESNAME=${PROG}
FILESDIR= /etc/system.conf.d
CPPFLAGS+= -I${NETBSDSRCDIR}/minix/lib/liblwip/dist/src/include
CPPFLAGS+= -I${NETBSDSRCDIR}/minix/lib/liblwip/lib
# Disabling USE_INET6 only superficially hides IPv6 support in the service.
.if (${USE_INET6} != "no")
CPPFLAGS+= -DINET6
.endif
# Some warnings are the result of usage of lwIP macros. We must not generate
# errors for those, but even producing the warnings is not helpful, so we
# disable them altogether.
CPPFLAGS+= -Wno-address
DPADD+= ${LIBLWIP} ${LIBSOCKEVENT} ${LIBSOCKDRIVER} ${LIBCHARDRIVER} \
${LIBSYS} ${LIBTIMERS}
LDADD+= -llwip -lsockevent -lsockdriver -lchardriver -lsys -ltimers
WARNS?= 5
.include <minix.service.mk>

minix/net/lwip/addr.c Normal file (692 lines added)

@@ -0,0 +1,692 @@
/* LWIP service - addr.c - socket address verification and conversion */
#include "lwip.h"
/*
* Return TRUE if the given socket address is of type AF_UNSPEC, or FALSE
* otherwise.
*/
int
addr_is_unspec(const struct sockaddr * addr, socklen_t addr_len)
{
return (addr_len >= offsetof(struct sockaddr, sa_data) &&
addr->sa_family == AF_UNSPEC);
}
/*
* Check whether the given multicast address is generally valid. This check
* should not be moved into addr_get_inet(), as we do not want to forbid
* creating routes for such addresses, for example. We do however apply the
* restrictions here to all provided source and destination addresses. Return
* TRUE if the address is an acceptable multicast address, or FALSE otherwise.
*/
int
addr_is_valid_multicast(const ip_addr_t * ipaddr)
{
uint8_t scope;
assert(ip_addr_ismulticast(ipaddr));
/* We apply restrictions to IPv6 multicast addresses only. */
if (IP_IS_V6(ipaddr)) {
scope = ip6_addr_multicast_scope(ip_2_ip6(ipaddr));
if (scope == IP6_MULTICAST_SCOPE_RESERVED0 ||
scope == IP6_MULTICAST_SCOPE_RESERVEDF)
return FALSE;
/*
* We do not impose restrictions on the three defined embedded
* flags, even though we put no effort into supporting them,
* especially in terms of automatically creating routes for
* all cases. We do force the fourth flag to be zero.
* Unfortunately there is no lwIP macro to check for this flag.
*/
if (ip_2_ip6(ipaddr)->addr[0] & PP_HTONL(0x00800000UL))
return FALSE;
/* Prevent KAME-embedded zone IDs from entering the system. */
if (ip6_addr_has_scope(ip_2_ip6(ipaddr), IP6_UNKNOWN) &&
(ip_2_ip6(ipaddr)->addr[0] & PP_HTONL(0x0000ffffUL)))
return FALSE;
}
return TRUE;
}
/*
* Load a sockaddr structure, as copied from userland, as a lwIP-style IP
* address and (optionally) a port number. The expected type of IP address is
* given as 'type', which must be one of IPADDR_TYPE_{V4,ANY,V6}. If it is
* IPADDR_TYPE_V4, 'addr' is expected to point to a sockaddr_in structure. If
* it is IPADDR_TYPE_{ANY,V6}, 'addr' is expected to point to a sockaddr_in6
* structure. For the _ANY case, the result will be an _ANY address only if it
* is the unspecified (all-zeroes) address and a _V6 address in all other
* cases. For the _V6 case, the result will always be a _V6 address. The
* length of the structure pointed to by 'addr' is given as 'addr_len'. If the
* boolean 'kame' flag is set, addresses will be interpreted to be KAME style,
* meaning that for scoped IPv6 addresses, the zone is embedded in the address
* rather than given in sin6_scope_id. On success, store the resulting IP
* address in 'ipaddr'. If 'port' is not NULL, store the port number in it;
* otherwise, ignore the port number. On any parsing failure, return an
* appropriate negative error code.
*/
int
addr_get_inet(const struct sockaddr * addr, socklen_t addr_len, uint8_t type,
ip_addr_t * ipaddr, int kame, uint16_t * port)
{
struct sockaddr_in sin;
struct sockaddr_in6 sin6;
ip6_addr_t *ip6addr;
uint32_t ifindex;
switch (type) {
case IPADDR_TYPE_V4:
if (addr_len != sizeof(sin))
return EINVAL;
/*
* Getting around strict aliasing problems. Oh, the irony of
* doing an extra memcpy so that the compiler can do a better
* job at optimizing..
*/
memcpy(&sin, addr, sizeof(sin));
if (sin.sin_family != AF_INET)
return EAFNOSUPPORT;
ip_addr_set_ip4_u32(ipaddr, sin.sin_addr.s_addr);
if (port != NULL)
*port = ntohs(sin.sin_port);
return OK;
case IPADDR_TYPE_ANY:
case IPADDR_TYPE_V6:
if (addr_len != sizeof(sin6))
return EINVAL;
/* Again, strict aliasing.. */
memcpy(&sin6, addr, sizeof(sin6));
if (sin6.sin6_family != AF_INET6)
return EAFNOSUPPORT;
memset(ipaddr, 0, sizeof(*ipaddr));
/*
* This is a bit ugly, but NetBSD does not expose s6_addr32 and
* s6_addr is a series of bytes, which is a mismatch for lwIP.
* The alternative would be another memcpy..
*/
ip6addr = ip_2_ip6(ipaddr);
assert(sizeof(ip6addr->addr) == sizeof(sin6.sin6_addr));
memcpy(ip6addr->addr, &sin6.sin6_addr, sizeof(ip6addr->addr));
/*
* If the address may have a scope, extract the zone ID.
* Where the zone ID is depends on the 'kame' parameter: KAME-
* style addresses have it embedded within the address, whereas
* non-KAME addresses use the (misnamed) sin6_scope_id field.
*/
if (ip6_addr_has_scope(ip6addr, IP6_UNKNOWN)) {
if (kame) {
ifindex =
ntohl(ip6addr->addr[0]) & 0x0000ffffUL;
ip6addr->addr[0] &= PP_HTONL(0xffff0000UL);
} else {
/*
* Reject KAME-style addresses for normal
* socket calls, to save ourselves the trouble
* of mixed address styles elsewhere.
*/
if (ip6addr->addr[0] & PP_HTONL(0x0000ffffUL))
return EINVAL;
ifindex = sin6.sin6_scope_id;
}
/*
* Reject invalid zone IDs. This also enforces that
* no zone IDs wider than eight bits enter the system.
* As a side effect, it is not possible to add routes
* for invalid zones, but that should be no problem.
*/
if (ifindex != 0 &&
ifdev_get_by_index(ifindex) == NULL)
return ENXIO;
ip6_addr_set_zone(ip6addr, ifindex);
} else
ip6_addr_clear_zone(ip6addr);
/*
* Set the type to ANY if it was ANY and the address itself is
* ANY as well. Otherwise, we are binding to a specific IPv6
* address, so IPV6_V6ONLY stops being relevant and we should
* leave the address set to V6. Destination addresses for ANY
* are set to V6 elsewhere.
*/
if (type == IPADDR_TYPE_ANY && ip6_addr_isany(ip6addr))
IP_SET_TYPE(ipaddr, type);
else
IP_SET_TYPE(ipaddr, IPADDR_TYPE_V6);
if (port != NULL)
*port = ntohs(sin6.sin6_port);
return OK;
default:
return EAFNOSUPPORT;
}
}
/*
* Store an lwIP-style IP address and port number as a sockaddr structure
* (sockaddr_in or sockaddr_in6, depending on the given IP address) to be
* copied to userland. The result is stored in the buffer pointed to by
* 'addr'. Before the call, 'addr_len' must be set to the size of this buffer.
* This is an internal check to prevent buffer overflows, and must not be used
* to validate input, since a mismatch will trigger a panic. After the call,
* 'addr_len' will be set to the size of the resulting structure. The lwIP-
* style address is given as 'ipaddr'. If the boolean 'kame' flag is set, the
* address will be stored KAME-style, meaning that for scoped IPv6 addresses,
* the address zone will be stored embedded in the address rather than in
* sin6_scope_id. If relevant, 'port' contains the port number in host-byte
order; otherwise it should be set to zero.
*/
void
addr_put_inet(struct sockaddr * addr, socklen_t * addr_len,
const ip_addr_t * ipaddr, int kame, uint16_t port)
{
struct sockaddr_in sin;
struct sockaddr_in6 sin6;
const ip6_addr_t *ip6addr;
uint32_t zone;
switch (IP_GET_TYPE(ipaddr)) {
case IPADDR_TYPE_V4:
if (*addr_len < sizeof(sin))
panic("provided address buffer too small");
memset(&sin, 0, sizeof(sin));
sin.sin_len = sizeof(sin);
sin.sin_family = AF_INET;
sin.sin_port = htons(port);
sin.sin_addr.s_addr = ip_addr_get_ip4_u32(ipaddr);
memcpy(addr, &sin, sizeof(sin));
*addr_len = sizeof(sin);
break;
case IPADDR_TYPE_ANY:
case IPADDR_TYPE_V6:
if (*addr_len < sizeof(sin6))
panic("provided address buffer too small");
ip6addr = ip_2_ip6(ipaddr);
memset(&sin6, 0, sizeof(sin6));
sin6.sin6_len = sizeof(sin6);
sin6.sin6_family = AF_INET6;
sin6.sin6_port = htons(port);
memcpy(&sin6.sin6_addr, ip6addr->addr, sizeof(sin6.sin6_addr));
/*
* If the IPv6 address has a zone set, it must be scoped, and
* we put the zone in the result. It may occur that a scoped
* IPv6 address does not have a zone here though, for example
* if packet routing fails for sendto() with a zoneless address
* on an unbound socket, resulting in an RTM_MISS message. In
* such cases, simply leave the zone index blank in the result.
*/
if (ip6_addr_has_zone(ip6addr)) {
assert(ip6_addr_has_scope(ip6addr, IP6_UNKNOWN));
zone = ip6_addr_zone(ip6addr);
assert(zone <= UINT8_MAX);
if (kame)
sin6.sin6_addr.s6_addr[3] = zone;
else
sin6.sin6_scope_id = zone;
}
memcpy(addr, &sin6, sizeof(sin6));
*addr_len = sizeof(sin6);
break;
default:
panic("unknown IP address type: %u", IP_GET_TYPE(ipaddr));
}
}
/*
* Load a link-layer sockaddr structure (sockaddr_dl), as copied from userland,
* and return the contained name and/or hardware address. The address is
* provided as 'addr', with length 'addr_len'. On success, return OK. If
* 'name' is not NULL, it must be of size 'name_max', and will be used to store
* the (null-terminated) interface name in the given structure if present, or
* the empty string if not. If 'hwaddr' is not NULL, it will be used to store
* the hardware address in the given structure, which must in that case be
* present and exactly 'hwaddr_len' bytes long. On any parsing failure, return
* an appropriate negative error code.
*/
int
addr_get_link(const struct sockaddr * addr, socklen_t addr_len, char * name,
size_t name_max, uint8_t * hwaddr, size_t hwaddr_len)
{
struct sockaddr_dlx sdlx;
size_t nlen, alen;
if (addr_len < offsetof(struct sockaddr_dlx, sdlx_data))
return EINVAL;
/*
We cannot prevent callers from passing in a massively oversized
* sockaddr_dl structure. However, we insist that all the actual data
* be contained within the size of our sockaddr_dlx version.
*/
if (addr_len > sizeof(sdlx))
addr_len = sizeof(sdlx);
memcpy(&sdlx, addr, addr_len);
if (sdlx.sdlx_family != AF_LINK)
return EAFNOSUPPORT;
/* Address selectors are not currently supported. */
if (sdlx.sdlx_slen != 0)
return EINVAL;
nlen = (size_t)sdlx.sdlx_nlen;
alen = (size_t)sdlx.sdlx_alen;
/* The nlen and alen fields are 8-bit, so no risks of overflow here. */
if (addr_len < offsetof(struct sockaddr_dlx, sdlx_data) + nlen + alen)
return EINVAL;
/*
* Copy out the name, truncating it if needed. The name in the
* sockaddr is not null terminated, so we have to do that. If the
* sockaddr has no name, copy out an empty name.
*/
if (name != NULL) {
assert(name_max > 0);
if (name_max > nlen + 1)
name_max = nlen + 1;
memcpy(name, sdlx.sdlx_data, name_max - 1);
name[name_max - 1] = '\0';
}
/*
* Copy over the hardware address. For simplicity, we require that the
* caller specify the exact hardware address length.
*/
if (hwaddr != NULL) {
if (alen != hwaddr_len)
return EINVAL;
memcpy(hwaddr, sdlx.sdlx_data + nlen, hwaddr_len);
}
return OK;
}
/*
* Store a link-layer sockaddr structure (sockaddr_dl), to be copied to
* userland. The result is stored in the buffer pointed to by 'addr'. Before
* the call, 'addr_len' must be set to the size of this buffer. This is an
* internal check to prevent buffer overflows, and must not be used to validate
* input, since a mismatch will trigger a panic. After the call, 'addr_len'
* will be set to the size of the resulting structure. The given interface
* index 'ifindex' and (IFT_) interface type 'type' will always be stored in
* the resulting structure. If 'name' is not NULL, it must be a null-
* terminated interface name string which will be included in the structure.
* If 'hwaddr' is not NULL, it must be a hardware address of length
* 'hwaddr_len', which will also be included in the structure.
*/
void
addr_put_link(struct sockaddr * addr, socklen_t * addr_len, uint32_t ifindex,
uint32_t type, const char * name, const uint8_t * hwaddr,
size_t hwaddr_len)
{
struct sockaddr_dlx sdlx;
size_t name_len;
socklen_t len;
name_len = (name != NULL) ? strlen(name) : 0;
if (hwaddr == NULL)
hwaddr_len = 0;
assert(name_len < IFNAMSIZ);
assert(hwaddr_len <= NETIF_MAX_HWADDR_LEN);
len = offsetof(struct sockaddr_dlx, sdlx_data) + name_len + hwaddr_len;
if (*addr_len < len)
panic("provided address buffer too small");
memset(&sdlx, 0, sizeof(sdlx));
sdlx.sdlx_len = len;
sdlx.sdlx_family = AF_LINK;
sdlx.sdlx_index = ifindex;
sdlx.sdlx_type = type;
sdlx.sdlx_nlen = name_len;
sdlx.sdlx_alen = hwaddr_len;
if (name_len > 0)
memcpy(sdlx.sdlx_data, name, name_len);
if (hwaddr_len > 0)
memcpy(sdlx.sdlx_data + name_len, hwaddr, hwaddr_len);
memcpy(addr, &sdlx, len);
*addr_len = len;
}
/*
* Convert an IPv4 or IPv6 netmask, given as sockaddr structure 'addr', to a
* prefix length. The length of the sockaddr structure is given as 'addr_len'.
* For consistency with addr_get_inet(), the expected address type is given as
* 'type', and must be either IPADDR_TYPE_V4 or IPADDR_TYPE_V6. On success,
* return OK with the number of set prefix bits returned in 'prefix', and
* optionally with a lwIP representation of the netmask stored in 'ipaddr' (if
* not NULL). On failure, return an appropriate negative error code. Note
* that this function does not support compressed IPv4 network masks; such
* addresses must be expanded before a call to this function.
*/
int
addr_get_netmask(const struct sockaddr * addr, socklen_t addr_len,
uint8_t type, unsigned int * prefix, ip_addr_t * ipaddr)
{
struct sockaddr_in sin;
struct sockaddr_in6 sin6;
unsigned int byte, bit;
uint32_t val;
switch (type) {
case IPADDR_TYPE_V4:
if (addr_len != sizeof(sin))
return EINVAL;
memcpy(&sin, addr, sizeof(sin));
if (sin.sin_family != AF_INET)
return EAFNOSUPPORT;
val = ntohl(sin.sin_addr.s_addr);
/* Find the first zero bit. */
for (bit = 0; bit < IP4_BITS; bit++)
if (!(val & (1 << (IP4_BITS - bit - 1))))
break;
*prefix = bit;
/* All bits after the first zero bit must also be zero. */
if (bit < IP4_BITS &&
(val & ((1 << (IP4_BITS - bit - 1)) - 1)))
return EINVAL;
if (ipaddr != NULL)
ip_addr_set_ip4_u32(ipaddr, sin.sin_addr.s_addr);
return OK;
case IPADDR_TYPE_V6:
if (addr_len != sizeof(sin6))
return EINVAL;
memcpy(&sin6, addr, sizeof(sin6));
if (sin6.sin6_family != AF_INET6)
return EAFNOSUPPORT;
/* Find the first zero bit. */
for (byte = 0; byte < __arraycount(sin6.sin6_addr.s6_addr);
byte++)
if (sin6.sin6_addr.s6_addr[byte] != 0xff)
break;
/* If all bits are set, there is nothing more to do. */
if (byte == __arraycount(sin6.sin6_addr.s6_addr)) {
*prefix = __arraycount(sin6.sin6_addr.s6_addr) * NBBY;
return OK;
}
for (bit = 0; bit < NBBY; bit++)
if (!(sin6.sin6_addr.s6_addr[byte] &
(1 << (NBBY - bit - 1))))
break;
*prefix = byte * NBBY + bit;
/* All bits after the first zero bit must also be zero. */
if (bit < NBBY && (sin6.sin6_addr.s6_addr[byte] &
((1 << (NBBY - bit - 1)) - 1)))
return EINVAL;
for (byte++; byte < __arraycount(sin6.sin6_addr.s6_addr);
byte++)
if (sin6.sin6_addr.s6_addr[byte] != 0)
return EINVAL;
if (ipaddr != NULL) {
ip_addr_set_zero_ip6(ipaddr);
memcpy(ip_2_ip6(ipaddr)->addr, &sin6.sin6_addr,
sizeof(ip_2_ip6(ipaddr)->addr));
}
return OK;
default:
panic("unknown IP address type: %u", type);
}
}
/*
* Generate a raw network mask based on the given prefix length.
*/
void
addr_make_netmask(uint8_t * addr, socklen_t addr_len, unsigned int prefix)
{
unsigned int byte, bit;
byte = prefix / NBBY;
bit = prefix % NBBY;
assert(byte + !!bit <= addr_len);
if (byte > 0)
memset(addr, 0xff, byte);
if (bit != 0)
addr[byte++] = (uint8_t)(0xff << (NBBY - bit));
if (byte < addr_len)
memset(&addr[byte], 0, addr_len - byte);
}
/*
* Store a network mask as a sockaddr structure, in 'addr'. Before the call,
* 'addr_len' must be set to the memory size of 'addr'. The address type is
* given as 'type', and must be either IPADDR_TYPE_V4 or IPADDR_TYPE_V6. The
* prefix length from which to generate the network mask is given as 'prefix'.
* Upon return, 'addr_len' is set to the size of the resulting sockaddr
* structure.
*/
void
addr_put_netmask(struct sockaddr * addr, socklen_t * addr_len, uint8_t type,
unsigned int prefix)
{
struct sockaddr_in sin;
struct sockaddr_in6 sin6;
switch (type) {
case IPADDR_TYPE_V4:
if (*addr_len < sizeof(sin))
panic("provided address buffer too small");
assert(prefix <= IP4_BITS);
memset(&sin, 0, sizeof(sin));
sin.sin_len = sizeof(sin);
sin.sin_family = AF_INET;
addr_make_netmask((uint8_t *)&sin.sin_addr.s_addr,
sizeof(sin.sin_addr.s_addr), prefix);
memcpy(addr, &sin, sizeof(sin));
*addr_len = sizeof(sin);
break;
case IPADDR_TYPE_V6:
if (*addr_len < sizeof(sin6))
panic("provided address buffer too small");
assert(prefix <= IP6_BITS);
memset(&sin6, 0, sizeof(sin6));
sin6.sin6_len = sizeof(sin6);
sin6.sin6_family = AF_INET6;
addr_make_netmask(sin6.sin6_addr.s6_addr,
sizeof(sin6.sin6_addr.s6_addr), prefix);
memcpy(addr, &sin6, sizeof(sin6));
*addr_len = sizeof(sin6);
break;
default:
panic("unknown IP address type: %u", type);
}
}
/*
* Normalize the given address in 'src' to the given number of prefix bits,
* setting all other bits to zero. Return the result in 'dst'.
*/
void
addr_normalize(ip_addr_t * dst, const ip_addr_t * src, unsigned int prefix)
{
unsigned int addr_len, byte, bit;
const uint8_t *srcaddr;
uint8_t type, *dstaddr;
type = IP_GET_TYPE(src);
memset(dst, 0, sizeof(*dst));
IP_SET_TYPE(dst, type);
switch (type) {
case IPADDR_TYPE_V4:
srcaddr = (const uint8_t *)&ip_2_ip4(src)->addr;
dstaddr = (uint8_t *)&ip_2_ip4(dst)->addr;
addr_len = sizeof(ip_2_ip4(src)->addr);
break;
case IPADDR_TYPE_V6:
ip6_addr_set_zone(ip_2_ip6(dst), ip6_addr_zone(ip_2_ip6(src)));
srcaddr = (const uint8_t *)&ip_2_ip6(src)->addr;
dstaddr = (uint8_t *)&ip_2_ip6(dst)->addr;
addr_len = sizeof(ip_2_ip6(src)->addr);
break;
default:
panic("unknown IP address type: %u", type);
}
byte = prefix / NBBY;
bit = prefix % NBBY;
assert(byte + !!bit <= addr_len);
if (byte > 0)
memcpy(dstaddr, srcaddr, byte);
if (bit != 0) {
dstaddr[byte] =
srcaddr[byte] & (uint8_t)(0xff << (NBBY - bit));
byte++;
}
}
/*
* Return the number of common bits between the given two addresses, up to the
* given maximum. Thus, return a value between 0 and 'max' inclusive.
*/
unsigned int
addr_get_common_bits(const ip_addr_t * ipaddr1, const ip_addr_t * ipaddr2,
unsigned int max)
{
unsigned int addr_len, prefix, bit;
const uint8_t *addr1, *addr2;
uint8_t byte;
switch (IP_GET_TYPE(ipaddr1)) {
case IPADDR_TYPE_V4:
assert(IP_IS_V4(ipaddr2));
addr1 = (const uint8_t *)&ip_2_ip4(ipaddr1)->addr;
addr2 = (const uint8_t *)&ip_2_ip4(ipaddr2)->addr;
addr_len = sizeof(ip_2_ip4(ipaddr1)->addr);
break;
case IPADDR_TYPE_V6:
assert(IP_IS_V6(ipaddr2));
addr1 = (const uint8_t *)&ip_2_ip6(ipaddr1)->addr;
addr2 = (const uint8_t *)&ip_2_ip6(ipaddr2)->addr;
addr_len = sizeof(ip_2_ip6(ipaddr1)->addr);
break;
default:
panic("unknown IP address type: %u", IP_GET_TYPE(ipaddr1));
}
if (addr_len > max * NBBY)
addr_len = max * NBBY;
for (prefix = 0; addr_len > 0;
    addr_len--, addr1++, addr2++, prefix += NBBY) {
if ((byte = (*addr1 ^ *addr2)) != 0) {
/* TODO: see if we want a lookup table for this. */
for (bit = 0; bit < NBBY; bit++, prefix++)
if (byte & (1 << (NBBY - bit - 1)))
break;
break;
}
}
if (prefix > max)
prefix = max;
return prefix;
}
/*
* Convert the given IPv4 address to an IPv4-mapped IPv6 address.
*/
void
addr_make_v4mapped_v6(ip_addr_t * dst, const ip4_addr_t * src)
{
IP_ADDR6(dst, 0, 0, PP_HTONL(0x0000ffffUL), ip4_addr_get_u32(src));
}
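
As a quick worked example of addr_make_netmask(): a /22 IPv4 prefix
gives byte = 2 and bit = 6, so the first two bytes become 0xff, the
third becomes 0xff << 2 = 0xfc, and the remainder is zeroed:

	uint8_t mask[4];

	addr_make_netmask(mask, sizeof(mask), 22);
	/* mask now holds { 0xff, 0xff, 0xfc, 0x00 }, i.e., 255.255.252.0 */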

minix/net/lwip/addr.h Normal file (33 lines added)

@@ -0,0 +1,33 @@
#ifndef MINIX_NET_LWIP_ADDR_H
#define MINIX_NET_LWIP_ADDR_H
int addr_is_unspec(const struct sockaddr * addr, socklen_t addr_len);
int addr_is_valid_multicast(const ip_addr_t * ipaddr);
int addr_get_inet(const struct sockaddr * addr, socklen_t addr_len,
uint8_t type, ip_addr_t * ipaddr, int kame, uint16_t * port);
void addr_put_inet(struct sockaddr * addr, socklen_t * addr_len,
const ip_addr_t * ipaddr, int kame, uint16_t port);
int addr_get_link(const struct sockaddr * addr, socklen_t addr_len,
char * name, size_t name_max, uint8_t * hwaddr, size_t hwaddr_len);
void addr_put_link(struct sockaddr * addr, socklen_t * addr_len,
uint32_t ifindex, uint32_t type, const char * name,
const uint8_t * hwaddr, size_t hwaddr_len);
int addr_get_netmask(const struct sockaddr * addr, socklen_t addr_len,
uint8_t type, unsigned int * prefix, ip_addr_t * ipaddr);
void addr_make_netmask(uint8_t * addr, socklen_t addr_len,
unsigned int prefix);
void addr_put_netmask(struct sockaddr * addr, socklen_t * addr_len,
uint8_t type, unsigned int prefix);
void addr_normalize(ip_addr_t * dst, const ip_addr_t * src,
unsigned int prefix);
unsigned int addr_get_common_bits(const ip_addr_t * addr1,
const ip_addr_t * addr2, unsigned int max);
void addr_make_v4mapped_v6(ip_addr_t * dst, const ip4_addr_t * src);
#endif /* !MINIX_NET_LWIP_ADDR_H */

minix/net/lwip/addrpol.c Normal file (143 lines added)

@@ -0,0 +1,143 @@
/* LWIP service - addrpol.c - address policy table and values */
/*
* The main purpose of this module is to implement the address policy table
* described in RFC 6724. In general, the policy table is used for two
* purposes: source address selection, which is part of this service, and
* destination address selection, which is implemented in libc. NetBSD 7, the
* version that MINIX 3 is synced against at this moment, does not actually
* implement the libc part yet, though. That will change with NetBSD 8, where
* libc uses sysctl(7) to obtain the kernel's policy table, which itself can be
* changed with the new ip6addrctl(8) utility. Once we resync to NetBSD 8, we
* will also have to support this new functionality, and this module is where
* it would be implemented. Since NetBSD 7 is even lacking the necessary
* definitions, we cannot do that ahead of time, though. Thus, until then,
* this module is rather simple, as it only implements a static policy table
* used for source address selection. No changes beyond this module should be
* necessary, e.g. we are purposely not caching labels for local addresses.
*/
#include "lwip.h"
/*
* Address policy table. Currently hardcoded to the default of RFC 6724.
* Sorted by prefix length, so that the first match is always also the longest.
*/
static const struct {
ip_addr_t ipaddr;
unsigned int prefix;
int precedence;
int label;
} addrpol_table[] = {
{ IPADDR6_INIT_HOST(0, 0, 0, 1), 128, 50, 0 },
{ IPADDR6_INIT_HOST(0, 0, 0x0000ffffUL, 0), 96, 35, 4 },
{ IPADDR6_INIT_HOST(0, 0, 0, 0), 96, 1, 3 },
{ IPADDR6_INIT_HOST(0x20010000UL, 0, 0, 0), 32, 5, 5 },
{ IPADDR6_INIT_HOST(0x20020000UL, 0, 0, 0), 16, 30, 2 },
{ IPADDR6_INIT_HOST(0x3ffe0000UL, 0, 0, 0), 16, 1, 12 },
{ IPADDR6_INIT_HOST(0xfec00000UL, 0, 0, 0), 10, 1, 11 },
{ IPADDR6_INIT_HOST(0xfc000000UL, 0, 0, 0), 7, 3, 13 },
{ IPADDR6_INIT_HOST(0, 0, 0, 0), 0, 40, 1 }
};
/*
* Obtain the label value for the given IP address from the address policy
* table. Currently only IPv6 addresses may be given. This function is linear
* in number of address policy table entries, requiring a relatively expensive
* normalization operation for each entry, so it should not be called lightly.
* Its results should not be cached beyond local contexts either, because the
* policy table itself may be changed from userland (in the future).
*
* TODO: convert IPv4 addresses to IPv4-mapped IPv6 addresses.
* TODO: embed the interface index in link-local addresses.
*/
int
addrpol_get_label(const ip_addr_t * iporig)
{
ip_addr_t ipaddr;
unsigned int i;
assert(IP_IS_V6(iporig));
/*
* The policy table is sorted by prefix length such that the first
* match is also the one with the longest prefix, and as such the best.
*/
for (i = 0; i < __arraycount(addrpol_table); i++) {
addr_normalize(&ipaddr, iporig, addrpol_table[i].prefix);
if (ip_addr_cmp(&addrpol_table[i].ipaddr, &ipaddr))
return addrpol_table[i].label;
}
/*
* We cannot possibly get here with the default policy table, because
* the last entry will always match. It is not clear what we should
* return if there is no matching entry, though. For now, we return
* the default label value for the default (::/0) entry, which is 1.
*/
return 1;
}
/*
* Return an opaque positive value (possibly zero) that represents the scope of
* the given IP address. A larger value indicates a wider scope. The 'is_src'
* flag indicates whether the address is a source or a destination address,
* which affects the value returned for unknown addresses. A scope is a direct
* function of only the given address, so the result may be cached on a per-
* address basis without risking invalidation at any point in time.
*/
int
addrpol_get_scope(const ip_addr_t * ipaddr, int is_src)
{
const ip6_addr_t *ip6addr;
/*
* For now, all IPv4 addresses are considered global. This function is
* currently called only for IPv6 addresses anyway.
*/
if (IP_IS_V4(ipaddr))
return IP6_MULTICAST_SCOPE_GLOBAL;
assert(IP_IS_V6(ipaddr));
ip6addr = ip_2_ip6(ipaddr);
/*
* These are ordered not by ascending scope, but (roughly) by expected
* likeliness to match, for performance reasons.
*/
if (ip6_addr_isglobal(ip6addr))
return IP6_MULTICAST_SCOPE_GLOBAL;
if (ip6_addr_islinklocal(ip6addr) || ip6_addr_isloopback(ip6addr))
return IP6_MULTICAST_SCOPE_LINK_LOCAL;
/*
* We deliberately deviate from RFC 6724 Sec. 3.1 by considering
* Unique-Local Addresses (ULAs) to be of smaller scope than global
* addresses, to avoid that during source address selection, a
* preferred ULA is picked over a deprecated global address when given
* a global address as destination, as that would likely result in
* broken two-way communication.
*/
if (ip6_addr_isuniquelocal(ip6addr))
return IP6_MULTICAST_SCOPE_ORGANIZATION_LOCAL;
if (ip6_addr_ismulticast(ip6addr))
return ip6_addr_multicast_scope(ip6addr);
/* Site-local addresses are deprecated. */
if (ip6_addr_issitelocal(ip6addr))
return IP6_MULTICAST_SCOPE_SITE_LOCAL;
/*
* If the address is a source address, give it a scope beyond global to
* make sure that a "real" global address is picked first. If the
* address is a destination address, give it a global scope so as to
* pick "real" global addresses over unknown-scope source addresses.
*/
if (is_src)
return IP6_MULTICAST_SCOPE_RESERVEDF; /* greater than GLOBAL */
else
return IP6_MULTICAST_SCOPE_GLOBAL;
}
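
To make the table concrete with an illustrative lookup (a sketch,
assuming lwIP's IP_ADDR6_HOST initializer): a 6to4 address such as
2002:db8::1 normalizes to 2002:: under the 16-bit prefix of the
2002::/16 entry and yields label 2, whereas the IPv4-mapped
::ffff:192.0.2.1 matches the 96-bit ::ffff:0:0 entry and yields
label 4.

	ip_addr_t ip;

	IP_ADDR6_HOST(&ip, 0x20020db8UL, 0, 0, 1);
	assert(addrpol_get_label(&ip) == 2);	/* 2002::/16, 6to4 */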

minix/net/lwip/bpf_filter.c Normal file (561 lines added)

@@ -0,0 +1,561 @@
/* LWIP service - bpf_filter.c - Berkeley Packet Filter core implementation */
/*
* This is basically a drop-in replacement of NetBSD's bpf_filter.c, which
* itself can be compiled for either the NetBSD kernel or for userland. On
* MINIX 3, we would like to perform certain checks that NetBSD implements only
* for its kernel (e.g., memory store access validation) while replacing the
* NetBSD kernel specifics with our own (pbuf instead of mbuf, no BPF contexts
* for now, etc.). As a result, it is easier to reimplement the whole thing,
* because there is not all that much to it.
*
* Support for the standard BSD API allows us to run standard tests against
* this module from userland, where _MINIX_SYSTEM is not defined. MINIX 3
* specific extensions are enabled only if _MINIX_SYSTEM is defined.
*/
#include <string.h>
#include <limits.h>
#include <net/bpf.h>
#include <minix/bitmap.h>
#ifdef _MINIX_SYSTEM
#include "lwip.h"
/*
* Obtain an unsigned 32-bit value in network byte order from the pbuf chain
* 'pbuf' at offset 'k'. The given offset is guaranteed to be within bounds.
*/
static uint32_t
bpf_get32_ext(const struct pbuf * pbuf, uint32_t k)
{
uint32_t val;
unsigned int i;
/*
* Find the pbuf that contains the first byte. We expect that most
* filters will operate only on the headers of packets, so that we
* mostly avoid going through this O(n) loop. Since only the superuser
* can open BPF devices at all, we need not be worried about abuse in
this regard. If it turns out that this loop is particularly
CPU-intensive after all, we can probably improve it by caching the
* last visited pbuf, as read locality is likely high.
*/
while (k >= pbuf->len) {
k -= pbuf->len;
pbuf = pbuf->next;
assert(pbuf != NULL);
}
/*
* We assume that every pbuf has some data, but we make no assumptions
* about any minimum amount of data per pbuf. Therefore, we may have
* to take the bytes from anywhere between one and four pbufs.
* Hopefully the compiler will unroll this loop for us.
*/
val = (uint32_t)(((u_char *)pbuf->payload)[k]) << 24;
for (i = 0; i < 3; i++) {
if (k >= (uint32_t)pbuf->len - 1) {
k = 0;
pbuf = pbuf->next;
assert(pbuf != NULL);
} else
k++;
val = (val << 8) | (uint32_t)(((u_char *)pbuf->payload)[k]);
}
return val;
}
/*
* Obtain an unsigned 16-bit value in network byte order from the pbuf chain
* 'pbuf' at offset 'k'. The given offset is guaranteed to be within bounds.
*/
static uint32_t
bpf_get16_ext(const struct pbuf * pbuf, uint32_t k)
{
/* As above. */
while (k >= pbuf->len) {
k -= pbuf->len;
pbuf = pbuf->next;
assert(pbuf != NULL);
}
/*
* There are only two possible cases to cover here: either the two
* bytes are in the same pbuf, or they are in subsequent ones.
*/
if (k < (uint32_t)pbuf->len - 1) {
return ((uint32_t)(((u_char *)pbuf->payload)[k]) << 8) |
(uint32_t)(((u_char *)pbuf->payload)[k + 1]);
} else {
assert(pbuf->next != NULL);
return ((uint32_t)(((u_char *)pbuf->payload)[k]) << 8) |
(uint32_t)(((u_char *)pbuf->next->payload)[0]);
}
}
/*
* Obtain an unsigned 8-bit value from the pbuf chain 'pbuf' at offset 'k'.
* The given offset is guaranteed to be within bounds.
*/
static uint32_t
bpf_get8_ext(const struct pbuf * pbuf, uint32_t k)
{
/* As above. */
while (k >= pbuf->len) {
k -= pbuf->len;
pbuf = pbuf->next;
assert(pbuf != NULL);
}
return (uint32_t)(((u_char *)pbuf->payload)[k]);
}
#endif /* _MINIX_SYSTEM */
/*
* Execute a BPF filter program on (the first part of) a packet, and return the
* maximum size of the packet that should be delivered to the filter owner.
*
* The 'pc' parameter points to an array of BPF instructions that together form
* the filter program to be executed. If 'pc' is NULL, the packet is fully
* accepted. Otherwise, the given program MUST have passed a previous call to
* bpf_validate(). Not doing so will allow for arbitrary memory access.
*
* The 'packet' array contains up to the whole packet. The value of 'total'
* denotes the total length of the packet; 'len' contains the size of the array
* 'packet'. Chunked storage of the packet is not supported at this time.
*
* If executing the program succeeds, the return value is the maximum number of
* bytes from the packet to be delivered. The return value may exceed the full
* packet size. If the number of bytes returned is zero, the packet is to be
* ignored. If the program fails to execute properly and return a value, a
* value of zero is returned as well, thus also indicating that the packet
* should be ignored. This is intentional: it saves filter programs from
* having to perform explicit checks on the packet they are filtering.
*/
u_int
bpf_filter(const struct bpf_insn * pc, const u_char * packet, u_int total,
u_int len)
#ifdef _MINIX_SYSTEM
{
return bpf_filter_ext(pc, NULL /*pbuf*/, packet, total, len);
}
u_int
bpf_filter_ext(const struct bpf_insn * pc, const struct pbuf * pbuf,
const u_char * packet, u_int total, u_int len)
#endif /* _MINIX_SYSTEM */
{
uint32_t k, a, x, mem[BPF_MEMWORDS];
/* An empty program accepts all packets. */
if (pc == NULL)
return UINT_MAX;
/*
* We need not clear 'mem': the checker guarantees that each memory
* store word is always written before it is read.
*/
a = 0;
x = 0;
/* Execute the program. */
for (;; pc++) {
k = pc->k;
switch (pc->code) {
case BPF_LD+BPF_W+BPF_IND: /* A <- P[X+k:4] */
if (k + x < k)
return 0;
k += x;
/* FALLTHROUGH */
case BPF_LD+BPF_W+BPF_ABS: /* A <- P[k:4] */
/*
* 'k' may have any value, so check bounds in such a
* way that 'k' cannot possibly overflow and wrap.
*/
if (len >= 3 && k < len - 3)
a = ((uint32_t)packet[k] << 24) |
((uint32_t)packet[k + 1] << 16) |
((uint32_t)packet[k + 2] << 8) |
(uint32_t)packet[k + 3];
#ifdef _MINIX_SYSTEM
else if (total >= 3 && k < total - 3)
a = bpf_get32_ext(pbuf, k);
#endif /* _MINIX_SYSTEM */
else
return 0;
break;
case BPF_LD+BPF_H+BPF_IND: /* A <- P[X+k:2] */
if (k + x < k)
return 0;
k += x;
/* FALLTHROUGH */
case BPF_LD+BPF_H+BPF_ABS: /* A <- P[k:2] */
/* As above. */
if (len >= 1 && k < len - 1)
a = ((uint32_t)packet[k] << 8) |
(uint32_t)packet[k + 1];
#ifdef _MINIX_SYSTEM
else if (total >= 1 && k < total - 1)
a = bpf_get16_ext(pbuf, k);
#endif /* _MINIX_SYSTEM */
else
return 0;
break;
case BPF_LD+BPF_B+BPF_IND: /* A <- P[X+k:1] */
if (k + x < k)
return 0;
k += x;
/* FALLTHROUGH */
case BPF_LD+BPF_B+BPF_ABS: /* A <- P[k:1] */
if (k < len)
a = (uint32_t)packet[k];
#ifdef _MINIX_SYSTEM
else if (k < total)
a = bpf_get8_ext(pbuf, k);
#endif /* _MINIX_SYSTEM */
else
return 0;
break;
case BPF_LD+BPF_W+BPF_LEN: /* A <- len */
a = total;
break;
case BPF_LD+BPF_IMM: /* A <- k */
a = k;
break;
case BPF_LD+BPF_MEM: /* A <- M[k] */
a = mem[k];
break;
case BPF_LDX+BPF_IMM: /* X <- k */
x = k;
break;
case BPF_LDX+BPF_MEM: /* X <- M[k] */
x = mem[k];
break;
case BPF_LDX+BPF_LEN: /* X <- len */
x = total;
break;
case BPF_LDX+BPF_B+BPF_MSH: /* X <- 4*(P[k:1]&0xf) */
if (k < len)
x = ((uint32_t)packet[k] & 0xf) << 2;
#ifdef _MINIX_SYSTEM
else if (k < total)
x = (bpf_get8_ext(pbuf, k) & 0xf) << 2;
#endif /* _MINIX_SYSTEM */
else
return 0;
break;
case BPF_ST: /* M[k] <- A */
mem[k] = a;
break;
case BPF_STX: /* M[k] <- X */
mem[k] = x;
break;
case BPF_ALU+BPF_ADD+BPF_K: /* A <- A + k */
a += k;
break;
case BPF_ALU+BPF_SUB+BPF_K: /* A <- A - k */
a -= k;
break;
case BPF_ALU+BPF_MUL+BPF_K: /* A <- A * k */
a *= k;
break;
case BPF_ALU+BPF_DIV+BPF_K: /* A <- A / k */
a /= k;
break;
case BPF_ALU+BPF_MOD+BPF_K: /* A <- A % k */
a %= k;
break;
case BPF_ALU+BPF_AND+BPF_K: /* A <- A & k */
a &= k;
break;
case BPF_ALU+BPF_OR+BPF_K: /* A <- A | k */
a |= k;
break;
case BPF_ALU+BPF_XOR+BPF_K: /* A <- A ^ k */
a ^= k;
break;
case BPF_ALU+BPF_LSH+BPF_K: /* A <- A << k */
a <<= k;
break;
case BPF_ALU+BPF_RSH+BPF_K: /* A <- A >> k */
a >>= k;
break;
case BPF_ALU+BPF_ADD+BPF_X: /* A <- A + X */
a += x;
break;
case BPF_ALU+BPF_SUB+BPF_X: /* A <- A - X */
a -= x;
break;
case BPF_ALU+BPF_MUL+BPF_X: /* A <- A * X */
a *= x;
break;
case BPF_ALU+BPF_DIV+BPF_X: /* A <- A / X */
if (x == 0)
return 0;
a /= x;
break;
case BPF_ALU+BPF_MOD+BPF_X: /* A <- A % X */
if (x == 0)
return 0;
a %= x;
break;
case BPF_ALU+BPF_AND+BPF_X: /* A <- A & X */
a &= x;
break;
case BPF_ALU+BPF_OR+BPF_X: /* A <- A | X */
a |= x;
break;
case BPF_ALU+BPF_XOR+BPF_X: /* A <- A ^ X */
a ^= x;
break;
case BPF_ALU+BPF_LSH+BPF_X: /* A <- A << X */
if (x >= 32)
return 0;
a <<= x;
break;
case BPF_ALU+BPF_RSH+BPF_X: /* A <- A >> X */
if (x >= 32)
return 0;
a >>= x;
break;
case BPF_ALU+BPF_NEG: /* A <- -A */
a = -a;
break;
case BPF_JMP+BPF_JA: /* pc += k */
pc += k;
break;
case BPF_JMP+BPF_JGT+BPF_K: /* pc += (A > k) ? jt : jf */
pc += (a > k) ? pc->jt : pc->jf;
break;
case BPF_JMP+BPF_JGE+BPF_K: /* pc += (A >= k) ? jt : jf */
pc += (a >= k) ? pc->jt : pc->jf;
break;
case BPF_JMP+BPF_JEQ+BPF_K: /* pc += (A == k) ? jt : jf */
pc += (a == k) ? pc->jt : pc->jf;
break;
case BPF_JMP+BPF_JSET+BPF_K: /* pc += (A & k) ? jt : jf */
pc += (a & k) ? pc->jt : pc->jf;
break;
case BPF_JMP+BPF_JGT+BPF_X: /* pc += (A > X) ? jt : jf */
pc += (a > x) ? pc->jt : pc->jf;
break;
case BPF_JMP+BPF_JGE+BPF_X: /* pc += (A >= X) ? jt : jf */
pc += (a >= x) ? pc->jt : pc->jf;
break;
case BPF_JMP+BPF_JEQ+BPF_X: /* pc += (A == X) ? jt : jf */
pc += (a == x) ? pc->jt : pc->jf;
break;
case BPF_JMP+BPF_JSET+BPF_X: /* pc += (A & X) ? jt : jf */
pc += (a & x) ? pc->jt : pc->jf;
break;
case BPF_RET+BPF_A: /* accept A bytes */
return a;
case BPF_RET+BPF_K: /* accept K bytes */
return k;
case BPF_MISC+BPF_TAX: /* X <- A */
x = a;
break;
case BPF_MISC+BPF_TXA: /* A <- X */
a = x;
break;
default: /* unknown instruction */
return 0;
}
}
/* NOTREACHED */
}
/*
* In order to avoid having to perform explicit memory allocation, we store
* some validation state on the stack, using data types that are as small as
* possible for the current definitions. The data types, and in fact the whole
* assumption that we can store the state on the stack, may need to be revised
* if certain constants are increased in the future. As of writing, the
* validation routine uses a little over 1KB of stack memory.
*/
#if BPF_MEMWORDS <= 16 /* value as of writing: 16 */
typedef uint16_t meminv_t;
#else
#error "increased BPF_MEMWORDS may require code revision"
#endif
#if BPF_MAXINSNS > 2048 /* value as of writing: 512 */
#error "increased BPF_MAXINSNS may require code revision"
#endif
/*
* Verify that the given filter program is safe to execute, by performing as
* many static validity checks as possible. The program is given as 'insns',
* which must be an array of 'ninsns' BPF instructions. Unlike bpf_filter(),
* this function does not accept empty filter programs. The function returns 1
* if the program was successfully validated, or 0 if the program should not be
* accepted.
*/
int
bpf_validate(const struct bpf_insn * insns, int ninsns)
{
bitchunk_t reachable[BITMAP_CHUNKS(BPF_MAXINSNS)];
meminv_t invalid, meminv[BPF_MAXINSNS];
const struct bpf_insn *insn;
u_int pc, count, target;
int advance;
if (insns == NULL || ninsns <= 0 || ninsns > BPF_MAXINSNS)
return 0;
count = (u_int)ninsns;
memset(reachable, 0, sizeof(reachable[0]) * BITMAP_CHUNKS(count));
memset(meminv, 0, sizeof(meminv[0]) * count);
SET_BIT(reachable, 0);
meminv[0] = (meminv_t)~0;
for (pc = 0; pc < count; pc++) {
/* We completely ignore instructions that are not reachable. */
if (!GET_BIT(reachable, pc))
continue;
invalid = meminv[pc];
advance = 1;
insn = &insns[pc];
switch (insn->code) {
case BPF_LD+BPF_W+BPF_ABS:
case BPF_LD+BPF_H+BPF_ABS:
case BPF_LD+BPF_B+BPF_ABS:
case BPF_LD+BPF_W+BPF_IND:
case BPF_LD+BPF_H+BPF_IND:
case BPF_LD+BPF_B+BPF_IND:
case BPF_LD+BPF_LEN:
case BPF_LD+BPF_IMM:
case BPF_LDX+BPF_IMM:
case BPF_LDX+BPF_LEN:
case BPF_LDX+BPF_B+BPF_MSH:
case BPF_ALU+BPF_ADD+BPF_K:
case BPF_ALU+BPF_SUB+BPF_K:
case BPF_ALU+BPF_MUL+BPF_K:
case BPF_ALU+BPF_AND+BPF_K:
case BPF_ALU+BPF_OR+BPF_K:
case BPF_ALU+BPF_XOR+BPF_K:
case BPF_ALU+BPF_ADD+BPF_X:
case BPF_ALU+BPF_SUB+BPF_X:
case BPF_ALU+BPF_MUL+BPF_X:
case BPF_ALU+BPF_DIV+BPF_X:
case BPF_ALU+BPF_MOD+BPF_X:
case BPF_ALU+BPF_AND+BPF_X:
case BPF_ALU+BPF_OR+BPF_X:
case BPF_ALU+BPF_XOR+BPF_X:
case BPF_ALU+BPF_LSH+BPF_X:
case BPF_ALU+BPF_RSH+BPF_X:
case BPF_ALU+BPF_NEG:
case BPF_MISC+BPF_TAX:
case BPF_MISC+BPF_TXA:
/* Nothing we can check for these. */
break;
case BPF_ALU+BPF_DIV+BPF_K:
case BPF_ALU+BPF_MOD+BPF_K:
/* No division by zero. */
if (insn->k == 0)
return 0;
break;
case BPF_ALU+BPF_LSH+BPF_K:
case BPF_ALU+BPF_RSH+BPF_K:
/* Do not invoke undefined behavior. */
if (insn->k >= 32)
return 0;
break;
case BPF_LD+BPF_MEM:
case BPF_LDX+BPF_MEM:
/*
* Only allow loading words that have been stored in
* all execution paths leading up to this instruction.
*/
if (insn->k >= BPF_MEMWORDS ||
(invalid & (1 << insn->k)))
return 0;
break;
case BPF_ST:
case BPF_STX:
if (insn->k >= BPF_MEMWORDS)
return 0;
invalid &= ~(1 << insn->k);
break;
case BPF_JMP+BPF_JA:
/*
* Make sure that the target instruction of the jump is
* still part of the program, and mark it as reachable.
*/
if (insn->k >= count - pc - 1)
return 0;
target = pc + insn->k + 1;
SET_BIT(reachable, target);
meminv[target] |= invalid;
advance = 0;
break;
case BPF_JMP+BPF_JGT+BPF_K:
case BPF_JMP+BPF_JGE+BPF_K:
case BPF_JMP+BPF_JEQ+BPF_K:
case BPF_JMP+BPF_JSET+BPF_K:
case BPF_JMP+BPF_JGT+BPF_X:
case BPF_JMP+BPF_JGE+BPF_X:
case BPF_JMP+BPF_JEQ+BPF_X:
case BPF_JMP+BPF_JSET+BPF_X:
/*
* Make sure that both target instructions are still
* part of the program, and mark both as reachable.
* There is no chance that the additions will overflow.
*/
target = pc + insn->jt + 1;
if (target >= count)
return 0;
SET_BIT(reachable, target);
meminv[target] |= invalid;
target = pc + insn->jf + 1;
if (target >= count)
return 0;
SET_BIT(reachable, target);
meminv[target] |= invalid;
advance = 0;
break;
case BPF_RET+BPF_A:
case BPF_RET+BPF_K:
advance = 0;
break;
default:
return 0;
}
/*
* After most instructions, we simply advance to the next. For
* one thing, this means that there must be a next instruction
* at all.
*/
if (advance) {
if (pc + 1 == count)
return 0;
SET_BIT(reachable, pc + 1);
meminv[pc + 1] |= invalid;
}
}
/* The program has passed all our basic tests. */
return 1;
}
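/*
 * As an illustration of the checks above, the following sketch (not part
 * of this file) validates a small filter that accepts only IPv4 packets
 * on an ethernet link.  BPF_STMT and BPF_JUMP are the standard helper
 * macros from <net/bpf.h>, and offset 12 is the ethertype field.
 */
static int
example_validate(void)
{
	struct bpf_insn prog[] = {
		BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 12),	/* load ethertype */
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0x0800, 0, 1),
		BPF_STMT(BPF_RET+BPF_K, (u_int)-1),	/* IPv4: accept */
		BPF_STMT(BPF_RET+BPF_K, 0),		/* other: reject */
	};

	return bpf_validate(prog, (int)(sizeof(prog) / sizeof(prog[0])));
}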

1365
minix/net/lwip/bpfdev.c Normal file

File diff suppressed because it is too large

18
minix/net/lwip/bpfdev.h Normal file
View File

@ -0,0 +1,18 @@
#ifndef MINIX_NET_LWIP_BPFDEV_H
#define MINIX_NET_LWIP_BPFDEV_H
/*
* BPF link structure, used to abstract away the details of the BPF structure
* from other modules.
*/
struct bpfdev_link {
TAILQ_ENTRY(bpfdev_link) bpfl_next;
};
void bpfdev_init(void);
void bpfdev_process(message * m_ptr, int ipc_status);
void bpfdev_detach(struct bpfdev_link * bpf);
void bpfdev_input(struct bpfdev_link * bpf, const struct pbuf * pbuf);
void bpfdev_output(struct bpfdev_link * bpf, const struct pbuf * pbuf);
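/*
 * Illustrative sketch (hypothetical names; ifdev.c is the real user of
 * this interface): each network interface keeps a tail queue of attached
 * BPF devices through the link structure above, and feeds every captured
 * packet to all of them.  Assumes <sys/queue.h> and lwIP's pbuf type.
 */
struct example_ifdev {
	TAILQ_HEAD(, bpfdev_link) eif_bpf;	/* attached BPF devices */
};

static void
example_capture(struct example_ifdev * eifdev, const struct pbuf * pbuf)
{
	struct bpfdev_link *bpfl;

	TAILQ_FOREACH(bpfl, &eifdev->eif_bpf, bpfl_next)
		bpfdev_input(bpfl, pbuf);
}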
#endif /* !MINIX_NET_LWIP_BPFDEV_H */

1718
minix/net/lwip/ethif.c Normal file

File diff suppressed because it is too large

24
minix/net/lwip/ethif.h Normal file
View File

@ -0,0 +1,24 @@
#ifndef MINIX_NET_LWIP_ETHIF_H
#define MINIX_NET_LWIP_ETHIF_H
#include "ndev.h"
struct ethif;
void ethif_init(void);
struct ethif *ethif_add(ndev_id_t id, const char * name, uint32_t caps);
int ethif_enable(struct ethif * ethif, const char * name,
const struct ndev_hwaddr * hwaddr, uint8_t hwaddr_len, uint32_t caps,
uint32_t link, uint32_t media);
void ethif_disable(struct ethif * ethif);
void ethif_remove(struct ethif * ethif);
void ethif_configured(struct ethif * ethif, int32_t result);
void ethif_sent(struct ethif * ethif, int32_t result);
void ethif_received(struct ethif * ethif, int32_t result);
void ethif_status(struct ethif * ethif, uint32_t link, uint32_t media,
uint32_t oerror, uint32_t coll, uint32_t ierror, uint32_t iqdrop);
#endif /* !MINIX_NET_LWIP_ETHIF_H */

2224
minix/net/lwip/ifaddr.c Normal file

File diff suppressed because it is too large

70
minix/net/lwip/ifaddr.h Normal file
View File

@ -0,0 +1,70 @@
#ifndef MINIX_NET_LWIP_IFADDR_H
#define MINIX_NET_LWIP_IFADDR_H
/* Possible values of ifdev_v6flags[] elements. */
#define IFADDR_V6F_AUTOCONF 0x01 /* autoconfigured address, no subnet */
#define IFADDR_V6F_TEMPORARY 0x02 /* temporary (privacy) address */
#define IFADDR_V6F_HWBASED 0x04 /* auto-derived from MAC address */
typedef int ifaddr_v4_num_t; /* interface IPv4 address number */
typedef int ifaddr_v6_num_t; /* interface IPv6 address number */
typedef int ifaddr_dl_num_t; /* interface link address number */
extern int ifaddr_auto_linklocal;
extern int ifaddr_accept_rtadv;
void ifaddr_init(struct ifdev * ifdev);
int ifaddr_v4_find(struct ifdev * ifdev, const struct sockaddr_in * addr,
ifaddr_v4_num_t * num);
int ifaddr_v4_enum(struct ifdev * ifdev, ifaddr_v4_num_t * num);
int ifaddr_v4_get(struct ifdev * ifdev, ifaddr_v4_num_t num,
struct sockaddr_in * addr, struct sockaddr_in * mask,
struct sockaddr_in * bcast, struct sockaddr_in * dest);
int ifaddr_v4_get_flags(struct ifdev * ifdev, ifaddr_v4_num_t num);
int ifaddr_v4_add(struct ifdev * ifdev, const struct sockaddr_in * addr,
const struct sockaddr_in * mask, const struct sockaddr_in * bcast,
const struct sockaddr_in * dest, int flags);
void ifaddr_v4_del(struct ifdev * ifdev, ifaddr_v4_num_t num);
void ifaddr_v4_clear(struct ifdev * ifdev);
struct ifdev *ifaddr_v4_map_by_addr(const ip4_addr_t * ip4addr);
int ifaddr_v6_find(struct ifdev * ifdev, const struct sockaddr_in6 * addr6,
ifaddr_v6_num_t * num);
int ifaddr_v6_enum(struct ifdev * ifdev, ifaddr_v6_num_t * num);
void ifaddr_v6_get(struct ifdev * ifdev, ifaddr_v6_num_t num,
struct sockaddr_in6 * addr6, struct sockaddr_in6 * mask6,
struct sockaddr_in6 * dest6);
int ifaddr_v6_get_flags(struct ifdev * ifdev, ifaddr_v6_num_t num);
void ifaddr_v6_get_lifetime(struct ifdev * ifdev, ifaddr_v6_num_t num,
struct in6_addrlifetime * lifetime);
int ifaddr_v6_add(struct ifdev * ifdev, const struct sockaddr_in6 * addr6,
const struct sockaddr_in6 * mask6, const struct sockaddr_in6 * dest6,
int flags, const struct in6_addrlifetime * lifetime);
void ifaddr_v6_del(struct ifdev * ifdev, ifaddr_v6_num_t num);
void ifaddr_v6_clear(struct ifdev * ifdev);
void ifaddr_v6_check(struct ifdev * ifdev);
void ifaddr_v6_set_up(struct ifdev * ifdev);
void ifaddr_v6_set_linklocal(struct ifdev * ifdev);
struct ifdev *ifaddr_v6_map_by_addr(const ip6_addr_t * ip6addr);
struct ifdev *ifaddr_map_by_addr(const ip_addr_t * ipaddr);
struct ifdev *ifaddr_map_by_subnet(const ip_addr_t * ipaddr);
const ip_addr_t *ifaddr_select(const ip_addr_t * dst_addr,
struct ifdev * ifdev, struct ifdev ** ifdevp);
int ifaddr_is_zone_mismatch(const ip6_addr_t * ipaddr, struct ifdev * ifdev);
int ifaddr_dl_find(struct ifdev * ifdev, const struct sockaddr_dlx * addr,
socklen_t addr_len, ifaddr_dl_num_t * num);
int ifaddr_dl_enum(struct ifdev * ifdev, ifaddr_dl_num_t * num);
void ifaddr_dl_get(struct ifdev * ifdev, ifaddr_dl_num_t num,
struct sockaddr_dlx * addr);
int ifaddr_dl_get_flags(struct ifdev * ifdev, ifaddr_dl_num_t num);
int ifaddr_dl_add(struct ifdev * ifdev, const struct sockaddr_dlx * addr,
socklen_t addr_len, int flags);
int ifaddr_dl_del(struct ifdev * ifdev, ifaddr_dl_num_t num);
void ifaddr_dl_clear(struct ifdev * ifdev);
void ifaddr_dl_update(struct ifdev * ifdev, const uint8_t * hwaddr,
int is_factory);
#endif /* !MINIX_NET_LWIP_IFADDR_H */

930
minix/net/lwip/ifconf.c Normal file
View File

@ -0,0 +1,930 @@
/* LWIP service - ifconf.c - interface configuration */
#include "lwip.h"
#include "ifaddr.h"
#include "lldata.h"
#include <net/if_media.h>
#include <minix/if.h>
#define LOOPBACK_IFNAME "lo0" /* name of the loopback interface */
/*
* Initialize the first loopback device, which is present by default.
*/
void
ifconf_init(void)
{
const struct sockaddr_in addr = {
.sin_family = AF_INET,
.sin_addr = { htonl(INADDR_LOOPBACK) }
};
struct sockaddr_in6 ll_addr6 = {
.sin6_family = AF_INET6,
};
const struct sockaddr_in6 lo_addr6 = {
.sin6_family = AF_INET6,
.sin6_addr = IN6ADDR_LOOPBACK_INIT
};
const struct in6_addrlifetime lifetime = {
.ia6t_vltime = ND6_INFINITE_LIFETIME,
.ia6t_pltime = ND6_INFINITE_LIFETIME
};
struct sockaddr_in6 mask6;
struct ifdev *ifdev;
socklen_t addr_len;
int r;
if ((r = ifdev_create(LOOPBACK_IFNAME)) != OK)
panic("unable to create loopback interface: %d", r);
if ((ifdev = ifdev_find_by_name(LOOPBACK_IFNAME)) == NULL)
panic("unable to find loopback interface");
if ((r = ifaddr_v4_add(ifdev, &addr, NULL, NULL, NULL, 0)) != OK)
panic("unable to set IPv4 address on loopback interface: %d",
r);
addr_len = sizeof(mask6);
addr_put_netmask((struct sockaddr *)&mask6, &addr_len, IPADDR_TYPE_V6,
64 /*prefix*/);
ll_addr6.sin6_addr.s6_addr[0] = 0xfe;
ll_addr6.sin6_addr.s6_addr[1] = 0x80;
ll_addr6.sin6_addr.s6_addr[15] = ifdev_get_index(ifdev);
if ((r = ifaddr_v6_add(ifdev, &ll_addr6, &mask6, NULL, 0,
&lifetime)) != OK)
panic("unable to set IPv6 address on loopback interface: %d",
r);
addr_len = sizeof(mask6);
addr_put_netmask((struct sockaddr *)&mask6, &addr_len, IPADDR_TYPE_V6,
128 /*prefix*/);
if ((r = ifaddr_v6_add(ifdev, &lo_addr6, &mask6, NULL, 0,
&lifetime)) != OK)
panic("unable to set IPv6 address on loopback interface: %d",
r);
if ((r = ifdev_set_ifflags(ifdev, IFF_UP)) != OK)
panic("unable to bring up loopback interface");
}
/*
* Process an address family independent IOCTL request with an "ifreq"
* structure.
*/
static int
ifconf_ioctl_ifreq(unsigned long request, const struct sockdriver_data * data)
{
struct ifdev *ifdev;
struct ifreq ifr;
int r;
if ((r = sockdriver_copyin(data, 0, &ifr, sizeof(ifr))) != OK)
return r;
if (request != SIOCIFCREATE) {
ifr.ifr_name[sizeof(ifr.ifr_name) - 1] = '\0';
if ((ifdev = ifdev_find_by_name(ifr.ifr_name)) == NULL)
return ENXIO;
} else
ifdev = NULL;
switch (request) {
case SIOCGIFFLAGS:
ifr.ifr_flags = ifdev_get_ifflags(ifdev);
return sockdriver_copyout(data, 0, &ifr, sizeof(ifr));
case SIOCSIFFLAGS:
/*
* Unfortunately, ifr_flags is a signed integer and the sign
* bit is in fact used as a flag, so without explicit casting
* we end up setting all upper bits of the (full) integer. If
* NetBSD ever extends the field, this assert should trigger..
*/
assert(sizeof(ifr.ifr_flags) == sizeof(short));
return ifdev_set_ifflags(ifdev, (unsigned short)ifr.ifr_flags);
case SIOCGIFMETRIC:
ifr.ifr_metric = ifdev_get_metric(ifdev);
return sockdriver_copyout(data, 0, &ifr, sizeof(ifr));
case SIOCSIFMETRIC:
/* The metric is not used within the operating system. */
ifdev_set_metric(ifdev, ifr.ifr_metric);
return OK;
case SIOCSIFMEDIA:
return ifdev_set_ifmedia(ifdev, ifr.ifr_media);
case SIOCGIFMTU:
ifr.ifr_mtu = ifdev_get_mtu(ifdev);
return sockdriver_copyout(data, 0, &ifr, sizeof(ifr));
case SIOCSIFMTU:
return ifdev_set_mtu(ifdev, ifr.ifr_mtu);
case SIOCIFCREATE:
if (memchr(ifr.ifr_name, '\0', sizeof(ifr.ifr_name)) == NULL)
return EINVAL;
return ifdev_create(ifr.ifr_name);
case SIOCIFDESTROY:
return ifdev_destroy(ifdev);
case SIOCGIFDLT:
ifr.ifr_dlt = ifdev_get_dlt(ifdev);
return sockdriver_copyout(data, 0, &ifr, sizeof(ifr));
case SIOCGIFINDEX:
ifr.ifr_index = ifdev_get_index(ifdev);
return sockdriver_copyout(data, 0, &ifr, sizeof(ifr));
default:
return ENOTTY;
}
}
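/*
 * For illustration, this is roughly how ifconfig(8)-style userland code
 * reaches the SIOCGIFFLAGS case above (sketch only; assumes
 * <sys/ioctl.h>, <net/if.h>, and <string.h>).  The cast mirrors the
 * sign-extension issue documented in that case.
 */
static int
example_get_ifflags(int sock, const char * name, unsigned short * flagsp)
{
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
	if (ioctl(sock, SIOCGIFFLAGS, &ifr) != 0)
		return -1;
	*flagsp = (unsigned short)ifr.ifr_flags;
	return 0;
}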
/*
* Process an address family independent IOCTL request with an "ifcapreq"
* structure.
*/
static int
ifconf_ioctl_ifcap(unsigned long request,
const struct sockdriver_data * data)
{
struct ifdev *ifdev;
struct ifcapreq ifcr;
int r;
if ((r = sockdriver_copyin(data, 0, &ifcr, sizeof(ifcr))) != OK)
return r;
ifcr.ifcr_name[sizeof(ifcr.ifcr_name) - 1] = '\0';
if ((ifdev = ifdev_find_by_name(ifcr.ifcr_name)) == NULL)
return ENXIO;
switch (request) {
case SIOCSIFCAP:
return ifdev_set_ifcap(ifdev, ifcr.ifcr_capenable);
case SIOCGIFCAP:
ifdev_get_ifcap(ifdev, &ifcr.ifcr_capabilities,
&ifcr.ifcr_capenable);
return sockdriver_copyout(data, 0, &ifcr, sizeof(ifcr));
default:
return ENOTTY;
}
}
/*
* Process an address family independent IOCTL request with an "ifmediareq"
* structure.
*/
static int
ifconf_ioctl_ifmedia(unsigned long request,
const struct sockdriver_data * data)
{
struct ifdev *ifdev;
struct ifmediareq ifm;
int r;
if ((r = sockdriver_copyin(data, 0, &ifm, sizeof(ifm))) != OK)
return r;
ifm.ifm_name[sizeof(ifm.ifm_name) - 1] = '\0';
if ((ifdev = ifdev_find_by_name(ifm.ifm_name)) == NULL)
return ENXIO;
switch (request) {
case MINIX_SIOCGIFMEDIA:
if ((r = ifdev_get_ifmedia(ifdev, &ifm.ifm_current,
&ifm.ifm_active)) != OK)
return r;
ifm.ifm_mask = 0;
switch (ifdev_get_link(ifdev)) {
case LINK_STATE_UP:
ifm.ifm_status = IFM_AVALID | IFM_ACTIVE;
break;
case LINK_STATE_DOWN:
ifm.ifm_status = IFM_AVALID;
break;
default:
ifm.ifm_status = 0;
break;
}
/*
* TODO: support for the list of supported media types. This
* one is not easy, because we cannot simply suspend the IOCTL
* and query the driver. For now, return only one entry (which is
* the minimum for ifconfig(8) not to complain), namely the
* currently selected one.
*/
if (ifm.ifm_ulist != NULL) {
if (ifm.ifm_count < 1)
return ENOMEM;
/*
* Copy out the 'list', which consists of one entry.
* If we were to produce multiple entries, we would
* have to check against the MINIX_IF_MAXMEDIA limit.
*/
if ((r = sockdriver_copyout(data,
offsetof(struct minix_ifmediareq, mifm_list),
&ifm.ifm_current, sizeof(ifm.ifm_current))) != OK)
return r;
}
ifm.ifm_count = 1;
return sockdriver_copyout(data, 0, &ifm, sizeof(ifm));
default:
return ENOTTY;
}
}
/*
* Process an address family independent IOCTL request with an "if_clonereq"
* structure.
*/
static int
ifconf_ioctl_ifclone(unsigned long request,
const struct sockdriver_data * data)
{
struct if_clonereq ifcr;
const char *ptr;
char name[IFNAMSIZ];
size_t off;
unsigned int num;
int r;
if ((r = sockdriver_copyin(data, 0, &ifcr, sizeof(ifcr))) != OK)
return r;
if (ifcr.ifcr_count < 0)
return EINVAL;
off = offsetof(struct minix_if_clonereq, mifcr_buffer);
for (num = 0; (ptr = ifdev_enum_vtypes(num)) != NULL; num++) {
/* Prevent overflow in case we ever have over 128 vtypes.. */
if (num == MINIX_IF_MAXCLONERS)
break;
if (ifcr.ifcr_buffer == NULL ||
num >= (unsigned int)ifcr.ifcr_count)
continue;
memset(name, 0, sizeof(name));
strlcpy(name, ptr, sizeof(name));
if ((r = sockdriver_copyout(data, off, name,
sizeof(name))) != OK)
return r;
off += sizeof(name);
}
ifcr.ifcr_total = num;
return sockdriver_copyout(data, 0, &ifcr, sizeof(ifcr));
}
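/*
 * Illustrative userland sketch of the two-phase protocol served above, as
 * ifconfig(8) -C uses it: the first call obtains the total count, the
 * second retrieves the names.  Assumes that SIOCIFGCLONERS maps onto the
 * MINIX_SIOCIFGCLONERS case handled by this service, and that
 * <sys/ioctl.h>, <net/if.h>, <stdio.h>, <stdlib.h>, and <string.h> are
 * available.
 */
static void
example_list_cloners(int sock)
{
	struct if_clonereq ifcr;
	char *buf;
	int i;

	memset(&ifcr, 0, sizeof(ifcr));
	if (ioctl(sock, SIOCIFGCLONERS, &ifcr) != 0)
		return;				/* first call: count only */
	ifcr.ifcr_count = ifcr.ifcr_total;
	if ((buf = calloc(ifcr.ifcr_count, IFNAMSIZ)) == NULL)
		return;
	ifcr.ifcr_buffer = buf;
	if (ioctl(sock, SIOCIFGCLONERS, &ifcr) == 0)	/* second: names */
		for (i = 0; i < ifcr.ifcr_count && i < ifcr.ifcr_total; i++)
			printf("%s\n", buf + i * IFNAMSIZ);
	free(buf);
}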
/*
* Process an address family independent IOCTL request with an "if_addrprefreq"
* structure.
*/
static int
ifconf_ioctl_ifaddrpref(unsigned long request,
const struct sockdriver_data * data)
{
struct ifdev *ifdev;
struct if_addrprefreq ifap;
int r;
if ((r = sockdriver_copyin(data, 0, &ifap, sizeof(ifap))) != OK)
return r;
ifap.ifap_name[sizeof(ifap.ifap_name) - 1] = '\0';
if ((ifdev = ifdev_find_by_name(ifap.ifap_name)) == NULL)
return ENXIO;
/*
* For now, we simply support only a preference of 0. We do not try to
* look up the given address, nor do we return the looked up address.
*/
switch (request) {
case SIOCSIFADDRPREF:
if (ifap.ifap_preference != 0)
return EINVAL;
return OK;
case SIOCGIFADDRPREF:
ifap.ifap_preference = 0;
return sockdriver_copyout(data, 0, &ifap, sizeof(ifap));
default:
return ENOTTY;
}
}
/*
* Process an IOCTL request for AF_INET with an "ifreq" structure.
*/
static int
ifconf_ioctl_v4_ifreq(unsigned long request,
const struct sockdriver_data * data)
{
struct sockaddr_in addr, mask, bcast, dest, *sin = NULL /*gcc*/;
struct ifdev *ifdev;
struct ifreq ifr;
ifaddr_v4_num_t num;
int r, flags;
if ((r = sockdriver_copyin(data, 0, &ifr, sizeof(ifr))) != OK)
return r;
ifr.ifr_name[sizeof(ifr.ifr_name) - 1] = '\0';
if ((ifdev = ifdev_find_by_name(ifr.ifr_name)) == NULL)
return ENXIO;
switch (request) {
case SIOCGIFADDR:
case SIOCGIFNETMASK:
case SIOCGIFBRDADDR:
case SIOCGIFDSTADDR:
/* Retrieve all addresses, then copy out the desired one. */
switch (request) {
case SIOCGIFADDR: sin = &addr; break;
case SIOCGIFNETMASK: sin = &mask; break;
case SIOCGIFBRDADDR: sin = &bcast; break;
case SIOCGIFDSTADDR: sin = &dest; break;
}
sin->sin_len = 0;
if ((r = ifaddr_v4_get(ifdev, (ifaddr_v4_num_t)0, &addr, &mask,
&bcast, &dest)) != OK)
return r;
if (sin->sin_len == 0) /* not filled in */
return EADDRNOTAVAIL;
memcpy(&ifr.ifr_addr, sin, sizeof(*sin));
return sockdriver_copyout(data, 0, &ifr, sizeof(ifr));
case SIOCGIFAFLAG_IN:
if ((r = ifaddr_v4_find(ifdev,
(struct sockaddr_in *)&ifr.ifr_addr, &num)) != OK)
return r;
ifr.ifr_addrflags = ifaddr_v4_get_flags(ifdev, num);
return sockdriver_copyout(data, 0, &ifr, sizeof(ifr));
case SIOCSIFADDR:
/*
* This one is slightly different from the rest, in that we
* either set or update the primary address: if we set it, we
* must let _add() generate a matching netmask automatically,
* while if we update it, _add() would fail unless we first
* delete the old entry.
*/
sin = (struct sockaddr_in *)&ifr.ifr_addr;
if ((r = ifaddr_v4_get(ifdev, (ifaddr_v4_num_t)0, &addr, &mask,
&bcast, &dest)) == OK) {
flags = ifaddr_v4_get_flags(ifdev, (ifaddr_v4_num_t)0);
ifaddr_v4_del(ifdev, (ifaddr_v4_num_t)0);
/*
* If setting the new address fails, reinstating the
* old address should always work. This is really ugly
* as it generates routing socket noise, but this call
* is deprecated anyway.
*/
if ((r = ifaddr_v4_add(ifdev, sin, &mask, &bcast,
&dest, 0 /*flags*/)) != OK)
(void)ifaddr_v4_add(ifdev, &addr, &mask,
&bcast, &dest, flags);
return r;
} else
return ifaddr_v4_add(ifdev, sin, NULL /*mask*/,
NULL /*bcast*/, NULL /*dest*/, 0 /*flags*/);
case SIOCSIFNETMASK:
case SIOCSIFBRDADDR:
case SIOCSIFDSTADDR:
/* These calls only update the existing primary address. */
if ((r = ifaddr_v4_get(ifdev, (ifaddr_v4_num_t)0, &addr, &mask,
&bcast, &dest)) != OK)
return r;
sin = (struct sockaddr_in *)&ifr.ifr_addr;
switch (request) {
case SIOCSIFNETMASK: memcpy(&mask, sin, sizeof(mask)); break;
case SIOCSIFBRDADDR: memcpy(&bcast, sin, sizeof(bcast)); break;
case SIOCSIFDSTADDR: memcpy(&dest, sin, sizeof(dest)); break;
}
return ifaddr_v4_add(ifdev, &addr, &mask, &bcast, &dest,
ifaddr_v4_get_flags(ifdev, (ifaddr_v4_num_t)0));
case SIOCDIFADDR:
if ((r = ifaddr_v4_find(ifdev,
(struct sockaddr_in *)&ifr.ifr_addr, &num)) != OK)
return r;
ifaddr_v4_del(ifdev, num);
return OK;
default:
return ENOTTY;
}
}
/*
* Process an IOCTL request for AF_INET with an "ifaliasreq" structure.
*/
static int
ifconf_ioctl_v4_ifalias(unsigned long request,
const struct sockdriver_data * data)
{
struct ifdev *ifdev;
struct ifaliasreq ifra;
struct sockaddr_in dest;
ifaddr_v4_num_t num;
int r;
if ((r = sockdriver_copyin(data, 0, &ifra, sizeof(ifra))) != OK)
return r;
ifra.ifra_name[sizeof(ifra.ifra_name) - 1] = '\0';
if ((ifdev = ifdev_find_by_name(ifra.ifra_name)) == NULL)
return ENXIO;
switch (request) {
case SIOCAIFADDR:
return ifaddr_v4_add(ifdev,
(struct sockaddr_in *)&ifra.ifra_addr,
(struct sockaddr_in *)&ifra.ifra_mask,
(struct sockaddr_in *)&ifra.ifra_broadaddr,
(struct sockaddr_in *)&ifra.ifra_dstaddr, 0 /*flags*/);
case SIOCGIFALIAS:
if ((r = ifaddr_v4_find(ifdev,
(struct sockaddr_in *)&ifra.ifra_addr, &num)) != OK)
return r;
/*
* The broadcast and destination address are stored in the same
* ifaliasreq field. We cannot pass a pointer to the same
* field to ifaddr_v4_get(). So, use a temporary variable.
*/
(void)ifaddr_v4_get(ifdev, num,
(struct sockaddr_in *)&ifra.ifra_addr,
(struct sockaddr_in *)&ifra.ifra_mask,
(struct sockaddr_in *)&ifra.ifra_broadaddr, &dest);
if (ifra.ifra_broadaddr.sa_len == 0)
memcpy(&ifra.ifra_dstaddr, &dest, sizeof(dest));
return sockdriver_copyout(data, 0, &ifra, sizeof(ifra));
default:
return ENOTTY;
}
}
/*
* Process an IOCTL request for AF_INET.
*/
static int
ifconf_ioctl_v4(unsigned long request, const struct sockdriver_data * data,
endpoint_t user_endpt)
{
switch (request) {
case SIOCSIFADDR:
case SIOCSIFDSTADDR:
case SIOCSIFBRDADDR:
case SIOCSIFNETMASK:
case SIOCDIFADDR:
if (!util_is_root(user_endpt))
return EPERM;
/* FALLTHROUGH */
case SIOCGIFADDR:
case SIOCGIFDSTADDR:
case SIOCGIFBRDADDR:
case SIOCGIFNETMASK:
case SIOCGIFAFLAG_IN:
return ifconf_ioctl_v4_ifreq(request, data);
case SIOCAIFADDR:
if (!util_is_root(user_endpt))
return EPERM;
/* FALLTHROUGH */
case SIOCGIFALIAS:
return ifconf_ioctl_v4_ifalias(request, data);
default:
return ENOTTY;
}
}
#ifdef INET6
/*
* Process an IOCTL request for AF_INET6 with an "in6_ifreq" structure.
*/
static int
ifconf_ioctl_v6_ifreq(unsigned long request,
const struct sockdriver_data * data)
{
struct ifdev *ifdev;
struct in6_ifreq ifr;
ifaddr_v6_num_t num;
int r;
if ((r = sockdriver_copyin(data, 0, &ifr, sizeof(ifr))) != OK)
return r;
ifr.ifr_name[sizeof(ifr.ifr_name) - 1] = '\0';
if ((ifdev = ifdev_find_by_name(ifr.ifr_name)) == NULL)
return ENXIO;
if ((r = ifaddr_v6_find(ifdev, &ifr.ifr_addr, &num)) != OK)
return r;
switch (request) {
case SIOCGIFADDR_IN6:
/* This IOCTL basically checks if the given address exists. */
ifaddr_v6_get(ifdev, num, &ifr.ifr_addr, NULL, NULL);
return sockdriver_copyout(data, 0, &ifr, sizeof(ifr));
case SIOCDIFADDR_IN6:
ifaddr_v6_del(ifdev, num);
return OK;
case SIOCGIFNETMASK_IN6:
ifaddr_v6_get(ifdev, num, NULL, &ifr.ifr_addr, NULL);
return sockdriver_copyout(data, 0, &ifr, sizeof(ifr));
case SIOCGIFAFLAG_IN6:
ifr.ifr_ifru.ifru_flags6 = ifaddr_v6_get_flags(ifdev, num);
return sockdriver_copyout(data, 0, &ifr, sizeof(ifr));
case SIOCGIFALIFETIME_IN6:
ifaddr_v6_get_lifetime(ifdev, num,
&ifr.ifr_ifru.ifru_lifetime);
return sockdriver_copyout(data, 0, &ifr, sizeof(ifr));
default:
return ENOTTY;
}
}
/*
* Process an IOCTL request for AF_INET6 with an "in6_aliasreq" structure.
*/
static int
ifconf_ioctl_v6_ifalias(unsigned long request,
const struct sockdriver_data * data)
{
struct ifdev *ifdev;
struct in6_aliasreq ifra;
int r;
if ((r = sockdriver_copyin(data, 0, &ifra, sizeof(ifra))) != OK)
return r;
ifra.ifra_name[sizeof(ifra.ifra_name) - 1] = '\0';
if ((ifdev = ifdev_find_by_name(ifra.ifra_name)) == NULL)
return ENXIO;
switch (request) {
case SIOCAIFADDR_IN6:
return ifaddr_v6_add(ifdev, &ifra.ifra_addr,
&ifra.ifra_prefixmask, &ifra.ifra_dstaddr,
ifra.ifra_flags, &ifra.ifra_lifetime);
default:
return ENOTTY;
}
}
/*
* Process an IOCTL request for AF_INET6 with an "in6_ndireq" structure.
*/
static int
ifconf_ioctl_v6_ndireq(unsigned long request,
const struct sockdriver_data * data)
{
struct ifdev *ifdev;
struct in6_ndireq ndi;
int r;
if ((r = sockdriver_copyin(data, 0, &ndi, sizeof(ndi))) != OK)
return r;
ndi.ifname[sizeof(ndi.ifname) - 1] = '\0';
if ((ifdev = ifdev_find_by_name(ndi.ifname)) == NULL)
return ENXIO;
switch (request) {
case SIOCGIFINFO_IN6:
memset(&ndi.ndi, 0, sizeof(ndi.ndi));
ndi.ndi.linkmtu = ifdev_get_mtu(ifdev);
ndi.ndi.flags = ifdev_get_nd6flags(ifdev);
ndi.ndi.initialized = 1;
/* TODO: all the other fields.. */
return sockdriver_copyout(data, 0, &ndi, sizeof(ndi));
case SIOCSIFINFO_IN6:
/* TODO: all the other fields.. */
/* FALLTHROUGH */
case SIOCSIFINFO_FLAGS:
return ifdev_set_nd6flags(ifdev, ndi.ndi.flags);
default:
return ENOTTY;
}
}
/*
* Process an IOCTL request for AF_INET6 with an "in6_nbrinfo" structure.
*/
static int
ifconf_ioctl_v6_nbrinfo(unsigned long request,
const struct sockdriver_data * data)
{
struct ifdev *ifdev;
struct sockaddr_in6 addr;
struct in6_nbrinfo nbri;
lldata_ndp_num_t num;
int r;
if ((r = sockdriver_copyin(data, 0, &nbri, sizeof(nbri))) != OK)
return r;
nbri.ifname[sizeof(nbri.ifname) - 1] = '\0';
if ((ifdev = ifdev_find_by_name(nbri.ifname)) == NULL)
return ENXIO;
switch (request) {
case SIOCGNBRINFO_IN6:
/*
* Convert the given in6_addr to a full sockaddr_in6, mainly
* for internal consistency. It would have been nice if the
* KAME management API had had any sort of consistency itself.
*/
memset(&addr, 0, sizeof(addr));
addr.sin6_family = AF_INET6;
memcpy(&addr.sin6_addr.s6_addr, &nbri.addr,
sizeof(addr.sin6_addr.s6_addr));
if ((r = lldata_ndp_find(ifdev, &addr, &num)) != OK)
return r;
lldata_ndp_get_info(num, &nbri.asked, &nbri.isrouter,
&nbri.state, &nbri.expire);
return sockdriver_copyout(data, 0, &nbri, sizeof(nbri));
default:
return ENOTTY;
}
}
/*
* Process an IOCTL request for AF_INET6.
*/
static int
ifconf_ioctl_v6(unsigned long request, const struct sockdriver_data * data,
endpoint_t user_endpt)
{
switch (request) {
case SIOCDIFADDR_IN6:
if (!util_is_root(user_endpt))
return EPERM;
/* FALLTHROUGH */
case SIOCGIFADDR_IN6:
case SIOCGIFNETMASK_IN6:
case SIOCGIFAFLAG_IN6:
case SIOCGIFALIFETIME_IN6:
return ifconf_ioctl_v6_ifreq(request, data);
case SIOCAIFADDR_IN6:
if (!util_is_root(user_endpt))
return EPERM;
return ifconf_ioctl_v6_ifalias(request, data);
case SIOCSIFINFO_IN6:
case SIOCSIFINFO_FLAGS:
if (!util_is_root(user_endpt))
return EPERM;
/* FALLTHROUGH */
case SIOCGIFINFO_IN6:
return ifconf_ioctl_v6_ndireq(request, data);
case SIOCGNBRINFO_IN6:
return ifconf_ioctl_v6_nbrinfo(request, data);
default:
return ENOTTY;
}
}
#endif /* INET6 */
/*
* Process an IOCTL request for AF_LINK with an "if_laddrreq" structure.
*/
static int
ifconf_ioctl_dl_lifaddr(unsigned long request,
const struct sockdriver_data * data)
{
struct ifdev *ifdev;
struct if_laddrreq iflr;
ifaddr_dl_num_t num;
int r;
if ((r = sockdriver_copyin(data, 0, &iflr, sizeof(iflr))) != OK)
return r;
iflr.iflr_name[sizeof(iflr.iflr_name) - 1] = '\0';
if ((ifdev = ifdev_find_by_name(iflr.iflr_name)) == NULL)
return ENXIO;
switch (request) {
case SIOCGLIFADDR:
if (iflr.flags & IFLR_PREFIX) {
/* We ignore the prefix length, like NetBSD does. */
if ((r = ifaddr_dl_find(ifdev,
(struct sockaddr_dlx *)&iflr.addr,
sizeof(iflr.addr), &num)) != OK)
return r;
} else
num = (ifaddr_dl_num_t)0; /* this always works */
ifaddr_dl_get(ifdev, num, (struct sockaddr_dlx *)&iflr.addr);
iflr.flags = ifaddr_dl_get_flags(ifdev, num);
memset(&iflr.dstaddr, 0, sizeof(iflr.dstaddr));
return sockdriver_copyout(data, 0, &iflr, sizeof(iflr));
case SIOCALIFADDR:
return ifaddr_dl_add(ifdev, (struct sockaddr_dlx *)&iflr.addr,
sizeof(iflr.addr), iflr.flags);
case SIOCDLIFADDR:
if ((r = ifaddr_dl_find(ifdev,
(struct sockaddr_dlx *)&iflr.addr, sizeof(iflr.addr),
&num)) != OK)
return r;
return ifaddr_dl_del(ifdev, num);
default:
return ENOTTY;
}
}
/*
* Process an IOCTL request for AF_LINK.
*/
static int
ifconf_ioctl_dl(unsigned long request, const struct sockdriver_data * data,
endpoint_t user_endpt)
{
switch (request) {
case SIOCALIFADDR:
case SIOCDLIFADDR:
if (!util_is_root(user_endpt))
return EPERM;
/* FALLTHROUGH */
case SIOCGLIFADDR:
return ifconf_ioctl_dl_lifaddr(request, data);
default:
return ENOTTY;
}
}
/*
* Process an IOCTL request. This routine is shared between TCP, UDP, RAW, and
* link sockets. The given socket may be used to obtain the target domain:
* AF_INET, AF_INET6, or AF_LINK.
*/
int
ifconf_ioctl(struct sock * sock, unsigned long request,
const struct sockdriver_data * data, endpoint_t user_endpt)
{
int domain;
domain = sockevent_get_domain(sock);
switch (request) {
case SIOCSIFFLAGS:
case SIOCSIFMETRIC:
case SIOCSIFMEDIA:
case SIOCSIFMTU:
case SIOCIFCREATE:
case SIOCIFDESTROY:
if (!util_is_root(user_endpt))
return EPERM;
/* FALLTHROUGH */
case SIOCGIFFLAGS:
case SIOCGIFMETRIC:
case SIOCGIFMTU:
case SIOCGIFDLT:
case SIOCGIFINDEX:
return ifconf_ioctl_ifreq(request, data);
case SIOCSIFCAP:
if (!util_is_root(user_endpt))
return EPERM;
/* FALLTHROUGH */
case SIOCGIFCAP:
return ifconf_ioctl_ifcap(request, data);
case MINIX_SIOCGIFMEDIA:
return ifconf_ioctl_ifmedia(request, data);
case MINIX_SIOCIFGCLONERS:
return ifconf_ioctl_ifclone(request, data);
case SIOCSIFADDRPREF:
if (!util_is_root(user_endpt))
return EPERM;
/* FALLTHROUGH */
case SIOCGIFADDRPREF:
return ifconf_ioctl_ifaddrpref(request, data);
default:
switch (domain) {
case AF_INET:
return ifconf_ioctl_v4(request, data, user_endpt);
#ifdef INET6
case AF_INET6:
return ifconf_ioctl_v6(request, data, user_endpt);
#endif /* INET6 */
case AF_LINK:
return ifconf_ioctl_dl(request, data, user_endpt);
default:
return ENOTTY;
}
}
}
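/*
 * As an end-to-end illustration of the dispatcher above: creating the one
 * extra loopback interface that the service supports would look roughly
 * like this from userland (sketch; assumes <sys/ioctl.h>, <net/if.h>, and
 * <string.h>).
 */
static int
example_create_lo1(int sock)
{
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, "lo1", sizeof(ifr.ifr_name));
	return ioctl(sock, SIOCIFCREATE, &ifr);	/* superuser only */
}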

1064
minix/net/lwip/ifdev.c Normal file

File diff suppressed because it is too large

155
minix/net/lwip/ifdev.h Normal file
View File

@ -0,0 +1,155 @@
#ifndef MINIX_NET_LWIP_IFDEV_H
#define MINIX_NET_LWIP_IFDEV_H
#include <net/if.h>
#include <net/if_types.h>
#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
/*
* NetBSD makes setting a hardware address through ifconfig(8) a whole lot
* harder than it needs to be, namely by keeping a list of possible hardware
* addresses and marking one of them as active. For us, that level of extra
* flexibility is completely useless. In order to shield individual interface
* modules from having to deal with the rather extended interface for the list
* management, we maintain the list in ifdev and simply use an iop_set_hwaddr()
* call to the modules when the active address changes. This setting is the
* maximum number of hardware addresses in the list maintained by ifdev. It
* should be at least 2, or changing hardware addresses will not be possible.
*/
#define IFDEV_NUM_HWADDRS 3
struct ifdev;
struct bpfdev_link;
struct sockaddr_dlx;
/* Interface operations table. */
struct ifdev_ops {
err_t (* iop_init)(struct ifdev * ifdev, struct netif * netif);
err_t (* iop_input)(struct pbuf * pbuf, struct netif * netif);
err_t (* iop_output)(struct ifdev * ifdev, struct pbuf * pbuf,
struct netif * netif);
err_t (* iop_output_v4)(struct netif * netif, struct pbuf * pbuf,
const ip4_addr_t * ipaddr);
err_t (* iop_output_v6)(struct netif * netif, struct pbuf * pbuf,
const ip6_addr_t * ipaddr);
void (* iop_hdrcmplt)(struct ifdev * ifdev, struct pbuf * pbuf);
void (* iop_poll)(struct ifdev * ifdev);
int (* iop_set_ifflags)(struct ifdev * ifdev, unsigned int ifflags);
void (* iop_get_ifcap)(struct ifdev * ifdev, uint64_t * ifcap,
uint64_t * ifena);
int (* iop_set_ifcap)(struct ifdev * ifdev, uint64_t ifcap);
void (* iop_get_ifmedia)(struct ifdev * ifdev, int * ifcurrent,
int * ifactive);
int (* iop_set_ifmedia)(struct ifdev * ifdev, int ifmedia);
void (* iop_set_promisc)(struct ifdev * ifdev, int promisc);
int (* iop_set_hwaddr)(struct ifdev * ifdev, const uint8_t * hwaddr);
int (* iop_set_mtu)(struct ifdev * ifdev, unsigned int mtu);
int (* iop_destroy)(struct ifdev * ifdev);
};
/* Hardware address list entry. The first entry, if any, is the active one. */
struct ifdev_hwaddr {
uint8_t ifhwa_addr[NETIF_MAX_HWADDR_LEN];
uint8_t ifhwa_flags;
};
#define IFHWAF_VALID 0x01 /* entry contains an address */
#define IFHWAF_FACTORY 0x02 /* factory (device-given) address */
/* Interface structure. */
struct ifdev {
TAILQ_ENTRY(ifdev) ifdev_next; /* list of active interfaces */
char ifdev_name[IFNAMSIZ]; /* interface name, null terminated */
unsigned int ifdev_ifflags; /* NetBSD-style interface flags */
unsigned int ifdev_dlt; /* data link type (DLT_) */
unsigned int ifdev_promisc; /* number of promiscuity requestors */
struct netif ifdev_netif; /* lwIP interface structure */
struct if_data ifdev_data; /* NetBSD-style interface data */
char ifdev_v4set; /* interface has an IPv4 address? */
uint8_t ifdev_v6prefix[LWIP_IPV6_NUM_ADDRESSES]; /* IPv6 prefixes */
uint8_t ifdev_v6flags[LWIP_IPV6_NUM_ADDRESSES]; /* v6 address flags */
uint8_t ifdev_v6state[LWIP_IPV6_NUM_ADDRESSES]; /* v6 shadow states */
uint8_t ifdev_v6scope[LWIP_IPV6_NUM_ADDRESSES]; /* cached v6 scopes */
struct ifdev_hwaddr ifdev_hwlist[IFDEV_NUM_HWADDRS]; /* HW addr's */
uint32_t ifdev_nd6flags; /* ND6-related flags (ND6_IFF_) */
const struct ifdev_ops *ifdev_ops; /* interface operations table */
TAILQ_HEAD(, bpfdev_link) ifdev_bpf; /* list of attached BPF devices */
};
#define ifdev_get_name(ifdev) ((ifdev)->ifdev_name)
#define ifdev_get_ifflags(ifdev) ((ifdev)->ifdev_ifflags)
#define ifdev_get_dlt(ifdev) ((ifdev)->ifdev_dlt)
#define ifdev_is_promisc(ifdev) ((ifdev)->ifdev_promisc != 0)
#define ifdev_get_netif(ifdev) (&(ifdev)->ifdev_netif)
#define ifdev_get_nd6flags(ifdev) ((ifdev)->ifdev_nd6flags)
#define ifdev_get_iftype(ifdev) ((ifdev)->ifdev_data.ifi_type)
#define ifdev_get_hwlen(ifdev) ((ifdev)->ifdev_data.ifi_addrlen)
#define ifdev_get_hdrlen(ifdev) ((ifdev)->ifdev_data.ifi_hdrlen)
#define ifdev_get_link(ifdev) ((ifdev)->ifdev_data.ifi_link_state)
#define ifdev_get_mtu(ifdev) ((ifdev)->ifdev_data.ifi_mtu)
#define ifdev_get_metric(ifdev) ((ifdev)->ifdev_data.ifi_metric)
#define ifdev_get_ifdata(ifdev) (&(ifdev)->ifdev_data)
#define ifdev_is_loopback(ifdev) ((ifdev)->ifdev_ifflags & IFF_LOOPBACK)
#define ifdev_is_up(ifdev) ((ifdev)->ifdev_ifflags & IFF_UP)
#define ifdev_is_link_up(ifdev) (netif_is_link_up(&(ifdev)->ifdev_netif))
#define ifdev_set_metric(ifdev, metric) \
((void)((ifdev)->ifdev_data.ifi_metric = (metric)))
#define ifdev_get_index(ifdev) \
((uint32_t)(netif_get_index(ifdev_get_netif(ifdev))))
#define ifdev_output_drop(ifdev) ((ifdev)->ifdev_data.ifi_oerrors++)
#define netif_get_ifdev(netif) ((struct ifdev *)(netif)->state)
void ifdev_init(void);
void ifdev_poll(void);
void ifdev_register(const char * name, int (* create)(const char *));
void ifdev_input(struct ifdev * ifdev, struct pbuf * pbuf,
struct netif * netif, int to_bpf);
err_t ifdev_output(struct ifdev * ifdev, struct pbuf * pbuf,
struct netif * netif, int to_bpf, int hdrcmplt);
void ifdev_attach_bpf(struct ifdev * ifdev, struct bpfdev_link * bpfl);
void ifdev_detach_bpf(struct ifdev * ifdev, struct bpfdev_link * bpfl);
struct ifdev *ifdev_get_by_index(uint32_t ifindex);
struct ifdev *ifdev_find_by_name(const char * name);
struct ifdev *ifdev_enum(struct ifdev * last);
int ifdev_check_name(const char * name, unsigned int * vtype_slot);
int ifdev_set_promisc(struct ifdev * ifdev);
void ifdev_clear_promisc(struct ifdev * ifdev);
int ifdev_set_ifflags(struct ifdev * ifdev, unsigned int ifflags);
void ifdev_update_ifflags(struct ifdev * ifdev, unsigned int ifflags);
void ifdev_get_ifcap(struct ifdev * ifdev, uint64_t * ifcap,
uint64_t * ifena);
int ifdev_set_ifcap(struct ifdev * ifdev, uint64_t ifena);
int ifdev_get_ifmedia(struct ifdev * ifdev, int * ifcurrent, int * ifactive);
int ifdev_set_ifmedia(struct ifdev * ifdev, int ifmedia);
int ifdev_set_mtu(struct ifdev * ifdev, unsigned int mtu);
int ifdev_set_nd6flags(struct ifdev * ifdev, uint32_t nd6flags);
void ifdev_add(struct ifdev * ifdev, const char * name, unsigned int ifflags,
unsigned int iftype, size_t hdrlen, size_t addrlen, unsigned int dlt,
unsigned int mtu, uint32_t nd6flags, const struct ifdev_ops * iop);
int ifdev_remove(struct ifdev * ifdev);
struct ifdev *ifdev_get_loopback(void);
void ifdev_update_link(struct ifdev * ifdev, int link);
void ifdev_update_hwaddr(struct ifdev * ifdev, const uint8_t * hwaddr,
int is_factory);
int ifdev_create(const char * name);
int ifdev_destroy(struct ifdev * ifdev);
const char *ifdev_enum_vtypes(unsigned int num);
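/*
 * Illustrative sketch of the module glue behind this API (all "nullif"
 * names are hypothetical; loopif.c and ethif.c are the real users, and
 * they fill in many more operations).  DLT_NULL assumes <net/dlt.h>; a
 * real module would also allocate one ifdev per created instance.
 */
static err_t
nullif_init(struct ifdev * ifdev, struct netif * netif)
{
	return ERR_OK;
}

static err_t
nullif_output(struct ifdev * ifdev, struct pbuf * pbuf,
	struct netif * netif)
{
	ifdev_output_drop(ifdev);	/* a null interface drops everything */
	return ERR_OK;
}

static const struct ifdev_ops nullif_ops = {
	.iop_init	= nullif_init,
	.iop_output	= nullif_output,
};

static int
nullif_create(const char * name)
{
	static struct ifdev nullif_dev;

	ifdev_add(&nullif_dev, name, 0 /*ifflags*/, IFT_OTHER, 0 /*hdrlen*/,
	    0 /*addrlen*/, DLT_NULL, 1500 /*mtu*/, 0 /*nd6flags*/,
	    &nullif_ops);
	return OK;
}

static void
nullif_register(void)
{
	ifdev_register("null", nullif_create);
}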
#endif /* !MINIX_NET_LWIP_IFDEV_H */

761
minix/net/lwip/ipsock.c Normal file
View File

@ -0,0 +1,761 @@
/* LWIP service - ipsock.c - shared IP-level socket code */
#include "lwip.h"
#include "ifaddr.h"
#define ip6_hdr __netbsd_ip6_hdr /* conflicting definitions */
#include <net/route.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet6/in6_pcb.h>
#undef ip6_hdr
/* The following are sysctl(7) settings. */
int lwip_ip4_forward = 0; /* We patch lwIP to check these.. */
int lwip_ip6_forward = 0; /* ..two settings at run time. */
static int ipsock_v6only = 1;
/* The CTL_NET PF_INET IPPROTO_IP subtree. */
static struct rmib_node net_inet_ip_table[] = {
/* 1*/ [IPCTL_FORWARDING] = RMIB_INTPTR(RMIB_RW, &lwip_ip4_forward,
"forwarding",
"Enable forwarding of INET diagrams"),
/* 3*/ [IPCTL_DEFTTL] = RMIB_INT(RMIB_RO, IP_DEFAULT_TTL, "ttl",
"Default TTL for an INET diagram"),
/*23*/ [IPCTL_LOOPBACKCKSUM] = RMIB_FUNC(RMIB_RW | CTLTYPE_INT, sizeof(int),
loopif_cksum, "do_loopback_cksum",
"Perform IP checksum on loopback"),
};
static struct rmib_node net_inet_ip_node =
RMIB_NODE(RMIB_RO, net_inet_ip_table, "ip", "IPv4 related settings");
/* The CTL_NET PF_INET6 IPPROTO_IPV6 subtree. */
static struct rmib_node net_inet6_ip6_table[] = {
/* 1*/ [IPV6CTL_FORWARDING] = RMIB_INTPTR(RMIB_RW, &lwip_ip6_forward,
"forwarding",
"Enable forwarding of INET6 diagrams"),
/*
* The following functionality is not
* implemented in lwIP at this time.
*/
/* 2*/ [IPV6CTL_SENDREDIRECTS] = RMIB_INT(RMIB_RO, 0, "redirect", "Enable "
"sending of ICMPv6 redirect messages"),
/* 3*/ [IPV6CTL_DEFHLIM] = RMIB_INT(RMIB_RO, IP_DEFAULT_TTL, "hlim",
"Hop limit for an INET6 datagram"),
/*12*/ [IPV6CTL_ACCEPT_RTADV] = RMIB_INTPTR(RMIB_RW, &ifaddr_accept_rtadv,
"accept_rtadv",
"Accept router advertisements"),
/*16*/ [IPV6CTL_DAD_COUNT] = RMIB_INT(RMIB_RO,
LWIP_IPV6_DUP_DETECT_ATTEMPTS, "dad_count",
"Number of Duplicate Address Detection "
"probes to send"),
/*24*/ [IPV6CTL_V6ONLY] = RMIB_INTPTR(RMIB_RW, &ipsock_v6only,
"v6only", "Disallow PF_INET6 sockets from "
"connecting to PF_INET sockets"),
/*
* The following setting is significantly
* different from NetBSD, and therefore it has
* a somewhat different description as well.
*/
/*35*/ [IPV6CTL_AUTO_LINKLOCAL]= RMIB_INTPTR(RMIB_RW, &ifaddr_auto_linklocal,
"auto_linklocal", "Enable global support "
"for adding IPv6link-local addresses to "
"interfaces"),
/*
* Temporary addresses are managed entirely by
* userland. We only maintain the settings.
*/
/*+0*/ [IPV6CTL_MAXID] = RMIB_INT(RMIB_RW, 0, "use_tempaddr",
"Use temporary address"),
/*+1*/ [IPV6CTL_MAXID + 1] = RMIB_INT(RMIB_RW, 86400, "temppltime",
"Preferred lifetime of a temporary "
"address"),
/*+2*/ [IPV6CTL_MAXID + 2] = RMIB_INT(RMIB_RW, 604800, "tempvltime",
"Valid lifetime of a temporary address"),
};
static struct rmib_node net_inet6_ip6_node =
RMIB_NODE(RMIB_RO, net_inet6_ip6_table, "ip6", "IPv6 related settings");
/*
* Initialize the IP sockets module.
*/
void
ipsock_init(void)
{
/*
* Register the net.inet.ip and net.inet6.ip6 subtrees. Unlike for the
* specific protocols (TCP/UDP/RAW), here the IPv4 and IPv6 subtrees
* are and must be separate, even though many settings are shared
* between the two at the lwIP level. Ultimately we may have to split
* the subtrees for the specific protocols, too, though..
*/
mibtree_register_inet(AF_INET, IPPROTO_IP, &net_inet_ip_node);
mibtree_register_inet(AF_INET6, IPPROTO_IPV6, &net_inet6_ip6_node);
}
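/*
 * Illustrative userland sketch: once registered above, the settings are
 * reachable through the regular sysctl(3) interface (assumes
 * <sys/sysctl.h> and <stdio.h>).
 */
static void
example_show_forwarding(void)
{
	int val;
	size_t len = sizeof(val);

	if (sysctlbyname("net.inet.ip.forwarding", &val, &len, NULL, 0) == 0)
		printf("IPv4 forwarding: %d\n", val);
}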
/*
* Return the lwIP IP address type (IPADDR_TYPE_) for the given IP socket.
*/
static int
ipsock_get_type(struct ipsock * ip)
{
if (!(ip->ip_flags & IPF_IPV6))
return IPADDR_TYPE_V4;
else if (ip->ip_flags & IPF_V6ONLY)
return IPADDR_TYPE_V6;
else
return IPADDR_TYPE_ANY;
}
/*
* Create an IP socket, for the given (PF_/AF_) domain and initial send and
* receive buffer sizes. Return the lwIP IP address type that should be used
* to create the corresponding PCB. Return a pointer to the libsockevent
* socket in 'sockp'. This function must not allocate any resources in any
* form, as socket creation may still fail later, in which case no destruction
* function is called.
*/
int
ipsock_socket(struct ipsock * ip, int domain, size_t sndbuf, size_t rcvbuf,
struct sock ** sockp)
{
ip->ip_flags = (domain == AF_INET6) ? IPF_IPV6 : 0;
if (domain == AF_INET6 && ipsock_v6only)
ip->ip_flags |= IPF_V6ONLY;
ip->ip_sndbuf = sndbuf;
ip->ip_rcvbuf = rcvbuf;
/* Important: when adding settings here, also change ipsock_clone(). */
*sockp = &ip->ip_sock;
return ipsock_get_type(ip);
}
/*
* Clone the given socket 'ip' into the new socket 'newip', using the socket
* identifier 'newid'. In particular, tell libsockevent about the clone and
* copy over any settings from 'ip' to 'newip' that can be inherited on a
* socket. Cloning is used for new TCP connections arriving on listening TCP
* sockets. This function must not fail.
*/
void
ipsock_clone(struct ipsock * ip, struct ipsock * newip, sockid_t newid)
{
sockevent_clone(&ip->ip_sock, &newip->ip_sock, newid);
/* Inherit all settings from the original socket. */
newip->ip_flags = ip->ip_flags;
newip->ip_sndbuf = ip->ip_sndbuf;
newip->ip_rcvbuf = ip->ip_rcvbuf;
}
/*
* Create an <any> address for the given socket, taking into account whether
* the socket is IPv4, IPv6, or mixed. The generated address, stored in
* 'ipaddr', will have the same type as returned from the ipsock_socket() call.
*/
void
ipsock_get_any_addr(struct ipsock * ip, ip_addr_t * ipaddr)
{
ip_addr_set_any(ipsock_is_ipv6(ip), ipaddr);
if (ipsock_is_ipv6(ip) && !ipsock_is_v6only(ip))
IP_SET_TYPE(ipaddr, IPADDR_TYPE_ANY);
}
/*
* Verify whether the given (properly scoped) IP address is a valid source
* address for the given IP socket. The 'allow_mcast' flag indicates whether
* the source address is allowed to be a multicast address. Return OK on
* success. If 'ifdevp' is not NULL, it is filled with either the interface
* that owns the address, or NULL if the address is (while valid) not
* associated with a particular interface. On failure, return a negative error
* code. This function must be called, in one way or another, for every source
* address used for binding or sending on an IP-layer socket.
*/
int
ipsock_check_src_addr(struct ipsock * ip, ip_addr_t * ipaddr, int allow_mcast,
struct ifdev ** ifdevp)
{
ip6_addr_t *ip6addr;
struct ifdev *ifdev;
uint32_t inaddr, zone;
int is_mcast;
/*
* TODO: for now, forbid binding to multicast addresses. Callers that
* never allow multicast addresses anyway (e.g., IPV6_PKTINFO) should
* do their own check for this; the one here may eventually be removed.
*/
is_mcast = ip_addr_ismulticast(ipaddr);
if (is_mcast && !allow_mcast)
return EADDRNOTAVAIL;
if (IP_IS_V6(ipaddr)) {
/*
* The given address must not have a KAME-style embedded zone.
* This check is already performed in addr_get_inet(), but we
* have to replicate it here because not all source addresses
* go through addr_get_inet().
*/
ip6addr = ip_2_ip6(ipaddr);
if (ip6_addr_has_scope(ip6addr, IP6_UNKNOWN) &&
(ip6addr->addr[0] & PP_HTONL(0x0000ffffUL)))
return EINVAL;
/*
* lwIP does not support IPv4-mapped IPv6 addresses, so these
* must be converted to plain IPv4 addresses instead. The IPv4
* 'any' address is not supported in this form. In V6ONLY
* mode, refuse connecting or sending to IPv4-mapped addresses
* at all.
*/
if (ip6_addr_isipv4mappedipv6(ip6addr)) {
if (ipsock_is_v6only(ip))
return EINVAL;
inaddr = ip6addr->addr[3];
if (inaddr == PP_HTONL(INADDR_ANY))
return EADDRNOTAVAIL;
ip_addr_set_ip4_u32(ipaddr, inaddr);
}
}
ifdev = NULL;
if (!ip_addr_isany(ipaddr)) {
if (IP_IS_V6(ipaddr) &&
ip6_addr_lacks_zone(ip_2_ip6(ipaddr), IP6_UNKNOWN))
return EADDRNOTAVAIL;
/*
* If the address is a unicast address, it must be assigned to
* an interface. Otherwise, if it is a zoned multicast
* address, the zone denotes the interface. For global
* multicast addresses, we cannot determine an interface.
*/
if (!is_mcast) {
if ((ifdev = ifaddr_map_by_addr(ipaddr)) == NULL)
return EADDRNOTAVAIL;
} else {
/* Some multicast addresses are not acceptable. */
if (!addr_is_valid_multicast(ipaddr))
return EINVAL;
if (IP_IS_V6(ipaddr) &&
ip6_addr_has_zone(ip_2_ip6(ipaddr))) {
zone = ip6_addr_zone(ip_2_ip6(ipaddr));
if ((ifdev = ifdev_get_by_index(zone)) == NULL)
return ENXIO;
}
}
}
if (ifdevp != NULL)
*ifdevp = ifdev;
return OK;
}
/*
* Retrieve and validate a source address for use in a socket bind call on
* socket 'ip'. The user-provided address is given as 'addr', with length
* 'addr_len'. The socket's current local IP address and port are given as
* 'local_ip' and 'local_port', respectively; for raw sockets, the given local
* port number is always zero. The caller's endpoint is given as 'user_endpt',
* used to make sure only root can bind to local port numbers. The boolean
* 'allow_mcast' flag indicates whether the source address is allowed to be a
* multicast address. On success, return OK with the source IP address stored
* in 'src_addr' and, if 'src_port' is not NULL, the port number to bind to
* stored in 'src_port'. Otherwise, return a negative error code. This function
* performs all the tasks necessary before the socket can be bound using a lwIP
* call.
*/
int
ipsock_get_src_addr(struct ipsock * ip, const struct sockaddr * addr,
socklen_t addr_len, endpoint_t user_endpt, ip_addr_t * local_ip,
uint16_t local_port, int allow_mcast, ip_addr_t * src_addr,
uint16_t * src_port)
{
uint16_t port;
int r;
/*
* If the socket has been bound already, it cannot be bound again.
* We check this by checking whether the current local port is non-
* zero. This rule does not apply to raw sockets, but raw sockets have
* no port numbers anyway, so this conveniently works out. However,
* raw sockets may not be rebound after being connected, but that is
* checked before we even get here.
*/
if (local_port != 0)
return EINVAL;
/* Parse the user-provided address. */
if ((r = addr_get_inet(addr, addr_len, ipsock_get_type(ip), src_addr,
FALSE /*kame*/, &port)) != OK)
return r;
/* Validate the user-provided address. */
if ((r = ipsock_check_src_addr(ip, src_addr, allow_mcast,
NULL /*ifdevp*/)) != OK)
return r;
/*
* If we are interested in port numbers at all (for non-raw sockets,
* meaning portp is not NULL), make sure that only the superuser can
* bind to privileged port numbers. For raw sockets, only the
* superuser can open a socket anyway, so we need no check here.
*/
if (src_port != NULL) {
if (port != 0 && port < IPPORT_RESERVED &&
!util_is_root(user_endpt))
return EACCES;
*src_port = port;
}
return OK;
}
/*
* Retrieve and validate a destination address for use in a socket connect or
* sendto call. The user-provided address is given as 'addr', with length
* 'addr_len'. The socket's current local IP address is given as 'local_addr'.
* On success, return OK with the destination IP address stored in 'dst_addr'
* and, if 'dst_port' is not NULL, the destination port number stored in
* 'dst_port'. Otherwise, return a negative error code. This function must be
* called, in one way or another, for every destination address used for
* connecting or sending on an IP-layer socket.
*/
int
ipsock_get_dst_addr(struct ipsock * ip, const struct sockaddr * addr,
socklen_t addr_len, const ip_addr_t * local_addr, ip_addr_t * dst_addr,
uint16_t * dst_port)
{
uint16_t port;
int r;
/* Parse the user-provided address. */
if ((r = addr_get_inet(addr, addr_len, ipsock_get_type(ip), dst_addr,
FALSE /*kame*/, &port)) != OK)
return r;
/* Destination addresses are always specific. */
if (IP_GET_TYPE(dst_addr) == IPADDR_TYPE_ANY)
IP_SET_TYPE(dst_addr, IPADDR_TYPE_V6);
/*
* lwIP does not support IPv4-mapped IPv6 addresses, so these must be
* converted to plain IPv4 addresses instead. In V6ONLY mode, refuse
* connecting or sending to IPv4-mapped addresses at all.
*/
if (IP_IS_V6(dst_addr) &&
ip6_addr_isipv4mappedipv6(ip_2_ip6(dst_addr))) {
if (ipsock_is_v6only(ip))
return EINVAL;
ip_addr_set_ip4_u32(dst_addr, ip_2_ip6(dst_addr)->addr[3]);
}
/*
* Now make sure that the local and remote addresses are of the same
* family. The local address may be of type IPADDR_TYPE_ANY, which is
* allowed for both IPv4 and IPv6. Even for connectionless socket
* types we must perform this check as part of connect calls (as well
* as sendto calls!) because otherwise we will create problems for
* sysctl based socket enumeration (i.e., netstat), which uses the
* local IP address type to determine the socket family.
*/
if (IP_GET_TYPE(local_addr) != IPADDR_TYPE_ANY &&
IP_IS_V6(local_addr) != IP_IS_V6(dst_addr))
return EINVAL;
/*
* TODO: on NetBSD, an 'any' destination address is replaced with a
* local interface address.
*/
if (ip_addr_isany(dst_addr))
return EHOSTUNREACH;
/*
* If the address is a multicast address, the multicast address itself
* must be valid.
*/
if (ip_addr_ismulticast(dst_addr) &&
!addr_is_valid_multicast(dst_addr))
return EINVAL;
/*
* TODO: decide whether to add a zone to a scoped IPv6 address that
* lacks a zone. For now, we let lwIP handle this, as lwIP itself
* will always add the zone at some point. If anything changes there,
* this would be the place to set the zone (using a route lookup).
*/
/*
* For now, we do not forbid or alter any other particular destination
* addresses.
*/
if (dst_port != NULL) {
/*
* Disallow connecting/sending to port zero. There is no error
* code that applies well to this case, so we copy NetBSD's.
*/
if (port == 0)
return EADDRNOTAVAIL;
*dst_port = port;
}
return OK;
}
/*
* Store the address 'ipaddr' associated with the socket 'ip' (for example, it
* may be the local or remote IP address of the socket) as a sockaddr structure
* in 'addr'. A port number is provided as 'port' (in host-byte order) if
* relevant, and zero is passed in otherwise. This function MUST only be
* called from contexts where 'addr' is a buffer provided by libsockevent or
* libsockdriver, meaning that it is of size SOCKADDR_MAX. The value pointed
* to by 'addr_len' is not expected to be initialized in calls to this function
* (and will typically be zero). On return, 'addr_len' is filled with the length
* of the address generated in 'addr'. This function never fails.
*/
void
ipsock_put_addr(struct ipsock * ip, struct sockaddr * addr,
socklen_t * addr_len, ip_addr_t * ipaddr, uint16_t port)
{
ip_addr_t mappedaddr;
/*
* If the socket is an AF_INET6-type socket, and the given address is
* an IPv4-type address, store it as an IPv4-mapped IPv6 address.
*/
if (ipsock_is_ipv6(ip) && IP_IS_V4(ipaddr)) {
addr_make_v4mapped_v6(&mappedaddr, ip_2_ip4(ipaddr));
ipaddr = &mappedaddr;
}
/*
* We have good reasons to keep the sockdriver and sockevent APIs as
* they are, namely, defaulting 'addr_len' to zero such that the caller
* must provide a non-zero length (only) when returning a valid
* address. The consequence here is that we have to know the size of
* the provided buffer. For libsockevent callbacks, we are always
* guaranteed to get a buffer of at least this size.
*/
*addr_len = SOCKADDR_MAX;
addr_put_inet(addr, addr_len, ipaddr, FALSE /*kame*/, port);
}
/*
* Set socket options on an IP socket.
*/
int
ipsock_setsockopt(struct ipsock * ip, int level, int name,
const struct sockdriver_data * data, socklen_t len,
struct ipopts * ipopts)
{
int r, val, allow;
uint8_t type;
switch (level) {
case SOL_SOCKET:
switch (name) {
case SO_SNDBUF:
if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
len)) != OK)
return r;
if (val <= 0 || (size_t)val < ipopts->sndmin ||
(size_t)val > ipopts->sndmax)
return EINVAL;
ip->ip_sndbuf = val;
return OK;
case SO_RCVBUF:
if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
len)) != OK)
return r;
if (val <= 0 || (size_t)val < ipopts->rcvmin ||
(size_t)val > ipopts->rcvmax)
return EINVAL;
ip->ip_rcvbuf = val;
return OK;
}
break;
case IPPROTO_IP:
if (ipsock_is_ipv6(ip))
break;
switch (name) {
case IP_TOS:
if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
len)) != OK)
return r;
if (val < 0 || val > UINT8_MAX)
return EINVAL;
*ipopts->tos = (uint8_t)val;
return OK;
case IP_TTL:
if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
len)) != OK)
return r;
if (val < 0 || val > UINT8_MAX)
return EINVAL;
*ipopts->ttl = (uint8_t)val;
return OK;
}
break;
case IPPROTO_IPV6:
if (!ipsock_is_ipv6(ip))
break;
switch (name) {
case IPV6_UNICAST_HOPS:
if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
len)) != OK)
return r;
if (val < -1 || val > UINT8_MAX)
return EINVAL;
if (val == -1)
val = IP_DEFAULT_TTL;
*ipopts->ttl = val;
return OK;
case IPV6_TCLASS:
if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
len)) != OK)
return r;
if (val < -1 || val > UINT8_MAX)
return EINVAL;
if (val == -1)
val = 0;
*ipopts->tos = val;
return OK;
case IPV6_V6ONLY:
if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
len)) != OK)
return r;
/*
* If the socket has been bound to an actual address,
* we still allow the option to be changed, but it no
* longer has any effect.
*/
type = IP_GET_TYPE(ipopts->local_ip);
allow = (type == IPADDR_TYPE_ANY ||
(type == IPADDR_TYPE_V6 &&
ip_addr_isany(ipopts->local_ip)));
if (val) {
ip->ip_flags |= IPF_V6ONLY;
type = IPADDR_TYPE_V6;
} else {
ip->ip_flags &= ~IPF_V6ONLY;
type = IPADDR_TYPE_ANY;
}
if (allow)
IP_SET_TYPE(ipopts->local_ip, type);
return OK;
}
break;
}
return ENOPROTOOPT;
}
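/*
 * Illustrative userland sketch of the IPV6_V6ONLY case above: clearing
 * the option on a not-yet-bound AF_INET6 socket lets it handle
 * IPv4-mapped traffic as well (assumes <sys/socket.h>, <netinet/in.h>).
 */
static int
example_enable_dual_stack(int fd)
{
	int off = 0;

	return setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &off, sizeof(off));
}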
/*
* Retrieve socket options on an IP socket.
*/
int
ipsock_getsockopt(struct ipsock * ip, int level, int name,
const struct sockdriver_data * data, socklen_t * len,
struct ipopts * ipopts)
{
int val;
switch (level) {
case SOL_SOCKET:
switch (name) {
case SO_SNDBUF:
val = ip->ip_sndbuf;
return sockdriver_copyout_opt(data, &val, sizeof(val),
len);
case SO_RCVBUF:
val = ip->ip_rcvbuf;
return sockdriver_copyout_opt(data, &val, sizeof(val),
len);
}
break;
case IPPROTO_IP:
if (ipsock_is_ipv6(ip))
break;
switch (name) {
case IP_TOS:
val = (int)*ipopts->tos;
return sockdriver_copyout_opt(data, &val, sizeof(val),
len);
case IP_TTL:
val = (int)*ipopts->ttl;
return sockdriver_copyout_opt(data, &val, sizeof(val),
len);
}
break;
case IPPROTO_IPV6:
if (!ipsock_is_ipv6(ip))
break;
switch (name) {
case IPV6_UNICAST_HOPS:
val = *ipopts->ttl;
return sockdriver_copyout_opt(data, &val, sizeof(val),
len);
case IPV6_TCLASS:
val = *ipopts->tos;
return sockdriver_copyout_opt(data, &val, sizeof(val),
len);
case IPV6_V6ONLY:
val = !!(ip->ip_flags & IPF_V6ONLY);
return sockdriver_copyout_opt(data, &val, sizeof(val),
len);
}
break;
}
return ENOPROTOOPT;
}
/*
* Fill the given kinfo_pcb sysctl(7) structure with IP-level information.
*/
void
ipsock_get_info(struct kinfo_pcb * ki, const ip_addr_t * local_ip,
uint16_t local_port, const ip_addr_t * remote_ip, uint16_t remote_port)
{
ip_addr_t ipaddr;
socklen_t len;
uint8_t type;
len = sizeof(ki->ki_spad); /* use this for the full size, not ki_src */
addr_put_inet(&ki->ki_src, &len, local_ip, TRUE /*kame*/, local_port);
/*
* At this point, the local IP address type has already been used to
* determine whether this is an IPv4 or IPv6 socket. While not ideal,
* that is the best we can do: we cannot use IPv4-mapped IPv6 addresses
* in lwIP PCBs, we cannot store the original type in those PCBs, and
* we also cannot rely on the PCB having an associated ipsock object
* anymore. We also cannot use the ipsock only when present: it could
* make a TCP PCB "jump" from IPv6 to IPv4 in the netstat listing when
* it goes into TIME_WAIT state, for example.
*
* So, use *only* the type of the local IP address to determine whether
* this is an IPv4 or an IPv6 socket. At the same time, do *not* rely
* on the remote IP address being IPv4 for a local IPv4 address; it may
* be of type IPADDR_TYPE_V6 for an unconnected socket bound to an
* IPv4-mapped IPv6 address. Pretty messy, but we're limited by what
* lwIP offers here. Since it's just netstat, it need not be perfect.
*/
if ((type = IP_GET_TYPE(local_ip)) == IPADDR_TYPE_V4) {
if (!ip_addr_isany(local_ip) || local_port != 0)
ki->ki_prstate = INP_BOUND;
/*
* Make sure the returned socket address types are consistent.
* The only case where the remote IP address is not IPv4 here
* is when it is not set yet, so there is no need to check
* whether it is the 'any' address: it always is.
*/
if (IP_GET_TYPE(remote_ip) != IPADDR_TYPE_V4) {
ip_addr_set_zero_ip4(&ipaddr);
remote_ip = &ipaddr;
}
} else {
if (!ip_addr_isany(local_ip) || local_port != 0)
ki->ki_prstate = IN6P_BOUND;
if (type != IPADDR_TYPE_ANY)
ki->ki_pflags |= IN6P_IPV6_V6ONLY;
}
len = sizeof(ki->ki_dpad); /* use this for the full size, not ki_dst */
addr_put_inet(&ki->ki_dst, &len, remote_ip, TRUE /*kame*/,
remote_port);
/* Check the type of the *local* IP address here. See above. */
if (!ip_addr_isany(remote_ip) || remote_port != 0) {
if (type == IPADDR_TYPE_V4)
ki->ki_prstate = INP_CONNECTED;
else
ki->ki_prstate = IN6P_CONNECTED;
}
}

95
minix/net/lwip/ipsock.h Normal file
View File

@ -0,0 +1,95 @@
#ifndef MINIX_NET_LWIP_IPSOCK_H
#define MINIX_NET_LWIP_IPSOCK_H
/* IP-level socket, shared by TCP, UDP, and RAW. */
struct ipsock {
struct sock ip_sock; /* socket object, MUST be first */
unsigned int ip_flags; /* all socket flags */
size_t ip_sndbuf; /* send buffer size */
size_t ip_rcvbuf; /* receive buffer size */
};
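/*
 * Illustrative sketch (hypothetical names): a protocol module embeds this
 * structure as the first member of its own socket structure, so that
 * pointers to the protocol socket, the ipsock, and the libsockevent sock
 * are all interchangeable through casts.
 */
struct example_sock {
	struct ipsock es_ipsock;	/* ipsock object, MUST be first */
	void *es_pcb;			/* protocol-specific lwIP PCB */
};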
/*
* Socket flags. In order to reduce memory consumption, all these flags are
* stored in the same field (ipsock.ip_flags), and thus flags used by the same
* socket type must not overlap; that is why they are all defined here. For
* example, UDPF/PKTF/IPF should all be unique, and TCPF/IPF should be unique,
* but UDPF/PKTF may overlap with TCPF and UDPF may overlap with RAWF. In
* practice, we have no UDPF or RAWF flags and plenty of space to make all
* flags unique anyway.
*/
#define IPF_IPV6 0x0000001 /* socket is IPv6 */
#define IPF_V6ONLY 0x0000002 /* socket is IPv6 only */
#define PKTF_RECVINFO 0x0000010 /* receive ancillary PKTINFO */
#define PKTF_RECVTTL 0x0000020 /* receive ancillary TTL */
#define PKTF_RECVTOS 0x0000040 /* receive ancillary TOS */
#define PKTF_MCAWARE 0x0000080 /* owner is multicast aware */
#define TCPF_CONNECTING 0x0001000 /* attempting to connect */
#define TCPF_SENT_FIN 0x0002000 /* send FIN when possible */
#define TCPF_RCVD_FIN 0x0004000 /* received FIN from peer */
#define TCPF_FULL 0x0008000 /* PCB send buffer is full */
#define TCPF_OOM 0x0010000 /* memory allocation failed */
#define ipsock_get_sock(ip) (&(ip)->ip_sock)
#define ipsock_is_ipv6(ip) ((ip)->ip_flags & IPF_IPV6)
#define ipsock_is_v6only(ip) ((ip)->ip_flags & IPF_V6ONLY)
#define ipsock_get_flags(ip) ((ip)->ip_flags)
#define ipsock_get_flag(ip,fl) ((ip)->ip_flags & (fl))
#define ipsock_set_flag(ip,fl) ((ip)->ip_flags |= (fl))
#define ipsock_clear_flag(ip,fl) ((ip)->ip_flags &= ~(fl))
#define ipsock_get_sndbuf(ip) ((ip)->ip_sndbuf)
#define ipsock_get_rcvbuf(ip) ((ip)->ip_rcvbuf)
/*
* IP-level option pointers. This is necessary because even though lwIP's
* TCP, UDP, and RAW PCBs share the same initial fields, the C standard does
* not permit generic access to such initial fields (due to both possible
* padding differences and strict-aliasing rules). The fields in this
* structure are therefore pointers to the initial fields of each of the PCB
* structures. If lwIP ever groups its IP PCB fields into a single structure
* and uses that structure as first field of each of the other PCBs, then we
* will be able to replace this structure with a pointer to the IP PCB instead.
* For convenience we also carry the send and receive buffer limits here.
*/
struct ipopts {
ip_addr_t *local_ip;
ip_addr_t *remote_ip;
uint8_t *tos;
uint8_t *ttl;
size_t sndmin;
size_t sndmax;
size_t rcvmin;
size_t rcvmax;
};
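/*
 * Illustrative sketch of filling this structure, modeled on what a UDP-
 * style protocol module would do.  The PCB field names are lwIP's (from
 * "lwip/udp.h"); the buffer limits are made-up example values.
 */
static void
example_get_ipopts(struct udp_pcb * pcb, struct ipopts * ipopts)
{
	ipopts->local_ip = &pcb->local_ip;
	ipopts->remote_ip = &pcb->remote_ip;
	ipopts->tos = &pcb->tos;
	ipopts->ttl = &pcb->ttl;
	ipopts->sndmin = 8192;		/* minimum send buffer size */
	ipopts->sndmax = 65536;		/* maximum send buffer size */
	ipopts->rcvmin = 8192;		/* minimum receive buffer size */
	ipopts->rcvmax = 65536;		/* maximum receive buffer size */
}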
struct ifdev;
void ipsock_init(void);
int ipsock_socket(struct ipsock * ip, int domain, size_t sndbuf, size_t rcvbuf,
struct sock ** sockp);
void ipsock_clone(struct ipsock * ip, struct ipsock * newip, sockid_t newid);
void ipsock_get_any_addr(struct ipsock * ip, ip_addr_t * ipaddr);
int ipsock_check_src_addr(struct ipsock * ip, ip_addr_t * ipaddr,
int allow_mcast, struct ifdev ** ifdevp);
int ipsock_get_src_addr(struct ipsock * ip, const struct sockaddr * addr,
socklen_t addr_len, endpoint_t user_endpt, ip_addr_t * local_ip,
uint16_t local_port, int allow_mcast, ip_addr_t * ipaddr,
uint16_t * portp);
int ipsock_get_dst_addr(struct ipsock * ip, const struct sockaddr * addr,
socklen_t addr_len, const ip_addr_t * local_addr, ip_addr_t * dst_addr,
uint16_t * dst_port);
void ipsock_put_addr(struct ipsock * ip, struct sockaddr * addr,
socklen_t * addr_len, ip_addr_t * ipaddr, uint16_t port);
int ipsock_setsockopt(struct ipsock * ip, int level, int name,
const struct sockdriver_data * data, socklen_t len,
struct ipopts * ipopts);
int ipsock_getsockopt(struct ipsock * ip, int level, int name,
const struct sockdriver_data * data, socklen_t * len,
struct ipopts * ipopts);
void ipsock_get_info(struct kinfo_pcb * ki, const ip_addr_t * local_ip,
uint16_t local_port, const ip_addr_t * remote_ip,
uint16_t remote_port);
#endif /* !MINIX_NET_LWIP_IPSOCK_H */
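
As the ipopts comment above describes, each protocol module points this
structure at the IP-level fields of its own lwIP PCB. A minimal sketch of how
a UDP-based module might fill it in; the helper name and the buffer limit
values are hypothetical, not part of this commit:

	static void
	udpsock_fill_ipopts(struct udp_pcb * pcb, struct ipopts * ipopts)
	{
		/* lwIP's udp_pcb, tcp_pcb, and raw_pcb all start with these
		 * same IP-level fields, via lwIP's IP_PCB field macro. */
		ipopts->local_ip = &pcb->local_ip;
		ipopts->remote_ip = &pcb->remote_ip;
		ipopts->tos = &pcb->tos;
		ipopts->ttl = &pcb->ttl;
		/* Send/receive buffer limits; values illustrative only. */
		ipopts->sndmin = 512;
		ipopts->sndmax = 65536;
		ipopts->rcvmin = 512;
		ipopts->rcvmax = 65536;
	}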

584
minix/net/lwip/lldata.c Normal file
View File

@ -0,0 +1,584 @@
/* LWIP service - lldata.c - link-layer (ARP, NDP) data related routines */
/*
* This module is largely isolated from the regular routing code. There are
* two reasons for that. First, mixing link-layer routes with regular routes
* would not work well due to the fact that lwIP keeps these data structures
* entirely separate. Second, as of version 8, NetBSD keeps the IP-layer and
* link-layer routing separate as well.
*
* Unfortunately, lwIP does not provide much in the way of implementing the
* functionality that would be expected for this module. As such, the current
* implementation is very restricted and simple.
*
* For ARP table entries, lwIP only allows for adding and deleting static
* entries. Non-static entries cannot be deleted. Incomplete (pending)
* entries cannot even be enumerated, nor can (e.g.) expiry information be
* obtained. The lwIP ARP data structures are completely hidden, so there is no
* way to overcome these limitations without changing lwIP itself. As a
* result, not all functionality of the arp(8) userland utility is supported.
*
* For NDP table entries, lwIP offers no API at all. However, since the data
* structures are exposed directly, we can use those to implement full support
* for exposing information in a read-only way. Manipulating those data
* structures directly from here would be too risky, however, and lwIP also
* does not currently support the concept of static NDP table entries.
* Therefore, adding, changing, and deleting NDP entries is currently not
* supported; implementing that will first require changes to lwIP itself.
*
* The ndp(8) userland utility is also able to show and manipulate various
* other neighbor discovery related tables and settings. We support only a
* small subset of them. The main reason for this is that the other tables,
* in particular the prefix and default router lists, are not relevant: on
* MINIX 3, these are always managed fully in userland (usually dhcpcd(8)), and
* we even hardcode lwIP not to parse Router Advertisement messages at all, so
* even though those tables are still part of lwIP, they are always empty.
* Other ndp(8) functionality is unsupported for similar reasons.
*/
#include "lwip.h"
#include "lldata.h"
#include "route.h"
#include "rtsock.h"
#include "lwip/etharp.h"
#include "lwip/nd6.h"
#include "lwip/priv/nd6_priv.h" /* for neighbor_cache */
/*
* Process a routing command specifically for an ARP table entry. Return OK if
* the routing command has been processed successfully and a routing socket
* reply message has already been generated. Return a negative error code on
* failure, in which case the caller will generate a reply message instead.
*/
static int
lldata_arp_process(unsigned int type, const ip_addr_t * dst_addr,
const struct eth_addr * gw_addr, struct ifdev * ifdev,
unsigned int flags, const struct rtsock_request * rtr)
{
const ip4_addr_t *ip4addr;
struct eth_addr ethaddr, *ethptr;
struct netif *netif;
lldata_arp_num_t num;
err_t err;
netif = (ifdev != NULL) ? ifdev_get_netif(ifdev) : NULL;
num = etharp_find_addr(netif, ip_2_ip4(dst_addr), &ethptr, &ip4addr);
if (type != RTM_ADD && num < 0)
return ESRCH;
else if (type == RTM_ADD && num >= 0)
return EEXIST;
switch (type) {
case RTM_CHANGE:
/*
* This request is not used by arp(8), so keep things simple.
* For RTM_ADD we support only static entries, so we support only
* those here too, which lets us implement the change as a
* delete-and-re-add. If the ethernet address is not being
* changed, re-add the entry with its previous ethernet address.
*/
if (gw_addr == NULL)
gw_addr = ethptr;
if (etharp_remove_static_entry(ip_2_ip4(dst_addr)) != ERR_OK)
return EPERM;
/* FALLTHROUGH */
case RTM_ADD:
assert(gw_addr != NULL);
memcpy(&ethaddr, gw_addr, sizeof(ethaddr));
/*
* Adding static, permanent, unpublished, non-proxy entries is
* all that lwIP supports right now. We also do not get to
* specify the interface, and the way lwIP picks the interface
* may in fact result in a different one.
*/
if ((err = etharp_add_static_entry(ip_2_ip4(dst_addr),
&ethaddr)) != ERR_OK)
return util_convert_err(err);
if ((num = etharp_find_addr(NULL /*netif*/, ip_2_ip4(dst_addr),
&ethptr, &ip4addr)) < 0)
panic("unable to find just-added static ARP entry");
/* FALLTHROUGH */
case RTM_LOCK:
case RTM_GET:
rtsock_msg_arp(num, type, rtr);
return OK;
case RTM_DELETE:
memcpy(&ethaddr, ethptr, sizeof(ethaddr));
if (etharp_remove_static_entry(ip_2_ip4(dst_addr)) != ERR_OK)
return EPERM;
/*
* FIXME: the following block is a hack, because we cannot
* predict whether the above removal will succeed, while at the
* same time we need the entry to be present in order to report
* the deleted address to the routing socket. We temporarily
* re-add and then remove the entry just for the purpose of
* generating the routing socket reply. There are other ways
* to resolve this, but only a better lwIP etharp API would
* allow us to resolve this problem cleanly.
*/
(void)etharp_add_static_entry(ip_2_ip4(dst_addr), &ethaddr);
num = etharp_find_addr(NULL /*netif*/, ip_2_ip4(dst_addr),
&ethptr, &ip4addr);
assert(num >= 0);
rtsock_msg_arp(num, type, rtr);
(void)etharp_remove_static_entry(ip_2_ip4(dst_addr));
return OK;
default:
return EINVAL;
}
}
/*
* Enumerate ARP table entries. Return TRUE if there is at least one more ARP
* table entry, of which the number is stored in 'num'. The caller should set
* 'num' to 0 initially, and increase it by one between a successful call and
* the next call. Return FALSE if there are no more ARP table entries.
*/
int
lldata_arp_enum(lldata_arp_num_t * num)
{
ip4_addr_t *ip4addr;
struct netif *netif;
struct eth_addr *ethaddr;
for (; *num < ARP_TABLE_SIZE; ++*num) {
if (etharp_get_entry(*num, &ip4addr, &netif, &ethaddr))
return TRUE;
}
return FALSE;
}
/*
* Obtain information about the ARP table entry identified by 'num'. The IPv4
* address of the entry is stored in 'addr'. Its ethernet address is stored in
* 'gateway'. The associated interface is stored in 'ifdevp', and the entry's
* routing flags (RTF_) are stored in 'flagsp'.
*/
void
lldata_arp_get(lldata_arp_num_t num, struct sockaddr_in * addr,
struct sockaddr_dlx * gateway, struct ifdev ** ifdevp,
unsigned int * flagsp)
{
ip_addr_t ipaddr;
ip4_addr_t *ip4addr;
struct netif *netif;
struct ifdev *ifdev;
struct eth_addr *ethaddr;
socklen_t addr_len;
if (!etharp_get_entry(num, &ip4addr, &netif, &ethaddr))
panic("request for invalid ARP entry");
ip_addr_copy_from_ip4(ipaddr, *ip4addr);
assert(netif != NULL);
ifdev = netif_get_ifdev(netif);
addr_len = sizeof(*addr);
addr_put_inet((struct sockaddr *)addr, &addr_len, &ipaddr,
TRUE /*kame*/, 0 /*port*/);
addr_len = sizeof(*gateway);
addr_put_link((struct sockaddr *)gateway, &addr_len,
ifdev_get_index(ifdev), ifdev_get_iftype(ifdev), NULL /*name*/,
ethaddr->addr, sizeof(ethaddr->addr));
*ifdevp = ifdev;
/*
* TODO: this is not necessarily accurate, but lwIP does not provide us
* with information as to whether this is a static entry or not..
*/
*flagsp = RTF_HOST | RTF_LLINFO | RTF_LLDATA | RTF_STATIC | RTF_CLONED;
}
/*
* Obtain information about the ND6 neighbor cache entry 'i', which must be a
* number between 0 (inclusive) and LWIP_ND6_NUM_NEIGHBORS (exclusive). If an
* entry with this number exists, return a pointer to its IPv6 address, and
* additional information in each of the given pointers if not NULL. The
* associated interface is stored in 'netif'. If the entry has an associated
* link-layer address, a pointer to it is stored in 'lladdr'. The entry's
* state (ND6_{INCOMPLETE,REACHABLE,STALE,DELAY,PROBE}) is stored in 'state'.
* The 'isrouter' parameter is filled with a boolean value indicating whether
* the entry is for a router. For ND6_INCOMPLETE and ND6_PROBE, the number of
* probes sent so far is stored in 'probes_sent'; for other states, the value
* is set to zero. For ND6_REACHABLE and ND6_DELAY, the time until expiration
* in ND6_TMR_INTERVAL-millisecond units is stored in 'expire_time'; for other
* states, the value is set to zero. If an entry with number 'i' does not
* exist, NULL is returned.
*
* TODO: upstream this function to lwIP.
*/
static const ip6_addr_t *
nd6_get_neighbor_cache_entry(int8_t i, struct netif ** netif,
const uint8_t ** lladdr, uint8_t * state, uint8_t * isrouter,
uint32_t * probes_sent, uint32_t * expire_time)
{
if (i < 0 || i >= LWIP_ND6_NUM_NEIGHBORS ||
neighbor_cache[i].state == ND6_NO_ENTRY)
return NULL;
if (netif != NULL)
*netif = neighbor_cache[i].netif;
if (lladdr != NULL) {
if (neighbor_cache[i].state != ND6_INCOMPLETE)
*lladdr = neighbor_cache[i].lladdr;
else
*lladdr = NULL;
}
if (state != NULL)
*state = neighbor_cache[i].state;
if (isrouter != NULL)
*isrouter = neighbor_cache[i].isrouter;
if (probes_sent != NULL) {
if (neighbor_cache[i].state == ND6_INCOMPLETE ||
neighbor_cache[i].state == ND6_PROBE)
*probes_sent = neighbor_cache[i].counter.probes_sent;
else
*probes_sent = 0;
}
if (expire_time != NULL) {
switch (neighbor_cache[i].state) {
case ND6_REACHABLE:
*expire_time =
neighbor_cache[i].counter.reachable_time /
ND6_TMR_INTERVAL;
break;
case ND6_DELAY:
*expire_time = neighbor_cache[i].counter.delay_time;
break;
case ND6_INCOMPLETE:
case ND6_PROBE:
/* Probes are sent once per timer tick. */
*expire_time = (LWIP_ND6_MAX_MULTICAST_SOLICIT + 1 -
neighbor_cache[i].counter.probes_sent) *
(ND6_TMR_INTERVAL / 1000);
break;
default:
/* Stale entries do not expire; they get replaced. */
*expire_time = 0;
break;
}
}
return &neighbor_cache[i].next_hop_address;
}
/*
* Find a neighbor cache entry by IPv6 address. Return its index number if
* found, or -1 if not. This is a reimplementation of the exact same function
* internal to lwIP.
*
* TODO: make this function public in lwIP.
*/
static int8_t
nd6_find_neighbor_cache_entry(const ip6_addr_t * addr)
{
int8_t i;
for (i = 0; i < LWIP_ND6_NUM_NEIGHBORS; i++) {
if (ip6_addr_cmp(addr, &neighbor_cache[i].next_hop_address))
return i;
}
return -1;
}
/*
* Find an NDP table entry based on the given interface and IPv6 address. On
* success, return OK, with the entry's index number stored in 'nump'. On
* failure, return an appropriate error code.
*/
int
lldata_ndp_find(struct ifdev * ifdev, const struct sockaddr_in6 * addr,
lldata_ndp_num_t * nump)
{
ip_addr_t ipaddr;
int8_t i;
int r;
if ((r = addr_get_inet((const struct sockaddr *)addr, sizeof(*addr),
IPADDR_TYPE_V6, &ipaddr, TRUE /*kame*/, NULL /*port*/)) != OK)
return r;
/*
* A given link-local address may come without any zone set in the
* address at all. In such cases, add the zone ourselves, using the
* given interface.
*/
if (ip6_addr_lacks_zone(ip_2_ip6(&ipaddr), IP6_UNKNOWN))
ip6_addr_assign_zone(ip_2_ip6(&ipaddr), IP6_UNKNOWN,
ifdev_get_netif(ifdev));
i = nd6_find_neighbor_cache_entry(ip_2_ip6(&ipaddr));
if (i < 0)
return ESRCH;
/*
* We should compare the neighbor cache entry's associated netif to
* the given ifdev, but since the lwIP neighbor cache is currently not
* keyed by netif anyway (i.e. the internal lookups are purely by IPv6
* address as well), doing so makes little sense in practice.
*/
*nump = (lldata_ndp_num_t)i;
return OK;
}
/*
* Process a routing command specifically for an NDP table entry. Return OK if
* the routing command has been processed successfully and a routing socket
* reply message has already been generated. Return a negative error code on
* failure, in which case the caller will generate a reply message instead.
*/
static int
lldata_ndp_process(unsigned int type, const ip_addr_t * dst_addr,
const struct eth_addr * gw_addr,
struct ifdev * ifdev, unsigned int flags,
const struct rtsock_request * rtr)
{
lldata_ndp_num_t num;
num = (lldata_ndp_num_t)
nd6_find_neighbor_cache_entry(ip_2_ip6(dst_addr));
if (type != RTM_ADD && num < 0)
return ESRCH;
else if (type == RTM_ADD && num >= 0)
return EEXIST;
switch (type) {
case RTM_LOCK:
case RTM_GET:
rtsock_msg_arp(num, type, rtr);
return OK;
case RTM_ADD:
case RTM_CHANGE:
case RTM_DELETE:
/* TODO: add lwIP support to implement these commands. */
return ENOSYS;
default:
return EINVAL;
}
}
/*
* Enumerate NDP table entries. Return TRUE if there is at least one more NDP
* table entry, of which the number is stored in 'num'. The caller should set
* 'num' to 0 initially, and increase it by one between a successful call and
* the next call. Return FALSE if there are no more NDP table entries.
*/
int
lldata_ndp_enum(lldata_ndp_num_t * num)
{
for (; *num < LWIP_ND6_NUM_NEIGHBORS; ++*num) {
if (nd6_get_neighbor_cache_entry(*num, NULL /*netif*/,
NULL /*lladdr*/, NULL /*state*/, NULL /*isrouter*/,
NULL /*probes_sent*/, NULL /*expire_time*/) != NULL)
return TRUE;
}
return FALSE;
}
/*
* Obtain information about the NDP table entry identified by 'num'. The IPv6
* address of the entry is stored in 'addr'. Its ethernet address is stored in
* 'gateway'. The associated interface is stored in 'ifdevp', and the entry's
* routing flags (RTF_) are stored in 'flagsp'.
*/
void
lldata_ndp_get(lldata_ndp_num_t num, struct sockaddr_in6 * addr,
struct sockaddr_dlx * gateway, struct ifdev ** ifdevp,
unsigned int * flagsp)
{
const ip6_addr_t *ip6addr;
ip_addr_t ipaddr;
struct netif *netif;
struct ifdev *ifdev;
const uint8_t *lladdr;
socklen_t addr_len;
ip6addr = nd6_get_neighbor_cache_entry(num, &netif, &lladdr,
NULL /*state*/, NULL /*isrouter*/, NULL /*probes_sent*/,
NULL /*expire_time*/);
assert(ip6addr != NULL);
ip_addr_copy_from_ip6(ipaddr, *ip6addr);
ifdev = netif_get_ifdev(netif);
assert(ifdev != NULL);
addr_len = sizeof(*addr);
addr_put_inet((struct sockaddr *)addr, &addr_len, &ipaddr,
TRUE /*kame*/, 0 /*port*/);
addr_len = sizeof(*gateway);
addr_put_link((struct sockaddr *)gateway, &addr_len,
ifdev_get_index(ifdev), ifdev_get_iftype(ifdev), NULL /*name*/,
lladdr, ifdev_get_hwlen(ifdev));
*ifdevp = ifdev;
*flagsp = RTF_HOST | RTF_LLINFO | RTF_LLDATA | RTF_CLONED;
}
/*
* Obtain information about the NDP table entry with the number 'num', which
* must be obtained through a previous call to lldata_ndp_find(). On return,
* 'asked' is filled with the number of probes sent so far (0 if inapplicable),
* 'isrouter' is set to 1 or 0 depending on whether the entry is for a router,
* 'state' is set to the entry's state (ND6_LLINFO_), and 'expire' is set to
* the UNIX timestamp of expiry for the entry, or 0 for permanent entries.
* None of the given pointers may be NULL. This function always succeeds.
*/
void
lldata_ndp_get_info(lldata_ndp_num_t num, long * asked, int * isrouter,
int * state, int * expire)
{
uint32_t nd6_probes_sent = 0 /*gcc*/, nd6_expire_time = 0 /*gcc*/;
uint8_t nd6_state = 0 /*gcc*/, nd6_isrouter = 0 /*gcc*/;
(void)nd6_get_neighbor_cache_entry(num, NULL /*netif*/,
NULL /*lladdr*/, &nd6_state, &nd6_isrouter, &nd6_probes_sent,
&nd6_expire_time);
*asked = (long)nd6_probes_sent;
*isrouter = !!nd6_isrouter;
switch (nd6_state) {
case ND6_INCOMPLETE: *state = ND6_LLINFO_INCOMPLETE; break;
case ND6_REACHABLE: *state = ND6_LLINFO_REACHABLE; break;
case ND6_STALE: *state = ND6_LLINFO_STALE; break;
case ND6_DELAY: *state = ND6_LLINFO_DELAY; break;
case ND6_PROBE: *state = ND6_LLINFO_PROBE; break;
default: panic("unknown ND6 state %u", nd6_state);
}
if (nd6_expire_time != 0)
*expire = clock_time(NULL) +
(int)nd6_expire_time * (ND6_TMR_INTERVAL / 1000);
else
*expire = 0;
}
/*
* Process a routing command specifically for a link-layer route, as one of the
* specific continuations of processing started by route_process(). The RTM_
* routing command is given as 'type'. The route destination is given as
* 'dst_addr'; its address type determines whether the operation is for ARP or
* NDP. The sockaddr structure for 'gateway' is passed on as is and may have
* to be parsed here if not NULL. 'ifdev' is the interface to be associated
* with the route; it is non-NULL only if an interface name (IFP) or address
* (IFA) was given. The RTF_ flags field has been checked against the globally
* supported flags, but may have to be checked for flags that do not apply to
* ARP/NDP routes. Return OK or a negative error code, following the same
* semantics as route_process().
*/
int
lldata_process(unsigned int type, const ip_addr_t * dst_addr,
const struct sockaddr * gateway, struct ifdev * ifdev,
unsigned int flags, const struct rtsock_request * rtr)
{
const struct route_entry *route;
struct eth_addr ethaddr, *gw_addr;
int r;
assert(flags & RTF_LLDATA);
/*
* It seems that RTF_UP does not apply to link-layer routing entries.
* We basically accept any flags that we can return, but we do not
* actually check most of them anywhere.
*/
if ((flags & ~(RTF_HOST | RTF_LLINFO | RTF_LLDATA | RTF_STATIC |
RTF_CLONED | RTF_ANNOUNCE)) != 0)
return EINVAL;
gw_addr = NULL;
if (type == RTM_ADD || type == RTM_CHANGE) {
/*
* Link-layer entries are always host entries. Not all
* requests pass in this flag though, so check only when the
* flags are supposed to be set.
*/
if ((type == RTM_ADD || type == RTM_CHANGE) &&
!(flags & RTF_HOST))
return EINVAL;
/* lwIP does not support publishing custom entries. */
if (flags & RTF_ANNOUNCE)
return ENOSYS;
/* RTF_GATEWAY is always cleared for link-layer entries. */
if (gateway != NULL) {
if ((r = addr_get_link(gateway, gateway->sa_len,
NULL /*name*/, 0 /*name_max*/, ethaddr.addr,
sizeof(ethaddr.addr))) != OK)
return r;
gw_addr = &ethaddr;
}
if (type == RTM_ADD) {
if (gateway == NULL)
return EINVAL;
/*
* If no interface has been specified, see if the
* destination address is on a locally connected
* network. If so, use that network's interface.
* Otherwise reject the request altogether: we must
* have an interface to which to associate the entry.
*/
if (ifdev == NULL) {
if ((route = route_lookup(dst_addr)) != NULL &&
!(route_get_flags(route) & RTF_GATEWAY))
ifdev = route_get_ifdev(route);
else
return ENETUNREACH;
}
}
}
if (IP_IS_V4(dst_addr))
return lldata_arp_process(type, dst_addr, gw_addr, ifdev,
flags, rtr);
else
return lldata_ndp_process(type, dst_addr, gw_addr, ifdev,
flags, rtr);
}

27
minix/net/lwip/lldata.h Normal file
View File

@ -0,0 +1,27 @@
#ifndef MINIX_NET_LWIP_LLDATA_H
#define MINIX_NET_LWIP_LLDATA_H
struct rtsock_request;
typedef int lldata_arp_num_t; /* ARP table entry number */
typedef int lldata_ndp_num_t; /* NDP table entry number */
int lldata_arp_enum(lldata_arp_num_t * num);
void lldata_arp_get(lldata_arp_num_t num, struct sockaddr_in * addr,
struct sockaddr_dlx * gateway, struct ifdev ** ifdevp,
unsigned int * flagsp);
int lldata_ndp_find(struct ifdev * ifdev,
const struct sockaddr_in6 * addr, lldata_ndp_num_t * nump);
int lldata_ndp_enum(lldata_ndp_num_t * num);
void lldata_ndp_get(lldata_ndp_num_t num, struct sockaddr_in6 * addr,
struct sockaddr_dlx * gateway, struct ifdev ** ifdevp,
unsigned int * flagsp);
void lldata_ndp_get_info(lldata_ndp_num_t num, long * asked, int * isrouter,
int * state, int * expire);
int lldata_process(unsigned int type, const ip_addr_t * dst_addr,
const struct sockaddr * gateway, struct ifdev * ifdev,
unsigned int flags, const struct rtsock_request * rtr);
#endif /* !MINIX_NET_LWIP_LLDATA_H */
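
The enumeration functions declared above follow the caller-driven pattern
documented in lldata.c: start at number 0, and increase the number by one
after each successful call. A minimal usage sketch from a hypothetical
caller, not part of this commit:

	static void
	dump_arp_table(void)
	{
		lldata_arp_num_t num;
		struct sockaddr_in addr;
		struct sockaddr_dlx gateway;
		struct ifdev *ifdev;
		unsigned int flags;

		for (num = 0; lldata_arp_enum(&num); num++) {
			/* 'num' identifies a valid entry; get its details. */
			lldata_arp_get(num, &addr, &gateway, &ifdev, &flags);
		}
	}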

77
minix/net/lwip/lnksock.c Normal file
View File

@ -0,0 +1,77 @@
/* LWIP service - lnksock.c - link sockets */
/*
* This module contains absolutely minimal support for AF_LINK type sockets,
* because for now we need them only to support a specific set of IOCTLs, as
* required by for example ifconfig(8).
*/
#include "lwip.h"
/* The number of link sockets. */
#define NR_LNKSOCK 4
static struct lnksock {
struct sock lnk_sock; /* socket object, MUST be first */
SIMPLEQ_ENTRY(lnksock) lnk_next; /* next in free list */
} lnk_array[NR_LNKSOCK];
static SIMPLEQ_HEAD(, lnksock) lnk_freelist; /* list of free link sockets */
static const struct sockevent_ops lnksock_ops;
/*
* Initialize the link sockets module.
*/
void
lnksock_init(void)
{
unsigned int slot;
/* Initialize the list of free link sockets. */
SIMPLEQ_INIT(&lnk_freelist);
for (slot = 0; slot < __arraycount(lnk_array); slot++)
SIMPLEQ_INSERT_TAIL(&lnk_freelist, &lnk_array[slot], lnk_next);
}
/*
* Create a link socket.
*/
sockid_t
lnksock_socket(int type, int protocol, struct sock ** sockp,
const struct sockevent_ops ** ops)
{
struct lnksock *lnk;
if (type != SOCK_DGRAM)
return EPROTOTYPE;
if (protocol != 0)
return EPROTONOSUPPORT;
if (SIMPLEQ_EMPTY(&lnk_freelist))
return ENOBUFS;
lnk = SIMPLEQ_FIRST(&lnk_freelist);
SIMPLEQ_REMOVE_HEAD(&lnk_freelist, lnk_next);
*sockp = &lnk->lnk_sock;
*ops = &lnksock_ops;
return SOCKID_LNK | (sockid_t)(lnk - lnk_array);
}
/*
* Free up a closed link socket.
*/
static void
lnksock_free(struct sock * sock)
{
struct lnksock *lnk = (struct lnksock *)sock;
SIMPLEQ_INSERT_HEAD(&lnk_freelist, lnk, lnk_next);
}
static const struct sockevent_ops lnksock_ops = {
.sop_ioctl = ifconf_ioctl,
.sop_free = lnksock_free
};

420
minix/net/lwip/loopif.c Normal file
View File

@ -0,0 +1,420 @@
/* LWIP service - loopif.c - loopback interfaces */
/*
* There is always at least one loopback device. This device is also used to
* loop back packets that are sent on other interfaces to those interfaces'
* own local addresses. Therefore, not all packets on the loopback device have
* a source or destination address corresponding to the loopback device.
*/
#include "lwip.h"
/*
* As a safety measure, if lwIP somehow gets stuck in a loop replying to its
* own packets on a loopback interface, stop immediately feeding packets back
* into lwIP after this many packets. The remaining packets will still be
* delivered, but not before the main message loop has had a chance to run.
*/
#define LOOPIF_LIMIT 65536
/*
* The MTU is restricted to 65531 bytes, because we need space for a 4-byte
* header to identify the original interface of the packet.
*/
#define LOOPIF_MAX_MTU (UINT16_MAX - sizeof(uint32_t)) /* maximum MTU */
#define LOOPIF_DEF_MTU LOOPIF_MAX_MTU /* default MTU */
#define NR_LOOPIF 2 /* number of loopback devices */
struct loopif {
struct ifdev loopif_ifdev; /* interface device, MUST be first */
struct pbuf *loopif_head; /* head of pending loopback packets */
struct pbuf **loopif_tailp; /* tail ptr-ptr of pending packets */
TAILQ_ENTRY(loopif) loopif_next; /* next in free list */
} loopif_array[NR_LOOPIF];
static TAILQ_HEAD(, loopif) loopif_freelist; /* free loop interfaces list */
static TAILQ_HEAD(, loopif) loopif_activelist; /* active loop interfaces */
#define loopif_get_netif(loopif) (ifdev_get_netif(&(loopif)->loopif_ifdev))
static unsigned int loopif_cksum_flags;
static int loopif_create(const char *name);
static const struct ifdev_ops loopif_ops;
/*
* Initialize the loopback interface module.
*/
void
loopif_init(void)
{
unsigned int slot;
/* Initialize the lists of loopback interfaces. */
TAILQ_INIT(&loopif_freelist);
TAILQ_INIT(&loopif_activelist);
for (slot = 0; slot < __arraycount(loopif_array); slot++)
TAILQ_INSERT_TAIL(&loopif_freelist, &loopif_array[slot],
loopif_next);
/*
* The default is to perform no checksumming on loopback interfaces,
* except for ICMP messages because otherwise we would need additional
* changes in the code receiving those. In fact, for future
* compatibility, disable only those flags that we manage ourselves.
*/
loopif_cksum_flags = NETIF_CHECKSUM_ENABLE_ALL &
~(NETIF_CHECKSUM_GEN_IP | NETIF_CHECKSUM_CHECK_IP |
NETIF_CHECKSUM_GEN_UDP | NETIF_CHECKSUM_CHECK_UDP |
NETIF_CHECKSUM_GEN_TCP | NETIF_CHECKSUM_CHECK_TCP);
/* Tell the ifdev module that users may create more loopif devices. */
ifdev_register("lo", loopif_create);
}
/*
* Polling function, invoked after each message loop iteration. Forward any
* packets received on the output side of the loopback device during this
* loop iteration, to the input side of the device.
*/
static void
loopif_poll(struct ifdev * ifdev)
{
struct loopif *loopif = (struct loopif *)ifdev;
struct pbuf *pbuf, **pnext;
struct ifdev *oifdev;
struct netif *netif;
uint32_t oifindex;
unsigned int count;
static int warned = FALSE;
count = 0;
while ((pbuf = loopif->loopif_head) != NULL) {
/*
* Prevent endless loops. Keep in mind that packets may be
* added to the queue as part of processing packets from the
* queue here, so the queue itself will never reach this
* length. As such the limit can (and must) be fairly high.
*
* In any case, if this warning is shown, that basically means
* that a bug in lwIP has been triggered. There should be no
* such bugs, so if there are, they should be fixed in lwIP.
*/
if (count++ == LOOPIF_LIMIT) {
if (!warned) {
printf("LWIP: excess loopback traffic, "
"throttling output\n");
warned = TRUE;
}
break;
}
pnext = pchain_end(pbuf);
if ((loopif->loopif_head = *pnext) == NULL)
loopif->loopif_tailp = &loopif->loopif_head;
*pnext = NULL;
/*
* Get the original interface index for the packet. If it is
* non-zero, the packet must also be passed back to the interface
* that it identifies. The interface should still exist in all
* cases, but better safe than sorry.
*/
memcpy(&oifindex, pbuf->payload, sizeof(oifindex));
util_pbuf_header(pbuf, -(int)sizeof(oifindex));
if (oifindex != 0 &&
(oifdev = ifdev_get_by_index(oifindex)) != NULL)
netif = ifdev_get_netif(oifdev);
else
netif = NULL;
/*
* Loopback devices hand packets to BPF on output only. Doing
* so on input as well would duplicate all captured packets.
*/
ifdev_input(ifdev, pbuf, netif, FALSE /*to_bpf*/);
}
}
/*
* Process a packet as output on a loopback interface. Packets cannot be
* passed back into lwIP right away, nor can the original packets be passed
* back into lwIP. Therefore, make a copy of the packet, and pass it back to
* lwIP at the end of the current message loop iteration.
*/
static err_t
loopif_output(struct ifdev * ifdev, struct pbuf * pbuf, struct netif * netif)
{
struct loopif *loopif = (struct loopif *)ifdev;
struct ifdev *oifdev;
struct pbuf *pcopy;
uint32_t oifindex;
/* Reject oversized packets immediately. This should not happen. */
if (pbuf->tot_len > UINT16_MAX - sizeof(oifindex)) {
printf("LWIP: attempt to send oversized loopback packet\n");
return ERR_MEM;
}
/*
* If the service is low on memory, this is a likely place where
* allocation failures will occur. Thus, do not print anything here.
* The user can diagnose such problems with interface statistics.
*/
pcopy = pchain_alloc(PBUF_RAW, sizeof(oifindex) + pbuf->tot_len);
if (pcopy == NULL) {
ifdev_output_drop(ifdev);
return ERR_MEM;
}
/*
* If the packet was purposely diverted from a non-loopback interface
* to this interface, we have to remember the original interface, so
* that we can pass back the packet to that interface as well. If we
* don't, packets to link-local addresses assigned to non-loopback
* interfaces will not be processed correctly.
*/
if (netif != NULL) {
oifdev = netif_get_ifdev(netif);
oifindex = ifdev_get_index(oifdev);
} else
oifindex = 0;
assert(pcopy->len >= sizeof(oifindex));
memcpy(pcopy->payload, &oifindex, sizeof(oifindex));
util_pbuf_header(pcopy, -(int)sizeof(oifindex));
if (pbuf_copy(pcopy, pbuf) != ERR_OK)
panic("unexpected pbuf copy failure");
pcopy->flags |= pbuf->flags & (PBUF_FLAG_LLMCAST | PBUF_FLAG_LLBCAST);
util_pbuf_header(pcopy, sizeof(oifindex));
*loopif->loopif_tailp = pcopy;
loopif->loopif_tailp = pchain_end(pcopy);
return ERR_OK;
}
/*
* Initialization function for a loopback-type netif interface, called from
* lwIP at interface creation time.
*/
static err_t
loopif_init_netif(struct ifdev * ifdev, struct netif * netif)
{
netif->name[0] = 'l';
netif->name[1] = 'o';
/*
* FIXME: unfortunately, lwIP does not allow one to enable multicast on
* an interface without also enabling multicast management traffic
* (that is, IGMP and MLD). Thus, for now, joining multicast groups
* and assigning local IPv6 addresses will incur such traffic even on
* loopback interfaces. For now this is preferable over not supporting
* multicast on loopback interfaces at all.
*/
netif->flags |= NETIF_FLAG_IGMP | NETIF_FLAG_MLD6;
NETIF_SET_CHECKSUM_CTRL(netif, loopif_cksum_flags);
return ERR_OK;
}
/*
* Create a new loopback device.
*/
static int
loopif_create(const char * name)
{
struct loopif *loopif;
/* Find a free loopback interface slot, if available. */
if (TAILQ_EMPTY(&loopif_freelist))
return ENOBUFS;
loopif = TAILQ_FIRST(&loopif_freelist);
TAILQ_REMOVE(&loopif_freelist, loopif, loopif_next);
/* Initialize the loopif structure. */
TAILQ_INSERT_HEAD(&loopif_activelist, loopif, loopif_next);
loopif->loopif_head = NULL;
loopif->loopif_tailp = &loopif->loopif_head;
/*
* For simplicity and efficiency, we do not prepend the address family
* (IPv4/IPv6) to the packet for BPF, which means our loopback devices
* are of type DLT_RAW rather than (NetBSD's) DLT_NULL.
*/
ifdev_add(&loopif->loopif_ifdev, name, IFF_LOOPBACK | IFF_MULTICAST,
IFT_LOOP, 0 /*hdrlen*/, 0 /*addrlen*/, DLT_RAW, LOOPIF_MAX_MTU,
0 /*nd6flags*/, &loopif_ops);
ifdev_update_link(&loopif->loopif_ifdev, LINK_STATE_UP);
return OK;
}
/*
* Destroy an existing loopback device.
*/
static int
loopif_destroy(struct ifdev * ifdev)
{
struct loopif *loopif = (struct loopif *)ifdev;
struct pbuf *pbuf, **pnext;
int r;
/*
* The ifdev module may refuse to remove this interface if it is the
* loopback interface used to loop back packets for other interfaces.
*/
if ((r = ifdev_remove(&loopif->loopif_ifdev)) != OK)
return r;
/*
* Clean up. The loopback queue can be non-empty only if we have been
* throttling in case of a feedback loop.
*/
while ((pbuf = loopif->loopif_head) != NULL) {
pnext = pchain_end(pbuf);
if ((loopif->loopif_head = *pnext) == NULL)
loopif->loopif_tailp = &loopif->loopif_head;
*pnext = NULL;
pbuf_free(pbuf);
}
TAILQ_REMOVE(&loopif_activelist, loopif, loopif_next);
TAILQ_INSERT_HEAD(&loopif_freelist, loopif, loopif_next);
return OK;
}
/*
* Set NetBSD-style interface flags (IFF_) for a loopback interface.
*/
static int
loopif_set_ifflags(struct ifdev * ifdev, unsigned int ifflags)
{
struct loopif *loopif = (struct loopif *)ifdev;
/*
* Only the IFF_UP flag may be set and cleared. We adjust the
* IFF_RUNNING flag immediately based on this flag. This is a bit
* dangerous, but the caller takes this possibility into account.
*/
if ((ifflags & ~IFF_UP) != 0)
return EINVAL;
if (ifflags & IFF_UP)
ifdev_update_ifflags(&loopif->loopif_ifdev,
ifdev_get_ifflags(&loopif->loopif_ifdev) | IFF_RUNNING);
else
ifdev_update_ifflags(&loopif->loopif_ifdev,
ifdev_get_ifflags(&loopif->loopif_ifdev) & ~IFF_RUNNING);
return OK;
}
/*
* Set the Maximum Transmission Unit for this interface. Return TRUE if the
* new value is acceptable, in which case the caller will do the rest. Return
* FALSE otherwise.
*/
static int
loopif_set_mtu(struct ifdev * ifdev __unused, unsigned int mtu)
{
return (mtu <= LOOPIF_MAX_MTU);
}
static const struct ifdev_ops loopif_ops = {
.iop_init = loopif_init_netif,
.iop_input = ip_input,
.iop_output = loopif_output,
.iop_poll = loopif_poll,
.iop_set_ifflags = loopif_set_ifflags,
.iop_set_mtu = loopif_set_mtu,
.iop_destroy = loopif_destroy,
};
/*
* Set and/or retrieve a per-protocol loopback checksumming option through
* sysctl(7).
*/
ssize_t
loopif_cksum(struct rmib_call * call, struct rmib_node * node __unused,
struct rmib_oldp * oldp, struct rmib_newp * newp)
{
struct loopif *loopif;
unsigned int flags;
int r, val;
/*
* The third name field is the protocol. We ignore the domain (the
* second field), thus sharing settings between PF_INET and PF_INET6.
* This is necessary because lwIP does not support TCP/UDP checksumming
* flags on a per-domain basis.
*/
switch (call->call_oname[2]) {
case IPPROTO_IP:
flags = NETIF_CHECKSUM_GEN_IP | NETIF_CHECKSUM_CHECK_IP;
break;
case IPPROTO_UDP:
flags = NETIF_CHECKSUM_GEN_UDP | NETIF_CHECKSUM_CHECK_UDP;
break;
case IPPROTO_TCP:
flags = NETIF_CHECKSUM_GEN_TCP | NETIF_CHECKSUM_CHECK_TCP;
break;
default:
return EINVAL;
}
/* Copy out the old (current) checksumming option. */
if (oldp != NULL) {
val = !!(loopif_cksum_flags & flags);
if ((r = rmib_copyout(oldp, 0, &val, sizeof(val))) < 0)
return r;
}
if (newp != NULL) {
if ((r = rmib_copyin(newp, &val, sizeof(val))) != OK)
return r;
if (val)
loopif_cksum_flags |= flags;
else
loopif_cksum_flags &= ~flags;
/*
* Apply the new checksum flags to all loopback interfaces.
* Technically, this may result in dropped packets when
* enabling checksumming on a throttled loopif, but that is a
* case so rare and unimportant that we ignore it.
*/
TAILQ_FOREACH(loopif, &loopif_activelist, loopif_next) {
NETIF_SET_CHECKSUM_CTRL(loopif_get_netif(loopif),
loopif_cksum_flags);
}
}
/* Return the length of the node. */
return sizeof(val);
}
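
The output and poll routines above communicate the originating interface
through a four-byte index prepended to each queued packet copy, which is also
why LOOPIF_MAX_MTU is UINT16_MAX minus that header. A standalone sketch of
the same framing, using a plain buffer instead of a pbuf purely for
illustration:

	#include <assert.h>
	#include <stdint.h>
	#include <string.h>

	int
	main(void)
	{
		uint8_t frame[sizeof(uint32_t) + 1500]; /* header + packet */
		uint32_t oifindex = 7, index;

		/* Encode: store the originating interface index up front. */
		memcpy(frame, &oifindex, sizeof(oifindex));
		/* ...packet data follows at frame + sizeof(oifindex)... */

		/* Decode: read the index back, then skip past the header. */
		memcpy(&index, frame, sizeof(index));
		assert(index == 7);
		return 0;
	}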

382
minix/net/lwip/lwip.c Normal file
View File

@ -0,0 +1,382 @@
/* LWIP service - lwip.c - main program and dispatch code */
#include "lwip.h"
#include "tcpisn.h"
#include "mcast.h"
#include "ethif.h"
#include "rtsock.h"
#include "route.h"
#include "bpfdev.h"
#include "lwip/init.h"
#include "lwip/sys.h"
#include "lwip/timeouts.h"
#include "arch/cc.h"
static int running, recheck_timer;
static minix_timer_t lwip_timer;
static void expire_lwip_timer(int);
/*
* Return the system uptime in milliseconds. Also remember that lwIP retrieved
* the system uptime during this call, so that we know to check for timer
* updates at the end of the current iteration of the message loop.
*/
uint32_t
sys_now(void)
{
recheck_timer = TRUE;
/* TODO: avoid 64-bit arithmetic if possible. */
return (uint32_t)(((uint64_t)getticks() * 1000) / sys_hz());
}
/*
* Check if and when lwIP has its next timeout, and set or cancel our timer
* accordingly.
*/
static void
set_lwip_timer(void)
{
uint32_t next_timeout;
clock_t ticks;
/* Ask lwIP when the next alarm is supposed to go off, if any. */
next_timeout = sys_timeouts_sleeptime();
/*
* Set or update the lwIP timer. We rely on set_timer() asking the
* kernel for an alarm only if the timeout is different from the one we
* gave it last time (if at all). However, due to conversions between
* absolute and relative times, and the fact that we cannot guarantee
* that the uptime itself does not change while executing these
* routines, set_timer() will sometimes be issuing a kernel call even
* if the alarm has not changed. Not a huge deal, but fixing this will
* require a different interface to lwIP and/or the timers library.
*/
if (next_timeout != (uint32_t)-1) {
/*
* Round up the next timeout (which is in milliseconds) to the
* number of clock ticks to add to the current time. Avoid any
* potential for overflows, no matter how unrealistic..
*/
if (next_timeout > TMRDIFF_MAX / sys_hz())
ticks = TMRDIFF_MAX;
else
ticks = (next_timeout * sys_hz() + 999) / 1000;
set_timer(&lwip_timer, ticks, expire_lwip_timer, 0 /*unused*/);
} else
cancel_timer(&lwip_timer); /* not really needed.. */
}
/*
* The timer for lwIP timeouts has gone off. Check timeouts, and possibly set
* a new timer.
*/
static void
expire_lwip_timer(int arg __unused)
{
/* Let lwIP do its work. */
sys_check_timeouts();
/*
* See if we have to update our timer for the next lwIP timer. Doing
* this here, rather than from the main loop, avoids one kernel call.
*/
set_lwip_timer();
recheck_timer = FALSE;
}
/*
* Check whether we should adjust our local timer based on a change in the next
* lwIP timeout.
*/
static void
check_lwip_timer(void)
{
/*
* We make the assumption that whenever lwIP starts a timer, it will
* need to retrieve the current time. Thus, whenever sys_now() is
* called, we set the 'recheck_timer' flag. Here, we check whether to
* (re)set our lwIP timer only if the flag is set. As a result, we do
* not have to mess with timers for literally every incoming message.
*
* When lwIP stops a timer, it does not call sys_now(), and thus, we
* may miss such updates. However, timers being stopped should be rare
* and getting too many alarm messages is not a big deal.
*/
if (!recheck_timer)
return;
set_lwip_timer();
/* Reset the flag for the next message loop iteration. */
recheck_timer = FALSE;
}
/*
* Return a random number, for use by lwIP.
*/
uint32_t
lwip_hook_rand(void)
{
/*
* The current known uses of this hook are for selection of initial
* TCP/UDP port numbers and for multicast-related timer randomness.
* The former case exists only to avoid picking the same starting port
* numbers after a reboot. After that, simple sequential iteration of
* the port numbers is used. The latter case varies the response time
* for sending multicast messages. Thus, none of the current uses of
* this function require proper randomness, and so we use the simplest
* approach, with time-based initialization to cover the reboot case.
* The sequential port number selection could be improved upon, but
* such an extension would probably bypass this hook anyway.
*/
return lrand48();
}
/*
* Create a new socket, with the given domain, type, and protocol, for the user
* process identified by 'user_endpt'. On success, return the new socket's
* identifier, with the libsockevent socket stored in 'sock' and an operations
* table stored in 'ops'. On failure, return a negative error code.
*/
static sockid_t
alloc_socket(int domain, int type, int protocol, endpoint_t user_endpt,
struct sock ** sock, const struct sockevent_ops **ops)
{
switch (domain) {
case PF_INET:
#ifdef INET6
case PF_INET6:
#endif /* INET6 */
switch (type) {
case SOCK_STREAM:
return tcpsock_socket(domain, protocol, sock, ops);
case SOCK_DGRAM:
return udpsock_socket(domain, protocol, sock, ops);
case SOCK_RAW:
if (!util_is_root(user_endpt))
return EACCES;
return rawsock_socket(domain, protocol, sock, ops);
default:
return EPROTOTYPE;
}
case PF_ROUTE:
return rtsock_socket(type, protocol, sock, ops);
case PF_LINK:
return lnksock_socket(type, protocol, sock, ops);
default:
/* This means that the service has been misconfigured. */
printf("socket() with unsupported domain %d\n", domain);
return EAFNOSUPPORT;
}
}
/*
* Initialize the service.
*/
static int
init(int type __unused, sef_init_info_t * init __unused)
{
/*
* Initialize the random number seed. See the lwip_hook_rand() comment
* on why this weak random number source is currently sufficient.
*/
srand48(clock_time(NULL));
/* Initialize the lwIP library. */
lwip_init();
/* Initialize the socket events library. */
sockevent_init(alloc_socket);
/* Initialize various helper modules. */
mempool_init();
tcpisn_init();
mcast_init();
/* Initialize the high-level socket modules. */
ipsock_init();
tcpsock_init();
udpsock_init();
rawsock_init();
/* Initialize the various network interface modules. */
ifdev_init();
loopif_init();
ethif_init();
/* Initialize the network device driver module. */
ndev_init();
/* Initialize the low-level socket modules. */
rtsock_init();
lnksock_init();
/* Initialize the routing module. */
route_init();
/* Initialize other device modules. */
bpfdev_init();
/*
* Initialize the MIB module, after all other modules have registered
* their subtrees with this module.
*/
mibtree_init();
/*
* After everything else has been initialized, set up the default
* configuration - in particular, a loopback interface.
*/
ifconf_init();
/*
* Initialize the master timer for all the lwIP timers. Just in case
* lwIP starts a timer right away, perform a first check upon entry of
* the message loop.
*/
init_timer(&lwip_timer);
recheck_timer = TRUE;
running = TRUE;
return OK;
}
/*
* Perform initialization using the System Event Framework (SEF).
*/
static void
startup(void)
{
sef_setcb_init_fresh(init);
/*
* This service requires stateless restarts, in that several parts of
* the system (including VFS and drivers) expect that if restarted,
* this service comes back up with a new endpoint. Therefore, do not
* set a _restart callback here.
*
* TODO: support for live update.
*
* TODO: support for immediate shutdown if no sockets are in use, as
* also done by UDS. For now, we never shut down immediately, giving
* other processes the opportunity to close sockets on system shutdown.
*/
sef_startup();
}
/*
* The lwIP-based TCP/IP sockets driver.
*/
int
main(void)
{
message m;
int r, ipc_status;
startup();
while (running) {
/*
* For various reasons, the loopback interface does not pass
* packets back into the stack right away. Instead, it queues
* them up for later processing. We do that processing here.
*/
ifdev_poll();
/*
* Unfortunately, lwIP does not tell us when it starts or stops
* timers. This means that we have to check ourselves every
* time we have called into lwIP. For simplicity, we perform
* the check here.
*/
check_lwip_timer();
if ((r = sef_receive_status(ANY, &m, &ipc_status)) != OK) {
if (r == EINTR)
continue; /* sef_cancel() was called */
panic("sef_receive_status failed: %d", r);
}
/* Process the received message. */
if (is_ipc_notify(ipc_status)) {
switch (m.m_source) {
case CLOCK:
expire_timers(m.m_notify.timestamp);
break;
case DS_PROC_NR:
/* Network drivers went up and/or down. */
ndev_check();
break;
default:
printf("unexpected notify from %d\n",
m.m_source);
}
continue;
}
switch (m.m_source) {
case MIB_PROC_NR:
rmib_process(&m, ipc_status);
break;
case VFS_PROC_NR:
/* Is this a socket device request? */
if (IS_SDEV_RQ(m.m_type)) {
sockevent_process(&m, ipc_status);
break;
}
/* Is this a character (or block) device request? */
if (IS_CDEV_RQ(m.m_type) || IS_BDEV_RQ(m.m_type)) {
bpfdev_process(&m, ipc_status);
break;
}
/* FALLTHROUGH */
default:
/* Is this a network device driver response? */
if (IS_NDEV_RS(m.m_type)) {
ndev_process(&m, ipc_status);
break;
}
printf("unexpected message %d from %d\n",
m.m_type, m.m_source);
}
}
return 0;
}
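
The millisecond-to-tick conversion in set_lwip_timer() above deliberately
rounds up, so that the alarm never goes off before the lwIP timeout is
actually due. A small standalone illustration of that arithmetic, with an
assumed clock frequency (the real code uses sys_hz() and caps at TMRDIFF_MAX):

	#include <assert.h>
	#include <stdint.h>

	int
	main(void)
	{
		uint32_t hz = 50;		/* assumed clock frequency */
		uint32_t next_timeout = 30;	/* ms until next lwIP timeout */
		uint32_t ticks;

		/* 30 ms at 50 Hz is 1.5 ticks; round up to 2 ticks so that
		 * the alarm cannot fire early. */
		ticks = (next_timeout * hz + 999) / 1000;
		assert(ticks == 2);
		return 0;
	}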

10
minix/net/lwip/lwip.conf Normal file
View File

@ -0,0 +1,10 @@
service lwip
{
domain
INET INET6 ROUTE LINK
;
system KILL; # for SIGPIPE
ipc
SYSTEM vfs rs vm mib
;
};

130
minix/net/lwip/lwip.h Normal file
View File

@ -0,0 +1,130 @@
#ifndef MINIX_NET_LWIP_LWIP_H
#define MINIX_NET_LWIP_LWIP_H
#include <minix/drivers.h>
#include <minix/sockevent.h>
#include <minix/rmib.h>
#include <netinet/in.h>
#include <sys/ioctl.h>
#include <net/bpf.h>
#include "lwip/ip.h"
#include "lwiphooks.h"
#include "addr.h"
#include "ipsock.h"
#include "ifdev.h"
#include "util.h"
/*
* The standard sockaddr_dl is an absolute pain, because the actual structure
* is dynamically sized, while the standard definition is neither the minimum
* nor the maximum size. We use our own version, which uses the maximum size
* that we will ever produce and accept. This greatly simplifies dealing with
* this structure while also limiting stack usage a bit.
*/
struct sockaddr_dlx {
uint8_t sdlx_len; /* actual length of this structure */
sa_family_t sdlx_family; /* address family, always AF_LINK */
uint16_t sdlx_index; /* interface index */
uint8_t sdlx_type; /* interface type (IFT_) */
uint8_t sdlx_nlen; /* interface name length, w/o nul */
uint8_t sdlx_alen; /* link-layer address length */
uint8_t sdlx_slen; /* selector length, always 0 */
uint8_t sdlx_data[IFNAMSIZ + NETIF_MAX_HWADDR_LEN];
};
STATIC_SOCKADDR_MAX_ASSERT(sockaddr_in);
STATIC_SOCKADDR_MAX_ASSERT(sockaddr_in6);
STATIC_SOCKADDR_MAX_ASSERT(sockaddr_dlx);
/* This is our own, much smaller internal version of sockaddr_storage. */
union sockaddr_any {
struct sockaddr sa;
struct sockaddr_in sin;
struct sockaddr_in6 sin6;
struct sockaddr_dlx sdlx;
};
/* Number of bits in each of the types of IP addresses. */
#define IP4_BITS 32 /* number of bits in an IPv4 address */
#define IP6_BITS 128 /* number of bits in an IPv6 address */
/*
* Each socket module maintains its own set of sockets, but all sockets must be
* given globally unique identifiers. Therefore, we use these modifier masks,
* which are bitwise OR'ed with the per-module socket identifiers.
*/
#define SOCKID_TCP 0x00000000
#define SOCKID_UDP 0x00100000
#define SOCKID_RAW 0x00200000
#define SOCKID_RT 0x00400000
#define SOCKID_LNK 0x00800000
/*
* Static remote MIB node identifiers for nodes that are dynamically numbered
* on NetBSD, because they do not have a corresponding protocol family number.
*/
#define NET_INTERFACES (PF_MAX) /* net.interfaces (TODO) */
#define NET_BPF (PF_MAX + 1) /* net.bpf */
#define ROOT_EUID 0 /* effective user ID of superuser */
/*
* Function declarations. Modules with more extended interfaces have their own
* header files.
*/
/* mempool.c */
void mempool_init(void);
unsigned int mempool_cur_buffers(void);
unsigned int mempool_max_buffers(void);
/* pchain.c */
struct pbuf **pchain_end(struct pbuf * pbuf);
size_t pchain_size(struct pbuf * pbuf);
/* addrpol.c */
int addrpol_get_label(const ip_addr_t * ipaddr);
int addrpol_get_scope(const ip_addr_t * ipaddr, int is_src);
/* tcpsock.c */
void tcpsock_init(void);
sockid_t tcpsock_socket(int domain, int protocol, struct sock ** sock,
const struct sockevent_ops ** ops);
/* udpsock.c */
void udpsock_init(void);
sockid_t udpsock_socket(int domain, int protocol, struct sock ** sock,
const struct sockevent_ops ** ops);
/* rawsock.c */
void rawsock_init(void);
sockid_t rawsock_socket(int domain, int protocol, struct sock ** sock,
const struct sockevent_ops ** ops);
/* loopif.c */
void loopif_init(void);
ssize_t loopif_cksum(struct rmib_call * call, struct rmib_node * node,
struct rmib_oldp * oldp, struct rmib_newp * newp);
/* lnksock.c */
void lnksock_init(void);
sockid_t lnksock_socket(int type, int protocol, struct sock ** sock,
const struct sockevent_ops ** ops);
/* mibtree.c */
void mibtree_init(void);
void mibtree_register_inet(int domain, int protocol, struct rmib_node * node);
void mibtree_register_lwip(struct rmib_node * node);
/* ifconf.c */
void ifconf_init(void);
int ifconf_ioctl(struct sock * sock, unsigned long request,
const struct sockdriver_data * data, endpoint_t user_endpt);
/* bpf_filter.c */
u_int bpf_filter_ext(const struct bpf_insn * pc, const struct pbuf * pbuf,
const u_char * packet, u_int total, u_int len);
#endif /* !MINIX_NET_LWIP_LWIP_H */
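
Since the SOCKID_ modifiers above occupy bits well above any per-module socket
number, a module composes a globally unique identifier by OR'ing its slot
number with its modifier, as lnksock_socket() does. A hypothetical sketch of
composing and decomposing such an identifier; the 20-bit slot mask is an
assumption based on the modifier values, not a constant from this commit:

	#define SOCKID_SLOT_MASK 0x000fffff	/* assumed: bits below modifiers */

	static void
	sockid_example(void)
	{
		sockid_t id;
		unsigned int slot = 3;

		id = SOCKID_UDP | (sockid_t)slot;	/* tag slot 3 as UDP */
		assert((id & ~SOCKID_SLOT_MASK) == SOCKID_UDP); /* module */
		assert((unsigned int)(id & SOCKID_SLOT_MASK) == slot); /* slot */
	}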

283
minix/net/lwip/mcast.c Normal file
View File

@ -0,0 +1,283 @@
/* LWIP service - mcast.c - per-socket multicast membership tracking */
/*
* Each socket has a linked list of multicast groups of which it is a member.
* The linked list consists of 'mcast_member' elements. There is both a global
* limit (the number of elements in 'mcast_array') and a per-socket limit on
* group membership. Since multiple sockets may join the same multicast
* groups, there is not a one-to-one relationship between our membership
* structures and the lwIP IGMP/MLD membership structures. Moreover, linking
* to the latter structures directly is not intended by lwIP, so we have to
* keep our own tracking independent, which in particular means that we have to
* make a copy of the multicast group address.
*
* We currently put no effort into saving memory on storing that group address.
* Optimization is complicated by the fact that we have to be able to remove
* membership structures when their corresponding interface disappears, which
* currently involves removal without knowing the corresponding socket, and
* thus without knowing the socket's address family. All of this can be
* changed.
*
* There is no function to test whether a particular socket is a member of a
* multicast group. The pktsock module currently makes the assumption that if
* a socket has been joined to any multicast groups, or set any multicast
* options, the application is multicast aware and therefore able to figure out
* whether it is interested in particular packets, and so we do not filter
* incoming packets against the receiving socket's multicast list. This should
* be more or less in line with what W. Richard Stevens says the BSDs do.
*/
#include "lwip.h"
#include "mcast.h"
#include "lwip/igmp.h"
#include "lwip/mld6.h"
/*
* The per-socket limit on group membership. In theory, the limit should be
* high enough that a single socket can join a particular multicast group on
* all interfaces that support multicast. In practice, we set it a bit lower
* to prevent one socket from using up half of the entries per address family.
* Setting it to IP_MAX_MEMBERSHIPS is definitely excessive right now..
*/
#define MAX_GROUPS_PER_SOCKET 8
static struct mcast_member {
LIST_ENTRY(mcast_member) mm_next; /* next in socket, free list */
struct ifdev * mm_ifdev; /* interface (NULL: free) */
ip_addr_t mm_group; /* group address */
} mcast_array[NR_IPV4_MCAST_GROUP + NR_IPV6_MCAST_GROUP];
static LIST_HEAD(, mcast_member) mcast_freelist;
/*
* Initialize the per-socket multicast membership module.
*/
void
mcast_init(void)
{
unsigned int slot;
/* Initialize the list of free multicast membership entries. */
LIST_INIT(&mcast_freelist);
for (slot = 0; slot < __arraycount(mcast_array); slot++) {
mcast_array[slot].mm_ifdev = NULL;
LIST_INSERT_HEAD(&mcast_freelist, &mcast_array[slot], mm_next);
}
}
/*
* Reset the multicast head for a socket. The socket must not have any
* previous multicast group memberships.
*/
void
mcast_reset(struct mcast_head * mcast_head)
{
LIST_INIT(&mcast_head->mh_list);
}
/*
* Attempt to add a per-socket multicast membership association. The given
* 'mcast_head' pointer is part of a socket. The 'group' parameter is the
* multicast group to join. It is a properly zoned address, but has not been
* checked in any other way. If 'ifdev' is not NULL, it is the interface for
* the membership; if it is NULL, an interface will be selected using routing.
* Return OK if the membership has been successfully added, or a negative
* error code otherwise.
*/
int
mcast_join(struct mcast_head * mcast_head, const ip_addr_t * group,
struct ifdev * ifdev)
{
struct mcast_member *mm;
struct netif *netif;
unsigned int count;
err_t err;
/*
* The callers of this function perform only checks that depend on the
* address family. We check everything else here.
*/
if (!ip_addr_ismulticast(group))
return EADDRNOTAVAIL;
if (!addr_is_valid_multicast(group))
return EINVAL;
/*
* If no interface was specified, pick one with a routing query. Note
* that scoped IPv6 addresses do require an interface to be specified.
*/
if (ifdev == NULL) {
netif = ip_route(IP46_ADDR_ANY(IP_GET_TYPE(group)), group);
if (netif == NULL)
return EHOSTUNREACH;
ifdev = netif_get_ifdev(netif);
}
assert(ifdev != NULL);
assert(!IP_IS_V6(group) ||
!ip6_addr_lacks_zone(ip_2_ip6(group), IP6_MULTICAST));
/* The interface must support multicast. */
if (!(ifdev_get_ifflags(ifdev) & IFF_MULTICAST))
return EADDRNOTAVAIL;
/*
* First see if this socket is already joined to the given group, which
* is an error. While looking, also count the number of groups the
* socket has joined already, to enforce the per-socket limit.
*/
count = 0;
LIST_FOREACH(mm, &mcast_head->mh_list, mm_next) {
if (mm->mm_ifdev == ifdev && ip_addr_cmp(&mm->mm_group, group))
return EEXIST;
count++;
}
if (count >= MAX_GROUPS_PER_SOCKET)
return ENOBUFS;
/* Do we have a free membership structure available? */
if (LIST_EMPTY(&mcast_freelist))
return ENOBUFS;
/*
* Nothing can go wrong as far as we are concerned. Ask lwIP to join
* the multicast group. This may result in a multicast list update at
* the driver end.
*/
netif = ifdev_get_netif(ifdev);
if (IP_IS_V6(group))
err = mld6_joingroup_netif(netif, ip_2_ip6(group));
else
err = igmp_joingroup_netif(netif, ip_2_ip4(group));
if (err != ERR_OK)
return util_convert_err(err);
/*
* Success. Allocate, initialize, and attach a membership structure to
* the socket.
*/
mm = LIST_FIRST(&mcast_freelist);
LIST_REMOVE(mm, mm_next);
mm->mm_ifdev = ifdev;
mm->mm_group = *group;
LIST_INSERT_HEAD(&mcast_head->mh_list, mm, mm_next);
return OK;
}
/*
* Free the given per-socket multicast membership structure, which must
* previously have been associated with a socket. If 'leave_group' is set,
* also tell lwIP to leave the corresponding multicast group.
*/
static void
mcast_free(struct mcast_member * mm, int leave_group)
{
struct netif *netif;
err_t err;
assert(mm->mm_ifdev != NULL);
if (leave_group) {
netif = ifdev_get_netif(mm->mm_ifdev);
if (IP_IS_V6(&mm->mm_group))
err = mld6_leavegroup_netif(netif,
ip_2_ip6(&mm->mm_group));
else
err = igmp_leavegroup_netif(netif,
ip_2_ip4(&mm->mm_group));
if (err != ERR_OK)
panic("lwIP multicast membership desynchronization");
}
LIST_REMOVE(mm, mm_next);
mm->mm_ifdev = NULL;
LIST_INSERT_HEAD(&mcast_freelist, mm, mm_next);
}
/*
* Attempt to remove a per-socket multicast membership association. The given
* 'mcast_head' pointer is part of a socket. The 'group' parameter is the
* multicast group to leave. It is a properly zoned address, but has not been
* checked in any other way. If 'ifdev' is not NULL, it is the interface of
* the membership; if it is NULL, a membership matching the address on any
* interface will suffice. As such, the parameter requirements mirror those of
* mcast_join(). Return OK if the membership has been successfully removed, or
* a negative error code otherwise.
*/
int
mcast_leave(struct mcast_head * mcast_head, const ip_addr_t * group,
struct ifdev * ifdev)
{
struct mcast_member *mm;
/*
* Look up a matching entry. The fact that we must find a match for
* the given address and interface keeps us from having to perform
* various other checks, such as whether the given address is a
* multicast address at all. The exact error codes are not specified.
*/
LIST_FOREACH(mm, &mcast_head->mh_list, mm_next) {
if ((ifdev == NULL || mm->mm_ifdev == ifdev) &&
ip_addr_cmp(&mm->mm_group, group))
break;
}
if (mm == NULL)
return ESRCH;
mcast_free(mm, TRUE /*leave_group*/);
return OK;
}
/*
* Remove all per-socket multicast membership associations of the given socket.
* This function is called when the socket is closed.
*/
void
mcast_leave_all(struct mcast_head * mcast_head)
{
struct mcast_member *mm;
while (!LIST_EMPTY(&mcast_head->mh_list)) {
mm = LIST_FIRST(&mcast_head->mh_list);
mcast_free(mm, TRUE /*leave_group*/);
}
}
/*
* The given interface is about to disappear. Remove and free any per-socket
* multicast membership structures associated with the interface, without
* leaving the multicast group itself (as that will happen a bit later anyway).
*/
void
mcast_clear(struct ifdev * ifdev)
{
unsigned int slot;
for (slot = 0; slot < __arraycount(mcast_array); slot++) {
if (mcast_array[slot].mm_ifdev != ifdev)
continue;
mcast_free(&mcast_array[slot], FALSE /*leave_group*/);
}
}

21
minix/net/lwip/mcast.h Normal file
View File

@ -0,0 +1,21 @@
#ifndef MINIX_NET_LWIP_MCAST_H
#define MINIX_NET_LWIP_MCAST_H
struct mcast_member;
struct mcast_head {
LIST_HEAD(, mcast_member) mh_list;
};
#define mcast_isempty(mcast_head) (LIST_EMPTY(&(mcast_head)->mh_list))
void mcast_init(void);
void mcast_reset(struct mcast_head * mcast_head);
int mcast_join(struct mcast_head * mcast_head, const ip_addr_t * group,
struct ifdev * ifdev);
int mcast_leave(struct mcast_head * mcast_head, const ip_addr_t * group,
struct ifdev * ifdev);
void mcast_leave_all(struct mcast_head * mcast_head);
void mcast_clear(struct ifdev * ifdev);
#endif /* !MINIX_NET_LWIP_MCAST_H */
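
The mcast.c comments describe the life cycle of a socket's membership list:
reset at socket creation, explicit joins and leaves while the socket is in
use, and a final leave-all on close. A minimal sketch of that sequence from a
hypothetical caller; the group address is just an example:

	static void
	mcast_lifecycle_example(void)
	{
		struct mcast_head mh;
		ip_addr_t group;

		mcast_reset(&mh);	/* fresh socket: empty list */

		/* Join a group; with a NULL ifdev, the interface is picked
		 * by a routing query inside mcast_join(). */
		if (ipaddr_aton("224.0.0.251", &group))
			(void)mcast_join(&mh, &group, NULL /*ifdev*/);

		/* ...socket is used... */

		mcast_leave_all(&mh);	/* socket close: drop memberships */
	}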

821
minix/net/lwip/mempool.c Normal file
View File

@ -0,0 +1,821 @@
/* LWIP service - mempool.c - memory pool management and slab allocation */
/*
* This module should be considered a replacement for lwIP's PBUF_POOL and
* custom-pools functionality. lwIP's PBUF_POOL system allows a PBUF_POOL type
* allocation for a moderately large amount of memory, for example for a full-
* sized packet, to be turned into a chain of "pbuf" buffers, each of a static
* size. Most of lwIP can deal with such pbuf chains, because many other types
* of allocations also end up consisting of pbuf chains. However, lwIP will
* never use PBUF_POOL for its own memory allocations, and use PBUF_RAM
* allocations instead. Such PBUF_RAM allocations always return one single
* pbuf with a contiguous memory area. lwIP's custom pools support allows such
* PBUF_RAM allocations to draw from user-defined pools of statically allocated
* memory, as an alternative to turning such allocations into malloc() calls.
*
* However, lwIP itself does not offer a way to combine these two pool systems:
* the PBUF_POOL buffer pool and the custom pools are completely separate. We
* want to be able to draw both kinds of memory from the same pool. This is
* the first reason that we are using our own memory pools. The second is
* something that lwIP could never offer anyway: we would like to provide a
* certain amount of static/preallocated memory for those types of allocations,
* but optionally also add a much larger amount of dynamic memory when needed.
*
* In order to make this module work, we do not use PBUF_POOL anywhere.
* Instead, we use chained static-sized PBUF_RAM allocations for all types of
* allocations that we manage ourselves--see pchain_alloc(). We tell lwIP to
* use the functions in this module to do the malloc-type allocations for those
* PBUF_RAM buffers. As such, this module manages all PBUF_RAM allocations,
* both from our own code and from lwIP. Note that we do still use lwIP's own
* pools for various lwIP structures. We do want to keep the isolation
* provided by the use of such pools, even though that means that we have to
* provision some of those pools for the worst case, resulting in some memory
* overhead that is unnecessary for the common case.
*
 * With the PBUF_RAM allocation redirection system in place, this module has
 * to manage the memory for those allocations.  It does this based on the
 * assumption that there are three main classes of PBUF_RAM allocation sizes:
*
* - "large" allocations: these are allocations for up to MEMPOOL_BUFSIZE bytes
* of PBUF_RAM data, where MEMPOOL_BUFSIZE is the allocation granularity that
* we have picked for the individual buffers in larger chains. It is set to
* 512 bytes right now, mainly to keep pbuf chains for full-sized ethernet
* packets short, which has many performance advantages. Since the pbuf
* header itself also takes some space (16 bytes, right now), this results in
* allocations seen by mempool_malloc() of up to just over 512 bytes.
* - "small" allocations: these are allocations mostly for packet headers, as
* needed by lwIP to prepend to (mainly TCP) packet data that we give to it.
* The size of these allocations varies, but most are 76 bytes (80 bytes if
* we ever add VLAN support), plus once again the pbuf header.
* - "excessive" allocations: these are allocations larger than the maximum
* we have configured, effectively requesting contiguous memory of (possibly
* far) more than 512 bytes. We do not make such allocations ourselves, as
* we only ever create pbuf chains. Thus, any such allocations come from
* lwIP. There are a few locations in lwIP that attempt to make those kinds
* of allocations, but we replace one important case in the lwIP code with
* a chained allocation, (currently) leaving only one case: allocation of
* ICMP ping reply packets. In this module, we outright *deny* any excessive
* allocations. Practically, that means that no replies are generated for
* requests exceeding around 460 bytes, which is in fact not bad, especially
* since we have multicast ICMP ping replying enabled. If any new cases of
* excessive allocations are added to lwIP in the future, we will have to
* deal with those on a case-by-case basis, but for now this should be all.
*
* This module caters to the first two types of allocations. For large buffer
* allocations, it provides a standard slab allocator, with a hardcoded slab
* size of MEMPOOL_LARGE_COUNT buffers with a 512-byte data area each. One
* slab is allocated at service start-up; additional slabs up to a configured
 * maximum are allocated on demand.  Once fallen out of use, all but one slab
* will be freed after a while, using a timer. The current per-slab count of
* 512 large buffers, combined with the buffer size of 512 plus the pbuf header
* plus a bit of extra overhead, results in about 266 KB per slab.
*
* For small buffer allocations, there are two facilities. First, there is a
* static pool of small buffers. This pool currently provides 256 small-sized
* buffers, mainly in order to allow packet headers to be produced even in low-
* memory conditions. In addition, small buffers may be formed by allocating
* and then splitting up one large buffer. The module is currently configured
* to split one large buffer into four small buffers, which yields a small
* buffer size of just over 100 bytes--enough for the packet headers while
* leaving little slack on either side.
*
* It is important to note that large and small buffer allocations are freed up
* through the same function, with no information on the original allocation
* size. As a result, we have to distinguish between large and small buffers
* using a unified system. In particular, this module prepends each of its
* allocations by a single pointer, which points to a header structure that is
* at the very beginning of the slab that contains the allocated buffer. That
* header structure contains information about the type of slab (large or
* small) as well as some accounting information used by both types.
*
 * For large-buffer slabs, this header is part of a larger structure that
 * also contains, for example, the slab's list of free buffers.  This larger
 * structure is then followed by the actual buffers in the slab.
*
* For small-buffer slabs, the header is followed directly by the actual small
* buffers. Thus, when a large buffer is split up into four small buffers, the
* data area of that large buffer consists of a small-type slab header and four
* small buffers. The large buffer itself is simply considered in use, as
* though it was allocated for regular data. This nesting approach saves a lot
* of memory for small allocations, at the cost of a bit more computation.
*
* It should be noted that all allocations should be (and are) pointer-aligned.
* Normally lwIP would check for this, but we cannot tell lwIP the platform
* pointer size without hardcoding that size. This module performs proper
* alignment of all buffers itself though, regardless of the pointer size.
*/
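/*
 * Back-of-the-envelope check of the numbers above, assuming a 32-bit
 * platform with 4-byte pointers and a 16-byte struct pbuf (both assumptions,
 * not guarantees):
 *
 *	MEMPOOL_LARGE_SIZE = align_up(sizeof(struct pbuf)) + MEMPOOL_BUFSIZE
 *	                   = 16 + 512 = 528 bytes of data area per buffer
 *
 *	per large buffer   = header pointer + data area = 4 + 528 = 532 bytes
 *
 *	per slab           = MEMPOOL_LARGE_COUNT * 532 = 512 * 532
 *	                   = 272384 bytes, i.e. roughly the 266 KB cited above
 *
 * The exact figures depend on the pointer size and on lwIP's struct pbuf
 * layout, so treat this only as an approximation.
 */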
#include "lwip.h"
#include <sys/mman.h>
/* Alignment to pointer sizes. */
#define MEMPOOL_ALIGN_DOWN(s) ((s) & ~(sizeof(void *) - 1))
#define MEMPOOL_ALIGN_UP(s) MEMPOOL_ALIGN_DOWN((s) + sizeof(void *) - 1)
/* Large buffers: per-slab count and data area size. */
#define MEMPOOL_LARGE_COUNT 512
#define MEMPOOL_LARGE_SIZE \
(MEMPOOL_ALIGN_UP(sizeof(struct pbuf)) + MEMPOOL_BUFSIZE)
/* Small buffers: per-slab count and data area size. */
#define MEMPOOL_SMALL_COUNT 4
#define MEMPOOL_SMALL_SIZE \
(MEMPOOL_ALIGN_DOWN(MEMPOOL_LARGE_SIZE / MEMPOOL_SMALL_COUNT) - \
sizeof(struct mempool_header))
/* Memory pool slab header, part of both small and large slabs. */
struct mempool_header {
union {
struct {
uint8_t mhui_flags;
uint32_t mhui_inuse;
} mhu_info;
void *mhu_align; /* force pointer alignment */
} mh_u;
};
#define mh_flags mh_u.mhu_info.mhui_flags
#define mh_inuse mh_u.mhu_info.mhui_inuse
/* Header flags. */
#define MHF_SMALL 0x01 /* slab is for small buffers, not large ones */
#define MHF_STATIC 0x02 /* small slab is statically allocated */
#define MHF_MARKED 0x04 /* large empty slab is up for deallocation */
/*
* Large buffer. When allocated, mlb_header points to the (header of) the
* containing large slab, and mlb_data is returned for arbitrary use by the
* user of the buffer. When free, mlb_header is NULL and instead mlb_header2
* points to the containing slab (allowing for double-free detection), and the
* buffer is on the slab's free list by using mlb_next.
*/
struct mempool_large_buf {
struct mempool_header *mlb_header;
union {
struct {
struct mempool_header *mlbuf_header2;
LIST_ENTRY(mempool_large_buf) mlbuf_next;
} mlbu_free;
char mlbu_data[MEMPOOL_LARGE_SIZE];
} mlb_u;
};
#define mlb_header2 mlb_u.mlbu_free.mlbuf_header2
#define mlb_next mlb_u.mlbu_free.mlbuf_next
#define mlb_data mlb_u.mlbu_data
/* Small buffer. Same idea, different size. */
struct mempool_small_buf {
struct mempool_header *msb_header;
union {
struct {
struct mempool_header *msbuf_header2;
TAILQ_ENTRY(mempool_small_buf) msbuf_next;
} msbu_free;
char msbu_data[MEMPOOL_SMALL_SIZE];
} msb_u;
};
#define msb_header2 msb_u.msbu_free.msbuf_header2
#define msb_next msb_u.msbu_free.msbuf_next
#define msb_data msb_u.msbu_data
/*
* A large slab, including header, other per-slab fields, and large buffers.
* Each of these structures is on exactly one of three slab lists, depending
* on whether all its buffers are free (empty), some but not all of its buffers
* are in use (partial), or all of its buffers are in use (full). The mls_next
* field is used for that list. The mls_free field is the per-slab list of
* free buffers.
*/
struct mempool_large_slab {
struct mempool_header mls_header; /* MUST be first */
LIST_ENTRY(mempool_large_slab) mls_next;
LIST_HEAD(, mempool_large_buf) mls_free;
struct mempool_large_buf mls_buf[MEMPOOL_LARGE_COUNT];
};
/* The three slab lists for large slabs, as described above. */
static LIST_HEAD(, mempool_large_slab) mempool_empty_slabs;
static LIST_HEAD(, mempool_large_slab) mempool_partial_slabs;
static LIST_HEAD(, mempool_large_slab) mempool_full_slabs;
/*
* A small slab, including header and small buffers. We use unified free lists
* for small buffers, and these small slabs are not part of any lists
* themselves, so we need neither of the two fields from large slabs for that.
*/
struct mempool_small_slab {
struct mempool_header mss_header; /* MUST be first */
struct mempool_small_buf mss_buf[MEMPOOL_SMALL_COUNT];
};
/*
* The free lists for static small buffers (from the static pool, see below)
* and dynamic small buffers (as obtained by splitting large buffers).
*/
static TAILQ_HEAD(, mempool_small_buf) mempool_small_static_freelist;
static TAILQ_HEAD(, mempool_small_buf) mempool_small_dynamic_freelist;
/*
* A static pool of small buffers. Small buffers are somewhat more important
* than large buffers, because they are used for packet headers. The purpose
* of this static pool is to be able to make progress even if all large buffers
* are allocated for data, typically in the case that the system is low on
* memory. Note that the number of static small buffers is the given number of
* small slabs multiplied by MEMPOOL_SMALL_COUNT, hence the division.
*/
#define MEMPOOL_SMALL_SLABS (256 / MEMPOOL_SMALL_COUNT)
static struct mempool_small_slab mempool_small_pool[MEMPOOL_SMALL_SLABS];
/*
* The following setting (mempool_max_slabs) can be changed through sysctl(7).
* As such it may be set by userland to a completely arbitrary value and must
* be sanity-checked before any actual use. The default is picked such that
* all TCP sockets can fill up their send and receive queues: (TCP_SNDBUF_DEF +
* TCP_RCVBUF_DEF) * NR_TCPSOCK / (MEMPOOL_BUFSIZE * MEMPOOL_LARGE_COUNT) =
* (32768 + 32768) * 256 / (512 * 512) = 64. We put in the resulting number
* rather than the formula because not all those definitions are public.
*/
#define MEMPOOL_DEFAULT_MAX_SLABS 64 /* about 17 MB of memory */
static int mempool_max_slabs; /* maximum number of large slabs */
static int mempool_nr_slabs; /* current number of large slabs */
static int mempool_nr_large; /* current number of large buffers */
static int mempool_used_large; /* large buffers currently in use */
static int mempool_used_small; /* small buffers currently in use */
/*
* Number of clock ticks between timer invocations. The timer is used to
* deallocate unused slabs.
*/
#define MEMPOOL_TIMER_TICKS (10 * sys_hz())
static minix_timer_t mempool_timer;
static int mempool_defer_alloc; /* allocation failed, defer next try */
/* The CTL_MINIX MINIX_LWIP "mempool" subtree. Dynamically numbered. */
static struct rmib_node minix_lwip_mempool_table[] = {
RMIB_INTPTR(RMIB_RW, &mempool_max_slabs, "slab_max",
"Maximum number of memory slabs (configurable)"),
RMIB_INTPTR(RMIB_RO, &mempool_nr_slabs, "slab_num",
"Current number of memory slabs"),
RMIB_INT(RMIB_RO, sizeof(struct mempool_large_slab), "slab_size",
"Byte size of a single memory slab"),
RMIB_INT(RMIB_RO, MEMPOOL_LARGE_COUNT, "slab_bufs",
"Number of large buffers per memory slab"),
RMIB_INTPTR(RMIB_RO, &mempool_nr_large, "large_num",
"Current total number of large buffers"),
RMIB_INTPTR(RMIB_RO, &mempool_used_large, "large_used",
"Current number of used large buffers"),
RMIB_INT(RMIB_RO, MEMPOOL_LARGE_SIZE, "large_size",
"Byte size of a single large buffer"),
RMIB_INTPTR(RMIB_RO, &mempool_used_small, "small_used",
"Current number of used small buffers"),
RMIB_INT(RMIB_RO, MEMPOOL_SMALL_SIZE, "small_size",
"Byte size of a single small buffer"),
};
static struct rmib_node minix_lwip_mempool_node =
RMIB_NODE(RMIB_RO, minix_lwip_mempool_table, "mempool",
"Memory pool settings");
/*
* Initialize the given "slab" of small buffers. The slab may either come from
* the statically allocated pool ('is_static' is TRUE) or a single large buffer
* that we aim to chop up into small buffers.
*/
static void
mempool_prepare_small(struct mempool_small_slab * mss, int is_static)
{
struct mempool_small_buf *msb;
unsigned int count;
mss->mss_header.mh_flags = MHF_SMALL | ((is_static) ? MHF_STATIC : 0);
mss->mss_header.mh_inuse = 0;
msb = mss->mss_buf;
for (count = 0; count < MEMPOOL_SMALL_COUNT; count++, msb++) {
msb->msb_header = NULL;
msb->msb_header2 = &mss->mss_header;
if (is_static)
TAILQ_INSERT_HEAD(&mempool_small_static_freelist, msb,
msb_next);
else
TAILQ_INSERT_HEAD(&mempool_small_dynamic_freelist, msb,
msb_next);
}
}
/*
* Allocate a new slab for large buffers, if allowed by policy and possible.
*/
static void
mempool_new_slab(void)
{
struct mempool_large_slab *mls;
struct mempool_large_buf *mlb;
unsigned int count;
/*
* See if allocating a new slab would result in overrunning the
* configured maximum number of large buffers. Round the maximum,
* which is probably what the user intended.
*/
if (mempool_cur_buffers() + MEMPOOL_LARGE_COUNT / 2 >
mempool_max_buffers()) {
assert(mempool_nr_slabs > 0);
return;
}
/*
* If a previous allocation failed before during this timer interval,
* do not try again now.
*/
if (mempool_defer_alloc)
return;
/*
* Allocate the slab. Preallocate the memory, or we might crash later
* during low-memory conditions. If allocation fails, simply do
* nothing further. The caller will check the free lists.
*/
mls = (struct mempool_large_slab *)mmap(NULL,
sizeof(struct mempool_large_slab), PROT_READ | PROT_WRITE,
MAP_ANON | MAP_PRIVATE | MAP_PREALLOC, -1, 0);
if (mls == MAP_FAILED) {
if (mempool_nr_slabs == 0)
panic("unable to allocate initial memory pool");
/*
* Do not keep hammering VM with mmap requests when the system
* is out of memory. Try again after the next timer tick.
*/
mempool_defer_alloc = TRUE;
return;
}
/* Initialize the new slab. */
mls->mls_header.mh_flags = 0;
mls->mls_header.mh_inuse = 0;
mlb = mls->mls_buf;
LIST_INIT(&mls->mls_free);
for (count = 0; count < MEMPOOL_LARGE_COUNT; count++, mlb++) {
mlb->mlb_header = NULL;
mlb->mlb_header2 = &mls->mls_header;
LIST_INSERT_HEAD(&mls->mls_free, mlb, mlb_next);
}
LIST_INSERT_HEAD(&mempool_empty_slabs, mls, mls_next);
mempool_nr_slabs++;
mempool_nr_large += MEMPOOL_LARGE_COUNT;
}
/*
* Deallocate a slab for large buffers, if allowed.
*/
static void
mempool_destroy_slab(struct mempool_large_slab * mls)
{
assert(mempool_nr_slabs > 0);
assert(!(mls->mls_header.mh_flags & MHF_SMALL));
assert(mls->mls_header.mh_inuse == 0);
/* Never deallocate the last large slab. */
if (mempool_nr_slabs == 1)
return;
LIST_REMOVE(mls, mls_next);
if (munmap(mls, sizeof(*mls)) != 0)
panic("munmap failed: %d", -errno);
assert(mempool_nr_large > MEMPOOL_LARGE_COUNT);
mempool_nr_large -= MEMPOOL_LARGE_COUNT;
mempool_nr_slabs--;
}
/*
* Regular timer. Deallocate empty slabs already marked for deallocation, and
* mark any other empty slabs for deallocation.
*/
static void
mempool_tick(int arg __unused)
{
struct mempool_large_slab *mls, *tmls;
/*
* Go through all the empty slabs, destroying marked slabs and marking
* unmarked slabs.
*/
LIST_FOREACH_SAFE(mls, &mempool_empty_slabs, mls_next, tmls) {
if (mls->mls_header.mh_flags & MHF_MARKED)
mempool_destroy_slab(mls);
else
mls->mls_header.mh_flags |= MHF_MARKED;
}
/*
* If allocation failed during the last interval, allow a new attempt
* during the next.
*/
mempool_defer_alloc = FALSE;
/* Set the next timer. */
set_timer(&mempool_timer, MEMPOOL_TIMER_TICKS, mempool_tick, 0);
}
/*
* Initialize the memory pool module.
*/
void
mempool_init(void)
{
unsigned int slot;
/* These checks are for absolutely essential points. */
assert(sizeof(void *) == MEM_ALIGNMENT);
assert(sizeof(struct mempool_small_slab) <= MEMPOOL_LARGE_SIZE);
assert(offsetof(struct mempool_small_buf, msb_data) == sizeof(void *));
assert(offsetof(struct mempool_large_buf, mlb_data) == sizeof(void *));
/* Initialize module-local variables. */
LIST_INIT(&mempool_empty_slabs);
LIST_INIT(&mempool_partial_slabs);
LIST_INIT(&mempool_full_slabs);
TAILQ_INIT(&mempool_small_static_freelist);
TAILQ_INIT(&mempool_small_dynamic_freelist);
mempool_max_slabs = MEMPOOL_DEFAULT_MAX_SLABS;
mempool_nr_slabs = 0;
mempool_nr_large = 0;
mempool_used_large = 0;
mempool_used_small = 0;
mempool_defer_alloc = FALSE;
/* Initialize the static pool of small buffers. */
for (slot = 0; slot < __arraycount(mempool_small_pool); slot++)
mempool_prepare_small(&mempool_small_pool[slot],
TRUE /*is_static*/);
/*
* Allocate one large slab. The service needs at least one large slab
* for basic operation, and therefore will never deallocate the last.
*/
mempool_new_slab();
/* Set a regular low-frequency timer to deallocate unused slabs. */
set_timer(&mempool_timer, MEMPOOL_TIMER_TICKS, mempool_tick, 0);
/* Register the minix.lwip.mempool subtree. */
mibtree_register_lwip(&minix_lwip_mempool_node);
}
/*
* Return the total number of large buffers currently in the system, regardless
* of allocation status.
*/
unsigned int
mempool_cur_buffers(void)
{
return mempool_nr_large;
}
/*
* Return the maximum number of large buffers that the system has been allowed
* to allocate. Note that due to low-memory conditions, this maximum may not
* be allocated in practice even when desired.
*/
unsigned int
mempool_max_buffers(void)
{
if (mempool_max_slabs <= 1)
return MEMPOOL_LARGE_COUNT;
if ((size_t)mempool_max_slabs >
INT_MAX / sizeof(struct mempool_large_slab))
return INT_MAX / sizeof(struct mempool_large_slab);
return (size_t)mempool_max_slabs * MEMPOOL_LARGE_COUNT;
}
/*
* Allocate a large buffer, either by taking one off a free list or by
* allocating a new large slab. On success, return a pointer to the data area
* of the large buffer. This data area is exactly MEMPOOL_LARGE_SIZE bytes in
* size. If no large buffer could be allocated, return NULL.
*/
static void *
mempool_alloc_large(void)
{
struct mempool_large_slab *mls;
struct mempool_large_buf *mlb;
/*
* Find a large slab that has free large blocks. As is standard for
* slab allocation, favor partially used slabs over empty slabs for
* eventual consolidation. If both lists are empty, try allocating a
* new slab. If that fails, we are out of memory, and return NULL.
*/
if (!LIST_EMPTY(&mempool_partial_slabs))
mls = LIST_FIRST(&mempool_partial_slabs);
else {
if (LIST_EMPTY(&mempool_empty_slabs)) {
mempool_new_slab();
if (LIST_EMPTY(&mempool_empty_slabs))
return NULL; /* out of memory */
}
mls = LIST_FIRST(&mempool_empty_slabs);
}
/* Allocate a block from the slab that we picked. */
assert(mls != NULL);
assert(!LIST_EMPTY(&mls->mls_free));
mlb = LIST_FIRST(&mls->mls_free);
LIST_REMOVE(mlb, mlb_next);
assert(mlb->mlb_header == NULL);
assert(mlb->mlb_header2 == &mls->mls_header);
mlb->mlb_header = &mls->mls_header;
/*
* Adjust accounting for the large slab, which may involve moving it
* to another list.
*/
assert(mls->mls_header.mh_inuse < MEMPOOL_LARGE_COUNT);
mls->mls_header.mh_inuse++;
if (mls->mls_header.mh_inuse == MEMPOOL_LARGE_COUNT) {
LIST_REMOVE(mls, mls_next);
LIST_INSERT_HEAD(&mempool_full_slabs, mls, mls_next);
} else if (mls->mls_header.mh_inuse == 1) {
LIST_REMOVE(mls, mls_next);
LIST_INSERT_HEAD(&mempool_partial_slabs, mls, mls_next);
}
assert(mempool_used_large < mempool_nr_large);
mempool_used_large++;
/* Return the block's data area. */
return (void *)mlb->mlb_data;
}
/*
* Allocate a small buffer, either by taking one off a free list or by
* allocating a large buffer and splitting it up in new free small buffers. On
* success, return a pointer to the data area of the small buffer. This data
* area is exactly MEMPOOL_SMALL_SIZE bytes in size. If no small buffer could
* be allocated, return NULL.
*/
static void *
mempool_alloc_small(void)
{
struct mempool_small_slab *mss;
struct mempool_small_buf *msb;
struct mempool_header *mh;
/*
* Find a free small block and take it off the free list. Try the
* static free list before the dynamic one, so that after a peak in
* buffer usage we are likely to be able to free up the dynamic slabs
* quickly. If both lists are empty, try allocating a large block to
* divvy up into small blocks. If that fails, we are out of memory.
*/
if (!TAILQ_EMPTY(&mempool_small_static_freelist)) {
msb = TAILQ_FIRST(&mempool_small_static_freelist);
TAILQ_REMOVE(&mempool_small_static_freelist, msb, msb_next);
} else {
if (TAILQ_EMPTY(&mempool_small_dynamic_freelist)) {
mss =
(struct mempool_small_slab *)mempool_alloc_large();
if (mss == NULL)
return NULL; /* out of memory */
/* Initialize the small slab, including its blocks. */
mempool_prepare_small(mss, FALSE /*is_static*/);
}
msb = TAILQ_FIRST(&mempool_small_dynamic_freelist);
assert(msb != NULL);
TAILQ_REMOVE(&mempool_small_dynamic_freelist, msb, msb_next);
}
/* Mark the small block as allocated, and return its data area. */
assert(msb != NULL);
assert(msb->msb_header == NULL);
assert(msb->msb_header2 != NULL);
mh = msb->msb_header2;
msb->msb_header = mh;
assert(mh->mh_inuse < MEMPOOL_SMALL_COUNT);
mh->mh_inuse++;
mempool_used_small++;
return (void *)msb->msb_data;
}
/*
* Memory pool wrapper function for malloc() calls from lwIP.
*/
void *
mempool_malloc(size_t size)
{
/*
* It is currently expected that there will be allocation attempts for
* sizes larger than our large size, in particular for ICMP ping
* replies as described elsewhere. As such, we cannot print any
* warnings here. For now, refusing these excessive allocations should
* not be a problem in practice.
*/
if (size > MEMPOOL_LARGE_SIZE)
return NULL;
if (size <= MEMPOOL_SMALL_SIZE)
return mempool_alloc_small();
else
return mempool_alloc_large();
}
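/*
 * A sketch of how lwIP can be pointed at these wrappers, assuming the lwIP
 * 2.x option names (the actual lwipopts.h settings used by this service are
 * not part of this file):
 *
 *	#define MEM_LIBC_MALLOC		1
 *	#define mem_clib_malloc		mempool_malloc
 *	#define mem_clib_free		mempool_free
 *	#define mem_clib_calloc		mempool_calloc
 *
 * With MEM_LIBC_MALLOC enabled, lwIP's mem_malloc()/mem_free() become thin
 * wrappers around these hooks, so that all PBUF_RAM allocations end up in
 * this module.
 */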
/*
* Memory pool wrapper function for free() calls from lwIP.
*/
void
mempool_free(void * ptr)
{
struct mempool_large_slab *mls;
struct mempool_large_buf *mlb;
struct mempool_small_slab *mss;
struct mempool_small_buf *msb;
struct mempool_header *mh;
unsigned int count;
/*
* Get a pointer to the slab header, which is right before the data
* area for both large and small buffers. This pointer is NULL if the
* buffer is free, which would indicate that something is very wrong.
*/
ptr = (void *)((char *)ptr - sizeof(mh));
memcpy(&mh, ptr, sizeof(mh));
if (mh == NULL)
panic("mempool_free called on unallocated object!");
/*
* If the slab header says that the slab is for small buffers, deal
* with that case first. If we free up the last small buffer of a
* dynamically allocated small slab, we also free up the entire small
* slab, which is in fact the data area of a large buffer.
*/
if (mh->mh_flags & MHF_SMALL) {
/*
* Move the small buffer onto the appropriate small free list.
*/
msb = (struct mempool_small_buf *)ptr;
msb->msb_header2 = mh;
msb->msb_header = NULL;
/*
* Simple heuristic, unless the buffer is static: favor reuse
* of small buffers in containers that are already in use
* for other small buffers as well, for consolidation.
*/
if (mh->mh_flags & MHF_STATIC)
TAILQ_INSERT_HEAD(&mempool_small_static_freelist, msb,
msb_next);
else if (mh->mh_inuse > 1)
TAILQ_INSERT_HEAD(&mempool_small_dynamic_freelist, msb,
msb_next);
else
TAILQ_INSERT_TAIL(&mempool_small_dynamic_freelist, msb,
msb_next);
assert(mh->mh_inuse > 0);
mh->mh_inuse--;
assert(mempool_used_small > 0);
mempool_used_small--;
/*
* If the small buffer is statically allocated, or it was not
* the last allocated small buffer in its containing large
* buffer, then we are done.
*/
if (mh->mh_inuse > 0 || (mh->mh_flags & MHF_STATIC))
return;
/*
* Otherwise, free the containing large buffer as well. First,
* remove all its small buffers from the free list.
*/
mss = (struct mempool_small_slab *)mh;
msb = mss->mss_buf;
for (count = 0; count < MEMPOOL_SMALL_COUNT; count++, msb++) {
assert(msb->msb_header == NULL);
assert(msb->msb_header2 == mh);
TAILQ_REMOVE(&mempool_small_dynamic_freelist, msb,
msb_next);
}
/* Then, fall through to the large-buffer free code. */
ptr = (void *)((char *)mh - sizeof(mh));
memcpy(&mh, ptr, sizeof(mh));
assert(mh != NULL);
assert(!(mh->mh_flags & MHF_SMALL));
}
/*
* Move the large buffer onto the free list of the large slab to which
* it belongs.
*/
mls = (struct mempool_large_slab *)mh;
mlb = (struct mempool_large_buf *)ptr;
mlb->mlb_header2 = &mls->mls_header;
mlb->mlb_header = NULL;
LIST_INSERT_HEAD(&mls->mls_free, mlb, mlb_next);
/*
* Adjust accounting for the large slab, which may involve moving it
* to another list.
*/
assert(mls->mls_header.mh_inuse > 0);
mls->mls_header.mh_inuse--;
if (mls->mls_header.mh_inuse == 0) {
LIST_REMOVE(mls, mls_next);
LIST_INSERT_HEAD(&mempool_empty_slabs, mls, mls_next);
mls->mls_header.mh_flags &= ~MHF_MARKED;
} else if (mls->mls_header.mh_inuse == MEMPOOL_LARGE_COUNT - 1) {
LIST_REMOVE(mls, mls_next);
LIST_INSERT_HEAD(&mempool_partial_slabs, mls, mls_next);
}
assert(mempool_used_large > 0);
mempool_used_large--;
}
/*
* Memory pool wrapper function for calloc() calls from lwIP.
*/
void *
mempool_calloc(size_t num, size_t size)
{
void *ptr;
size_t total;
/*
* Standard overflow check. This can be improved, but it doesn't have
* to be, because in practice lwIP never calls calloc() anyway.
*/
if (num > 0 && size > 0 && (size_t)-1 / size < num)
return NULL;
total = num * size;
if ((ptr = mempool_malloc(total)) == NULL)
return NULL;
memset(ptr, 0, total);
return ptr;
}

141
minix/net/lwip/mibtree.c Normal file

@ -0,0 +1,141 @@
/* LWIP service - mibtree.c - sysctl support for net and minix.lwip trees */
/*
* This file acts as a dispatcher for the net.inet, net.inet6, and minix.lwip
* sysctl trees. It does not cover the other net.* trees; these are taken care
* of in other source files.
*/
#include "lwip.h"
#include <minix/sysctl.h>
#define MAX_PROTO 6 /* maximum # of INET protocols with subtrees */
static struct rmib_indir net_inet_indir[MAX_PROTO];
static unsigned int net_inet_indir_count = 0;
static struct rmib_node net_inet_node =
RMIB_SNODE(RMIB_RO, net_inet_indir, "inet", "PF_INET related settings");
#ifdef INET6
static struct rmib_indir net_inet6_indir[MAX_PROTO];
static unsigned int net_inet6_indir_count = 0;
static struct rmib_node net_inet6_node =
RMIB_SNODE(RMIB_RO, net_inet6_indir, "inet6", "PF_INET6 related settings");
#endif /* INET6 */
#define MAX_LWIP 4 /* maximum # of miscellaneous LWIP subtrees */
static struct rmib_indir minix_lwip_indir[MAX_LWIP];
static unsigned int minix_lwip_indir_count = 0;
static struct rmib_node minix_lwip_node =
RMIB_SNODE(RMIB_RO, minix_lwip_indir, "lwip",
"LWIP service information and settings");
/*
* Initialize the status module by registering the net.inet, net.inet6, and
* minix.lwip trees with the MIB service. Other modules must have added all
* subtrees to those trees through mibtree_register_*() before this point.
*/
void
mibtree_init(void)
{
const int inet_mib[] = { CTL_NET, PF_INET };
#ifdef INET6
const int inet6_mib[] = { CTL_NET, PF_INET6 };
#endif /* INET6 */
const int lwip_mib[] = { CTL_MINIX, MINIX_LWIP };
int r;
/*
* Register the "net.inet", "net.inet6", and "minix.lwip" subtrees with
* the MIB service.
*
* These calls only return local failures. Remote failures (in the MIB
* service) are silently ignored. So, we can safely panic on failure.
*/
if ((r = rmib_register(inet_mib, __arraycount(inet_mib),
&net_inet_node)) != OK)
panic("unable to register net.inet RMIB tree: %d", r);
#ifdef INET6
if ((r = rmib_register(inet6_mib, __arraycount(inet6_mib),
&net_inet6_node)) != OK)
panic("unable to register net.inet6 RMIB tree: %d", r);
#endif /* INET6 */
if ((r = rmib_register(lwip_mib, __arraycount(lwip_mib),
&minix_lwip_node)) != OK)
panic("unable to register minix.lwip RMIB tree: %d", r);
}
/*
* Add a subtree to the local net.inet or net.inet6 tree. This function must
* only be called *before* mibtree_init(), as the latter will register the
* final tree with the MIB service.
*/
void
mibtree_register_inet(int domain, int protocol, struct rmib_node * node)
{
struct rmib_node *parent;
struct rmib_indir *indir;
unsigned int i, *count;
switch (domain) {
case PF_INET:
parent = &net_inet_node;
indir = net_inet_indir;
count = &net_inet_indir_count;
break;
case PF_INET6:
#ifdef INET6
parent = &net_inet6_node;
indir = net_inet6_indir;
count = &net_inet6_indir_count;
break;
#else /* !INET6 */
return;
#endif /* !INET6 */
default:
panic("invalid domain %d", domain);
}
assert(*count < MAX_PROTO);
/* Insertion sort. */
for (i = 0; i < *count; i++) {
assert(indir[i].rindir_id != (unsigned int)protocol);
if (indir[i].rindir_id > (unsigned int)protocol)
break;
}
if (i < *count)
memmove(&indir[i + 1], &indir[i],
sizeof(indir[0]) * (*count - i));
indir[i].rindir_id = protocol;
indir[i].rindir_node = node;
parent->rnode_size = ++*count;
}
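/*
 * Hypothetical example (the actual protocol modules elsewhere in this commit
 * register their own tables): a UDP module would call, before mibtree_init(),
 *
 *	mibtree_register_inet(PF_INET, IPPROTO_UDP, &net_inet_udp_node);
 *
 * after which the given subtree appears to userland as net.inet.udp.
 */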
/*
* Add a miscellaneous subtree to the local minix.lwip tree. This function
* must only be called *before* mibtree_init(), as the latter will register the
* final tree with the MIB service. Note that the given subtrees are numbered
* arbitrarily. We use sparse trees here only to avoid having to declare
* external variables, which is a bit of a hack, but with the expected low
* number of miscellaneous subtrees there will be no performance penalty.
*/
void
mibtree_register_lwip(struct rmib_node * node)
{
unsigned int i;
i = minix_lwip_indir_count;
assert(i < __arraycount(minix_lwip_indir));
minix_lwip_indir[i].rindir_id = i;
minix_lwip_indir[i].rindir_node = node;
minix_lwip_node.rnode_size = ++minix_lwip_indir_count;
}

1019
minix/net/lwip/ndev.c Normal file

File diff suppressed because it is too large

33
minix/net/lwip/ndev.h Normal file

@ -0,0 +1,33 @@
#ifndef MINIX_NET_LWIP_NDEV_H
#define MINIX_NET_LWIP_NDEV_H
/* The maximum supported number of network device drivers. */
#define NR_NDEV 8
typedef uint32_t ndev_id_t;
struct ndev_hwaddr {
uint8_t nhwa_addr[NDEV_HWADDR_MAX];
};
struct ndev_conf {
uint32_t nconf_set; /* fields to set (NDEV_SET_) */
uint32_t nconf_mode; /* desired mode (NDEV_MODE_) */
struct ndev_hwaddr *nconf_mclist; /* multicast list pointer */
size_t nconf_mccount; /* multicast list count */
uint32_t nconf_caps; /* capabilities (NDEV_CAP_) */
uint32_t nconf_flags; /* flags to set (NDEV_FLAG_) */
uint32_t nconf_media; /* media selection (IFM_) */
struct ndev_hwaddr nconf_hwaddr; /* desired hardware address */
};
void ndev_init(void);
void ndev_check(void);
void ndev_process(const message * m_ptr, int ipc_status);
int ndev_conf(ndev_id_t id, const struct ndev_conf * nconf);
int ndev_send(ndev_id_t id, const struct pbuf * pbuf);
int ndev_can_recv(ndev_id_t id);
int ndev_recv(ndev_id_t id, struct pbuf * pbuf);
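/*
 * Illustrative use of ndev_conf(); NDEV_SET_MODE and NDEV_MODE_UP are assumed
 * names from the ndev protocol definitions, which are not part of this
 * header:
 *
 *	struct ndev_conf nconf;
 *
 *	memset(&nconf, 0, sizeof(nconf));
 *	nconf.nconf_set = NDEV_SET_MODE;
 *	nconf.nconf_mode = NDEV_MODE_UP;
 *	(void)ndev_conf(id, &nconf);
 *
 * Only the fields selected by nconf_set are applied as part of the request;
 * the other fields are ignored.
 */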
#endif /* !MINIX_NET_LWIP_NDEV_H */

154
minix/net/lwip/pchain.c Normal file

@ -0,0 +1,154 @@
/* LWIP service - pchain.c - pbuf chain utility functions */
#include "lwip.h"
/*
* Allocate a chain of pbuf buffers as though it were a PBUF_POOL allocation,
* except that each buffer is of type PBUF_RAM. Return the pbuf chain on
* success, or NULL on memory allocation failure.
*/
struct pbuf *
pchain_alloc(int layer, size_t size)
{
struct pbuf *pbuf, *phead, **pnext;
size_t chunk, left;
int offset = 0;
/*
* Check for length overflow. Note that we do this before prepending
* the header, because otherwise we could never send a full-sized
* (65535-byte) IP packet. This does mean that we are generating a
* pbuf chain that has over 64KB worth of allocated space, but our
 * header hiding ensures that tot_len stays under 64KB.  A check in
 * pbuf_header() prevents later header adjustments from lifting tot_len
 * over this limit.
*/
if (size > UINT16_MAX)
return NULL;
/*
* Unfortunately, we have no choice but to replicate this block from
* lwIP's pbuf_alloc() code. It is however unlikely that the offsets
* change for the currently supported layer types, and we do not need
* to support any layer types that we do not use ourselves.
*/
switch (layer) {
case PBUF_TRANSPORT:
offset = PBUF_LINK_ENCAPSULATION_HLEN + PBUF_LINK_HLEN +
PBUF_IP_HLEN + PBUF_TRANSPORT_HLEN;
break;
case PBUF_IP:
offset = PBUF_LINK_ENCAPSULATION_HLEN + PBUF_LINK_HLEN +
PBUF_IP_HLEN;
break;
case PBUF_LINK:
offset = PBUF_LINK_ENCAPSULATION_HLEN + PBUF_LINK_HLEN;
break;
case PBUF_RAW_TX:
offset = PBUF_LINK_ENCAPSULATION_HLEN;
break;
case PBUF_RAW:
offset = 0;
break;
default:
panic("invalid pbuf layer: %d", layer);
}
chunk = size + offset;
if (chunk > MEMPOOL_BUFSIZE)
chunk = MEMPOOL_BUFSIZE;
if ((phead = pbuf_alloc(PBUF_RAW, chunk, PBUF_RAM)) == NULL)
return NULL;
if (offset > 0)
util_pbuf_header(phead, -offset);
phead->tot_len = size;
pnext = &phead->next;
for (left = size - (chunk - offset); left > 0; left -= chunk) {
chunk = (left < MEMPOOL_BUFSIZE) ? left : MEMPOOL_BUFSIZE;
if ((pbuf = pbuf_alloc(PBUF_RAW, chunk, PBUF_RAM)) == NULL) {
/*
* Adjust tot_len to match the actual length of the
* chain so far, just in case pbuf_free() starts caring
* about this in the future.
*/
for (pbuf = phead; pbuf != NULL; pbuf = pbuf->next)
pbuf->tot_len -= left;
pbuf_free(phead);
return NULL;
}
pbuf->tot_len = left;
*pnext = pbuf;
pnext = &pbuf->next;
}
return phead;
}
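/*
 * Worked example, assuming lwIP's default header lengths (a combined
 * PBUF_TRANSPORT offset of 54 bytes) and a MEMPOOL_BUFSIZE of 512:
 * pchain_alloc(PBUF_TRANSPORT, 1000) produces a three-buffer chain.  The
 * first buffer stores 512 - 54 = 458 bytes of data behind the hidden header
 * space, and the remaining 542 bytes are spread over two more buffers of
 * 512 and 30 bytes.  The resulting tot_len of the chain head is exactly
 * 1000.
 */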
/*
* Given the (non-empty) chain of buffers 'pbuf', return a pointer to the
* 'next' field of the last buffer in the chain. This function is packet queue
* friendly. A packet queue is a queue of packet chains, where each chain is
* delimited using the 'tot_len' field. As a result, while the pointer
* returned is never NULL, the value pointed to by the returned pointer may or
 * may not be NULL (and will point to the next chain if not NULL).  As a
 * notable exception, in cases where the buffer type is a single PBUF_REF,
 * 'tot_len' may be zero and 'len' may be non-zero.  In such cases, the chain
 * consists of that single buffer only.  This function must handle that case
 * as well.
*/
struct pbuf **
pchain_end(struct pbuf * pbuf)
{
assert(pbuf != NULL);
while (pbuf->tot_len > pbuf->len) {
pbuf = pbuf->next;
assert(pbuf != NULL);
}
return &pbuf->next;
}
/*
* Given the (non-empty) chain of buffers 'pbuf', return a byte size estimation
* of the memory used by the chain, rounded up to pool buffer sizes. This
* function is packet queue friendly.
*/
size_t
pchain_size(struct pbuf * pbuf)
{
size_t size;
assert(pbuf != NULL);
/*
* Count the first buffer separately, as its length may be seriously
* off due to header hiding. While the caller should always provide
* exactly the same pbuf chain twice if it intends to get back the same
* size twice, this also protects against accidental size differences
* due to header hiding in that case.
*/
size = MEMPOOL_BUFSIZE;
/*
* Round up the size of the rest of the chain to whole buffers.
*/
if (pbuf->tot_len > pbuf->len) {
size += pbuf->tot_len - pbuf->len + MEMPOOL_BUFSIZE - 1;
size -= size % MEMPOOL_BUFSIZE;
}
return size;
}
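/*
 * Continuing the example above: for a 1000-byte chain whose first buffer
 * holds 458 bytes, this returns 512 + roundup(1000 - 458) = 512 + 1024 =
 * 1536 bytes, i.e., the equivalent of three full pool buffers.
 */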

1236
minix/net/lwip/pktsock.c Normal file

File diff suppressed because it is too large

63
minix/net/lwip/pktsock.h Normal file

@ -0,0 +1,63 @@
#ifndef MINIX_NET_LWIP_PKTSOCK_H
#define MINIX_NET_LWIP_PKTSOCK_H
#include "mcast.h"
/* Packet-level socket, shared by UDP and RAW. */
struct pktsock {
struct ipsock pkt_ipsock; /* IP socket object, MUST be first */
struct pbuf *pkt_rcvhead; /* receive buffer, first packet */
struct pbuf **pkt_rcvtailp; /* receive buffer, last ptr-ptr */
size_t pkt_rcvlen; /* receive buffer, length in bytes */
struct mcast_head pkt_mcast; /* multicast membership list */
ip6_addr_p_t pkt_srcaddr; /* IPV6_PKTINFO: source address */
uint32_t pkt_ifindex; /* IPV6_PKTINFO: interface index */
};
#define pktsock_get_ipsock(pkt) (&(pkt)->pkt_ipsock)
#define pktsock_get_ifindex(pkt) ((pkt)->pkt_ifindex)
/* Options when sending packets. */
struct pktopt {
uint8_t pkto_flags; /* packet send flags (PKTOF_) */
uint8_t pkto_tos; /* type of service for the packet */
uint8_t pkto_ttl; /* time-to-live for the packet */
uint8_t pkto_mcast_ttl; /* time-to-live for multicast packet */
ip6_addr_p_t pkto_srcaddr; /* IPV6_PKTINFO: source address */
unsigned int pkto_ifindex; /* IPV6_PKTINFO: interface index */
};
#define PKTOF_TTL 0x01 /* send packet with custom TTL value */
#define PKTOF_TOS 0x02 /* send packet with custom TOS value */
#define PKTOF_PKTINFO 0x04 /* send packet with src addr, on if. */
int pktsock_socket(struct pktsock * pkt, int domain, size_t sndbuf,
size_t rcvbuf, struct sock ** sockp);
int pktsock_test_input(struct pktsock * pkt, struct pbuf * pbuf);
void pktsock_input(struct pktsock * pkt, struct pbuf * pbuf,
const ip_addr_t * srcaddr, uint16_t port);
int pktsock_get_pktinfo(struct pktsock * pkt, struct pktopt * pkto,
struct ifdev ** ifdevp, ip_addr_t * src_addrp);
int pktsock_get_ctl(struct pktsock * pkt, const struct sockdriver_data * ctl,
socklen_t ctl_len, struct pktopt * pkto);
int pktsock_get_data(struct pktsock * pkt, const struct sockdriver_data * data,
size_t len, struct pbuf * pbuf);
int pktsock_pre_recv(struct sock * sock, endpoint_t user_endpt, int flags);
int pktsock_recv(struct sock * sock, const struct sockdriver_data * data,
size_t len, size_t * off, const struct sockdriver_data * ctl,
socklen_t ctl_len, socklen_t * ctl_off, struct sockaddr * addr,
socklen_t * addr_len, endpoint_t user_endpt, int flags, size_t min,
int * rflags);
int pktsock_test_recv(struct sock * sock, size_t min, size_t * size);
void pktsock_set_mcaware(struct pktsock * pkt);
int pktsock_setsockopt(struct pktsock * pkt, int level, int name,
const struct sockdriver_data * data, socklen_t len,
struct ipopts * ipopts);
int pktsock_getsockopt(struct pktsock * pkt, int level, int name,
const struct sockdriver_data * data, socklen_t * len,
struct ipopts * ipopts);
void pktsock_shutdown(struct pktsock * pkt, unsigned int mask);
void pktsock_close(struct pktsock * pkt);
size_t pktsock_get_recvlen(struct pktsock * pkt);
#endif /* !MINIX_NET_LWIP_PKTSOCK_H */

1341
minix/net/lwip/rawsock.c Normal file

File diff suppressed because it is too large

1654
minix/net/lwip/route.c Normal file

File diff suppressed because it is too large

39
minix/net/lwip/route.h Normal file

@ -0,0 +1,39 @@
#ifndef MINIX_NET_LWIP_ROUTE_H
#define MINIX_NET_LWIP_ROUTE_H
#include <net/route.h>
struct route_entry;
struct rtsock_request;
void route_init(void);
int route_add(const ip_addr_t * addr, unsigned int prefix,
const ip_addr_t * gateway, struct ifdev * ifdev, unsigned int flags,
const struct rtsock_request * rtr);
int route_can_add(const ip_addr_t * addr, unsigned int prefix, int is_host);
struct route_entry *route_find(const ip_addr_t * addr, unsigned int prefix,
int is_host);
struct route_entry *route_lookup(const ip_addr_t * addr);
void route_delete(struct route_entry * route,
const struct rtsock_request * rtr);
void route_clear(struct ifdev * ifdev);
int route_process(unsigned int type, const struct sockaddr * dst,
const struct sockaddr * mask, const struct sockaddr * gateway,
const struct sockaddr * ifp, const struct sockaddr * ifa,
unsigned int flags, unsigned long inits,
const struct rt_metrics * rmx, const struct rtsock_request * rtr);
void route_get(const struct route_entry * route, union sockaddr_any * addr,
union sockaddr_any * mask, union sockaddr_any * gateway,
union sockaddr_any * ifp, union sockaddr_any * ifa,
struct ifdev ** ifdev, unsigned int * flags, unsigned int * use);
unsigned int route_get_flags(const struct route_entry * route);
struct ifdev *route_get_ifdev(const struct route_entry * route);
int route_is_ipv6(const struct route_entry * route);
struct route_entry *route_enum_v4(struct route_entry * last);
struct route_entry *route_enum_v6(struct route_entry * last);
int route_output_v4(struct ifdev * ifdev, const ip4_addr_t * ipaddr,
err_t * err);
int route_output_v6(struct ifdev * ifdev, const ip6_addr_t * ipaddr,
err_t * err);
#endif /* !MINIX_NET_LWIP_ROUTE_H */

1912
minix/net/lwip/rtsock.c Normal file

File diff suppressed because it is too large

32
minix/net/lwip/rtsock.h Normal file

@ -0,0 +1,32 @@
#ifndef MINIX_NET_LWIP_RTSOCK_H
#define MINIX_NET_LWIP_RTSOCK_H
#include "ifaddr.h"
#include "lldata.h"
struct route_entry;
struct rtsock_request;
void rtsock_init(void);
sockid_t rtsock_socket(int type, int protocol, struct sock ** sock,
const struct sockevent_ops ** ops);
void rtsock_msg_ifannounce(struct ifdev * ifdev, int arrival);
void rtsock_msg_ifinfo(struct ifdev * ifdev);
void rtsock_msg_addr_dl(struct ifdev * ifdev, unsigned int type,
ifaddr_dl_num_t num);
void rtsock_msg_addr_v4(struct ifdev * ifdev, unsigned int type,
ifaddr_v4_num_t num);
void rtsock_msg_addr_v6(struct ifdev * ifdev, unsigned int type,
ifaddr_v6_num_t num);
void rtsock_msg_miss(const struct sockaddr * addr);
void rtsock_msg_route(const struct route_entry * route, unsigned int type,
const struct rtsock_request * rtr);
void rtsock_msg_arp(lldata_arp_num_t num, unsigned int type,
const struct rtsock_request * rtr);
void rtsock_msg_ndp(lldata_ndp_num_t num, unsigned int type,
const struct rtsock_request * rtr);
#endif /* !MINIX_NET_LWIP_RTSOCK_H */

744
minix/net/lwip/rttree.c Normal file

@ -0,0 +1,744 @@
/* LWIP service - rttree.c - generic routing tree data structure */
/*
* This module implements the Net/3 binary radix (Patricia) tree as described
* in TCP/IP Illustrated Vol.2, with a few important changes. First and
* foremost, we make the assumption that all address masks are "normal", i.e.,
* they can be expressed in terms of a "prefix length" or "bit count", meaning
* that the first so many bits of the mask are set and the remaining bits are
* all clear. Based on this assumption, we store routing entries not just in
* leaf nodes, but rather in a node at the bit count of the routing entry's
* mask; this node may then also have children. As a result, instead of "leaf"
* and "internal" nodes, this module instead uses "data" and "link" nodes:
*
* - Data nodes are nodes with an associated routing entry. The data node
* structure is always the first field of its corresponding routing entry
* structure. Data nodes may have zero, one, or two children. Its children
* are always a refinement of the address mask in the routing entry.
* - Link nodes are nodes with no associated routing entry. They always have
* exactly two children. As with BSD's "internal" nodes: since the tree
* needs no more than one link node per routing entry, each routing entry
* structure contains a link node, which may be used anywhere in the tree.
*
* The result of this approach is that we do not use a linked list for each
* leaf, since entries with the same address and different masks are not stored
* as part of the same leaf node. There is however still one case where a
* linked list would be necessary: the coexistence of a full-mask network entry
* and a host entry (net/32 vs host for IPv4, net/128 vs host for IPv6). Since
* this tree implementation is not used for ARP/ND6 (host) entries, the need to
* support that case is not as high, and so it is currently not supported. It
 * can be added later if needed.  In that case, only the prototype of
 * rttree_lookup_exact() will have to be changed, since rttree_add() already
 * supports the difference by passing a full mask vs passing no mask at all.
*
* There are other differences with the BSD implementation, and certainly also
* more opportunities for improving performance. For now, the implementation
* should be good enough for its intended purpose.
*/
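/*
 * A small worked example, for illustration only.  After adding the IPv4
 * entries 0.0.0.0/0, 10.0.0.0/8, and 10.1.0.0/16 to an empty tree, each
 * entry is a data node that is a child of the previous, wider entry; no
 * link nodes are in use.  Adding 10.2.0.0/16 then requires a BRANCH:
 * 10.1.0.0 and 10.2.0.0 first differ at bit 14, so a link node with bit
 * count 14 is interposed below the 10.0.0.0/8 data node, with the two /16
 * data nodes as its children.
 */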
#include "lwip.h"
#include "rttree.h"
#define RTTREE_BITS_TO_BYTE(bits) ((bits) >> 3)
#define RTTREE_BITS_TO_SHIFT(bits) (7 - ((bits) & 7))
#define RTTREE_BITS_TO_BYTES(bits) (RTTREE_BITS_TO_BYTE((bits) + 7))
/*
* The given node is being added to the given routing tree, and just had its
* bit count assigned. Precompute any additional fields used for fast address
* access on the node.
*/
static void
rttree_precompute(struct rttree * tree __unused, struct rttree_node * node)
{
node->rtn_byte = RTTREE_BITS_TO_BYTE(node->rtn_bits);
node->rtn_shift = RTTREE_BITS_TO_SHIFT(node->rtn_bits);
}
/*
* For an operation on the routing tree 'tree', test whether the bit 'bit' is
* set or clear in 'addr'. Return 1 if the address has the bit set, 0 if it
* does not.
*/
static unsigned int
rttree_test(const struct rttree * tree __unused, const void * addr,
unsigned int bit)
{
unsigned int byte, shift;
byte = RTTREE_BITS_TO_BYTE(bit);
shift = RTTREE_BITS_TO_SHIFT(bit);
return (((const uint8_t *)addr)[byte] >> shift) & 1;
}
/*
* For an operation on the routing tree 'tree', test whether a particular bit
* as identified by the routing node 'node' is set or clear in 'address',
* effectively computing the side (left or right) to take when descending down
* the tree. Return 1 if the address has the bit set, 0 if it does not.
*/
static inline unsigned int
rttree_side(const struct rttree * tree __unused,
const struct rttree_node * node, const void * addr)
{
return (((const uint8_t *)addr)[node->rtn_byte] >>
node->rtn_shift) & 1;
}
/*
* Check for the routing tree 'tree' whether the routing entry 'entry' matches
* the address 'addr' exactly. Return TRUE or FALSE depending on the outcome.
* This function must be called only on entries that have already been
* determined to span the full bit width.
*/
static inline int
rttree_equals(const struct rttree * tree, const struct rttree_entry * entry,
const void * addr)
{
unsigned int bits;
bits = tree->rtt_bits;
assert(bits == entry->rte_data.rtn_bits);
return !memcmp(entry->rte_addr, addr, RTTREE_BITS_TO_BYTE(bits));
}
/*
* Check for the routing tree 'tree' whether the routing entry 'entry' matches
* the address 'addr'. Return TRUE if the address is matched by the entry's
* address and mask, or FALSE if not.
*/
static inline int
rttree_match(const struct rttree * tree, const struct rttree_entry * entry,
const void * addr)
{
const uint8_t *aptr, *aptr2, *mptr;
unsigned int bits, bytes;
if ((bits = entry->rte_data.rtn_bits) == 0)
return TRUE;
if ((mptr = (const uint8_t *)entry->rte_mask) == NULL)
return rttree_equals(tree, entry, addr);
aptr = (const uint8_t *)addr;
aptr2 = (const uint8_t *)entry->rte_addr;
for (bytes = RTTREE_BITS_TO_BYTES(bits); bytes > 0; bytes--) {
if ((*aptr & *mptr) != *aptr2)
return FALSE;
aptr++;
aptr2++;
mptr++;
}
return TRUE;
}
/*
* Find the first bit that differs between the two given addresses. Return the
* bit number if found, or the full bit width if the addresses are equal.
*/
static unsigned int
rttree_diff(const struct rttree * tree, const void * addr, const void * addr2)
{
const uint8_t *aptr, *aptr2;
unsigned int bit, i;
uint8_t b;
aptr = (const uint8_t *)addr;
aptr2 = (const uint8_t *)addr2;
for (bit = 0; bit < tree->rtt_bits; bit += NBBY, aptr++, aptr2++) {
if ((b = *aptr ^ *aptr2) != 0) {
for (i = 0; i < NBBY; i++)
if (b & (1 << (NBBY - i - 1)))
break;
return bit + i;
}
}
return bit;
}
/*
* Add a link node to the free list of the given routing tree, marking it as
* free in the process.
*/
static void
rttree_add_free(struct rttree * tree, struct rttree_node * node)
{
node->rtn_child[0] = NULL;
if ((node->rtn_child[1] = tree->rtt_free) != NULL)
node->rtn_child[1]->rtn_child[0] = node;
tree->rtt_free = node;
node->rtn_parent = NULL;
node->rtn_type = RTNT_FREE;
}
/*
* Remove the given free link node from the free list. The caller must already
* have verified that the node is on the free list, and has to change the node
* type as appropriate afterward.
*/
static void
rttree_del_free(struct rttree * tree, struct rttree_node * node)
{
assert(node->rtn_type == RTNT_FREE);
if (node->rtn_child[0] != NULL)
node->rtn_child[0]->rtn_child[1] = node->rtn_child[1];
else
tree->rtt_free = node->rtn_child[1];
if (node->rtn_child[1] != NULL)
node->rtn_child[1]->rtn_child[0] = node->rtn_child[0];
}
/*
* Obtain, remove, and return a free link node from the free list. This
* function must be called only when it is already known that the free list is
* not empty. The caller has to change the node type as appropriate afterward.
*/
static struct rttree_node *
rttree_get_free(struct rttree * tree)
{
struct rttree_node * node;
node = tree->rtt_free;
assert(node != NULL);
assert(node->rtn_type == RTNT_FREE);
rttree_del_free(tree, node);
return node;
}
/*
* Initialize the given routing tree, with the given address bit width.
*/
void
rttree_init(struct rttree * tree, unsigned int bits)
{
tree->rtt_root = NULL;
tree->rtt_free = NULL;
tree->rtt_bits = bits;
}
/*
* Look up the most narrow routing tree entry that matches the given address.
* Return the entry on success, or NULL if no matching entry is found.
*/
struct rttree_entry *
rttree_lookup_match(struct rttree * tree, const void * addr)
{
struct rttree_entry *entry, *best;
struct rttree_node *node;
unsigned int side;
/*
* The current implementation is "forward-tracking", testing all
* potentially matching entries while descending into the tree and
* remembering the "best" (narrowest matching) entry. The assumption
* here is that most lookups will end up returning the default route or
* another broad route, and thus quickly fail a narrower match and bail
* out early. This assumption is in part motivated by the fact that
* our routing trees do not store link-layer (ARP/ND6) entries. If
* desired, the implementation can easily be rewritten to do
* backtracking instead.
*/
best = NULL;
for (node = tree->rtt_root; node != NULL;
node = node->rtn_child[side]) {
if (node->rtn_type == RTNT_DATA) {
entry = (struct rttree_entry *)node;
if (!rttree_match(tree, entry, addr))
break;
best = entry;
}
side = rttree_side(tree, node, addr);
}
return best;
}
/*
* Look up a routing entry that is an exact match for the given (full) address.
* Return the entry if it was found, or NULL otherwise.
*/
struct rttree_entry *
rttree_lookup_host(struct rttree * tree, const void * addr)
{
struct rttree_entry *entry;
struct rttree_node *node;
unsigned int side;
for (node = tree->rtt_root; node != NULL;
node = node->rtn_child[side]) {
if (node->rtn_type == RTNT_DATA &&
node->rtn_bits == tree->rtt_bits) {
entry = (struct rttree_entry *)node;
if (rttree_equals(tree, entry, addr))
return entry;
break;
}
side = rttree_side(tree, node, addr);
}
return NULL;
}
/*
* Look up a routing entry that is an exact match for the given address and
* prefix length. Return the entry if found, or NULL otherwise.
*/
struct rttree_entry *
rttree_lookup_exact(struct rttree * tree, const void * addr,
unsigned int prefix)
{
struct rttree_entry *entry;
struct rttree_node *node;
unsigned int side;
for (node = tree->rtt_root; node != NULL && node->rtn_bits <= prefix;
node = node->rtn_child[side]) {
if (node->rtn_type == RTNT_DATA) {
entry = (struct rttree_entry *)node;
if (!rttree_match(tree, entry, addr))
return NULL;
if (node->rtn_bits == prefix)
return entry;
}
side = rttree_side(tree, node, addr);
}
return NULL;
}
/*
* Enumerate entries in the routing tree. If 'last' is NULL, return the first
* entry. Otherwise, return the next entry starting from 'last'. In both
* cases, if no (more) entries are present in the tree, return NULL. The order
* of the returned entries is stable across tree modifications and the function
* may be called multiple times on the same entry. More specifically, it is
* safe to continue enumeration from a previous entry after deleting its
* successor from the tree.
*/
struct rttree_entry *
rttree_enum(struct rttree * tree, struct rttree_entry * last)
{
struct rttree_node *node, *parent;
/*
* For the first query, we may have to return the tree root right away.
* For subsequent queries, we have to move ahead by at least one node.
*/
if (last == NULL) {
if ((node = tree->rtt_root) == NULL)
return NULL;
if (node->rtn_type == RTNT_DATA)
return (struct rttree_entry *)node;
} else
node = &last->rte_data;
/* A basic iterative pre-order binary-tree depth-first search. */
do {
assert(node != NULL);
/* Can we descend further, either left or right? */
if (node->rtn_child[0] != NULL)
node = node->rtn_child[0];
else if (node->rtn_child[1] != NULL)
node = node->rtn_child[1];
else {
/*
* No. Go back up the tree, until we can go right
 * where we went left before, or run out of tree.
*/
for (;; node = parent) {
if ((parent = node->rtn_parent) == NULL)
return NULL;
if (parent->rtn_child[0] == node &&
parent->rtn_child[1] != NULL) {
node = parent->rtn_child[1];
break;
}
}
}
/* Skip link nodes. */
} while (node->rtn_type != RTNT_DATA);
return (struct rttree_entry *)node;
}
/*
* Set the node 'node' to be part of tree 'tree', with type 'type' (either
* RTNT_DATA or RTNT_LINK) and a bit count of 'prefix'. The node is set to be
* a child of 'parent' on side 'side', unless 'parent' is NULL in which case
* the node is set to be the topmost node in the tree (and 'side' is ignored).
* The node's children are set to 'left' and 'right'; for each, if not NULL,
* its parent is set to 'node'.
*/
static void
rttree_set(struct rttree * tree, struct rttree_node * node, int type,
unsigned int prefix, struct rttree_node * parent, int side,
struct rttree_node * left, struct rttree_node * right)
{
assert(type == RTNT_DATA || type == RTNT_LINK);
assert(prefix <= tree->rtt_bits);
assert(side == 0 || side == 1);
node->rtn_type = type;
node->rtn_bits = prefix;
/* With rtn_bits assigned, precompute any derived fields. */
rttree_precompute(tree, node);
if ((node->rtn_parent = parent) != NULL)
parent->rtn_child[side] = node;
else
tree->rtt_root = node;
if ((node->rtn_child[0] = left) != NULL)
left->rtn_parent = node;
if ((node->rtn_child[1] = right) != NULL)
right->rtn_parent = node;
}
/*
* In the routing tree 'tree', replace old node 'onode' with new node 'node',
* setting the type of the latter to 'type'. The tree is updated accordingly,
* but it is left up to the caller to deal with the old node as appropriate.
*/
static void
rttree_replace(struct rttree * tree, struct rttree_node * onode,
struct rttree_node * node, int type)
{
struct rttree_node *parent;
unsigned int side;
/*
* Replacing one data node with another data node is not something that
* is currently being done, even if it would work.
*/
assert(onode->rtn_type != RTNT_DATA || node->rtn_type != RTNT_DATA);
assert(onode->rtn_child[0] != NULL);
assert(onode->rtn_child[1] != NULL);
parent = onode->rtn_parent;
side = (parent != NULL && parent->rtn_child[1] == onode);
rttree_set(tree, node, type, onode->rtn_bits, parent, side,
onode->rtn_child[0], onode->rtn_child[1]);
}
/*
* Add a new routing entry 'entry' to the routing tree 'tree'. The entry
* object will be initialized as a result. The address to add is given as
 * 'addr', and the address mask as 'mask'.  Both of those pointers must point
* to memory that is as long-lived as the routing entry; this is typically
* accomplished by storing them in a larger object that embeds 'entry'.
* However, 'mask' may be NULL, signifying a host type entry with an implied
* full mask. If not NULL, the given mask must be normalized, i.e., it must
* consist of a run of zero or more 1-bits followed by a remainder of only
* 0-bits. The number of 1-bits must also be given as a bit count 'prefix',
* even if 'mask' is NULL. The address must be normalized to its mask: no bits
* starting from bit 'prefix' must be set in 'addr'. Return OK if adding the
* routing entry succeeded, or EEXIST if an entry already exists for the
* combination of that address and mask. If the caller has already verified
* with rttree_lookup_exact() that no such entry exists, the call will succeed.
*/
int
rttree_add(struct rttree * tree, struct rttree_entry * entry,
const void * addr, const void * mask, unsigned int prefix)
{
struct rttree_node *node, *parent, *link;
struct rttree_entry *other_entry;
unsigned int bit, side, side2;
int match;
assert(mask != NULL || prefix == tree->rtt_bits);
/*
* We start by determining the path, bit count, and method of the
* addition. We do this with a lookup on the address, for the full
* address width--that is, not limited to the given prefix length. As
* a result, at some point we will find either a NULL pointer, or a
* data node with a width that is at least as large as the given prefix
* length. The NULL case is easy: we EXTEND the tree with our new
* entry wherever we ran into the NULL pointer.
*
* If instead we find a sufficiently wide data node, then we see if it
* is a match for the new address. If so, our new data node should
* either be INSERTed between two nodes along the path taken so far, or
 * REPLACE a link node along that path with the new data node.  If it is
 * not a match, then the action to take depends on whether the
* first differing bit falls within the given prefix length: if so, we
* have to BRANCH along the path, using a link node allocated for that
* differing bit; if not, we should use INSERT or REPLACE after all.
*
* As the only exceptional case, we might in fact find an entry for the
* exact same address and prefix length as what is being added. In the
* current design of the routing tree, this is always a failure case.
*/
parent = NULL;
side = 0;
other_entry = NULL;
for (node = tree->rtt_root; node != NULL;
node = node->rtn_child[side]) {
if (node->rtn_type == RTNT_DATA) {
other_entry = (struct rttree_entry *)node;
bit = rttree_diff(tree, other_entry->rte_addr, addr);
match = (bit >= node->rtn_bits);
/* Test whether the exact entry already exists. */
if (match && node->rtn_bits == prefix)
return EEXIST;
/*
* Test the INSERT/REPLACE and BRANCH cases. Note that
* this condition is in a terse, optimized form that
* does not map directly to the two different cases.
*/
if (!match || node->rtn_bits > prefix) {
if (bit > prefix)
bit = prefix;
break;
}
}
parent = node;
side = rttree_side(tree, node, addr);
}
/*
* At this point, addition is going to succeed no matter what. Start
* by initializing part of 'entry'. In particular, add the given
* entry's link node to the list of free link nodes, because the common
* case is that we end up not using it. If we do, we will just take it
* off again right away. The entry's data node will be initialized as
* part of the addition process below.
*/
entry->rte_addr = addr;
entry->rte_mask = mask;
rttree_add_free(tree, &entry->rte_link);
/*
* First deal with the EXTEND case. In that case we already know the
* intended parent and the side (left/right) for the addition.
*/
if (node == NULL) {
assert(parent == NULL || parent->rtn_bits < prefix);
assert(parent == NULL || parent->rtn_child[side] == NULL);
rttree_set(tree, &entry->rte_data, RTNT_DATA, prefix, parent,
side, NULL /*left*/, NULL /*right*/);
return OK;
}
/*
* For the other three cases, we now have to walk back along the path
* we have taken so far in order to find the correct insertion point.
*/
while (parent != NULL && parent->rtn_bits >= bit) {
node = parent;
parent = node->rtn_parent;
}
if (bit == prefix && node->rtn_bits == bit) {
/*
* The REPLACE case. Replace the link node 'node' with our new
* entry. Afterwards, mark the link node as free.
*/
assert(node->rtn_type != RTNT_DATA);
rttree_replace(tree, node, &entry->rte_data, RTNT_DATA);
rttree_add_free(tree, node);
} else if (bit == prefix) {
/*
* The INSERT case. Insert the data node between 'parent' and
* 'node'. Note that 'parent' may be NULL. We need to use the
* address we found earlier, as 'other_entry', to determine
* whether we should add 'node' to the left or right of the
* inserted data node.
*/
assert(node->rtn_bits > bit);
assert(parent == NULL || parent->rtn_bits < bit);
assert(other_entry != NULL);
side = (parent != NULL && parent->rtn_child[1] == node);
side2 = rttree_test(tree, other_entry->rte_addr, bit);
rttree_set(tree, &entry->rte_data, RTNT_DATA, prefix, parent,
side, (!side2) ? node : NULL, (side2) ? node : NULL);
} else {
/*
* The BRANCH case. In this case, it is impossible that we
* find a link node with a bit count equal to the first
* differing bit between the address we found and the address
* we want to insert: if such a node existed, we would have
* descended down its other child during the initial lookup.
*
	 * Interpose a link node between 'parent' and 'node' for bit
* 'bit', with its other child set to point to 'entry'. Again,
* we need to perform an additional bit test here, because even
* though we know that the address we found during the lookup
* differs from the given address at bit 'bit', we do not know
* the value of either bit yet.
*/
assert(bit < prefix);
assert(node->rtn_bits > bit);
assert(parent == NULL || parent->rtn_bits < bit);
link = rttree_get_free(tree);
side = (parent != NULL && parent->rtn_child[1] == node);
side2 = rttree_test(tree, addr, bit);
/* Use NULL for the data node we are about to add. */
rttree_set(tree, link, RTNT_LINK, bit, parent, side,
(side2) ? node : NULL, (!side2) ? node : NULL);
/* This addition will replace the NULL pointer again. */
rttree_set(tree, &entry->rte_data, RTNT_DATA, prefix, link,
side2, NULL /*left*/, NULL /*right*/);
}
return OK;
}
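To make the four addition cases concrete, here is a small worked example; it is illustrative only and not part of the original source:

/*
 * Worked example (8-bit addresses for brevity).  Starting from a tree
 * that contains only 0x00/0 at the root:
 *
 * - adding 0xc0/2 EXTENDs the tree: the lookup falls off the root's
 *   right child (bit 0 of the address is set), so the new data node is
 *   attached there;
 * - adding 0x80/1 next INSERTs: the lookup hits 0xc0/2, and the first
 *   differing bit (bit 1) equals the new prefix length, so 0x80/1 is
 *   placed between 0x00/0 and 0xc0/2;
 * - adding 0x80/2 instead (without 0x80/1 present) BRANCHes: bit 1 now
 *   falls within the new prefix length, so a link node for bit 1 is
 *   interposed above 0xc0/2, with 0x80/2 as its other child;
 * - adding 0x80/1 after that REPLACEs the bit-1 link node with the new
 *   data node, and the link node is freed up again.
 */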
/*
* Remove a particular node 'node' from the routing tree 'tree'. The given
 * node must have zero or one children.  As an integrity check only, if 'nonempty'
* is set, the node must have one child. If the node has one child, that child
* will be linked to the node's parent (or the tree root), thus cutting the
* node itself out of the tree. If the node has zero children, the
* corresponding slot in its parent (or the tree root) will be cleared. The
* function will return a pointer to the parent node if it too qualifies for
* removal afterwards, or NULL if no further removal action needs to be taken.
*/
static struct rttree_node *
rttree_remove(struct rttree * tree, struct rttree_node * node,
int nonempty __unused)
{
struct rttree_node *parent, *child;
unsigned int side;
if ((child = node->rtn_child[0]) == NULL)
child = node->rtn_child[1];
assert(child != NULL || !nonempty);
if ((parent = node->rtn_parent) != NULL) {
side = (parent->rtn_child[1] == node);
parent->rtn_child[side] = child;
if (child != NULL)
child->rtn_parent = parent;
else if (parent->rtn_type == RTNT_LINK)
return parent;
} else {
tree->rtt_root = child;
if (child != NULL)
child->rtn_parent = NULL;
}
return NULL;
}
/*
* Delete the routing entry 'entry' from the routing tree 'tree'. The entry
* must have been added before. This function always succeeds.
*/
void
rttree_delete(struct rttree * tree, struct rttree_entry * entry)
{
struct rttree_node *node, *link;
/*
* Remove the data node from the tree. If the data node also has two
* children, we have to replace it with a link node. Otherwise, we
* have to remove it and, if it has no children at all, possibly remove
* its parent as well.
*/
node = &entry->rte_data;
assert(node->rtn_type == RTNT_DATA);
if (node->rtn_child[0] != NULL && node->rtn_child[1] != NULL) {
/*
* The link node we allocate here may actually be the entry's
* own link node. We do not make an exception for that case
* here, as we have to deal with the entry's link node being in
* use a bit further down anyway.
*/
link = rttree_get_free(tree);
rttree_replace(tree, node, link, RTNT_LINK);
} else {
/*
* Remove the data node from the tree. If the node has no
* children, its removal may leave a link node with one child.
* That would be its original parent. That node must then also
* be removed from the tree, and freed up.
*/
link = rttree_remove(tree, node, FALSE /*nonempty*/);
if (link != NULL) {
(void)rttree_remove(tree, link, TRUE /*nonempty*/);
rttree_add_free(tree, link);
}
}
/*
* Remove the entry's link node from either the tree or the free list,
* depending on the type currently assigned to it. If it has to be
* removed from the tree, it must be replaced with another link node.
* There will always be enough link nodes available for this to work.
*/
node = &entry->rte_link;
if (node->rtn_type == RTNT_LINK) {
link = rttree_get_free(tree);
rttree_replace(tree, node, link, RTNT_LINK);
} else {
assert(node->rtn_type == RTNT_FREE);
rttree_del_free(tree, node);
}
}

minix/net/lwip/rttree.h Normal file

@ -0,0 +1,50 @@
#ifndef MINIX_NET_LWIP_RTTREE_H
#define MINIX_NET_LWIP_RTTREE_H
/* Routing table node structure. */
struct rttree_node {
	struct rttree_node *rtn_child[2];	/* left/right child nodes */
struct rttree_node *rtn_parent; /* parent node */
	uint8_t rtn_type;			/* node type (RTNT_) */
uint8_t rtn_bits; /* prefix bit count */
uint8_t rtn_byte; /* bits-derived byte index */
uint8_t rtn_shift; /* bits-derived shift count */
};
#define RTNT_DATA 0 /* data node (entry) */
#define RTNT_LINK 1 /* link node, in use */
#define RTNT_FREE 2 /* link node, free */
/* Routing table entry structure. */
struct rttree_entry {
struct rttree_node rte_data; /* data node - MUST be first */
struct rttree_node rte_link; /* link node */
const void *rte_addr; /* pointer to address */
const void *rte_mask; /* pointer to mask */
};
/* Routing table structure. */
struct rttree {
struct rttree_node *rtt_root; /* root of the route tree */
struct rttree_node *rtt_free; /* free internal nodes list */
uint8_t rtt_bits; /* number of bits in address */
};
#define rttree_get_addr(entry) ((entry)->rte_addr)
#define rttree_get_mask(entry) ((entry)->rte_mask)
#define rttree_get_prefix(entry) ((entry)->rte_data.rtn_bits)
void rttree_init(struct rttree * tree, unsigned int bits);
struct rttree_entry *rttree_lookup_match(struct rttree * tree,
const void * addr);
struct rttree_entry *rttree_lookup_host(struct rttree * tree,
const void * addr);
struct rttree_entry *rttree_lookup_exact(struct rttree * tree,
const void * addr, unsigned int prefix);
struct rttree_entry *rttree_enum(struct rttree * tree,
struct rttree_entry * entry);
int rttree_add(struct rttree * tree, struct rttree_entry * entry,
const void * addr, const void * mask, unsigned int prefix);
void rttree_delete(struct rttree * tree, struct rttree_entry * entry);
#endif /* !MINIX_NET_LWIP_RTTREE_H */
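As a usage illustration (not part of the commit): a caller embeds struct rttree_entry in a larger object, so that the address and mask storage outlives the entry. The 'struct route' type, its field names, and the function below are hypothetical:

/* Hypothetical caller-side sketch of the rttree API. */
struct route {
	struct rttree_entry r_entry;	/* rttree bookkeeping */
	uint8_t r_addr[4];		/* IPv4 address, long-lived storage */
	uint8_t r_mask[4];		/* normalized network mask */
};

static struct rttree route_tree;

static int
route_add_net10(struct route * route)
{

	rttree_init(&route_tree, 32);	/* 32-bit (IPv4) addresses */

	/* Add 10.0.0.0/8; both buffers must outlive the entry. */
	memcpy(route->r_addr, "\x0a\x00\x00\x00", 4);
	memcpy(route->r_mask, "\xff\x00\x00\x00", 4);
	if (rttree_add(&route_tree, &route->r_entry, route->r_addr,
	    route->r_mask, 8) != OK)
		return EEXIST;

	/* Longest-prefix matching now finds the new entry. */
	assert(rttree_lookup_match(&route_tree, route->r_addr) ==
	    &route->r_entry);
	return OK;
}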

minix/net/lwip/tcpisn.c Normal file

@ -0,0 +1,203 @@
/* LWIP service - tcpisn.c - TCP Initial Sequence Number generation */
/*
* This module implements the TCP ISN algorithm standardized in RFC 6528. It
* currently uses the current time, at clock tick granularity, as source for
* the 4-microsecond timer, and SHA256 as the hashing algorithm. As part of
* the input to the hash function, we use an "ISN secret" that can be set
* through the (hidden, root-only) net.inet.tcp.isn_secret sysctl(7) node.
* Ideally, the secret should remain the same across system reboots; it is left
* up to userland to take care of that.
*
* TODO: while this module provides the strongest possible implementation of
* the algorithm, it is also quite heavyweight. We should consider allowing
* for a more configurable level of strength, perhaps with the possibility for
* less powerful platforms to revert to simple use of a random number.
*/
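For reference, the formula standardized by RFC 6528, restated here as it maps onto this module (a hedged summary, not from the source):

/*
 * RFC 6528: ISN = M + F(localip, localport, remoteip, remoteport, secretkey)
 *
 * In this module, F() is SHA256 over the input block assembled in
 * lwip_hook_tcp_isn() below, truncated to the first 32 bits of output,
 * and M is derived from the wall-clock time at 4-microsecond (that is,
 * 250000 units per second) granularity.
 */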
#include "lwip.h"
#include "tcpisn.h"
#include <sys/sha2.h>
/*
* The TCP ISN hash input consists of the TCP 4-tuple of the new connection and
* a static secret. The 4-tuple consists of two IP addresses, at most 16 bytes
* (128 bits, for IPv6) each, and two port numbers, two bytes (16 bits) each.
* We use the SHA256 input block size of 64 bytes to avoid copying, so that
* leaves us with 28 bytes of room for the static secret. We use 16 bytes, and
* leave the rest blank. As a sidenote, while hardcoding sizes is not nice, we
* really need to get the layout exactly right in this case.
*/
#define TCPISN_TUPLE_LENGTH (16 * 2 + 2 * 2)
#if TCPISN_SECRET_LENGTH > (SHA256_BLOCK_LENGTH - TCPISN_TUPLE_LENGTH)
#error "TCP ISN secret length exceeds remainder of hash block"
#endif
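Concretely, the comment above implies the following layout of the 64-byte input block; this summary, including the byte ranges, is illustrative and not part of the source:

/*
 * Layout of tcpisn_input[] (offsets in bytes):
 *
 *	 0..15	local IP address (IPv4-mapped IPv6 format for IPv4)
 *	16..31	remote IP address
 *	32..33	local TCP port (network byte order)
 *	34..35	remote TCP port (network byte order)
 *	36..51	ISN secret (TCPISN_SECRET_LENGTH bytes)
 *	52..63	zero padding, up to SHA256_BLOCK_LENGTH bytes
 */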
/* We are using memchr() on this, so do not remove the '32' size here! */
static const uint8_t tcpisn_hextab[32] = "0123456789abcdef0123456789ABCDEF";
static uint8_t tcpisn_input[SHA256_BLOCK_LENGTH] __aligned(4);
static int tcpisn_set;
/*
* Initialize the TCP ISN module.
*/
void
tcpisn_init(void)
{
time_t boottime;
/*
* Part of the input to the hash function is kept as is between calls
* to the TCP ISN hook. In particular, we zero the entire input here,
 * so that the padding is zero.  We also zero the area where the secret
 * will be stored, and then copy in the system boot time as a last-resort
 * source of at least some minimal amount of unpredictability.
* The boot time is by no means sufficient though, so issue a warning
* if a TCP ISN is requested before an actual secret is set. Note that
* an actual secret will overwrite the boot time based pseudo-secret.
*/
memset(tcpisn_input, 0, sizeof(tcpisn_input));
(void)getuptime(NULL, NULL, &boottime);
memcpy(&tcpisn_input[TCPISN_TUPLE_LENGTH], &boottime,
sizeof(boottime));
tcpisn_set = FALSE;
}
/*
 * Set and/or retrieve the ISN secret.  In order to allow the secret to be
* set from the command line, this sysctl(7) node is a hex-encoded string.
*/
ssize_t
tcpisn_secret(struct rmib_call * call __unused,
struct rmib_node * node __unused, struct rmib_oldp * oldp,
struct rmib_newp * newp)
{
uint8_t secret[TCPISN_SECRET_HEX_LENGTH], byte, *p;
unsigned int i;
int r;
/* First copy out the old (current) ISN secret. */
if (oldp != NULL) {
for (i = 0; i < TCPISN_SECRET_LENGTH; i++) {
byte = tcpisn_input[TCPISN_TUPLE_LENGTH + i];
secret[i * 2] = tcpisn_hextab[byte >> 4];
secret[i * 2 + 1] = tcpisn_hextab[byte & 0xf];
}
secret[i * 2] = '\0';
assert(i * 2 + 1 == sizeof(secret));
if ((r = rmib_copyout(oldp, 0, secret, sizeof(secret))) < 0)
return r;
}
/*
* Then copy in the new ISN secret. We require the given string to be
* exactly as large as we need.
*/
if (newp != NULL) {
/* Copy in the user-given string. */
if ((r = rmib_copyin(newp, secret, sizeof(secret))) != OK)
return r;
		if (secret[sizeof(secret) - 1] != '\0')
return EINVAL;
/* Hex-decode the given string (in place). */
for (i = 0; i < TCPISN_SECRET_LENGTH; i++) {
if ((p = memchr(tcpisn_hextab, secret[i * 2],
sizeof(tcpisn_hextab))) == NULL)
return EINVAL;
secret[i] = ((uint8_t)(p - tcpisn_hextab) & 0xf) << 4;
if ((p = memchr(tcpisn_hextab, secret[i * 2 + 1],
sizeof(tcpisn_hextab))) == NULL)
return EINVAL;
secret[i] |= (uint8_t)(p - tcpisn_hextab) & 0xf;
}
/* Once fully validated, switch to the new secret. */
memcpy(&tcpisn_input[TCPISN_TUPLE_LENGTH], secret,
TCPISN_SECRET_LENGTH);
tcpisn_set = TRUE;
}
/* Return the length of the node. */
return sizeof(secret);
}
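A short encoding example may help; the byte values are arbitrary:

/*
 * Example: a secret whose first bytes are 0x01 0x23 0x45 is exchanged as
 * the string "012345..." of exactly 32 hexadecimal digits plus a '\0'
 * terminator.  Thanks to the doubled tcpisn_hextab[] above, both lower
 * and upper case digits are accepted on input; copy-out always produces
 * lower case.
 */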
/*
* Hook to generate an Initial Sequence Number (ISN) for a new TCP connection.
*/
uint32_t
lwip_hook_tcp_isn(const ip_addr_t * local_ip, uint16_t local_port,
const ip_addr_t * remote_ip, uint16_t remote_port)
{
uint8_t output[SHA256_DIGEST_LENGTH] __aligned(4);
SHA256_CTX ctx;
clock_t realtime;
time_t boottime;
uint32_t isn;
if (!tcpisn_set) {
printf("LWIP: warning, no TCP ISN secret has been set\n");
tcpisn_set = TRUE; /* print the warning only once */
}
if (IP_IS_V6(local_ip)) {
assert(IP_IS_V6(remote_ip));
memcpy(&tcpisn_input[0], &ip_2_ip6(local_ip)->addr, 16);
memcpy(&tcpisn_input[16], &ip_2_ip6(remote_ip)->addr, 16);
} else {
assert(IP_IS_V4(local_ip));
assert(IP_IS_V4(remote_ip));
/*
* Store IPv4 addresses as IPv4-mapped IPv6 addresses, even
* though lwIP will never give us an IPv4-mapped IPv6 address,
* so as to ensure completely disjoint address spaces and thus
* no potential abuse of IPv6 addresses in order to predict
* ISNs for IPv4 connections.
*/
memset(&tcpisn_input[0], 0, 10);
tcpisn_input[10] = 0xff;
tcpisn_input[11] = 0xff;
memcpy(&tcpisn_input[12], &ip_2_ip4(local_ip)->addr, 4);
memset(&tcpisn_input[16], 0, 10);
tcpisn_input[26] = 0xff;
tcpisn_input[27] = 0xff;
		memcpy(&tcpisn_input[28], &ip_2_ip4(remote_ip)->addr, 4);
}
tcpisn_input[32] = local_port >> 8;
tcpisn_input[33] = local_port & 0xff;
tcpisn_input[34] = remote_port >> 8;
tcpisn_input[35] = remote_port & 0xff;
/* The rest of the input (secret and padding) is already filled in. */
SHA256_Init(&ctx); /* this call zeroes a buffer we don't use.. */
SHA256_Update(&ctx, tcpisn_input, sizeof(tcpisn_input));
SHA256_Final(output, &ctx);
/* Arbitrarily take the first 32 bits from the generated hash. */
memcpy(&isn, output, sizeof(isn));
/*
* Add the current time in 4-microsecond units. The time value should
* be wall-clock accurate and stable even across system reboots and
* downtime. Do not precompute the boot time part: it may change.
*/
(void)getuptime(NULL, &realtime, &boottime);
isn += (uint32_t)boottime * 250000;
isn += (uint32_t)(((uint64_t)realtime * 250000) / sys_hz());
/* The result is the ISN to use for this connection. */
return isn;
}

minix/net/lwip/tcpisn.h Normal file

@ -0,0 +1,20 @@
#ifndef MINIX_NET_LWIP_TCPISN_H
#define MINIX_NET_LWIP_TCPISN_H
/*
* Length, in bytes, of the secret (random seed) that is used as part of the
* input to the hashing function that generates TCP Initial Sequence Numbers.
*/
#define TCPISN_SECRET_LENGTH 16
/*
* Size of the hexadecimal-string representation of the secret, including
* trailing null terminator.
*/
#define TCPISN_SECRET_HEX_LENGTH (TCPISN_SECRET_LENGTH * 2 + 1)
void tcpisn_init(void);
ssize_t tcpisn_secret(struct rmib_call * call, struct rmib_node * node,
struct rmib_oldp * oldp, struct rmib_newp * newp);
#endif /* !MINIX_NET_LWIP_TCPISN_H */

minix/net/lwip/tcpsock.c Normal file
(file diff suppressed because it is too large)

minix/net/lwip/udpsock.c Normal file

@ -0,0 +1,997 @@
/* LWIP service - udpsock.c - UDP sockets */
#include "lwip.h"
#include "ifaddr.h"
#include "pktsock.h"
#include "lwip/udp.h"
#include <netinet/udp.h>
#include <netinet/ip_var.h>
#include <netinet/udp_var.h>
/* The number of UDP sockets. Inherited from the lwIP configuration. */
#define NR_UDPSOCK MEMP_NUM_UDP_PCB
/*
* Outgoing packets are not getting buffered, so the send buffer size simply
* determines the maximum size for sent packets. The send buffer maximum is
* therefore limited to the maximum size of a single packet (64K-1 bytes),
* which is already enforced by lwIP's 16-bit length parameter to pbuf_alloc().
*
* The actual transmission may enforce a lower limit, though. The full packet
* size must not exceed the same 64K-1 limit, and that includes any headers
* that still have to be prepended to the given packet. The size of those
* headers depends on the socket type (IPv4/IPv6) and the IP_HDRINCL setting.
*/
#define UDP_MAX_PAYLOAD (UINT16_MAX)
#define UDP_SNDBUF_MIN 1 /* minimum UDP send buffer size */
#define UDP_SNDBUF_DEF 8192 /* default UDP send buffer size */
#define UDP_SNDBUF_MAX UDP_MAX_PAYLOAD /* maximum UDP send buffer size */
#define UDP_RCVBUF_MIN MEMPOOL_BUFSIZE /* minimum UDP receive buffer size */
#define UDP_RCVBUF_DEF 32768 /* default UDP receive buffer size */
#define UDP_RCVBUF_MAX 65536 /* maximum UDP receive buffer size */
static struct udpsock {
struct pktsock udp_pktsock; /* pkt socket, MUST be first */
struct udp_pcb *udp_pcb; /* lwIP UDP control block */
SIMPLEQ_ENTRY(udpsock) udp_next; /* next in free list */
} udp_array[NR_UDPSOCK];
static SIMPLEQ_HEAD(, udpsock) udp_freelist; /* list of free UDP sockets */
static const struct sockevent_ops udpsock_ops;
#define udpsock_get_sock(udp) (ipsock_get_sock(udpsock_get_ipsock(udp)))
#define udpsock_get_ipsock(udp) (pktsock_get_ipsock(&(udp)->udp_pktsock))
#define udpsock_is_ipv6(udp) (ipsock_is_ipv6(udpsock_get_ipsock(udp)))
#define udpsock_is_conn(udp) \
(udp_flags((udp)->udp_pcb) & UDP_FLAGS_CONNECTED)
static ssize_t udpsock_pcblist(struct rmib_call *, struct rmib_node *,
struct rmib_oldp *, struct rmib_newp *);
/* The CTL_NET {PF_INET,PF_INET6} IPPROTO_UDP subtree. */
/* TODO: add many more and make some of them writable.. */
static struct rmib_node net_inet_udp_table[] = {
/* 1*/ [UDPCTL_CHECKSUM] = RMIB_INT(RMIB_RO, 1, "checksum",
"Compute UDP checksums"),
/* 2*/ [UDPCTL_SENDSPACE] = RMIB_INT(RMIB_RO, UDP_SNDBUF_DEF,
"sendspace",
"Default UDP send buffer size"),
/* 3*/ [UDPCTL_RECVSPACE] = RMIB_INT(RMIB_RO, UDP_RCVBUF_DEF,
"recvspace",
"Default UDP receive buffer size"),
/* 4*/ [UDPCTL_LOOPBACKCKSUM] = RMIB_FUNC(RMIB_RW | CTLTYPE_INT, sizeof(int),
loopif_cksum, "do_loopback_cksum",
"Perform UDP checksum on loopback"),
/*+0*/ [UDPCTL_MAXID] = RMIB_FUNC(RMIB_RO | CTLTYPE_NODE, 0,
udpsock_pcblist, "pcblist",
"UDP protocol control block list"),
};
static struct rmib_node net_inet_udp_node =
RMIB_NODE(RMIB_RO, net_inet_udp_table, "udp", "UDPv4 related settings");
static struct rmib_node net_inet6_udp6_node =
RMIB_NODE(RMIB_RO, net_inet_udp_table, "udp6", "UDPv6 related settings");
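Once registered by udpsock_init() below, the table surfaces as the following sysctl(7) nodes; this summary is illustrative:

/*
 * Resulting sysctl(7) nodes:
 *
 *	net.inet.udp.checksum          = 1     (read-only)
 *	net.inet.udp.sendspace         = 8192  (UDP_SNDBUF_DEF, read-only)
 *	net.inet.udp.recvspace         = 32768 (UDP_RCVBUF_DEF, read-only)
 *	net.inet.udp.do_loopback_cksum         (read-write)
 *	net.inet.udp.pcblist                   (read-only)
 *
 * with the same set repeated under net.inet6.udp6.
 */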
/*
* Initialize the UDP sockets module.
*/
void
udpsock_init(void)
{
unsigned int slot;
/* Initialize the list of free UDP sockets. */
SIMPLEQ_INIT(&udp_freelist);
for (slot = 0; slot < __arraycount(udp_array); slot++)
SIMPLEQ_INSERT_TAIL(&udp_freelist, &udp_array[slot], udp_next);
/* Register the net.inet.udp and net.inet6.udp6 RMIB subtrees. */
mibtree_register_inet(PF_INET, IPPROTO_UDP, &net_inet_udp_node);
mibtree_register_inet(PF_INET6, IPPROTO_UDP, &net_inet6_udp6_node);
}
/*
* A packet has arrived on a UDP socket. We own the given packet buffer, and
* so we must free it if we do not want to keep it.
*/
static void
udpsock_input(void * arg, struct udp_pcb * pcb __unused, struct pbuf * pbuf,
const ip_addr_t * ipaddr, uint16_t port)
{
struct udpsock *udp = (struct udpsock *)arg;
/* All UDP input processing is handled by pktsock. */
pktsock_input(&udp->udp_pktsock, pbuf, ipaddr, port);
}
/*
* Create a UDP socket.
*/
sockid_t
udpsock_socket(int domain, int protocol, struct sock ** sockp,
const struct sockevent_ops ** ops)
{
struct udpsock *udp;
unsigned int flags;
uint8_t ip_type;
switch (protocol) {
case 0:
case IPPROTO_UDP:
break;
/* NetBSD does not support IPPROTO_UDPLITE, even though lwIP does. */
default:
return EPROTONOSUPPORT;
}
if (SIMPLEQ_EMPTY(&udp_freelist))
return ENOBUFS;
udp = SIMPLEQ_FIRST(&udp_freelist);
ip_type = pktsock_socket(&udp->udp_pktsock, domain, UDP_SNDBUF_DEF,
UDP_RCVBUF_DEF, sockp);
/* We should have enough PCBs so this call should not fail.. */
if ((udp->udp_pcb = udp_new_ip_type(ip_type)) == NULL)
return ENOBUFS;
udp_recv(udp->udp_pcb, udpsock_input, (void *)udp);
/* By default, the multicast TTL is 1 and looping is enabled. */
udp_set_multicast_ttl(udp->udp_pcb, 1);
flags = udp_flags(udp->udp_pcb);
udp_setflags(udp->udp_pcb, flags | UDP_FLAGS_MULTICAST_LOOP);
SIMPLEQ_REMOVE_HEAD(&udp_freelist, udp_next);
*ops = &udpsock_ops;
return SOCKID_UDP | (sockid_t)(udp - udp_array);
}
/*
* Bind a UDP socket to a local address.
*/
static int
udpsock_bind(struct sock * sock, const struct sockaddr * addr,
socklen_t addr_len, endpoint_t user_endpt)
{
struct udpsock *udp = (struct udpsock *)sock;
ip_addr_t ipaddr;
uint16_t port;
err_t err;
int r;
if ((r = ipsock_get_src_addr(udpsock_get_ipsock(udp), addr, addr_len,
user_endpt, &udp->udp_pcb->local_ip, udp->udp_pcb->local_port,
TRUE /*allow_mcast*/, &ipaddr, &port)) != OK)
return r;
err = udp_bind(udp->udp_pcb, &ipaddr, port);
return util_convert_err(err);
}
/*
* Connect a UDP socket to a remote address.
*/
static int
udpsock_connect(struct sock * sock, const struct sockaddr * addr,
socklen_t addr_len, endpoint_t user_endpt __unused)
{
struct udpsock *udp = (struct udpsock *)sock;
struct ifdev *ifdev;
const ip_addr_t *src_addr;
ip_addr_t dst_addr;
uint16_t dst_port;
uint32_t ifindex, ifindex2;
err_t err;
int r;
/*
	 * One may "unconnect" a socket by providing an address with family
* AF_UNSPEC. Providing an <any>:0 address does not achieve the same.
*/
if (addr_is_unspec(addr, addr_len)) {
udp_disconnect(udp->udp_pcb);
return OK;
}
if ((r = ipsock_get_dst_addr(udpsock_get_ipsock(udp), addr,
addr_len, &udp->udp_pcb->local_ip, &dst_addr, &dst_port)) != OK)
return r;
/*
* Bind explicitly to a source address if the PCB is not bound to one
* yet. This is expected in the BSD socket API, but lwIP does not do
* it for us.
*/
if (ip_addr_isany(&udp->udp_pcb->local_ip)) {
/* Help the multicast case a bit, if possible. */
ifdev = NULL;
if (ip_addr_ismulticast(&dst_addr)) {
ifindex = pktsock_get_ifindex(&udp->udp_pktsock);
ifindex2 = udp_get_multicast_netif_index(udp->udp_pcb);
if (ifindex == 0)
ifindex = ifindex2;
if (ifindex != 0) {
ifdev = ifdev_get_by_index(ifindex);
if (ifdev == NULL)
return ENXIO;
}
}
src_addr = ifaddr_select(&dst_addr, ifdev, NULL /*ifdevp*/);
if (src_addr == NULL)
return EHOSTUNREACH;
err = udp_bind(udp->udp_pcb, src_addr,
udp->udp_pcb->local_port);
if (err != ERR_OK)
return util_convert_err(err);
}
/*
* Connecting a UDP socket serves two main purposes: 1) the socket uses
* the address as destination when sending, and 2) the socket receives
* packets from only the connected address.
*/
err = udp_connect(udp->udp_pcb, &dst_addr, dst_port);
if (err != ERR_OK)
return util_convert_err(err);
return OK;
}
/*
* Perform preliminary checks on a send request.
*/
static int
udpsock_pre_send(struct sock * sock, size_t len, socklen_t ctl_len __unused,
const struct sockaddr * addr, socklen_t addr_len __unused,
endpoint_t user_endpt __unused, int flags)
{
struct udpsock *udp = (struct udpsock *)sock;
if ((flags & ~MSG_DONTROUTE) != 0)
return EOPNOTSUPP;
if (!udpsock_is_conn(udp) && addr == NULL)
return EDESTADDRREQ;
/*
* This is only one part of the length check. The rest is done from
* udpsock_send(), once we have more information.
*/
if (len > ipsock_get_sndbuf(udpsock_get_ipsock(udp)))
return EMSGSIZE;
return OK;
}
/*
* Swap IP-level options between the UDP PCB and the packet options structure,
* for all options that have their flag set in the packet options structure.
* This function is called twice when sending a packet. The result is that the
* flagged options are overridden for only the packet being sent.
*/
static void
udpsock_swap_opt(struct udpsock * udp, struct pktopt * pkto)
{
uint8_t tos, ttl, mcast_ttl;
if (pkto->pkto_flags & PKTOF_TOS) {
tos = udp->udp_pcb->tos;
udp->udp_pcb->tos = pkto->pkto_tos;
pkto->pkto_tos = tos;
}
if (pkto->pkto_flags & PKTOF_TTL) {
ttl = udp->udp_pcb->ttl;
mcast_ttl = udp_get_multicast_ttl(udp->udp_pcb);
udp->udp_pcb->ttl = pkto->pkto_ttl;
udp_set_multicast_ttl(udp->udp_pcb, pkto->pkto_mcast_ttl);
pkto->pkto_ttl = ttl;
pkto->pkto_mcast_ttl = mcast_ttl;
}
}
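The intended call pattern is symmetric, as udpsock_send() below demonstrates:

/*
 * Intended call pattern (taken from udpsock_send() below):
 *
 *	udpsock_swap_opt(udp, &pktopt);
 *	err = udp_sendto_if_src(udp->udp_pcb, pbuf, ...);
 *	udpsock_swap_opt(udp, &pktopt);
 *
 * The first swap applies the per-packet overrides to the PCB; the second
 * swap restores the PCB's original values from the same structure.
 */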
/*
* Send a packet on a UDP socket.
*/
static int
udpsock_send(struct sock * sock, const struct sockdriver_data * data,
size_t len, size_t * off, const struct sockdriver_data * ctl,
socklen_t ctl_len, socklen_t * ctl_off __unused,
const struct sockaddr * addr, socklen_t addr_len,
endpoint_t user_endpt __unused, int flags, size_t min __unused)
{
struct udpsock *udp = (struct udpsock *)sock;
struct pktopt pktopt;
struct pbuf *pbuf;
struct ifdev *ifdev;
struct netif *netif;
const ip_addr_t *src_addrp, *dst_addrp;
ip_addr_t src_addr, dst_addr; /* for storage only; not always used! */
uint16_t dst_port;
uint32_t ifindex;
size_t hdrlen;
err_t err;
int r;
/* Copy in and parse any packet options. */
pktopt.pkto_flags = 0;
if ((r = pktsock_get_ctl(&udp->udp_pktsock, ctl, ctl_len,
&pktopt)) != OK)
return r;
/*
* The code below will both determine an outgoing interface and a
* source address for the packet. Even though lwIP could do this for
* us in some cases, there are other cases where we must do so
	 * ourselves, for two main reasons: 1) the possibility that either or
* both have been provided through IPV6_PKTINFO, and 2) our intent to
* detect and stop zone violations for (combinations of) scoped IPv6
* addresses. As a result, it is easier to simply take over the
	 * selection tasks from lwIP in their entirety.
*
* Much of the same applies to rawsock_send() as well. Functional
* differences (e.g. IP_HDRINCL support) as well as the PCB accesses in
* the code make it hard to merge the two into a single pktsock copy.
* Please do keep the two in sync as much as possible.
*/
/*
* Start by checking whether the source address and/or the outgoing
* interface are overridden using sticky and/or ancillary options. The
* call to pktsock_get_pktinfo(), if successful, will either set
* 'ifdev' to NULL, in which case there is no override, or it will set
* 'ifdev' to the outgoing interface to use, and (only) in that case
* also fill 'src_addr', with an address that may either be a locally
* owned unicast address or the unspecified ('any') address. If it is
* a unicast address, that is the source address to use for the packet.
* Otherwise, fall back to the address to which the socket is bound,
* which may also be the unspecified address or even a multicast
	 * address.  In those cases we will pick a source address further below.
*/
if ((r = pktsock_get_pktinfo(&udp->udp_pktsock, &pktopt, &ifdev,
&src_addr)) != OK)
return r;
if (ifdev != NULL && !ip_addr_isany(&src_addr)) {
/* This is guaranteed to be a proper local unicast address. */
src_addrp = &src_addr;
} else {
src_addrp = &udp->udp_pcb->local_ip;
/*
* If the socket is bound to a multicast address, use the
* unspecified ('any') address as source address instead, until
* we select a real source address (further below). This
* substitution keeps the rest of the code a bit simpler.
*/
if (ip_addr_ismulticast(src_addrp))
src_addrp = IP46_ADDR_ANY(IP_GET_TYPE(src_addrp));
}
/*
* Determine the destination address to use. If the socket is
* connected, always ignore any address provided in the send call.
*/
if (!udpsock_is_conn(udp)) {
assert(addr != NULL); /* already checked in pre_send */
if ((r = ipsock_get_dst_addr(udpsock_get_ipsock(udp), addr,
addr_len, src_addrp, &dst_addr, &dst_port)) != OK)
return r;
dst_addrp = &dst_addr;
} else {
dst_addrp = &udp->udp_pcb->remote_ip;
dst_port = udp->udp_pcb->remote_port;
}
/*
* If the destination is a multicast address, select the outgoing
* interface based on the multicast interface index, if one is set.
* This must be done here in order to allow the code further below to
* detect zone violations, because if we leave this selection to lwIP,
* it will not perform zone violation detection at all. Also note that
* this case must *not* override an interface index already specified
* using IPV6_PKTINFO, as per RFC 3542 Sec. 6.7.
*/
if (ifdev == NULL && ip_addr_ismulticast(dst_addrp)) {
ifindex = udp_get_multicast_netif_index(udp->udp_pcb);
if (ifindex != NETIF_NO_INDEX)
ifdev = ifdev_get_by_index(ifindex); /* (may fail) */
}
/*
	 * If an interface has already been determined by now, the send operation
* will bypass routing. In that case, we must perform our own checks
* on address zone violations, because those will not be made anywhere
* else. Subsequent steps below will never introduce violations.
*/
if (ifdev != NULL && IP_IS_V6(dst_addrp)) {
if (ifaddr_is_zone_mismatch(ip_2_ip6(dst_addrp), ifdev))
return EHOSTUNREACH;
if (IP_IS_V6(src_addrp) &&
ifaddr_is_zone_mismatch(ip_2_ip6(src_addrp), ifdev))
return EHOSTUNREACH;
}
/*
* If we do not yet have an interface at this point, perform a route
* lookup to determine the outgoing interface. Unless MSG_DONTROUTE is
* set (which covers SO_DONTROUTE as well), in which case we look for a
* local subnet that matches the destination address.
*/
if (ifdev == NULL) {
if (!(flags & MSG_DONTROUTE)) {
/*
* ip_route() should never be called with an
* IPADDR_TYPE_ANY type address. This is a lwIP-
* internal requirement; while we override both routing
* functions, we do not deviate from it.
*/
if (IP_IS_ANY_TYPE_VAL(*src_addrp))
src_addrp =
IP46_ADDR_ANY(IP_GET_TYPE(dst_addrp));
/* Perform the route lookup. */
if ((netif = ip_route(src_addrp, dst_addrp)) == NULL)
return EHOSTUNREACH;
ifdev = netif_get_ifdev(netif);
} else {
if ((ifdev = ifaddr_map_by_subnet(dst_addrp)) == NULL)
return EHOSTUNREACH;
}
}
/*
* At this point we have an outgoing interface. If we do not have a
* source address yet, pick one now.
*/
assert(ifdev != NULL);
if (ip_addr_isany(src_addrp)) {
src_addrp = ifaddr_select(dst_addrp, ifdev, NULL /*ifdevp*/);
if (src_addrp == NULL)
return EHOSTUNREACH;
}
/*
* Now that we know the full conditions of what we are about to send,
* check whether the packet size leaves enough room for lwIP to prepend
* headers. If so, allocate a chain of pbufs for the packet.
*/
assert(len <= UDP_MAX_PAYLOAD);
if (IP_IS_V6(dst_addrp))
hdrlen = IP6_HLEN + UDP_HLEN;
else
hdrlen = IP_HLEN + UDP_HLEN;
if (hdrlen + len > UDP_MAX_PAYLOAD)
return EMSGSIZE;
if ((pbuf = pchain_alloc(PBUF_TRANSPORT, len)) == NULL)
return ENOBUFS;
/* Copy in the packet data. */
if ((r = pktsock_get_data(&udp->udp_pktsock, data, len, pbuf)) != OK) {
pbuf_free(pbuf);
return r;
}
/*
* Set broadcast/multicast flags for accounting purposes. Only the
* multicast flag is used for output accounting, but for loopback
* traffic, both flags are copied and used for input accounting and
* setting MSG_MCAST/MSG_BCAST.
*/
if (ip_addr_ismulticast(dst_addrp))
pbuf->flags |= PBUF_FLAG_LLMCAST;
else if (ip_addr_isbroadcast(dst_addrp, ifdev_get_netif(ifdev)))
pbuf->flags |= PBUF_FLAG_LLBCAST;
/* Send the packet. */
udpsock_swap_opt(udp, &pktopt);
assert(!ip_addr_isany(src_addrp));
assert(!ip_addr_ismulticast(src_addrp));
err = udp_sendto_if_src(udp->udp_pcb, pbuf, dst_addrp, dst_port,
ifdev_get_netif(ifdev), src_addrp);
udpsock_swap_opt(udp, &pktopt);
/* Free the pbuf, as a copy has been made. */
pbuf_free(pbuf);
/*
* On success, make sure to return the size of the sent packet as well.
* As an aside: ctl_off need not be updated, as it is not returned.
*/
if ((r = util_convert_err(err)) == OK)
*off = len;
return r;
}
/*
* Update the set of flag-type socket options on a UDP socket.
*/
static void
udpsock_setsockmask(struct sock * sock, unsigned int mask)
{
struct udpsock *udp = (struct udpsock *)sock;
if (mask & SO_REUSEADDR)
ip_set_option(udp->udp_pcb, SOF_REUSEADDR);
else
ip_reset_option(udp->udp_pcb, SOF_REUSEADDR);
if (mask & SO_BROADCAST)
ip_set_option(udp->udp_pcb, SOF_BROADCAST);
else
ip_reset_option(udp->udp_pcb, SOF_BROADCAST);
}
/*
* Prepare a helper structure for IP-level option processing.
*/
static void
udpsock_get_ipopts(struct udpsock * udp, struct ipopts * ipopts)
{
ipopts->local_ip = &udp->udp_pcb->local_ip;
ipopts->remote_ip = &udp->udp_pcb->remote_ip;
ipopts->tos = &udp->udp_pcb->tos;
ipopts->ttl = &udp->udp_pcb->ttl;
ipopts->sndmin = UDP_SNDBUF_MIN;
ipopts->sndmax = UDP_SNDBUF_MAX;
ipopts->rcvmin = UDP_RCVBUF_MIN;
ipopts->rcvmax = UDP_RCVBUF_MAX;
}
/*
* Set socket options on a UDP socket.
*/
static int
udpsock_setsockopt(struct sock * sock, int level, int name,
const struct sockdriver_data * data, socklen_t len)
{
struct udpsock *udp = (struct udpsock *)sock;
struct ipopts ipopts;
ip_addr_t ipaddr;
struct in_addr in_addr;
struct ifdev *ifdev;
unsigned int flags;
uint32_t ifindex;
uint8_t byte;
int r, val;
/*
* Unfortunately, we have to duplicate most of the multicast options
* rather than sharing them with rawsock at the pktsock level. The
	 * reason is that each of the PCBs has its own multicast abstraction
* functions and so we cannot merge the rest. Same for getsockopt.
*/
switch (level) {
case IPPROTO_IP:
if (udpsock_is_ipv6(udp))
break;
switch (name) {
case IP_MULTICAST_IF:
pktsock_set_mcaware(&udp->udp_pktsock);
if ((r = sockdriver_copyin_opt(data, &in_addr,
sizeof(in_addr), len)) != OK)
return r;
ip_addr_set_ip4_u32(&ipaddr, in_addr.s_addr);
if ((ifdev = ifaddr_map_by_addr(&ipaddr)) == NULL)
return EADDRNOTAVAIL;
udp_set_multicast_netif_index(udp->udp_pcb,
ifdev_get_index(ifdev));
return OK;
case IP_MULTICAST_LOOP:
pktsock_set_mcaware(&udp->udp_pktsock);
if ((r = sockdriver_copyin_opt(data, &byte,
sizeof(byte), len)) != OK)
return r;
flags = udp_flags(udp->udp_pcb);
if (byte)
flags |= UDP_FLAGS_MULTICAST_LOOP;
else
flags &= ~UDP_FLAGS_MULTICAST_LOOP;
udp_setflags(udp->udp_pcb, flags);
return OK;
case IP_MULTICAST_TTL:
pktsock_set_mcaware(&udp->udp_pktsock);
if ((r = sockdriver_copyin_opt(data, &byte,
sizeof(byte), len)) != OK)
return r;
udp_set_multicast_ttl(udp->udp_pcb, byte);
return OK;
}
break;
case IPPROTO_IPV6:
if (!udpsock_is_ipv6(udp))
break;
switch (name) {
case IPV6_MULTICAST_IF:
pktsock_set_mcaware(&udp->udp_pktsock);
if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
len)) != OK)
return r;
if (val != 0) {
ifindex = (uint32_t)val;
ifdev = ifdev_get_by_index(ifindex);
if (ifdev == NULL)
return ENXIO;
} else
ifindex = NETIF_NO_INDEX;
udp_set_multicast_netif_index(udp->udp_pcb, ifindex);
return OK;
case IPV6_MULTICAST_LOOP:
pktsock_set_mcaware(&udp->udp_pktsock);
if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
len)) != OK)
return r;
if (val < 0 || val > 1)
return EINVAL;
flags = udp_flags(udp->udp_pcb);
if (val)
flags |= UDP_FLAGS_MULTICAST_LOOP;
else
flags &= ~UDP_FLAGS_MULTICAST_LOOP;
/*
* lwIP's IPv6 functionality does not actually check
* this flag at all yet. We set it in the hope that
* one day this will magically start working.
*/
udp_setflags(udp->udp_pcb, flags);
return OK;
case IPV6_MULTICAST_HOPS:
pktsock_set_mcaware(&udp->udp_pktsock);
if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
len)) != OK)
return r;
if (val < -1 || val > UINT8_MAX)
return EINVAL;
if (val == -1)
val = 1;
udp_set_multicast_ttl(udp->udp_pcb, val);
return OK;
}
break;
}
/* Handle all other options at the packet or IP level. */
udpsock_get_ipopts(udp, &ipopts);
return pktsock_setsockopt(&udp->udp_pktsock, level, name, data, len,
&ipopts);
}
/*
* Retrieve socket options on a UDP socket.
*/
static int
udpsock_getsockopt(struct sock * sock, int level, int name,
const struct sockdriver_data * data, socklen_t * len)
{
struct udpsock *udp = (struct udpsock *)sock;
struct ipopts ipopts;
const ip4_addr_t *ip4addr;
struct in_addr in_addr;
struct ifdev *ifdev;
unsigned int flags;
uint32_t ifindex;
uint8_t byte;
int val;
switch (level) {
case IPPROTO_IP:
if (udpsock_is_ipv6(udp))
break;
switch (name) {
case IP_MULTICAST_IF:
ifindex = udp_get_multicast_netif_index(udp->udp_pcb);
/*
* Map back from the interface index to the IPv4
* address assigned to the corresponding interface.
* Should this not work out, return the 'any' address.
*/
if (ifindex != NETIF_NO_INDEX &&
(ifdev = ifdev_get_by_index(ifindex)) != NULL) {
ip4addr =
netif_ip4_addr(ifdev_get_netif(ifdev));
in_addr.s_addr = ip4_addr_get_u32(ip4addr);
} else
in_addr.s_addr = PP_HTONL(INADDR_ANY);
return sockdriver_copyout_opt(data, &in_addr,
sizeof(in_addr), len);
case IP_MULTICAST_LOOP:
flags = udp_flags(udp->udp_pcb);
byte = !!(flags & UDP_FLAGS_MULTICAST_LOOP);
return sockdriver_copyout_opt(data, &byte,
sizeof(byte), len);
case IP_MULTICAST_TTL:
byte = udp_get_multicast_ttl(udp->udp_pcb);
return sockdriver_copyout_opt(data, &byte,
sizeof(byte), len);
}
break;
case IPPROTO_IPV6:
if (!udpsock_is_ipv6(udp))
break;
switch (name) {
case IPV6_MULTICAST_IF:
ifindex = udp_get_multicast_netif_index(udp->udp_pcb);
val = (int)ifindex;
return sockdriver_copyout_opt(data, &val, sizeof(val),
len);
case IPV6_MULTICAST_LOOP:
flags = udp_flags(udp->udp_pcb);
val = !!(flags & UDP_FLAGS_MULTICAST_LOOP);
return sockdriver_copyout_opt(data, &val, sizeof(val),
len);
case IPV6_MULTICAST_HOPS:
val = udp_get_multicast_ttl(udp->udp_pcb);
return sockdriver_copyout_opt(data, &val, sizeof(val),
len);
}
break;
}
/* Handle all other options at the packet or IP level. */
udpsock_get_ipopts(udp, &ipopts);
return pktsock_getsockopt(&udp->udp_pktsock, level, name, data, len,
&ipopts);
}
/*
* Retrieve the local socket address of a UDP socket.
*/
static int
udpsock_getsockname(struct sock * sock, struct sockaddr * addr,
socklen_t * addr_len)
{
struct udpsock *udp = (struct udpsock *)sock;
ipsock_put_addr(udpsock_get_ipsock(udp), addr, addr_len,
&udp->udp_pcb->local_ip, udp->udp_pcb->local_port);
return OK;
}
/*
* Retrieve the remote socket address of a UDP socket.
*/
static int
udpsock_getpeername(struct sock * sock, struct sockaddr * addr,
socklen_t * addr_len)
{
struct udpsock *udp = (struct udpsock *)sock;
if (!udpsock_is_conn(udp))
return ENOTCONN;
ipsock_put_addr(udpsock_get_ipsock(udp), addr, addr_len,
&udp->udp_pcb->remote_ip, udp->udp_pcb->remote_port);
return OK;
}
/*
* Shut down a UDP socket for reading and/or writing.
*/
static int
udpsock_shutdown(struct sock * sock, unsigned int mask)
{
struct udpsock *udp = (struct udpsock *)sock;
if (mask & SFL_SHUT_RD)
udp_recv(udp->udp_pcb, NULL, NULL);
pktsock_shutdown(&udp->udp_pktsock, mask);
return OK;
}
/*
* Close a UDP socket.
*/
static int
udpsock_close(struct sock * sock, int force __unused)
{
struct udpsock *udp = (struct udpsock *)sock;
udp_recv(udp->udp_pcb, NULL, NULL);
udp_remove(udp->udp_pcb);
udp->udp_pcb = NULL;
pktsock_close(&udp->udp_pktsock);
return OK;
}
/*
* Free up a closed UDP socket.
*/
static void
udpsock_free(struct sock * sock)
{
struct udpsock *udp = (struct udpsock *)sock;
assert(udp->udp_pcb == NULL);
SIMPLEQ_INSERT_HEAD(&udp_freelist, udp, udp_next);
}
/*
* Fill the given kinfo_pcb sysctl(7) structure with information about the UDP
* PCB identified by the given pointer.
*/
static void
udpsock_get_info(struct kinfo_pcb * ki, const void * ptr)
{
const struct udp_pcb *pcb = (const struct udp_pcb *)ptr;
struct udpsock *udp;
ki->ki_type = SOCK_DGRAM;
/*
* All UDP sockets should be created by this module, but protect
	 * ourselves against the case that this is not true anyway.
*/
if (pcb->recv_arg != NULL) {
udp = (struct udpsock *)pcb->recv_arg;
assert(udp >= udp_array &&
udp < &udp_array[__arraycount(udp_array)]);
} else
udp = NULL;
ipsock_get_info(ki, &pcb->local_ip, pcb->local_port, &pcb->remote_ip,
pcb->remote_port);
if (udp != NULL) {
/* TODO: change this so that sockstat(1) may work one day. */
ki->ki_sockaddr = (uint64_t)(uintptr_t)udpsock_get_sock(udp);
ki->ki_rcvq = pktsock_get_recvlen(&udp->udp_pktsock);
}
}
/*
* Given either NULL or a previously returned UDP PCB pointer, return the first
* or next UDP PCB pointer, or NULL if there are no more. Skip UDP PCBs that
* are not bound to an address, as there is no use reporting them.
*/
static const void *
udpsock_enum(const void * last)
{
const struct udp_pcb *pcb;
if (last != NULL)
pcb = (const void *)((const struct udp_pcb *)last)->next;
else
pcb = (const void *)udp_pcbs;
while (pcb != NULL && pcb->local_port == 0)
pcb = pcb->next;
return pcb;
}
/*
* Obtain the list of UDP protocol control blocks, for sysctl(7).
*/
static ssize_t
udpsock_pcblist(struct rmib_call * call, struct rmib_node * node __unused,
struct rmib_oldp * oldp, struct rmib_newp * newp __unused)
{
return util_pcblist(call, oldp, udpsock_enum, udpsock_get_info);
}
static const struct sockevent_ops udpsock_ops = {
.sop_bind = udpsock_bind,
.sop_connect = udpsock_connect,
.sop_pre_send = udpsock_pre_send,
.sop_send = udpsock_send,
.sop_pre_recv = pktsock_pre_recv,
.sop_recv = pktsock_recv,
.sop_test_recv = pktsock_test_recv,
.sop_ioctl = ifconf_ioctl,
.sop_setsockmask = udpsock_setsockmask,
.sop_setsockopt = udpsock_setsockopt,
.sop_getsockopt = udpsock_getsockopt,
.sop_getsockname = udpsock_getsockname,
.sop_getpeername = udpsock_getpeername,
.sop_shutdown = udpsock_shutdown,
.sop_close = udpsock_close,
.sop_free = udpsock_free
};

minix/net/lwip/util.c Normal file

@ -0,0 +1,251 @@
/* LWIP service - util.c - shared utility functions */
#include "lwip.h"
#define US 1000000 /* number of microseconds per second */
/*
* Convert the given timeval structure to a number of clock ticks, checking
* whether the given structure is valid and whether the resulting number of
* ticks can be expressed as a (relative) clock ticks value. Upon success,
* return OK, with the number of clock ticks stored in 'ticksp'. Upon failure,
* return a negative error code that may be returned to userland directly. In
* that case, the contents of 'ticksp' are left unchanged.
*
* TODO: move this function into libsys and remove other redundant copies.
*/
int
util_timeval_to_ticks(const struct timeval * tv, clock_t * ticksp)
{
clock_t ticks;
if (tv->tv_sec < 0 || tv->tv_usec < 0 || tv->tv_usec >= US)
return EINVAL;
if (tv->tv_sec >= TMRDIFF_MAX / sys_hz())
return EDOM;
ticks = tv->tv_sec * sys_hz() + (tv->tv_usec * sys_hz() + US - 1) / US;
assert(ticks <= TMRDIFF_MAX);
*ticksp = ticks;
return OK;
}
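A quick worked example of the rounding behavior, assuming a hypothetical 100 Hz system clock:

/*
 * Worked example (sys_hz() == 100): for tv = { 2, 15000 },
 *
 *	ticks = 2 * 100 + (15000 * 100 + 999999) / 1000000 = 200 + 2 = 202
 *
 * that is, the 15 ms fraction is rounded up to two whole 10 ms ticks.
 */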
/*
* Convert the given number of clock ticks to a timeval structure. This
* function never fails.
*/
void
util_ticks_to_timeval(clock_t ticks, struct timeval * tv)
{
memset(tv, 0, sizeof(*tv));
tv->tv_sec = ticks / sys_hz();
tv->tv_usec = (ticks % sys_hz()) * US / sys_hz();
}
/*
* Copy data between a user process and a chain of buffers. If the 'copy_in'
* flag is set, the data will be copied in from the user process to the given
* chain of buffers; otherwise, the data will be copied out from the given
* buffer chain to the user process. The 'data' parameter is a sockdriver-
* supplied structure identifying the remote source or destination of the data.
* The 'len' parameter contains the number of bytes to copy, and 'off' contains
* the offset into the remote source or destination. 'pbuf' is a pointer to
* the buffer chain, and 'skip' is the number of bytes to skip in the first
* buffer on the chain. Return OK on success, or a negative error code if the
* copy operation failed. This function is packet queue friendly.
*/
int
util_copy_data(const struct sockdriver_data * data, size_t len, size_t off,
const struct pbuf * pbuf, size_t skip, int copy_in)
{
iovec_t iov[SOCKDRIVER_IOV_MAX];
unsigned int i;
size_t sub, chunk;
int r;
while (len > 0) {
sub = 0;
for (i = 0; len > 0 && i < __arraycount(iov); i++) {
assert(pbuf != NULL);
chunk = (size_t)pbuf->len - skip;
if (chunk > len)
chunk = len;
iov[i].iov_addr = (vir_bytes)pbuf->payload + skip;
iov[i].iov_size = chunk;
sub += chunk;
len -= chunk;
pbuf = pbuf->next;
skip = 0;
}
if (copy_in)
r = sockdriver_vcopyin(data, off, iov, i);
else
r = sockdriver_vcopyout(data, off, iov, i);
if (r != OK)
return r;
off += sub;
}
return OK;
}
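For illustration, a typical copy-out call as a receive path might issue it; the variable names are hypothetical:

/*
 * Typical copy-out usage: copy the first 'len' bytes of packet payload
 * from the pbuf chain 'pbuf' out to the user process identified by the
 * sockdriver-supplied 'data':
 *
 *	if ((r = util_copy_data(data, len, 0, pbuf, 0, FALSE)) != OK)
 *		return r;
 */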
/*
* Copy from a vector of (local) buffers to a single (local) buffer. Return
* the total number of copied bytes on success, or E2BIG if not all of the
 * results could be stored in the given buffer.
*/
ssize_t
util_coalesce(char * ptr, size_t max, const iovec_t * iov, unsigned int iovcnt)
{
size_t off, size;
for (off = 0; iovcnt > 0; iov++, iovcnt--) {
if ((size = iov->iov_size) > max)
return E2BIG;
memcpy(&ptr[off], (void *)iov->iov_addr, size);
off += size;
max -= size;
}
return off;
}
/*
* Return TRUE if the given endpoint has superuser privileges, FALSE otherwise.
*/
int
util_is_root(endpoint_t endpt)
{
return (getnuid(endpt) == ROOT_EUID);
}
/*
* Convert a lwIP-provided error code (of type err_t) to a negative MINIX 3
* error code.
*/
int
util_convert_err(err_t err)
{
switch (err) {
case ERR_OK: return OK;
case ERR_MEM: return ENOMEM;
case ERR_BUF: return ENOBUFS;
case ERR_TIMEOUT: return ETIMEDOUT;
case ERR_RTE: return EHOSTUNREACH;
case ERR_VAL: return EINVAL;
case ERR_USE: return EADDRINUSE;
case ERR_ALREADY: return EALREADY;
case ERR_ISCONN: return EISCONN;
case ERR_CONN: return ENOTCONN;
case ERR_IF: return ENETDOWN;
case ERR_ABRT: return ECONNABORTED;
case ERR_RST: return ECONNRESET;
case ERR_INPROGRESS: return EINPROGRESS; /* should not be thrown */
case ERR_WOULDBLOCK: return EWOULDBLOCK; /* should not be thrown */
case ERR_ARG: return EINVAL;
case ERR_CLSD: /* should be caught as separate case */
default: /* should have a case here */
		printf("LWIP: unexpected error from lwIP: %d\n", err);
return EGENERIC;
}
}
/*
* Obtain the list of protocol control blocks for a particular domain and
* protocol. The call may be used for requesting either IPv4 or IPv6 PCBs,
* based on the path used to get here. It is used for TCP, UDP, and RAW PCBs.
*/
ssize_t
util_pcblist(struct rmib_call * call, struct rmib_oldp * oldp,
const void *(*enum_proc)(const void *),
void (*get_info_proc)(struct kinfo_pcb *, const void *))
{
const void *pcb;
ip_addr_t local_ip;
struct kinfo_pcb ki;
ssize_t off;
int r, size, max, domain, protocol;
if (call->call_namelen != 4)
return EINVAL;
/* The first two added name fields are not used. */
size = call->call_name[2];
if (size < 0 || (size_t)size > sizeof(ki))
return EINVAL;
if (size == 0)
size = sizeof(ki);
max = call->call_name[3];
domain = call->call_oname[1];
protocol = call->call_oname[2];
off = 0;
for (pcb = enum_proc(NULL); pcb != NULL; pcb = enum_proc(pcb)) {
/* Filter on IPv4/IPv6. */
memcpy(&local_ip, &((const struct ip_pcb *)pcb)->local_ip,
sizeof(local_ip));
/*
* lwIP does not support IPv6 sockets with IPv4-mapped IPv6
* addresses, and requires that those be represented as IPv4
* sockets instead. We perform the appropriate conversions to
* make that work in general, but here we only have the lwIP
* PCB to go on, and that PCB may not even have an associated
* sock data structure. As a result, we have to report IPv6
* sockets with IPv4-mapped IPv6 addresses as IPv4 sockets
* here. There is little room for improvement until lwIP
* allows us to store a "this is really an IPv6 socket" flag in
* its PCBs. As documented in the ipsock module, a partial
* solution would for example cause TCP sockets to "jump" from
* the IPv6 listing to the IPv4 listing when entering TIME_WAIT
* state. The jumping already occurs now for sockets that are
* getting bound, but that is not as problematic.
*/
if ((domain == AF_INET) != IP_IS_V4(&local_ip))
continue;
if (rmib_inrange(oldp, off)) {
memset(&ki, 0, sizeof(ki));
ki.ki_pcbaddr = (uint64_t)(uintptr_t)pcb;
ki.ki_ppcbaddr = (uint64_t)(uintptr_t)pcb;
ki.ki_family = domain;
ki.ki_protocol = protocol;
get_info_proc(&ki, pcb);
if ((r = rmib_copyout(oldp, off, &ki, size)) < OK)
return r;
}
off += size;
if (max > 0 && --max == 0)
break;
}
/*
* Margin to limit the possible effects of the inherent race condition
* between receiving just the data size and receiving the actual data.
*/
if (oldp == NULL)
off += PCB_SLOP * size;
return off;
}

minix/net/lwip/util.h Normal file

@ -0,0 +1,27 @@
#ifndef MINIX_NET_LWIP_UTIL_H
#define MINIX_NET_LWIP_UTIL_H
/* util.c */
int util_timeval_to_ticks(const struct timeval * tv, clock_t * ticksp);
void util_ticks_to_timeval(clock_t ticks, struct timeval * tv);
int util_copy_data(const struct sockdriver_data * data, size_t len, size_t off,
const struct pbuf * pbuf, size_t skip, int copy_in);
ssize_t util_coalesce(char * buf, size_t max, const iovec_t * iov,
unsigned int iovcnt);
int util_convert_err(err_t err);
int util_is_root(endpoint_t user_endpt);
ssize_t util_pcblist(struct rmib_call * call, struct rmib_oldp * oldp,
const void *(*enum_proc)(const void *),
void (*get_info_proc)(struct kinfo_pcb *, const void *));
/*
* In our code, pbuf header adjustments should never fail. This wrapper checks
* that the pbuf_header() call succeeds, and panics otherwise.
*/
#define util_pbuf_header(pbuf,incr) \
do { \
if (pbuf_header((pbuf), (incr))) \
panic("unexpected pbuf header adjustment failure"); \
} while (0)
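As an illustrative use (relying on lwIP's IP_HLEN and UDP_HLEN constants), hiding already-parsed headers so that the payload pointer lands on the application data:

/*
 * Example: skip the IPv4 and UDP headers of a received packet, so that
 * pbuf->payload points at the application data:
 *
 *	util_pbuf_header(pbuf, -(int)(IP_HLEN + UDP_HLEN));
 */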
#endif /* !MINIX_NET_LWIP_UTIL_H */


@ -75,6 +75,7 @@ static struct mib_node mib_minix_table[] = {
"mib", "MIB service information"),
/* 2*/ [MINIX_PROC] = MIB_NODE(_P | _RO, mib_minix_proc_table,
"proc", "Process information for ProcFS"),
/* 3*/ /* MINIX_LWIP is mounted through RMIB and thus not present here. */
};
/*


@ -17,12 +17,115 @@
#include <net/gen/psip_io.h>
#include <arpa/inet.h>
#include <net/route.h>
#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#include <net80211/ieee80211_ioctl.h>
const char *
net_ioctl_name(unsigned long req)
{
switch (req) {
NAME(FIONREAD);
/* sys/sockio.h */
NAME(SIOCSHIWAT); /* TODO: print argument */
NAME(SIOCGHIWAT); /* TODO: print argument */
NAME(SIOCSLOWAT); /* TODO: print argument */
NAME(SIOCGLOWAT); /* TODO: print argument */
NAME(SIOCSPGRP); /* TODO: print argument */
NAME(SIOCGPGRP); /* TODO: print argument */
NAME(SIOCADDRT); /* TODO: print argument */
NAME(SIOCDELRT); /* TODO: print argument */
NAME(SIOCSIFADDR); /* TODO: print argument */
NAME(SIOCGIFADDR); /* TODO: print argument */
NAME(SIOCSIFDSTADDR); /* TODO: print argument */
NAME(SIOCGIFDSTADDR); /* TODO: print argument */
NAME(SIOCSIFFLAGS); /* TODO: print argument */
NAME(SIOCGIFFLAGS); /* TODO: print argument */
NAME(SIOCGIFBRDADDR); /* TODO: print argument */
NAME(SIOCSIFBRDADDR); /* TODO: print argument */
NAME(SIOCGIFCONF); /* TODO: print argument */
NAME(SIOCGIFNETMASK); /* TODO: print argument */
NAME(SIOCSIFNETMASK); /* TODO: print argument */
NAME(SIOCGIFMETRIC); /* TODO: print argument */
NAME(SIOCSIFMETRIC); /* TODO: print argument */
NAME(SIOCDIFADDR); /* TODO: print argument */
NAME(SIOCAIFADDR); /* TODO: print argument */
NAME(SIOCGIFALIAS); /* TODO: print argument */
NAME(SIOCGIFAFLAG_IN); /* TODO: print argument */
NAME(SIOCALIFADDR); /* TODO: print argument */
NAME(SIOCGLIFADDR); /* TODO: print argument */
NAME(SIOCDLIFADDR); /* TODO: print argument */
NAME(SIOCSIFADDRPREF); /* TODO: print argument */
NAME(SIOCGIFADDRPREF); /* TODO: print argument */
NAME(SIOCADDMULTI); /* TODO: print argument */
NAME(SIOCDELMULTI); /* TODO: print argument */
NAME(SIOCSIFMEDIA); /* TODO: print argument */
NAME(SIOCGIFMEDIA); /* TODO: print argument */
NAME(SIOCSIFGENERIC); /* TODO: print argument */
NAME(SIOCGIFGENERIC); /* TODO: print argument */
NAME(SIOCSIFPHYADDR); /* TODO: print argument */
NAME(SIOCGIFPSRCADDR); /* TODO: print argument */
NAME(SIOCGIFPDSTADDR); /* TODO: print argument */
NAME(SIOCDIFPHYADDR); /* TODO: print argument */
NAME(SIOCSLIFPHYADDR); /* TODO: print argument */
NAME(SIOCGLIFPHYADDR); /* TODO: print argument */
NAME(SIOCSIFMTU); /* TODO: print argument */
NAME(SIOCGIFMTU); /* TODO: print argument */
NAME(SIOCSDRVSPEC); /* TODO: print argument */
NAME(SIOCGDRVSPEC); /* TODO: print argument */
NAME(SIOCIFCREATE); /* TODO: print argument */
NAME(SIOCIFDESTROY); /* TODO: print argument */
NAME(SIOCIFGCLONERS); /* TODO: print argument */
NAME(SIOCGIFDLT); /* TODO: print argument */
NAME(SIOCGIFCAP); /* TODO: print argument */
NAME(SIOCSIFCAP); /* TODO: print argument */
NAME(SIOCSVH); /* TODO: print argument */
NAME(SIOCGVH); /* TODO: print argument */
NAME(SIOCINITIFADDR); /* TODO: print argument */
NAME(SIOCGIFDATA); /* TODO: print argument */
NAME(SIOCZIFDATA); /* TODO: print argument */
NAME(SIOCGLINKSTR); /* TODO: print argument */
NAME(SIOCSLINKSTR); /* TODO: print argument */
NAME(SIOCGETHERCAP); /* TODO: print argument */
NAME(SIOCGIFINDEX); /* TODO: print argument */
NAME(SIOCSETPFSYNC); /* TODO: print argument */
NAME(SIOCGETPFSYNC); /* TODO: print argument */
/* netinet6/in6_var.h */
NAME(SIOCSIFADDR_IN6); /* TODO: print argument */
NAME(SIOCGIFADDR_IN6); /* TODO: print argument */
NAME(SIOCGIFDSTADDR_IN6); /* TODO: print argument */
NAME(SIOCGIFNETMASK_IN6); /* TODO: print argument */
NAME(SIOCDIFADDR_IN6); /* TODO: print argument */
NAME(SIOCGIFPSRCADDR_IN6); /* TODO: print argument */
NAME(SIOCGIFPDSTADDR_IN6); /* TODO: print argument */
NAME(SIOCGIFAFLAG_IN6); /* TODO: print argument */
NAME(SIOCGDRLST_IN6); /* TODO: print argument */
NAME(SIOCSNDFLUSH_IN6); /* TODO: print argument */
NAME(SIOCGNBRINFO_IN6); /* TODO: print argument */
NAME(SIOCSRTRFLUSH_IN6); /* TODO: print argument */
NAME(SIOCGIFSTAT_IN6); /* TODO: print argument */
NAME(SIOCGIFSTAT_ICMP6); /* TODO: print argument */
NAME(SIOCSDEFIFACE_IN6); /* TODO: print argument */
NAME(SIOCGDEFIFACE_IN6); /* TODO: print argument */
NAME(SIOCSIFINFO_FLAGS); /* TODO: print argument */
NAME(SIOCSIFPREFIX_IN6); /* TODO: print argument */
NAME(SIOCGIFPREFIX_IN6); /* TODO: print argument */
NAME(SIOCDIFPREFIX_IN6); /* TODO: print argument */
NAME(SIOCAIFPREFIX_IN6); /* TODO: print argument */
NAME(SIOCCIFPREFIX_IN6); /* TODO: print argument */
NAME(SIOCGIFALIFETIME_IN6); /* TODO: print argument */
NAME(SIOCAIFADDR_IN6); /* TODO: print argument */
NAME(SIOCGIFINFO_IN6); /* TODO: print argument */
NAME(SIOCSIFINFO_IN6); /* TODO: print argument */
NAME(SIOCSIFPHYADDR_IN6); /* TODO: print argument */
NAME(SIOCAADDRCTL_POLICY); /* TODO: print argument */
NAME(SIOCDADDRCTL_POLICY); /* TODO: print argument */
/* net80211/ieee80211_ioctl.h */
NAME(SIOCS80211NWID); /* TODO: print argument */
NAME(SIOCG80211NWID); /* TODO: print argument */
/* old MINIX inet ioctls */
NAME(NWIOSETHOPT); /* TODO: print argument */
NAME(NWIOGETHOPT); /* TODO: print argument */
NAME(NWIOGETHSTAT); /* TODO: print argument */