Add lwip: a new lwIP-based TCP/IP service
This commit adds a new TCP/IP service to MINIX 3. As its core, the service uses the lwIP TCP/IP stack, for maintenance reasons. The service aims to be compatible with NetBSD userland, including its low-level network management utilities. It also aims to support modern features such as IPv6.

In summary, the new LWIP service supports the following main features:

- TCP, UDP, RAW sockets with mostly standard BSD API semantics;
- IPv6 support: host mode (complete) and router mode (partial);
- most of the standard BSD API socket options (SO_);
- all of the standard BSD API message flags (MSG_);
- the most used protocol-specific socket and control options;
- a default loopback interface and the ability to create one more;
- configuration-free ethernet interfaces and driver tracking;
- queuing and multiple concurrent requests to each ethernet driver;
- standard ioctl(2)-based BSD interface management;
- radix tree backed, destination-based routing;
- routing sockets for standard BSD route reporting and management;
- multicast traffic and multicast group membership tracking;
- Berkeley Packet Filter (BPF) devices;
- standard and custom sysctl(7) nodes for many internals;
- a slab allocation based, hybrid static/dynamic memory pool model.

Many of its modules come with fairly elaborate comments that cover many aspects of what is going on. The service is primarily a socket driver built on top of the libsockdriver library, but for BPF devices it is at the same time also a character driver.

Change-Id: Ib0c02736234b21143915e5fcc0fda8fe408f046f
commit ef8d499e2d
parent 0f03189a6a
@@ -194,7 +194,7 @@
 ./etc/system.conf.d/hello minix-base
 ./etc/system.conf.d/inet minix-base obsolete
 ./etc/system.conf.d/ipc minix-base
-./etc/system.conf.d/lwip minix-base obsolete
+./etc/system.conf.d/lwip minix-base
 ./etc/system.conf.d/random minix-base
 ./etc/system.conf.d/uds minix-base
 ./etc/system.conf.d/usb_hub minix-base
@@ -277,7 +277,7 @@
 ./service/is minix-base
 ./service/isofs minix-base
 ./service/log minix-base
-./service/lwip minix-base obsolete
+./service/lwip minix-base
 ./service/memory minix-base
 ./service/mfs minix-base
 ./service/mib minix-base
@@ -1182,6 +1182,7 @@
 ./usr/include/minix/blockdriver.h minix-comp
 ./usr/include/minix/blockdriver_mt.h minix-comp
 ./usr/include/minix/board.h minix-comp
+./usr/include/minix/bpf.h minix-comp
 ./usr/include/minix/btrace.h minix-comp
 ./usr/include/minix/callnr.h minix-comp
 ./usr/include/minix/chardriver.h minix-comp
@@ -1208,6 +1209,7 @@
 ./usr/include/minix/hgfs.h minix-comp
 ./usr/include/minix/i2c.h minix-comp
 ./usr/include/minix/i2cdriver.h minix-comp
+./usr/include/minix/if.h minix-comp
 ./usr/include/minix/input.h minix-comp
 ./usr/include/minix/inputdriver.h minix-comp
 ./usr/include/minix/ioctl.h minix-comp
@@ -200,7 +200,7 @@
 ./usr/libdata/debug/service/is.debug minix-debug debug
 ./usr/libdata/debug/service/isofs.debug minix-debug debug
 ./usr/libdata/debug/service/log.debug minix-debug debug
-./usr/libdata/debug/service/lwip.debug minix-debug debug,obsolete
+./usr/libdata/debug/service/lwip.debug minix-debug debug
 ./usr/libdata/debug/service/memory.debug minix-debug debug
 ./usr/libdata/debug/service/mfs.debug minix-debug debug
 ./usr/libdata/debug/service/mib.debug minix-debug debug
@@ -130,6 +130,9 @@ do
 		;;
 	6,0) des="line printer, parallel port" dev=lp
 		;;
+	7,0)
+		des="Berkeley Packet Filter device" dev=bpf
+		;;
 	9,0)
 		des="unix98 pseudoterminal master" dev=ptmx
 		;;
@@ -33,6 +33,7 @@ RAMDISK_DEVICES="
 STD_DEVICES="
 	${RAMDISK_DEVICES}
 	bmp085b1s77 bmp085b2s77 bmp085b3s77
+	bpf
 	eepromb1s50 eepromb1s51 eepromb1s52 eepromb1s53
 	eepromb1s54 eepromb1s55 eepromb1s56 eepromb1s57
 	eepromb2s50 eepromb2s51 eepromb2s52 eepromb2s53
@@ -128,6 +129,7 @@ Where key is one of the following:
 	tty00 ... tty03		# Make serial lines
 	ttyp0 ... ttyq0 ...	# Make tty, pty pairs
 	audio mixer		# Make audio devices
+	bpf			# Make /dev/bpf
 	klog			# Make /dev/klog
 	ptmx			# Make /dev/ptmx
 	random			# Make /dev/random, /dev/urandom
@@ -215,6 +217,13 @@ do

 		makedev bmp085b${bus}s77 c ${major} 0 ${uname} ${gname} 444
 		;;
+	bpf)
+		# Berkeley Packet Filter device, for the LWIP service
+		# This is a cloning device, but some programs (e.g., dhclient)
+		# assume individual devices are numbered, so also create bpf0.
+		makedev ${dev} c 7 0 ${uname} ${gname} 600
+		makedev ${dev}0 c 7 0 ${uname} ${gname} 600
+		;;
 	c[0-3]d[0-7])
 		# Whole disk devices.
 		disk=`expr ${dev} : '...\\(.\\)'`
@@ -125,7 +125,7 @@ service_get_policies(struct policies * pol, index_t slot)
 	{ .label = "ptyfs",	.policy_str = "" },
 	{ .label = "vbfs",	.policy_str = "" },
 	/* net */
-	{ .label = "lwip",	.policy_str = "" },
+	{ .label = "lwip",	.policy_str = "reset" },
 	/* servers */
 	{ .label = "devman",	.policy_str = "restart" },
 	{ .label = "ds",	.policy_str = "restart" },
@@ -5,14 +5,14 @@ INCSDIR= /usr/include/minix
 INCS+=	paths.h param.h
 INCS+=	acpi.h audio_fw.h bitmap.h \
 	bdev.h blockdriver.h blockdriver_mt.h \
-	board.h btrace.h \
+	board.h bpf.h btrace.h \
 	callnr.h chardriver.h clkconf.h com.h \
 	config.h const.h cpufeature.h \
 	debug.h devio.h devman.h dmap.h \
 	driver.h drivers.h drvlib.h ds.h \
 	endpoint.h fb.h fsdriver.h fslib.h gpio.h gcov.h hash.h \
-	hgfs.h i2c.h i2cdriver.h ioctl.h input.h \
-	inputdriver.h ipc.h ipc_filter.h ipcconst.h \
+	hgfs.h i2c.h i2cdriver.h if.h input.h inputdriver.h \
+	ioctl.h ipc.h ipc_filter.h ipcconst.h \
 	keymap.h log.h mmio.h mthread.h minlib.h \
 	netdriver.h optset.h padconf.h partition.h portio.h \
 	priv.h procfs.h profile.h \
minix/include/minix/bpf.h (new file, 42 lines)
@@ -0,0 +1,42 @@
#ifndef _MINIX_BPF_H
#define _MINIX_BPF_H

#include <net/bpf.h>

/*
 * MINIX3-specific extensions to the NetBSD Berkeley Packet Filter header.
 * These extensions are necessary because NetBSD BPF uses a few ioctl(2)
 * structure formats that contain pointers--something that MINIX3 has to avoid,
 * due to its memory granting mechanisms.  Thus, those ioctl(2) calls have to
 * be converted from NetBSD to MINIX3 format.  We currently do that in libc.
 * This header specifies the numbers and formats for the MINIX3 versions.
 *
 * See <minix/if.h> for details on how things work here.
 */

/* BIOCSETF: set BPF filter program. */
/*
 * This ioctl is an exception, as it is write-only, so we do not need the
 * original structure.  Also, the size of this structure is currently slightly
 * over 4KB, which makes it too big for a regular ioctl call.  Thus, we have to
 * use a big ioctl call.  Note that future changes of BPF_MAXINSNS will
 * unfortunately (necessarily) change the ioctl call number.
 */
struct minix_bpf_program {
	u_int mbf_len;
	struct bpf_insn mbf_insns[BPF_MAXINSNS];
};

#define MINIX_BIOCSETF		_IOW_BIG(2, struct minix_bpf_program)

/* BIOCGDLTLIST: retrieve list of possible data link types. */
#define MINIX_BPF_MAXDLT	256

struct minix_bpf_dltlist {
	struct bpf_dltlist mbfl_dltlist;	/* MUST be first */
	u_int mbfl_list[MINIX_BPF_MAXDLT];
};

#define MINIX_BIOCGDLTLIST	_IOWR('B', 119, struct minix_bpf_dltlist)

#endif /* !_MINIX_BPF_H */
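Since the conversion happens inside libc, applications keep using the standard NetBSD BPF API unchanged. The following is a minimal, hedged sketch of a program that sets an accept-all filter; it assumes a cloning /dev/bpf device as created by MAKEDEV below, and is illustrative rather than part of this commit:

#include <sys/ioctl.h>
#include <net/bpf.h>
#include <fcntl.h>
#include <err.h>

int
main(void)
{
	/* A one-instruction program: accept every packet in full. */
	struct bpf_insn insns[] = {
		BPF_STMT(BPF_RET+BPF_K, (u_int)-1),
	};
	struct bpf_program bf = { 1, insns };
	int fd;

	if ((fd = open("/dev/bpf", O_RDWR)) < 0)
		err(1, "open");
	/* libc rewrites this request to MINIX_BIOCSETF transparently. */
	if (ioctl(fd, BIOCSETF, &bf) < 0)
		err(1, "BIOCSETF");
	return 0;
}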
@@ -25,7 +25,7 @@
 #define TTY_MAJOR	4	/* 4 = /dev/tty00 (ttys) */
 #define CTTY_MAJOR	5	/* 5 = /dev/tty */
 #define PRINTER_MAJOR	6	/* 6 = /dev/lp (printer driver) */
-				/* 7 = (unused) */
+#define TCPIP_MAJOR	7	/* 7 = /dev/bpf (TCP/IP service) */
 				/* 8 = /dev/c1 */
 #define PTY_MAJOR	9	/* 9 = /dev/ptyp0 (pty driver) */
 				/* 10 = /dev/c2 */
minix/include/minix/if.h (new file, 51 lines)
@@ -0,0 +1,51 @@
#ifndef _MINIX_IF_H
#define _MINIX_IF_H

#include <net/if.h>
#include <net/if_media.h>

/*
 * MINIX3-specific extensions to the network interface headers.  These
 * extensions are necessary because NetBSD IF uses a few ioctl(2) structure
 * formats that contain pointers--something that MINIX3 has to avoid, due to
 * its memory granting mechanisms.  Thus, those ioctl(2) calls have to be
 * converted from NetBSD to MINIX3 format.  We currently do that in libc.
 * This header specifies the numbers and formats for the MINIX3 versions.
 *
 * The general idea is that we rewrite the ioctl request data to include both
 * the original structure and a buffer for the array of values to which the
 * original structure uses a pointer.  Important: in those cases, the original
 * structure is expected to be the first element of the replacement structure.
 *
 * There is typically no configured upper bound for the maximum number of
 * values in the array, and so we pick size values that are hopefully always
 * oversized and yet keep the ioctl sizes within the range of regular ioctls
 * (4095 bytes, as per sys/ioccom.h).  If there may be larger amounts of data,
 * we have to use "big" ioctls.
 *
 * For the replacement ioctl codes, we use the original ioctl class and number
 * with a different size.  That should virtually eliminate the possibility of
 * accidental collisions.
 */

/* SIOCGIFMEDIA: retrieve interface media status and types. */
#define MINIX_IF_MAXMEDIA	256

struct minix_ifmediareq {
	struct ifmediareq mifm_ifm;		/* MUST be first */
	int mifm_list[MINIX_IF_MAXMEDIA];
};

#define MINIX_SIOCGIFMEDIA	_IOWR('i', 54, struct minix_ifmediareq)

/* SIOCIFGCLONERS: retrieve interface "cloners" (virtual types). */
#define MINIX_IF_MAXCLONERS	128

struct minix_if_clonereq {
	struct if_clonereq mifcr_ifcr;		/* MUST be first */
	char mifcr_buffer[MINIX_IF_MAXCLONERS * IFNAMSIZ];
};

#define MINIX_SIOCIFGCLONERS	_IOWR('i', 120, struct minix_if_clonereq)

#endif /* !_MINIX_IF_H */
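As with BPF, the flattening is invisible to applications: the caller keeps using the pointer-based NetBSD structures. A hedged sketch of a standard SIOCGIFMEDIA call follows; the interface name "lo0" and the local buffer size are assumptions for illustration only:

#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_media.h>
#include <stdio.h>
#include <string.h>
#include <err.h>

int
main(void)
{
	struct ifmediareq ifm;
	int media[256];		/* hypothetical caller-side buffer */
	int sock;

	if ((sock = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
		err(1, "socket");

	memset(&ifm, 0, sizeof(ifm));
	strlcpy(ifm.ifm_name, "lo0", sizeof(ifm.ifm_name));
	ifm.ifm_ulist = media;		/* pointer: flattened by libc */
	ifm.ifm_count = 256;

	/* libc rewrites this to MINIX_SIOCGIFMEDIA and copies results back. */
	if (ioctl(sock, SIOCGIFMEDIA, &ifm) < 0)
		err(1, "SIOCGIFMEDIA");

	printf("%s: %d media types\n", ifm.ifm_name, ifm.ifm_count);
	return 0;
}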
@@ -28,6 +28,7 @@
 #define MINIX_TEST	0
 #define MINIX_MIB	1
 #define MINIX_PROC	2
+#define MINIX_LWIP	3

 /*
  * These identifiers, under MINIX_TEST, are used by test87 to test the MIB
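The new MINIX_LWIP identifier roots the sysctl(7) subtree for the service's nodes. As a hedged sketch, a userland query of one such node by name could look as follows; the specific node path string is an assumption for illustration and is not taken from this diff:

#include <sys/sysctl.h>
#include <stdio.h>
#include <err.h>

int
main(void)
{
	int val;
	size_t len = sizeof(val);

	/* "minix.lwip.example" is a hypothetical leaf node name. */
	if (sysctlbyname("minix.lwip.example", &val, &len, NULL, 0) < 0)
		err(1, "sysctlbyname");
	printf("%d\n", val);
	return 0;
}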
@@ -9,6 +9,10 @@
 #include <sys/ioccom.h>
 #include <stdarg.h>
 #include <fcntl.h>
+#include <stdlib.h>
+#include <minix/if.h>
+#include <minix/bpf.h>
+#include <assert.h>

 static void rewrite_i2c_netbsd_to_minix(minix_i2c_ioctl_exec_t *out,
 	i2c_ioctl_exec_t *in);
@@ -45,6 +49,199 @@ static void rewrite_i2c_minix_to_netbsd(i2c_ioctl_exec_t *out,
	}
}

/*
 * Convert a network interface related IOCTL with pointers to a flat format
 * suitable for MINIX3.  Return a pointer to the new data on success, or zero
 * (with errno set) on failure.  The original request code is given in
 * 'request' and must be replaced by the new request code to be used.
 */
static vir_bytes
ioctl_convert_if_to_minix(void * data, unsigned long * request)
{
	struct minix_ifmediareq *mifm;
	struct ifmediareq *ifm;
	struct minix_if_clonereq *mifcr;
	struct if_clonereq *ifcr;

	switch (*request) {
	case SIOCGIFMEDIA:
		ifm = (struct ifmediareq *)data;

		mifm = (struct minix_ifmediareq *)malloc(sizeof(*mifm));
		if (mifm != NULL) {
			/*
			 * The count may exceed MINIX_IF_MAXMEDIA, and should
			 * be truncated as needed by the IF implementation.
			 */
			memcpy(&mifm->mifm_ifm, ifm, sizeof(*ifm));

			*request = MINIX_SIOCGIFMEDIA;
		} else
			errno = ENOMEM;

		return (vir_bytes)mifm;

	case SIOCIFGCLONERS:
		ifcr = (struct if_clonereq *)data;

		mifcr = (struct minix_if_clonereq *)malloc(sizeof(*mifcr));
		if (mifcr != NULL) {
			/*
			 * The count may exceed MINIX_IF_MAXCLONERS, and should
			 * be truncated as needed by the IF implementation.
			 */
			memcpy(&mifcr->mifcr_ifcr, ifcr, sizeof(*ifcr));

			*request = MINIX_SIOCIFGCLONERS;
		} else
			errno = ENOMEM;

		return (vir_bytes)mifcr;

	default:
		assert(0);

		errno = ENOTTY;
		return 0;
	}
}

/*
 * Convert the result of a network interface related IOCTL with pointers from
 * the flat format used to make the call to MINIX3.  Called on success only.
 * The given request code is that of the (NetBSD-type) original.
 */
static void
ioctl_convert_if_from_minix(vir_bytes addr, void * data, unsigned long request)
{
	struct minix_ifmediareq *mifm;
	struct ifmediareq *ifm;
	struct minix_if_clonereq *mifcr;
	struct if_clonereq *ifcr;
	int count;

	switch (request) {
	case SIOCGIFMEDIA:
		mifm = (struct minix_ifmediareq *)addr;
		ifm = (struct ifmediareq *)data;

		memcpy(ifm, &mifm->mifm_ifm, sizeof(*ifm));

		if (ifm->ifm_ulist != NULL && ifm->ifm_count > 0)
			memcpy(ifm->ifm_ulist, mifm->mifm_list,
			    ifm->ifm_count * sizeof(ifm->ifm_ulist[0]));

		break;

	case SIOCIFGCLONERS:
		mifcr = (struct minix_if_clonereq *)addr;
		ifcr = (struct if_clonereq *)data;

		memcpy(ifcr, &mifcr->mifcr_ifcr, sizeof(*ifcr));

		count = (ifcr->ifcr_count < ifcr->ifcr_total) ?
		    ifcr->ifcr_count : ifcr->ifcr_total;
		if (ifcr->ifcr_buffer != NULL && count > 0)
			memcpy(ifcr->ifcr_buffer, mifcr->mifcr_buffer,
			    count * IFNAMSIZ);

		break;

	default:
		assert(0);
	}
}

/*
 * Convert a BPF (Berkeley Packet Filter) related IOCTL with pointers to a flat
 * format suitable for MINIX3.  Return a pointer to the new data on success, or
 * zero (with errno set) on failure.  The original request code is given in
 * 'request' and must be replaced by the new request code to be used.
 */
static vir_bytes
ioctl_convert_bpf_to_minix(void * data, unsigned long * request)
{
	struct minix_bpf_program *mbf;
	struct bpf_program *bf;
	struct minix_bpf_dltlist *mbfl;
	struct bpf_dltlist *bfl;

	switch (*request) {
	case BIOCSETF:
		bf = (struct bpf_program *)data;

		if (bf->bf_len > __arraycount(mbf->mbf_insns)) {
			errno = EINVAL;
			return 0;
		}

		mbf = (struct minix_bpf_program *)malloc(sizeof(*mbf));
		if (mbf != NULL) {
			mbf->mbf_len = bf->bf_len;
			memcpy(mbf->mbf_insns, bf->bf_insns,
			    bf->bf_len * sizeof(mbf->mbf_insns[0]));

			*request = MINIX_BIOCSETF;
		} else
			errno = ENOMEM;

		return (vir_bytes)mbf;

	case BIOCGDLTLIST:
		bfl = (struct bpf_dltlist *)data;

		mbfl = (struct minix_bpf_dltlist *)malloc(sizeof(*mbfl));
		if (mbfl != NULL) {
			/*
			 * The length may exceed MINIX_BPF_MAXDLT, and should
			 * be truncated as needed by the BPF implementation.
			 */
			memcpy(&mbfl->mbfl_dltlist, bfl, sizeof(*bfl));

			*request = MINIX_BIOCGDLTLIST;
		} else
			errno = ENOMEM;

		return (vir_bytes)mbfl;

	default:
		assert(0);

		errno = ENOTTY;
		return 0;
	}
}

/*
 * Convert the result of a BPF (Berkeley Packet Filter) related IOCTL with
 * pointers from the flat format used to make the call to MINIX3.  Called on
 * success only.  The given request code is that of the (NetBSD-type) original.
 */
static void
ioctl_convert_bpf_from_minix(vir_bytes addr, void * data,
	unsigned long request)
{
	struct minix_bpf_dltlist *mbfl;
	struct bpf_dltlist *bfl;

	switch (request) {
	case BIOCGDLTLIST:
		mbfl = (struct minix_bpf_dltlist *)addr;
		bfl = (struct bpf_dltlist *)data;

		memcpy(bfl, &mbfl->mbfl_dltlist, sizeof(*bfl));

		if (bfl->bfl_list != NULL && bfl->bfl_len > 0)
			memcpy(bfl->bfl_list, mbfl->mbfl_list,
			    bfl->bfl_len * sizeof(bfl->bfl_list[0]));

		break;

	default:
		assert(0);
	}
}

/*
 * Library implementation of FIOCLEX and FIONCLEX.
 */
@@ -110,6 +307,7 @@ ioctl_to_fcntl(int fd, unsigned long request, void * data)

 int ioctl(int fd, unsigned long request, ...)
 {
+	minix_i2c_ioctl_exec_t i2c;
 	int r, request_save;
 	message m;
 	vir_bytes addr;
@@ -124,8 +322,6 @@ int ioctl(int fd, unsigned long request, ...)
 	 * To support compatibility with interfaces on other systems, certain
 	 * requests are re-written to flat structures (i.e. without pointers).
 	 */
-	minix_i2c_ioctl_exec_t i2c;
-
 	request_save = request;

 	switch (request) {
@@ -142,6 +338,19 @@ int ioctl(int fd, unsigned long request, ...)
 		addr = (vir_bytes) &i2c;
 		request = MINIX_I2C_IOCTL_EXEC;
 		break;
+
+	case SIOCGIFMEDIA:
+	case SIOCIFGCLONERS:
+		if ((addr = ioctl_convert_if_to_minix(data, &request)) == 0)
+			return -1;	/* errno has already been set */
+		break;
+
+	case BIOCSETF:
+	case BIOCGDLTLIST:
+		if ((addr = ioctl_convert_bpf_to_minix(data, &request)) == 0)
+			return -1;	/* errno has already been set */
+		break;
+
 	default:
 		/* Keep original as-is */
 		addr = (vir_bytes)data;
@@ -155,11 +364,30 @@ int ioctl(int fd, unsigned long request, ...)

 	r = _syscall(VFS_PROC_NR, VFS_IOCTL, &m);

-	/* Translate back to original form */
+	/*
+	 * Translate back to original form.  Do this on failure as well, as
+	 * temporarily allocated resources may have to be freed up again.
+	 */
 	switch (request_save) {
 	case I2C_IOCTL_EXEC:
 		rewrite_i2c_minix_to_netbsd(data, &i2c);
 		break;
+
+	case SIOCGIFMEDIA:
+	case SIOCIFGCLONERS:
+		if (r == 0)
+			ioctl_convert_if_from_minix(addr, data, request_save);
+		free((void *)addr);
+		break;
+
+	case BIOCGDLTLIST:
+		if (r == 0)
+			ioctl_convert_bpf_from_minix(addr, data, request_save);
+		/* FALLTHROUGH */
+	case BIOCSETF:
+		free((void *)addr);
+		break;

 	default:
 		/* Nothing to do */
 		break;
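To see the whole round trip in action, consider BIOCGDLTLIST: the request is flattened on the way in and the data link type list is copied back into the caller's pointer-based structure on the way out. A hedged, self-contained sketch follows; the interface name "lo0" and the buffer size are illustrative assumptions:

#include <sys/ioctl.h>
#include <net/bpf.h>
#include <net/if.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <err.h>

int
main(void)
{
	u_int list[32];		/* hypothetical caller-side buffer */
	struct bpf_dltlist bfl = { 32, list };
	struct ifreq ifr;
	u_int i;
	int fd;

	if ((fd = open("/dev/bpf", O_RDWR)) < 0)
		err(1, "open");

	/* A DLT list is only available once the device is attached. */
	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, "lo0", sizeof(ifr.ifr_name));
	if (ioctl(fd, BIOCSETIF, &ifr) < 0)
		err(1, "BIOCSETIF");

	/* libc rewrites this to MINIX_BIOCGDLTLIST and copies results back. */
	if (ioctl(fd, BIOCGDLTLIST, &bfl) < 0)
		err(1, "BIOCGDLTLIST");
	for (i = 0; i < bfl.bfl_len; i++)
		printf("DLT %u\n", bfl.bfl_list[i]);
	return 0;
}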
@@ -1,6 +1,7 @@
 .include <bsd.own.mk>

 .if ${MKIMAGEONLY} == "no"
+SUBDIR+= lwip
 SUBDIR+= uds
 .endif # ${MKIMAGEONLY} == "no"
minix/net/lwip/Makefile (new file, 34 lines)
@@ -0,0 +1,34 @@
# Makefile for the lwIP TCP/IP socket driver service (LWIP)

.include <bsd.own.mk>

PROG=	lwip
SRCS=	lwip.c mempool.c pchain.c addr.c addrpol.c tcpisn.c mcast.c ipsock.c \
	pktsock.c tcpsock.c udpsock.c rawsock.c ifdev.c ifaddr.c loopif.c \
	ethif.c ndev.c rttree.c route.c rtsock.c lnksock.c lldata.c mibtree.c \
	ifconf.c bpfdev.c bpf_filter.c util.c

FILES=	${PROG}.conf
FILESNAME=	${PROG}
FILESDIR=	/etc/system.conf.d

CPPFLAGS+=	-I${NETBSDSRCDIR}/minix/lib/liblwip/dist/src/include
CPPFLAGS+=	-I${NETBSDSRCDIR}/minix/lib/liblwip/lib

# Disabling USE_INET6 only superficially hides IPv6 support in the service.
.if (${USE_INET6} != "no")
CPPFLAGS+=	-DINET6
.endif

# Some warnings are the result of usage of lwIP macros.  We must not generate
# errors for those, but even producing the warnings is not helpful, so we
# disable them altogether.
CPPFLAGS+=	-Wno-address

DPADD+=	${LIBLWIP} ${LIBSOCKEVENT} ${LIBSOCKDRIVER} ${LIBCHARDRIVER} \
	${LIBSYS} ${LIBTIMERS}
LDADD+=	-llwip -lsockevent -lsockdriver -lchardriver -lsys -ltimers

WARNS?=	5

.include <minix.service.mk>
minix/net/lwip/addr.c (new file, 692 lines)
@@ -0,0 +1,692 @@
/* LWIP service - addr.c - socket address verification and conversion */

#include "lwip.h"

/*
 * Return TRUE if the given socket address is of type AF_UNSPEC, or FALSE
 * otherwise.
 */
int
addr_is_unspec(const struct sockaddr * addr, socklen_t addr_len)
{

	return (addr_len >= offsetof(struct sockaddr, sa_data) &&
	    addr->sa_family == AF_UNSPEC);
}

/*
 * Check whether the given multicast address is generally valid.  This check
 * should not be moved into addr_get_inet(), as we do not want to forbid
 * creating routes for such addresses, for example.  We do however apply the
 * restrictions here to all provided source and destination addresses.  Return
 * TRUE if the address is an acceptable multicast address, or FALSE otherwise.
 */
int
addr_is_valid_multicast(const ip_addr_t * ipaddr)
{
	uint8_t scope;

	assert(ip_addr_ismulticast(ipaddr));

	/* We apply restrictions to IPv6 multicast addresses only. */
	if (IP_IS_V6(ipaddr)) {
		scope = ip6_addr_multicast_scope(ip_2_ip6(ipaddr));

		if (scope == IP6_MULTICAST_SCOPE_RESERVED0 ||
		    scope == IP6_MULTICAST_SCOPE_RESERVEDF)
			return FALSE;

		/*
		 * We do not impose restrictions on the three defined embedded
		 * flags, even though we put no effort into supporting them,
		 * especially in terms of automatically creating routes for
		 * all cases.  We do force the fourth flag to be zero.
		 * Unfortunately there is no lwIP macro to check for this flag.
		 */
		if (ip_2_ip6(ipaddr)->addr[0] & PP_HTONL(0x00800000UL))
			return FALSE;

		/* Prevent KAME-embedded zone IDs from entering the system. */
		if (ip6_addr_has_scope(ip_2_ip6(ipaddr), IP6_UNKNOWN) &&
		    (ip_2_ip6(ipaddr)->addr[0] & PP_HTONL(0x0000ffffUL)))
			return FALSE;
	}

	return TRUE;
}

/*
 * Load a sockaddr structure, as copied from userland, as a lwIP-style IP
 * address and (optionally) a port number.  The expected type of IP address is
 * given as 'type', which must be one of IPADDR_TYPE_{V4,ANY,V6}.  If it is
 * IPADDR_TYPE_V4, 'addr' is expected to point to a sockaddr_in structure.  If
 * it is IPADDR_TYPE_{ANY,V6}, 'addr' is expected to point to a sockaddr_in6
 * structure.  For the _ANY case, the result will be an _ANY address only if it
 * is the unspecified (all-zeroes) address and a _V6 address in all other
 * cases.  For the _V6 case, the result will always be a _V6 address.  The
 * length of the structure pointed to by 'addr' is given as 'addr_len'.  If the
 * boolean 'kame' flag is set, addresses will be interpreted to be KAME style,
 * meaning that for scoped IPv6 addresses, the zone is embedded in the address
 * rather than given in sin6_scope_id.  On success, store the resulting IP
 * address in 'ipaddr'.  If 'port' is not NULL, store the port number in it;
 * otherwise, ignore the port number.  On any parsing failure, return an
 * appropriate negative error code.
 */
int
addr_get_inet(const struct sockaddr * addr, socklen_t addr_len, uint8_t type,
	ip_addr_t * ipaddr, int kame, uint16_t * port)
{
	struct sockaddr_in sin;
	struct sockaddr_in6 sin6;
	ip6_addr_t *ip6addr;
	uint32_t ifindex;

	switch (type) {
	case IPADDR_TYPE_V4:
		if (addr_len != sizeof(sin))
			return EINVAL;

		/*
		 * Getting around strict aliasing problems.  Oh, the irony of
		 * doing an extra memcpy so that the compiler can do a better
		 * job at optimizing..
		 */
		memcpy(&sin, addr, sizeof(sin));

		if (sin.sin_family != AF_INET)
			return EAFNOSUPPORT;

		ip_addr_set_ip4_u32(ipaddr, sin.sin_addr.s_addr);

		if (port != NULL)
			*port = ntohs(sin.sin_port);

		return OK;

	case IPADDR_TYPE_ANY:
	case IPADDR_TYPE_V6:
		if (addr_len != sizeof(sin6))
			return EINVAL;

		/* Again, strict aliasing.. */
		memcpy(&sin6, addr, sizeof(sin6));

		if (sin6.sin6_family != AF_INET6)
			return EAFNOSUPPORT;

		memset(ipaddr, 0, sizeof(*ipaddr));

		/*
		 * This is a bit ugly, but NetBSD does not expose s6_addr32 and
		 * s6_addr is a series of bytes, which is a mismatch for lwIP.
		 * The alternative would be another memcpy..
		 */
		ip6addr = ip_2_ip6(ipaddr);
		assert(sizeof(ip6addr->addr) == sizeof(sin6.sin6_addr));
		memcpy(ip6addr->addr, &sin6.sin6_addr, sizeof(ip6addr->addr));

		/*
		 * If the address may have a scope, extract the zone ID.
		 * Where the zone ID is depends on the 'kame' parameter: KAME-
		 * style addresses have it embedded within the address, whereas
		 * non-KAME addresses use the (misnamed) sin6_scope_id field.
		 */
		if (ip6_addr_has_scope(ip6addr, IP6_UNKNOWN)) {
			if (kame) {
				ifindex =
				    ntohl(ip6addr->addr[0]) & 0x0000ffffUL;

				ip6addr->addr[0] &= PP_HTONL(0xffff0000UL);
			} else {
				/*
				 * Reject KAME-style addresses for normal
				 * socket calls, to save ourselves the trouble
				 * of mixed address styles elsewhere.
				 */
				if (ip6addr->addr[0] & PP_HTONL(0x0000ffffUL))
					return EINVAL;

				ifindex = sin6.sin6_scope_id;
			}

			/*
			 * Reject invalid zone IDs.  This also enforces that
			 * no zone IDs wider than eight bits enter the system.
			 * As a side effect, it is not possible to add routes
			 * for invalid zones, but that should be no problem.
			 */
			if (ifindex != 0 &&
			    ifdev_get_by_index(ifindex) == NULL)
				return ENXIO;

			ip6_addr_set_zone(ip6addr, ifindex);
		} else
			ip6_addr_clear_zone(ip6addr);

		/*
		 * Set the type to ANY if it was ANY and the address itself is
		 * ANY as well.  Otherwise, we are binding to a specific IPv6
		 * address, so IPV6_V6ONLY stops being relevant and we should
		 * leave the address set to V6.  Destination addresses for ANY
		 * are set to V6 elsewhere.
		 */
		if (type == IPADDR_TYPE_ANY && ip6_addr_isany(ip6addr))
			IP_SET_TYPE(ipaddr, type);
		else
			IP_SET_TYPE(ipaddr, IPADDR_TYPE_V6);

		if (port != NULL)
			*port = ntohs(sin6.sin6_port);

		return OK;

	default:
		return EAFNOSUPPORT;
	}
}

/*
 * Store an lwIP-style IP address and port number as a sockaddr structure
 * (sockaddr_in or sockaddr_in6, depending on the given IP address) to be
 * copied to userland.  The result is stored in the buffer pointed to by
 * 'addr'.  Before the call, 'addr_len' must be set to the size of this buffer.
 * This is an internal check to prevent buffer overflows, and must not be used
 * to validate input, since a mismatch will trigger a panic.  After the call,
 * 'addr_len' will be set to the size of the resulting structure.  The lwIP-
 * style address is given as 'ipaddr'.  If the boolean 'kame' flag is set, the
 * address will be stored KAME-style, meaning that for scoped IPv6 addresses,
 * the address zone will be stored embedded in the address rather than in
 * sin6_scope_id.  If relevant, 'port' contains the port number in host-byte
 * order; otherwise it should be set to zero.
 */
void
addr_put_inet(struct sockaddr * addr, socklen_t * addr_len,
	const ip_addr_t * ipaddr, int kame, uint16_t port)
{
	struct sockaddr_in sin;
	struct sockaddr_in6 sin6;
	const ip6_addr_t *ip6addr;
	uint32_t zone;

	switch (IP_GET_TYPE(ipaddr)) {
	case IPADDR_TYPE_V4:
		if (*addr_len < sizeof(sin))
			panic("provided address buffer too small");

		memset(&sin, 0, sizeof(sin));

		sin.sin_len = sizeof(sin);
		sin.sin_family = AF_INET;
		sin.sin_port = htons(port);
		sin.sin_addr.s_addr = ip_addr_get_ip4_u32(ipaddr);

		memcpy(addr, &sin, sizeof(sin));
		*addr_len = sizeof(sin);

		break;

	case IPADDR_TYPE_ANY:
	case IPADDR_TYPE_V6:
		if (*addr_len < sizeof(sin6))
			panic("provided address buffer too small");

		ip6addr = ip_2_ip6(ipaddr);

		memset(&sin6, 0, sizeof(sin6));

		sin6.sin6_len = sizeof(sin6);
		sin6.sin6_family = AF_INET6;
		sin6.sin6_port = htons(port);
		memcpy(&sin6.sin6_addr, ip6addr->addr, sizeof(sin6.sin6_addr));

		/*
		 * If the IPv6 address has a zone set, it must be scoped, and
		 * we put the zone in the result.  It may occur that a scoped
		 * IPv6 address does not have a zone here though, for example
		 * if packet routing fails for sendto() with a zoneless address
		 * on an unbound socket, resulting in an RTM_MISS message.  In
		 * such cases, simply leave the zone index blank in the result.
		 */
		if (ip6_addr_has_zone(ip6addr)) {
			assert(ip6_addr_has_scope(ip6addr, IP6_UNKNOWN));

			zone = ip6_addr_zone(ip6addr);
			assert(zone <= UINT8_MAX);

			if (kame)
				sin6.sin6_addr.s6_addr[3] = zone;
			else
				sin6.sin6_scope_id = zone;
		}

		memcpy(addr, &sin6, sizeof(sin6));
		*addr_len = sizeof(sin6);

		break;

	default:
		panic("unknown IP address type: %u", IP_GET_TYPE(ipaddr));
	}
}

/*
 * Load a link-layer sockaddr structure (sockaddr_dl), as copied from userland,
 * and return the contained name and/or hardware address.  The address is
 * provided as 'addr', with length 'addr_len'.  On success, return OK.  If
 * 'name' is not NULL, it must be of size 'name_max', and will be used to store
 * the (null-terminated) interface name in the given structure if present, or
 * the empty string if not.  If 'hwaddr' is not NULL, it will be used to store
 * the hardware address in the given structure, which must in that case be
 * present and exactly 'hwaddr_len' bytes long.  On any parsing failure, return
 * an appropriate negative error code.
 */
int
addr_get_link(const struct sockaddr * addr, socklen_t addr_len, char * name,
	size_t name_max, uint8_t * hwaddr, size_t hwaddr_len)
{
	struct sockaddr_dlx sdlx;
	size_t nlen, alen;

	if (addr_len < offsetof(struct sockaddr_dlx, sdlx_data))
		return EINVAL;

	/*
	 * We cannot prevent callers from passing in a massively oversized
	 * sockaddr_dl structure.  However, we insist that all the actual data
	 * be contained within the size of our sockaddr_dlx version.
	 */
	if (addr_len > sizeof(sdlx))
		addr_len = sizeof(sdlx);

	memcpy(&sdlx, addr, addr_len);

	if (sdlx.sdlx_family != AF_LINK)
		return EAFNOSUPPORT;

	/* Address selectors are not currently supported. */
	if (sdlx.sdlx_slen != 0)
		return EINVAL;

	nlen = (size_t)sdlx.sdlx_nlen;
	alen = (size_t)sdlx.sdlx_alen;

	/* The nlen and alen fields are 8-bit, so no risks of overflow here. */
	if (addr_len < offsetof(struct sockaddr_dlx, sdlx_data) + nlen + alen)
		return EINVAL;

	/*
	 * Copy out the name, truncating it if needed.  The name in the
	 * sockaddr is not null terminated, so we have to do that.  If the
	 * sockaddr has no name, copy out an empty name.
	 */
	if (name != NULL) {
		assert(name_max > 0);

		if (name_max > nlen + 1)
			name_max = nlen + 1;

		memcpy(name, sdlx.sdlx_data, name_max - 1);
		name[name_max - 1] = '\0';
	}

	/*
	 * Copy over the hardware address.  For simplicity, we require that the
	 * caller specify the exact hardware address length.
	 */
	if (hwaddr != NULL) {
		if (alen != hwaddr_len)
			return EINVAL;

		memcpy(hwaddr, sdlx.sdlx_data + nlen, hwaddr_len);
	}

	return OK;
}

/*
 * Store a link-layer sockaddr structure (sockaddr_dl), to be copied to
 * userland.  The result is stored in the buffer pointed to by 'addr'.  Before
 * the call, 'addr_len' must be set to the size of this buffer.  This is an
 * internal check to prevent buffer overflows, and must not be used to validate
 * input, since a mismatch will trigger a panic.  After the call, 'addr_len'
 * will be set to the size of the resulting structure.  The given interface
 * index 'ifindex' and (IFT_) interface type 'type' will always be stored in
 * the resulting structure.  If 'name' is not NULL, it must be a null-
 * terminated interface name string which will be included in the structure.
 * If 'hwaddr' is not NULL, it must be a hardware address of length
 * 'hwaddr_len', which will also be included in the structure.
 */
void
addr_put_link(struct sockaddr * addr, socklen_t * addr_len, uint32_t ifindex,
	uint32_t type, const char * name, const uint8_t * hwaddr,
	size_t hwaddr_len)
{
	struct sockaddr_dlx sdlx;
	size_t name_len;
	socklen_t len;

	name_len = (name != NULL) ? strlen(name) : 0;

	if (hwaddr == NULL)
		hwaddr_len = 0;

	assert(name_len < IFNAMSIZ);
	assert(hwaddr_len <= NETIF_MAX_HWADDR_LEN);

	len = offsetof(struct sockaddr_dlx, sdlx_data) + name_len + hwaddr_len;

	if (*addr_len < len)
		panic("provided address buffer too small");

	memset(&sdlx, 0, sizeof(sdlx));
	sdlx.sdlx_len = len;
	sdlx.sdlx_family = AF_LINK;
	sdlx.sdlx_index = ifindex;
	sdlx.sdlx_type = type;
	sdlx.sdlx_nlen = name_len;
	sdlx.sdlx_alen = hwaddr_len;
	if (name_len > 0)
		memcpy(sdlx.sdlx_data, name, name_len);
	if (hwaddr_len > 0)
		memcpy(sdlx.sdlx_data + name_len, hwaddr, hwaddr_len);

	memcpy(addr, &sdlx, len);
	*addr_len = len;
}

/*
 * Convert an IPv4 or IPv6 netmask, given as sockaddr structure 'addr', to a
 * prefix length.  The length of the sockaddr structure is given as 'addr_len'.
 * For consistency with addr_get_inet(), the expected address type is given as
 * 'type', and must be either IPADDR_TYPE_V4 or IPADDR_TYPE_V6.  On success,
 * return OK with the number of set prefix bits returned in 'prefix', and
 * optionally with a lwIP representation of the netmask stored in 'ipaddr' (if
 * not NULL).  On failure, return an appropriate negative error code.  Note
 * that this function does not support compressed IPv4 network masks; such
 * addresses must be expanded before a call to this function.
 */
int
addr_get_netmask(const struct sockaddr * addr, socklen_t addr_len,
	uint8_t type, unsigned int * prefix, ip_addr_t * ipaddr)
{
	struct sockaddr_in sin;
	struct sockaddr_in6 sin6;
	unsigned int byte, bit;
	uint32_t val;

	switch (type) {
	case IPADDR_TYPE_V4:
		if (addr_len != sizeof(sin))
			return EINVAL;

		memcpy(&sin, addr, sizeof(sin));

		if (sin.sin_family != AF_INET)
			return EAFNOSUPPORT;

		val = ntohl(sin.sin_addr.s_addr);

		/* Find the first zero bit. */
		for (bit = 0; bit < IP4_BITS; bit++)
			if (!(val & (1 << (IP4_BITS - bit - 1))))
				break;

		*prefix = bit;

		/* All bits after the first zero bit must also be zero. */
		if (bit < IP4_BITS &&
		    (val & ((1 << (IP4_BITS - bit - 1)) - 1)))
			return EINVAL;

		if (ipaddr != NULL)
			ip_addr_set_ip4_u32(ipaddr, sin.sin_addr.s_addr);

		return OK;

	case IPADDR_TYPE_V6:
		if (addr_len != sizeof(sin6))
			return EINVAL;

		memcpy(&sin6, addr, sizeof(sin6));

		if (sin6.sin6_family != AF_INET6)
			return EAFNOSUPPORT;

		/* Find the first zero bit. */
		for (byte = 0; byte < __arraycount(sin6.sin6_addr.s6_addr);
		    byte++)
			if (sin6.sin6_addr.s6_addr[byte] != 0xff)
				break;

		/* If all bits are set, there is nothing more to do. */
		if (byte == __arraycount(sin6.sin6_addr.s6_addr)) {
			*prefix = __arraycount(sin6.sin6_addr.s6_addr) * NBBY;

			return OK;
		}

		for (bit = 0; bit < NBBY; bit++)
			if (!(sin6.sin6_addr.s6_addr[byte] &
			    (1 << (NBBY - bit - 1))))
				break;

		*prefix = byte * NBBY + bit;

		/* All bits after the first zero bit must also be zero. */
		if (bit < NBBY && (sin6.sin6_addr.s6_addr[byte] &
		    ((1 << (NBBY - bit - 1)) - 1)))
			return EINVAL;

		for (byte++; byte < __arraycount(sin6.sin6_addr.s6_addr);
		    byte++)
			if (sin6.sin6_addr.s6_addr[byte] != 0)
				return EINVAL;

		if (ipaddr != NULL) {
			ip_addr_set_zero_ip6(ipaddr);

			memcpy(ip_2_ip6(ipaddr)->addr, &sin6.sin6_addr,
			    sizeof(ip_2_ip6(ipaddr)->addr));
		}

		return OK;

	default:
		panic("unknown IP address type: %u", type);
	}
}

/*
 * Generate a raw network mask based on the given prefix length.
 */
void
addr_make_netmask(uint8_t * addr, socklen_t addr_len, unsigned int prefix)
{
	unsigned int byte, bit;

	byte = prefix / NBBY;
	bit = prefix % NBBY;

	assert(byte + !!bit <= addr_len);

	if (byte > 0)
		memset(addr, 0xff, byte);
	if (bit != 0)
		addr[byte++] = (uint8_t)(0xff << (NBBY - bit));
	if (byte < addr_len)
		memset(&addr[byte], 0, addr_len - byte);
}

/*
 * Store a network mask as a sockaddr structure, in 'addr'.  Before the call,
 * 'addr_len' must be set to the memory size of 'addr'.  The address type is
 * given as 'type', and must be either IPADDR_TYPE_V4 or IPADDR_TYPE_V6.  The
 * prefix length from which to generate the network mask is given as 'prefix'.
 * Upon return, 'addr_len' is set to the size of the resulting sockaddr
 * structure.
 */
void
addr_put_netmask(struct sockaddr * addr, socklen_t * addr_len, uint8_t type,
	unsigned int prefix)
{
	struct sockaddr_in sin;
	struct sockaddr_in6 sin6;

	switch (type) {
	case IPADDR_TYPE_V4:
		if (*addr_len < sizeof(sin))
			panic("provided address buffer too small");

		assert(prefix <= IP4_BITS);

		memset(&sin, 0, sizeof(sin));
		sin.sin_len = sizeof(sin);
		sin.sin_family = AF_INET;

		addr_make_netmask((uint8_t *)&sin.sin_addr.s_addr,
		    sizeof(sin.sin_addr.s_addr), prefix);

		memcpy(addr, &sin, sizeof(sin));
		*addr_len = sizeof(sin);

		break;

	case IPADDR_TYPE_V6:
		if (*addr_len < sizeof(sin6))
			panic("provided address buffer too small");

		assert(prefix <= IP6_BITS);

		memset(&sin6, 0, sizeof(sin6));
		sin6.sin6_len = sizeof(sin6);
		sin6.sin6_family = AF_INET6;

		addr_make_netmask(sin6.sin6_addr.s6_addr,
		    sizeof(sin6.sin6_addr.s6_addr), prefix);

		memcpy(addr, &sin6, sizeof(sin6));
		*addr_len = sizeof(sin6);

		break;

	default:
		panic("unknown IP address type: %u", type);
	}
}

/*
 * Normalize the given address in 'src' to the given number of prefix bits,
 * setting all other bits to zero.  Return the result in 'dst'.
 */
void
addr_normalize(ip_addr_t * dst, const ip_addr_t * src, unsigned int prefix)
{
	unsigned int addr_len, byte, bit;
	const uint8_t *srcaddr;
	uint8_t type, *dstaddr;

	type = IP_GET_TYPE(src);

	memset(dst, 0, sizeof(*dst));
	IP_SET_TYPE(dst, type);

	switch (type) {
	case IPADDR_TYPE_V4:
		srcaddr = (const uint8_t *)&ip_2_ip4(src)->addr;
		dstaddr = (uint8_t *)&ip_2_ip4(dst)->addr;
		addr_len = sizeof(ip_2_ip4(src)->addr);

		break;

	case IPADDR_TYPE_V6:
		ip6_addr_set_zone(ip_2_ip6(dst), ip6_addr_zone(ip_2_ip6(src)));

		srcaddr = (const uint8_t *)&ip_2_ip6(src)->addr;
		dstaddr = (uint8_t *)&ip_2_ip6(dst)->addr;
		addr_len = sizeof(ip_2_ip6(src)->addr);

		break;

	default:
		panic("unknown IP address type: %u", type);
	}

	byte = prefix / NBBY;
	bit = prefix % NBBY;

	assert(byte + !!bit <= addr_len);

	if (byte > 0)
		memcpy(dstaddr, srcaddr, byte);
	if (bit != 0) {
		dstaddr[byte] =
		    srcaddr[byte] & (uint8_t)(0xff << (NBBY - bit));
		byte++;
	}
}

/*
 * Return the number of common bits between the given two addresses, up to the
 * given maximum.  Thus, return a value between 0 and 'max' inclusive.
 */
unsigned int
addr_get_common_bits(const ip_addr_t * ipaddr1, const ip_addr_t * ipaddr2,
	unsigned int max)
{
	unsigned int addr_len, prefix, bit;
	const uint8_t *addr1, *addr2;
	uint8_t byte;

	switch (IP_GET_TYPE(ipaddr1)) {
	case IPADDR_TYPE_V4:
		assert(IP_IS_V4(ipaddr2));

		addr1 = (const uint8_t *)&ip_2_ip4(ipaddr1)->addr;
		addr2 = (const uint8_t *)&ip_2_ip4(ipaddr2)->addr;
		addr_len = sizeof(ip_2_ip4(ipaddr1)->addr);

		break;

	case IPADDR_TYPE_V6:
		assert(IP_IS_V6(ipaddr2));

		addr1 = (const uint8_t *)&ip_2_ip6(ipaddr1)->addr;
		addr2 = (const uint8_t *)&ip_2_ip6(ipaddr2)->addr;
		addr_len = sizeof(ip_2_ip6(ipaddr1)->addr);

		break;

	default:
		panic("unknown IP address type: %u", IP_GET_TYPE(ipaddr1));
	}

	if (addr_len > max * NBBY)
		addr_len = max * NBBY;

	for (prefix = 0; addr_len > 0; addr_len--, addr1++, addr2++,
	    prefix += NBBY) {
		if ((byte = (*addr1 ^ *addr2)) != 0) {
			/* TODO: see if we want a lookup table for this. */
			for (bit = 0; bit < NBBY; bit++, prefix++)
				if (byte & (1 << (NBBY - bit - 1)))
					break;

			break;
		}
	}

	if (prefix > max)
		prefix = max;

	return prefix;
}

/*
 * Convert the given IPv4 address to an IPv4-mapped IPv6 address.
 */
void
addr_make_v4mapped_v6(ip_addr_t * dst, const ip4_addr_t * src)
{

	IP_ADDR6(dst, 0, 0, PP_HTONL(0x0000ffffUL), ip4_addr_get_u32(src));
}
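The prefix-to-netmask conversion above is simple byte/bit arithmetic: whole 0xff bytes first, then one partial byte with the top remainder bits set, then zeroes. The following hedged, standalone sketch applies the same arithmetic to an IPv4 mask outside the service (names and the test value are illustrative only):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define NBBY 8	/* bits per byte, as in the service code */

/* Same logic as addr_make_netmask(): 0xff bytes, a partial byte, zeroes. */
static void
make_netmask(uint8_t *addr, size_t addr_len, unsigned int prefix)
{
	size_t byte = prefix / NBBY;
	unsigned int bit = prefix % NBBY;

	memset(addr, 0, addr_len);
	memset(addr, 0xff, byte);
	if (bit != 0)
		addr[byte] = (uint8_t)(0xff << (NBBY - bit));
}

int
main(void)
{
	uint8_t mask[4];

	make_netmask(mask, sizeof(mask), 22);
	/* Prints 255.255.252.0 */
	printf("%u.%u.%u.%u\n", mask[0], mask[1], mask[2], mask[3]);
	return 0;
}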
minix/net/lwip/addr.h (new file, 33 lines)
@@ -0,0 +1,33 @@
#ifndef MINIX_NET_LWIP_ADDR_H
#define MINIX_NET_LWIP_ADDR_H

int addr_is_unspec(const struct sockaddr * addr, socklen_t addr_len);
int addr_is_valid_multicast(const ip_addr_t * ipaddr);

int addr_get_inet(const struct sockaddr * addr, socklen_t addr_len,
	uint8_t type, ip_addr_t * ipaddr, int kame, uint16_t * port);
void addr_put_inet(struct sockaddr * addr, socklen_t * addr_len,
	const ip_addr_t * ipaddr, int kame, uint16_t port);

int addr_get_link(const struct sockaddr * addr, socklen_t addr_len,
	char * name, size_t name_max, uint8_t * hwaddr, size_t hwaddr_len);
void addr_put_link(struct sockaddr * addr, socklen_t * addr_len,
	uint32_t ifindex, uint32_t type, const char * name,
	const uint8_t * hwaddr, size_t hwaddr_len);

int addr_get_netmask(const struct sockaddr * addr, socklen_t addr_len,
	uint8_t type, unsigned int * prefix, ip_addr_t * ipaddr);
void addr_make_netmask(uint8_t * addr, socklen_t addr_len,
	unsigned int prefix);
void addr_put_netmask(struct sockaddr * addr, socklen_t * addr_len,
	uint8_t type, unsigned int prefix);

void addr_normalize(ip_addr_t * dst, const ip_addr_t * src,
	unsigned int prefix);
unsigned int addr_get_common_bits(const ip_addr_t * addr1,
	const ip_addr_t * addr2, unsigned int max);

void addr_make_v4mapped_v6(ip_addr_t * dst, const ip4_addr_t * src);

#endif /* !MINIX_NET_LWIP_ADDR_H */
minix/net/lwip/addrpol.c (new file, 143 lines)
@@ -0,0 +1,143 @@
/* LWIP service - addrpol.c - address policy table and values */
/*
 * The main purpose of this module is to implement the address policy table
 * described in RFC 6724.  In general, the policy table is used for two
 * purposes: source address selection, which is part of this service, and
 * destination address selection, which is implemented in libc.  NetBSD 7, the
 * version that MINIX 3 is synced against at this moment, does not actually
 * implement the libc part yet, though.  That will change with NetBSD 8, where
 * libc uses sysctl(7) to obtain the kernel's policy table, which itself can be
 * changed with the new ip6addrctl(8) utility.  Once we resync to NetBSD 8, we
 * will also have to support this new functionality, and this module is where
 * it would be implemented.  Since NetBSD 7 is even lacking the necessary
 * definitions, we cannot do that ahead of time, though.  Thus, until then,
 * this module is rather simple, as it only implements a static policy table
 * used for source address selection.  No changes beyond this module should be
 * necessary, e.g. we are purposely not caching labels for local addresses.
 */

#include "lwip.h"

/*
 * Address policy table.  Currently hardcoded to the default of RFC 6724.
 * Sorted by prefix length, so that the first match is always also the longest.
 */
static const struct {
	ip_addr_t ipaddr;
	unsigned int prefix;
	int precedence;
	int label;
} addrpol_table[] = {
	{ IPADDR6_INIT_HOST(0, 0, 0, 1),		128,	50,	 0 },
	{ IPADDR6_INIT_HOST(0, 0, 0x0000ffffUL, 0),	 96,	35,	 4 },
	{ IPADDR6_INIT_HOST(0, 0, 0, 0),		 96,	 1,	 3 },
	{ IPADDR6_INIT_HOST(0x20010000UL, 0, 0, 0),	 32,	 5,	 5 },
	{ IPADDR6_INIT_HOST(0x20020000UL, 0, 0, 0),	 16,	30,	 2 },
	{ IPADDR6_INIT_HOST(0x3ffe0000UL, 0, 0, 0),	 16,	 1,	12 },
	{ IPADDR6_INIT_HOST(0xfec00000UL, 0, 0, 0),	 10,	 1,	11 },
	{ IPADDR6_INIT_HOST(0xfc000000UL, 0, 0, 0),	  7,	 3,	13 },
	{ IPADDR6_INIT_HOST(0, 0, 0, 0),		  0,	40,	 1 }
};

/*
 * Obtain the label value for the given IP address from the address policy
 * table.  Currently only IPv6 addresses may be given.  This function is linear
 * in the number of address policy table entries, requiring a relatively
 * expensive normalization operation for each entry, so it should not be called
 * lightly.  Its results should not be cached beyond local contexts either,
 * because the policy table itself may be changed from userland (in the
 * future).
 *
 * TODO: convert IPv4 addresses to IPv4-mapped IPv6 addresses.
 * TODO: embed the interface index in link-local addresses.
 */
int
addrpol_get_label(const ip_addr_t * iporig)
{
	ip_addr_t ipaddr;
	unsigned int i;

	assert(IP_IS_V6(iporig));

	/*
	 * The policy table is sorted by prefix length such that the first
	 * match is also the one with the longest prefix, and as such the best.
	 */
	for (i = 0; i < __arraycount(addrpol_table); i++) {
		addr_normalize(&ipaddr, iporig, addrpol_table[i].prefix);

		if (ip_addr_cmp(&addrpol_table[i].ipaddr, &ipaddr))
			return addrpol_table[i].label;
	}

	/*
	 * We cannot possibly get here with the default policy table, because
	 * the last entry will always match.  It is not clear what we should
	 * return if there is no matching entry, though.  For now, we return
	 * the default label value for the default (::/0) entry, which is 1.
	 */
	return 1;
}

/*
 * Return an opaque positive value (possibly zero) that represents the scope of
 * the given IP address.  A larger value indicates a wider scope.  The 'is_src'
 * flag indicates whether the address is a source or a destination address,
 * which affects the value returned for unknown addresses.  A scope is a direct
 * function of only the given address, so the result may be cached on a per-
 * address basis without risking invalidation at any point in time.
 */
int
addrpol_get_scope(const ip_addr_t * ipaddr, int is_src)
{
	const ip6_addr_t *ip6addr;

	/*
	 * For now, all IPv4 addresses are considered global.  This function is
	 * currently called only for IPv6 addresses anyway.
	 */
	if (IP_IS_V4(ipaddr))
		return IP6_MULTICAST_SCOPE_GLOBAL;

	assert(IP_IS_V6(ipaddr));

	ip6addr = ip_2_ip6(ipaddr);

	/*
	 * These are ordered not by ascending scope, but (roughly) by expected
	 * likeliness to match, for performance reasons.
	 */
	if (ip6_addr_isglobal(ip6addr))
		return IP6_MULTICAST_SCOPE_GLOBAL;

	if (ip6_addr_islinklocal(ip6addr) || ip6_addr_isloopback(ip6addr))
		return IP6_MULTICAST_SCOPE_LINK_LOCAL;

	/*
	 * We deliberately deviate from RFC 6724 Sec. 3.1 by considering
	 * Unique-Local Addresses (ULAs) to be of smaller scope than global
	 * addresses, to avoid that during source address selection, a
	 * preferred ULA is picked over a deprecated global address when given
	 * a global address as destination, as that would likely result in
	 * broken two-way communication.
	 */
	if (ip6_addr_isuniquelocal(ip6addr))
		return IP6_MULTICAST_SCOPE_ORGANIZATION_LOCAL;

	if (ip6_addr_ismulticast(ip6addr))
		return ip6_addr_multicast_scope(ip6addr);

	/* Site-local addresses are deprecated. */
	if (ip6_addr_issitelocal(ip6addr))
		return IP6_MULTICAST_SCOPE_SITE_LOCAL;

	/*
	 * If the address is a source address, give it a scope beyond global to
	 * make sure that a "real" global address is picked first.  If the
	 * address is a destination address, give it a global scope so as to
	 * pick "real" global addresses over unknown-scope source addresses.
	 */
	if (is_src)
		return IP6_MULTICAST_SCOPE_RESERVEDF; /* greater than GLOBAL */
	else
		return IP6_MULTICAST_SCOPE_GLOBAL;
}
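The label lookup above is a longest-prefix match: normalize the queried address to each entry's prefix length and compare, relying on the table's sort order. The following hedged sketch demonstrates the same matching idea over only the first 32 bits of an address, with a reduced table; the helper and values are illustrative, not service code:

#include <stdio.h>
#include <stdint.h>

struct entry { uint32_t addr; unsigned int prefix; int label; };

/* A few rows mirroring the RFC 6724 defaults, sorted by prefix length. */
static const struct entry table[] = {
	{ 0x20010000UL, 32,  5 },	/* 2001::/32 (Teredo) */
	{ 0x20020000UL, 16,  2 },	/* 2002::/16 (6to4) */
	{ 0x3ffe0000UL, 16, 12 },	/* 3ffe::/16 (6bone, deprecated) */
	{ 0x00000000UL,  0,  1 },	/* ::/0 (default) */
};

static int
get_label(uint32_t addr)
{
	unsigned int i;

	/* First match is the longest match, thanks to the sort order. */
	for (i = 0; i < sizeof(table) / sizeof(table[0]); i++) {
		uint32_t mask = (table[i].prefix == 0) ? 0 :
		    (uint32_t)(0xffffffffUL << (32 - table[i].prefix));
		if ((addr & mask) == table[i].addr)
			return table[i].label;
	}
	return 1;	/* default label, as in addrpol_get_label() */
}

int
main(void)
{
	printf("%d\n", get_label(0x20020000UL));	/* 6to4: prints 2 */
	printf("%d\n", get_label(0x20010db8UL));	/* falls through to ::/0: prints 1 */
	return 0;
}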
561
minix/net/lwip/bpf_filter.c
Normal file
561
minix/net/lwip/bpf_filter.c
Normal file
|
|
@ -0,0 +1,561 @@
|
|||
/* LWIP service - bpf_filter.c - Berkeley Packet Filter core implementation */
|
||||
/*
|
||||
* This is basically a drop-in replacement of NetBSD's bpf_filter.c, which
|
||||
* itself can be compiled for either the NetBSD kernel or for userland. On
|
||||
* MINIX 3, we would like to perform certain checks that NetBSD implements only
|
||||
* for its kernel (e.g., memory store access validation) while replacing the
|
||||
* NetBSD kernel specifics with our own (pbuf instead of mbuf, no BPF contexts
|
||||
* for now, etc.). As a result, it is easier to reimplement the whole thing,
|
||||
* because there is not all that much to it.
|
||||
*
|
||||
* Support for the standard BSD API allows us to run standard tests against
|
||||
* this module from userland, where _MINIX_SYSTEM is not defined. MINIX 3
|
||||
* specific extensions are enabled only if _MINIX_SYSTEM is defined.
|
||||
*/
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include <net/bpf.h>
|
||||
#include <minix/bitmap.h>
|
||||
|
||||
#ifdef _MINIX_SYSTEM
|
||||
#include "lwip.h"
|
||||
|
||||
/*
|
||||
* Obtain an unsigned 32-bit value in network byte order from the pbuf chain
|
||||
* 'pbuf' at offset 'k'. The given offset is guaranteed to be within bounds.
|
||||
*/
|
||||
static uint32_t
|
||||
bpf_get32_ext(const struct pbuf * pbuf, uint32_t k)
|
||||
{
|
||||
uint32_t val;
|
||||
unsigned int i;
|
||||
|
||||
/*
|
||||
* Find the pbuf that contains the first byte. We expect that most
|
||||
* filters will operate only on the headers of packets, so that we
|
||||
* mostly avoid going through this O(n) loop. Since only the superuser
|
||||
* can open BPF devices at all, we need not be worried about abuse in
|
||||
* this regard. However, it turns out that this loop is particularly
|
||||
* CPU-intensive after all, we can probably improve it by caching the
|
||||
* last visited pbuf, as read locality is likely high.
|
||||
*/
|
||||
while (k >= pbuf->len) {
|
||||
k -= pbuf->len;
|
||||
pbuf = pbuf->next;
|
||||
assert(pbuf != NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* We assume that every pbuf has some data, but we make no assumptions
|
||||
* about any minimum amount of data per pbuf. Therefore, we may have
|
||||
* to take the bytes from anywhere between one and four pbufs.
|
||||
* Hopefully the compiler will unroll this loop for us.
|
||||
*/
|
||||
val = (uint32_t)(((u_char *)pbuf->payload)[k]) << 24;
|
||||
|
||||
for (i = 0; i < 3; i++) {
|
||||
if (k >= (uint32_t)pbuf->len - 1) {
|
||||
k = 0;
|
||||
pbuf = pbuf->next;
|
||||
assert(pbuf != NULL);
|
||||
} else
|
||||
k++;
|
||||
val = (val << 8) | (uint32_t)(((u_char *)pbuf->payload)[k]);
|
||||
}
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
/*
|
||||
* Obtain an unsigned 16-bit value in network byte order from the pbuf chain
|
||||
* 'pbuf' at offset 'k'. The given offset is guaranteed to be within bounds.
|
||||
*/
|
||||
static uint32_t
|
||||
bpf_get16_ext(const struct pbuf * pbuf, uint32_t k)
|
||||
{
|
||||
|
||||
/* As above. */
|
||||
while (k >= pbuf->len) {
|
||||
k -= pbuf->len;
|
||||
pbuf = pbuf->next;
|
||||
assert(pbuf != NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* There are only two possible cases to cover here: either the two
|
||||
* bytes are in the same pbuf, or they are in subsequent ones.
|
||||
*/
|
||||
if (k < (uint32_t)pbuf->len - 1) {
|
||||
return ((uint32_t)(((u_char *)pbuf->payload)[k]) << 8) |
|
||||
(uint32_t)(((u_char *)pbuf->next->payload)[k + 1]);
|
||||
} else {
|
||||
assert(pbuf->next != NULL);
|
||||
return ((uint32_t)(((u_char *)pbuf->payload)[k]) << 8) |
|
||||
(uint32_t)(((u_char *)pbuf->next->payload)[0]);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Obtain an unsigned 8-bit value from the pbuf chain 'pbuf' at offset 'k'.
 * The given offset is guaranteed to be within bounds.
 */
static uint32_t
bpf_get8_ext(const struct pbuf * pbuf, uint32_t k)
{

	/* As above. */
	while (k >= pbuf->len) {
		k -= pbuf->len;
		pbuf = pbuf->next;
		assert(pbuf != NULL);
	}

	return (uint32_t)(((u_char *)pbuf->payload)[k]);
}

#endif /* _MINIX_SYSTEM */

/*
 * Execute a BPF filter program on (the first part of) a packet, and return the
 * maximum size of the packet that should be delivered to the filter owner.
 *
 * The 'pc' parameter points to an array of BPF instructions that together form
 * the filter program to be executed. If 'pc' is NULL, the packet is fully
 * accepted. Otherwise, the given program MUST have passed a previous call to
 * bpf_validate(). Not doing so will allow for arbitrary memory access.
 *
 * The 'packet' array contains up to the whole packet. The value of 'total'
 * denotes the total length of the packet; 'len' contains the size of the array
 * 'packet'. Chunked storage of the packet is not supported at this time.
 *
 * If executing the program succeeds, the return value is the maximum number of
 * bytes from the packet to be delivered. The return value may exceed the full
 * packet size. If the number of bytes returned is zero, the packet is to be
 * ignored. If the program fails to execute properly and return a value, a
 * value of zero is returned as well, thus also indicating that the packet
 * should be ignored. This is intentional: it saves filter programs from
 * having to perform explicit checks on the packet they are filtering.
 */
u_int
bpf_filter(const struct bpf_insn * pc, const u_char * packet, u_int total,
    u_int len)
#ifdef _MINIX_SYSTEM
{

	return bpf_filter_ext(pc, NULL /*pbuf*/, packet, total, len);
}

u_int
bpf_filter_ext(const struct bpf_insn * pc, const struct pbuf * pbuf,
    const u_char * packet, u_int total, u_int len)
#endif /* _MINIX_SYSTEM */
{
	uint32_t k, a, x, mem[BPF_MEMWORDS];

	/* An empty program accepts all packets. */
	if (pc == NULL)
		return UINT_MAX;

	/*
	 * We need not clear 'mem': the checker guarantees that each memory
	 * store word is always written before it is read.
	 */
	a = 0;
	x = 0;

	/* Execute the program. */
	for (;; pc++) {
		k = pc->k;

		switch (pc->code) {
		case BPF_LD+BPF_W+BPF_IND:	/* A <- P[X+k:4] */
			if (k + x < k)
				return 0;
			k += x;
			/* FALLTHROUGH */
		case BPF_LD+BPF_W+BPF_ABS:	/* A <- P[k:4] */
			/*
			 * 'k' may have any value, so check bounds in such a
			 * way that 'k' cannot possibly overflow and wrap.
			 */
			if (len >= 3 && k < len - 3)
				a = ((uint32_t)packet[k] << 24) |
				    ((uint32_t)packet[k + 1] << 16) |
				    ((uint32_t)packet[k + 2] << 8) |
				    (uint32_t)packet[k + 3];
#ifdef _MINIX_SYSTEM
			else if (total >= 3 && k < total - 3)
				a = bpf_get32_ext(pbuf, k);
#endif /* _MINIX_SYSTEM */
			else
				return 0;
			break;
		case BPF_LD+BPF_H+BPF_IND:	/* A <- P[X+k:2] */
			if (k + x < k)
				return 0;
			k += x;
			/* FALLTHROUGH */
		case BPF_LD+BPF_H+BPF_ABS:	/* A <- P[k:2] */
			/* As above. */
			if (len >= 1 && k < len - 1)
				a = ((uint32_t)packet[k] << 8) |
				    (uint32_t)packet[k + 1];
#ifdef _MINIX_SYSTEM
			else if (total >= 1 && k < total - 1)
				a = bpf_get16_ext(pbuf, k);
#endif /* _MINIX_SYSTEM */
			else
				return 0;
			break;
		case BPF_LD+BPF_B+BPF_IND:	/* A <- P[X+k:1] */
			if (k + x < k)
				return 0;
			k += x;
			/* FALLTHROUGH */
		case BPF_LD+BPF_B+BPF_ABS:	/* A <- P[k:1] */
			if (k < len)
				a = (uint32_t)packet[k];
#ifdef _MINIX_SYSTEM
			else if (k < total)
				a = bpf_get8_ext(pbuf, k);
#endif /* _MINIX_SYSTEM */
			else
				return 0;
			break;
		case BPF_LD+BPF_W+BPF_LEN:	/* A <- len */
			a = total;
			break;
		case BPF_LD+BPF_IMM:		/* A <- k */
			a = k;
			break;
		case BPF_LD+BPF_MEM:		/* A <- M[k] */
			a = mem[k];
			break;

		case BPF_LDX+BPF_IMM:		/* X <- k */
			x = k;
			break;
		case BPF_LDX+BPF_MEM:		/* X <- M[k] */
			x = mem[k];
			break;
		case BPF_LDX+BPF_LEN:		/* X <- len */
			x = total;
			break;
		case BPF_LDX+BPF_B+BPF_MSH:	/* X <- 4*(P[k:1]&0xf) */
			if (k < len)
				x = ((uint32_t)packet[k] & 0xf) << 2;
#ifdef _MINIX_SYSTEM
			else if (k < total)
				x = (bpf_get8_ext(pbuf, k) & 0xf) << 2;
#endif /* _MINIX_SYSTEM */
			else
				return 0;
			break;

		case BPF_ST:			/* M[k] <- A */
			mem[k] = a;
			break;

		case BPF_STX:			/* M[k] <- X */
			mem[k] = x;
			break;

		case BPF_ALU+BPF_ADD+BPF_K:	/* A <- A + k */
			a += k;
			break;
		case BPF_ALU+BPF_SUB+BPF_K:	/* A <- A - k */
			a -= k;
			break;
		case BPF_ALU+BPF_MUL+BPF_K:	/* A <- A * k */
			a *= k;
			break;
		case BPF_ALU+BPF_DIV+BPF_K:	/* A <- A / k */
			a /= k;
			break;
		case BPF_ALU+BPF_MOD+BPF_K:	/* A <- A % k */
			a %= k;
			break;
		case BPF_ALU+BPF_AND+BPF_K:	/* A <- A & k */
			a &= k;
			break;
		case BPF_ALU+BPF_OR+BPF_K:	/* A <- A | k */
			a |= k;
			break;
		case BPF_ALU+BPF_XOR+BPF_K:	/* A <- A ^ k */
			a ^= k;
			break;
		case BPF_ALU+BPF_LSH+BPF_K:	/* A <- A << k */
			a <<= k;
			break;
		case BPF_ALU+BPF_RSH+BPF_K:	/* A <- A >> k */
			a >>= k;
			break;
		case BPF_ALU+BPF_ADD+BPF_X:	/* A <- A + X */
			a += x;
			break;
		case BPF_ALU+BPF_SUB+BPF_X:	/* A <- A - X */
			a -= x;
			break;
		case BPF_ALU+BPF_MUL+BPF_X:	/* A <- A * X */
			a *= x;
			break;
		case BPF_ALU+BPF_DIV+BPF_X:	/* A <- A / X */
			if (x == 0)
				return 0;
			a /= x;
			break;
		case BPF_ALU+BPF_MOD+BPF_X:	/* A <- A % X */
			if (x == 0)
				return 0;
			a %= x;
			break;
		case BPF_ALU+BPF_AND+BPF_X:	/* A <- A & X */
			a &= x;
			break;
		case BPF_ALU+BPF_OR+BPF_X:	/* A <- A | X */
			a |= x;
			break;
		case BPF_ALU+BPF_XOR+BPF_X:	/* A <- A ^ X */
			a ^= x;
			break;
		case BPF_ALU+BPF_LSH+BPF_X:	/* A <- A << X */
			if (x >= 32)
				return 0;
			a <<= x;
			break;
		case BPF_ALU+BPF_RSH+BPF_X:	/* A <- A >> X */
			if (x >= 32)
				return 0;
			a >>= x;
			break;
		case BPF_ALU+BPF_NEG:		/* A <- -A */
			a = -a;
			break;

		case BPF_JMP+BPF_JA:		/* pc += k */
			pc += k;
			break;
		case BPF_JMP+BPF_JGT+BPF_K:	/* pc += (A > k) ? jt : jf */
			pc += (a > k) ? pc->jt : pc->jf;
			break;
		case BPF_JMP+BPF_JGE+BPF_K:	/* pc += (A >= k) ? jt : jf */
			pc += (a >= k) ? pc->jt : pc->jf;
			break;
		case BPF_JMP+BPF_JEQ+BPF_K:	/* pc += (A == k) ? jt : jf */
			pc += (a == k) ? pc->jt : pc->jf;
			break;
		case BPF_JMP+BPF_JSET+BPF_K:	/* pc += (A & k) ? jt : jf */
			pc += (a & k) ? pc->jt : pc->jf;
			break;
		case BPF_JMP+BPF_JGT+BPF_X:	/* pc += (A > X) ? jt : jf */
			pc += (a > x) ? pc->jt : pc->jf;
			break;
		case BPF_JMP+BPF_JGE+BPF_X:	/* pc += (A >= X) ? jt : jf */
			pc += (a >= x) ? pc->jt : pc->jf;
			break;
		case BPF_JMP+BPF_JEQ+BPF_X:	/* pc += (A == X) ? jt : jf */
			pc += (a == x) ? pc->jt : pc->jf;
			break;
		case BPF_JMP+BPF_JSET+BPF_X:	/* pc += (A & X) ? jt : jf */
			pc += (a & x) ? pc->jt : pc->jf;
			break;

		case BPF_RET+BPF_A:		/* accept A bytes */
			return a;
		case BPF_RET+BPF_K:		/* accept K bytes */
			return k;

		case BPF_MISC+BPF_TAX:		/* X <- A */
			x = a;
			break;
		case BPF_MISC+BPF_TXA:		/* A <- X */
			a = x;
			break;

		default:			/* unknown instruction */
			return 0;
		}
	}

	/* NOTREACHED */
}

/*
 * In order to avoid having to perform explicit memory allocation, we store
 * some validation state on the stack, using data types that are as small as
 * possible for the current definitions. The data types, and in fact the whole
 * assumption that we can store the state on the stack, may need to be revised
 * if certain constants are increased in the future. As of writing, the
 * validation routine uses a little over 1KB of stack memory.
 */
#if BPF_MEMWORDS <= 16	/* value as of writing: 16 */
typedef uint16_t meminv_t;
#else
#error "increased BPF_MEMWORDS may require code revision"
#endif

#if BPF_MAXINSNS > 2048	/* value as of writing: 512 */
#error "increased BPF_MAXINSNS may require code revision"
#endif
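
As a concrete illustration of the write-before-read tracking that the validation below performs, consider the following two-instruction program (hypothetical, for illustration only; the BPF_STMT macro and instruction codes are the standard ones from <net/bpf.h>). It is rejected, because M[0] is loaded without ever having been stored on the execution path leading up to the load:

static const struct bpf_insn bad_prog[] = {
	BPF_STMT(BPF_LD+BPF_MEM, 0),	/* A <- M[0]: never written */
	BPF_STMT(BPF_RET+BPF_A, 0),	/* accept A bytes */
};
/* bpf_validate(bad_prog, 2) returns 0. */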

/*
 * Verify that the given filter program is safe to execute, by performing as
 * many static validity checks as possible. The program is given as 'insns',
 * which must be an array of 'ninsns' BPF instructions. Unlike bpf_filter(),
 * this function does not accept empty filter programs. The function returns 1
 * if the program was successfully validated, or 0 if the program should not be
 * accepted.
 */
int
bpf_validate(const struct bpf_insn * insns, int ninsns)
{
	bitchunk_t reachable[BITMAP_CHUNKS(BPF_MAXINSNS)];
	meminv_t invalid, meminv[BPF_MAXINSNS];
	const struct bpf_insn *insn;
	u_int pc, count, target;
	int advance;

	if (insns == NULL || ninsns <= 0 || ninsns > BPF_MAXINSNS)
		return 0;
	count = (u_int)ninsns;

	memset(reachable, 0, sizeof(reachable[0]) * BITMAP_CHUNKS(count));
	memset(meminv, 0, sizeof(meminv[0]) * count);

	SET_BIT(reachable, 0);
	meminv[0] = (meminv_t)~0;

	for (pc = 0; pc < count; pc++) {
		/* We completely ignore instructions that are not reachable. */
		if (!GET_BIT(reachable, pc))
			continue;

		invalid = meminv[pc];
		advance = 1;

		insn = &insns[pc];

		switch (insn->code) {
		case BPF_LD+BPF_W+BPF_ABS:
		case BPF_LD+BPF_H+BPF_ABS:
		case BPF_LD+BPF_B+BPF_ABS:
		case BPF_LD+BPF_W+BPF_IND:
		case BPF_LD+BPF_H+BPF_IND:
		case BPF_LD+BPF_B+BPF_IND:
		case BPF_LD+BPF_LEN:
		case BPF_LD+BPF_IMM:
		case BPF_LDX+BPF_IMM:
		case BPF_LDX+BPF_LEN:
		case BPF_LDX+BPF_B+BPF_MSH:
		case BPF_ALU+BPF_ADD+BPF_K:
		case BPF_ALU+BPF_SUB+BPF_K:
		case BPF_ALU+BPF_MUL+BPF_K:
		case BPF_ALU+BPF_AND+BPF_K:
		case BPF_ALU+BPF_OR+BPF_K:
		case BPF_ALU+BPF_XOR+BPF_K:
		case BPF_ALU+BPF_ADD+BPF_X:
		case BPF_ALU+BPF_SUB+BPF_X:
		case BPF_ALU+BPF_MUL+BPF_X:
		case BPF_ALU+BPF_DIV+BPF_X:
		case BPF_ALU+BPF_MOD+BPF_X:
		case BPF_ALU+BPF_AND+BPF_X:
		case BPF_ALU+BPF_OR+BPF_X:
		case BPF_ALU+BPF_XOR+BPF_X:
		case BPF_ALU+BPF_LSH+BPF_X:
		case BPF_ALU+BPF_RSH+BPF_X:
		case BPF_ALU+BPF_NEG:
		case BPF_MISC+BPF_TAX:
		case BPF_MISC+BPF_TXA:
			/* Nothing we can check for these. */
			break;
		case BPF_ALU+BPF_DIV+BPF_K:
		case BPF_ALU+BPF_MOD+BPF_K:
			/* No division by zero. */
			if (insn->k == 0)
				return 0;
			break;
		case BPF_ALU+BPF_LSH+BPF_K:
		case BPF_ALU+BPF_RSH+BPF_K:
			/* Do not invoke undefined behavior. */
			if (insn->k >= 32)
				return 0;
			break;
		case BPF_LD+BPF_MEM:
		case BPF_LDX+BPF_MEM:
			/*
			 * Only allow loading words that have been stored in
			 * all execution paths leading up to this instruction.
			 */
			if (insn->k >= BPF_MEMWORDS ||
			    (invalid & (1 << insn->k)))
				return 0;
			break;
		case BPF_ST:
		case BPF_STX:
			if (insn->k >= BPF_MEMWORDS)
				return 0;
			invalid &= ~(1 << insn->k);
			break;
		case BPF_JMP+BPF_JA:
			/*
			 * Make sure that the target instruction of the jump is
			 * still part of the program, and mark it as reachable.
			 */
			if (insn->k >= count - pc - 1)
				return 0;
			target = pc + insn->k + 1;
			SET_BIT(reachable, target);
			meminv[target] |= invalid;
			advance = 0;
			break;
		case BPF_JMP+BPF_JGT+BPF_K:
		case BPF_JMP+BPF_JGE+BPF_K:
		case BPF_JMP+BPF_JEQ+BPF_K:
		case BPF_JMP+BPF_JSET+BPF_K:
		case BPF_JMP+BPF_JGT+BPF_X:
		case BPF_JMP+BPF_JGE+BPF_X:
		case BPF_JMP+BPF_JEQ+BPF_X:
		case BPF_JMP+BPF_JSET+BPF_X:
			/*
			 * Make sure that both target instructions are still
			 * part of the program, and mark both as reachable.
			 * There is no chance that the additions will overflow.
			 */
			target = pc + insn->jt + 1;
			if (target >= count)
				return 0;
			SET_BIT(reachable, target);
			meminv[target] |= invalid;

			target = pc + insn->jf + 1;
			if (target >= count)
				return 0;
			SET_BIT(reachable, target);
			meminv[target] |= invalid;

			advance = 0;
			break;
		case BPF_RET+BPF_A:
		case BPF_RET+BPF_K:
			advance = 0;
			break;
		default:
			return 0;
		}

		/*
		 * After most instructions, we simply advance to the next. For
		 * one thing, this means that there must be a next instruction
		 * at all.
		 */
		if (advance) {
			if (pc + 1 == count)
				return 0;
			SET_BIT(reachable, pc + 1);
			meminv[pc + 1] |= invalid;
		}
	}

	/* The program has passed all our basic tests. */
	return 1;
}
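
For a usage sketch of the two functions above (hypothetical and standalone; not part of this commit, and it assumes the test program is linked against this module, which the userland-compatible API above makes possible): the classic "accept IPv4 frames" filter checks the Ethernet type field at offset 12 and then accepts either the whole packet or nothing.

#include <sys/types.h>
#include <net/bpf.h>
#include <stdio.h>

int
main(void)
{
	static const struct bpf_insn prog[] = {
		BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 12),	/* A <- ethertype */
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0x0800, 0, 1),
		BPF_STMT(BPF_RET+BPF_K, 0xffffffff),	/* IPv4: accept all */
		BPF_STMT(BPF_RET+BPF_K, 0),		/* other: ignore */
	};
	u_char frame[64] = { 0 };
	u_int size;

	frame[12] = 0x08;	/* ethertype: IPv4 */
	frame[13] = 0x00;

	/* The program MUST be validated before it is executed. */
	if (!bpf_validate(prog, sizeof(prog) / sizeof(prog[0])))
		return 1;

	size = bpf_filter(prog, frame, sizeof(frame), sizeof(frame));
	printf("deliver %u bytes\n", size);	/* prints 4294967295 */
	return 0;
}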

1365 minix/net/lwip/bpfdev.c (new file; diff suppressed because it is too large)

18 minix/net/lwip/bpfdev.h (new file)
@@ -0,0 +1,18 @@
#ifndef MINIX_NET_LWIP_BPFDEV_H
#define MINIX_NET_LWIP_BPFDEV_H

/*
 * BPF link structure, used to abstract away the details of the BPF structure
 * from other modules.
 */
struct bpfdev_link {
	TAILQ_ENTRY(bpfdev_link) bpfl_next;
};

void bpfdev_init(void);
void bpfdev_process(message * m_ptr, int ipc_status);
void bpfdev_detach(struct bpfdev_link * bpf);
void bpfdev_input(struct bpfdev_link * bpf, const struct pbuf * pbuf);
void bpfdev_output(struct bpfdev_link * bpf, const struct pbuf * pbuf);

#endif /* !MINIX_NET_LWIP_BPFDEV_H */

1718 minix/net/lwip/ethif.c (new file; diff suppressed because it is too large)

24 minix/net/lwip/ethif.h (new file)
@@ -0,0 +1,24 @@
#ifndef MINIX_NET_LWIP_ETHIF_H
#define MINIX_NET_LWIP_ETHIF_H

#include "ndev.h"

struct ethif;

void ethif_init(void);

struct ethif *ethif_add(ndev_id_t id, const char * name, uint32_t caps);
int ethif_enable(struct ethif * ethif, const char * name,
    const struct ndev_hwaddr * hwaddr, uint8_t hwaddr_len, uint32_t caps,
    uint32_t link, uint32_t media);
void ethif_disable(struct ethif * ethif);
void ethif_remove(struct ethif * ethif);

void ethif_configured(struct ethif * ethif, int32_t result);
void ethif_sent(struct ethif * ethif, int32_t result);
void ethif_received(struct ethif * ethif, int32_t result);

void ethif_status(struct ethif * ethif, uint32_t link, uint32_t media,
    uint32_t oerror, uint32_t coll, uint32_t ierror, uint32_t iqdrop);

#endif /* !MINIX_NET_LWIP_ETHIF_H */

2224 minix/net/lwip/ifaddr.c (new file; diff suppressed because it is too large)

70 minix/net/lwip/ifaddr.h (new file)
@@ -0,0 +1,70 @@
#ifndef MINIX_NET_LWIP_IFADDR_H
#define MINIX_NET_LWIP_IFADDR_H

/* Possible values of ifdev_v6flags[] elements. */
#define IFADDR_V6F_AUTOCONF	0x01	/* autoconfigured address, no subnet */
#define IFADDR_V6F_TEMPORARY	0x02	/* temporary (privacy) address */
#define IFADDR_V6F_HWBASED	0x04	/* auto-derived from MAC address */

typedef int ifaddr_v4_num_t;	/* interface IPv4 address number */
typedef int ifaddr_v6_num_t;	/* interface IPv6 address number */
typedef int ifaddr_dl_num_t;	/* interface link address number */

extern int ifaddr_auto_linklocal;
extern int ifaddr_accept_rtadv;

void ifaddr_init(struct ifdev * ifdev);

int ifaddr_v4_find(struct ifdev * ifdev, const struct sockaddr_in * addr,
    ifaddr_v4_num_t * num);
int ifaddr_v4_enum(struct ifdev * ifdev, ifaddr_v4_num_t * num);
int ifaddr_v4_get(struct ifdev * ifdev, ifaddr_v4_num_t num,
    struct sockaddr_in * addr, struct sockaddr_in * mask,
    struct sockaddr_in * bcast, struct sockaddr_in * dest);
int ifaddr_v4_get_flags(struct ifdev * ifdev, ifaddr_v4_num_t num);
int ifaddr_v4_add(struct ifdev * ifdev, const struct sockaddr_in * addr,
    const struct sockaddr_in * mask, const struct sockaddr_in * bcast,
    const struct sockaddr_in * dest, int flags);
void ifaddr_v4_del(struct ifdev * ifdev, ifaddr_v4_num_t num);
void ifaddr_v4_clear(struct ifdev * ifdev);
struct ifdev *ifaddr_v4_map_by_addr(const ip4_addr_t * ip4addr);

int ifaddr_v6_find(struct ifdev * ifdev, const struct sockaddr_in6 * addr6,
    ifaddr_v6_num_t * num);
int ifaddr_v6_enum(struct ifdev * ifdev, ifaddr_v6_num_t * num);
void ifaddr_v6_get(struct ifdev * ifdev, ifaddr_v6_num_t num,
    struct sockaddr_in6 * addr6, struct sockaddr_in6 * mask6,
    struct sockaddr_in6 * dest6);
int ifaddr_v6_get_flags(struct ifdev * ifdev, ifaddr_v6_num_t num);
void ifaddr_v6_get_lifetime(struct ifdev * ifdev, ifaddr_v6_num_t num,
    struct in6_addrlifetime * lifetime);
int ifaddr_v6_add(struct ifdev * ifdev, const struct sockaddr_in6 * addr6,
    const struct sockaddr_in6 * mask6, const struct sockaddr_in6 * dest6,
    int flags, const struct in6_addrlifetime * lifetime);
void ifaddr_v6_del(struct ifdev * ifdev, ifaddr_v6_num_t num);
void ifaddr_v6_clear(struct ifdev * ifdev);
void ifaddr_v6_check(struct ifdev * ifdev);
void ifaddr_v6_set_up(struct ifdev * ifdev);
void ifaddr_v6_set_linklocal(struct ifdev * ifdev);
struct ifdev *ifaddr_v6_map_by_addr(const ip6_addr_t * ip6addr);

struct ifdev *ifaddr_map_by_addr(const ip_addr_t * ipaddr);
struct ifdev *ifaddr_map_by_subnet(const ip_addr_t * ipaddr);
const ip_addr_t *ifaddr_select(const ip_addr_t * dst_addr,
    struct ifdev * ifdev, struct ifdev ** ifdevp);
int ifaddr_is_zone_mismatch(const ip6_addr_t * ipaddr, struct ifdev * ifdev);

int ifaddr_dl_find(struct ifdev * ifdev, const struct sockaddr_dlx * addr,
    socklen_t addr_len, ifaddr_dl_num_t * num);
int ifaddr_dl_enum(struct ifdev * ifdev, ifaddr_dl_num_t * num);
void ifaddr_dl_get(struct ifdev * ifdev, ifaddr_dl_num_t num,
    struct sockaddr_dlx * addr);
int ifaddr_dl_get_flags(struct ifdev * ifdev, ifaddr_dl_num_t num);
int ifaddr_dl_add(struct ifdev * ifdev, const struct sockaddr_dlx * addr,
    socklen_t addr_len, int flags);
int ifaddr_dl_del(struct ifdev * ifdev, ifaddr_dl_num_t num);
void ifaddr_dl_clear(struct ifdev * ifdev);
void ifaddr_dl_update(struct ifdev * ifdev, const uint8_t * hwaddr,
    int is_factory);

#endif /* !MINIX_NET_LWIP_IFADDR_H */

930 minix/net/lwip/ifconf.c (new file)
@@ -0,0 +1,930 @@
/* LWIP service - ifconf.c - interface configuration */

#include "lwip.h"
#include "ifaddr.h"
#include "lldata.h"

#include <net/if_media.h>
#include <minix/if.h>

#define LOOPBACK_IFNAME	"lo0"	/* name of the loopback interface */

/*
 * Initialize the first loopback device, which is present by default.
 */
void
ifconf_init(void)
{
	const struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_addr = { htonl(INADDR_LOOPBACK) }
	};
	struct sockaddr_in6 ll_addr6 = {
		.sin6_family = AF_INET6,
	};
	const struct sockaddr_in6 lo_addr6 = {
		.sin6_family = AF_INET6,
		.sin6_addr = IN6ADDR_LOOPBACK_INIT
	};
	const struct in6_addrlifetime lifetime = {
		.ia6t_vltime = ND6_INFINITE_LIFETIME,
		.ia6t_pltime = ND6_INFINITE_LIFETIME
	};
	struct sockaddr_in6 mask6;
	struct ifdev *ifdev;
	socklen_t addr_len;
	int r;

	if ((r = ifdev_create(LOOPBACK_IFNAME)) != OK)
		panic("unable to create loopback interface: %d", r);

	if ((ifdev = ifdev_find_by_name(LOOPBACK_IFNAME)) == NULL)
		panic("unable to find loopback interface");

	if ((r = ifaddr_v4_add(ifdev, &addr, NULL, NULL, NULL, 0)) != OK)
		panic("unable to set IPv4 address on loopback interface: %d",
		    r);

	addr_len = sizeof(mask6);
	addr_put_netmask((struct sockaddr *)&mask6, &addr_len, IPADDR_TYPE_V6,
	    64 /*prefix*/);

	ll_addr6.sin6_addr.s6_addr[0] = 0xfe;
	ll_addr6.sin6_addr.s6_addr[1] = 0x80;
	ll_addr6.sin6_addr.s6_addr[15] = ifdev_get_index(ifdev);

	if ((r = ifaddr_v6_add(ifdev, &ll_addr6, &mask6, NULL, 0,
	    &lifetime)) != OK)
		panic("unable to set IPv6 address on loopback interface: %d",
		    r);

	addr_len = sizeof(mask6);
	addr_put_netmask((struct sockaddr *)&mask6, &addr_len, IPADDR_TYPE_V6,
	    128 /*prefix*/);

	if ((r = ifaddr_v6_add(ifdev, &lo_addr6, &mask6, NULL, 0,
	    &lifetime)) != OK)
		panic("unable to set IPv6 address on loopback interface: %d",
		    r);

	if ((r = ifdev_set_ifflags(ifdev, IFF_UP)) != OK)
		panic("unable to bring up loopback interface");
}

/*
 * Process an address family independent IOCTL request with an "ifreq"
 * structure.
 */
static int
ifconf_ioctl_ifreq(unsigned long request, const struct sockdriver_data * data)
{
	struct ifdev *ifdev;
	struct ifreq ifr;
	int r;

	if ((r = sockdriver_copyin(data, 0, &ifr, sizeof(ifr))) != OK)
		return r;

	if (request != SIOCIFCREATE) {
		ifr.ifr_name[sizeof(ifr.ifr_name) - 1] = '\0';

		if ((ifdev = ifdev_find_by_name(ifr.ifr_name)) == NULL)
			return ENXIO;
	} else
		ifdev = NULL;

	switch (request) {
	case SIOCGIFFLAGS:
		ifr.ifr_flags = ifdev_get_ifflags(ifdev);

		return sockdriver_copyout(data, 0, &ifr, sizeof(ifr));

	case SIOCSIFFLAGS:
		/*
		 * Unfortunately, ifr_flags is a signed integer and the sign
		 * bit is in fact used as a flag, so without explicit casting
		 * we end up setting all upper bits of the (full) integer. If
		 * NetBSD ever extends the field, this assert should trigger..
		 */
		assert(sizeof(ifr.ifr_flags) == sizeof(short));

		return ifdev_set_ifflags(ifdev, (unsigned short)ifr.ifr_flags);

	case SIOCGIFMETRIC:
		ifr.ifr_metric = ifdev_get_metric(ifdev);

		return sockdriver_copyout(data, 0, &ifr, sizeof(ifr));

	case SIOCSIFMETRIC:
		/* The metric is not used within the operating system. */
		ifdev_set_metric(ifdev, ifr.ifr_metric);

		return OK;

	case SIOCSIFMEDIA:
		return ifdev_set_ifmedia(ifdev, ifr.ifr_media);

	case SIOCGIFMTU:
		ifr.ifr_mtu = ifdev_get_mtu(ifdev);

		return sockdriver_copyout(data, 0, &ifr, sizeof(ifr));

	case SIOCSIFMTU:
		return ifdev_set_mtu(ifdev, ifr.ifr_mtu);

	case SIOCIFCREATE:
		if (memchr(ifr.ifr_name, '\0', sizeof(ifr.ifr_name)) == NULL)
			return EINVAL;

		return ifdev_create(ifr.ifr_name);

	case SIOCIFDESTROY:
		return ifdev_destroy(ifdev);

	case SIOCGIFDLT:
		ifr.ifr_dlt = ifdev_get_dlt(ifdev);

		return sockdriver_copyout(data, 0, &ifr, sizeof(ifr));

	case SIOCGIFINDEX:
		ifr.ifr_index = ifdev_get_index(ifdev);

		return sockdriver_copyout(data, 0, &ifr, sizeof(ifr));

	default:
		return ENOTTY;
	}
}

/*
 * Process an address family independent IOCTL request with an "ifcapreq"
 * structure.
 */
static int
ifconf_ioctl_ifcap(unsigned long request,
	const struct sockdriver_data * data)
{
	struct ifdev *ifdev;
	struct ifcapreq ifcr;
	int r;

	if ((r = sockdriver_copyin(data, 0, &ifcr, sizeof(ifcr))) != OK)
		return r;

	ifcr.ifcr_name[sizeof(ifcr.ifcr_name) - 1] = '\0';

	if ((ifdev = ifdev_find_by_name(ifcr.ifcr_name)) == NULL)
		return ENXIO;

	switch (request) {
	case SIOCSIFCAP:
		return ifdev_set_ifcap(ifdev, ifcr.ifcr_capenable);

	case SIOCGIFCAP:
		ifdev_get_ifcap(ifdev, &ifcr.ifcr_capabilities,
		    &ifcr.ifcr_capenable);

		return sockdriver_copyout(data, 0, &ifcr, sizeof(ifcr));

	default:
		return ENOTTY;
	}
}

/*
 * Process an address family independent IOCTL request with an "ifmediareq"
 * structure.
 */
static int
ifconf_ioctl_ifmedia(unsigned long request,
	const struct sockdriver_data * data)
{
	struct ifdev *ifdev;
	struct ifmediareq ifm;
	int r;

	if ((r = sockdriver_copyin(data, 0, &ifm, sizeof(ifm))) != OK)
		return r;

	ifm.ifm_name[sizeof(ifm.ifm_name) - 1] = '\0';

	if ((ifdev = ifdev_find_by_name(ifm.ifm_name)) == NULL)
		return ENXIO;

	switch (request) {
	case MINIX_SIOCGIFMEDIA:
		if ((r = ifdev_get_ifmedia(ifdev, &ifm.ifm_current,
		    &ifm.ifm_active)) != OK)
			return r;
		ifm.ifm_mask = 0;

		switch (ifdev_get_link(ifdev)) {
		case LINK_STATE_UP:
			ifm.ifm_status = IFM_AVALID | IFM_ACTIVE;
			break;
		case LINK_STATE_DOWN:
			ifm.ifm_status = IFM_AVALID;
			break;
		default:
			ifm.ifm_status = 0;
			break;
		}

		/*
		 * TODO: support for the list of supported media types. This
		 * one is not easy, because we cannot simply suspend the IOCTL
		 * and query the driver. For now, return only one entry (which
		 * is the minimum for ifconfig(8) not to complain), namely the
		 * currently selected one.
		 */
		if (ifm.ifm_ulist != NULL) {
			if (ifm.ifm_count < 1)
				return ENOMEM;

			/*
			 * Copy out the 'list', which consists of one entry.
			 * If we were to produce multiple entries, we would
			 * have to check against the MINIX_IF_MAXMEDIA limit.
			 */
			if ((r = sockdriver_copyout(data,
			    offsetof(struct minix_ifmediareq, mifm_list),
			    &ifm.ifm_current, sizeof(ifm.ifm_current))) != OK)
				return r;
		}
		ifm.ifm_count = 1;

		return sockdriver_copyout(data, 0, &ifm, sizeof(ifm));

	default:
		return ENOTTY;
	}
}

/*
 * Process an address family independent IOCTL request with an "if_clonereq"
 * structure.
 */
static int
ifconf_ioctl_ifclone(unsigned long request,
	const struct sockdriver_data * data)
{
	struct if_clonereq ifcr;
	const char *ptr;
	char name[IFNAMSIZ];
	size_t off;
	unsigned int num;
	int r;

	if ((r = sockdriver_copyin(data, 0, &ifcr, sizeof(ifcr))) != OK)
		return r;

	if (ifcr.ifcr_count < 0)
		return EINVAL;

	off = offsetof(struct minix_if_clonereq, mifcr_buffer);

	for (num = 0; (ptr = ifdev_enum_vtypes(num)) != NULL; num++) {
		/* Prevent overflow in case we ever have over 128 vtypes.. */
		if (num == MINIX_IF_MAXCLONERS)
			break;

		if (ifcr.ifcr_buffer == NULL ||
		    num >= (unsigned int)ifcr.ifcr_count)
			continue;

		memset(name, 0, sizeof(name));
		strlcpy(name, ptr, sizeof(name));

		if ((r = sockdriver_copyout(data, off, name,
		    sizeof(name))) != OK)
			return r;

		off += sizeof(name);
	}

	ifcr.ifcr_total = num;

	return sockdriver_copyout(data, 0, &ifcr, sizeof(ifcr));
}

/*
 * Process an address family independent IOCTL request with an "if_addrprefreq"
 * structure.
 */
static int
ifconf_ioctl_ifaddrpref(unsigned long request,
	const struct sockdriver_data * data)
{
	struct ifdev *ifdev;
	struct if_addrprefreq ifap;
	int r;

	if ((r = sockdriver_copyin(data, 0, &ifap, sizeof(ifap))) != OK)
		return r;

	ifap.ifap_name[sizeof(ifap.ifap_name) - 1] = '\0';

	if ((ifdev = ifdev_find_by_name(ifap.ifap_name)) == NULL)
		return ENXIO;

	/*
	 * For now, we support only a preference of 0. We do not try to look
	 * up the given address, nor do we return the looked-up address.
	 */
	switch (request) {
	case SIOCSIFADDRPREF:
		if (ifap.ifap_preference != 0)
			return EINVAL;

		return OK;

	case SIOCGIFADDRPREF:
		ifap.ifap_preference = 0;

		return sockdriver_copyout(data, 0, &ifap, sizeof(ifap));

	default:
		return ENOTTY;
	}
}

/*
 * Process an IOCTL request for AF_INET with an "ifreq" structure.
 */
static int
ifconf_ioctl_v4_ifreq(unsigned long request,
	const struct sockdriver_data * data)
{
	struct sockaddr_in addr, mask, bcast, dest, *sin = NULL /*gcc*/;
	struct ifdev *ifdev;
	struct ifreq ifr;
	ifaddr_v4_num_t num;
	int r, flags;

	if ((r = sockdriver_copyin(data, 0, &ifr, sizeof(ifr))) != OK)
		return r;

	ifr.ifr_name[sizeof(ifr.ifr_name) - 1] = '\0';

	if ((ifdev = ifdev_find_by_name(ifr.ifr_name)) == NULL)
		return ENXIO;

	switch (request) {
	case SIOCGIFADDR:
	case SIOCGIFNETMASK:
	case SIOCGIFBRDADDR:
	case SIOCGIFDSTADDR:
		/* Retrieve all addresses, then copy out the desired one. */
		switch (request) {
		case SIOCGIFADDR:	sin = &addr;	break;
		case SIOCGIFNETMASK:	sin = &mask;	break;
		case SIOCGIFBRDADDR:	sin = &bcast;	break;
		case SIOCGIFDSTADDR:	sin = &dest;	break;
		}

		sin->sin_len = 0;

		if ((r = ifaddr_v4_get(ifdev, (ifaddr_v4_num_t)0, &addr, &mask,
		    &bcast, &dest)) != OK)
			return r;

		if (sin->sin_len == 0)	/* not filled in */
			return EADDRNOTAVAIL;

		memcpy(&ifr.ifr_addr, sin, sizeof(*sin));

		return sockdriver_copyout(data, 0, &ifr, sizeof(ifr));

	case SIOCGIFAFLAG_IN:
		if ((r = ifaddr_v4_find(ifdev,
		    (struct sockaddr_in *)&ifr.ifr_addr, &num)) != OK)
			return r;

		ifr.ifr_addrflags = ifaddr_v4_get_flags(ifdev, num);

		return sockdriver_copyout(data, 0, &ifr, sizeof(ifr));

	case SIOCSIFADDR:
		/*
		 * This one is slightly different from the rest, in that we
		 * either set or update the primary address: if we set it, we
		 * must let _add() generate a matching netmask automatically,
		 * while if we update it, _add() would fail unless we first
		 * delete the old entry.
		 */
		sin = (struct sockaddr_in *)&ifr.ifr_addr;

		if ((r = ifaddr_v4_get(ifdev, (ifaddr_v4_num_t)0, &addr, &mask,
		    &bcast, &dest)) == OK) {
			flags = ifaddr_v4_get_flags(ifdev, (ifaddr_v4_num_t)0);

			ifaddr_v4_del(ifdev, (ifaddr_v4_num_t)0);

			/*
			 * If setting the new address fails, reinstating the
			 * old address should always work. This is really ugly
			 * as it generates routing socket noise, but this call
			 * is deprecated anyway.
			 */
			if ((r = ifaddr_v4_add(ifdev, sin, &mask, &bcast,
			    &dest, 0 /*flags*/)) != OK)
				(void)ifaddr_v4_add(ifdev, &addr, &mask,
				    &bcast, &dest, flags);

			return r;
		} else
			return ifaddr_v4_add(ifdev, sin, NULL /*mask*/,
			    NULL /*bcast*/, NULL /*dest*/, 0 /*flags*/);

	case SIOCSIFNETMASK:
	case SIOCSIFBRDADDR:
	case SIOCSIFDSTADDR:
		/* These calls only update the existing primary address. */
		if ((r = ifaddr_v4_get(ifdev, (ifaddr_v4_num_t)0, &addr, &mask,
		    &bcast, &dest)) != OK)
			return r;

		sin = (struct sockaddr_in *)&ifr.ifr_addr;

		switch (request) {
		case SIOCSIFNETMASK: memcpy(&mask, sin, sizeof(mask)); break;
		case SIOCSIFBRDADDR: memcpy(&bcast, sin, sizeof(bcast)); break;
		case SIOCSIFDSTADDR: memcpy(&dest, sin, sizeof(dest)); break;
		}

		return ifaddr_v4_add(ifdev, &addr, &mask, &bcast, &dest,
		    ifaddr_v4_get_flags(ifdev, (ifaddr_v4_num_t)0));

	case SIOCDIFADDR:
		if ((r = ifaddr_v4_find(ifdev,
		    (struct sockaddr_in *)&ifr.ifr_addr, &num)) != OK)
			return r;

		ifaddr_v4_del(ifdev, num);

		return OK;

	default:
		return ENOTTY;
	}
}

/*
 * Process an IOCTL request for AF_INET with an "ifaliasreq" structure.
 */
static int
ifconf_ioctl_v4_ifalias(unsigned long request,
	const struct sockdriver_data * data)
{
	struct ifdev *ifdev;
	struct ifaliasreq ifra;
	struct sockaddr_in dest;
	ifaddr_v4_num_t num;
	int r;

	if ((r = sockdriver_copyin(data, 0, &ifra, sizeof(ifra))) != OK)
		return r;

	ifra.ifra_name[sizeof(ifra.ifra_name) - 1] = '\0';

	if ((ifdev = ifdev_find_by_name(ifra.ifra_name)) == NULL)
		return ENXIO;

	switch (request) {
	case SIOCAIFADDR:
		return ifaddr_v4_add(ifdev,
		    (struct sockaddr_in *)&ifra.ifra_addr,
		    (struct sockaddr_in *)&ifra.ifra_mask,
		    (struct sockaddr_in *)&ifra.ifra_broadaddr,
		    (struct sockaddr_in *)&ifra.ifra_dstaddr, 0 /*flags*/);

	case SIOCGIFALIAS:
		if ((r = ifaddr_v4_find(ifdev,
		    (struct sockaddr_in *)&ifra.ifra_addr, &num)) != OK)
			return r;

		/*
		 * The broadcast and destination address are stored in the same
		 * ifaliasreq field. We cannot pass a pointer to the same
		 * field to ifaddr_v4_get(). So, use a temporary variable.
		 */
		(void)ifaddr_v4_get(ifdev, num,
		    (struct sockaddr_in *)&ifra.ifra_addr,
		    (struct sockaddr_in *)&ifra.ifra_mask,
		    (struct sockaddr_in *)&ifra.ifra_broadaddr, &dest);

		if (ifra.ifra_broadaddr.sa_len == 0)
			memcpy(&ifra.ifra_dstaddr, &dest, sizeof(dest));

		return sockdriver_copyout(data, 0, &ifra, sizeof(ifra));

	default:
		return ENOTTY;
	}
}

/*
 * Process an IOCTL request for AF_INET.
 */
static int
ifconf_ioctl_v4(unsigned long request, const struct sockdriver_data * data,
	endpoint_t user_endpt)
{

	switch (request) {
	case SIOCSIFADDR:
	case SIOCSIFDSTADDR:
	case SIOCSIFBRDADDR:
	case SIOCSIFNETMASK:
	case SIOCDIFADDR:
		if (!util_is_root(user_endpt))
			return EPERM;

		/* FALLTHROUGH */
	case SIOCGIFADDR:
	case SIOCGIFDSTADDR:
	case SIOCGIFBRDADDR:
	case SIOCGIFNETMASK:
	case SIOCGIFAFLAG_IN:
		return ifconf_ioctl_v4_ifreq(request, data);

	case SIOCAIFADDR:
		if (!util_is_root(user_endpt))
			return EPERM;

		/* FALLTHROUGH */
	case SIOCGIFALIAS:
		return ifconf_ioctl_v4_ifalias(request, data);

	default:
		return ENOTTY;
	}
}

#ifdef INET6
/*
 * Process an IOCTL request for AF_INET6 with an "in6_ifreq" structure.
 */
static int
ifconf_ioctl_v6_ifreq(unsigned long request,
	const struct sockdriver_data * data)
{
	struct ifdev *ifdev;
	struct in6_ifreq ifr;
	ifaddr_v6_num_t num;
	int r;

	if ((r = sockdriver_copyin(data, 0, &ifr, sizeof(ifr))) != OK)
		return r;

	ifr.ifr_name[sizeof(ifr.ifr_name) - 1] = '\0';

	if ((ifdev = ifdev_find_by_name(ifr.ifr_name)) == NULL)
		return ENXIO;

	if ((r = ifaddr_v6_find(ifdev, &ifr.ifr_addr, &num)) != OK)
		return r;

	switch (request) {
	case SIOCGIFADDR_IN6:
		/* This IOCTL basically checks if the given address exists. */
		ifaddr_v6_get(ifdev, num, &ifr.ifr_addr, NULL, NULL);

		return sockdriver_copyout(data, 0, &ifr, sizeof(ifr));

	case SIOCDIFADDR_IN6:
		ifaddr_v6_del(ifdev, num);

		return OK;

	case SIOCGIFNETMASK_IN6:
		ifaddr_v6_get(ifdev, num, NULL, &ifr.ifr_addr, NULL);

		return sockdriver_copyout(data, 0, &ifr, sizeof(ifr));

	case SIOCGIFAFLAG_IN6:
		ifr.ifr_ifru.ifru_flags6 = ifaddr_v6_get_flags(ifdev, num);

		return sockdriver_copyout(data, 0, &ifr, sizeof(ifr));

	case SIOCGIFALIFETIME_IN6:
		ifaddr_v6_get_lifetime(ifdev, num,
		    &ifr.ifr_ifru.ifru_lifetime);

		return sockdriver_copyout(data, 0, &ifr, sizeof(ifr));

	default:
		return ENOTTY;
	}
}

/*
 * Process an IOCTL request for AF_INET6 with an "in6_aliasreq" structure.
 */
static int
ifconf_ioctl_v6_ifalias(unsigned long request,
	const struct sockdriver_data * data)
{
	struct ifdev *ifdev;
	struct in6_aliasreq ifra;
	int r;

	if ((r = sockdriver_copyin(data, 0, &ifra, sizeof(ifra))) != OK)
		return r;

	ifra.ifra_name[sizeof(ifra.ifra_name) - 1] = '\0';

	if ((ifdev = ifdev_find_by_name(ifra.ifra_name)) == NULL)
		return ENXIO;

	switch (request) {
	case SIOCAIFADDR_IN6:
		return ifaddr_v6_add(ifdev, &ifra.ifra_addr,
		    &ifra.ifra_prefixmask, &ifra.ifra_dstaddr,
		    ifra.ifra_flags, &ifra.ifra_lifetime);

	default:
		return ENOTTY;
	}
}

/*
 * Process an IOCTL request for AF_INET6 with an "in6_ndireq" structure.
 */
static int
ifconf_ioctl_v6_ndireq(unsigned long request,
	const struct sockdriver_data * data)
{
	struct ifdev *ifdev;
	struct in6_ndireq ndi;
	int r;

	if ((r = sockdriver_copyin(data, 0, &ndi, sizeof(ndi))) != OK)
		return r;

	ndi.ifname[sizeof(ndi.ifname) - 1] = '\0';

	if ((ifdev = ifdev_find_by_name(ndi.ifname)) == NULL)
		return ENXIO;

	switch (request) {
	case SIOCGIFINFO_IN6:
		memset(&ndi.ndi, 0, sizeof(ndi.ndi));

		ndi.ndi.linkmtu = ifdev_get_mtu(ifdev);
		ndi.ndi.flags = ifdev_get_nd6flags(ifdev);
		ndi.ndi.initialized = 1;
		/* TODO: all the other fields.. */

		return sockdriver_copyout(data, 0, &ndi, sizeof(ndi));

	case SIOCSIFINFO_IN6:
		/* TODO: all the other fields.. */

		/* FALLTHROUGH */
	case SIOCSIFINFO_FLAGS:
		return ifdev_set_nd6flags(ifdev, ndi.ndi.flags);

	default:
		return ENOTTY;
	}
}

/*
 * Process an IOCTL request for AF_INET6 with an "in6_nbrinfo" structure.
 */
static int
ifconf_ioctl_v6_nbrinfo(unsigned long request,
	const struct sockdriver_data * data)
{
	struct ifdev *ifdev;
	struct sockaddr_in6 addr;
	struct in6_nbrinfo nbri;
	lldata_ndp_num_t num;
	int r;

	if ((r = sockdriver_copyin(data, 0, &nbri, sizeof(nbri))) != OK)
		return r;

	nbri.ifname[sizeof(nbri.ifname) - 1] = '\0';

	if ((ifdev = ifdev_find_by_name(nbri.ifname)) == NULL)
		return ENXIO;

	switch (request) {
	case SIOCGNBRINFO_IN6:
		/*
		 * Convert the given in6_addr to a full sockaddr_in6, mainly
		 * for internal consistency. It would have been nice if the
		 * KAME management API had had any sort of consistency itself.
		 */
		memset(&addr, 0, sizeof(addr));
		addr.sin6_family = AF_INET6;
		memcpy(&addr.sin6_addr.s6_addr, &nbri.addr,
		    sizeof(addr.sin6_addr.s6_addr));

		if ((r = lldata_ndp_find(ifdev, &addr, &num)) != OK)
			return r;

		lldata_ndp_get_info(num, &nbri.asked, &nbri.isrouter,
		    &nbri.state, &nbri.expire);

		return sockdriver_copyout(data, 0, &nbri, sizeof(nbri));

	default:
		return ENOTTY;
	}
}

/*
 * Process an IOCTL request for AF_INET6.
 */
static int
ifconf_ioctl_v6(unsigned long request, const struct sockdriver_data * data,
	endpoint_t user_endpt)
{

	switch (request) {
	case SIOCDIFADDR_IN6:
		if (!util_is_root(user_endpt))
			return EPERM;

		/* FALLTHROUGH */
	case SIOCGIFADDR_IN6:
	case SIOCGIFNETMASK_IN6:
	case SIOCGIFAFLAG_IN6:
	case SIOCGIFALIFETIME_IN6:
		return ifconf_ioctl_v6_ifreq(request, data);

	case SIOCAIFADDR_IN6:
		if (!util_is_root(user_endpt))
			return EPERM;

		return ifconf_ioctl_v6_ifalias(request, data);

	case SIOCSIFINFO_IN6:
	case SIOCSIFINFO_FLAGS:
		if (!util_is_root(user_endpt))
			return EPERM;

		/* FALLTHROUGH */
	case SIOCGIFINFO_IN6:
		return ifconf_ioctl_v6_ndireq(request, data);

	case SIOCGNBRINFO_IN6:
		return ifconf_ioctl_v6_nbrinfo(request, data);

	default:
		return ENOTTY;
	}
}
#endif /* INET6 */

/*
 * Process an IOCTL request for AF_LINK with an "if_laddrreq" structure.
 */
static int
ifconf_ioctl_dl_lifaddr(unsigned long request,
	const struct sockdriver_data * data)
{
	struct ifdev *ifdev;
	struct if_laddrreq iflr;
	ifaddr_dl_num_t num;
	int r;

	if ((r = sockdriver_copyin(data, 0, &iflr, sizeof(iflr))) != OK)
		return r;

	iflr.iflr_name[sizeof(iflr.iflr_name) - 1] = '\0';

	if ((ifdev = ifdev_find_by_name(iflr.iflr_name)) == NULL)
		return ENXIO;

	switch (request) {
	case SIOCGLIFADDR:
		if (iflr.flags & IFLR_PREFIX) {
			/* We ignore the prefix length, like NetBSD does. */
			if ((r = ifaddr_dl_find(ifdev,
			    (struct sockaddr_dlx *)&iflr.addr,
			    sizeof(iflr.addr), &num)) != OK)
				return r;
		} else
			num = (ifaddr_dl_num_t)0;	/* this always works */

		ifaddr_dl_get(ifdev, num, (struct sockaddr_dlx *)&iflr.addr);
		iflr.flags = ifaddr_dl_get_flags(ifdev, num);
		memset(&iflr.dstaddr, 0, sizeof(iflr.dstaddr));

		return sockdriver_copyout(data, 0, &iflr, sizeof(iflr));

	case SIOCALIFADDR:
		return ifaddr_dl_add(ifdev, (struct sockaddr_dlx *)&iflr.addr,
		    sizeof(iflr.addr), iflr.flags);

	case SIOCDLIFADDR:
		if ((r = ifaddr_dl_find(ifdev,
		    (struct sockaddr_dlx *)&iflr.addr, sizeof(iflr.addr),
		    &num)) != OK)
			return r;

		return ifaddr_dl_del(ifdev, num);

	default:
		return ENOTTY;
	}
}

/*
 * Process an IOCTL request for AF_LINK.
 */
static int
ifconf_ioctl_dl(unsigned long request, const struct sockdriver_data * data,
	endpoint_t user_endpt)
{

	switch (request) {
	case SIOCALIFADDR:
	case SIOCDLIFADDR:
		if (!util_is_root(user_endpt))
			return EPERM;

		/* FALLTHROUGH */
	case SIOCGLIFADDR:
		return ifconf_ioctl_dl_lifaddr(request, data);

	default:
		return ENOTTY;
	}
}

/*
 * Process an IOCTL request. This routine is shared between TCP, UDP, RAW, and
 * link sockets. The given socket may be used to obtain the target domain:
 * AF_INET, AF_INET6, or AF_LINK.
 */
int
ifconf_ioctl(struct sock * sock, unsigned long request,
	const struct sockdriver_data * data, endpoint_t user_endpt)
{
	int domain;

	domain = sockevent_get_domain(sock);

	switch (request) {
	case SIOCSIFFLAGS:
	case SIOCSIFMETRIC:
	case SIOCSIFMEDIA:
	case SIOCSIFMTU:
	case SIOCIFCREATE:
	case SIOCIFDESTROY:
		if (!util_is_root(user_endpt))
			return EPERM;

		/* FALLTHROUGH */
	case SIOCGIFFLAGS:
	case SIOCGIFMETRIC:
	case SIOCGIFMTU:
	case SIOCGIFDLT:
	case SIOCGIFINDEX:
		return ifconf_ioctl_ifreq(request, data);

	case SIOCSIFCAP:
		if (!util_is_root(user_endpt))
			return EPERM;

		/* FALLTHROUGH */
	case SIOCGIFCAP:
		return ifconf_ioctl_ifcap(request, data);

	case MINIX_SIOCGIFMEDIA:
		return ifconf_ioctl_ifmedia(request, data);

	case MINIX_SIOCIFGCLONERS:
		return ifconf_ioctl_ifclone(request, data);

	case SIOCSIFADDRPREF:
		if (!util_is_root(user_endpt))
			return EPERM;

		/* FALLTHROUGH */
	case SIOCGIFADDRPREF:
		return ifconf_ioctl_ifaddrpref(request, data);

	default:
		switch (domain) {
		case AF_INET:
			return ifconf_ioctl_v4(request, data, user_endpt);

#ifdef INET6
		case AF_INET6:
			return ifconf_ioctl_v6(request, data, user_endpt);
#endif /* INET6 */

		case AF_LINK:
			return ifconf_ioctl_dl(request, data, user_endpt);

		default:
			return ENOTTY;
		}
	}
}
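
To show where a request enters this dispatch chain, here is a hypothetical userland fragment (standard NetBSD API, not part of this commit): a SIOCGIFFLAGS ioctl(2) on a datagram socket ends up in ifconf_ioctl() above, which hands it to ifconf_ioctl_ifreq().

#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct ifreq ifr;
	int fd;

	if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
		return 1;

	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, "lo0", sizeof(ifr.ifr_name));

	if (ioctl(fd, SIOCGIFFLAGS, &ifr) < 0)	/* served by ifconf_ioctl() */
		return 1;

	printf("lo0 flags: 0x%x\n", (unsigned short)ifr.ifr_flags);
	close(fd);
	return 0;
}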

1064 minix/net/lwip/ifdev.c (new file; diff suppressed because it is too large)

155 minix/net/lwip/ifdev.h (new file)
@@ -0,0 +1,155 @@
#ifndef MINIX_NET_LWIP_IFDEV_H
#define MINIX_NET_LWIP_IFDEV_H

#include <net/if.h>
#include <net/if_types.h>
#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>

/*
 * NetBSD makes setting a hardware address through ifconfig(8) a whole lot
 * harder than it needs to be, namely by keeping a list of possible hardware
 * addresses and marking one of them as active. For us, that level of extra
 * flexibility is completely useless. In order to shield individual interface
 * modules from having to deal with the rather extended interface for the list
 * management, we maintain the list in ifdev and simply use an iop_set_hwaddr()
 * call to the modules when the active address changes. This setting is the
 * maximum number of hardware addresses in the list maintained by ifdev. It
 * should be at least 2, or changing hardware addresses will not be possible.
 */
#define IFDEV_NUM_HWADDRS	3

struct ifdev;
struct bpfdev_link;
struct sockaddr_dlx;

/* Interface operations table. */
struct ifdev_ops {
	err_t (* iop_init)(struct ifdev * ifdev, struct netif * netif);
	err_t (* iop_input)(struct pbuf * pbuf, struct netif * netif);
	err_t (* iop_output)(struct ifdev * ifdev, struct pbuf * pbuf,
	    struct netif * netif);
	err_t (* iop_output_v4)(struct netif * netif, struct pbuf * pbuf,
	    const ip4_addr_t * ipaddr);
	err_t (* iop_output_v6)(struct netif * netif, struct pbuf * pbuf,
	    const ip6_addr_t * ipaddr);
	void (* iop_hdrcmplt)(struct ifdev * ifdev, struct pbuf * pbuf);
	void (* iop_poll)(struct ifdev * ifdev);
	int (* iop_set_ifflags)(struct ifdev * ifdev, unsigned int ifflags);
	void (* iop_get_ifcap)(struct ifdev * ifdev, uint64_t * ifcap,
	    uint64_t * ifena);
	int (* iop_set_ifcap)(struct ifdev * ifdev, uint64_t ifcap);
	void (* iop_get_ifmedia)(struct ifdev * ifdev, int * ifcurrent,
	    int * ifactive);
	int (* iop_set_ifmedia)(struct ifdev * ifdev, int ifmedia);
	void (* iop_set_promisc)(struct ifdev * ifdev, int promisc);
	int (* iop_set_hwaddr)(struct ifdev * ifdev, const uint8_t * hwaddr);
	int (* iop_set_mtu)(struct ifdev * ifdev, unsigned int mtu);
	int (* iop_destroy)(struct ifdev * ifdev);
};

/* Hardware address list entry. The first entry, if any, is the active one. */
struct ifdev_hwaddr {
	uint8_t ifhwa_addr[NETIF_MAX_HWADDR_LEN];
	uint8_t ifhwa_flags;
};
#define IFHWAF_VALID	0x01	/* entry contains an address */
#define IFHWAF_FACTORY	0x02	/* factory (device-given) address */

/* Interface structure. */
struct ifdev {
	TAILQ_ENTRY(ifdev) ifdev_next;	/* list of active interfaces */
	char ifdev_name[IFNAMSIZ];	/* interface name, null terminated */
	unsigned int ifdev_ifflags;	/* NetBSD-style interface flags */
	unsigned int ifdev_dlt;		/* data link type (DLT_) */
	unsigned int ifdev_promisc;	/* number of promiscuity requestors */
	struct netif ifdev_netif;	/* lwIP interface structure */
	struct if_data ifdev_data;	/* NetBSD-style interface data */
	char ifdev_v4set;		/* interface has an IPv4 address? */
	uint8_t ifdev_v6prefix[LWIP_IPV6_NUM_ADDRESSES]; /* IPv6 prefixes */
	uint8_t ifdev_v6flags[LWIP_IPV6_NUM_ADDRESSES];	/* v6 address flags */
	uint8_t ifdev_v6state[LWIP_IPV6_NUM_ADDRESSES];	/* v6 shadow states */
	uint8_t ifdev_v6scope[LWIP_IPV6_NUM_ADDRESSES];	/* cached v6 scopes */
	struct ifdev_hwaddr ifdev_hwlist[IFDEV_NUM_HWADDRS]; /* HW addr's */
	uint32_t ifdev_nd6flags;	/* ND6-related flags (ND6_IFF_) */
	const struct ifdev_ops *ifdev_ops; /* interface operations table */
	TAILQ_HEAD(, bpfdev_link) ifdev_bpf; /* list of attached BPF devices */
};

#define ifdev_get_name(ifdev)		((ifdev)->ifdev_name)
#define ifdev_get_ifflags(ifdev)	((ifdev)->ifdev_ifflags)
#define ifdev_get_dlt(ifdev)		((ifdev)->ifdev_dlt)
#define ifdev_is_promisc(ifdev)		((ifdev)->ifdev_promisc != 0)
#define ifdev_get_netif(ifdev)		(&(ifdev)->ifdev_netif)
#define ifdev_get_nd6flags(ifdev)	((ifdev)->ifdev_nd6flags)
#define ifdev_get_iftype(ifdev)		((ifdev)->ifdev_data.ifi_type)
#define ifdev_get_hwlen(ifdev)		((ifdev)->ifdev_data.ifi_addrlen)
#define ifdev_get_hdrlen(ifdev)		((ifdev)->ifdev_data.ifi_hdrlen)
#define ifdev_get_link(ifdev)		((ifdev)->ifdev_data.ifi_link_state)
#define ifdev_get_mtu(ifdev)		((ifdev)->ifdev_data.ifi_mtu)
#define ifdev_get_metric(ifdev)		((ifdev)->ifdev_data.ifi_metric)
#define ifdev_get_ifdata(ifdev)		(&(ifdev)->ifdev_data)
#define ifdev_is_loopback(ifdev)	((ifdev)->ifdev_ifflags & IFF_LOOPBACK)
#define ifdev_is_up(ifdev)		((ifdev)->ifdev_ifflags & IFF_UP)
#define ifdev_is_link_up(ifdev)		(netif_is_link_up(&(ifdev)->ifdev_netif))
#define ifdev_set_metric(ifdev, metric) \
	((void)((ifdev)->ifdev_data.ifi_metric = (metric)))
#define ifdev_get_index(ifdev) \
	((uint32_t)(netif_get_index(ifdev_get_netif(ifdev))))

#define ifdev_output_drop(ifdev)	((ifdev)->ifdev_data.ifi_oerrors++)

#define netif_get_ifdev(netif)		((struct ifdev *)(netif)->state)

void ifdev_init(void);
void ifdev_poll(void);

void ifdev_register(const char * name, int (* create)(const char *));

void ifdev_input(struct ifdev * ifdev, struct pbuf * pbuf,
    struct netif * netif, int to_bpf);
err_t ifdev_output(struct ifdev * ifdev, struct pbuf * pbuf,
    struct netif * netif, int to_bpf, int hdrcmplt);

void ifdev_attach_bpf(struct ifdev * ifdev, struct bpfdev_link * bpfl);
void ifdev_detach_bpf(struct ifdev * ifdev, struct bpfdev_link * bpfl);

struct ifdev *ifdev_get_by_index(uint32_t ifindex);
struct ifdev *ifdev_find_by_name(const char * name);
struct ifdev *ifdev_enum(struct ifdev * last);

int ifdev_check_name(const char * name, unsigned int * vtype_slot);

int ifdev_set_promisc(struct ifdev * ifdev);
void ifdev_clear_promisc(struct ifdev * ifdev);

int ifdev_set_ifflags(struct ifdev * ifdev, unsigned int ifflags);
void ifdev_update_ifflags(struct ifdev * ifdev, unsigned int ifflags);

void ifdev_get_ifcap(struct ifdev * ifdev, uint64_t * ifcap,
    uint64_t * ifena);
int ifdev_set_ifcap(struct ifdev * ifdev, uint64_t ifena);

int ifdev_get_ifmedia(struct ifdev * ifdev, int * ifcurrent, int * ifactive);
int ifdev_set_ifmedia(struct ifdev * ifdev, int ifmedia);

int ifdev_set_mtu(struct ifdev * ifdev, unsigned int mtu);

int ifdev_set_nd6flags(struct ifdev * ifdev, uint32_t nd6flags);

void ifdev_add(struct ifdev * ifdev, const char * name, unsigned int ifflags,
    unsigned int iftype, size_t hdrlen, size_t addrlen, unsigned int dlt,
    unsigned int mtu, uint32_t nd6flags, const struct ifdev_ops * iop);
int ifdev_remove(struct ifdev * ifdev);

struct ifdev *ifdev_get_loopback(void);

void ifdev_update_link(struct ifdev * ifdev, int link);
void ifdev_update_hwaddr(struct ifdev * ifdev, const uint8_t * hwaddr,
    int is_factory);

int ifdev_create(const char * name);
int ifdev_destroy(struct ifdev * ifdev);
const char *ifdev_enum_vtypes(unsigned int num);

#endif /* !MINIX_NET_LWIP_IFDEV_H */
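
As an aside on the operations table defined above: a hypothetical interface module could set up its table roughly as follows. The example_* functions are made up for illustration; netif_input() is the standard lwIP input function, and whether a particular callback may be left unset depends on how ifdev.c uses it (see ethif.c for a real module).

extern err_t example_init(struct ifdev * ifdev, struct netif * netif);
extern err_t example_output(struct ifdev * ifdev, struct pbuf * pbuf,
    struct netif * netif);
extern int example_set_mtu(struct ifdev * ifdev, unsigned int mtu);

static const struct ifdev_ops example_ops = {
	.iop_init = example_init,	/* set up the lwIP netif */
	.iop_input = netif_input,	/* standard lwIP input hook */
	.iop_output = example_output,	/* hand a packet to the device */
	.iop_set_mtu = example_set_mtu,
};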

761 minix/net/lwip/ipsock.c (new file)
@@ -0,0 +1,761 @@
/* LWIP service - ipsock.c - shared IP-level socket code */

#include "lwip.h"
#include "ifaddr.h"

#define ip6_hdr __netbsd_ip6_hdr	/* conflicting definitions */
#include <net/route.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet6/in6_pcb.h>
#undef ip6_hdr

/* The following are sysctl(7) settings. */
int lwip_ip4_forward = 0;		/* We patch lwIP to check these.. */
int lwip_ip6_forward = 0;		/* ..two settings at run time. */
static int ipsock_v6only = 1;

/* The CTL_NET PF_INET IPPROTO_IP subtree. */
static struct rmib_node net_inet_ip_table[] = {
/* 1*/	[IPCTL_FORWARDING]	= RMIB_INTPTR(RMIB_RW, &lwip_ip4_forward,
				    "forwarding",
				    "Enable forwarding of INET datagrams"),
/* 3*/	[IPCTL_DEFTTL]		= RMIB_INT(RMIB_RO, IP_DEFAULT_TTL, "ttl",
				    "Default TTL for an INET datagram"),
/*23*/	[IPCTL_LOOPBACKCKSUM]	= RMIB_FUNC(RMIB_RW | CTLTYPE_INT, sizeof(int),
				    loopif_cksum, "do_loopback_cksum",
				    "Perform IP checksum on loopback"),
};

static struct rmib_node net_inet_ip_node =
    RMIB_NODE(RMIB_RO, net_inet_ip_table, "ip", "IPv4 related settings");

/* The CTL_NET PF_INET6 IPPROTO_IPV6 subtree. */
static struct rmib_node net_inet6_ip6_table[] = {
/* 1*/	[IPV6CTL_FORWARDING]	= RMIB_INTPTR(RMIB_RW, &lwip_ip6_forward,
				    "forwarding",
				    "Enable forwarding of INET6 datagrams"),
				/*
				 * The following functionality is not
				 * implemented in lwIP at this time.
				 */
/* 2*/	[IPV6CTL_SENDREDIRECTS]	= RMIB_INT(RMIB_RO, 0, "redirect", "Enable "
				    "sending of ICMPv6 redirect messages"),
/* 3*/	[IPV6CTL_DEFHLIM]	= RMIB_INT(RMIB_RO, IP_DEFAULT_TTL, "hlim",
				    "Hop limit for an INET6 datagram"),
/*12*/	[IPV6CTL_ACCEPT_RTADV]	= RMIB_INTPTR(RMIB_RW, &ifaddr_accept_rtadv,
				    "accept_rtadv",
				    "Accept router advertisements"),
/*16*/	[IPV6CTL_DAD_COUNT]	= RMIB_INT(RMIB_RO,
				    LWIP_IPV6_DUP_DETECT_ATTEMPTS, "dad_count",
				    "Number of Duplicate Address Detection "
				    "probes to send"),
/*24*/	[IPV6CTL_V6ONLY]	= RMIB_INTPTR(RMIB_RW, &ipsock_v6only,
				    "v6only", "Disallow PF_INET6 sockets from "
				    "connecting to PF_INET sockets"),
				/*
				 * The following setting is significantly
				 * different from NetBSD, and therefore it has
				 * a somewhat different description as well.
				 */
/*35*/	[IPV6CTL_AUTO_LINKLOCAL]= RMIB_INTPTR(RMIB_RW, &ifaddr_auto_linklocal,
				    "auto_linklocal", "Enable global support "
				    "for adding IPv6 link-local addresses to "
				    "interfaces"),
				/*
				 * Temporary addresses are managed entirely by
				 * userland.  We only maintain the settings.
				 */
/*+0*/	[IPV6CTL_MAXID]		= RMIB_INT(RMIB_RW, 0, "use_tempaddr",
				    "Use temporary address"),
/*+1*/	[IPV6CTL_MAXID + 1]	= RMIB_INT(RMIB_RW, 86400, "temppltime",
				    "Preferred lifetime of a temporary "
				    "address"),
/*+2*/	[IPV6CTL_MAXID + 2]	= RMIB_INT(RMIB_RW, 604800, "tempvltime",
				    "Valid lifetime of a temporary address"),
};

static struct rmib_node net_inet6_ip6_node =
    RMIB_NODE(RMIB_RO, net_inet6_ip6_table, "ip6", "IPv6 related settings");

/*
 * Initialize the IP sockets module.
 */
void
ipsock_init(void)
{

	/*
	 * Register the net.inet.ip and net.inet6.ip6 subtrees.  Unlike for the
	 * specific protocols (TCP/UDP/RAW), here the IPv4 and IPv6 subtrees
	 * are and must be separate, even though many settings are shared
	 * between the two at the lwIP level.  Ultimately we may have to split
	 * the subtrees for the specific protocols, too, though..
	 */
	mibtree_register_inet(AF_INET, IPPROTO_IP, &net_inet_ip_node);
	mibtree_register_inet(AF_INET6, IPPROTO_IPV6, &net_inet6_ip6_node);
}
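
/*
 * A userland sketch of toggling one of the nodes registered above, assuming
 * the standard NetBSD sysctlbyname(3) API (equivalent to running
 * "sysctl -w net.inet.ip.forwarding=1"):
 *
 *	int on = 1;
 *
 *	if (sysctlbyname("net.inet.ip.forwarding", NULL, NULL, &on,
 *	    sizeof(on)) != 0)
 *		err(1, "sysctlbyname");
 */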

/*
 * Return the lwIP IP address type (IPADDR_TYPE_) for the given IP socket.
 */
static int
ipsock_get_type(struct ipsock * ip)
{

	if (!(ip->ip_flags & IPF_IPV6))
		return IPADDR_TYPE_V4;
	else if (ip->ip_flags & IPF_V6ONLY)
		return IPADDR_TYPE_V6;
	else
		return IPADDR_TYPE_ANY;
}

/*
 * Create an IP socket, for the given (PF_/AF_) domain and initial send and
 * receive buffer sizes.  Return the lwIP IP address type that should be used
 * to create the corresponding PCB.  Return a pointer to the libsockevent
 * socket in 'sockp'.  This function must not allocate any resources in any
 * form, as socket creation may still fail later, in which case no destruction
 * function is called.
 */
int
ipsock_socket(struct ipsock * ip, int domain, size_t sndbuf, size_t rcvbuf,
	struct sock ** sockp)
{

	ip->ip_flags = (domain == AF_INET6) ? IPF_IPV6 : 0;

	if (domain == AF_INET6 && ipsock_v6only)
		ip->ip_flags |= IPF_V6ONLY;

	ip->ip_sndbuf = sndbuf;
	ip->ip_rcvbuf = rcvbuf;

	/* Important: when adding settings here, also change ipsock_clone(). */

	*sockp = &ip->ip_sock;

	return ipsock_get_type(ip);
}
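
/*
 * A sketch of the expected caller pattern, with hypothetical names: a
 * protocol module embeds struct ipsock as the first member of its own socket
 * object, lets this function fill in the shared state, and only then creates
 * the lwIP PCB using the returned address type:
 *
 *	struct udpsock *udp = ...;	(no allocation happens in here)
 *	int type;
 *
 *	type = ipsock_socket(&udp->udp_ipsock, domain, UDP_SNDBUF_DEF,
 *	    UDP_RCVBUF_DEF, sockp);
 *	udp->udp_pcb = udp_new_ip_type(type);
 */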

/*
 * Clone the given socket 'ip' into the new socket 'newip', using the socket
 * identifier 'newid'.  In particular, tell libsockevent about the clone and
 * copy over any settings from 'ip' to 'newip' that can be inherited on a
 * socket.  Cloning is used for new TCP connections arriving on listening TCP
 * sockets.  This function must not fail.
 */
void
ipsock_clone(struct ipsock * ip, struct ipsock * newip, sockid_t newid)
{

	sockevent_clone(&ip->ip_sock, &newip->ip_sock, newid);

	/* Inherit all settings from the original socket. */
	newip->ip_flags = ip->ip_flags;
	newip->ip_sndbuf = ip->ip_sndbuf;
	newip->ip_rcvbuf = ip->ip_rcvbuf;
}

/*
 * Create an <any> address for the given socket, taking into account whether
 * the socket is IPv4, IPv6, or mixed.  The generated address, stored in
 * 'ipaddr', will have the same type as returned from the ipsock_socket() call.
 */
void
ipsock_get_any_addr(struct ipsock * ip, ip_addr_t * ipaddr)
{

	ip_addr_set_any(ipsock_is_ipv6(ip), ipaddr);

	if (ipsock_is_ipv6(ip) && !ipsock_is_v6only(ip))
		IP_SET_TYPE(ipaddr, IPADDR_TYPE_ANY);
}

/*
 * Verify whether the given (properly scoped) IP address is a valid source
 * address for the given IP socket.  The 'allow_mcast' flag indicates whether
 * the source address is allowed to be a multicast address.  Return OK on
 * success.  If 'ifdevp' is not NULL, it is filled with either the interface
 * that owns the address, or NULL if the address is (while valid) not
 * associated with a particular interface.  On failure, return a negative error
 * code.  This function must be called, in one way or another, for every source
 * address used for binding or sending on an IP-layer socket.
 */
int
ipsock_check_src_addr(struct ipsock * ip, ip_addr_t * ipaddr, int allow_mcast,
	struct ifdev ** ifdevp)
{
	ip6_addr_t *ip6addr;
	struct ifdev *ifdev;
	uint32_t inaddr, zone;
	int is_mcast;

	/*
	 * TODO: for now, forbid binding to multicast addresses.  Callers that
	 * never allow multicast addresses anyway (e.g., IPV6_PKTINFO) should
	 * do their own check for this; the one here may eventually be removed.
	 */
	is_mcast = ip_addr_ismulticast(ipaddr);

	if (is_mcast && !allow_mcast)
		return EADDRNOTAVAIL;

	if (IP_IS_V6(ipaddr)) {
		/*
		 * The given address must not have a KAME-style embedded zone.
		 * This check is already performed in addr_get_inet(), but we
		 * have to replicate it here because not all source addresses
		 * go through addr_get_inet().
		 */
		ip6addr = ip_2_ip6(ipaddr);

		if (ip6_addr_has_scope(ip6addr, IP6_UNKNOWN) &&
		    (ip6addr->addr[0] & PP_HTONL(0x0000ffffUL)))
			return EINVAL;

		/*
		 * lwIP does not support IPv4-mapped IPv6 addresses, so these
		 * must be converted to plain IPv4 addresses instead.  The IPv4
		 * 'any' address is not supported in this form.  In V6ONLY
		 * mode, refuse connecting or sending to IPv4-mapped addresses
		 * at all.
		 */
		if (ip6_addr_isipv4mappedipv6(ip6addr)) {
			if (ipsock_is_v6only(ip))
				return EINVAL;

			inaddr = ip6addr->addr[3];

			if (inaddr == PP_HTONL(INADDR_ANY))
				return EADDRNOTAVAIL;

			ip_addr_set_ip4_u32(ipaddr, inaddr);
		}
	}

	ifdev = NULL;

	if (!ip_addr_isany(ipaddr)) {
		if (IP_IS_V6(ipaddr) &&
		    ip6_addr_lacks_zone(ip_2_ip6(ipaddr), IP6_UNKNOWN))
			return EADDRNOTAVAIL;

		/*
		 * If the address is a unicast address, it must be assigned to
		 * an interface.  Otherwise, if it is a zoned multicast
		 * address, the zone denotes the interface.  For global
		 * multicast addresses, we cannot determine an interface.
		 */
		if (!is_mcast) {
			if ((ifdev = ifaddr_map_by_addr(ipaddr)) == NULL)
				return EADDRNOTAVAIL;
		} else {
			/* Some multicast addresses are not acceptable. */
			if (!addr_is_valid_multicast(ipaddr))
				return EINVAL;

			if (IP_IS_V6(ipaddr) &&
			    ip6_addr_has_zone(ip_2_ip6(ipaddr))) {
				zone = ip6_addr_zone(ip_2_ip6(ipaddr));

				if ((ifdev = ifdev_get_by_index(zone)) == NULL)
					return ENXIO;
			}
		}
	}

	if (ifdevp != NULL)
		*ifdevp = ifdev;

	return OK;
}

/*
 * Retrieve and validate a source address for use in a socket bind call on
 * socket 'ip'.  The user-provided address is given as 'addr', with length
 * 'addr_len'.  The socket's current local IP address and port are given as
 * 'local_ip' and 'local_port', respectively; for raw sockets, the given local
 * port number is always zero.  The caller's endpoint is given as 'user_endpt',
 * used to make sure only root can bind to privileged local port numbers.  The
 * boolean 'allow_mcast' flag indicates whether the source address is allowed
 * to be a multicast address.  On success, return OK with the source IP address
 * stored in 'src_addr' and, if 'src_port' is not NULL, the port number to bind
 * to stored in 'src_port'.  Otherwise, return a negative error code.  This
 * function performs all the tasks necessary before the socket can be bound
 * using a lwIP call.
 */
int
ipsock_get_src_addr(struct ipsock * ip, const struct sockaddr * addr,
	socklen_t addr_len, endpoint_t user_endpt, ip_addr_t * local_ip,
	uint16_t local_port, int allow_mcast, ip_addr_t * src_addr,
	uint16_t * src_port)
{
	uint16_t port;
	int r;

	/*
	 * If the socket has been bound already, it cannot be bound again.
	 * We check this by checking whether the current local port is non-
	 * zero.  This rule does not apply to raw sockets, but raw sockets have
	 * no port numbers anyway, so this conveniently works out.  However,
	 * raw sockets may not be rebound after being connected, but that is
	 * checked before we even get here.
	 */
	if (local_port != 0)
		return EINVAL;

	/* Parse the user-provided address. */
	if ((r = addr_get_inet(addr, addr_len, ipsock_get_type(ip), src_addr,
	    FALSE /*kame*/, &port)) != OK)
		return r;

	/* Validate the user-provided address. */
	if ((r = ipsock_check_src_addr(ip, src_addr, allow_mcast,
	    NULL /*ifdevp*/)) != OK)
		return r;

	/*
	 * If we are interested in port numbers at all (for non-raw sockets,
	 * meaning 'src_port' is not NULL), make sure that only the superuser
	 * can bind to privileged port numbers.  For raw sockets, only the
	 * superuser can open a socket anyway, so we need no check here.
	 */
	if (src_port != NULL) {
		if (port != 0 && port < IPPORT_RESERVED &&
		    !util_is_root(user_endpt))
			return EACCES;

		*src_port = port;
	}

	return OK;
}

/*
 * Retrieve and validate a destination address for use in a socket connect or
 * sendto call.  The user-provided address is given as 'addr', with length
 * 'addr_len'.  The socket's current local IP address is given as 'local_addr'.
 * On success, return OK with the destination IP address stored in 'dst_addr'
 * and, if 'dst_port' is not NULL, the port number to connect or send to
 * stored in 'dst_port'.  Otherwise, return a negative error code.  This
 * function must be called, in one way or another, for every destination
 * address used for connecting or sending on an IP-layer socket.
 */
int
ipsock_get_dst_addr(struct ipsock * ip, const struct sockaddr * addr,
	socklen_t addr_len, const ip_addr_t * local_addr, ip_addr_t * dst_addr,
	uint16_t * dst_port)
{
	uint16_t port;
	int r;

	/* Parse the user-provided address. */
	if ((r = addr_get_inet(addr, addr_len, ipsock_get_type(ip), dst_addr,
	    FALSE /*kame*/, &port)) != OK)
		return r;

	/* Destination addresses are always specific. */
	if (IP_GET_TYPE(dst_addr) == IPADDR_TYPE_ANY)
		IP_SET_TYPE(dst_addr, IPADDR_TYPE_V6);

	/*
	 * lwIP does not support IPv4-mapped IPv6 addresses, so these must be
	 * converted to plain IPv4 addresses instead.  In V6ONLY mode, refuse
	 * connecting or sending to IPv4-mapped addresses at all.
	 */
	if (IP_IS_V6(dst_addr) &&
	    ip6_addr_isipv4mappedipv6(ip_2_ip6(dst_addr))) {
		if (ipsock_is_v6only(ip))
			return EINVAL;

		ip_addr_set_ip4_u32(dst_addr, ip_2_ip6(dst_addr)->addr[3]);
	}

	/*
	 * Now make sure that the local and remote addresses are of the same
	 * family.  The local address may be of type IPADDR_TYPE_ANY, which is
	 * allowed for both IPv4 and IPv6.  Even for connectionless socket
	 * types we must perform this check as part of connect calls (as well
	 * as sendto calls!) because otherwise we will create problems for
	 * sysctl based socket enumeration (i.e., netstat), which uses the
	 * local IP address type to determine the socket family.
	 */
	if (IP_GET_TYPE(local_addr) != IPADDR_TYPE_ANY &&
	    IP_IS_V6(local_addr) != IP_IS_V6(dst_addr))
		return EINVAL;

	/*
	 * TODO: on NetBSD, an 'any' destination address is replaced with a
	 * local interface address.
	 */
	if (ip_addr_isany(dst_addr))
		return EHOSTUNREACH;

	/*
	 * If the address is a multicast address, the multicast address itself
	 * must be valid.
	 */
	if (ip_addr_ismulticast(dst_addr) &&
	    !addr_is_valid_multicast(dst_addr))
		return EINVAL;

	/*
	 * TODO: decide whether to add a zone to a scoped IPv6 address that
	 * lacks a zone.  For now, we let lwIP handle this, as lwIP itself
	 * will always add the zone at some point.  If anything changes there,
	 * this would be the place to set the zone (using a route lookup).
	 */

	/*
	 * For now, we do not forbid or alter any other particular destination
	 * addresses.
	 */

	if (dst_port != NULL) {
		/*
		 * Disallow connecting/sending to port zero.  There is no error
		 * code that applies well to this case, so we copy NetBSD's.
		 */
		if (port == 0)
			return EADDRNOTAVAIL;

		*dst_port = port;
	}

	return OK;
}

/*
 * Store the address 'ipaddr' associated with the socket 'ip' (for example, it
 * may be the local or remote IP address of the socket) as a sockaddr structure
 * in 'addr'.  A port number is provided as 'port' (in host-byte order) if
 * relevant, and zero is passed in otherwise.  This function MUST only be
 * called from contexts where 'addr' is a buffer provided by libsockevent or
 * libsockdriver, meaning that it is of size SOCKADDR_MAX.  The value pointed
 * to by 'addr_len' is not expected to be initialized in calls to this function
 * (and will typically be zero).  On return, 'addr_len' is filled with the
 * length of the address generated in 'addr'.  This function never fails.
 */
void
ipsock_put_addr(struct ipsock * ip, struct sockaddr * addr,
	socklen_t * addr_len, ip_addr_t * ipaddr, uint16_t port)
{
	ip_addr_t mappedaddr;

	/*
	 * If the socket is an AF_INET6-type socket, and the given address is
	 * an IPv4-type address, store it as an IPv4-mapped IPv6 address.
	 */
	if (ipsock_is_ipv6(ip) && IP_IS_V4(ipaddr)) {
		addr_make_v4mapped_v6(&mappedaddr, ip_2_ip4(ipaddr));

		ipaddr = &mappedaddr;
	}

	/*
	 * We have good reasons to keep the sockdriver and sockevent APIs as
	 * they are, namely, defaulting 'addr_len' to zero such that the caller
	 * must provide a non-zero length (only) when returning a valid
	 * address.  The consequence here is that we have to know the size of
	 * the provided buffer.  For libsockevent callbacks, we are always
	 * guaranteed to get a buffer of at least this size.
	 */
	*addr_len = SOCKADDR_MAX;

	addr_put_inet(addr, addr_len, ipaddr, FALSE /*kame*/, port);
}

/*
 * Set socket options on an IP socket.
 */
int
ipsock_setsockopt(struct ipsock * ip, int level, int name,
	const struct sockdriver_data * data, socklen_t len,
	struct ipopts * ipopts)
{
	int r, val, allow;
	uint8_t type;

	switch (level) {
	case SOL_SOCKET:
		switch (name) {
		case SO_SNDBUF:
			if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
			    len)) != OK)
				return r;

			if (val <= 0 || (size_t)val < ipopts->sndmin ||
			    (size_t)val > ipopts->sndmax)
				return EINVAL;

			ip->ip_sndbuf = val;

			return OK;

		case SO_RCVBUF:
			if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
			    len)) != OK)
				return r;

			if (val <= 0 || (size_t)val < ipopts->rcvmin ||
			    (size_t)val > ipopts->rcvmax)
				return EINVAL;

			ip->ip_rcvbuf = val;

			return OK;
		}

		break;

	case IPPROTO_IP:
		if (ipsock_is_ipv6(ip))
			break;

		switch (name) {
		case IP_TOS:
			if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
			    len)) != OK)
				return r;

			if (val < 0 || val > UINT8_MAX)
				return EINVAL;

			*ipopts->tos = (uint8_t)val;

			return OK;

		case IP_TTL:
			if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
			    len)) != OK)
				return r;

			if (val < 0 || val > UINT8_MAX)
				return EINVAL;

			*ipopts->ttl = (uint8_t)val;

			return OK;
		}

		break;

	case IPPROTO_IPV6:
		if (!ipsock_is_ipv6(ip))
			break;

		switch (name) {
		case IPV6_UNICAST_HOPS:
			if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
			    len)) != OK)
				return r;

			if (val < -1 || val > UINT8_MAX)
				return EINVAL;

			if (val == -1)
				val = IP_DEFAULT_TTL;

			*ipopts->ttl = val;

			return OK;

		case IPV6_TCLASS:
			if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
			    len)) != OK)
				return r;

			if (val < -1 || val > UINT8_MAX)
				return EINVAL;

			if (val == -1)
				val = 0;

			*ipopts->tos = val;

			return OK;

		case IPV6_V6ONLY:
			if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
			    len)) != OK)
				return r;

			/*
			 * If the socket has been bound to an actual address,
			 * we still allow the option to be changed, but it no
			 * longer has any effect.
			 */
			type = IP_GET_TYPE(ipopts->local_ip);
			allow = (type == IPADDR_TYPE_ANY ||
			    (type == IPADDR_TYPE_V6 &&
			    ip_addr_isany(ipopts->local_ip)));

			if (val) {
				ip->ip_flags |= IPF_V6ONLY;

				type = IPADDR_TYPE_V6;
			} else {
				ip->ip_flags &= ~IPF_V6ONLY;

				type = IPADDR_TYPE_ANY;
			}

			if (allow)
				IP_SET_TYPE(ipopts->local_ip, type);

			return OK;
		}

		break;
	}

	return ENOPROTOOPT;
}
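
/*
 * A userland sketch of the IPV6_V6ONLY handling above, using standard BSD
 * socket calls only (error handling elided):
 *
 *	int fd, off = 0;
 *
 *	fd = socket(AF_INET6, SOCK_STREAM, 0);
 *	setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &off, sizeof(off));
 *
 * After this, and as long as the socket is not yet bound to a specific
 * address, its local address type becomes IPADDR_TYPE_ANY, so that it may
 * carry IPv4-mapped traffic as well.
 */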

/*
 * Retrieve socket options on an IP socket.
 */
int
ipsock_getsockopt(struct ipsock * ip, int level, int name,
	const struct sockdriver_data * data, socklen_t * len,
	struct ipopts * ipopts)
{
	int val;

	switch (level) {
	case SOL_SOCKET:
		switch (name) {
		case SO_SNDBUF:
			val = ip->ip_sndbuf;

			return sockdriver_copyout_opt(data, &val, sizeof(val),
			    len);

		case SO_RCVBUF:
			val = ip->ip_rcvbuf;

			return sockdriver_copyout_opt(data, &val, sizeof(val),
			    len);
		}

		break;

	case IPPROTO_IP:
		if (ipsock_is_ipv6(ip))
			break;

		switch (name) {
		case IP_TOS:
			val = (int)*ipopts->tos;

			return sockdriver_copyout_opt(data, &val, sizeof(val),
			    len);

		case IP_TTL:
			val = (int)*ipopts->ttl;

			return sockdriver_copyout_opt(data, &val, sizeof(val),
			    len);
		}

		break;

	case IPPROTO_IPV6:
		if (!ipsock_is_ipv6(ip))
			break;

		switch (name) {
		case IPV6_UNICAST_HOPS:
			val = *ipopts->ttl;

			return sockdriver_copyout_opt(data, &val, sizeof(val),
			    len);

		case IPV6_TCLASS:
			val = *ipopts->tos;

			return sockdriver_copyout_opt(data, &val, sizeof(val),
			    len);

		case IPV6_V6ONLY:
			val = !!(ip->ip_flags & IPF_V6ONLY);

			return sockdriver_copyout_opt(data, &val, sizeof(val),
			    len);
		}

		break;
	}

	return ENOPROTOOPT;
}

/*
 * Fill the given kinfo_pcb sysctl(7) structure with IP-level information.
 */
void
ipsock_get_info(struct kinfo_pcb * ki, const ip_addr_t * local_ip,
	uint16_t local_port, const ip_addr_t * remote_ip, uint16_t remote_port)
{
	ip_addr_t ipaddr;
	socklen_t len;
	uint8_t type;

	len = sizeof(ki->ki_spad); /* use this for the full size, not ki_src */

	addr_put_inet(&ki->ki_src, &len, local_ip, TRUE /*kame*/, local_port);

	/*
	 * At this point, the local IP address type has already been used to
	 * determine whether this is an IPv4 or IPv6 socket.  While not ideal,
	 * that is the best we can do: we cannot use IPv4-mapped IPv6 addresses
	 * in lwIP PCBs, we cannot store the original type in those PCBs, and
	 * we also cannot rely on the PCB having an associated ipsock object
	 * anymore.  We also cannot use the ipsock only when present: it could
	 * make a TCP PCB "jump" from IPv6 to IPv4 in the netstat listing when
	 * it goes into TIME_WAIT state, for example.
	 *
	 * So, use *only* the type of the local IP address to determine whether
	 * this is an IPv4 or an IPv6 socket.  At the same time, do *not* rely
	 * on the remote IP address being IPv4 for a local IPv4 address; it may
	 * be of type IPADDR_TYPE_V6 for an unconnected socket bound to an
	 * IPv4-mapped IPv6 address.  Pretty messy, but we're limited by what
	 * lwIP offers here.  Since it's just netstat, it need not be perfect.
	 */
	if ((type = IP_GET_TYPE(local_ip)) == IPADDR_TYPE_V4) {
		if (!ip_addr_isany(local_ip) || local_port != 0)
			ki->ki_prstate = INP_BOUND;

		/*
		 * Make sure the returned socket address types are consistent.
		 * The only case where the remote IP address is not IPv4 here
		 * is when it is not set yet, so there is no need to check
		 * whether it is the 'any' address: it always is.
		 */
		if (IP_GET_TYPE(remote_ip) != IPADDR_TYPE_V4) {
			ip_addr_set_zero_ip4(&ipaddr);

			remote_ip = &ipaddr;
		}
	} else {
		if (!ip_addr_isany(local_ip) || local_port != 0)
			ki->ki_prstate = IN6P_BOUND;
		if (type != IPADDR_TYPE_ANY)
			ki->ki_pflags |= IN6P_IPV6_V6ONLY;
	}

	len = sizeof(ki->ki_dpad); /* use this for the full size, not ki_dst */

	addr_put_inet(&ki->ki_dst, &len, remote_ip, TRUE /*kame*/,
	    remote_port);

	/* Check the type of the *local* IP address here.  See above. */
	if (!ip_addr_isany(remote_ip) || remote_port != 0) {
		if (type == IPADDR_TYPE_V4)
			ki->ki_prstate = INP_CONNECTED;
		else
			ki->ki_prstate = IN6P_CONNECTED;
	}
}
95	minix/net/lwip/ipsock.h	Normal file
@@ -0,0 +1,95 @@
#ifndef MINIX_NET_LWIP_IPSOCK_H
#define MINIX_NET_LWIP_IPSOCK_H

/* IP-level socket, shared by TCP, UDP, and RAW. */
struct ipsock {
	struct sock ip_sock;		/* socket object, MUST be first */
	unsigned int ip_flags;		/* all socket flags */
	size_t ip_sndbuf;		/* send buffer size */
	size_t ip_rcvbuf;		/* receive buffer size */
};

/*
 * Socket flags.  In order to reduce memory consumption, all these flags are
 * stored in the same field (ipsock.ip_flags), and thus flags used by the same
 * socket type must not overlap; that is why they are all here.  For example,
 * UDPF/PKTF/IPF should all be unique, and TCPF/IPF should be unique, but
 * UDPF/PKTF may overlap with TCPF and UDPF may overlap with RAWF.  In
 * practice, we have no UDPF or RAWF flags and plenty of space to make all
 * flags unique anyway.
 */
#define IPF_IPV6	0x0000001	/* socket is IPv6 */
#define IPF_V6ONLY	0x0000002	/* socket is IPv6 only */

#define PKTF_RECVINFO	0x0000010	/* receive ancillary PKTINFO */
#define PKTF_RECVTTL	0x0000020	/* receive ancillary TTL */
#define PKTF_RECVTOS	0x0000040	/* receive ancillary TOS */
#define PKTF_MCAWARE	0x0000080	/* owner is multicast aware */

#define TCPF_CONNECTING	0x0001000	/* attempting to connect */
#define TCPF_SENT_FIN	0x0002000	/* send FIN when possible */
#define TCPF_RCVD_FIN	0x0004000	/* received FIN from peer */
#define TCPF_FULL	0x0008000	/* PCB send buffer is full */
#define TCPF_OOM	0x0010000	/* memory allocation failed */

#define ipsock_get_sock(ip)	(&(ip)->ip_sock)
#define ipsock_is_ipv6(ip)	((ip)->ip_flags & IPF_IPV6)
#define ipsock_is_v6only(ip)	((ip)->ip_flags & IPF_V6ONLY)
#define ipsock_get_flags(ip)	((ip)->ip_flags)
#define ipsock_get_flag(ip,fl)	((ip)->ip_flags & (fl))
#define ipsock_set_flag(ip,fl)	((ip)->ip_flags |= (fl))
#define ipsock_clear_flag(ip,fl) ((ip)->ip_flags &= ~(fl))
#define ipsock_get_sndbuf(ip)	((ip)->ip_sndbuf)
#define ipsock_get_rcvbuf(ip)	((ip)->ip_rcvbuf)

/*
 * IP-level option pointers.  This is necessary because even though lwIP's
 * TCP, UDP, and RAW PCBs share the same initial fields, the C standard does
 * not permit generic access to such initial fields (due to both possible
 * padding differences and strict-aliasing rules).  The fields in this
 * structure are therefore pointers to the initial fields of each of the PCB
 * structures.  If lwIP ever groups its IP PCB fields into a single structure
 * and uses that structure as first field of each of the other PCBs, then we
 * will be able to replace this structure with a pointer to the IP PCB instead.
 * For convenience we also carry the send and receive buffer limits here.
 */
struct ipopts {
	ip_addr_t *local_ip;
	ip_addr_t *remote_ip;
	uint8_t *tos;
	uint8_t *ttl;
	size_t sndmin;
	size_t sndmax;
	size_t rcvmin;
	size_t rcvmax;
};
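
/*
 * A sketch of how a protocol module might fill this structure before calling
 * into the shared option code, assuming hypothetical names for a UDP socket
 * object and its buffer limits:
 *
 *	struct ipopts ipopts;
 *
 *	ipopts.local_ip = &udp->udp_pcb->local_ip;
 *	ipopts.remote_ip = &udp->udp_pcb->remote_ip;
 *	ipopts.tos = &udp->udp_pcb->tos;
 *	ipopts.ttl = &udp->udp_pcb->ttl;
 *	ipopts.sndmin = UDP_SNDBUF_MIN;
 *	ipopts.sndmax = UDP_SNDBUF_MAX;
 *	ipopts.rcvmin = UDP_RCVBUF_MIN;
 *	ipopts.rcvmax = UDP_RCVBUF_MAX;
 *
 *	return ipsock_setsockopt(&udp->udp_ipsock, level, name, data, len,
 *	    &ipopts);
 */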

struct ifdev;

void ipsock_init(void);
int ipsock_socket(struct ipsock * ip, int domain, size_t sndbuf, size_t rcvbuf,
	struct sock ** sockp);
void ipsock_clone(struct ipsock * ip, struct ipsock * newip, sockid_t newid);
void ipsock_get_any_addr(struct ipsock * ip, ip_addr_t * ipaddr);
int ipsock_check_src_addr(struct ipsock * ip, ip_addr_t * ipaddr,
	int allow_mcast, struct ifdev ** ifdevp);
int ipsock_get_src_addr(struct ipsock * ip, const struct sockaddr * addr,
	socklen_t addr_len, endpoint_t user_endpt, ip_addr_t * local_ip,
	uint16_t local_port, int allow_mcast, ip_addr_t * ipaddr,
	uint16_t * portp);
int ipsock_get_dst_addr(struct ipsock * ip, const struct sockaddr * addr,
	socklen_t addr_len, const ip_addr_t * local_addr, ip_addr_t * dst_addr,
	uint16_t * dst_port);
void ipsock_put_addr(struct ipsock * ip, struct sockaddr * addr,
	socklen_t * addr_len, ip_addr_t * ipaddr, uint16_t port);
int ipsock_setsockopt(struct ipsock * ip, int level, int name,
	const struct sockdriver_data * data, socklen_t len,
	struct ipopts * ipopts);
int ipsock_getsockopt(struct ipsock * ip, int level, int name,
	const struct sockdriver_data * data, socklen_t * len,
	struct ipopts * ipopts);
void ipsock_get_info(struct kinfo_pcb * ki, const ip_addr_t * local_ip,
	uint16_t local_port, const ip_addr_t * remote_ip,
	uint16_t remote_port);

#endif /* !MINIX_NET_LWIP_IPSOCK_H */
584	minix/net/lwip/lldata.c	Normal file
@@ -0,0 +1,584 @@
/* LWIP service - lldata.c - link-layer (ARP, NDP) data related routines */
/*
 * This module is largely isolated from the regular routing code.  There are
 * two reasons for that.  First, mixing link-layer routes with regular routes
 * would not work well due to the fact that lwIP keeps these data structures
 * entirely separate.  Second, as of version 8, NetBSD keeps the IP-layer and
 * link-layer routing separate as well.
 *
 * Unfortunately, lwIP does not provide much in the way of implementing the
 * functionality that would be expected for this module.  As such, the current
 * implementation is very restricted and simple.
 *
 * For ARP table entries, lwIP only allows for adding and deleting static
 * entries.  Non-static entries cannot be deleted.  Incomplete (pending)
 * entries cannot even be enumerated, nor can (e.g.) expiry information be
 * obtained.  The lwIP ARP data structures are completely hidden, so there is
 * no way to overcome these limitations without changing lwIP itself.  As a
 * result, not all functionality of the arp(8) userland utility is supported.
 *
 * For NDP table entries, lwIP offers no API at all.  However, since the data
 * structures are exposed directly, we can use those to implement full support
 * for exposing information in a read-only way.  However, manipulating data
 * structures directly from here is too risky, nor does lwIP currently support
 * the concept of static NDP table entries.  Therefore, adding, changing, and
 * deleting NDP entries is currently not supported, and will also first require
 * changes to lwIP itself.
 *
 * The ndp(8) userland utility is also able to show and manipulate various
 * other neighbor discovery related tables and settings.  We support only a
 * small subset of them.  The main reason for this is that the other tables,
 * in particular the prefix and default router lists, are not relevant: on
 * MINIX 3, these are always managed fully in userland (usually dhcpcd(8)), and
 * we even hardcode lwIP not to parse Router Advertisement messages at all, so
 * even though those tables are still part of lwIP, they are always empty.
 * Other ndp(8) functionality is unsupported for similar reasons.
 */

#include "lwip.h"
#include "lldata.h"
#include "route.h"
#include "rtsock.h"

#include "lwip/etharp.h"
#include "lwip/nd6.h"
#include "lwip/priv/nd6_priv.h"	/* for neighbor_cache */

/*
 * Process a routing command specifically for an ARP table entry.  Return OK if
 * the routing command has been processed successfully and a routing socket
 * reply message has already been generated.  Return a negative error code on
 * failure, in which case the caller will generate a reply message instead.
 */
static int
lldata_arp_process(unsigned int type, const ip_addr_t * dst_addr,
	const struct eth_addr * gw_addr, struct ifdev * ifdev,
	unsigned int flags, const struct rtsock_request * rtr)
{
	const ip4_addr_t *ip4addr;
	struct eth_addr ethaddr, *ethptr;
	struct netif *netif;
	lldata_arp_num_t num;
	err_t err;

	netif = (ifdev != NULL) ? ifdev_get_netif(ifdev) : NULL;

	num = etharp_find_addr(netif, ip_2_ip4(dst_addr), &ethptr, &ip4addr);

	if (type != RTM_ADD && num < 0)
		return ESRCH;
	else if (type == RTM_ADD && num >= 0)
		return EEXIST;

	switch (type) {
	case RTM_CHANGE:
		/*
		 * This request is not used by arp(8), so keep things simple.
		 * For RTM_ADD we support only static entries; we support only
		 * those here too, and thus we can use delete-and-readd.  If
		 * the ethernet address is not being changed, try readding the
		 * entry with the previous ethernet address.
		 */
		if (gw_addr == NULL)
			gw_addr = ethptr;

		if (etharp_remove_static_entry(ip_2_ip4(dst_addr)) != ERR_OK)
			return EPERM;

		/* FALLTHROUGH */
	case RTM_ADD:
		assert(gw_addr != NULL);

		memcpy(&ethaddr, gw_addr, sizeof(ethaddr));

		/*
		 * Adding static, permanent, unpublished, non-proxy entries is
		 * all that lwIP supports right now.  We also do not get to
		 * specify the interface, and the way lwIP picks the interface
		 * may in fact result in a different one.
		 */
		if ((err = etharp_add_static_entry(ip_2_ip4(dst_addr),
		    &ethaddr)) != ERR_OK)
			return util_convert_err(err);

		if ((num = etharp_find_addr(NULL /*netif*/, ip_2_ip4(dst_addr),
		    &ethptr, &ip4addr)) < 0)
			panic("unable to find just-added static ARP entry");

		/* FALLTHROUGH */
	case RTM_LOCK:
	case RTM_GET:
		rtsock_msg_arp(num, type, rtr);

		return OK;

	case RTM_DELETE:
		memcpy(&ethaddr, ethptr, sizeof(ethaddr));

		if (etharp_remove_static_entry(ip_2_ip4(dst_addr)) != ERR_OK)
			return EPERM;

		/*
		 * FIXME: the following block is a hack, because we cannot
		 * predict whether the above removal will succeed, while at the
		 * same time we need the entry to be present in order to report
		 * the deleted address to the routing socket.  We temporarily
		 * readd and then remove the entry just for the purpose of
		 * generating the routing socket reply.  There are other ways
		 * to resolve this, but only a better lwIP etharp API would
		 * allow us to resolve this problem cleanly.
		 */
		(void)etharp_add_static_entry(ip_2_ip4(dst_addr), &ethaddr);

		num = etharp_find_addr(NULL /*netif*/, ip_2_ip4(dst_addr),
		    &ethptr, &ip4addr);
		assert(num >= 0);

		rtsock_msg_arp(num, type, rtr);

		(void)etharp_remove_static_entry(ip_2_ip4(dst_addr));

		return OK;

	default:
		return EINVAL;
	}
}

/*
 * Enumerate ARP table entries.  Return TRUE if there is at least one more ARP
 * table entry, of which the number is stored in 'num'.  The caller should set
 * 'num' to 0 initially, and increase it by one between a successful call and
 * the next call.  Return FALSE if there are no more ARP table entries.
 */
int
lldata_arp_enum(lldata_arp_num_t * num)
{
	ip4_addr_t *ip4addr;
	struct netif *netif;
	struct eth_addr *ethaddr;

	for (; *num < ARP_TABLE_SIZE; ++*num) {
		if (etharp_get_entry(*num, &ip4addr, &netif, &ethaddr))
			return TRUE;
	}

	return FALSE;
}
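
/*
 * The intended calling convention for the enumeration functions, as a sketch
 * (hypothetical caller; the same pattern applies to lldata_ndp_enum()):
 *
 *	lldata_arp_num_t num;
 *
 *	for (num = 0; lldata_arp_enum(&num); num++)
 *		lldata_arp_get(num, &addr, &gateway, &ifdev, &flags);
 */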

/*
 * Obtain information about the ARP table entry identified by 'num'.  The IPv4
 * address of the entry is stored in 'addr'.  Its ethernet address is stored in
 * 'gateway'.  The associated interface is stored in 'ifdevp', and the entry's
 * routing flags (RTF_) are stored in 'flagsp'.
 */
void
lldata_arp_get(lldata_arp_num_t num, struct sockaddr_in * addr,
	struct sockaddr_dlx * gateway, struct ifdev ** ifdevp,
	unsigned int * flagsp)
{
	ip_addr_t ipaddr;
	ip4_addr_t *ip4addr;
	struct netif *netif;
	struct ifdev *ifdev;
	struct eth_addr *ethaddr;
	socklen_t addr_len;

	if (!etharp_get_entry(num, &ip4addr, &netif, &ethaddr))
		panic("request for invalid ARP entry");

	ip_addr_copy_from_ip4(ipaddr, *ip4addr);

	assert(netif != NULL);
	ifdev = netif_get_ifdev(netif);

	addr_len = sizeof(*addr);

	addr_put_inet((struct sockaddr *)addr, &addr_len, &ipaddr,
	    TRUE /*kame*/, 0 /*port*/);

	addr_len = sizeof(*gateway);

	addr_put_link((struct sockaddr *)gateway, &addr_len,
	    ifdev_get_index(ifdev), ifdev_get_iftype(ifdev), NULL /*name*/,
	    ethaddr->addr, sizeof(ethaddr->addr));

	*ifdevp = ifdev;

	/*
	 * TODO: this is not necessarily accurate, but lwIP does not provide us
	 * with information as to whether this is a static entry or not..
	 */
	*flagsp = RTF_HOST | RTF_LLINFO | RTF_LLDATA | RTF_STATIC | RTF_CLONED;
}

/*
 * Obtain information about the ND6 neighbor cache entry 'i', which must be a
 * number between 0 (inclusive) and LWIP_ND6_NUM_NEIGHBORS (exclusive).  If an
 * entry with this number exists, return a pointer to its IPv6 address, and
 * additional information in each of the given pointers if not NULL.  The
 * associated interface is stored in 'netif'.  If the entry has an associated
 * link-layer address, a pointer to it is stored in 'lladdr'.  The entry's
 * state (ND6_{INCOMPLETE,REACHABLE,STALE,DELAY,PROBE}) is stored in 'state'.
 * The 'isrouter' parameter is filled with a boolean value indicating whether
 * the entry is for a router.  For ND6_INCOMPLETE and ND6_PROBE, the number of
 * probes sent so far is stored in 'probes_sent'; for other states, the value
 * is set to zero.  For ND6_REACHABLE and ND6_DELAY, the time until expiration
 * in ND6_TMR_INTERVAL-millisecond units is stored in 'expire_time'; for other
 * states, the value is set to zero.  If an entry with number 'i' does not
 * exist, NULL is returned.
 *
 * TODO: upstream this function to lwIP.
 */
static const ip6_addr_t *
nd6_get_neighbor_cache_entry(int8_t i, struct netif ** netif,
	const uint8_t ** lladdr, uint8_t * state, uint8_t * isrouter,
	uint32_t * probes_sent, uint32_t * expire_time)
{

	if (i < 0 || i >= LWIP_ND6_NUM_NEIGHBORS ||
	    neighbor_cache[i].state == ND6_NO_ENTRY)
		return NULL;

	if (netif != NULL)
		*netif = neighbor_cache[i].netif;

	if (lladdr != NULL) {
		if (neighbor_cache[i].state != ND6_INCOMPLETE)
			*lladdr = neighbor_cache[i].lladdr;
		else
			*lladdr = NULL;
	}

	if (state != NULL)
		*state = neighbor_cache[i].state;

	if (isrouter != NULL)
		*isrouter = neighbor_cache[i].isrouter;

	if (probes_sent != NULL) {
		if (neighbor_cache[i].state == ND6_INCOMPLETE ||
		    neighbor_cache[i].state == ND6_PROBE)
			*probes_sent = neighbor_cache[i].counter.probes_sent;
		else
			*probes_sent = 0;
	}

	if (expire_time != NULL) {
		switch (neighbor_cache[i].state) {
		case ND6_REACHABLE:
			*expire_time =
			    neighbor_cache[i].counter.reachable_time /
			    ND6_TMR_INTERVAL;
			break;
		case ND6_DELAY:
			*expire_time = neighbor_cache[i].counter.delay_time;
			break;
		case ND6_INCOMPLETE:
		case ND6_PROBE:
			/* Probes are sent once per timer tick. */
			*expire_time = (LWIP_ND6_MAX_MULTICAST_SOLICIT + 1 -
			    neighbor_cache[i].counter.probes_sent) *
			    (ND6_TMR_INTERVAL / 1000);
			break;
		default:
			/* Stale entries do not expire; they get replaced. */
			*expire_time = 0;
			break;
		}
	}

	return &neighbor_cache[i].next_hop_address;
}

/*
 * Find a neighbor cache entry by IPv6 address.  Return its index number if
 * found, or -1 if not.  This is a reimplementation of the exact same function
 * internal to lwIP.
 *
 * TODO: make this function public in lwIP.
 */
static int8_t
nd6_find_neighbor_cache_entry(const ip6_addr_t * addr)
{
	int8_t i;

	for (i = 0; i < LWIP_ND6_NUM_NEIGHBORS; i++) {
		if (ip6_addr_cmp(addr, &neighbor_cache[i].next_hop_address))
			return i;
	}

	return -1;
}

/*
 * Find an NDP table entry based on the given interface and IPv6 address.  On
 * success, return OK, with the entry's index number stored in 'nump'.  On
 * failure, return an appropriate error code.
 */
int
lldata_ndp_find(struct ifdev * ifdev, const struct sockaddr_in6 * addr,
	lldata_ndp_num_t * nump)
{
	ip_addr_t ipaddr;
	int8_t i;
	int r;

	if ((r = addr_get_inet((const struct sockaddr *)addr, sizeof(*addr),
	    IPADDR_TYPE_V6, &ipaddr, TRUE /*kame*/, NULL /*port*/)) != OK)
		return r;

	/*
	 * For given link-local addresses, no zone may be provided in the
	 * address at all.  In such cases, add the zone ourselves, using the
	 * given interface.
	 */
	if (ip6_addr_lacks_zone(ip_2_ip6(&ipaddr), IP6_UNKNOWN))
		ip6_addr_assign_zone(ip_2_ip6(&ipaddr), IP6_UNKNOWN,
		    ifdev_get_netif(ifdev));

	i = nd6_find_neighbor_cache_entry(ip_2_ip6(&ipaddr));
	if (i < 0)
		return ESRCH;

	/*
	 * We should compare the neighbor cache entry's associated netif to
	 * the given ifdev, but since the lwIP neighbor cache is currently not
	 * keyed by netif anyway (i.e., the internal lookups are purely by IPv6
	 * address as well), doing so makes little sense in practice.
	 */

	*nump = (lldata_ndp_num_t)i;
	return OK;
}

/*
 * Process a routing command specifically for an NDP table entry.  Return OK if
 * the routing command has been processed successfully and a routing socket
 * reply message has already been generated.  Return a negative error code on
 * failure, in which case the caller will generate a reply message instead.
 */
static int
lldata_ndp_process(unsigned int type, const ip_addr_t * dst_addr,
	const struct eth_addr * gw_addr,
	struct ifdev * ifdev, unsigned int flags,
	const struct rtsock_request * rtr)
{
	lldata_ndp_num_t num;

	num = (lldata_ndp_num_t)
	    nd6_find_neighbor_cache_entry(ip_2_ip6(dst_addr));

	if (type != RTM_ADD && num < 0)
		return ESRCH;
	else if (type == RTM_ADD && num >= 0)
		return EEXIST;

	switch (type) {
	case RTM_LOCK:
	case RTM_GET:
		rtsock_msg_arp(num, type, rtr);

		return OK;

	case RTM_ADD:
	case RTM_CHANGE:
	case RTM_DELETE:
		/* TODO: add lwIP support to implement these commands. */
		return ENOSYS;

	default:
		return EINVAL;
	}
}

/*
 * Enumerate NDP table entries.  Return TRUE if there is at least one more NDP
 * table entry, of which the number is stored in 'num'.  The caller should set
 * 'num' to 0 initially, and increase it by one between a successful call and
 * the next call.  Return FALSE if there are no more NDP table entries.
 */
int
lldata_ndp_enum(lldata_ndp_num_t * num)
{

	for (; *num < LWIP_ND6_NUM_NEIGHBORS; ++*num) {
		if (nd6_get_neighbor_cache_entry(*num, NULL /*netif*/,
		    NULL /*lladdr*/, NULL /*state*/, NULL /*isrouter*/,
		    NULL /*probes_sent*/, NULL /*expire_time*/) != NULL)
			return TRUE;
	}

	return FALSE;
}

/*
 * Obtain information about the NDP table entry identified by 'num'.  The IPv6
 * address of the entry is stored in 'addr'.  Its ethernet address is stored in
 * 'gateway'.  The associated interface is stored in 'ifdevp', and the entry's
 * routing flags (RTF_) are stored in 'flagsp'.
 */
void
lldata_ndp_get(lldata_ndp_num_t num, struct sockaddr_in6 * addr,
	struct sockaddr_dlx * gateway, struct ifdev ** ifdevp,
	unsigned int * flagsp)
{
	const ip6_addr_t *ip6addr;
	ip_addr_t ipaddr;
	struct netif *netif;
	struct ifdev *ifdev;
	const uint8_t *lladdr;
	socklen_t addr_len;

	ip6addr = nd6_get_neighbor_cache_entry(num, &netif, &lladdr,
	    NULL /*state*/, NULL /*isrouter*/, NULL /*probes_sent*/,
	    NULL /*expire_time*/);
	assert(ip6addr != NULL);

	ip_addr_copy_from_ip6(ipaddr, *ip6addr);

	ifdev = netif_get_ifdev(netif);
	assert(ifdev != NULL);

	addr_len = sizeof(*addr);

	addr_put_inet((struct sockaddr *)addr, &addr_len, &ipaddr,
	    TRUE /*kame*/, 0 /*port*/);

	addr_len = sizeof(*gateway);

	addr_put_link((struct sockaddr *)gateway, &addr_len,
	    ifdev_get_index(ifdev), ifdev_get_iftype(ifdev), NULL /*name*/,
	    lladdr, ifdev_get_hwlen(ifdev));

	*ifdevp = ifdev;
	*flagsp = RTF_HOST | RTF_LLINFO | RTF_LLDATA | RTF_CLONED;
}

/*
 * Obtain information about the NDP table entry with the number 'num', which
 * must be obtained through a previous call to lldata_ndp_find().  On return,
 * 'asked' is filled with the number of probes sent so far (0 if inapplicable),
 * 'isrouter' is set to 1 or 0 depending on whether the entry is for a router,
 * 'state' is set to the entry's state (ND6_LLINFO_), and 'expire' is set to
 * the UNIX timestamp of expiry for the entry, or 0 if the entry does not
 * expire.  None of the given pointers must be NULL.  This function always
 * succeeds.
 */
void
lldata_ndp_get_info(lldata_ndp_num_t num, long * asked, int * isrouter,
	int * state, int * expire)
{
	uint32_t nd6_probes_sent = 0 /*gcc*/, nd6_expire_time = 0 /*gcc*/;
	uint8_t nd6_state = 0 /*gcc*/, nd6_isrouter = 0 /*gcc*/;

	(void)nd6_get_neighbor_cache_entry(num, NULL /*netif*/,
	    NULL /*lladdr*/, &nd6_state, &nd6_isrouter, &nd6_probes_sent,
	    &nd6_expire_time);

	*asked = (long)nd6_probes_sent;

	*isrouter = !!nd6_isrouter;

	switch (nd6_state) {
	case ND6_INCOMPLETE:	*state = ND6_LLINFO_INCOMPLETE;	break;
	case ND6_REACHABLE:	*state = ND6_LLINFO_REACHABLE;	break;
	case ND6_STALE:		*state = ND6_LLINFO_STALE;	break;
	case ND6_DELAY:		*state = ND6_LLINFO_DELAY;	break;
	case ND6_PROBE:		*state = ND6_LLINFO_PROBE;	break;
	default:		panic("unknown ND6 state %u", nd6_state);
	}

	if (nd6_expire_time != 0)
		*expire = clock_time(NULL) +
		    (int)nd6_expire_time * (ND6_TMR_INTERVAL / 1000);
	else
		*expire = 0;
}

/*
 * Process a routing command specifically for a link-layer route, as one of the
 * specific continuations of processing started by route_process().  The RTM_
 * routing command is given as 'type'.  The route destination is given as
 * 'dst_addr'; its address type determines whether the operation is for ARP or
 * NDP.  The sockaddr structure for 'gateway' is passed on as is and may have
 * to be parsed here if not NULL.  'ifdev' is the interface to be associated
 * with the route; it is non-NULL only if an interface name (IFP) or address
 * (IFA) was given.  The RTF_ flags field has been checked against the globally
 * supported flags, but may have to be checked for flags that do not apply to
 * ARP/NDP routes.  Return OK or a negative error code, following the same
 * semantics as route_process().
 */
int
lldata_process(unsigned int type, const ip_addr_t * dst_addr,
	const struct sockaddr * gateway, struct ifdev * ifdev,
	unsigned int flags, const struct rtsock_request * rtr)
{
	const struct route_entry *route;
	struct eth_addr ethaddr, *gw_addr;
	int r;

	assert(flags & RTF_LLDATA);

	/*
	 * It seems that RTF_UP does not apply to link-layer routing entries.
	 * We basically accept any flags that we can return, but we do not
	 * actually check most of them anywhere.
	 */
	if ((flags & ~(RTF_HOST | RTF_LLINFO | RTF_LLDATA | RTF_STATIC |
	    RTF_CLONED | RTF_ANNOUNCE)) != 0)
		return EINVAL;

	gw_addr = NULL;

	if (type == RTM_ADD || type == RTM_CHANGE) {
		/*
		 * Link-layer entries are always host entries.  Not all
		 * requests pass in this flag though, so check only when the
		 * flags are supposed to be set.
		 */
		if ((type == RTM_ADD || type == RTM_CHANGE) &&
		    !(flags & RTF_HOST))
			return EINVAL;

		/* lwIP does not support publishing custom entries. */
		if (flags & RTF_ANNOUNCE)
			return ENOSYS;

		/* RTF_GATEWAY is always cleared for link-layer entries. */
		if (gateway != NULL) {
			if ((r = addr_get_link(gateway, gateway->sa_len,
			    NULL /*name*/, 0 /*name_max*/, ethaddr.addr,
			    sizeof(ethaddr.addr))) != OK)
				return r;

			gw_addr = &ethaddr;
		}

		if (type == RTM_ADD) {
			if (gateway == NULL)
				return EINVAL;

			/*
			 * If no interface has been specified, see if the
			 * destination address is on a locally connected
			 * network.  If so, use that network's interface.
			 * Otherwise reject the request altogether: we must
			 * have an interface to which to associate the entry.
			 */
			if (ifdev == NULL) {
				if ((route = route_lookup(dst_addr)) != NULL &&
				    !(route_get_flags(route) & RTF_GATEWAY))
					ifdev = route_get_ifdev(route);
				else
					return ENETUNREACH;
			}
		}
	}

	if (IP_IS_V4(dst_addr))
		return lldata_arp_process(type, dst_addr, gw_addr, ifdev,
		    flags, rtr);
	else
		return lldata_ndp_process(type, dst_addr, gw_addr, ifdev,
		    flags, rtr);
}
27	minix/net/lwip/lldata.h	Normal file
@@ -0,0 +1,27 @@
#ifndef MINIX_NET_LWIP_LLDATA_H
#define MINIX_NET_LWIP_LLDATA_H

struct rtsock_request;

typedef int lldata_arp_num_t;		/* ARP table entry number */
typedef int lldata_ndp_num_t;		/* NDP table entry number */

int lldata_arp_enum(lldata_arp_num_t * num);
void lldata_arp_get(lldata_arp_num_t num, struct sockaddr_in * addr,
	struct sockaddr_dlx * gateway, struct ifdev ** ifdevp,
	unsigned int * flagsp);

int lldata_ndp_find(struct ifdev * ifdev,
	const struct sockaddr_in6 * addr, lldata_ndp_num_t * nump);
int lldata_ndp_enum(lldata_ndp_num_t * num);
void lldata_ndp_get(lldata_ndp_num_t num, struct sockaddr_in6 * addr,
	struct sockaddr_dlx * gateway, struct ifdev ** ifdevp,
	unsigned int * flagsp);
void lldata_ndp_get_info(lldata_ndp_num_t num, long * asked, int * isrouter,
	int * state, int * expire);

int lldata_process(unsigned int type, const ip_addr_t * dst_addr,
	const struct sockaddr * gateway, struct ifdev * ifdev,
	unsigned int flags, const struct rtsock_request * rtr);

#endif /* !MINIX_NET_LWIP_LLDATA_H */
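A sketch of how an ndp(8)-style lookup might combine these calls, assuming a valid 'ifdev' and a KAME-formatted 'sin6' (hypothetical variables):

	lldata_ndp_num_t num;
	long asked;
	int isrouter, state, expire;

	if (lldata_ndp_find(ifdev, &sin6, &num) == OK)
		lldata_ndp_get_info(num, &asked, &isrouter, &state,
		    &expire);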
77	minix/net/lwip/lnksock.c	Normal file
@@ -0,0 +1,77 @@
/* LWIP service - lnksock.c - link sockets */
/*
 * This module contains absolutely minimal support for AF_LINK type sockets,
 * because for now we need them only to support a specific set of IOCTLs, as
 * required by, for example, ifconfig(8).
 */

#include "lwip.h"

/* The number of link sockets. */
#define NR_LNKSOCK	4

static struct lnksock {
	struct sock lnk_sock;			/* socket object, MUST be first */
	SIMPLEQ_ENTRY(lnksock) lnk_next;	/* next in free list */
} lnk_array[NR_LNKSOCK];

static SIMPLEQ_HEAD(, lnksock) lnk_freelist;	/* list of free link sockets */

static const struct sockevent_ops lnksock_ops;

/*
 * Initialize the link sockets module.
 */
void
lnksock_init(void)
{
	unsigned int slot;

	/* Initialize the list of free link sockets. */
	SIMPLEQ_INIT(&lnk_freelist);

	for (slot = 0; slot < __arraycount(lnk_array); slot++)
		SIMPLEQ_INSERT_TAIL(&lnk_freelist, &lnk_array[slot], lnk_next);
}

/*
 * Create a link socket.
 */
sockid_t
lnksock_socket(int type, int protocol, struct sock ** sockp,
	const struct sockevent_ops ** ops)
{
	struct lnksock *lnk;

	if (type != SOCK_DGRAM)
		return EPROTOTYPE;

	if (protocol != 0)
		return EPROTONOSUPPORT;

	if (SIMPLEQ_EMPTY(&lnk_freelist))
		return ENOBUFS;

	lnk = SIMPLEQ_FIRST(&lnk_freelist);
	SIMPLEQ_REMOVE_HEAD(&lnk_freelist, lnk_next);

	*sockp = &lnk->lnk_sock;
	*ops = &lnksock_ops;
	return SOCKID_LNK | (sockid_t)(lnk - lnk_array);
}

/*
 * Free up a closed link socket.
 */
static void
lnksock_free(struct sock * sock)
{
	struct lnksock *lnk = (struct lnksock *)sock;

	SIMPLEQ_INSERT_HEAD(&lnk_freelist, lnk, lnk_next);
}

static const struct sockevent_ops lnksock_ops = {
	.sop_ioctl	= ifconf_ioctl,
	.sop_free	= lnksock_free
};
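Userland reaches this module by opening an AF_LINK datagram socket and issuing interface IOCTLs on it; a minimal sketch, assuming SIOCGIFFLAGS is among the requests that ifconf_ioctl() handles (error handling abbreviated):

	struct ifreq ifr;
	int fd;

	fd = socket(AF_LINK, SOCK_DGRAM, 0);
	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, "lo0", sizeof(ifr.ifr_name));
	if (ioctl(fd, SIOCGIFFLAGS, &ifr) != 0)
		err(1, "SIOCGIFFLAGS");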
420	minix/net/lwip/loopif.c	Normal file
@@ -0,0 +1,420 @@
/* LWIP service - loopif.c - loopback interfaces */
/*
 * There is always at least one loopback device.  This device is used also to
 * loop back packets sent on other interfaces to the local interface address.
 * Therefore, not all packets on the loopback device have a source or
 * destination address corresponding to the loopback device.
 */

#include "lwip.h"

/*
 * As a safety measure, if lwIP somehow gets stuck in a loop replying to its
 * own packets on a loopback interface, stop immediately feeding packets back
 * into lwIP after this many packets.  The remaining packets will still be
 * delivered, but not before the main message loop has had a chance to run.
 */
#define LOOPIF_LIMIT	65536

/*
 * The MTU is restricted to 65531 bytes, because we need space for a 4-byte
 * header to identify the original interface of the packet.
 */
#define LOOPIF_MAX_MTU	(UINT16_MAX - sizeof(uint32_t))	/* maximum MTU */
#define LOOPIF_DEF_MTU	LOOPIF_MAX_MTU			/* default MTU */

#define NR_LOOPIF	2	/* number of loopback devices */

struct loopif {
	struct ifdev loopif_ifdev;	/* interface device, MUST be first */
	struct pbuf *loopif_head;	/* head of pending loopback packets */
	struct pbuf **loopif_tailp;	/* tail ptr-ptr of pending packets */
	TAILQ_ENTRY(loopif) loopif_next;	/* next in free list */
} loopif_array[NR_LOOPIF];

static TAILQ_HEAD(, loopif) loopif_freelist;	/* free loop interfaces list */
static TAILQ_HEAD(, loopif) loopif_activelist;	/* active loop interfaces */

#define loopif_get_netif(loopif) (ifdev_get_netif(&(loopif)->loopif_ifdev))

static unsigned int loopif_cksum_flags;

static int loopif_create(const char *name);

static const struct ifdev_ops loopif_ops;

/*
 * Initialize the loopback interface module.
 */
void
loopif_init(void)
{
	unsigned int slot;

	/* Initialize the lists of loopback interfaces. */
	TAILQ_INIT(&loopif_freelist);
	TAILQ_INIT(&loopif_activelist);

	for (slot = 0; slot < __arraycount(loopif_array); slot++)
		TAILQ_INSERT_TAIL(&loopif_freelist, &loopif_array[slot],
		    loopif_next);

	/*
	 * The default is to perform no checksumming on loopback interfaces,
	 * except for ICMP messages because otherwise we would need additional
	 * changes in the code receiving those.  In fact, for future
	 * compatibility, disable only those flags that we manage ourselves.
	 */
	loopif_cksum_flags = NETIF_CHECKSUM_ENABLE_ALL &
	    ~(NETIF_CHECKSUM_GEN_IP | NETIF_CHECKSUM_CHECK_IP |
	    NETIF_CHECKSUM_GEN_UDP | NETIF_CHECKSUM_CHECK_UDP |
	    NETIF_CHECKSUM_GEN_TCP | NETIF_CHECKSUM_CHECK_TCP);

	/* Tell the ifdev module that users may create more loopif devices. */
	ifdev_register("lo", loopif_create);
}
|
||||
|
||||
/*
|
||||
* Polling function, invoked after each message loop iteration. Forward any
|
||||
* packets received on the output side of the loopback device during this
|
||||
* loop iteration, to the input side of the device.
|
||||
*/
|
||||
static void
|
||||
loopif_poll(struct ifdev * ifdev)
|
||||
{
|
||||
struct loopif *loopif = (struct loopif *)ifdev;
|
||||
struct pbuf *pbuf, **pnext;
|
||||
struct ifdev *oifdev;
|
||||
struct netif *netif;
|
||||
uint32_t oifindex;
|
||||
unsigned int count;
|
||||
static int warned = FALSE;
|
||||
|
||||
count = 0;
|
||||
|
||||
while ((pbuf = loopif->loopif_head) != NULL) {
|
||||
/*
|
||||
* Prevent endless loops. Keep in mind that packets may be
|
||||
* added to the queue as part of processing packets from the
|
||||
* queue here, so the queue itself will never reach this
|
||||
* length. As such the limit can (and must) be fairly high.
|
||||
*
|
||||
* In any case, if this warning is shown, that basically means
|
||||
* that a bug in lwIP has been triggered. There should be no
|
||||
* such bugs, so if there are, they should be fixed in lwIP.
|
||||
*/
|
||||
if (count++ == LOOPIF_LIMIT) {
|
||||
if (!warned) {
|
||||
printf("LWIP: excess loopback traffic, "
|
||||
"throttling output\n");
|
||||
warned = TRUE;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
pnext = pchain_end(pbuf);
|
||||
|
||||
if ((loopif->loopif_head = *pnext) == NULL)
|
||||
loopif->loopif_tailp = &loopif->loopif_head;
|
||||
*pnext = NULL;
|
||||
|
||||
/*
|
||||
* Get the original interface for the packet, which if non-zero
|
||||
* must also be used to pass the packet back to. The interface
|
||||
* should still exist in all cases, but better safe than sorry.
|
||||
*/
|
||||
memcpy(&oifindex, pbuf->payload, sizeof(oifindex));
|
||||
|
||||
util_pbuf_header(pbuf, -(int)sizeof(oifindex));
|
||||
|
||||
if (oifindex != 0 &&
|
||||
(oifdev = ifdev_get_by_index(oifindex)) != NULL)
|
||||
netif = ifdev_get_netif(oifdev);
|
||||
else
|
||||
netif = NULL;
|
||||
|
||||
/*
|
||||
* Loopback devices hand packets to BPF on output only. Doing
|
||||
* so on input as well would duplicate all captured packets.
|
||||
*/
|
||||
ifdev_input(ifdev, pbuf, netif, FALSE /*to_bpf*/);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Process a packet as output on a loopback interface. Packets cannot be
|
||||
* passed back into lwIP right away, nor can the original packets be passed
|
||||
* back into lwIP. Therefore, make a copy of the packet, and pass it back to
|
||||
* lwIP at the end of the current message loop iteration.
|
||||
*/
|
||||
static err_t
|
||||
loopif_output(struct ifdev * ifdev, struct pbuf * pbuf, struct netif * netif)
|
||||
{
|
||||
struct loopif *loopif = (struct loopif *)ifdev;
|
||||
struct ifdev *oifdev;
|
||||
struct pbuf *pcopy;
|
||||
uint32_t oifindex;
|
||||
|
||||
/* Reject oversized packets immediately. This should not happen. */
|
||||
if (pbuf->tot_len > UINT16_MAX - sizeof(oifindex)) {
|
||||
printf("LWIP: attempt to send oversized loopback packet\n");
|
||||
|
||||
return ERR_MEM;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the service is low on memory, this is a likely place where
|
||||
* allocation failures will occur. Thus, do not print anything here.
|
||||
* The user can diagnose such problems with interface statistics.
|
||||
*/
|
||||
pcopy = pchain_alloc(PBUF_RAW, sizeof(oifindex) + pbuf->tot_len);
|
||||
if (pcopy == NULL) {
|
||||
ifdev_output_drop(ifdev);
|
||||
|
||||
return ERR_MEM;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the packet was purposely diverted from a non-loopback interface
|
||||
* to this interface, we have to remember the original interface, so
|
||||
* that we can pass back the packet to that interface as well. If we
|
||||
* don't, packets to link-local addresses assigned to non-loopback
|
||||
* interfaces will not be processed correctly.
|
||||
*/
|
||||
if (netif != NULL) {
|
||||
oifdev = netif_get_ifdev(netif);
|
||||
oifindex = ifdev_get_index(oifdev);
|
||||
} else
|
||||
oifindex = 0;
|
||||
|
||||
assert(pcopy->len >= sizeof(oifindex));
|
||||
|
||||
memcpy(pcopy->payload, &oifindex, sizeof(oifindex));
|
||||
|
||||
util_pbuf_header(pcopy, -(int)sizeof(oifindex));
|
||||
|
||||
if (pbuf_copy(pcopy, pbuf) != ERR_OK)
|
||||
panic("unexpected pbuf copy failure");
|
||||
|
||||
pcopy->flags |= pbuf->flags & (PBUF_FLAG_LLMCAST | PBUF_FLAG_LLBCAST);
|
||||
|
||||
util_pbuf_header(pcopy, sizeof(oifindex));
|
||||
|
||||
*loopif->loopif_tailp = pcopy;
|
||||
loopif->loopif_tailp = pchain_end(pcopy);
|
||||
|
||||
return ERR_OK;
|
||||
}
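
/*
 * For reference, each copy queued above has the following layout; the field
 * name is illustrative only:
 *
 *	+--------------------+----------------------------------+
 *	| uint32_t oifindex  | original packet data (tot_len)   |
 *	+--------------------+----------------------------------+
 *
 * loopif_poll() later retrieves the index with memcpy(), strips it using
 * util_pbuf_header(), and hands the rest to ifdev_input().
 */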

/*
 * Initialization function for a loopback-type netif interface, called from
 * lwIP at interface creation time.
 */
static err_t
loopif_init_netif(struct ifdev * ifdev, struct netif * netif)
{

	netif->name[0] = 'l';
	netif->name[1] = 'o';

	/*
	 * FIXME: unfortunately, lwIP does not allow one to enable multicast
	 * on an interface without also enabling multicast management traffic
	 * (that is, IGMP and MLD).  Thus, for now, joining multicast groups
	 * and assigning local IPv6 addresses will incur such traffic even on
	 * loopback interfaces.  Still, this is preferable to not supporting
	 * multicast on loopback interfaces at all.
	 */
	netif->flags |= NETIF_FLAG_IGMP | NETIF_FLAG_MLD6;

	NETIF_SET_CHECKSUM_CTRL(netif, loopif_cksum_flags);

	return ERR_OK;
}

/*
 * Create a new loopback device.
 */
static int
loopif_create(const char * name)
{
	struct loopif *loopif;

	/* Find a free loopback interface slot, if available. */
	if (TAILQ_EMPTY(&loopif_freelist))
		return ENOBUFS;

	loopif = TAILQ_FIRST(&loopif_freelist);
	TAILQ_REMOVE(&loopif_freelist, loopif, loopif_next);

	/* Initialize the loopif structure. */
	TAILQ_INSERT_HEAD(&loopif_activelist, loopif, loopif_next);

	loopif->loopif_head = NULL;
	loopif->loopif_tailp = &loopif->loopif_head;

	/*
	 * For simplicity and efficiency, we do not prepend the address family
	 * (IPv4/IPv6) to the packet for BPF, which means our loopback devices
	 * are of type DLT_RAW rather than (NetBSD's) DLT_NULL.
	 */
	ifdev_add(&loopif->loopif_ifdev, name, IFF_LOOPBACK | IFF_MULTICAST,
	    IFT_LOOP, 0 /*hdrlen*/, 0 /*addrlen*/, DLT_RAW, LOOPIF_MAX_MTU,
	    0 /*nd6flags*/, &loopif_ops);

	ifdev_update_link(&loopif->loopif_ifdev, LINK_STATE_UP);

	return OK;
}

/*
 * Destroy an existing loopback device.
 */
static int
loopif_destroy(struct ifdev * ifdev)
{
	struct loopif *loopif = (struct loopif *)ifdev;
	struct pbuf *pbuf, **pnext;
	int r;

	/*
	 * The ifdev module may refuse to remove this interface if it is the
	 * loopback interface used to loop back packets for other interfaces.
	 */
	if ((r = ifdev_remove(&loopif->loopif_ifdev)) != OK)
		return r;

	/*
	 * Clean up.  The loopback queue can be non-empty only if we have been
	 * throttling in case of a feedback loop.
	 */
	while ((pbuf = loopif->loopif_head) != NULL) {
		pnext = pchain_end(pbuf);

		if ((loopif->loopif_head = *pnext) == NULL)
			loopif->loopif_tailp = &loopif->loopif_head;
		*pnext = NULL;

		pbuf_free(pbuf);
	}

	TAILQ_REMOVE(&loopif_activelist, loopif, loopif_next);

	TAILQ_INSERT_HEAD(&loopif_freelist, loopif, loopif_next);

	return OK;
}

/*
 * Set NetBSD-style interface flags (IFF_) for a loopback interface.
 */
static int
loopif_set_ifflags(struct ifdev * ifdev, unsigned int ifflags)
{
	struct loopif *loopif = (struct loopif *)ifdev;

	/*
	 * Only the IFF_UP flag may be set and cleared.  We adjust the
	 * IFF_RUNNING flag immediately based on this flag.  This is a bit
	 * dangerous, but the caller takes this possibility into account.
	 */
	if ((ifflags & ~IFF_UP) != 0)
		return EINVAL;

	if (ifflags & IFF_UP)
		ifdev_update_ifflags(&loopif->loopif_ifdev,
		    ifdev_get_ifflags(&loopif->loopif_ifdev) | IFF_RUNNING);
	else
		ifdev_update_ifflags(&loopif->loopif_ifdev,
		    ifdev_get_ifflags(&loopif->loopif_ifdev) & ~IFF_RUNNING);

	return OK;
}

/*
 * Set the Maximum Transmission Unit for this interface.  Return TRUE if the
 * new value is acceptable, in which case the caller will do the rest.
 * Return FALSE otherwise.
 */
static int
loopif_set_mtu(struct ifdev * ifdev __unused, unsigned int mtu)
{

	return (mtu <= LOOPIF_MAX_MTU);
}

static const struct ifdev_ops loopif_ops = {
	.iop_init = loopif_init_netif,
	.iop_input = ip_input,
	.iop_output = loopif_output,
	.iop_poll = loopif_poll,
	.iop_set_ifflags = loopif_set_ifflags,
	.iop_set_mtu = loopif_set_mtu,
	.iop_destroy = loopif_destroy,
};

/*
 * Set and/or retrieve a per-protocol loopback checksumming option through
 * sysctl(7).
 */
ssize_t
loopif_cksum(struct rmib_call * call, struct rmib_node * node __unused,
	struct rmib_oldp * oldp, struct rmib_newp * newp)
{
	struct loopif *loopif;
	unsigned int flags;
	int r, val;

	/*
	 * The third name field is the protocol.  We ignore the domain (the
	 * second field), thus sharing settings between PF_INET and PF_INET6.
	 * This is necessary because lwIP does not support TCP/UDP
	 * checksumming flags on a per-domain basis.
	 */
	switch (call->call_oname[2]) {
	case IPPROTO_IP:
		flags = NETIF_CHECKSUM_GEN_IP | NETIF_CHECKSUM_CHECK_IP;
		break;
	case IPPROTO_UDP:
		flags = NETIF_CHECKSUM_GEN_UDP | NETIF_CHECKSUM_CHECK_UDP;
		break;
	case IPPROTO_TCP:
		flags = NETIF_CHECKSUM_GEN_TCP | NETIF_CHECKSUM_CHECK_TCP;
		break;
	default:
		return EINVAL;
	}

	/* Copy out the old (current) checksumming option. */
	if (oldp != NULL) {
		val = !!(loopif_cksum_flags & flags);

		if ((r = rmib_copyout(oldp, 0, &val, sizeof(val))) < 0)
			return r;
	}

	if (newp != NULL) {
		if ((r = rmib_copyin(newp, &val, sizeof(val))) != OK)
			return r;

		if (val)
			loopif_cksum_flags |= flags;
		else
			loopif_cksum_flags &= ~flags;

		/*
		 * Apply the new checksum flags to all loopback interfaces.
		 * Technically, this may result in dropped packets when
		 * enabling checksumming on a throttled loopif, but that is a
		 * case so rare and unimportant that we ignore it.
		 */
		TAILQ_FOREACH(loopif, &loopif_activelist, loopif_next) {
			NETIF_SET_CHECKSUM_CTRL(loopif_get_netif(loopif),
			    loopif_cksum_flags);
		}
	}

	/* Return the length of the node. */
	return sizeof(val);
}
382	minix/net/lwip/lwip.c	Normal file
@@ -0,0 +1,382 @@
/* LWIP service - lwip.c - main program and dispatch code */

#include "lwip.h"
#include "tcpisn.h"
#include "mcast.h"
#include "ethif.h"
#include "rtsock.h"
#include "route.h"
#include "bpfdev.h"

#include "lwip/init.h"
#include "lwip/sys.h"
#include "lwip/timeouts.h"
#include "arch/cc.h"

static int running, recheck_timer;
static minix_timer_t lwip_timer;

static void expire_lwip_timer(int);

/*
 * Return the system uptime in milliseconds.  Also remember that lwIP
 * retrieved the system uptime during this call, so that we know to check for
 * timer updates at the end of the current iteration of the message loop.
 */
uint32_t
sys_now(void)
{

	recheck_timer = TRUE;

	/* TODO: avoid 64-bit arithmetic if possible. */
	return (uint32_t)(((uint64_t)getticks() * 1000) / sys_hz());
}
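
/*
 * Worked example: at a clock rate of, say, 100 ticks per second and an
 * uptime of 250 ticks, this returns 250 * 1000 / 100 = 2500 ms.  The 64-bit
 * intermediate product keeps the multiplication from overflowing once the
 * uptime grows large.
 */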

/*
 * Check if and when lwIP has its next timeout, and set or cancel our timer
 * accordingly.
 */
static void
set_lwip_timer(void)
{
	uint32_t next_timeout;
	clock_t ticks;

	/* Ask lwIP when the next alarm is supposed to go off, if any. */
	next_timeout = sys_timeouts_sleeptime();

	/*
	 * Set or update the lwIP timer.  We rely on set_timer() asking the
	 * kernel for an alarm only if the timeout is different from the one
	 * we gave it last time (if at all).  However, due to conversions
	 * between absolute and relative times, and the fact that we cannot
	 * guarantee that the uptime itself does not change while executing
	 * these routines, set_timer() will sometimes be issuing a kernel
	 * call even if the alarm has not changed.  Not a huge deal, but
	 * fixing this would require a different interface to lwIP and/or the
	 * timers library.
	 */
	if (next_timeout != (uint32_t)-1) {
		/*
		 * Round up the next timeout (which is in milliseconds) to
		 * the number of clock ticks to add to the current time.
		 * Avoid any potential for overflows, no matter how
		 * unrealistic..
		 */
		if (next_timeout > TMRDIFF_MAX / sys_hz())
			ticks = TMRDIFF_MAX;
		else
			ticks = (next_timeout * sys_hz() + 999) / 1000;

		set_timer(&lwip_timer, ticks, expire_lwip_timer, 0 /*unused*/);
	} else
		cancel_timer(&lwip_timer);	/* not really needed.. */
}
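
/*
 * Worked example of the rounding above: with sys_hz() at 100 and a next
 * timeout of 25 ms, (25 * 100 + 999) / 1000 = 3 ticks, i.e. the exact value
 * of 2.5 ticks rounded up, so that the alarm can never fire early.
 */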

/*
 * The timer for lwIP timeouts has gone off.  Check timeouts, and possibly
 * set a new timer.
 */
static void
expire_lwip_timer(int arg __unused)
{

	/* Let lwIP do its work. */
	sys_check_timeouts();

	/*
	 * See if we have to update our timer for the next lwIP timer.  Doing
	 * this here, rather than from the main loop, avoids one kernel call.
	 */
	set_lwip_timer();

	recheck_timer = FALSE;
}

/*
 * Check whether we should adjust our local timer based on a change in the
 * next lwIP timeout.
 */
static void
check_lwip_timer(void)
{

	/*
	 * We make the assumption that whenever lwIP starts a timer, it will
	 * need to retrieve the current time.  Thus, whenever sys_now() is
	 * called, we set the 'recheck_timer' flag.  Here, we check whether
	 * to (re)set our lwIP timer only if the flag is set.  As a result,
	 * we do not have to mess with timers for literally every incoming
	 * message.
	 *
	 * When lwIP stops a timer, it does not call sys_now(), and thus, we
	 * may miss such updates.  However, timers being stopped should be
	 * rare, and getting too many alarm messages is not a big deal.
	 */
	if (!recheck_timer)
		return;

	set_lwip_timer();

	/* Reset the flag for the next message loop iteration. */
	recheck_timer = FALSE;
}

/*
 * Return a random number, for use by lwIP.
 */
uint32_t
lwip_hook_rand(void)
{

	/*
	 * The current known uses of this hook are for selection of initial
	 * TCP/UDP port numbers and for multicast-related timer randomness.
	 * The former case exists only to avoid picking the same starting
	 * port numbers after a reboot.  After that, simple sequential
	 * iteration of the port numbers is used.  The latter case varies the
	 * response time for sending multicast messages.  Thus, none of the
	 * current uses of this function require proper randomness, and so we
	 * use the simplest approach, with time-based initialization to cover
	 * the reboot case.  The sequential port number selection could be
	 * improved upon, but such an extension would probably bypass this
	 * hook anyway.
	 */
	return lrand48();
}

/*
 * Create a new socket, with the given domain, type, and protocol, for the
 * user process identified by 'user_endpt'.  On success, return the new
 * socket's identifier, with the libsockevent socket stored in 'sock' and an
 * operations table stored in 'ops'.  On failure, return a negative error
 * code.
 */
static sockid_t
alloc_socket(int domain, int type, int protocol, endpoint_t user_endpt,
	struct sock ** sock, const struct sockevent_ops **ops)
{

	switch (domain) {
	case PF_INET:
#ifdef INET6
	case PF_INET6:
#endif /* INET6 */
		switch (type) {
		case SOCK_STREAM:
			return tcpsock_socket(domain, protocol, sock, ops);

		case SOCK_DGRAM:
			return udpsock_socket(domain, protocol, sock, ops);

		case SOCK_RAW:
			if (!util_is_root(user_endpt))
				return EACCES;

			return rawsock_socket(domain, protocol, sock, ops);

		default:
			return EPROTOTYPE;
		}

	case PF_ROUTE:
		return rtsock_socket(type, protocol, sock, ops);

	case PF_LINK:
		return lnksock_socket(type, protocol, sock, ops);

	default:
		/* This means that the service has been misconfigured. */
		printf("socket() with unsupported domain %d\n", domain);

		return EAFNOSUPPORT;
	}
}
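
/*
 * As an example of the dispatch above: a userland socket(AF_INET,
 * SOCK_STREAM, 0) call arrives here as alloc_socket(PF_INET, SOCK_STREAM,
 * 0, ...) and is handed to tcpsock_socket(), while raw sockets additionally
 * require superuser privileges.
 */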

/*
 * Initialize the service.
 */
static int
init(int type __unused, sef_init_info_t * init __unused)
{

	/*
	 * Initialize the random number seed.  See the lwip_hook_rand()
	 * comment on why this weak random number source is currently
	 * sufficient.
	 */
	srand48(clock_time(NULL));

	/* Initialize the lwIP library. */
	lwip_init();

	/* Initialize the socket events library. */
	sockevent_init(alloc_socket);

	/* Initialize various helper modules. */
	mempool_init();
	tcpisn_init();
	mcast_init();

	/* Initialize the high-level socket modules. */
	ipsock_init();
	tcpsock_init();
	udpsock_init();
	rawsock_init();

	/* Initialize the various network interface modules. */
	ifdev_init();
	loopif_init();
	ethif_init();

	/* Initialize the network device driver module. */
	ndev_init();

	/* Initialize the low-level socket modules. */
	rtsock_init();
	lnksock_init();

	/* Initialize the routing module. */
	route_init();

	/* Initialize other device modules. */
	bpfdev_init();

	/*
	 * Initialize the MIB module, after all other modules have registered
	 * their subtrees with this module.
	 */
	mibtree_init();

	/*
	 * After everything else has been initialized, set up the default
	 * configuration - in particular, a loopback interface.
	 */
	ifconf_init();

	/*
	 * Initialize the master timer for all the lwIP timers.  Just in case
	 * lwIP starts a timer right away, perform a first check upon entry
	 * of the message loop.
	 */
	init_timer(&lwip_timer);

	recheck_timer = TRUE;

	running = TRUE;

	return OK;
}

/*
 * Perform initialization using the System Event Framework (SEF).
 */
static void
startup(void)
{

	sef_setcb_init_fresh(init);
	/*
	 * This service requires stateless restarts, in that several parts of
	 * the system (including VFS and drivers) expect that if restarted,
	 * this service comes back up with a new endpoint.  Therefore, do not
	 * set a _restart callback here.
	 *
	 * TODO: support for live update.
	 *
	 * TODO: support for immediate shutdown if no sockets are in use, as
	 * also done by UDS.  For now, we never shut down immediately, giving
	 * other processes the opportunity to close sockets on system
	 * shutdown.
	 */

	sef_startup();
}

/*
 * The lwIP-based TCP/IP sockets driver.
 */
int
main(void)
{
	message m;
	int r, ipc_status;

	startup();

	while (running) {
		/*
		 * For various reasons, the loopback interface does not pass
		 * packets back into the stack right away.  Instead, it
		 * queues them up for later processing.  We do that
		 * processing here.
		 */
		ifdev_poll();

		/*
		 * Unfortunately, lwIP does not tell us when it starts or
		 * stops timers.  This means that we have to check ourselves
		 * every time we have called into lwIP.  For simplicity, we
		 * perform the check here.
		 */
		check_lwip_timer();

		if ((r = sef_receive_status(ANY, &m, &ipc_status)) != OK) {
			if (r == EINTR)
				continue;	/* sef_cancel() was called */

			panic("sef_receive_status failed: %d", r);
		}

		/* Process the received message. */
		if (is_ipc_notify(ipc_status)) {
			switch (m.m_source) {
			case CLOCK:
				expire_timers(m.m_notify.timestamp);

				break;

			case DS_PROC_NR:
				/* Network drivers went up and/or down. */
				ndev_check();

				break;

			default:
				printf("unexpected notify from %d\n",
				    m.m_source);
			}

			continue;
		}

		switch (m.m_source) {
		case MIB_PROC_NR:
			rmib_process(&m, ipc_status);

			break;

		case VFS_PROC_NR:
			/* Is this a socket device request? */
			if (IS_SDEV_RQ(m.m_type)) {
				sockevent_process(&m, ipc_status);

				break;
			}

			/* Is this a character (or block) device request? */
			if (IS_CDEV_RQ(m.m_type) || IS_BDEV_RQ(m.m_type)) {
				bpfdev_process(&m, ipc_status);

				break;
			}

			/* FALLTHROUGH */
		default:
			/* Is this a network device driver response? */
			if (IS_NDEV_RS(m.m_type)) {
				ndev_process(&m, ipc_status);

				break;
			}

			printf("unexpected message %d from %d\n",
			    m.m_type, m.m_source);
		}
	}

	return 0;
}
10	minix/net/lwip/lwip.conf	Normal file
@@ -0,0 +1,10 @@
service lwip
{
	domain
		INET INET6 ROUTE LINK
		;
	system KILL;		# for SIGPIPE
	ipc
		SYSTEM vfs rs vm mib
		;
};
130	minix/net/lwip/lwip.h	Normal file
@@ -0,0 +1,130 @@
#ifndef MINIX_NET_LWIP_LWIP_H
#define MINIX_NET_LWIP_LWIP_H

#include <minix/drivers.h>
#include <minix/sockevent.h>
#include <minix/rmib.h>
#include <netinet/in.h>
#include <sys/ioctl.h>
#include <net/bpf.h>

#include "lwip/ip.h"
#include "lwiphooks.h"

#include "addr.h"
#include "ipsock.h"
#include "ifdev.h"
#include "util.h"

/*
 * The standard sockaddr_dl is an absolute pain, because the actual structure
 * is dynamically sized, while the standard definition is neither the minimum
 * nor the maximum size.  We use our own version, which uses the maximum size
 * that we will ever produce and accept.  This greatly simplifies dealing
 * with this structure while also limiting stack usage a bit.
 */
struct sockaddr_dlx {
	uint8_t		sdlx_len;	/* actual length of this structure */
	sa_family_t	sdlx_family;	/* address family, always AF_LINK */
	uint16_t	sdlx_index;	/* interface index */
	uint8_t		sdlx_type;	/* interface type (IFT_) */
	uint8_t		sdlx_nlen;	/* interface name length, w/o nul */
	uint8_t		sdlx_alen;	/* link-layer address length */
	uint8_t		sdlx_slen;	/* selector length, always 0 */
	uint8_t		sdlx_data[IFNAMSIZ + NETIF_MAX_HWADDR_LEN];
};
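
/*
 * A minimal sketch of how such a structure could be filled in, assuming the
 * usual sockaddr_dl conventions (interface name first in sdlx_data, followed
 * by the link-layer address); the variables used here are hypothetical:
 *
 *	nlen = strlen(name);
 *	memset(&sdlx, 0, sizeof(sdlx));
 *	sdlx.sdlx_family = AF_LINK;
 *	sdlx.sdlx_index = ifindex;
 *	sdlx.sdlx_type = IFT_ETHER;
 *	sdlx.sdlx_nlen = nlen;
 *	sdlx.sdlx_alen = hwlen;
 *	memcpy(sdlx.sdlx_data, name, nlen);
 *	memcpy(sdlx.sdlx_data + nlen, hwaddr, hwlen);
 *	sdlx.sdlx_len = offsetof(struct sockaddr_dlx, sdlx_data) +
 *	    nlen + hwlen;
 */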

STATIC_SOCKADDR_MAX_ASSERT(sockaddr_in);
STATIC_SOCKADDR_MAX_ASSERT(sockaddr_in6);
STATIC_SOCKADDR_MAX_ASSERT(sockaddr_dlx);

/* This is our own, much smaller internal version of sockaddr_storage. */
union sockaddr_any {
	struct sockaddr sa;
	struct sockaddr_in sin;
	struct sockaddr_in6 sin6;
	struct sockaddr_dlx sdlx;
};

/* Number of bits in each of the types of IP addresses. */
#define IP4_BITS	32	/* number of bits in an IPv4 address */
#define IP6_BITS	128	/* number of bits in an IPv6 address */

/*
 * Each socket module maintains its own set of sockets, but all sockets must
 * be given globally unique identifiers.  Therefore, we use these modifier
 * masks, which are bitwise OR'ed with the per-module socket identifiers.
 */
#define SOCKID_TCP	0x00000000
#define SOCKID_UDP	0x00100000
#define SOCKID_RAW	0x00200000
#define SOCKID_RT	0x00400000
#define SOCKID_LNK	0x00800000
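
/*
 * For example, UDP socket number 5 gets identifier SOCKID_UDP | 5 =
 * 0x00100005.  The module can then be recovered from the upper bits and the
 * per-module slot from the lower bits; the low-bits mask shown here is
 * implied by the values above rather than defined anywhere:
 *
 *	slot = sockid & 0x000fffff;
 */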

/*
 * Static remote MIB node identifiers for nodes that are dynamically numbered
 * on NetBSD, because they do not have a corresponding protocol family
 * number.
 */
#define NET_INTERFACES	(PF_MAX)	/* net.interfaces (TODO) */
#define NET_BPF		(PF_MAX + 1)	/* net.bpf */

#define ROOT_EUID	0	/* effective user ID of superuser */

/*
 * Function declarations.  Modules with more extended interfaces have their
 * own header files.
 */

/* mempool.c */
void mempool_init(void);
unsigned int mempool_cur_buffers(void);
unsigned int mempool_max_buffers(void);

/* pchain.c */
struct pbuf **pchain_end(struct pbuf * pbuf);
size_t pchain_size(struct pbuf * pbuf);

/* addrpol.c */
int addrpol_get_label(const ip_addr_t * ipaddr);
int addrpol_get_scope(const ip_addr_t * ipaddr, int is_src);

/* tcpsock.c */
void tcpsock_init(void);
sockid_t tcpsock_socket(int domain, int protocol, struct sock ** sock,
	const struct sockevent_ops ** ops);

/* udpsock.c */
void udpsock_init(void);
sockid_t udpsock_socket(int domain, int protocol, struct sock ** sock,
	const struct sockevent_ops ** ops);

/* rawsock.c */
void rawsock_init(void);
sockid_t rawsock_socket(int domain, int protocol, struct sock ** sock,
	const struct sockevent_ops ** ops);

/* loopif.c */
void loopif_init(void);
ssize_t loopif_cksum(struct rmib_call * call, struct rmib_node * node,
	struct rmib_oldp * oldp, struct rmib_newp * newp);

/* lnksock.c */
void lnksock_init(void);
sockid_t lnksock_socket(int type, int protocol, struct sock ** sock,
	const struct sockevent_ops ** ops);

/* mibtree.c */
void mibtree_init(void);
void mibtree_register_inet(int domain, int protocol, struct rmib_node * node);
void mibtree_register_lwip(struct rmib_node * node);

/* ifconf.c */
void ifconf_init(void);
int ifconf_ioctl(struct sock * sock, unsigned long request,
	const struct sockdriver_data * data, endpoint_t user_endpt);

/* bpf_filter.c */
u_int bpf_filter_ext(const struct bpf_insn * pc, const struct pbuf * pbuf,
	const u_char * packet, u_int total, u_int len);

#endif /* !MINIX_NET_LWIP_LWIP_H */
283	minix/net/lwip/mcast.c	Normal file
@@ -0,0 +1,283 @@
/* LWIP service - mcast.c - per-socket multicast membership tracking */
/*
 * Each socket has a linked list of multicast groups of which it is a member.
 * The linked list consists of 'mcast_member' elements.  There is both a
 * global limit (the number of elements in 'mcast_array') and a per-socket
 * limit on group membership.  Since multiple sockets may join the same
 * multicast groups, there is not a one-to-one relationship between our
 * membership structures and the lwIP IGMP/MLD membership structures.
 * Moreover, linking to the latter structures directly is not intended by
 * lwIP, so we have to keep our own tracking independent, which in particular
 * means that we have to make a copy of the multicast group address.
 *
 * We currently put no effort into saving memory on storing that group
 * address.  Optimization is complicated by the fact that we have to be able
 * to remove membership structures when their corresponding interface
 * disappears, which currently involves removal without knowing about the
 * corresponding socket, and therefore the socket's address family.  All of
 * this can be changed.
 *
 * There is no function to test whether a particular socket is a member of a
 * multicast group.  The pktsock module currently makes the assumption that
 * if a socket has been joined to any multicast groups, or set any multicast
 * options, the application is multicast aware and therefore able to figure
 * out whether it is interested in particular packets, and so we do not
 * filter incoming packets against the receiving socket's multicast list.
 * This should be more or less in line with what W. Richard Stevens says the
 * BSDs do.
 */

#include "lwip.h"
#include "mcast.h"

#include "lwip/igmp.h"
#include "lwip/mld6.h"

/*
 * The per-socket limit on group membership.  In theory, the limit should be
 * high enough that a single socket can join a particular multicast group on
 * all interfaces that support multicast.  In practice, we set it a bit lower
 * to prevent one socket from using up half of the entries per address
 * family.  Setting it to IP_MAX_MEMBERSHIPS is definitely excessive right
 * now..
 */
#define MAX_GROUPS_PER_SOCKET	8

static struct mcast_member {
	LIST_ENTRY(mcast_member) mm_next;	/* next in socket, free list */
	struct ifdev * mm_ifdev;		/* interface (NULL: free) */
	ip_addr_t mm_group;			/* group address */
} mcast_array[NR_IPV4_MCAST_GROUP + NR_IPV6_MCAST_GROUP];

static LIST_HEAD(, mcast_member) mcast_freelist;

/*
 * Initialize the per-socket multicast membership module.
 */
void
mcast_init(void)
{
	unsigned int slot;

	/* Initialize the list of free multicast membership entries. */
	LIST_INIT(&mcast_freelist);

	for (slot = 0; slot < __arraycount(mcast_array); slot++) {
		mcast_array[slot].mm_ifdev = NULL;

		LIST_INSERT_HEAD(&mcast_freelist, &mcast_array[slot], mm_next);
	}
}

/*
 * Reset the multicast head for a socket.  The socket must not have any
 * previous multicast group memberships.
 */
void
mcast_reset(struct mcast_head * mcast_head)
{

	LIST_INIT(&mcast_head->mh_list);
}

/*
 * Attempt to add a per-socket multicast membership association.  The given
 * 'mcast_head' pointer is part of a socket.  The 'group' parameter is the
 * multicast group to join.  It is a properly zoned address, but has not been
 * checked in any other way.  If 'ifdev' is not NULL, it is the interface for
 * the membership; if it is NULL, an interface will be selected using
 * routing.  Return OK if the membership has been successfully added, or a
 * negative error code otherwise.
 */
int
mcast_join(struct mcast_head * mcast_head, const ip_addr_t * group,
	struct ifdev * ifdev)
{
	struct mcast_member *mm;
	struct netif *netif;
	unsigned int count;
	err_t err;

	/*
	 * The callers of this function perform only checks that depend on
	 * the address family.  We check everything else here.
	 */
	if (!ip_addr_ismulticast(group))
		return EADDRNOTAVAIL;

	if (!addr_is_valid_multicast(group))
		return EINVAL;

	/*
	 * If no interface was specified, pick one with a routing query.
	 * Note that scoped IPv6 addresses do require an interface to be
	 * specified.
	 */
	if (ifdev == NULL) {
		netif = ip_route(IP46_ADDR_ANY(IP_GET_TYPE(group)), group);

		if (netif == NULL)
			return EHOSTUNREACH;

		ifdev = netif_get_ifdev(netif);
	}

	assert(ifdev != NULL);
	assert(!IP_IS_V6(group) ||
	    !ip6_addr_lacks_zone(ip_2_ip6(group), IP6_MULTICAST));

	/* The interface must support multicast. */
	if (!(ifdev_get_ifflags(ifdev) & IFF_MULTICAST))
		return EADDRNOTAVAIL;

	/*
	 * First see if this socket is already joined to the given group,
	 * which is an error.  While looking, also count the number of groups
	 * the socket has joined already, to enforce the per-socket limit.
	 */
	count = 0;

	LIST_FOREACH(mm, &mcast_head->mh_list, mm_next) {
		if (mm->mm_ifdev == ifdev && ip_addr_cmp(&mm->mm_group, group))
			return EEXIST;

		count++;
	}

	if (count >= MAX_GROUPS_PER_SOCKET)
		return ENOBUFS;

	/* Do we have a free membership structure available? */
	if (LIST_EMPTY(&mcast_freelist))
		return ENOBUFS;

	/*
	 * Nothing can go wrong as far as we are concerned.  Ask lwIP to join
	 * the multicast group.  This may result in a multicast list update
	 * at the driver end.
	 */
	netif = ifdev_get_netif(ifdev);

	if (IP_IS_V6(group))
		err = mld6_joingroup_netif(netif, ip_2_ip6(group));
	else
		err = igmp_joingroup_netif(netif, ip_2_ip4(group));

	if (err != ERR_OK)
		return util_convert_err(err);

	/*
	 * Success.  Allocate, initialize, and attach a membership structure
	 * to the socket.
	 */
	mm = LIST_FIRST(&mcast_freelist);

	LIST_REMOVE(mm, mm_next);

	mm->mm_ifdev = ifdev;
	mm->mm_group = *group;

	LIST_INSERT_HEAD(&mcast_head->mh_list, mm, mm_next);

	return OK;
}

/*
 * Free the given per-socket multicast membership structure, which must
 * previously have been associated with a socket.  If 'leave_group' is set,
 * also tell lwIP to leave the corresponding multicast group.
 */
static void
mcast_free(struct mcast_member * mm, int leave_group)
{
	struct netif *netif;
	err_t err;

	assert(mm->mm_ifdev != NULL);

	if (leave_group) {
		netif = ifdev_get_netif(mm->mm_ifdev);

		if (IP_IS_V6(&mm->mm_group))
			err = mld6_leavegroup_netif(netif,
			    ip_2_ip6(&mm->mm_group));
		else
			err = igmp_leavegroup_netif(netif,
			    ip_2_ip4(&mm->mm_group));

		if (err != ERR_OK)
			panic("lwIP multicast membership desynchronization");
	}

	LIST_REMOVE(mm, mm_next);

	mm->mm_ifdev = NULL;

	LIST_INSERT_HEAD(&mcast_freelist, mm, mm_next);
}

/*
 * Attempt to remove a per-socket multicast membership association.  The
 * given 'mcast_head' pointer is part of a socket.  The 'group' parameter is
 * the multicast group to leave.  It is a properly zoned address, but has not
 * been checked in any other way.  If 'ifdev' is not NULL, it is the
 * interface of the membership; if it is NULL, a membership matching the
 * address on any interface will suffice.  As such, the parameter
 * requirements mirror those of mcast_join().  Return OK if the membership
 * has been successfully removed, or a negative error code otherwise.
 */
int
mcast_leave(struct mcast_head * mcast_head, const ip_addr_t * group,
	struct ifdev * ifdev)
{
	struct mcast_member *mm;

	/*
	 * Look up a matching entry.  The fact that we must find a match for
	 * the given address and interface keeps us from having to perform
	 * various other checks, such as whether the given address is a
	 * multicast address at all.  The exact error codes are not
	 * specified.
	 */
	LIST_FOREACH(mm, &mcast_head->mh_list, mm_next) {
		if ((ifdev == NULL || mm->mm_ifdev == ifdev) &&
		    ip_addr_cmp(&mm->mm_group, group))
			break;
	}

	if (mm == NULL)
		return ESRCH;

	mcast_free(mm, TRUE /*leave_group*/);

	return OK;
}

/*
 * Remove all per-socket multicast membership associations of the given
 * socket.  This function is called when the socket is closed.
 */
void
mcast_leave_all(struct mcast_head * mcast_head)
{
	struct mcast_member *mm;

	while (!LIST_EMPTY(&mcast_head->mh_list)) {
		mm = LIST_FIRST(&mcast_head->mh_list);

		mcast_free(mm, TRUE /*leave_group*/);
	}
}

/*
 * The given interface is about to disappear.  Remove and free any per-socket
 * multicast membership structures associated with the interface, without
 * leaving the multicast group itself (as that will happen a bit later
 * anyway).
 */
void
mcast_clear(struct ifdev * ifdev)
{
	unsigned int slot;

	for (slot = 0; slot < __arraycount(mcast_array); slot++) {
		if (mcast_array[slot].mm_ifdev != ifdev)
			continue;

		mcast_free(&mcast_array[slot], FALSE /*leave_group*/);
	}
}
21	minix/net/lwip/mcast.h	Normal file
@@ -0,0 +1,21 @@
#ifndef MINIX_NET_LWIP_MCAST_H
#define MINIX_NET_LWIP_MCAST_H

struct mcast_member;

struct mcast_head {
	LIST_HEAD(, mcast_member) mh_list;
};

#define mcast_isempty(mcast_head)	(LIST_EMPTY(&(mcast_head)->mh_list))

void mcast_init(void);
void mcast_reset(struct mcast_head * mcast_head);
int mcast_join(struct mcast_head * mcast_head, const ip_addr_t * group,
	struct ifdev * ifdev);
int mcast_leave(struct mcast_head * mcast_head, const ip_addr_t * group,
	struct ifdev * ifdev);
void mcast_leave_all(struct mcast_head * mcast_head);
void mcast_clear(struct ifdev * ifdev);
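
/*
 * A minimal usage sketch (hypothetical caller, error handling omitted),
 * mirroring a socket's lifetime:
 *
 *	struct mcast_head head;
 *
 *	mcast_reset(&head);			// socket creation
 *	mcast_join(&head, &group, NULL);	// e.g. IP_ADD_MEMBERSHIP
 *	mcast_leave(&head, &group, NULL);	// e.g. IP_DROP_MEMBERSHIP
 *	mcast_leave_all(&head);			// socket close
 */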

#endif /* !MINIX_NET_LWIP_MCAST_H */
821	minix/net/lwip/mempool.c	Normal file
@@ -0,0 +1,821 @@
/* LWIP service - mempool.c - memory pool management and slab allocation */
/*
 * This module should be considered a replacement for lwIP's PBUF_POOL and
 * custom-pools functionality.  lwIP's PBUF_POOL system allows a PBUF_POOL
 * type allocation for a moderately large amount of memory, for example for
 * a full-sized packet, to be turned into a chain of "pbuf" buffers, each of
 * a static size.  Most of lwIP can deal with such pbuf chains, because many
 * other types of allocations also end up consisting of pbuf chains.
 * However, lwIP will never use PBUF_POOL for its own memory allocations,
 * and will use PBUF_RAM allocations instead.  Such PBUF_RAM allocations
 * always return one single pbuf with a contiguous memory area.  lwIP's
 * custom pools support allows such PBUF_RAM allocations to draw from
 * user-defined pools of statically allocated memory, as an alternative to
 * turning such allocations into malloc() calls.
 *
 * However, lwIP itself does not offer a way to combine these two pool
 * systems: the PBUF_POOL buffer pool and the custom pools are completely
 * separate.  We want to be able to draw both kinds of memory from the same
 * pool.  This is the first reason that we are using our own memory pools.
 * The second is something that lwIP could never offer anyway: we would like
 * to provide a certain amount of static/preallocated memory for those types
 * of allocations, but optionally also add a much larger amount of dynamic
 * memory when needed.
 *
 * In order to make this module work, we do not use PBUF_POOL anywhere.
 * Instead, we use chained static-sized PBUF_RAM allocations for all types
 * of allocations that we manage ourselves--see pchain_alloc().  We tell
 * lwIP to use the functions in this module to do the malloc-type
 * allocations for those PBUF_RAM buffers.  As such, this module manages all
 * PBUF_RAM allocations, both from our own code and from lwIP.  Note that we
 * do still use lwIP's own pools for various lwIP structures.  We do want to
 * keep the isolation provided by the use of such pools, even though that
 * means that we have to provision some of those pools for the worst case,
 * resulting in some memory overhead that is unnecessary for the common
 * case.
 *
 * With the PBUF_RAM allocation redirection system in place, this module has
 * to manage the memory for those allocations.  It does this based on the
 * assertion that there are three main classes of PBUF_RAM allocation sizes:
 *
 * - "large" allocations: these are allocations for up to MEMPOOL_BUFSIZE
 *   bytes of PBUF_RAM data, where MEMPOOL_BUFSIZE is the allocation
 *   granularity that we have picked for the individual buffers in larger
 *   chains.  It is set to 512 bytes right now, mainly to keep pbuf chains
 *   for full-sized ethernet packets short, which has many performance
 *   advantages.  Since the pbuf header itself also takes some space (16
 *   bytes, right now), this results in allocations seen by mempool_malloc()
 *   of up to just over 512 bytes.
 * - "small" allocations: these are allocations mostly for packet headers,
 *   as needed by lwIP to prepend to (mainly TCP) packet data that we give
 *   to it.  The size of these allocations varies, but most are 76 bytes (80
 *   bytes if we ever add VLAN support), plus once again the pbuf header.
 * - "excessive" allocations: these are allocations larger than the maximum
 *   we have configured, effectively requesting contiguous memory of
 *   (possibly far) more than 512 bytes.  We do not make such allocations
 *   ourselves, as we only ever create pbuf chains.  Thus, any such
 *   allocations come from lwIP.  There are a few locations in lwIP that
 *   attempt to make those kinds of allocations, but we replace one
 *   important case in the lwIP code with a chained allocation, (currently)
 *   leaving only one case: allocation of ICMP ping reply packets.  In this
 *   module, we outright *deny* any excessive allocations.  Practically,
 *   that means that no replies are generated for requests exceeding around
 *   460 bytes, which is in fact not bad, especially since we have multicast
 *   ICMP ping replying enabled.  If any new cases of excessive allocations
 *   are added to lwIP in the future, we will have to deal with those on a
 *   case-by-case basis, but for now this should be all.
 *
 * This module caters to the first two types of allocations.  For large
 * buffer allocations, it provides a standard slab allocator, with a
 * hardcoded slab size of MEMPOOL_LARGE_COUNT buffers with a 512-byte data
 * area each.  One slab is allocated at service start-up; additional slabs
 * up to a configured maximum are allocated on demand.  Once fallen out of
 * use, all but one slab will be freed after a while, using a timer.  The
 * current per-slab count of 512 large buffers, combined with the buffer
 * size of 512 plus the pbuf header plus a bit of extra overhead, results in
 * about 266 KB per slab.
 *
 * For small buffer allocations, there are two facilities.  First, there is
 * a static pool of small buffers.  This pool currently provides 256
 * small-sized buffers, mainly in order to allow packet headers to be
 * produced even in low-memory conditions.  In addition, small buffers may
 * be formed by allocating and then splitting up one large buffer.  The
 * module is currently configured to split one large buffer into four small
 * buffers, which yields a small buffer size of just over 100 bytes--enough
 * for the packet headers while leaving little slack on either side.
 *
 * It is important to note that large and small buffer allocations are freed
 * up through the same function, with no information on the original
 * allocation size.  As a result, we have to distinguish between large and
 * small buffers using a unified system.  In particular, this module
 * prepends each of its allocations by a single pointer, which points to a
 * header structure that is at the very beginning of the slab that contains
 * the allocated buffer.  That header structure contains information about
 * the type of slab (large or small) as well as some accounting information
 * used by both types.
 *
 * For large-buffer slabs, this header is part of a larger structure with,
 * for example, the slab's list of free buffers.  This larger structure is
 * then followed by the actual buffers in the slab.
 *
 * For small-buffer slabs, the header is followed directly by the actual
 * small buffers.  Thus, when a large buffer is split up into four small
 * buffers, the data area of that large buffer consists of a small-type slab
 * header and four small buffers.  The large buffer itself is simply
 * considered in use, as though it was allocated for regular data.  This
 * nesting approach saves a lot of memory for small allocations, at the cost
 * of a bit more computation.
 *
 * It should be noted that all allocations should be (and are)
 * pointer-aligned.  Normally lwIP would check for this, but we cannot tell
 * lwIP the platform pointer size without hardcoding that size.  This module
 * performs proper alignment of all buffers itself though, regardless of the
 * pointer size.
 */
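
/*
 * To make the 512-byte granularity concrete: a full-sized 1514-byte
 * ethernet frame allocated through pchain_alloc() spans ceil(1514 / 512) =
 * 3 chained buffers, which keeps such chains short as intended above.
 */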

#include "lwip.h"

#include <sys/mman.h>

/* Alignment to pointer sizes. */
#define MEMPOOL_ALIGN_DOWN(s)	((s) & ~(sizeof(void *) - 1))
#define MEMPOOL_ALIGN_UP(s)	MEMPOOL_ALIGN_DOWN((s) + sizeof(void *) - 1)
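
/*
 * Worked example: with 4-byte pointers, MEMPOOL_ALIGN_DOWN(13) = 13 & ~3 =
 * 12 and MEMPOOL_ALIGN_UP(13) = MEMPOOL_ALIGN_DOWN(16) = 16; sizes that are
 * already pointer-aligned pass through both macros unchanged.
 */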

/* Large buffers: per-slab count and data area size. */
#define MEMPOOL_LARGE_COUNT	512
#define MEMPOOL_LARGE_SIZE \
	(MEMPOOL_ALIGN_UP(sizeof(struct pbuf)) + MEMPOOL_BUFSIZE)

/* Small buffers: per-slab count and data area size. */
#define MEMPOOL_SMALL_COUNT	4
#define MEMPOOL_SMALL_SIZE \
	(MEMPOOL_ALIGN_DOWN(MEMPOOL_LARGE_SIZE / MEMPOOL_SMALL_COUNT) - \
	sizeof(struct mempool_header))

/* Memory pool slab header, part of both small and large slabs. */
struct mempool_header {
	union {
		struct {
			uint8_t mhui_flags;
			uint32_t mhui_inuse;
		} mhu_info;
		void *mhu_align;	/* force pointer alignment */
	} mh_u;
};
#define mh_flags mh_u.mhu_info.mhui_flags
#define mh_inuse mh_u.mhu_info.mhui_inuse

/* Header flags. */
#define MHF_SMALL	0x01	/* slab is for small buffers, not large ones */
#define MHF_STATIC	0x02	/* small slab is statically allocated */
#define MHF_MARKED	0x04	/* large empty slab is up for deallocation */

/*
 * Large buffer.  When allocated, mlb_header points to the (header of) the
 * containing large slab, and mlb_data is returned for arbitrary use by the
 * user of the buffer.  When free, mlb_header is NULL and instead mlb_header2
 * points to the containing slab (allowing for double-free detection), and
 * the buffer is on the slab's free list by using mlb_next.
 */
struct mempool_large_buf {
	struct mempool_header *mlb_header;
	union {
		struct {
			struct mempool_header *mlbuf_header2;
			LIST_ENTRY(mempool_large_buf) mlbuf_next;
		} mlbu_free;
		char mlbu_data[MEMPOOL_LARGE_SIZE];
	} mlb_u;
};
#define mlb_header2 mlb_u.mlbu_free.mlbuf_header2
#define mlb_next mlb_u.mlbu_free.mlbuf_next
#define mlb_data mlb_u.mlbu_data

/* Small buffer.  Same idea, different size. */
struct mempool_small_buf {
	struct mempool_header *msb_header;
	union {
		struct {
			struct mempool_header *msbuf_header2;
			TAILQ_ENTRY(mempool_small_buf) msbuf_next;
		} msbu_free;
		char msbu_data[MEMPOOL_SMALL_SIZE];
	} msb_u;
};
#define msb_header2 msb_u.msbu_free.msbuf_header2
#define msb_next msb_u.msbu_free.msbuf_next
#define msb_data msb_u.msbu_data

/*
 * A large slab, including header, other per-slab fields, and large buffers.
 * Each of these structures is on exactly one of three slab lists, depending
 * on whether all its buffers are free (empty), some but not all of its
 * buffers are in use (partial), or all of its buffers are in use (full).
 * The mls_next field is used for that list.  The mls_free field is the
 * per-slab list of free buffers.
 */
struct mempool_large_slab {
	struct mempool_header mls_header;	/* MUST be first */
	LIST_ENTRY(mempool_large_slab) mls_next;
	LIST_HEAD(, mempool_large_buf) mls_free;
	struct mempool_large_buf mls_buf[MEMPOOL_LARGE_COUNT];
};

/* The three slab lists for large slabs, as described above. */
static LIST_HEAD(, mempool_large_slab) mempool_empty_slabs;
static LIST_HEAD(, mempool_large_slab) mempool_partial_slabs;
static LIST_HEAD(, mempool_large_slab) mempool_full_slabs;

/*
 * A small slab, including header and small buffers.  We use unified free
 * lists for small buffers, and these small slabs are not part of any lists
 * themselves, so we need neither of the two fields from large slabs for
 * that.
 */
struct mempool_small_slab {
	struct mempool_header mss_header;	/* MUST be first */
	struct mempool_small_buf mss_buf[MEMPOOL_SMALL_COUNT];
};
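
/*
 * Sketch of the nesting described in the comment at the top of this file: a
 * small slab lives entirely inside the data area of one allocated large
 * buffer.
 *
 *	large slab:	[ mempool_large_slab header | buf | buf | ... ]
 *	one large buf:	[ mlb_header | mlb_data ...                   ]
 *	its mlb_data:	[ mempool_small_slab header | 4 small bufs    ]
 */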

/*
 * The free lists for static small buffers (from the static pool, see below)
 * and dynamic small buffers (as obtained by splitting large buffers).
 */
static TAILQ_HEAD(, mempool_small_buf) mempool_small_static_freelist;
static TAILQ_HEAD(, mempool_small_buf) mempool_small_dynamic_freelist;

/*
 * A static pool of small buffers.  Small buffers are somewhat more important
 * than large buffers, because they are used for packet headers.  The purpose
 * of this static pool is to be able to make progress even if all large
 * buffers are allocated for data, typically in the case that the system is
 * low on memory.  Note that the number of static small buffers is the given
 * number of small slabs multiplied by MEMPOOL_SMALL_COUNT, hence the
 * division.
 */
#define MEMPOOL_SMALL_SLABS	(256 / MEMPOOL_SMALL_COUNT)

static struct mempool_small_slab mempool_small_pool[MEMPOOL_SMALL_SLABS];

/*
 * The following setting (mempool_max_slabs) can be changed through
 * sysctl(7).  As such it may be set by userland to a completely arbitrary
 * value and must be sanity-checked before any actual use.  The default is
 * picked such that all TCP sockets can fill up their send and receive
 * queues: (TCP_SNDBUF_DEF + TCP_RCVBUF_DEF) * NR_TCPSOCK / (MEMPOOL_BUFSIZE
 * * MEMPOOL_LARGE_COUNT) = (32768 + 32768) * 256 / (512 * 512) = 64.  We
 * put in the resulting number rather than the formula because not all those
 * definitions are public.
 */
#define MEMPOOL_DEFAULT_MAX_SLABS	64	/* about 17 MB of memory */

static int mempool_max_slabs;	/* maximum number of large slabs */
static int mempool_nr_slabs;	/* current number of large slabs */

static int mempool_nr_large;	/* current number of large buffers */
static int mempool_used_large;	/* large buffers currently in use */
static int mempool_used_small;	/* small buffers currently in use */

/*
 * Number of clock ticks between timer invocations.  The timer is used to
 * deallocate unused slabs.
 */
#define MEMPOOL_TIMER_TICKS	(10 * sys_hz())

static minix_timer_t mempool_timer;

static int mempool_defer_alloc;	/* allocation failed, defer next try */

/* The CTL_MINIX MINIX_LWIP "mempool" subtree.  Dynamically numbered. */
static struct rmib_node minix_lwip_mempool_table[] = {
	RMIB_INTPTR(RMIB_RW, &mempool_max_slabs, "slab_max",
	    "Maximum number of memory slabs (configurable)"),
	RMIB_INTPTR(RMIB_RO, &mempool_nr_slabs, "slab_num",
	    "Current number of memory slabs"),
	RMIB_INT(RMIB_RO, sizeof(struct mempool_large_slab), "slab_size",
	    "Byte size of a single memory slab"),
	RMIB_INT(RMIB_RO, MEMPOOL_LARGE_COUNT, "slab_bufs",
	    "Number of large buffers per memory slab"),
	RMIB_INTPTR(RMIB_RO, &mempool_nr_large, "large_num",
	    "Current total number of large buffers"),
	RMIB_INTPTR(RMIB_RO, &mempool_used_large, "large_used",
	    "Current number of used large buffers"),
	RMIB_INT(RMIB_RO, MEMPOOL_LARGE_SIZE, "large_size",
	    "Byte size of a single large buffer"),
	RMIB_INTPTR(RMIB_RO, &mempool_used_small, "small_used",
	    "Current number of used small buffers"),
	RMIB_INT(RMIB_RO, MEMPOOL_SMALL_SIZE, "small_size",
	    "Byte size of a single small buffer"),
};

static struct rmib_node minix_lwip_mempool_node =
    RMIB_NODE(RMIB_RO, minix_lwip_mempool_table, "mempool",
	"Memory pool settings");

/*
 * Initialize the given "slab" of small buffers.  The slab may either come
 * from the statically allocated pool ('is_static' is TRUE) or a single
 * large buffer that we aim to chop up into small buffers.
 */
static void
mempool_prepare_small(struct mempool_small_slab * mss, int is_static)
{
	struct mempool_small_buf *msb;
	unsigned int count;

	mss->mss_header.mh_flags = MHF_SMALL | ((is_static) ? MHF_STATIC : 0);
	mss->mss_header.mh_inuse = 0;

	msb = mss->mss_buf;

	for (count = 0; count < MEMPOOL_SMALL_COUNT; count++, msb++) {
		msb->msb_header = NULL;
		msb->msb_header2 = &mss->mss_header;

		if (is_static)
			TAILQ_INSERT_HEAD(&mempool_small_static_freelist, msb,
			    msb_next);
		else
			TAILQ_INSERT_HEAD(&mempool_small_dynamic_freelist, msb,
			    msb_next);
	}
}

/*
 * Allocate a new slab for large buffers, if allowed by policy and possible.
 */
static void
mempool_new_slab(void)
{
	struct mempool_large_slab *mls;
	struct mempool_large_buf *mlb;
	unsigned int count;

	/*
	 * See if allocating a new slab would result in overrunning the
	 * configured maximum number of large buffers.  Round the maximum,
	 * which is probably what the user intended.
	 */
	if (mempool_cur_buffers() + MEMPOOL_LARGE_COUNT / 2 >
	    mempool_max_buffers()) {
		assert(mempool_nr_slabs > 0);

		return;
	}

	/*
	 * If a previous allocation failed during this timer interval, do not
	 * try again now.
	 */
	if (mempool_defer_alloc)
		return;

	/*
	 * Allocate the slab.  Preallocate the memory, or we might crash
	 * later during low-memory conditions.  If allocation fails, simply
	 * do nothing further.  The caller will check the free lists.
	 */
	mls = (struct mempool_large_slab *)mmap(NULL,
	    sizeof(struct mempool_large_slab), PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE | MAP_PREALLOC, -1, 0);

	if (mls == MAP_FAILED) {
		if (mempool_nr_slabs == 0)
			panic("unable to allocate initial memory pool");

		/*
		 * Do not keep hammering VM with mmap requests when the
		 * system is out of memory.  Try again after the next timer
		 * tick.
		 */
		mempool_defer_alloc = TRUE;

		return;
	}

	/* Initialize the new slab. */
	mls->mls_header.mh_flags = 0;
	mls->mls_header.mh_inuse = 0;

	mlb = mls->mls_buf;

	LIST_INIT(&mls->mls_free);

	for (count = 0; count < MEMPOOL_LARGE_COUNT; count++, mlb++) {
		mlb->mlb_header = NULL;
		mlb->mlb_header2 = &mls->mls_header;

		LIST_INSERT_HEAD(&mls->mls_free, mlb, mlb_next);
	}

	LIST_INSERT_HEAD(&mempool_empty_slabs, mls, mls_next);

	mempool_nr_slabs++;
	mempool_nr_large += MEMPOOL_LARGE_COUNT;
}

/*
 * Deallocate a slab for large buffers, if allowed.
 */
static void
mempool_destroy_slab(struct mempool_large_slab * mls)
{

	assert(mempool_nr_slabs > 0);

	assert(!(mls->mls_header.mh_flags & MHF_SMALL));
	assert(mls->mls_header.mh_inuse == 0);

	/* Never deallocate the last large slab. */
	if (mempool_nr_slabs == 1)
		return;

	LIST_REMOVE(mls, mls_next);

	if (munmap(mls, sizeof(*mls)) != 0)
		panic("munmap failed: %d", -errno);

	assert(mempool_nr_large > MEMPOOL_LARGE_COUNT);
	mempool_nr_large -= MEMPOOL_LARGE_COUNT;
|
||||
mempool_nr_slabs--;
|
||||
}
|
||||
|
||||
/*
|
||||
* Regular timer. Deallocate empty slabs already marked for deallocation, and
|
||||
* mark any other empty slabs for deallocation.
|
||||
*/
|
||||
static void
|
||||
mempool_tick(int arg __unused)
|
||||
{
|
||||
struct mempool_large_slab *mls, *tmls;
|
||||
|
||||
/*
|
||||
* Go through all the empty slabs, destroying marked slabs and marking
|
||||
* unmarked slabs.
|
||||
*/
|
||||
LIST_FOREACH_SAFE(mls, &mempool_empty_slabs, mls_next, tmls) {
|
||||
if (mls->mls_header.mh_flags & MHF_MARKED)
|
||||
mempool_destroy_slab(mls);
|
||||
else
|
||||
mls->mls_header.mh_flags |= MHF_MARKED;
|
||||
}
|
||||
|
||||
/*
|
||||
* If allocation failed during the last interval, allow a new attempt
|
||||
* during the next.
|
||||
*/
|
||||
mempool_defer_alloc = FALSE;
|
||||
|
||||
/* Set the next timer. */
|
||||
set_timer(&mempool_timer, MEMPOOL_TIMER_TICKS, mempool_tick, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize the memory pool module.
|
||||
*/
|
||||
void
|
||||
mempool_init(void)
|
||||
{
|
||||
unsigned int slot;
|
||||
|
||||
/* These checks are for absolutely essential points. */
|
||||
assert(sizeof(void *) == MEM_ALIGNMENT);
|
||||
assert(sizeof(struct mempool_small_slab) <= MEMPOOL_LARGE_SIZE);
|
||||
assert(offsetof(struct mempool_small_buf, msb_data) == sizeof(void *));
|
||||
assert(offsetof(struct mempool_large_buf, mlb_data) == sizeof(void *));
|
||||
|
||||
/* Initialize module-local variables. */
|
||||
LIST_INIT(&mempool_empty_slabs);
|
||||
LIST_INIT(&mempool_partial_slabs);
|
||||
LIST_INIT(&mempool_full_slabs);
|
||||
|
||||
TAILQ_INIT(&mempool_small_static_freelist);
|
||||
TAILQ_INIT(&mempool_small_dynamic_freelist);
|
||||
|
||||
mempool_max_slabs = MEMPOOL_DEFAULT_MAX_SLABS;
|
||||
mempool_nr_slabs = 0;
|
||||
|
||||
mempool_nr_large = 0;
|
||||
mempool_used_large = 0;
|
||||
mempool_used_small = 0;
|
||||
|
||||
mempool_defer_alloc = FALSE;
|
||||
|
||||
/* Initialize the static pool of small buffers. */
|
||||
for (slot = 0; slot < __arraycount(mempool_small_pool); slot++)
|
||||
mempool_prepare_small(&mempool_small_pool[slot],
|
||||
TRUE /*is_static*/);
|
||||
|
||||
/*
|
||||
* Allocate one large slab. The service needs at least one large slab
|
||||
* for basic operation, and therefore will never deallocate the last.
|
||||
*/
|
||||
mempool_new_slab();
|
||||
|
||||
/* Set a regular low-frequency timer to deallocate unused slabs. */
|
||||
set_timer(&mempool_timer, MEMPOOL_TIMER_TICKS, mempool_tick, 0);
|
||||
|
||||
/* Register the minix.lwip.mempool subtree. */
|
||||
mibtree_register_lwip(&minix_lwip_mempool_node);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the total number of large buffers currently in the system, regardless
|
||||
* of allocation status.
|
||||
*/
|
||||
unsigned int
|
||||
mempool_cur_buffers(void)
|
||||
{
|
||||
|
||||
return mempool_nr_large;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the maximum number of large buffers that the system has been allowed
|
||||
* to allocate. Note that due to low-memory conditions, this maximum may not
|
||||
* be allocated in practice even when desired.
|
||||
*/
|
||||
unsigned int
|
||||
mempool_max_buffers(void)
|
||||
{
|
||||
|
||||
if (mempool_max_slabs <= 1)
|
||||
return MEMPOOL_LARGE_COUNT;
|
||||
|
||||
if ((size_t)mempool_max_slabs >
|
||||
INT_MAX / sizeof(struct mempool_large_slab))
|
||||
return INT_MAX / sizeof(struct mempool_large_slab);
|
||||
|
||||
return (size_t)mempool_max_slabs * MEMPOOL_LARGE_COUNT;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate a large buffer, either by taking one off a free list or by
|
||||
* allocating a new large slab. On success, return a pointer to the data area
|
||||
* of the large buffer. This data area is exactly MEMPOOL_LARGE_SIZE bytes in
|
||||
* size. If no large buffer could be allocated, return NULL.
|
||||
*/
|
||||
static void *
|
||||
mempool_alloc_large(void)
|
||||
{
|
||||
struct mempool_large_slab *mls;
|
||||
struct mempool_large_buf *mlb;
|
||||
|
||||
/*
|
||||
* Find a large slab that has free large blocks. As is standard for
|
||||
* slab allocation, favor partially used slabs over empty slabs for
|
||||
* eventual consolidation. If both lists are empty, try allocating a
|
||||
* new slab. If that fails, we are out of memory, and return NULL.
|
||||
*/
|
||||
if (!LIST_EMPTY(&mempool_partial_slabs))
|
||||
mls = LIST_FIRST(&mempool_partial_slabs);
|
||||
else {
|
||||
if (LIST_EMPTY(&mempool_empty_slabs)) {
|
||||
mempool_new_slab();
|
||||
|
||||
if (LIST_EMPTY(&mempool_empty_slabs))
|
||||
return NULL; /* out of memory */
|
||||
}
|
||||
|
||||
mls = LIST_FIRST(&mempool_empty_slabs);
|
||||
}
|
||||
|
||||
/* Allocate a block from the slab that we picked. */
|
||||
assert(mls != NULL);
|
||||
assert(!LIST_EMPTY(&mls->mls_free));
|
||||
|
||||
mlb = LIST_FIRST(&mls->mls_free);
|
||||
LIST_REMOVE(mlb, mlb_next);
|
||||
|
||||
assert(mlb->mlb_header == NULL);
|
||||
assert(mlb->mlb_header2 == &mls->mls_header);
|
||||
|
||||
mlb->mlb_header = &mls->mls_header;
|
||||
|
||||
/*
|
||||
* Adjust accounting for the large slab, which may involve moving it
|
||||
* to another list.
|
||||
*/
|
||||
assert(mls->mls_header.mh_inuse < MEMPOOL_LARGE_COUNT);
|
||||
mls->mls_header.mh_inuse++;
|
||||
|
||||
if (mls->mls_header.mh_inuse == MEMPOOL_LARGE_COUNT) {
|
||||
LIST_REMOVE(mls, mls_next);
|
||||
|
||||
LIST_INSERT_HEAD(&mempool_full_slabs, mls, mls_next);
|
||||
} else if (mls->mls_header.mh_inuse == 1) {
|
||||
LIST_REMOVE(mls, mls_next);
|
||||
|
||||
LIST_INSERT_HEAD(&mempool_partial_slabs, mls, mls_next);
|
||||
}
|
||||
|
||||
assert(mempool_used_large < mempool_nr_large);
|
||||
mempool_used_large++;
|
||||
|
||||
/* Return the block's data area. */
|
||||
return (void *)mlb->mlb_data;
|
||||
}
|
||||
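
/*
 * For reference, a sketch of the slab list transitions that the accounting
 * above and in mempool_free() implements, driven by the slab's mh_inuse
 * counter (a summary, not part of the module proper):
 *
 *	empty    --first allocation-->  partial  (mh_inuse: 0 -> 1)
 *	partial  --last allocation-->   full     (mh_inuse == LARGE_COUNT)
 *	full     --first free-->        partial
 *	partial  --last free-->         empty    (freed later by the timer)
 */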

/*
 * Allocate a small buffer, either by taking one off a free list or by
 * allocating a large buffer and splitting it up into new free small buffers.
 * On success, return a pointer to the data area of the small buffer.  This
 * data area is exactly MEMPOOL_SMALL_SIZE bytes in size.  If no small buffer
 * could be allocated, return NULL.
 */
static void *
mempool_alloc_small(void)
{
	struct mempool_small_slab *mss;
	struct mempool_small_buf *msb;
	struct mempool_header *mh;

	/*
	 * Find a free small block and take it off the free list.  Try the
	 * static free list before the dynamic one, so that after a peak in
	 * buffer usage we are likely to be able to free up the dynamic slabs
	 * quickly.  If both lists are empty, try allocating a large block to
	 * divvy up into small blocks.  If that fails, we are out of memory.
	 */
	if (!TAILQ_EMPTY(&mempool_small_static_freelist)) {
		msb = TAILQ_FIRST(&mempool_small_static_freelist);

		TAILQ_REMOVE(&mempool_small_static_freelist, msb, msb_next);
	} else {
		if (TAILQ_EMPTY(&mempool_small_dynamic_freelist)) {
			mss =
			    (struct mempool_small_slab *)mempool_alloc_large();

			if (mss == NULL)
				return NULL; /* out of memory */

			/* Initialize the small slab, including its blocks. */
			mempool_prepare_small(mss, FALSE /*is_static*/);
		}

		msb = TAILQ_FIRST(&mempool_small_dynamic_freelist);
		assert(msb != NULL);

		TAILQ_REMOVE(&mempool_small_dynamic_freelist, msb, msb_next);
	}

	/* Mark the small block as allocated, and return its data area. */
	assert(msb != NULL);

	assert(msb->msb_header == NULL);
	assert(msb->msb_header2 != NULL);

	mh = msb->msb_header2;
	msb->msb_header = mh;

	assert(mh->mh_inuse < MEMPOOL_SMALL_COUNT);
	mh->mh_inuse++;

	mempool_used_small++;

	return (void *)msb->msb_data;
}

/*
 * Memory pool wrapper function for malloc() calls from lwIP.
 */
void *
mempool_malloc(size_t size)
{

	/*
	 * It is currently expected that there will be allocation attempts for
	 * sizes larger than our large size, in particular for ICMP ping
	 * replies as described elsewhere.  As such, we cannot print any
	 * warnings here.  For now, refusing these excessive allocations
	 * should not be a problem in practice.
	 */
	if (size > MEMPOOL_LARGE_SIZE)
		return NULL;

	if (size <= MEMPOOL_SMALL_SIZE)
		return mempool_alloc_small();
	else
		return mempool_alloc_large();
}
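
/*
 * An illustrative sketch (disabled, not part of the module) of how the size
 * classes above play out for a caller; the helper name is hypothetical.
 */
#if 0
static void
example_mempool_usage(void)
{
	void *small, *large;

	small = mempool_malloc(MEMPOOL_SMALL_SIZE);	/* small buffer */
	large = mempool_malloc(MEMPOOL_SMALL_SIZE + 1);	/* large buffer */

	/* Requests beyond the large buffer size are simply refused. */
	assert(mempool_malloc(MEMPOOL_LARGE_SIZE + 1) == NULL);

	mempool_free(large);
	mempool_free(small);
}
#endif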

/*
 * Memory pool wrapper function for free() calls from lwIP.
 */
void
mempool_free(void * ptr)
{
	struct mempool_large_slab *mls;
	struct mempool_large_buf *mlb;
	struct mempool_small_slab *mss;
	struct mempool_small_buf *msb;
	struct mempool_header *mh;
	unsigned int count;

	/*
	 * Get a pointer to the slab header, which is right before the data
	 * area for both large and small buffers.  This pointer is NULL if the
	 * buffer is free, which would indicate that something is very wrong.
	 */
	ptr = (void *)((char *)ptr - sizeof(mh));

	memcpy(&mh, ptr, sizeof(mh));

	if (mh == NULL)
		panic("mempool_free called on unallocated object!");

	/*
	 * If the slab header says that the slab is for small buffers, deal
	 * with that case first.  If we free up the last small buffer of a
	 * dynamically allocated small slab, we also free up the entire small
	 * slab, which is in fact the data area of a large buffer.
	 */
	if (mh->mh_flags & MHF_SMALL) {
		/*
		 * Move the small buffer onto the appropriate small free list.
		 */
		msb = (struct mempool_small_buf *)ptr;

		msb->msb_header2 = mh;
		msb->msb_header = NULL;

		/*
		 * Simple heuristic, unless the buffer is static: favor reuse
		 * of small buffers in containers that are already in use
		 * for other small buffers as well, for consolidation.
		 */
		if (mh->mh_flags & MHF_STATIC)
			TAILQ_INSERT_HEAD(&mempool_small_static_freelist, msb,
			    msb_next);
		else if (mh->mh_inuse > 1)
			TAILQ_INSERT_HEAD(&mempool_small_dynamic_freelist, msb,
			    msb_next);
		else
			TAILQ_INSERT_TAIL(&mempool_small_dynamic_freelist, msb,
			    msb_next);

		assert(mh->mh_inuse > 0);
		mh->mh_inuse--;

		assert(mempool_used_small > 0);
		mempool_used_small--;

		/*
		 * If the small buffer is statically allocated, or it was not
		 * the last allocated small buffer in its containing large
		 * buffer, then we are done.
		 */
		if (mh->mh_inuse > 0 || (mh->mh_flags & MHF_STATIC))
			return;

		/*
		 * Otherwise, free the containing large buffer as well.
		 * First, remove all its small buffers from the free list.
		 */
		mss = (struct mempool_small_slab *)mh;
		msb = mss->mss_buf;

		for (count = 0; count < MEMPOOL_SMALL_COUNT; count++, msb++) {
			assert(msb->msb_header == NULL);
			assert(msb->msb_header2 == mh);

			TAILQ_REMOVE(&mempool_small_dynamic_freelist, msb,
			    msb_next);
		}

		/* Then, fall through to the large-buffer free code. */
		ptr = (void *)((char *)mh - sizeof(mh));

		memcpy(&mh, ptr, sizeof(mh));

		assert(mh != NULL);
		assert(!(mh->mh_flags & MHF_SMALL));
	}

	/*
	 * Move the large buffer onto the free list of the large slab to which
	 * it belongs.
	 */
	mls = (struct mempool_large_slab *)mh;
	mlb = (struct mempool_large_buf *)ptr;

	mlb->mlb_header2 = &mls->mls_header;
	mlb->mlb_header = NULL;

	LIST_INSERT_HEAD(&mls->mls_free, mlb, mlb_next);

	/*
	 * Adjust accounting for the large slab, which may involve moving it
	 * to another list.
	 */
	assert(mls->mls_header.mh_inuse > 0);
	mls->mls_header.mh_inuse--;

	if (mls->mls_header.mh_inuse == 0) {
		LIST_REMOVE(mls, mls_next);

		LIST_INSERT_HEAD(&mempool_empty_slabs, mls, mls_next);

		mls->mls_header.mh_flags &= ~MHF_MARKED;
	} else if (mls->mls_header.mh_inuse == MEMPOOL_LARGE_COUNT - 1) {
		LIST_REMOVE(mls, mls_next);

		LIST_INSERT_HEAD(&mempool_partial_slabs, mls, mls_next);
	}

	assert(mempool_used_large > 0);
	mempool_used_large--;
}
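
/*
 * A rough sketch of the buffer layout that the header trick above relies on:
 * the pointer to the owning slab header immediately precedes the data area
 * handed out to the caller.  This structure is for exposition only and
 * ignores alignment details of the real buffer structures.
 */
#if 0
struct example_layout {
	struct mempool_header *hdr;	/* NULL while free, set when in use */
	char data[MEMPOOL_SMALL_SIZE];	/* the pointer returned to lwIP */
};
#endif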

/*
 * Memory pool wrapper function for calloc() calls from lwIP.
 */
void *
mempool_calloc(size_t num, size_t size)
{
	void *ptr;
	size_t total;

	/*
	 * Standard overflow check.  This can be improved, but it doesn't have
	 * to be, because in practice lwIP never calls calloc() anyway.
	 */
	if (num > 0 && size > 0 && (size_t)-1 / size < num)
		return NULL;

	total = num * size;

	if ((ptr = mempool_malloc(total)) == NULL)
		return NULL;

	memset(ptr, 0, total);

	return ptr;
}
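
/*
 * A worked example of the overflow check (example values only): on a 32-bit
 * size_t, num = 0x10000 and size = 0x10001 would wrap, as num * size =
 * 0x100010000 truncates to 0x10000.  The check catches this case, because
 * (size_t)-1 / 0x10001 = 0xFFFF, which is smaller than num.
 */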
minix/net/lwip/mibtree.c (new file)
@@ -0,0 +1,141 @@
/* LWIP service - mibtree.c - sysctl support for the net and minix.lwip trees */
/*
 * This file acts as a dispatcher for the net.inet, net.inet6, and minix.lwip
 * sysctl trees.  It does not cover the other net.* trees; these are taken
 * care of in other source files.
 */

#include "lwip.h"

#include <minix/sysctl.h>

#define MAX_PROTO	6	/* maximum # of INET protocols with subtrees */

static struct rmib_indir net_inet_indir[MAX_PROTO];
static unsigned int net_inet_indir_count = 0;
static struct rmib_node net_inet_node =
    RMIB_SNODE(RMIB_RO, net_inet_indir, "inet", "PF_INET related settings");

#ifdef INET6
static struct rmib_indir net_inet6_indir[MAX_PROTO];
static unsigned int net_inet6_indir_count = 0;
static struct rmib_node net_inet6_node =
    RMIB_SNODE(RMIB_RO, net_inet6_indir, "inet6", "PF_INET6 related settings");
#endif /* INET6 */

#define MAX_LWIP	4	/* maximum # of miscellaneous LWIP subtrees */

static struct rmib_indir minix_lwip_indir[MAX_LWIP];
static unsigned int minix_lwip_indir_count = 0;
static struct rmib_node minix_lwip_node =
    RMIB_SNODE(RMIB_RO, minix_lwip_indir, "lwip",
	"LWIP service information and settings");

/*
 * Initialize the status module by registering the net.inet, net.inet6, and
 * minix.lwip trees with the MIB service.  Other modules must have added all
 * subtrees to those trees through mibtree_register_*() before this point.
 */
void
mibtree_init(void)
{
	const int inet_mib[] = { CTL_NET, PF_INET };
#ifdef INET6
	const int inet6_mib[] = { CTL_NET, PF_INET6 };
#endif /* INET6 */
	const int lwip_mib[] = { CTL_MINIX, MINIX_LWIP };
	int r;

	/*
	 * Register the "net.inet", "net.inet6", and "minix.lwip" subtrees
	 * with the MIB service.
	 *
	 * These calls only return local failures.  Remote failures (in the
	 * MIB service) are silently ignored.  So, we can safely panic on
	 * failure.
	 */
	if ((r = rmib_register(inet_mib, __arraycount(inet_mib),
	    &net_inet_node)) != OK)
		panic("unable to register net.inet RMIB tree: %d", r);

#ifdef INET6
	if ((r = rmib_register(inet6_mib, __arraycount(inet6_mib),
	    &net_inet6_node)) != OK)
		panic("unable to register net.inet6 RMIB tree: %d", r);
#endif /* INET6 */

	if ((r = rmib_register(lwip_mib, __arraycount(lwip_mib),
	    &minix_lwip_node)) != OK)
		panic("unable to register minix.lwip RMIB tree: %d", r);
}

/*
 * Add a subtree to the local net.inet or net.inet6 tree.  This function must
 * only be called *before* mibtree_init(), as the latter will register the
 * final tree with the MIB service.
 */
void
mibtree_register_inet(int domain, int protocol, struct rmib_node * node)
{
	struct rmib_node *parent;
	struct rmib_indir *indir;
	unsigned int i, *count;

	switch (domain) {
	case PF_INET:
		parent = &net_inet_node;
		indir = net_inet_indir;
		count = &net_inet_indir_count;
		break;
	case PF_INET6:
#ifdef INET6
		parent = &net_inet6_node;
		indir = net_inet6_indir;
		count = &net_inet6_indir_count;
		break;
#else /* !INET6 */
		return;
#endif /* !INET6 */
	default:
		panic("invalid domain %d", domain);
	}

	assert(*count < MAX_PROTO);

	/* Insertion sort. */
	for (i = 0; i < *count; i++) {
		assert(indir[i].rindir_id != (unsigned int)protocol);

		if (indir[i].rindir_id > (unsigned int)protocol)
			break;
	}

	if (i < *count)
		memmove(&indir[i + 1], &indir[i],
		    sizeof(indir[0]) * (*count - i));

	indir[i].rindir_id = protocol;
	indir[i].rindir_node = node;
	parent->rnode_size = ++*count;
}
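
/*
 * A disabled usage sketch: how a protocol module would attach its subtree
 * before mibtree_init() runs.  The table and function names are hypothetical.
 */
#if 0
static struct rmib_node ex_net_inet_udp_node =
    RMIB_NODE(RMIB_RO, ex_udp_table, "udp", "UDP related settings");

void
ex_udpsock_init(void)
{

	mibtree_register_inet(PF_INET, IPPROTO_UDP, &ex_net_inet_udp_node);
}
#endif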

/*
 * Add a miscellaneous subtree to the local minix.lwip tree.  This function
 * must only be called *before* mibtree_init(), as the latter will register
 * the final tree with the MIB service.  Note that the given subtrees are
 * numbered arbitrarily.  We use sparse trees here only to avoid having to
 * declare external variables, which is a bit of a hack, but with the expected
 * low number of miscellaneous subtrees there will be no performance penalty.
 */
void
mibtree_register_lwip(struct rmib_node * node)
{
	unsigned int i;

	i = minix_lwip_indir_count;

	assert(i < __arraycount(minix_lwip_indir));

	minix_lwip_indir[i].rindir_id = i;
	minix_lwip_indir[i].rindir_node = node;
	minix_lwip_node.rnode_size = ++minix_lwip_indir_count;
}
minix/net/lwip/ndev.c (new file, 1019 lines; diff too large to show)

minix/net/lwip/ndev.h (new file)
@@ -0,0 +1,33 @@
#ifndef MINIX_NET_LWIP_NDEV_H
#define MINIX_NET_LWIP_NDEV_H

/* The maximum supported number of network device drivers. */
#define NR_NDEV		8

typedef uint32_t ndev_id_t;

struct ndev_hwaddr {
	uint8_t nhwa_addr[NDEV_HWADDR_MAX];
};

struct ndev_conf {
	uint32_t nconf_set;		/* fields to set (NDEV_SET_) */
	uint32_t nconf_mode;		/* desired mode (NDEV_MODE_) */
	struct ndev_hwaddr *nconf_mclist;	/* multicast list pointer */
	size_t nconf_mccount;		/* multicast list count */
	uint32_t nconf_caps;		/* capabilities (NDEV_CAP_) */
	uint32_t nconf_flags;		/* flags to set (NDEV_FLAG_) */
	uint32_t nconf_media;		/* media selection (IFM_) */
	struct ndev_hwaddr nconf_hwaddr;	/* desired hardware address */
};

void ndev_init(void);
void ndev_check(void);
void ndev_process(const message * m_ptr, int ipc_status);

int ndev_conf(ndev_id_t id, const struct ndev_conf * nconf);
int ndev_send(ndev_id_t id, const struct pbuf * pbuf);
int ndev_can_recv(ndev_id_t id);
int ndev_recv(ndev_id_t id, struct pbuf * pbuf);

#endif /* !MINIX_NET_LWIP_NDEV_H */
minix/net/lwip/pchain.c (new file)
@@ -0,0 +1,154 @@
/* LWIP service - pchain.c - pbuf chain utility functions */

#include "lwip.h"

/*
 * Allocate a chain of pbuf buffers as though it were a PBUF_POOL allocation,
 * except that each buffer is of type PBUF_RAM.  Return the pbuf chain on
 * success, or NULL on memory allocation failure.
 */
struct pbuf *
pchain_alloc(int layer, size_t size)
{
	struct pbuf *pbuf, *phead, **pnext;
	size_t chunk, left;
	int offset = 0;

	/*
	 * Check for length overflow.  Note that we do this before prepending
	 * the header, because otherwise we could never send a full-sized
	 * (65535-byte) IP packet.  This does mean that we are generating a
	 * pbuf chain that has over 64KB worth of allocated space, but our
	 * header hiding ensures that tot_len stays under 64KB.  A check in
	 * pbuf_header() ensures that later header adjustments cannot lift
	 * tot_len over this limit.
	 */
	if (size > UINT16_MAX)
		return NULL;

	/*
	 * Unfortunately, we have no choice but to replicate this block from
	 * lwIP's pbuf_alloc() code.  It is however unlikely that the offsets
	 * change for the currently supported layer types, and we do not need
	 * to support any layer types that we do not use ourselves.
	 */
	switch (layer) {
	case PBUF_TRANSPORT:
		offset = PBUF_LINK_ENCAPSULATION_HLEN + PBUF_LINK_HLEN +
		    PBUF_IP_HLEN + PBUF_TRANSPORT_HLEN;
		break;
	case PBUF_IP:
		offset = PBUF_LINK_ENCAPSULATION_HLEN + PBUF_LINK_HLEN +
		    PBUF_IP_HLEN;
		break;
	case PBUF_LINK:
		offset = PBUF_LINK_ENCAPSULATION_HLEN + PBUF_LINK_HLEN;
		break;
	case PBUF_RAW_TX:
		offset = PBUF_LINK_ENCAPSULATION_HLEN;
		break;
	case PBUF_RAW:
		offset = 0;
		break;
	default:
		panic("invalid pbuf layer: %d", layer);
	}

	chunk = size + offset;
	if (chunk > MEMPOOL_BUFSIZE)
		chunk = MEMPOOL_BUFSIZE;

	if ((phead = pbuf_alloc(PBUF_RAW, chunk, PBUF_RAM)) == NULL)
		return NULL;

	if (offset > 0)
		util_pbuf_header(phead, -offset);

	phead->tot_len = size;

	pnext = &phead->next;

	for (left = size - (chunk - offset); left > 0; left -= chunk) {
		chunk = (left < MEMPOOL_BUFSIZE) ? left : MEMPOOL_BUFSIZE;

		if ((pbuf = pbuf_alloc(PBUF_RAW, chunk, PBUF_RAM)) == NULL) {
			/*
			 * Adjust tot_len to match the actual length of the
			 * chain so far, just in case pbuf_free() starts caring
			 * about this in the future.
			 */
			for (pbuf = phead; pbuf != NULL; pbuf = pbuf->next)
				pbuf->tot_len -= left;

			pbuf_free(phead);

			return NULL;
		}

		pbuf->tot_len = left;

		*pnext = pbuf;
		pnext = &pbuf->next;
	}

	return phead;
}
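
/*
 * A disabled usage sketch: allocating room for a 1000-byte UDP payload.
 * With the MEMPOOL_BUFSIZE of 512 quoted earlier (an assumption for this
 * example), the first buffer loses part of its space to the hidden headers,
 * and the chain is extended with further 512-byte buffers until tot_len
 * covers the full 1000 bytes.
 */
#if 0
	struct pbuf *pbuf;

	if ((pbuf = pchain_alloc(PBUF_TRANSPORT, 1000)) == NULL)
		return ENOMEM;

	/* ...fill and send the chain... */

	pbuf_free(pbuf);
#endif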

/*
 * Given the (non-empty) chain of buffers 'pbuf', return a pointer to the
 * 'next' field of the last buffer in the chain.  This function is packet
 * queue friendly.  A packet queue is a queue of packet chains, where each
 * chain is delimited using the 'tot_len' field.  As a result, while the
 * pointer returned is never NULL, the value pointed to by the returned
 * pointer may or may not be NULL (and will point to the next chain if not
 * NULL).  As a notable exception, in cases where the buffer type is a single
 * PBUF_REF, 'tot_len' may be zero and 'len' may be non-zero.  In such cases,
 * the chain consists of that single buffer only.  This function must handle
 * that case as well.
 */
struct pbuf **
pchain_end(struct pbuf * pbuf)
{

	assert(pbuf != NULL);

	while (pbuf->tot_len > pbuf->len) {
		pbuf = pbuf->next;

		assert(pbuf != NULL);
	}

	return &pbuf->next;
}

/*
 * Given the (non-empty) chain of buffers 'pbuf', return a byte size
 * estimation of the memory used by the chain, rounded up to pool buffer
 * sizes.  This function is packet queue friendly.
 */
size_t
pchain_size(struct pbuf * pbuf)
{
	size_t size;

	assert(pbuf != NULL);

	/*
	 * Count the first buffer separately, as its length may be seriously
	 * off due to header hiding.  While the caller should always provide
	 * exactly the same pbuf chain twice if it intends to get back the
	 * same size twice, this also protects against accidental size
	 * differences due to header hiding in that case.
	 */
	size = MEMPOOL_BUFSIZE;

	/*
	 * Round up the size of the rest of the chain to whole buffers.
	 */
	if (pbuf->tot_len > pbuf->len) {
		size += pbuf->tot_len - pbuf->len + MEMPOOL_BUFSIZE - 1;

		size -= size % MEMPOOL_BUFSIZE;
	}

	return size;
}
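
/*
 * A worked example for pchain_size(), with example values only: for a chain
 * with tot_len 1300 and a first-buffer len of 200, the first buffer counts as
 * one full MEMPOOL_BUFSIZE (512), and the remaining 1100 bytes round up to
 * three more buffers, for a total estimate of 4 * 512 = 2048 bytes.
 */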
minix/net/lwip/pktsock.c (new file, 1236 lines; diff too large to show)

minix/net/lwip/pktsock.h (new file)
@@ -0,0 +1,63 @@
#ifndef MINIX_NET_LWIP_PKTSOCK_H
#define MINIX_NET_LWIP_PKTSOCK_H

#include "mcast.h"

/* Packet-level socket, shared by UDP and RAW. */
struct pktsock {
	struct ipsock pkt_ipsock;	/* IP socket object, MUST be first */
	struct pbuf *pkt_rcvhead;	/* receive buffer, first packet */
	struct pbuf **pkt_rcvtailp;	/* receive buffer, last ptr-ptr */
	size_t pkt_rcvlen;		/* receive buffer, length in bytes */
	struct mcast_head pkt_mcast;	/* multicast membership list */
	ip6_addr_p_t pkt_srcaddr;	/* IPV6_PKTINFO: source address */
	uint32_t pkt_ifindex;		/* IPV6_PKTINFO: interface index */
};

#define pktsock_get_ipsock(pkt)		(&(pkt)->pkt_ipsock)
#define pktsock_get_ifindex(pkt)	((pkt)->pkt_ifindex)

/* Options when sending packets. */
struct pktopt {
	uint8_t pkto_flags;		/* packet send flags (PKTOF_) */
	uint8_t pkto_tos;		/* type of service for the packet */
	uint8_t pkto_ttl;		/* time-to-live for the packet */
	uint8_t pkto_mcast_ttl;		/* time-to-live for multicast packet */
	ip6_addr_p_t pkto_srcaddr;	/* IPV6_PKTINFO: source address */
	unsigned int pkto_ifindex;	/* IPV6_PKTINFO: interface index */
};

#define PKTOF_TTL	0x01	/* send packet with custom TTL value */
#define PKTOF_TOS	0x02	/* send packet with custom TOS value */
#define PKTOF_PKTINFO	0x04	/* send packet with src addr, on if. */

int pktsock_socket(struct pktsock * pkt, int domain, size_t sndbuf,
	size_t rcvbuf, struct sock ** sockp);
int pktsock_test_input(struct pktsock * pkt, struct pbuf * pbuf);
void pktsock_input(struct pktsock * pkt, struct pbuf * pbuf,
	const ip_addr_t * srcaddr, uint16_t port);
int pktsock_get_pktinfo(struct pktsock * pkt, struct pktopt * pkto,
	struct ifdev ** ifdevp, ip_addr_t * src_addrp);
int pktsock_get_ctl(struct pktsock * pkt, const struct sockdriver_data * ctl,
	socklen_t ctl_len, struct pktopt * pkto);
int pktsock_get_data(struct pktsock * pkt, const struct sockdriver_data * data,
	size_t len, struct pbuf * pbuf);
int pktsock_pre_recv(struct sock * sock, endpoint_t user_endpt, int flags);
int pktsock_recv(struct sock * sock, const struct sockdriver_data * data,
	size_t len, size_t * off, const struct sockdriver_data * ctl,
	socklen_t ctl_len, socklen_t * ctl_off, struct sockaddr * addr,
	socklen_t * addr_len, endpoint_t user_endpt, int flags, size_t min,
	int * rflags);
int pktsock_test_recv(struct sock * sock, size_t min, size_t * size);
void pktsock_set_mcaware(struct pktsock * pkt);
int pktsock_setsockopt(struct pktsock * pkt, int level, int name,
	const struct sockdriver_data * data, socklen_t len,
	struct ipopts * ipopts);
int pktsock_getsockopt(struct pktsock * pkt, int level, int name,
	const struct sockdriver_data * data, socklen_t * len,
	struct ipopts * ipopts);
void pktsock_shutdown(struct pktsock * pkt, unsigned int mask);
void pktsock_close(struct pktsock * pkt);
size_t pktsock_get_recvlen(struct pktsock * pkt);

#endif /* !MINIX_NET_LWIP_PKTSOCK_H */
minix/net/lwip/rawsock.c (new file, 1341 lines; diff too large to show)
minix/net/lwip/route.c (new file, 1654 lines; diff too large to show)

minix/net/lwip/route.h (new file)
@@ -0,0 +1,39 @@
#ifndef MINIX_NET_LWIP_ROUTE_H
#define MINIX_NET_LWIP_ROUTE_H

#include <net/route.h>

struct route_entry;
struct rtsock_request;

void route_init(void);
int route_add(const ip_addr_t * addr, unsigned int prefix,
	const ip_addr_t * gateway, struct ifdev * ifdev, unsigned int flags,
	const struct rtsock_request * rtr);
int route_can_add(const ip_addr_t * addr, unsigned int prefix, int is_host);
struct route_entry *route_find(const ip_addr_t * addr, unsigned int prefix,
	int is_host);
struct route_entry *route_lookup(const ip_addr_t * addr);
void route_delete(struct route_entry * route,
	const struct rtsock_request * rtr);
void route_clear(struct ifdev * ifdev);
int route_process(unsigned int type, const struct sockaddr * dst,
	const struct sockaddr * mask, const struct sockaddr * gateway,
	const struct sockaddr * ifp, const struct sockaddr * ifa,
	unsigned int flags, unsigned long inits,
	const struct rt_metrics * rmx, const struct rtsock_request * rtr);
void route_get(const struct route_entry * route, union sockaddr_any * addr,
	union sockaddr_any * mask, union sockaddr_any * gateway,
	union sockaddr_any * ifp, union sockaddr_any * ifa,
	struct ifdev ** ifdev, unsigned int * flags, unsigned int * use);
unsigned int route_get_flags(const struct route_entry * route);
struct ifdev *route_get_ifdev(const struct route_entry * route);
int route_is_ipv6(const struct route_entry * route);
struct route_entry *route_enum_v4(struct route_entry * last);
struct route_entry *route_enum_v6(struct route_entry * last);
int route_output_v4(struct ifdev * ifdev, const ip4_addr_t * ipaddr,
	err_t * err);
int route_output_v6(struct ifdev * ifdev, const ip6_addr_t * ipaddr,
	err_t * err);

#endif /* !MINIX_NET_LWIP_ROUTE_H */
minix/net/lwip/rtsock.c (new file, 1912 lines; diff too large to show)

minix/net/lwip/rtsock.h (new file)
@@ -0,0 +1,32 @@
#ifndef MINIX_NET_LWIP_RTSOCK_H
#define MINIX_NET_LWIP_RTSOCK_H

#include "ifaddr.h"
#include "lldata.h"

struct route_entry;
struct rtsock_request;

void rtsock_init(void);
sockid_t rtsock_socket(int type, int protocol, struct sock ** sock,
	const struct sockevent_ops ** ops);

void rtsock_msg_ifannounce(struct ifdev * ifdev, int arrival);
void rtsock_msg_ifinfo(struct ifdev * ifdev);

void rtsock_msg_addr_dl(struct ifdev * ifdev, unsigned int type,
	ifaddr_dl_num_t num);
void rtsock_msg_addr_v4(struct ifdev * ifdev, unsigned int type,
	ifaddr_v4_num_t num);
void rtsock_msg_addr_v6(struct ifdev * ifdev, unsigned int type,
	ifaddr_v6_num_t num);

void rtsock_msg_miss(const struct sockaddr * addr);
void rtsock_msg_route(const struct route_entry * route, unsigned int type,
	const struct rtsock_request * rtr);
void rtsock_msg_arp(lldata_arp_num_t num, unsigned int type,
	const struct rtsock_request * rtr);
void rtsock_msg_ndp(lldata_ndp_num_t num, unsigned int type,
	const struct rtsock_request * rtr);

#endif /* !MINIX_NET_LWIP_RTSOCK_H */
minix/net/lwip/rttree.c (new file)
@@ -0,0 +1,744 @@
/* LWIP service - rttree.c - generic routing tree data structure */
/*
 * This module implements the Net/3 binary radix (Patricia) tree as described
 * in TCP/IP Illustrated Vol.2, with a few important changes.  First and
 * foremost, we make the assumption that all address masks are "normal", i.e.,
 * they can be expressed in terms of a "prefix length" or "bit count", meaning
 * that the first so many bits of the mask are set and the remaining bits are
 * all clear.  Based on this assumption, we store routing entries not just in
 * leaf nodes, but rather in a node at the bit count of the routing entry's
 * mask; this node may then also have children.  As a result, instead of
 * "leaf" and "internal" nodes, this module instead uses "data" and "link"
 * nodes:
 *
 * - Data nodes are nodes with an associated routing entry.  The data node
 *   structure is always the first field of its corresponding routing entry
 *   structure.  Data nodes may have zero, one, or two children.  Its children
 *   are always a refinement of the address mask in the routing entry.
 * - Link nodes are nodes with no associated routing entry.  They always have
 *   exactly two children.  As with BSD's "internal" nodes: since the tree
 *   needs no more than one link node per routing entry, each routing entry
 *   structure contains a link node, which may be used anywhere in the tree.
 *
 * The result of this approach is that we do not use a linked list for each
 * leaf, since entries with the same address and different masks are not
 * stored as part of the same leaf node.  There is however still one case
 * where a linked list would be necessary: the coexistence of a full-mask
 * network entry and a host entry (net/32 vs host for IPv4, net/128 vs host
 * for IPv6).  Since this tree implementation is not used for ARP/ND6 (host)
 * entries, the need to support that case is not as high, and so it is
 * currently not supported.  It can be added later if needed.  In that case,
 * only the prototype of rttree_find_exact() will have to be changed, since
 * rttree_add() already supports the difference by passing a full mask vs
 * passing no mask at all.
 *
 * There are other differences with the BSD implementation, and certainly
 * also more opportunities for improving performance.  For now, the
 * implementation should be good enough for its intended purpose.
 */

#include "lwip.h"
#include "rttree.h"

#define RTTREE_BITS_TO_BYTE(bits)	((bits) >> 3)
#define RTTREE_BITS_TO_SHIFT(bits)	(7 - ((bits) & 7))
#define RTTREE_BITS_TO_BYTES(bits)	(RTTREE_BITS_TO_BYTE((bits) + 7))

/*
 * The given node is being added to the given routing tree, and just had its
 * bit count assigned.  Precompute any additional fields used for fast address
 * access on the node.
 */
static void
rttree_precompute(struct rttree * tree __unused, struct rttree_node * node)
{

	node->rtn_byte = RTTREE_BITS_TO_BYTE(node->rtn_bits);
	node->rtn_shift = RTTREE_BITS_TO_SHIFT(node->rtn_bits);
}

/*
 * For an operation on the routing tree 'tree', test whether the bit 'bit' is
 * set or clear in 'addr'.  Return 1 if the address has the bit set, 0 if it
 * does not.
 */
static unsigned int
rttree_test(const struct rttree * tree __unused, const void * addr,
	unsigned int bit)
{
	unsigned int byte, shift;

	byte = RTTREE_BITS_TO_BYTE(bit);
	shift = RTTREE_BITS_TO_SHIFT(bit);

	return (((const uint8_t *)addr)[byte] >> shift) & 1;
}
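
/*
 * A worked example of the byte/shift arithmetic above: for bit 10, byte =
 * 10 >> 3 = 1 and shift = 7 - (10 & 7) = 5, so rttree_test() evaluates
 * (addr[1] >> 5) & 1, i.e., the third-most-significant bit of byte 1.
 */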

/*
 * For an operation on the routing tree 'tree', test whether a particular bit
 * as identified by the routing node 'node' is set or clear in 'addr',
 * effectively computing the side (left or right) to take when descending down
 * the tree.  Return 1 if the address has the bit set, 0 if it does not.
 */
static inline unsigned int
rttree_side(const struct rttree * tree, const struct rttree_node * node,
	const void * addr)
{

	return (((const uint8_t *)addr)[node->rtn_byte] >>
	    node->rtn_shift) & 1;
}

/*
 * Check for the routing tree 'tree' whether the routing entry 'entry' matches
 * the address 'addr' exactly.  Return TRUE or FALSE depending on the outcome.
 * This function must be called only on entries that have already been
 * determined to span the full bit width.
 */
static inline int
rttree_equals(const struct rttree * tree, const struct rttree_entry * entry,
	const void * addr)
{
	unsigned int bits;

	bits = tree->rtt_bits;

	assert(bits == entry->rte_data.rtn_bits);

	return !memcmp(entry->rte_addr, addr, RTTREE_BITS_TO_BYTE(bits));
}

/*
 * Check for the routing tree 'tree' whether the routing entry 'entry' matches
 * the address 'addr'.  Return TRUE if the address is matched by the entry's
 * address and mask, or FALSE if not.
 */
static inline int
rttree_match(const struct rttree * tree, const struct rttree_entry * entry,
	const void * addr)
{
	const uint8_t *aptr, *aptr2, *mptr;
	unsigned int bits, bytes;

	if ((bits = entry->rte_data.rtn_bits) == 0)
		return TRUE;

	if ((mptr = (const uint8_t *)entry->rte_mask) == NULL)
		return rttree_equals(tree, entry, addr);

	aptr = (const uint8_t *)addr;
	aptr2 = (const uint8_t *)entry->rte_addr;

	for (bytes = RTTREE_BITS_TO_BYTES(bits); bytes > 0; bytes--) {
		if ((*aptr & *mptr) != *aptr2)
			return FALSE;

		aptr++;
		aptr2++;
		mptr++;
	}

	return TRUE;
}

/*
 * Find the first bit that differs between the two given addresses.  Return
 * the bit number if found, or the full bit width if the addresses are equal.
 */
static unsigned int
rttree_diff(const struct rttree * tree, const void * addr, const void * addr2)
{
	const uint8_t *aptr, *aptr2;
	unsigned int bit, i;
	uint8_t b;

	aptr = (const uint8_t *)addr;
	aptr2 = (const uint8_t *)addr2;

	for (bit = 0; bit < tree->rtt_bits; bit += NBBY, aptr++, aptr2++) {
		if ((b = *aptr ^ *aptr2) != 0) {
			for (i = 0; i < NBBY; i++)
				if (b & (1 << (NBBY - i - 1)))
					break;
			return bit + i;
		}
	}

	return bit;
}
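
/*
 * A worked example for rttree_diff(), using example IPv4 addresses: for
 * 192.0.2.0 and 192.0.3.0, the first two bytes are equal, and the XOR of the
 * third byte pair is 0x02 ^ 0x03 = 0x01, whose first (most significant) set
 * bit is bit 7 within that byte; the function thus returns 16 + 7 = 23.
 */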

/*
 * Add a link node to the free list of the given routing tree, marking it as
 * free in the process.
 */
static void
rttree_add_free(struct rttree * tree, struct rttree_node * node)
{

	node->rtn_child[0] = NULL;
	if ((node->rtn_child[1] = tree->rtt_free) != NULL)
		node->rtn_child[1]->rtn_child[0] = node;
	tree->rtt_free = node;
	node->rtn_parent = NULL;
	node->rtn_type = RTNT_FREE;
}

/*
 * Remove the given free link node from the free list.  The caller must
 * already have verified that the node is on the free list, and has to change
 * the node type as appropriate afterward.
 */
static void
rttree_del_free(struct rttree * tree, struct rttree_node * node)
{

	assert(node->rtn_type == RTNT_FREE);

	if (node->rtn_child[0] != NULL)
		node->rtn_child[0]->rtn_child[1] = node->rtn_child[1];
	else
		tree->rtt_free = node->rtn_child[1];
	if (node->rtn_child[1] != NULL)
		node->rtn_child[1]->rtn_child[0] = node->rtn_child[0];
}

/*
 * Obtain, remove, and return a free link node from the free list.  This
 * function must be called only when it is already known that the free list
 * is not empty.  The caller has to change the node type as appropriate
 * afterward.
 */
static struct rttree_node *
rttree_get_free(struct rttree * tree)
{
	struct rttree_node * node;

	node = tree->rtt_free;
	assert(node != NULL);
	assert(node->rtn_type == RTNT_FREE);

	rttree_del_free(tree, node);

	return node;
}

/*
 * Initialize the given routing tree, with the given address bit width.
 */
void
rttree_init(struct rttree * tree, unsigned int bits)
{

	tree->rtt_root = NULL;
	tree->rtt_free = NULL;
	tree->rtt_bits = bits;
}

/*
 * Look up the most narrow routing tree entry that matches the given address.
 * Return the entry on success, or NULL if no matching entry is found.
 */
struct rttree_entry *
rttree_lookup_match(struct rttree * tree, const void * addr)
{
	struct rttree_entry *entry, *best;
	struct rttree_node *node;
	unsigned int side;

	/*
	 * The current implementation is "forward-tracking", testing all
	 * potentially matching entries while descending into the tree and
	 * remembering the "best" (narrowest matching) entry.  The assumption
	 * here is that most lookups will end up returning the default route
	 * or another broad route, and thus quickly fail a narrower match and
	 * bail out early.  This assumption is in part motivated by the fact
	 * that our routing trees do not store link-layer (ARP/ND6) entries.
	 * If desired, the implementation can easily be rewritten to do
	 * backtracking instead.
	 */
	best = NULL;

	for (node = tree->rtt_root; node != NULL;
	    node = node->rtn_child[side]) {
		if (node->rtn_type == RTNT_DATA) {
			entry = (struct rttree_entry *)node;

			if (!rttree_match(tree, entry, addr))
				break;

			best = entry;
		}

		side = rttree_side(tree, node, addr);
	}

	return best;
}

/*
 * Look up a routing entry that is an exact match for the given (full)
 * address.  Return the entry if it was found, or NULL otherwise.
 */
struct rttree_entry *
rttree_lookup_host(struct rttree * tree, const void * addr)
{
	struct rttree_entry *entry;
	struct rttree_node *node;
	unsigned int side;

	for (node = tree->rtt_root; node != NULL;
	    node = node->rtn_child[side]) {
		if (node->rtn_type == RTNT_DATA &&
		    node->rtn_bits == tree->rtt_bits) {
			entry = (struct rttree_entry *)node;

			if (rttree_equals(tree, entry, addr))
				return entry;

			break;
		}

		side = rttree_side(tree, node, addr);
	}

	return NULL;
}

/*
 * Look up a routing entry that is an exact match for the given address and
 * prefix length.  Return the entry if found, or NULL otherwise.
 */
struct rttree_entry *
rttree_lookup_exact(struct rttree * tree, const void * addr,
	unsigned int prefix)
{
	struct rttree_entry *entry;
	struct rttree_node *node;
	unsigned int side;

	for (node = tree->rtt_root; node != NULL && node->rtn_bits <= prefix;
	    node = node->rtn_child[side]) {
		if (node->rtn_type == RTNT_DATA) {
			entry = (struct rttree_entry *)node;

			if (!rttree_match(tree, entry, addr))
				return NULL;

			if (node->rtn_bits == prefix)
				return entry;
		}

		side = rttree_side(tree, node, addr);
	}

	return NULL;
}

/*
 * Enumerate entries in the routing tree.  If 'last' is NULL, return the
 * first entry.  Otherwise, return the next entry starting from 'last'.  In
 * both cases, if no (more) entries are present in the tree, return NULL.
 * The order of the returned entries is stable across tree modifications and
 * the function may be called multiple times on the same entry.  More
 * specifically, it is safe to continue enumeration from a previous entry
 * after deleting its successor from the tree.
 */
struct rttree_entry *
rttree_enum(struct rttree * tree, struct rttree_entry * last)
{
	struct rttree_node *node, *parent;

	/*
	 * For the first query, we may have to return the tree root right
	 * away.  For subsequent queries, we have to move ahead by at least
	 * one node.
	 */
	if (last == NULL) {
		if ((node = tree->rtt_root) == NULL)
			return NULL;

		if (node->rtn_type == RTNT_DATA)
			return (struct rttree_entry *)node;
	} else
		node = &last->rte_data;

	/* A basic iterative pre-order binary-tree depth-first search. */
	do {
		assert(node != NULL);

		/* Can we descend further, either left or right? */
		if (node->rtn_child[0] != NULL)
			node = node->rtn_child[0];
		else if (node->rtn_child[1] != NULL)
			node = node->rtn_child[1];
		else {
			/*
			 * No.  Go back up the tree, until we can go right
			 * where we went left before, or we run out of tree.
			 */
			for (;; node = parent) {
				if ((parent = node->rtn_parent) == NULL)
					return NULL;

				if (parent->rtn_child[0] == node &&
				    parent->rtn_child[1] != NULL) {
					node = parent->rtn_child[1];

					break;
				}
			}
		}

		/* Skip link nodes. */
	} while (node->rtn_type != RTNT_DATA);

	return (struct rttree_entry *)node;
}
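
/*
 * A disabled usage sketch of the enumeration function, iterating over all
 * entries of a tree; the 'tree' variable is assumed to exist in context.
 */
#if 0
	struct rttree_entry *entry;

	for (entry = rttree_enum(&tree, NULL); entry != NULL;
	    entry = rttree_enum(&tree, entry)) {
		/* Process 'entry'; deleting its successor here is safe. */
	}
#endif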

/*
 * Set the node 'node' to be part of tree 'tree', with type 'type' (either
 * RTNT_DATA or RTNT_LINK) and a bit count of 'prefix'.  The node is set to be
 * a child of 'parent' on side 'side', unless 'parent' is NULL in which case
 * the node is set to be the topmost node in the tree (and 'side' is ignored).
 * The node's children are set to 'left' and 'right'; for each, if not NULL,
 * its parent is set to 'node'.
 */
static void
rttree_set(struct rttree * tree, struct rttree_node * node, int type,
	unsigned int prefix, struct rttree_node * parent, int side,
	struct rttree_node * left, struct rttree_node * right)
{

	assert(type == RTNT_DATA || type == RTNT_LINK);
	assert(prefix <= tree->rtt_bits);
	assert(side == 0 || side == 1);

	node->rtn_type = type;
	node->rtn_bits = prefix;

	/* With rtn_bits assigned, precompute any derived fields. */
	rttree_precompute(tree, node);

	if ((node->rtn_parent = parent) != NULL)
		parent->rtn_child[side] = node;
	else
		tree->rtt_root = node;

	if ((node->rtn_child[0] = left) != NULL)
		left->rtn_parent = node;
	if ((node->rtn_child[1] = right) != NULL)
		right->rtn_parent = node;
}

/*
 * In the routing tree 'tree', replace old node 'onode' with new node 'node',
 * setting the type of the latter to 'type'.  The tree is updated accordingly,
 * but it is left up to the caller to deal with the old node as appropriate.
 */
static void
rttree_replace(struct rttree * tree, struct rttree_node * onode,
	struct rttree_node * node, int type)
{
	struct rttree_node *parent;
	unsigned int side;

	/*
	 * Replacing one data node with another data node is not something
	 * that is currently being done, even if it would work.
	 */
	assert(onode->rtn_type != RTNT_DATA || node->rtn_type != RTNT_DATA);
	assert(onode->rtn_child[0] != NULL);
	assert(onode->rtn_child[1] != NULL);

	parent = onode->rtn_parent;

	side = (parent != NULL && parent->rtn_child[1] == onode);

	rttree_set(tree, node, type, onode->rtn_bits, parent, side,
	    onode->rtn_child[0], onode->rtn_child[1]);
}

/*
 * Add a new routing entry 'entry' to the routing tree 'tree'.  The entry
 * object will be initialized as a result.  The address to add is given as
 * 'addr', and the address mask as 'mask'.  Both those pointers must point to
 * memory that is as long-lived as the routing entry; this is typically
 * accomplished by storing them in a larger object that embeds 'entry'.
 * However, 'mask' may be NULL, signifying a host type entry with an implied
 * full mask.  If not NULL, the given mask must be normalized, i.e., it must
 * consist of a run of zero or more 1-bits followed by a remainder of only
 * 0-bits.  The number of 1-bits must also be given as a bit count 'prefix',
 * even if 'mask' is NULL.  The address must be normalized to its mask: no
 * bits starting from bit 'prefix' must be set in 'addr'.  Return OK if adding
 * the routing entry succeeded, or EEXIST if an entry already exists for the
 * combination of that address and mask.  If the caller has already verified
 * with rttree_lookup_exact() that no such entry exists, the call will
 * succeed.
 */
int
rttree_add(struct rttree * tree, struct rttree_entry * entry,
	const void * addr, const void * mask, unsigned int prefix)
{
	struct rttree_node *node, *parent, *link;
	struct rttree_entry *other_entry;
	unsigned int bit, side, side2;
	int match;

	assert(mask != NULL || prefix == tree->rtt_bits);

	/*
	 * We start by determining the path, bit count, and method of the
	 * addition.  We do this with a lookup on the address, for the full
	 * address width--that is, not limited to the given prefix length.
	 * As a result, at some point we will find either a NULL pointer, or
	 * a data node with a width that is at least as large as the given
	 * prefix length.  The NULL case is easy: we EXTEND the tree with our
	 * new entry wherever we ran into the NULL pointer.
	 *
	 * If instead we find a sufficiently wide data node, then we see if it
	 * is a match for the new address.  If so, our new data node should
	 * either be INSERTed between two nodes along the path taken so far,
	 * or REPLACE a link node along that path with the new data node.  If
	 * it is not a match, then the action to take depends on whether the
	 * first differing bit falls within the given prefix length: if so, we
	 * have to BRANCH along the path, using a link node allocated for that
	 * differing bit; if not, we should use INSERT or REPLACE after all.
	 *
	 * As the only exceptional case, we might in fact find an entry for
	 * the exact same address and prefix length as what is being added.
	 * In the current design of the routing tree, this is always a failure
	 * case.
	 */
	parent = NULL;
	side = 0;
	other_entry = NULL;

	for (node = tree->rtt_root; node != NULL;
	    node = node->rtn_child[side]) {
		if (node->rtn_type == RTNT_DATA) {
			other_entry = (struct rttree_entry *)node;

			bit = rttree_diff(tree, other_entry->rte_addr, addr);

			match = (bit >= node->rtn_bits);

			/* Test whether the exact entry already exists. */
			if (match && node->rtn_bits == prefix)
				return EEXIST;

			/*
			 * Test the INSERT/REPLACE and BRANCH cases.  Note
			 * that this condition is in a terse, optimized form
			 * that does not map directly to the two different
			 * cases.
			 */
			if (!match || node->rtn_bits > prefix) {
				if (bit > prefix)
					bit = prefix;
				break;
			}
		}

		parent = node;
		side = rttree_side(tree, node, addr);
	}

	/*
	 * At this point, addition is going to succeed no matter what.  Start
	 * by initializing part of 'entry'.  In particular, add the given
	 * entry's link node to the list of free link nodes, because the
	 * common case is that we end up not using it.  If we do, we will
	 * just take it off again right away.  The entry's data node will be
	 * initialized as part of the addition process below.
	 */
	entry->rte_addr = addr;
	entry->rte_mask = mask;

	rttree_add_free(tree, &entry->rte_link);

	/*
	 * First deal with the EXTEND case.  In that case we already know the
	 * intended parent and the side (left/right) for the addition.
	 */
	if (node == NULL) {
		assert(parent == NULL || parent->rtn_bits < prefix);
		assert(parent == NULL || parent->rtn_child[side] == NULL);

		rttree_set(tree, &entry->rte_data, RTNT_DATA, prefix, parent,
		    side, NULL /*left*/, NULL /*right*/);

		return OK;
	}

	/*
	 * For the other three cases, we now have to walk back along the path
	 * we have taken so far in order to find the correct insertion point.
	 */
	while (parent != NULL && parent->rtn_bits >= bit) {
		node = parent;

		parent = node->rtn_parent;
	}

	if (bit == prefix && node->rtn_bits == bit) {
		/*
		 * The REPLACE case.  Replace the link node 'node' with our
		 * new entry.  Afterwards, mark the link node as free.
		 */
		assert(node->rtn_type != RTNT_DATA);

		rttree_replace(tree, node, &entry->rte_data, RTNT_DATA);

		rttree_add_free(tree, node);
	} else if (bit == prefix) {
		/*
		 * The INSERT case.  Insert the data node between 'parent' and
		 * 'node'.  Note that 'parent' may be NULL.  We need to use
		 * the address we found earlier, as 'other_entry', to
		 * determine whether we should add 'node' to the left or right
		 * of the inserted data node.
		 */
		assert(node->rtn_bits > bit);
		assert(parent == NULL || parent->rtn_bits < bit);
		assert(other_entry != NULL);

		side = (parent != NULL && parent->rtn_child[1] == node);

		side2 = rttree_test(tree, other_entry->rte_addr, bit);

		rttree_set(tree, &entry->rte_data, RTNT_DATA, prefix, parent,
		    side, (!side2) ? node : NULL, (side2) ? node : NULL);
	} else {
		/*
		 * The BRANCH case.  In this case, it is impossible that we
		 * find a link node with a bit count equal to the first
		 * differing bit between the address we found and the address
		 * we want to insert: if such a node existed, we would have
		 * descended down its other child during the initial lookup.
		 *
		 * Interpose a link node between 'parent' and 'current' for
		 * bit 'bit', with its other child set to point to 'entry'.
		 * Again, we need to perform an additional bit test here,
		 * because even though we know that the address we found
		 * during the lookup differs from the given address at bit
		 * 'bit', we do not know the value of either bit yet.
		 */
		assert(bit < prefix);
		assert(node->rtn_bits > bit);
		assert(parent == NULL || parent->rtn_bits < bit);

		link = rttree_get_free(tree);

		side = (parent != NULL && parent->rtn_child[1] == node);

		side2 = rttree_test(tree, addr, bit);

		/* Use NULL for the data node we are about to add. */
		rttree_set(tree, link, RTNT_LINK, bit, parent, side,
		    (side2) ? node : NULL, (!side2) ? node : NULL);

		/* This addition will replace the NULL pointer again. */
		rttree_set(tree, &entry->rte_data, RTNT_DATA, prefix, link,
		    side2, NULL /*left*/, NULL /*right*/);
	}

	return OK;
}
|
||||
/*
 * Remove a particular node 'node' from the routing tree 'tree'. The given
 * node must have zero or one children. As an integrity check only, if
 * 'nonempty' is set, the node must have one child. If the node has one child,
 * that child will be linked to the node's parent (or the tree root), thus
 * cutting the node itself out of the tree. If the node has zero children, the
 * corresponding slot in its parent (or the tree root) will be cleared. The
 * function will return a pointer to the parent node if it too qualifies for
 * removal afterwards, or NULL if no further removal action needs to be taken.
 */
static struct rttree_node *
rttree_remove(struct rttree * tree, struct rttree_node * node,
	int nonempty __unused)
{
	struct rttree_node *parent, *child;
	unsigned int side;

	if ((child = node->rtn_child[0]) == NULL)
		child = node->rtn_child[1];

	assert(child != NULL || !nonempty);

	if ((parent = node->rtn_parent) != NULL) {
		side = (parent->rtn_child[1] == node);

		parent->rtn_child[side] = child;

		if (child != NULL)
			child->rtn_parent = parent;
		else if (parent->rtn_type == RTNT_LINK)
			return parent;
	} else {
		tree->rtt_root = child;

		if (child != NULL)
			child->rtn_parent = NULL;
	}

	return NULL;
}

/*
 * Delete the routing entry 'entry' from the routing tree 'tree'. The entry
 * must have been added before. This function always succeeds.
 */
void
rttree_delete(struct rttree * tree, struct rttree_entry * entry)
{
	struct rttree_node *node, *link;

	/*
	 * Remove the data node from the tree. If the data node also has two
	 * children, we have to replace it with a link node. Otherwise, we
	 * have to remove it and, if it has no children at all, possibly remove
	 * its parent as well.
	 */
	node = &entry->rte_data;

	assert(node->rtn_type == RTNT_DATA);

	if (node->rtn_child[0] != NULL && node->rtn_child[1] != NULL) {
		/*
		 * The link node we allocate here may actually be the entry's
		 * own link node. We do not make an exception for that case
		 * here, as we have to deal with the entry's link node being in
		 * use a bit further down anyway.
		 */
		link = rttree_get_free(tree);

		rttree_replace(tree, node, link, RTNT_LINK);
	} else {
		/*
		 * Remove the data node from the tree. If the node has no
		 * children, its removal may leave a link node with one child.
		 * That would be its original parent. That node must then also
		 * be removed from the tree, and freed up.
		 */
		link = rttree_remove(tree, node, FALSE /*nonempty*/);

		if (link != NULL) {
			(void)rttree_remove(tree, link, TRUE /*nonempty*/);

			rttree_add_free(tree, link);
		}
	}

	/*
	 * Remove the entry's link node from either the tree or the free list,
	 * depending on the type currently assigned to it. If it has to be
	 * removed from the tree, it must be replaced with another link node.
	 * There will always be enough link nodes available for this to work.
	 */
	node = &entry->rte_link;

	if (node->rtn_type == RTNT_LINK) {
		link = rttree_get_free(tree);

		rttree_replace(tree, node, link, RTNT_LINK);
	} else {
		assert(node->rtn_type == RTNT_FREE);

		rttree_del_free(tree, node);
	}
}
50	minix/net/lwip/rttree.h	Normal file
@@ -0,0 +1,50 @@
#ifndef MINIX_NET_LWIP_RTTREE_H
#define MINIX_NET_LWIP_RTTREE_H

/* Routing table node structure. */
struct rttree_node {
	struct rttree_node *rtn_child[2];	/* left and right child nodes */
	struct rttree_node *rtn_parent;		/* parent node */
	uint8_t rtn_type;			/* node type (RTNT_) */
	uint8_t rtn_bits;			/* prefix bit count */
	uint8_t rtn_byte;			/* bits-derived byte index */
	uint8_t rtn_shift;			/* bits-derived shift count */
};

#define RTNT_DATA	0	/* data node (entry) */
#define RTNT_LINK	1	/* link node, in use */
#define RTNT_FREE	2	/* link node, free */

/* Routing table entry structure. */
struct rttree_entry {
	struct rttree_node rte_data;	/* data node - MUST be first */
	struct rttree_node rte_link;	/* link node */
	const void *rte_addr;		/* pointer to address */
	const void *rte_mask;		/* pointer to mask */
};

/* Routing table structure. */
struct rttree {
	struct rttree_node *rtt_root;	/* root of the route tree */
	struct rttree_node *rtt_free;	/* free internal nodes list */
	uint8_t rtt_bits;		/* number of bits in address */
};

#define rttree_get_addr(entry)		((entry)->rte_addr)
#define rttree_get_mask(entry)		((entry)->rte_mask)
#define rttree_get_prefix(entry)	((entry)->rte_data.rtn_bits)

void rttree_init(struct rttree * tree, unsigned int bits);
struct rttree_entry *rttree_lookup_match(struct rttree * tree,
	const void * addr);
struct rttree_entry *rttree_lookup_host(struct rttree * tree,
	const void * addr);
struct rttree_entry *rttree_lookup_exact(struct rttree * tree,
	const void * addr, unsigned int prefix);
struct rttree_entry *rttree_enum(struct rttree * tree,
	struct rttree_entry * entry);
int rttree_add(struct rttree * tree, struct rttree_entry * entry,
	const void * addr, const void * mask, unsigned int prefix);
void rttree_delete(struct rttree * tree, struct rttree_entry * entry);

#endif /* !MINIX_NET_LWIP_RTTREE_H */
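A minimal usage sketch of the routing tree API above (illustrative only, not
part of the commit): it assumes that rttree_lookup_match() performs the
longest-prefix match that its name suggests, and it uses made-up IPv4
addresses.

static struct rttree table;
static struct rttree_entry subnet_entry, host_entry;

static const uint8_t net[4] = { 10, 0, 0, 0 };		/* 10.0.0.0/8 */
static const uint8_t netmask[4] = { 255, 0, 0, 0 };
static const uint8_t host[4] = { 10, 1, 2, 3 };		/* 10.1.2.3/32 */

static void
rttree_example(void)
{
	struct rttree_entry *entry;

	rttree_init(&table, 32);	/* 32-bit (IPv4) addresses */

	if (rttree_add(&table, &subnet_entry, net, netmask, 8) != OK)
		return;
	/* A host route has no mask; its prefix spans the full width. */
	if (rttree_add(&table, &host_entry, host, NULL, 32) != OK)
		return;

	/* The most specific entry wins: this should find the host route. */
	entry = rttree_lookup_match(&table, host);
	assert(entry != NULL && rttree_get_prefix(entry) == 32);
}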
203	minix/net/lwip/tcpisn.c	Normal file
@@ -0,0 +1,203 @@
/* LWIP service - tcpisn.c - TCP Initial Sequence Number generation */
/*
 * This module implements the TCP ISN algorithm standardized in RFC 6528. It
 * currently uses the current time, at clock tick granularity, as the source
 * for the 4-microsecond timer, and SHA256 as the hashing algorithm. As part
 * of the input to the hash function, we use an "ISN secret" that can be set
 * through the (hidden, root-only) net.inet.tcp.isn_secret sysctl(7) node.
 * Ideally, the secret should remain the same across system reboots; it is left
 * up to userland to take care of that.
 *
 * TODO: while this module provides the strongest possible implementation of
 * the algorithm, it is also quite heavyweight. We should consider allowing
 * for a more configurable level of strength, perhaps with the possibility for
 * less powerful platforms to revert to simple use of a random number.
 */

#include "lwip.h"
#include "tcpisn.h"

#include <sys/sha2.h>

/*
 * The TCP ISN hash input consists of the TCP 4-tuple of the new connection and
 * a static secret. The 4-tuple consists of two IP addresses, at most 16 bytes
 * (128 bits, for IPv6) each, and two port numbers, two bytes (16 bits) each.
 * We use the SHA256 input block size of 64 bytes to avoid copying, so that
 * leaves us with 28 bytes of room for the static secret. We use 16 bytes, and
 * leave the rest blank. As a sidenote, while hardcoding sizes is not nice, we
 * really need to get the layout exactly right in this case.
 */
#define TCPISN_TUPLE_LENGTH	(16 * 2 + 2 * 2)

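Since the layout has to be exactly right, the following offset map spells out
how the 64-byte hash input block ends up being filled by the code in this file
(derived from that code; for illustration only).

/*
 * tcpisn_input[] layout:
 *
 *	[ 0..15]  local IP address (IPv4-mapped IPv6 format for IPv4)
 *	[16..31]  remote IP address (IPv4-mapped IPv6 format for IPv4)
 *	[32..33]  local TCP port (network byte order)
 *	[34..35]  remote TCP port (network byte order)
 *	[36..51]  16-byte ISN secret (TCPISN_SECRET_LENGTH)
 *	[52..63]  zero padding up to SHA256_BLOCK_LENGTH
 */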
#if TCPISN_SECRET_LENGTH > (SHA256_BLOCK_LENGTH - TCPISN_TUPLE_LENGTH)
#error "TCP ISN secret length exceeds remainder of hash block"
#endif

/* We are using memchr() on this, so do not remove the '32' size here! */
static const uint8_t tcpisn_hextab[32] = "0123456789abcdef0123456789ABCDEF";

static uint8_t tcpisn_input[SHA256_BLOCK_LENGTH] __aligned(4);

static int tcpisn_set;

/*
 * Initialize the TCP ISN module.
 */
void
tcpisn_init(void)
{
	time_t boottime;

	/*
	 * Part of the input to the hash function is kept as is between calls
	 * to the TCP ISN hook. In particular, we zero the entire input here,
	 * so that the padding is zero. We also zero the area where the secret
	 * will be stored, but we put in the system boot time as a last effort
	 * to try to create at least some minimal amount of unpredictability.
	 * The boot time is by no means sufficient though, so issue a warning
	 * if a TCP ISN is requested before an actual secret is set. Note that
	 * an actual secret will overwrite the boot time based pseudo-secret.
	 */
	memset(tcpisn_input, 0, sizeof(tcpisn_input));

	(void)getuptime(NULL, NULL, &boottime);
	memcpy(&tcpisn_input[TCPISN_TUPLE_LENGTH], &boottime,
	    sizeof(boottime));

	tcpisn_set = FALSE;
}

/*
 * Set and/or retrieve the ISN secret. In order to allow the secret to be
 * set from the command line, this sysctl(7) node is a hex-encoded string.
 */
ssize_t
tcpisn_secret(struct rmib_call * call __unused,
	struct rmib_node * node __unused, struct rmib_oldp * oldp,
	struct rmib_newp * newp)
{
	uint8_t secret[TCPISN_SECRET_HEX_LENGTH], byte, *p;
	unsigned int i;
	int r;

	/* First copy out the old (current) ISN secret. */
	if (oldp != NULL) {
		for (i = 0; i < TCPISN_SECRET_LENGTH; i++) {
			byte = tcpisn_input[TCPISN_TUPLE_LENGTH + i];
			secret[i * 2] = tcpisn_hextab[byte >> 4];
			secret[i * 2 + 1] = tcpisn_hextab[byte & 0xf];
		}
		secret[i * 2] = '\0';
		assert(i * 2 + 1 == sizeof(secret));

		if ((r = rmib_copyout(oldp, 0, secret, sizeof(secret))) < 0)
			return r;
	}

	/*
	 * Then copy in the new ISN secret. We require the given string to be
	 * exactly as large as we need.
	 */
	if (newp != NULL) {
		/* Copy in the user-given string. */
		if ((r = rmib_copyin(newp, secret, sizeof(secret))) != OK)
			return r;
		if (secret[sizeof(secret) - 1] != '\0')
			return EINVAL;

		/* Hex-decode the given string (in place). */
		for (i = 0; i < TCPISN_SECRET_LENGTH; i++) {
			if ((p = memchr(tcpisn_hextab, secret[i * 2],
			    sizeof(tcpisn_hextab))) == NULL)
				return EINVAL;
			secret[i] = ((uint8_t)(p - tcpisn_hextab) & 0xf) << 4;
			if ((p = memchr(tcpisn_hextab, secret[i * 2 + 1],
			    sizeof(tcpisn_hextab))) == NULL)
				return EINVAL;
			secret[i] |= (uint8_t)(p - tcpisn_hextab) & 0xf;
		}

		/* Once fully validated, switch to the new secret. */
		memcpy(&tcpisn_input[TCPISN_TUPLE_LENGTH], secret,
		    TCPISN_SECRET_LENGTH);

		tcpisn_set = TRUE;
	}

	/* Return the length of the node. */
	return sizeof(secret);
}

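How the secret might be installed from userland is sketched below. This is a
hypothetical snippet (not part of the commit), assuming NetBSD's
sysctlbyname(3); the hex string is a dummy placeholder where a persistent,
randomly generated secret should go.

#include <sys/sysctl.h>

static int
set_isn_secret(void)
{
	/* Placeholder value; use 32 hex digits from a real random source. */
	static const char hex[] = "00112233445566778899aabbccddeeff";

	/* The node expects exactly the hex digits plus a null terminator. */
	return sysctlbyname("net.inet.tcp.isn_secret", NULL, NULL,
	    hex, sizeof(hex));
}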
/*
 * Hook to generate an Initial Sequence Number (ISN) for a new TCP connection.
 */
uint32_t
lwip_hook_tcp_isn(const ip_addr_t * local_ip, uint16_t local_port,
	const ip_addr_t * remote_ip, uint16_t remote_port)
{
	uint8_t output[SHA256_DIGEST_LENGTH] __aligned(4);
	SHA256_CTX ctx;
	clock_t realtime;
	time_t boottime;
	uint32_t isn;

	if (!tcpisn_set) {
		printf("LWIP: warning, no TCP ISN secret has been set\n");

		tcpisn_set = TRUE;	/* print the warning only once */
	}

	if (IP_IS_V6(local_ip)) {
		assert(IP_IS_V6(remote_ip));

		memcpy(&tcpisn_input[0], &ip_2_ip6(local_ip)->addr, 16);
		memcpy(&tcpisn_input[16], &ip_2_ip6(remote_ip)->addr, 16);
	} else {
		assert(IP_IS_V4(local_ip));
		assert(IP_IS_V4(remote_ip));

		/*
		 * Store IPv4 addresses as IPv4-mapped IPv6 addresses, even
		 * though lwIP will never give us an IPv4-mapped IPv6 address,
		 * so as to ensure completely disjoint address spaces and thus
		 * no potential abuse of IPv6 addresses in order to predict
		 * ISNs for IPv4 connections.
		 */
		memset(&tcpisn_input[0], 0, 10);
		tcpisn_input[10] = 0xff;
		tcpisn_input[11] = 0xff;
		memcpy(&tcpisn_input[12], &ip_2_ip4(local_ip)->addr, 4);
		memset(&tcpisn_input[16], 0, 10);
		tcpisn_input[26] = 0xff;
		tcpisn_input[27] = 0xff;
		memcpy(&tcpisn_input[28], &ip_2_ip4(remote_ip)->addr, 4);
	}

	tcpisn_input[32] = local_port >> 8;
	tcpisn_input[33] = local_port & 0xff;
	tcpisn_input[34] = remote_port >> 8;
	tcpisn_input[35] = remote_port & 0xff;

	/* The rest of the input (secret and padding) is already filled in. */

	SHA256_Init(&ctx);	/* this call zeroes a buffer we don't use.. */
	SHA256_Update(&ctx, tcpisn_input, sizeof(tcpisn_input));
	SHA256_Final(output, &ctx);

	/* Arbitrarily take the first 32 bits from the generated hash. */
	memcpy(&isn, output, sizeof(isn));

	/*
	 * Add the current time in 4-microsecond units. The time value should
	 * be wall-clock accurate and stable even across system reboots and
	 * downtime. Do not precompute the boot time part: it may change.
	 */
	(void)getuptime(NULL, &realtime, &boottime);

	isn += (uint32_t)boottime * 250000;	/* 250000 4-us units per second */
	isn += (uint32_t)(((uint64_t)realtime * 250000) / sys_hz());

	/* The result is the ISN to use for this connection. */
	return isn;
}

20	minix/net/lwip/tcpisn.h	Normal file
@@ -0,0 +1,20 @@
#ifndef MINIX_NET_LWIP_TCPISN_H
#define MINIX_NET_LWIP_TCPISN_H

/*
 * Length, in bytes, of the secret (random seed) that is used as part of the
 * input to the hashing function that generates TCP Initial Sequence Numbers.
 */
#define TCPISN_SECRET_LENGTH	16

/*
 * Size of the hexadecimal-string representation of the secret, including
 * trailing null terminator.
 */
#define TCPISN_SECRET_HEX_LENGTH	(TCPISN_SECRET_LENGTH * 2 + 1)

void tcpisn_init(void);
ssize_t tcpisn_secret(struct rmib_call * call, struct rmib_node * node,
	struct rmib_oldp * oldp, struct rmib_newp * newp);

#endif /* !MINIX_NET_LWIP_TCPISN_H */
2793	minix/net/lwip/tcpsock.c	Normal file
(file diff suppressed because it is too large)
997	minix/net/lwip/udpsock.c	Normal file
@@ -0,0 +1,997 @@
/* LWIP service - udpsock.c - UDP sockets */

#include "lwip.h"
#include "ifaddr.h"
#include "pktsock.h"

#include "lwip/udp.h"

#include <netinet/udp.h>
#include <netinet/ip_var.h>
#include <netinet/udp_var.h>

/* The number of UDP sockets. Inherited from the lwIP configuration. */
#define NR_UDPSOCK	MEMP_NUM_UDP_PCB

/*
 * Outgoing packets are not buffered, so the send buffer size simply
 * determines the maximum size for sent packets. The send buffer maximum is
 * therefore limited to the maximum size of a single packet (64K-1 bytes),
 * which is already enforced by lwIP's 16-bit length parameter to pbuf_alloc().
 *
 * The actual transmission may enforce a lower limit, though. The full packet
 * size must not exceed the same 64K-1 limit, and that includes any headers
 * that still have to be prepended to the given packet. The size of those
 * headers depends on the socket type (IPv4/IPv6) and the IP_HDRINCL setting.
 */
#define UDP_MAX_PAYLOAD	(UINT16_MAX)

#define UDP_SNDBUF_MIN	1		/* minimum UDP send buffer size */
#define UDP_SNDBUF_DEF	8192		/* default UDP send buffer size */
#define UDP_SNDBUF_MAX	UDP_MAX_PAYLOAD	/* maximum UDP send buffer size */
#define UDP_RCVBUF_MIN	MEMPOOL_BUFSIZE	/* minimum UDP receive buffer size */
#define UDP_RCVBUF_DEF	32768		/* default UDP receive buffer size */
#define UDP_RCVBUF_MAX	65536		/* maximum UDP receive buffer size */

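As a quick worked example of the second limit described above (illustrative
only): with standard header sizes, the effective maximum payload ends up
slightly below UDP_MAX_PAYLOAD.

#include <stdio.h>

int
main(void)
{
	const unsigned int max = 65535;	/* UDP_MAX_PAYLOAD (64K-1) */
	const unsigned int ip_hlen = 20, ip6_hlen = 40, udp_hlen = 8;

	/* Mirrors the hdrlen check in udpsock_send() further below. */
	printf("IPv4 max payload: %u\n", max - ip_hlen - udp_hlen);  /* 65507 */
	printf("IPv6 max payload: %u\n", max - ip6_hlen - udp_hlen); /* 65487 */
	return 0;
}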
static struct udpsock {
	struct pktsock udp_pktsock;		/* pkt socket, MUST be first */
	struct udp_pcb *udp_pcb;		/* lwIP UDP control block */
	SIMPLEQ_ENTRY(udpsock) udp_next;	/* next in free list */
} udp_array[NR_UDPSOCK];

static SIMPLEQ_HEAD(, udpsock) udp_freelist;	/* list of free UDP sockets */

static const struct sockevent_ops udpsock_ops;

#define udpsock_get_sock(udp)	(ipsock_get_sock(udpsock_get_ipsock(udp)))
#define udpsock_get_ipsock(udp)	(pktsock_get_ipsock(&(udp)->udp_pktsock))
#define udpsock_is_ipv6(udp)	(ipsock_is_ipv6(udpsock_get_ipsock(udp)))
#define udpsock_is_conn(udp)	\
	(udp_flags((udp)->udp_pcb) & UDP_FLAGS_CONNECTED)

static ssize_t udpsock_pcblist(struct rmib_call *, struct rmib_node *,
	struct rmib_oldp *, struct rmib_newp *);

/* The CTL_NET {PF_INET,PF_INET6} IPPROTO_UDP subtree. */
/* TODO: add many more and make some of them writable.. */
static struct rmib_node net_inet_udp_table[] = {
/* 1*/	[UDPCTL_CHECKSUM]	= RMIB_INT(RMIB_RO, 1, "checksum",
				    "Compute UDP checksums"),
/* 2*/	[UDPCTL_SENDSPACE]	= RMIB_INT(RMIB_RO, UDP_SNDBUF_DEF,
				    "sendspace",
				    "Default UDP send buffer size"),
/* 3*/	[UDPCTL_RECVSPACE]	= RMIB_INT(RMIB_RO, UDP_RCVBUF_DEF,
				    "recvspace",
				    "Default UDP receive buffer size"),
/* 4*/	[UDPCTL_LOOPBACKCKSUM]	= RMIB_FUNC(RMIB_RW | CTLTYPE_INT, sizeof(int),
				    loopif_cksum, "do_loopback_cksum",
				    "Perform UDP checksum on loopback"),
/*+0*/	[UDPCTL_MAXID]		= RMIB_FUNC(RMIB_RO | CTLTYPE_NODE, 0,
				    udpsock_pcblist, "pcblist",
				    "UDP protocol control block list"),
};

static struct rmib_node net_inet_udp_node =
    RMIB_NODE(RMIB_RO, net_inet_udp_table, "udp", "UDPv4 related settings");
static struct rmib_node net_inet6_udp6_node =
    RMIB_NODE(RMIB_RO, net_inet_udp_table, "udp6", "UDPv6 related settings");

/*
 * Initialize the UDP sockets module.
 */
void
udpsock_init(void)
{
	unsigned int slot;

	/* Initialize the list of free UDP sockets. */
	SIMPLEQ_INIT(&udp_freelist);

	for (slot = 0; slot < __arraycount(udp_array); slot++)
		SIMPLEQ_INSERT_TAIL(&udp_freelist, &udp_array[slot], udp_next);

	/* Register the net.inet.udp and net.inet6.udp6 RMIB subtrees. */
	mibtree_register_inet(PF_INET, IPPROTO_UDP, &net_inet_udp_node);
	mibtree_register_inet(PF_INET6, IPPROTO_UDP, &net_inet6_udp6_node);
}

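The static-pool pattern used by udp_array and udp_freelist above, reduced to a
standalone sketch (illustrative only, assuming NetBSD's <sys/queue.h> SIMPLEQ
macros): all objects live in a fixed array, and a free list tracks which slots
are available.

#include <sys/queue.h>
#include <stddef.h>

struct slot {
	SIMPLEQ_ENTRY(slot) next;
};

static struct slot pool[4];
static SIMPLEQ_HEAD(, slot) freelist = SIMPLEQ_HEAD_INITIALIZER(freelist);

static void
pool_init(void)
{
	size_t i;

	for (i = 0; i < sizeof(pool) / sizeof(pool[0]); i++)
		SIMPLEQ_INSERT_TAIL(&freelist, &pool[i], next);
}

static struct slot *
pool_get(void)
{
	struct slot *s;

	if ((s = SIMPLEQ_FIRST(&freelist)) != NULL)
		SIMPLEQ_REMOVE_HEAD(&freelist, next);
	return s;	/* NULL means the pool is exhausted (cf. ENOBUFS) */
}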
/*
 * A packet has arrived on a UDP socket. We own the given packet buffer, and
 * so we must free it if we do not want to keep it.
 */
static void
udpsock_input(void * arg, struct udp_pcb * pcb __unused, struct pbuf * pbuf,
	const ip_addr_t * ipaddr, uint16_t port)
{
	struct udpsock *udp = (struct udpsock *)arg;

	/* All UDP input processing is handled by pktsock. */
	pktsock_input(&udp->udp_pktsock, pbuf, ipaddr, port);
}

/*
 * Create a UDP socket.
 */
sockid_t
udpsock_socket(int domain, int protocol, struct sock ** sockp,
	const struct sockevent_ops ** ops)
{
	struct udpsock *udp;
	unsigned int flags;
	uint8_t ip_type;

	switch (protocol) {
	case 0:
	case IPPROTO_UDP:
		break;

	/* NetBSD does not support IPPROTO_UDPLITE, even though lwIP does. */
	default:
		return EPROTONOSUPPORT;
	}

	if (SIMPLEQ_EMPTY(&udp_freelist))
		return ENOBUFS;

	udp = SIMPLEQ_FIRST(&udp_freelist);

	ip_type = pktsock_socket(&udp->udp_pktsock, domain, UDP_SNDBUF_DEF,
	    UDP_RCVBUF_DEF, sockp);

	/* We should have enough PCBs so this call should not fail.. */
	if ((udp->udp_pcb = udp_new_ip_type(ip_type)) == NULL)
		return ENOBUFS;
	udp_recv(udp->udp_pcb, udpsock_input, (void *)udp);

	/* By default, the multicast TTL is 1 and looping is enabled. */
	udp_set_multicast_ttl(udp->udp_pcb, 1);

	flags = udp_flags(udp->udp_pcb);
	udp_setflags(udp->udp_pcb, flags | UDP_FLAGS_MULTICAST_LOOP);

	SIMPLEQ_REMOVE_HEAD(&udp_freelist, udp_next);

	*ops = &udpsock_ops;
	return SOCKID_UDP | (sockid_t)(udp - udp_array);
}

/*
 * Bind a UDP socket to a local address.
 */
static int
udpsock_bind(struct sock * sock, const struct sockaddr * addr,
	socklen_t addr_len, endpoint_t user_endpt)
{
	struct udpsock *udp = (struct udpsock *)sock;
	ip_addr_t ipaddr;
	uint16_t port;
	err_t err;
	int r;

	if ((r = ipsock_get_src_addr(udpsock_get_ipsock(udp), addr, addr_len,
	    user_endpt, &udp->udp_pcb->local_ip, udp->udp_pcb->local_port,
	    TRUE /*allow_mcast*/, &ipaddr, &port)) != OK)
		return r;

	err = udp_bind(udp->udp_pcb, &ipaddr, port);

	return util_convert_err(err);
}

/*
 * Connect a UDP socket to a remote address.
 */
static int
udpsock_connect(struct sock * sock, const struct sockaddr * addr,
	socklen_t addr_len, endpoint_t user_endpt __unused)
{
	struct udpsock *udp = (struct udpsock *)sock;
	struct ifdev *ifdev;
	const ip_addr_t *src_addr;
	ip_addr_t dst_addr;
	uint16_t dst_port;
	uint32_t ifindex, ifindex2;
	err_t err;
	int r;

	/*
	 * One may "unconnect" a socket by providing an address with family
	 * AF_UNSPEC. Providing an <any>:0 address does not achieve the same.
	 */
	if (addr_is_unspec(addr, addr_len)) {
		udp_disconnect(udp->udp_pcb);

		return OK;
	}

	if ((r = ipsock_get_dst_addr(udpsock_get_ipsock(udp), addr,
	    addr_len, &udp->udp_pcb->local_ip, &dst_addr, &dst_port)) != OK)
		return r;

	/*
	 * Bind explicitly to a source address if the PCB is not bound to one
	 * yet. This is expected in the BSD socket API, but lwIP does not do
	 * it for us.
	 */
	if (ip_addr_isany(&udp->udp_pcb->local_ip)) {
		/* Help the multicast case a bit, if possible. */
		ifdev = NULL;

		if (ip_addr_ismulticast(&dst_addr)) {
			ifindex = pktsock_get_ifindex(&udp->udp_pktsock);
			ifindex2 = udp_get_multicast_netif_index(udp->udp_pcb);
			if (ifindex == 0)
				ifindex = ifindex2;

			if (ifindex != 0) {
				ifdev = ifdev_get_by_index(ifindex);

				if (ifdev == NULL)
					return ENXIO;
			}
		}

		src_addr = ifaddr_select(&dst_addr, ifdev, NULL /*ifdevp*/);

		if (src_addr == NULL)
			return EHOSTUNREACH;

		err = udp_bind(udp->udp_pcb, src_addr,
		    udp->udp_pcb->local_port);

		if (err != ERR_OK)
			return util_convert_err(err);
	}

	/*
	 * Connecting a UDP socket serves two main purposes: 1) the socket uses
	 * the address as destination when sending, and 2) the socket receives
	 * packets from only the connected address.
	 */
	err = udp_connect(udp->udp_pcb, &dst_addr, dst_port);

	if (err != ERR_OK)
		return util_convert_err(err);

	return OK;
}

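From the userland side, the "unconnect" semantics handled above look like this
(illustrative sketch, using only the standard BSD socket API):

#include <sys/socket.h>
#include <string.h>

static int
udp_unconnect(int fd)
{
	struct sockaddr sa;

	/* Connecting to an AF_UNSPEC address clears the association. */
	memset(&sa, 0, sizeof(sa));
	sa.sa_family = AF_UNSPEC;

	return connect(fd, &sa, sizeof(sa));
}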
/*
 * Perform preliminary checks on a send request.
 */
static int
udpsock_pre_send(struct sock * sock, size_t len, socklen_t ctl_len __unused,
	const struct sockaddr * addr, socklen_t addr_len __unused,
	endpoint_t user_endpt __unused, int flags)
{
	struct udpsock *udp = (struct udpsock *)sock;

	if ((flags & ~MSG_DONTROUTE) != 0)
		return EOPNOTSUPP;

	if (!udpsock_is_conn(udp) && addr == NULL)
		return EDESTADDRREQ;

	/*
	 * This is only one part of the length check. The rest is done from
	 * udpsock_send(), once we have more information.
	 */
	if (len > ipsock_get_sndbuf(udpsock_get_ipsock(udp)))
		return EMSGSIZE;

	return OK;
}

/*
 * Swap IP-level options between the UDP PCB and the packet options structure,
 * for all options that have their flag set in the packet options structure.
 * This function is called twice when sending a packet. The result is that the
 * flagged options are overridden for only the packet being sent.
 */
static void
udpsock_swap_opt(struct udpsock * udp, struct pktopt * pkto)
{
	uint8_t tos, ttl, mcast_ttl;

	if (pkto->pkto_flags & PKTOF_TOS) {
		tos = udp->udp_pcb->tos;
		udp->udp_pcb->tos = pkto->pkto_tos;
		pkto->pkto_tos = tos;
	}

	if (pkto->pkto_flags & PKTOF_TTL) {
		ttl = udp->udp_pcb->ttl;
		mcast_ttl = udp_get_multicast_ttl(udp->udp_pcb);
		udp->udp_pcb->ttl = pkto->pkto_ttl;
		udp_set_multicast_ttl(udp->udp_pcb, pkto->pkto_mcast_ttl);
		pkto->pkto_ttl = ttl;
		pkto->pkto_mcast_ttl = mcast_ttl;
	}
}

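The call-it-twice override trick above, in miniature (illustrative only):
swapping a stored value with its per-packet override before the operation, and
swapping again afterwards, restores the original state without a separate
save/restore path.

#include <stdint.h>

static void
swap_u8(uint8_t * a, uint8_t * b)
{
	uint8_t t;

	t = *a;
	*a = *b;
	*b = t;
}

/*
 * Usage pattern:
 *	swap_u8(&pcb_ttl, &override_ttl);	apply the override
 *	...send the packet...
 *	swap_u8(&pcb_ttl, &override_ttl);	restore the original
 */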
/*
 * Send a packet on a UDP socket.
 */
static int
udpsock_send(struct sock * sock, const struct sockdriver_data * data,
	size_t len, size_t * off, const struct sockdriver_data * ctl,
	socklen_t ctl_len, socklen_t * ctl_off __unused,
	const struct sockaddr * addr, socklen_t addr_len,
	endpoint_t user_endpt __unused, int flags, size_t min __unused)
{
	struct udpsock *udp = (struct udpsock *)sock;
	struct pktopt pktopt;
	struct pbuf *pbuf;
	struct ifdev *ifdev;
	struct netif *netif;
	const ip_addr_t *src_addrp, *dst_addrp;
	ip_addr_t src_addr, dst_addr; /* for storage only; not always used! */
	uint16_t dst_port;
	uint32_t ifindex;
	size_t hdrlen;
	err_t err;
	int r;

	/* Copy in and parse any packet options. */
	pktopt.pkto_flags = 0;

	if ((r = pktsock_get_ctl(&udp->udp_pktsock, ctl, ctl_len,
	    &pktopt)) != OK)
		return r;

	/*
	 * The code below will both determine an outgoing interface and a
	 * source address for the packet. Even though lwIP could do this for
	 * us in some cases, there are other cases where we must do so
	 * ourselves, the main reasons being 1) the possibility that either or
	 * both have been provided through IPV6_PKTINFO, and 2) our intent to
	 * detect and stop zone violations for (combinations of) scoped IPv6
	 * addresses. As a result, it is easier to simply take over the
	 * selection tasks from lwIP in their entirety.
	 *
	 * Much of the same applies to rawsock_send() as well. Functional
	 * differences (e.g. IP_HDRINCL support) as well as the PCB accesses in
	 * the code make it hard to merge the two into a single pktsock copy.
	 * Please do keep the two in sync as much as possible.
	 */

	/*
	 * Start by checking whether the source address and/or the outgoing
	 * interface are overridden using sticky and/or ancillary options. The
	 * call to pktsock_get_pktinfo(), if successful, will either set
	 * 'ifdev' to NULL, in which case there is no override, or it will set
	 * 'ifdev' to the outgoing interface to use, and (only) in that case
	 * also fill 'src_addr', with an address that may either be a locally
	 * owned unicast address or the unspecified ('any') address. If it is
	 * a unicast address, that is the source address to use for the packet.
	 * Otherwise, fall back to the address to which the socket is bound,
	 * which may also be the unspecified address or even a multicast
	 * address. In those cases we will pick a source address further below.
	 */
	if ((r = pktsock_get_pktinfo(&udp->udp_pktsock, &pktopt, &ifdev,
	    &src_addr)) != OK)
		return r;

	if (ifdev != NULL && !ip_addr_isany(&src_addr)) {
		/* This is guaranteed to be a proper local unicast address. */
		src_addrp = &src_addr;
	} else {
		src_addrp = &udp->udp_pcb->local_ip;

		/*
		 * If the socket is bound to a multicast address, use the
		 * unspecified ('any') address as source address instead, until
		 * we select a real source address (further below). This
		 * substitution keeps the rest of the code a bit simpler.
		 */
		if (ip_addr_ismulticast(src_addrp))
			src_addrp = IP46_ADDR_ANY(IP_GET_TYPE(src_addrp));
	}

	/*
	 * Determine the destination address to use. If the socket is
	 * connected, always ignore any address provided in the send call.
	 */
	if (!udpsock_is_conn(udp)) {
		assert(addr != NULL);	/* already checked in pre_send */

		if ((r = ipsock_get_dst_addr(udpsock_get_ipsock(udp), addr,
		    addr_len, src_addrp, &dst_addr, &dst_port)) != OK)
			return r;

		dst_addrp = &dst_addr;
	} else {
		dst_addrp = &udp->udp_pcb->remote_ip;
		dst_port = udp->udp_pcb->remote_port;
	}

	/*
	 * If the destination is a multicast address, select the outgoing
	 * interface based on the multicast interface index, if one is set.
	 * This must be done here in order to allow the code further below to
	 * detect zone violations, because if we leave this selection to lwIP,
	 * it will not perform zone violation detection at all. Also note that
	 * this case must *not* override an interface index already specified
	 * using IPV6_PKTINFO, as per RFC 3542 Sec. 6.7.
	 */
	if (ifdev == NULL && ip_addr_ismulticast(dst_addrp)) {
		ifindex = udp_get_multicast_netif_index(udp->udp_pcb);

		if (ifindex != NETIF_NO_INDEX)
			ifdev = ifdev_get_by_index(ifindex); /* (may fail) */
	}

	/*
	 * If an interface has been determined by now, the send operation will
	 * bypass routing. In that case, we must perform our own checks on
	 * address zone violations, because those will not be made anywhere
	 * else. Subsequent steps below will never introduce violations.
	 */
	if (ifdev != NULL && IP_IS_V6(dst_addrp)) {
		if (ifaddr_is_zone_mismatch(ip_2_ip6(dst_addrp), ifdev))
			return EHOSTUNREACH;

		if (IP_IS_V6(src_addrp) &&
		    ifaddr_is_zone_mismatch(ip_2_ip6(src_addrp), ifdev))
			return EHOSTUNREACH;
	}

	/*
	 * If we do not yet have an interface at this point, perform a route
	 * lookup to determine the outgoing interface, unless MSG_DONTROUTE is
	 * set (which covers SO_DONTROUTE as well), in which case we look for a
	 * local subnet that matches the destination address.
	 */
	if (ifdev == NULL) {
		if (!(flags & MSG_DONTROUTE)) {
			/*
			 * ip_route() should never be called with an
			 * IPADDR_TYPE_ANY type address. This is a lwIP-
			 * internal requirement; while we override both routing
			 * functions, we do not deviate from it.
			 */
			if (IP_IS_ANY_TYPE_VAL(*src_addrp))
				src_addrp =
				    IP46_ADDR_ANY(IP_GET_TYPE(dst_addrp));

			/* Perform the route lookup. */
			if ((netif = ip_route(src_addrp, dst_addrp)) == NULL)
				return EHOSTUNREACH;

			ifdev = netif_get_ifdev(netif);
		} else {
			if ((ifdev = ifaddr_map_by_subnet(dst_addrp)) == NULL)
				return EHOSTUNREACH;
		}
	}

	/*
	 * At this point we have an outgoing interface. If we do not have a
	 * source address yet, pick one now.
	 */
	assert(ifdev != NULL);

	if (ip_addr_isany(src_addrp)) {
		src_addrp = ifaddr_select(dst_addrp, ifdev, NULL /*ifdevp*/);

		if (src_addrp == NULL)
			return EHOSTUNREACH;
	}

	/*
	 * Now that we know the full conditions of what we are about to send,
	 * check whether the packet size leaves enough room for lwIP to prepend
	 * headers. If so, allocate a chain of pbufs for the packet.
	 */
	assert(len <= UDP_MAX_PAYLOAD);

	if (IP_IS_V6(dst_addrp))
		hdrlen = IP6_HLEN + UDP_HLEN;
	else
		hdrlen = IP_HLEN + UDP_HLEN;

	if (hdrlen + len > UDP_MAX_PAYLOAD)
		return EMSGSIZE;

	if ((pbuf = pchain_alloc(PBUF_TRANSPORT, len)) == NULL)
		return ENOBUFS;

	/* Copy in the packet data. */
	if ((r = pktsock_get_data(&udp->udp_pktsock, data, len, pbuf)) != OK) {
		pbuf_free(pbuf);

		return r;
	}

	/*
	 * Set broadcast/multicast flags for accounting purposes. Only the
	 * multicast flag is used for output accounting, but for loopback
	 * traffic, both flags are copied and used for input accounting and
	 * setting MSG_MCAST/MSG_BCAST.
	 */
	if (ip_addr_ismulticast(dst_addrp))
		pbuf->flags |= PBUF_FLAG_LLMCAST;
	else if (ip_addr_isbroadcast(dst_addrp, ifdev_get_netif(ifdev)))
		pbuf->flags |= PBUF_FLAG_LLBCAST;

	/* Send the packet. */
	udpsock_swap_opt(udp, &pktopt);

	assert(!ip_addr_isany(src_addrp));
	assert(!ip_addr_ismulticast(src_addrp));

	err = udp_sendto_if_src(udp->udp_pcb, pbuf, dst_addrp, dst_port,
	    ifdev_get_netif(ifdev), src_addrp);

	udpsock_swap_opt(udp, &pktopt);

	/* Free the pbuf, as a copy has been made. */
	pbuf_free(pbuf);

	/*
	 * On success, make sure to return the size of the sent packet as well.
	 * As an aside: ctl_off need not be updated, as it is not returned.
	 */
	if ((r = util_convert_err(err)) == OK)
		*off = len;
	return r;
}

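For reference, the userland counterpart of the MSG_DONTROUTE path above
(illustrative sketch): the flag restricts the send operation to destinations
on a directly attached subnet.

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>

static ssize_t
send_local(int fd, const void * buf, size_t len,
	const struct sockaddr_in * dst)
{
	/* Fails with EHOSTUNREACH if no local subnet matches 'dst'. */
	return sendto(fd, buf, len, MSG_DONTROUTE,
	    (const struct sockaddr *)dst, sizeof(*dst));
}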
/*
 * Update the set of flag-type socket options on a UDP socket.
 */
static void
udpsock_setsockmask(struct sock * sock, unsigned int mask)
{
	struct udpsock *udp = (struct udpsock *)sock;

	if (mask & SO_REUSEADDR)
		ip_set_option(udp->udp_pcb, SOF_REUSEADDR);
	else
		ip_reset_option(udp->udp_pcb, SOF_REUSEADDR);

	if (mask & SO_BROADCAST)
		ip_set_option(udp->udp_pcb, SOF_BROADCAST);
	else
		ip_reset_option(udp->udp_pcb, SOF_BROADCAST);
}

/*
 * Prepare a helper structure for IP-level option processing.
 */
static void
udpsock_get_ipopts(struct udpsock * udp, struct ipopts * ipopts)
{

	ipopts->local_ip = &udp->udp_pcb->local_ip;
	ipopts->remote_ip = &udp->udp_pcb->remote_ip;
	ipopts->tos = &udp->udp_pcb->tos;
	ipopts->ttl = &udp->udp_pcb->ttl;
	ipopts->sndmin = UDP_SNDBUF_MIN;
	ipopts->sndmax = UDP_SNDBUF_MAX;
	ipopts->rcvmin = UDP_RCVBUF_MIN;
	ipopts->rcvmax = UDP_RCVBUF_MAX;
}

/*
 * Set socket options on a UDP socket.
 */
static int
udpsock_setsockopt(struct sock * sock, int level, int name,
	const struct sockdriver_data * data, socklen_t len)
{
	struct udpsock *udp = (struct udpsock *)sock;
	struct ipopts ipopts;
	ip_addr_t ipaddr;
	struct in_addr in_addr;
	struct ifdev *ifdev;
	unsigned int flags;
	uint32_t ifindex;
	uint8_t byte;
	int r, val;

	/*
	 * Unfortunately, we have to duplicate most of the multicast options
	 * rather than sharing them with rawsock at the pktsock level. The
	 * reason is that each of the PCBs has its own multicast abstraction
	 * functions and so we cannot merge the rest. Same for getsockopt.
	 */

	switch (level) {
	case IPPROTO_IP:
		if (udpsock_is_ipv6(udp))
			break;

		switch (name) {
		case IP_MULTICAST_IF:
			pktsock_set_mcaware(&udp->udp_pktsock);

			if ((r = sockdriver_copyin_opt(data, &in_addr,
			    sizeof(in_addr), len)) != OK)
				return r;

			ip_addr_set_ip4_u32(&ipaddr, in_addr.s_addr);

			if ((ifdev = ifaddr_map_by_addr(&ipaddr)) == NULL)
				return EADDRNOTAVAIL;

			udp_set_multicast_netif_index(udp->udp_pcb,
			    ifdev_get_index(ifdev));

			return OK;

		case IP_MULTICAST_LOOP:
			pktsock_set_mcaware(&udp->udp_pktsock);

			if ((r = sockdriver_copyin_opt(data, &byte,
			    sizeof(byte), len)) != OK)
				return r;

			flags = udp_flags(udp->udp_pcb);

			if (byte)
				flags |= UDP_FLAGS_MULTICAST_LOOP;
			else
				flags &= ~UDP_FLAGS_MULTICAST_LOOP;

			udp_setflags(udp->udp_pcb, flags);

			return OK;

		case IP_MULTICAST_TTL:
			pktsock_set_mcaware(&udp->udp_pktsock);

			if ((r = sockdriver_copyin_opt(data, &byte,
			    sizeof(byte), len)) != OK)
				return r;

			udp_set_multicast_ttl(udp->udp_pcb, byte);

			return OK;
		}

		break;

	case IPPROTO_IPV6:
		if (!udpsock_is_ipv6(udp))
			break;

		switch (name) {
		case IPV6_MULTICAST_IF:
			pktsock_set_mcaware(&udp->udp_pktsock);

			if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
			    len)) != OK)
				return r;

			if (val != 0) {
				ifindex = (uint32_t)val;

				ifdev = ifdev_get_by_index(ifindex);

				if (ifdev == NULL)
					return ENXIO;
			} else
				ifindex = NETIF_NO_INDEX;

			udp_set_multicast_netif_index(udp->udp_pcb, ifindex);

			return OK;

		case IPV6_MULTICAST_LOOP:
			pktsock_set_mcaware(&udp->udp_pktsock);

			if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
			    len)) != OK)
				return r;

			if (val < 0 || val > 1)
				return EINVAL;

			flags = udp_flags(udp->udp_pcb);

			if (val)
				flags |= UDP_FLAGS_MULTICAST_LOOP;
			else
				flags &= ~UDP_FLAGS_MULTICAST_LOOP;

			/*
			 * lwIP's IPv6 functionality does not actually check
			 * this flag at all yet. We set it in the hope that
			 * one day this will magically start working.
			 */
			udp_setflags(udp->udp_pcb, flags);

			return OK;

		case IPV6_MULTICAST_HOPS:
			pktsock_set_mcaware(&udp->udp_pktsock);

			if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
			    len)) != OK)
				return r;

			if (val < -1 || val > UINT8_MAX)
				return EINVAL;

			if (val == -1)
				val = 1;

			udp_set_multicast_ttl(udp->udp_pcb, val);

			return OK;
		}

		break;
	}

	/* Handle all other options at the packet or IP level. */
	udpsock_get_ipopts(udp, &ipopts);

	return pktsock_setsockopt(&udp->udp_pktsock, level, name, data, len,
	    &ipopts);
}

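The IPV6_MULTICAST_IF handling above takes an interface index; from userland
that typically looks like this (illustrative sketch using the standard BSD
API):

#include <sys/socket.h>
#include <netinet/in.h>
#include <net/if.h>

static int
set_mcast_if6(int fd, const char * ifname)
{
	unsigned int idx;

	if ((idx = if_nametoindex(ifname)) == 0)
		return -1;	/* no such interface */

	return setsockopt(fd, IPPROTO_IPV6, IPV6_MULTICAST_IF,
	    &idx, sizeof(idx));
}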
/*
 * Retrieve socket options on a UDP socket.
 */
static int
udpsock_getsockopt(struct sock * sock, int level, int name,
	const struct sockdriver_data * data, socklen_t * len)
{
	struct udpsock *udp = (struct udpsock *)sock;
	struct ipopts ipopts;
	const ip4_addr_t *ip4addr;
	struct in_addr in_addr;
	struct ifdev *ifdev;
	unsigned int flags;
	uint32_t ifindex;
	uint8_t byte;
	int val;

	switch (level) {
	case IPPROTO_IP:
		if (udpsock_is_ipv6(udp))
			break;

		switch (name) {
		case IP_MULTICAST_IF:
			ifindex = udp_get_multicast_netif_index(udp->udp_pcb);

			/*
			 * Map back from the interface index to the IPv4
			 * address assigned to the corresponding interface.
			 * Should this not work out, return the 'any' address.
			 */
			if (ifindex != NETIF_NO_INDEX &&
			    (ifdev = ifdev_get_by_index(ifindex)) != NULL) {
				ip4addr =
				    netif_ip4_addr(ifdev_get_netif(ifdev));

				in_addr.s_addr = ip4_addr_get_u32(ip4addr);
			} else
				in_addr.s_addr = PP_HTONL(INADDR_ANY);

			return sockdriver_copyout_opt(data, &in_addr,
			    sizeof(in_addr), len);

		case IP_MULTICAST_LOOP:
			flags = udp_flags(udp->udp_pcb);

			byte = !!(flags & UDP_FLAGS_MULTICAST_LOOP);

			return sockdriver_copyout_opt(data, &byte,
			    sizeof(byte), len);

		case IP_MULTICAST_TTL:
			byte = udp_get_multicast_ttl(udp->udp_pcb);

			return sockdriver_copyout_opt(data, &byte,
			    sizeof(byte), len);
		}

		break;

	case IPPROTO_IPV6:
		if (!udpsock_is_ipv6(udp))
			break;

		switch (name) {
		case IPV6_MULTICAST_IF:
			ifindex = udp_get_multicast_netif_index(udp->udp_pcb);

			val = (int)ifindex;

			return sockdriver_copyout_opt(data, &val, sizeof(val),
			    len);

		case IPV6_MULTICAST_LOOP:
			flags = udp_flags(udp->udp_pcb);

			val = !!(flags & UDP_FLAGS_MULTICAST_LOOP);

			return sockdriver_copyout_opt(data, &val, sizeof(val),
			    len);

		case IPV6_MULTICAST_HOPS:
			val = udp_get_multicast_ttl(udp->udp_pcb);

			return sockdriver_copyout_opt(data, &val, sizeof(val),
			    len);
		}

		break;
	}

	/* Handle all other options at the packet or IP level. */
	udpsock_get_ipopts(udp, &ipopts);

	return pktsock_getsockopt(&udp->udp_pktsock, level, name, data, len,
	    &ipopts);
}

/*
 * Retrieve the local socket address of a UDP socket.
 */
static int
udpsock_getsockname(struct sock * sock, struct sockaddr * addr,
	socklen_t * addr_len)
{
	struct udpsock *udp = (struct udpsock *)sock;

	ipsock_put_addr(udpsock_get_ipsock(udp), addr, addr_len,
	    &udp->udp_pcb->local_ip, udp->udp_pcb->local_port);

	return OK;
}

/*
 * Retrieve the remote socket address of a UDP socket.
 */
static int
udpsock_getpeername(struct sock * sock, struct sockaddr * addr,
	socklen_t * addr_len)
{
	struct udpsock *udp = (struct udpsock *)sock;

	if (!udpsock_is_conn(udp))
		return ENOTCONN;

	ipsock_put_addr(udpsock_get_ipsock(udp), addr, addr_len,
	    &udp->udp_pcb->remote_ip, udp->udp_pcb->remote_port);

	return OK;
}

/*
 * Shut down a UDP socket for reading and/or writing.
 */
static int
udpsock_shutdown(struct sock * sock, unsigned int mask)
{
	struct udpsock *udp = (struct udpsock *)sock;

	if (mask & SFL_SHUT_RD)
		udp_recv(udp->udp_pcb, NULL, NULL);

	pktsock_shutdown(&udp->udp_pktsock, mask);

	return OK;
}

/*
 * Close a UDP socket.
 */
static int
udpsock_close(struct sock * sock, int force __unused)
{
	struct udpsock *udp = (struct udpsock *)sock;

	udp_recv(udp->udp_pcb, NULL, NULL);

	udp_remove(udp->udp_pcb);
	udp->udp_pcb = NULL;

	pktsock_close(&udp->udp_pktsock);

	return OK;
}

/*
 * Free up a closed UDP socket.
 */
static void
udpsock_free(struct sock * sock)
{
	struct udpsock *udp = (struct udpsock *)sock;

	assert(udp->udp_pcb == NULL);

	SIMPLEQ_INSERT_HEAD(&udp_freelist, udp, udp_next);
}

/*
 * Fill the given kinfo_pcb sysctl(7) structure with information about the UDP
 * PCB identified by the given pointer.
 */
static void
udpsock_get_info(struct kinfo_pcb * ki, const void * ptr)
{
	const struct udp_pcb *pcb = (const struct udp_pcb *)ptr;
	struct udpsock *udp;

	ki->ki_type = SOCK_DGRAM;

	/*
	 * All UDP sockets should be created by this module, but protect
	 * ourselves from the case that that is not true anyway.
	 */
	if (pcb->recv_arg != NULL) {
		udp = (struct udpsock *)pcb->recv_arg;

		assert(udp >= udp_array &&
		    udp < &udp_array[__arraycount(udp_array)]);
	} else
		udp = NULL;

	ipsock_get_info(ki, &pcb->local_ip, pcb->local_port, &pcb->remote_ip,
	    pcb->remote_port);

	if (udp != NULL) {
		/* TODO: change this so that sockstat(1) may work one day. */
		ki->ki_sockaddr = (uint64_t)(uintptr_t)udpsock_get_sock(udp);

		ki->ki_rcvq = pktsock_get_recvlen(&udp->udp_pktsock);
	}
}

/*
 * Given either NULL or a previously returned UDP PCB pointer, return the first
 * or next UDP PCB pointer, or NULL if there are no more. Skip UDP PCBs that
 * are not bound to an address, as there is no use reporting them.
 */
static const void *
udpsock_enum(const void * last)
{
	const struct udp_pcb *pcb;

	if (last != NULL)
		pcb = (const void *)((const struct udp_pcb *)last)->next;
	else
		pcb = (const void *)udp_pcbs;

	while (pcb != NULL && pcb->local_port == 0)
		pcb = pcb->next;

	return pcb;
}

/*
 * Obtain the list of UDP protocol control blocks, for sysctl(7).
 */
static ssize_t
udpsock_pcblist(struct rmib_call * call, struct rmib_node * node __unused,
	struct rmib_oldp * oldp, struct rmib_newp * newp __unused)
{

	return util_pcblist(call, oldp, udpsock_enum, udpsock_get_info);
}

static const struct sockevent_ops udpsock_ops = {
	.sop_bind		= udpsock_bind,
	.sop_connect		= udpsock_connect,
	.sop_pre_send		= udpsock_pre_send,
	.sop_send		= udpsock_send,
	.sop_pre_recv		= pktsock_pre_recv,
	.sop_recv		= pktsock_recv,
	.sop_test_recv		= pktsock_test_recv,
	.sop_ioctl		= ifconf_ioctl,
	.sop_setsockmask	= udpsock_setsockmask,
	.sop_setsockopt		= udpsock_setsockopt,
	.sop_getsockopt		= udpsock_getsockopt,
	.sop_getsockname	= udpsock_getsockname,
	.sop_getpeername	= udpsock_getpeername,
	.sop_shutdown		= udpsock_shutdown,
	.sop_close		= udpsock_close,
	.sop_free		= udpsock_free
};
251	minix/net/lwip/util.c	Normal file
@@ -0,0 +1,251 @@
/* LWIP service - util.c - shared utility functions */

#include "lwip.h"

#define US	1000000		/* number of microseconds per second */

/*
 * Convert the given timeval structure to a number of clock ticks, checking
 * whether the given structure is valid and whether the resulting number of
 * ticks can be expressed as a (relative) clock ticks value. Upon success,
 * return OK, with the number of clock ticks stored in 'ticksp'. Upon failure,
 * return a negative error code that may be returned to userland directly. In
 * that case, the contents of 'ticksp' are left unchanged.
 *
 * TODO: move this function into libsys and remove other redundant copies.
 */
int
util_timeval_to_ticks(const struct timeval * tv, clock_t * ticksp)
{
	clock_t ticks;

	if (tv->tv_sec < 0 || tv->tv_usec < 0 || tv->tv_usec >= US)
		return EINVAL;

	if (tv->tv_sec >= TMRDIFF_MAX / sys_hz())
		return EDOM;

	ticks = tv->tv_sec * sys_hz() + (tv->tv_usec * sys_hz() + US - 1) / US;
	assert(ticks <= TMRDIFF_MAX);

	*ticksp = ticks;
	return OK;
}

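A worked example of the rounding-up conversion above (illustrative only,
assuming sys_hz() returns 100): 1.5 seconds become 150 ticks, and even a
single microsecond rounds up to a full tick.

#include <stdio.h>

int
main(void)
{
	const long hz = 100, us = 1000000;	/* assume sys_hz() == 100 */

	/* Same arithmetic as util_timeval_to_ticks(). */
	printf("%ld\n", 1L * hz + (500000L * hz + us - 1) / us);	/* 150 */
	printf("%ld\n", 0L * hz + (1L * hz + us - 1) / us);		/* 1 */
	return 0;
}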
/*
 * Convert the given number of clock ticks to a timeval structure. This
 * function never fails.
 */
void
util_ticks_to_timeval(clock_t ticks, struct timeval * tv)
{

	memset(tv, 0, sizeof(*tv));
	tv->tv_sec = ticks / sys_hz();
	tv->tv_usec = (ticks % sys_hz()) * US / sys_hz();
}

/*
 * Copy data between a user process and a chain of buffers. If the 'copy_in'
 * flag is set, the data will be copied in from the user process to the given
 * chain of buffers; otherwise, the data will be copied out from the given
 * buffer chain to the user process. The 'data' parameter is a sockdriver-
 * supplied structure identifying the remote source or destination of the data.
 * The 'len' parameter contains the number of bytes to copy, and 'off' contains
 * the offset into the remote source or destination. 'pbuf' is a pointer to
 * the buffer chain, and 'skip' is the number of bytes to skip in the first
 * buffer on the chain. Return OK on success, or a negative error code if the
 * copy operation failed. This function is packet queue friendly.
 */
int
util_copy_data(const struct sockdriver_data * data, size_t len, size_t off,
	const struct pbuf * pbuf, size_t skip, int copy_in)
{
	iovec_t iov[SOCKDRIVER_IOV_MAX];
	unsigned int i;
	size_t sub, chunk;
	int r;

	while (len > 0) {
		sub = 0;

		for (i = 0; len > 0 && i < __arraycount(iov); i++) {
			assert(pbuf != NULL);

			chunk = (size_t)pbuf->len - skip;
			if (chunk > len)
				chunk = len;

			iov[i].iov_addr = (vir_bytes)pbuf->payload + skip;
			iov[i].iov_size = chunk;

			sub += chunk;
			len -= chunk;

			pbuf = pbuf->next;
			skip = 0;
		}

		if (copy_in)
			r = sockdriver_vcopyin(data, off, iov, i);
		else
			r = sockdriver_vcopyout(data, off, iov, i);
		if (r != OK)
			return r;

		off += sub;
	}

	return OK;
}

/*
 * Copy from a vector of (local) buffers to a single (local) buffer. Return
 * the total number of copied bytes on success, or E2BIG if not all of the
 * results could be stored in the given buffer.
 */
ssize_t
util_coalesce(char * ptr, size_t max, const iovec_t * iov, unsigned int iovcnt)
{
	size_t off, size;

	for (off = 0; iovcnt > 0; iov++, iovcnt--) {
		if ((size = iov->iov_size) > max)
			return E2BIG;

		memcpy(&ptr[off], (void *)iov->iov_addr, size);

		off += size;
		max -= size;
	}

	return off;
}

/*
 * Return TRUE if the given endpoint has superuser privileges, FALSE otherwise.
 */
int
util_is_root(endpoint_t endpt)
{

	return (getnuid(endpt) == ROOT_EUID);
}

/*
 * Convert a lwIP-provided error code (of type err_t) to a negative MINIX 3
 * error code.
 */
int
util_convert_err(err_t err)
{

	switch (err) {
	case ERR_OK:		return OK;
	case ERR_MEM:		return ENOMEM;
	case ERR_BUF:		return ENOBUFS;
	case ERR_TIMEOUT:	return ETIMEDOUT;
	case ERR_RTE:		return EHOSTUNREACH;
	case ERR_VAL:		return EINVAL;
	case ERR_USE:		return EADDRINUSE;
	case ERR_ALREADY:	return EALREADY;
	case ERR_ISCONN:	return EISCONN;
	case ERR_CONN:		return ENOTCONN;
	case ERR_IF:		return ENETDOWN;
	case ERR_ABRT:		return ECONNABORTED;
	case ERR_RST:		return ECONNRESET;
	case ERR_INPROGRESS:	return EINPROGRESS;	/* should not be thrown */
	case ERR_WOULDBLOCK:	return EWOULDBLOCK;	/* should not be thrown */
	case ERR_ARG:		return EINVAL;
	case ERR_CLSD:		/* should be caught as separate case */
	default:		/* should have a case here */
		printf("LWIP: unexpected error from lwIP: %d\n", err);
		return EGENERIC;
	}
}

/*
|
||||
* Obtain the list of protocol control blocks for a particular domain and
|
||||
* protocol. The call may be used for requesting either IPv4 or IPv6 PCBs,
|
||||
* based on the path used to get here. It is used for TCP, UDP, and RAW PCBs.
|
||||
*/
|
||||
ssize_t
|
||||
util_pcblist(struct rmib_call * call, struct rmib_oldp * oldp,
|
||||
const void *(*enum_proc)(const void *),
|
||||
void (*get_info_proc)(struct kinfo_pcb *, const void *))
|
||||
{
|
||||
const void *pcb;
|
||||
ip_addr_t local_ip;
|
||||
struct kinfo_pcb ki;
|
||||
ssize_t off;
|
||||
int r, size, max, domain, protocol;
|
||||
|
||||
if (call->call_namelen != 4)
|
||||
return EINVAL;
|
||||
|
||||
/* The first two added name fields are not used. */
|
||||
|
||||
size = call->call_name[2];
|
||||
if (size < 0 || (size_t)size > sizeof(ki))
|
||||
return EINVAL;
|
||||
if (size == 0)
|
||||
size = sizeof(ki);
|
||||
max = call->call_name[3];
|
||||
|
||||
domain = call->call_oname[1];
|
||||
protocol = call->call_oname[2];
|
||||
|
||||
off = 0;
|
||||
|
||||
for (pcb = enum_proc(NULL); pcb != NULL; pcb = enum_proc(pcb)) {
|
||||
/* Filter on IPv4/IPv6. */
|
||||
memcpy(&local_ip, &((const struct ip_pcb *)pcb)->local_ip,
|
||||
sizeof(local_ip));
|
||||
|
||||
/*
|
||||
* lwIP does not support IPv6 sockets with IPv4-mapped IPv6
|
||||
* addresses, and requires that those be represented as IPv4
|
||||
* sockets instead. We perform the appropriate conversions to
|
||||
* make that work in general, but here we only have the lwIP
|
||||
* PCB to go on, and that PCB may not even have an associated
|
||||
* sock data structure. As a result, we have to report IPv6
|
||||
* sockets with IPv4-mapped IPv6 addresses as IPv4 sockets
|
||||
* here. There is little room for improvement until lwIP
|
||||
* allows us to store a "this is really an IPv6 socket" flag in
|
||||
* its PCBs. As documented in the ipsock module, a partial
|
||||
* solution would for example cause TCP sockets to "jump" from
|
||||
* the IPv6 listing to the IPv4 listing when entering TIME_WAIT
|
||||
* state. The jumping already occurs now for sockets that are
|
||||
* getting bound, but that is not as problematic.
|
||||
*/
|
||||
if ((domain == AF_INET) != IP_IS_V4(&local_ip))
|
||||
continue;
|
||||
|
||||
if (rmib_inrange(oldp, off)) {
|
||||
memset(&ki, 0, sizeof(ki));
|
||||
|
||||
ki.ki_pcbaddr = (uint64_t)(uintptr_t)pcb;
|
||||
ki.ki_ppcbaddr = (uint64_t)(uintptr_t)pcb;
|
||||
ki.ki_family = domain;
|
||||
ki.ki_protocol = protocol;
|
||||
|
||||
get_info_proc(&ki, pcb);
|
||||
|
||||
if ((r = rmib_copyout(oldp, off, &ki, size)) < OK)
|
||||
return r;
|
||||
}
|
||||
|
||||
off += size;
|
||||
if (max > 0 && --max == 0)
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Margin to limit the possible effects of the inherent race condition
|
||||
* between receiving just the data size and receiving the actual data.
|
||||
*/
|
||||
if (oldp == NULL)
|
||||
off += PCB_SLOP * size;
|
||||
|
||||
return off;
|
||||
}
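Each of the TCP, UDP, and RAW modules passes its own enumeration and info-filling callbacks into this function. The sketch below shows what such a caller could look like; the handler and callback names are illustrative assumptions, not code from this commit:

/* Hypothetical module callbacks (illustrative declarations only). */
static const void *enum_udp_pcb(const void *);
static void fill_udp_kinfo(struct kinfo_pcb *, const void *);

/*
 * Illustrative RMIB handler: list all UDP PCBs for the domain selected
 * by the incoming sysctl(7) call.
 */
static ssize_t
example_udplist(struct rmib_call * call, struct rmib_node * node __unused,
	struct rmib_oldp * oldp, struct rmib_newp * newp __unused)
{

	return util_pcblist(call, oldp, enum_udp_pcb, fill_udp_kinfo);
}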

minix/net/lwip/util.h (new file, 27 lines)
@ -0,0 +1,27 @@
#ifndef MINIX_NET_LWIP_UTIL_H
#define MINIX_NET_LWIP_UTIL_H

/* util.c */
int util_timeval_to_ticks(const struct timeval * tv, clock_t * ticksp);
void util_ticks_to_timeval(clock_t ticks, struct timeval * tv);
int util_copy_data(const struct sockdriver_data * data, size_t len, size_t off,
	const struct pbuf * pbuf, size_t skip, int copy_in);
ssize_t util_coalesce(char * buf, size_t max, const iovec_t * iov,
	unsigned int iovcnt);
int util_convert_err(err_t err);
int util_is_root(endpoint_t user_endpt);
ssize_t util_pcblist(struct rmib_call * call, struct rmib_oldp * oldp,
	const void *(*enum_proc)(const void *),
	void (*get_info_proc)(struct kinfo_pcb *, const void *));

/*
 * In our code, pbuf header adjustments should never fail.  This wrapper checks
 * that the pbuf_header() call succeeds, and panics otherwise.
 */
#define util_pbuf_header(pbuf,incr) \
	do { \
		if (pbuf_header((pbuf), (incr))) \
			panic("unexpected pbuf header adjustment failure"); \
	} while (0)

#endif /* !MINIX_NET_LWIP_UTIL_H */
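For instance, a protocol module could use this macro to strip a header and later restore it. The fragment below is a hedged sketch; example_deliver_payload() and example_consume() are hypothetical names:

/* Hypothetical payload consumer (illustrative declaration only). */
static void example_consume(struct pbuf *);

/*
 * Illustrative only: hide 'hdrlen' bytes of protocol header before
 * handing the payload to the consumer, then restore the header.  A
 * negative increment hides header bytes; a positive one exposes them.
 */
static void
example_deliver_payload(struct pbuf * pbuf, size_t hdrlen)
{

	util_pbuf_header(pbuf, -(int)hdrlen);	/* move past the header */
	example_consume(pbuf);			/* hypothetical consumer */
	util_pbuf_header(pbuf, (int)hdrlen);	/* put the header back */
}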

@ -75,6 +75,7 @@ static struct mib_node mib_minix_table[] = {
		"mib", "MIB service information"),
/* 2*/	[MINIX_PROC]	= MIB_NODE(_P | _RO, mib_minix_proc_table,
		"proc", "Process information for ProcFS"),
/* 3*/	/* MINIX_LWIP is mounted through RMIB and thus not present here. */
};

/*
@ -17,12 +17,115 @@
#include <net/gen/psip_io.h>
#include <arpa/inet.h>

#include <net/route.h>
#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#include <net80211/ieee80211_ioctl.h>

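The NAME() entries in the switch statement below rely on a stringifying helper macro defined elsewhere in the trace sources; it presumably expands along these lines (a sketch of the assumed helper, not the literal definition):

/* Assumed NAME() helper: map a request code to its symbolic name. */
#define NAME(req)	case req: return #req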
const char *
net_ioctl_name(unsigned long req)
{

	switch (req) {
	NAME(FIONREAD);
	/* sys/sockio.h */
	NAME(SIOCSHIWAT);		/* TODO: print argument */
	NAME(SIOCGHIWAT);		/* TODO: print argument */
	NAME(SIOCSLOWAT);		/* TODO: print argument */
	NAME(SIOCGLOWAT);		/* TODO: print argument */
	NAME(SIOCSPGRP);		/* TODO: print argument */
	NAME(SIOCGPGRP);		/* TODO: print argument */
	NAME(SIOCADDRT);		/* TODO: print argument */
	NAME(SIOCDELRT);		/* TODO: print argument */
	NAME(SIOCSIFADDR);		/* TODO: print argument */
	NAME(SIOCGIFADDR);		/* TODO: print argument */
	NAME(SIOCSIFDSTADDR);		/* TODO: print argument */
	NAME(SIOCGIFDSTADDR);		/* TODO: print argument */
	NAME(SIOCSIFFLAGS);		/* TODO: print argument */
	NAME(SIOCGIFFLAGS);		/* TODO: print argument */
	NAME(SIOCGIFBRDADDR);		/* TODO: print argument */
	NAME(SIOCSIFBRDADDR);		/* TODO: print argument */
	NAME(SIOCGIFCONF);		/* TODO: print argument */
	NAME(SIOCGIFNETMASK);		/* TODO: print argument */
	NAME(SIOCSIFNETMASK);		/* TODO: print argument */
	NAME(SIOCGIFMETRIC);		/* TODO: print argument */
	NAME(SIOCSIFMETRIC);		/* TODO: print argument */
	NAME(SIOCDIFADDR);		/* TODO: print argument */
	NAME(SIOCAIFADDR);		/* TODO: print argument */
	NAME(SIOCGIFALIAS);		/* TODO: print argument */
	NAME(SIOCGIFAFLAG_IN);		/* TODO: print argument */
	NAME(SIOCALIFADDR);		/* TODO: print argument */
	NAME(SIOCGLIFADDR);		/* TODO: print argument */
	NAME(SIOCDLIFADDR);		/* TODO: print argument */
	NAME(SIOCSIFADDRPREF);		/* TODO: print argument */
	NAME(SIOCGIFADDRPREF);		/* TODO: print argument */
	NAME(SIOCADDMULTI);		/* TODO: print argument */
	NAME(SIOCDELMULTI);		/* TODO: print argument */
	NAME(SIOCSIFMEDIA);		/* TODO: print argument */
	NAME(SIOCGIFMEDIA);		/* TODO: print argument */
	NAME(SIOCSIFGENERIC);		/* TODO: print argument */
	NAME(SIOCGIFGENERIC);		/* TODO: print argument */
	NAME(SIOCSIFPHYADDR);		/* TODO: print argument */
	NAME(SIOCGIFPSRCADDR);		/* TODO: print argument */
	NAME(SIOCGIFPDSTADDR);		/* TODO: print argument */
	NAME(SIOCDIFPHYADDR);		/* TODO: print argument */
	NAME(SIOCSLIFPHYADDR);		/* TODO: print argument */
	NAME(SIOCGLIFPHYADDR);		/* TODO: print argument */
	NAME(SIOCSIFMTU);		/* TODO: print argument */
	NAME(SIOCGIFMTU);		/* TODO: print argument */
	NAME(SIOCSDRVSPEC);		/* TODO: print argument */
	NAME(SIOCGDRVSPEC);		/* TODO: print argument */
	NAME(SIOCIFCREATE);		/* TODO: print argument */
	NAME(SIOCIFDESTROY);		/* TODO: print argument */
	NAME(SIOCIFGCLONERS);		/* TODO: print argument */
	NAME(SIOCGIFDLT);		/* TODO: print argument */
	NAME(SIOCGIFCAP);		/* TODO: print argument */
	NAME(SIOCSIFCAP);		/* TODO: print argument */
	NAME(SIOCSVH);			/* TODO: print argument */
	NAME(SIOCGVH);			/* TODO: print argument */
	NAME(SIOCINITIFADDR);		/* TODO: print argument */
	NAME(SIOCGIFDATA);		/* TODO: print argument */
	NAME(SIOCZIFDATA);		/* TODO: print argument */
	NAME(SIOCGLINKSTR);		/* TODO: print argument */
	NAME(SIOCSLINKSTR);		/* TODO: print argument */
	NAME(SIOCGETHERCAP);		/* TODO: print argument */
	NAME(SIOCGIFINDEX);		/* TODO: print argument */
	NAME(SIOCSETPFSYNC);		/* TODO: print argument */
	NAME(SIOCGETPFSYNC);		/* TODO: print argument */
	/* netinet6/in6_var.h */
	NAME(SIOCSIFADDR_IN6);		/* TODO: print argument */
	NAME(SIOCGIFADDR_IN6);		/* TODO: print argument */
	NAME(SIOCGIFDSTADDR_IN6);	/* TODO: print argument */
	NAME(SIOCGIFNETMASK_IN6);	/* TODO: print argument */
	NAME(SIOCDIFADDR_IN6);		/* TODO: print argument */
	NAME(SIOCGIFPSRCADDR_IN6);	/* TODO: print argument */
	NAME(SIOCGIFPDSTADDR_IN6);	/* TODO: print argument */
	NAME(SIOCGIFAFLAG_IN6);		/* TODO: print argument */
	NAME(SIOCGDRLST_IN6);		/* TODO: print argument */
	NAME(SIOCSNDFLUSH_IN6);		/* TODO: print argument */
	NAME(SIOCGNBRINFO_IN6);		/* TODO: print argument */
	NAME(SIOCSRTRFLUSH_IN6);	/* TODO: print argument */
	NAME(SIOCGIFSTAT_IN6);		/* TODO: print argument */
	NAME(SIOCGIFSTAT_ICMP6);	/* TODO: print argument */
	NAME(SIOCSDEFIFACE_IN6);	/* TODO: print argument */
	NAME(SIOCGDEFIFACE_IN6);	/* TODO: print argument */
	NAME(SIOCSIFINFO_FLAGS);	/* TODO: print argument */
	NAME(SIOCSIFPREFIX_IN6);	/* TODO: print argument */
	NAME(SIOCGIFPREFIX_IN6);	/* TODO: print argument */
	NAME(SIOCDIFPREFIX_IN6);	/* TODO: print argument */
	NAME(SIOCAIFPREFIX_IN6);	/* TODO: print argument */
	NAME(SIOCCIFPREFIX_IN6);	/* TODO: print argument */
	NAME(SIOCGIFALIFETIME_IN6);	/* TODO: print argument */
	NAME(SIOCAIFADDR_IN6);		/* TODO: print argument */
	NAME(SIOCGIFINFO_IN6);		/* TODO: print argument */
	NAME(SIOCSIFINFO_IN6);		/* TODO: print argument */
	NAME(SIOCSIFPHYADDR_IN6);	/* TODO: print argument */
	NAME(SIOCAADDRCTL_POLICY);	/* TODO: print argument */
	NAME(SIOCDADDRCTL_POLICY);	/* TODO: print argument */
	/* net80211/ieee80211_ioctl.h */
	NAME(SIOCS80211NWID);		/* TODO: print argument */
	NAME(SIOCG80211NWID);		/* TODO: print argument */
	/* old MINIX inet ioctls */
	NAME(NWIOSETHOPT);		/* TODO: print argument */
	NAME(NWIOGETHOPT);		/* TODO: print argument */
	NAME(NWIOGETHSTAT);		/* TODO: print argument */