minix/external/public-domain/sqlite/sqlite2mdoc/main.c

/*	$Id: main.c,v 1.2 2016/12/18 16:56:32 christos Exp $ */
/*
 * Copyright (c) 2016 Kristaps Dzonsons <kristaps@bsd.lv>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#ifdef __linux__
#define _GNU_SOURCE
#endif
#include <sys/queue.h>

#include <assert.h>
#include <ctype.h>
#include <err.h>
#include <time.h>
#include <getopt.h>
#include <search.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef __linux__
#include <bsd/stdio.h>
#include <bsd/stdlib.h>
#include <bsd/string.h>
#endif

/*
 * Phase of parsing input file.
 */
enum	phase {
	PHASE_INIT = 0, /* waiting to encounter definition */
	PHASE_KEYS, /* have definition, now keywords */
	PHASE_DESC, /* have keywords, now description */
	PHASE_SEEALSO,
	PHASE_DECL /* have description, now declarations */
};

/*
 * What kind of declaration (preliminary analysis).
 */
enum	decltype {
	DECLTYPE_CPP, /* pre-processor */
	DECLTYPE_C, /* semicolon-closed non-preprocessor */
	DECLTYPE_NEITHER /* non-preprocessor, no semicolon */
};

/*
 * In variables and function declarations, we toss these.
 */
enum	preproc {
	PREPROC_SQLITE_API,
	PREPROC_SQLITE_DEPRECATED,
	PREPROC_SQLITE_EXPERIMENTAL,
	PREPROC_SQLITE_EXTERN,
	PREPROC__MAX
};

/*
 * HTML tags that we recognise.
 */
enum	tag {
	TAG_B_CLOSE,
	TAG_B_OPEN,
	TAG_BLOCK_CLOSE,
	TAG_BLOCK_OPEN,
	TAG_DD_CLOSE,
	TAG_DD_OPEN,
	TAG_DL_CLOSE,
	TAG_DL_OPEN,
	TAG_DT_CLOSE,
	TAG_DT_OPEN,
	TAG_H3_CLOSE,
	TAG_H3_OPEN,
	TAG_LI_CLOSE,
	TAG_LI_OPEN,
	TAG_OL_CLOSE,
	TAG_OL_OPEN,
	TAG_PRE_CLOSE,
	TAG_PRE_OPEN,
	TAG_UL_CLOSE,
	TAG_UL_OPEN,
	TAG__MAX
};

TAILQ_HEAD(defnq, defn);
TAILQ_HEAD(declq, decl);

/*
 * A declaration of type DECLTYPE_CPP or DECLTYPE_C.
 * These need not be unique (if ifdef'd).
 */
struct	decl {
	enum decltype	 type; /* type of declaration */
	char		*text; /* text */
	size_t		 textsz; /* strlen(text) */
	TAILQ_ENTRY(decl) entries;
};

/*
 * A definition is basically the manpage contents.
 */
struct	defn {
	char		 *name; /* really Nd */
	TAILQ_ENTRY(defn) entries;
	char		 *desc; /* long description */
	size_t		  descsz; /* strlen(desc) */
	struct declq	  dcqhead; /* declarations */
	int		  multiline; /* used when parsing */
	int		  instruct; /* used when parsing */
	const char	 *fn; /* parsed from file */
	size_t		  ln; /* parsed at line */
	int		  postprocessed; /* good for emission? */
	char		 *dt; /* manpage title */
	char		**nms; /* manpage names */
	size_t		  nmsz; /* number of names */
	char		 *fname; /* manpage filename */
	char		 *keybuf; /* raw keywords */
	size_t		  keybufsz; /* length of "keysbuf" */
	char		 *seealso; /* see also tags */
	size_t		  seealsosz; /* length of seealso */
	char		**xrs; /* parsed "see also" references */
	size_t		  xrsz; /* number of references */
	char		**keys; /* parsed keywords */
	size_t		  keysz; /* number of keywords */
};

/*
 * Entire parse routine.
 */
struct	parse {
	enum phase	 phase; /* phase of parse */
	size_t		 ln; /* line number */
	const char	*fn; /* open file */
	struct defnq	 dqhead; /* definitions */
};

/*
 * How to handle HTML tags we find in the text.
 */
struct	taginfo {
	const char	*html; /* HTML to key on */
	const char	*mdoc; /* generate mdoc(7) */
	unsigned int	 flags;
#define	TAGINFO_NOBR	 0x01 /* follow w/space, not newline */
#define	TAGINFO_NOOP	 0x02 /* just strip out */
#define	TAGINFO_NOSP	 0x04 /* follow w/o space or newline */
#define	TAGINFO_INLINE	 0x08 /* inline block (notused) */
};

static	const struct taginfo tags[TAG__MAX] = {
	{ "</b>", "\\fP", TAGINFO_INLINE }, /* TAG_B_CLOSE */
	{ "<b>", "\\fB", TAGINFO_INLINE }, /* TAG_B_OPEN */
	{ "</blockquote>", ".Ed\n.Pp", 0 }, /* TAG_BLOCK_CLOSE */
	{ "<blockquote>", ".Bd -ragged", 0 }, /* TAG_BLOCK_OPEN */
	{ "</dd>", "", TAGINFO_NOOP }, /* TAG_DD_CLOSE */
	{ "<dd>", "", TAGINFO_NOOP }, /* TAG_DD_OPEN */
	{ "</dl>", ".El\n.Pp", 0 }, /* TAG_DL_CLOSE */
	{ "<dl>", ".Bl -tag -width Ds", 0 }, /* TAG_DL_OPEN */
	{ "</dt>", "", TAGINFO_NOBR | TAGINFO_NOSP}, /* TAG_DT_CLOSE */
	{ "<dt>", ".It", TAGINFO_NOBR }, /* TAG_DT_OPEN */
	{ "</h3>", "", TAGINFO_NOBR | TAGINFO_NOSP}, /* TAG_H3_CLOSE */
	{ "<h3>", ".Ss", TAGINFO_NOBR }, /* TAG_H3_OPEN */
	{ "</li>", "", TAGINFO_NOOP }, /* TAG_LI_CLOSE */
	{ "<li>", ".It", 0 }, /* TAG_LI_OPEN */
	{ "</ol>", ".El\n.Pp", 0 }, /* TAG_OL_CLOSE */
	{ "<ol>", ".Bl -enum", 0 }, /* TAG_OL_OPEN */
	{ "</pre>", ".Ed\n.Pp", 0 }, /* TAG_PRE_CLOSE */
	{ "<pre>", ".Bd -literal", 0 }, /* TAG_PRE_OPEN */
	{ "</ul>", ".El\n.Pp", 0 }, /* TAG_UL_CLOSE */
	{ "<ul>", ".Bl -bullet", 0 }, /* TAG_UL_OPEN */
};

static	const char *const preprocs[TAG__MAX] = {
	"SQLITE_API", /* PREPROC_SQLITE_API */
	"SQLITE_DEPRECATED", /* PREPROC_SQLITE_DEPRECATED */
	"SQLITE_EXPERIMENTAL", /* PREPROC_SQLITE_EXPERIMENTAL */
	"SQLITE_EXTERN", /* PREPROC_SQLITE_EXTERN */
};

/* Verbose reporting. */
static	int verbose;
/* Don't output any files: use stdout. */
static	int nofile;

static void
decl_function_add(struct parse *p, char **etext,
	size_t *etextsz, const char *cp, size_t len)
{

	if (' ' != (*etext)[*etextsz - 1]) {
		*etext = realloc(*etext, *etextsz + 2);
		if (NULL == *etext)
			err(EXIT_FAILURE, "%s:%zu: "
				"realloc", p->fn, p->ln);
		(*etextsz)++;
		strlcat(*etext, " ", *etextsz + 1);
	}
	*etext = realloc(*etext, *etextsz + len + 1);
	if (NULL == *etext)
		err(EXIT_FAILURE, "%s:%zu: realloc", p->fn, p->ln);
	memcpy(*etext + *etextsz, cp, len);
	*etextsz += len;
	(*etext)[*etextsz] = '\0';
}

static void
decl_function_copy(struct parse *p, char **etext,
	size_t *etextsz, const char *cp, size_t len)
{

	*etext = malloc(len + 1);
	if (NULL == *etext)
		err(EXIT_FAILURE, "%s:%zu: strdup", p->fn, p->ln);
	memcpy(*etext, cp, len);
	*etextsz = len;
	(*etext)[*etextsz] = '\0';
}

/*
 * A C function (or variable, or whatever).
 * This is more specifically any non-preprocessor text.
 */
static int
decl_function(struct parse *p, char *cp, size_t len)
{
	char		*ep, *ncp, *lcp, *rcp;
	size_t		 nlen;
	struct defn	*d;
	struct decl	*e;

	/* Fetch current interface definition. */
	d = TAILQ_LAST(&p->dqhead, defnq);
	assert(NULL != d);

	/*
	 * Since C tokens are semicolon-separated, we may be invoked any
	 * number of times per a single line.
	 */
again:
	while (isspace((int)*cp)) {
		cp++;
		len--;
	}
	if ('\0' == *cp)
		return(1);

	/* Whether we're a continuation clause. */
	if (d->multiline) {
		/* This might be NULL if we're not a continuation. */
		e = TAILQ_LAST(&d->dcqhead, declq);
		assert(DECLTYPE_C == e->type);
		assert(NULL != e);
		assert(NULL != e->text);
		assert(e->textsz);
	} else {
		assert(0 == d->instruct);
		e = calloc(1, sizeof(struct decl));
		e->type = DECLTYPE_C;
		if (NULL == e)
			err(EXIT_FAILURE, "%s:%zu: calloc", p->fn, p->ln);
		TAILQ_INSERT_TAIL(&d->dcqhead, e, entries);
	}

	/*
	 * We begin by seeing if there's a semicolon on this line.
	 * If there is, we'll need to do some special handling.
	 */
	ep = strchr(cp, ';');
	lcp = strchr(cp, '{');
	rcp = strchr(cp, '}');

	/* We're only a partial statement (i.e., no closure). */
	if (NULL == ep && d->multiline) {
		assert(NULL != e->text);
		assert(e->textsz > 0);
		/* Is a struct starting or ending here? */
		if (d->instruct && NULL != rcp)
			d->instruct--;
		else if (NULL != lcp)
			d->instruct++;
		decl_function_add(p, &e->text, &e->textsz, cp, len);
		return(1);
	} else if (NULL == ep && ! d->multiline) {
		d->multiline = 1;
		/* Is a structure starting in this line? */
		if (NULL != lcp &&
		    (NULL == rcp || rcp < lcp))
			d->instruct++;
		decl_function_copy(p, &e->text, &e->textsz, cp, len);
		return(1);
	}

	/* Position ourselves after the semicolon. */
	assert(NULL != ep);
	ncp = cp;
	nlen = (ep - cp) + 1;
	cp = ep + 1;
	len -= nlen;

	if (d->multiline) {
		assert(NULL != e->text);
		/* Don't stop the multi-line if we're in a struct. */
		if (0 == d->instruct) {
			if (NULL == lcp || lcp > cp)
				d->multiline = 0;
		} else if (NULL != rcp && rcp < cp)
			if (0 == --d->instruct)
				d->multiline = 0;
		decl_function_add(p, &e->text, &e->textsz, ncp, nlen);
	} else {
		assert(NULL == e->text);
		if (NULL != lcp && lcp < cp) {
			d->multiline = 1;
			d->instruct++;
		}
		decl_function_copy(p, &e->text, &e->textsz, ncp, nlen);
	}

	goto again;
}

/*
 * A definition is just #define followed by space followed by the name,
 * then the value of that name.
 * We ignore the latter.
 * FIXME: this does not understand multi-line CPP, but I don't think
 * there are any instances of that in sqlite.h.
 */
static int
decl_define(struct parse *p, char *cp, size_t len)
{
	struct defn	*d;
	struct decl	*e;
	size_t		 sz;

	while (isspace((int)*cp)) {
		cp++;
		len--;
	}
	if (0 == len) {
		warnx("%s:%zu: empty pre-processor "
			"constant", p->fn, p->ln);
		return(1);
	}

	d = TAILQ_LAST(&p->dqhead, defnq);
	assert(NULL != d);

	/*
	 * We're parsing a preprocessor definition, but we're still
	 * waiting on a semicolon from a function definition.
	 * It might be a comment or an error.
	 */
	if (d->multiline) {
		warnx("%s:%zu: multiline declaration "
			"still open (harmless?)", p->fn, p->ln);
		e = TAILQ_LAST(&d->dcqhead, declq);
		assert(NULL != e);
		e->type = DECLTYPE_NEITHER;
		d->multiline = d->instruct = 0;
	}

	sz = 0;
	while ('\0' != cp[sz] && ! isspace((int)cp[sz]))
		sz++;

	e = calloc(1, sizeof(struct decl));
	if (NULL == e)
		err(EXIT_FAILURE, "%s:%zu: calloc", p->fn, p->ln);
	e->type = DECLTYPE_CPP;
	e->text = calloc(1, sz + 1);
	if (NULL == e->text)
		err(EXIT_FAILURE, "%s:%zu: calloc", p->fn, p->ln);
	strlcpy(e->text, cp, sz + 1);
	e->textsz = sz;
	TAILQ_INSERT_TAIL(&d->dcqhead, e, entries);
	return(1);
}

/*
 * A declaration is a function, variable, preprocessor definition, or
 * really anything else until we reach a blank line.
 */
static void
decl(struct parse *p, char *cp, size_t len)
{
	struct defn	*d;
	struct decl	*e;

	while (isspace((int)*cp)) {
		cp++;
		len--;
	}

	/* Check closure. */
	if ('\0' == *cp) {
		p->phase = PHASE_INIT;
		/* Check multiline status. */
		d = TAILQ_LAST(&p->dqhead, defnq);
		assert(NULL != d);
		if (d->multiline) {
			warnx("%s:%zu: multiline declaration "
				"still open (harmless?)", p->fn, p->ln);
			e = TAILQ_LAST(&d->dcqhead, declq);
			assert(NULL != e);
			e->type = DECLTYPE_NEITHER;
			d->multiline = d->instruct = 0;
		}
		return;
	}

	/*
	 * Catch preprocessor defines, but discard all other types of
	 * preprocessor statements.
	 */
	if ('#' == *cp) {
		len--;
		cp++;
		while (isspace((int)*cp)) {
			len--;
			cp++;
		}
		if (0 == strncmp(cp, "define", 6))
			decl_define(p, cp + 6, len - 6);
		return;
	}

	decl_function(p, cp, len);
}

/*
 * Parse "SEE ALSO" phrases, which can come at any point in the
 * interface description (unlike what they claim).
 */
static void
seealso(struct parse *p, char *cp, size_t len)
{
	struct defn	*d;

	if ('\0' == *cp) {
		warnx("%s:%zu: warn: unexpected end of "
			"interface description", p->fn, p->ln);
		p->phase = PHASE_INIT;
		return;
	} else if (0 == strcmp(cp, "*/")) {
		p->phase = PHASE_DECL;
		return;
	} else if ('*' != cp[0] || '*' != cp[1]) {
		warnx("%s:%zu: warn: unexpected end of "
			"interface description", p->fn, p->ln);
		p->phase = PHASE_INIT;
		return;
	}

	cp += 2;
	len -= 2;
	while (isspace((int)*cp)) {
		cp++;
		len--;
	}

	/* Blank line: back to description part. */
	if (0 == len) {
		p->phase = PHASE_DESC;
		return;
	}

	/* Fetch current interface definition. */
	d = TAILQ_LAST(&p->dqhead, defnq);
	assert(NULL != d);

	d->seealso = realloc(d->seealso,
		d->seealsosz + len + 1);
	memcpy(d->seealso + d->seealsosz, cp, len);
	d->seealsosz += len;
	d->seealso[d->seealsosz] = '\0';
}

/*
 * A definition description is a block of text that we'll later format
 * in mdoc(7).
 * It extends from the name of the definition down to the declarations
 * themselves.
 */
static void
desc(struct parse *p, char *cp, size_t len)
{
	struct defn	*d;
	size_t		 nsz;

	if ('\0' == *cp) {
		warnx("%s:%zu: warn: unexpected end of "
			"interface description", p->fn, p->ln);
		p->phase = PHASE_INIT;
		return;
	} else if (0 == strcmp(cp, "*/")) {
		/* End of comment area, start of declarations. */
		p->phase = PHASE_DECL;
		return;
	} else if ('*' != cp[0] || '*' != cp[1]) {
		warnx("%s:%zu: warn: unexpected end of "
			"interface description", p->fn, p->ln);
		p->phase = PHASE_INIT;
		return;
	}

	cp += 2;
	len -= 2;

	while (isspace((int)*cp)) {
		cp++;
		len--;
	}

	/* Fetch current interface definition. */
	d = TAILQ_LAST(&p->dqhead, defnq);
	assert(NULL != d);

	/* Ignore leading blank lines. */
	if (0 == len && NULL == d->desc)
		return;

	/* Collect SEE ALSO clauses. */
	if (0 == strncasecmp(cp, "see also:", 9)) {
		cp += 9;
		len -= 9;
		while (isspace((int)*cp)) {
			cp++;
			len--;
		}
		p->phase = PHASE_SEEALSO;
		d->seealso = realloc(d->seealso,
			d->seealsosz + len + 1);
		memcpy(d->seealso + d->seealsosz, cp, len);
		d->seealsosz += len;
		d->seealso[d->seealsosz] = '\0';
		return;
	}

	/* White-space padding between lines. */
	if (NULL != d->desc &&
	    ' ' != d->desc[d->descsz - 1] &&
	    '\n' != d->desc[d->descsz - 1]) {
		d->desc = realloc(d->desc, d->descsz + 2);
		if (NULL == d->desc)
			err(EXIT_FAILURE, "%s:%zu: realloc",
				p->fn, p->ln);
		d->descsz++;
		strlcat(d->desc, " ", d->descsz + 1);
	}

	/* Either append the line of a newline, if blank. */
	nsz = 0 == len ? 1 : len;
	if (NULL == d->desc) {
		d->desc = calloc(1, nsz + 1);
		if (NULL == d->desc)
			err(EXIT_FAILURE, "%s:%zu: calloc",
				p->fn, p->ln);
	} else {
		d->desc = realloc(d->desc, d->descsz + nsz + 1);
		if (NULL == d->desc)
			err(EXIT_FAILURE, "%s:%zu: realloc",
				p->fn, p->ln);
	}
	d->descsz += nsz;
	strlcat(d->desc, 0 == len ? "\n" : cp, d->descsz + 1);
}

/*
 * Copy all KEYWORDS into a buffer.
 */
static void
keys(struct parse *p, char *cp, size_t len)
{
	struct defn	*d;

	if ('\0' == *cp) {
		warnx("%s:%zu: warn: unexpected end of "
			"interface keywords", p->fn, p->ln);
		p->phase = PHASE_INIT;
		return;
	} else if (0 == strcmp(cp, "*/")) {
		/* End of comment area, start of declarations. */
		p->phase = PHASE_DECL;
		return;
	} else if ('*' != cp[0] || '*' != cp[1]) {
		if ('\0' != cp[1]) {
			warnx("%s:%zu: warn: unexpected end of "
				"interface keywords", p->fn, p->ln);
			p->phase = PHASE_INIT;
			return;
		} else
			warnx("%s:%zu: warn: workaround in effect "
				"for unexpected end of "
				"interface keywords", p->fn, p->ln);
	}

	cp += 2;
	len -= 2;
	while (isspace((int)*cp)) {
		cp++;
		len--;
	}

	if (0 == len) {
		p->phase = PHASE_DESC;
		return;
	} else if (strncmp(cp, "KEYWORDS:", 9))
		return;

	cp += 9;
	len -= 9;

	d = TAILQ_LAST(&p->dqhead, defnq);
	assert(NULL != d);
	d->keybuf = realloc(d->keybuf, d->keybufsz + len + 1);
	if (NULL == d->keybuf)
		err(EXIT_FAILURE, "%s:%zu: realloc", p->fn, p->ln);
	memcpy(d->keybuf + d->keybufsz, cp, len);
	d->keybufsz += len;
	d->keybuf[d->keybufsz] = '\0';
}

/*
 * Initial state is where we're scanning forward to find commented
 * instances of CAPI3REF.
 */
static void
init(struct parse *p, char *cp)
{
	struct defn	*d;

	/* Look for comment hook. */
	if ('*' != cp[0] || '*' != cp[1])
		return;
	cp += 2;
	while (isspace((int)*cp))
		cp++;

	/* Look for beginning of definition. */
	if (strncmp(cp, "CAPI3REF:", 9))
		return;
	cp += 9;
	while (isspace((int)*cp))
		cp++;
	if ('\0' == *cp) {
		warnx("%s:%zu: warn: unexpected end of "
			"interface definition", p->fn, p->ln);
		return;
	}

	/* Add definition to list of existing ones. */
	d = calloc(1, sizeof(struct defn));
	if (NULL == d)
		err(EXIT_FAILURE, "%s:%zu: calloc", p->fn, p->ln);
	d->name = strdup(cp);
	if (NULL == d->name)
		err(EXIT_FAILURE, "%s:%zu: strdup", p->fn, p->ln);
	d->fn = p->fn;
	d->ln = p->ln;
	p->phase = PHASE_KEYS;
	TAILQ_INIT(&d->dcqhead);
	TAILQ_INSERT_TAIL(&p->dqhead, d, entries);
}

#define	BPOINT(_cp) \
	(';' == (_cp)[0] || \
	 '[' == (_cp)[0] || \
	 ('(' == (_cp)[0] && '*' != (_cp)[1]) || \
	 ')' == (_cp)[0] || \
	 '{' == (_cp)[0])

/*
 * Given a declaration (be it preprocessor or C), try to parse out a
 * reasonable "name" for the affair.
 * For a struct, for example, it'd be the struct name.
 * For a typedef, it'd be the type name.
 * For a function, it'd be the function name.
 */
static void
grok_name(const struct decl *e,
	const char **start, size_t *sz)
{
	const char	*cp;

	*start = NULL;
	*sz = 0;

	if (DECLTYPE_CPP != e->type) {
		assert(';' == e->text[e->textsz - 1]);
		cp = e->text;
		do {
			while (isspace((int)*cp))
				cp++;
			if (BPOINT(cp))
				break;
			/* Function pointers... */
			if ('(' == *cp)
				cp++;
			/* Pass over pointers. */
			while ('*' == *cp)
				cp++;
			*start = cp;
			*sz = 0;
			while ( ! isspace((int)*cp)) {
				if (BPOINT(cp))
					break;
				cp++;
				(*sz)++;
			}
		} while ( ! BPOINT(cp));
	} else {
		*sz = e->textsz;
		*start = e->text;
	}
}

static int
xrcmp(const void *p1, const void *p2)
{
	const char	*s1 = *(const char **)p1,
	     	 	*s2 = *(const char **)p2;

	return(strcasecmp(s1, s2));
}

/*
 * Extract information from the interface definition.
 * Mark it as "postprocessed" on success.
 */
static void
postprocess(const char *prefix, struct defn *d)
{
	struct decl	*first;
	const char	*start;
	size_t		 offs, sz, i;
	ENTRY		 ent;

	if (TAILQ_EMPTY(&d->dcqhead))
		return;

	/* Find the first #define or declaration. */
	TAILQ_FOREACH(first, &d->dcqhead, entries)
		if (DECLTYPE_CPP == first->type ||
		    DECLTYPE_C == first->type)
			break;

	if (NULL == first) {
		warnx("%s:%zu: no entry to document", d->fn, d->ln);
		return;
	}

	/*
	 * Now compute the document name (`Dt').
	 * We'll also use this for the filename.
	 */
	grok_name(first, &start, &sz);
	if (NULL == start) {
		warnx("%s:%zu: couldn't deduce "
			"entry name", d->fn, d->ln);
		return;
	}

	/* Document name needs all-caps. */
	d->dt = malloc(sz + 1);
	if (NULL == d->dt)
		err(EXIT_FAILURE, "malloc");
	memcpy(d->dt, start, sz);
	d->dt[sz] = '\0';
	for (i = 0; i < sz; i++)
		d->dt[i] = toupper((int)d->dt[i]);

	/* Filename needs no special chars. */
	asprintf(&d->fname, "%s/%.*s.3",
		prefix, (int)sz, start);
	if (NULL == d->fname)
		err(EXIT_FAILURE, "asprintf");

	offs = strlen(prefix) + 1;
	for (i = 0; i < sz; i++) {
		if (isalnum((int)d->fname[offs + i]) ||
		    '_' == d->fname[offs + i] ||
		    '-' == d->fname[offs + i])
			continue;
		d->fname[offs + i] = '_';
	}

	/*
	 * First, extract all keywords.
	 */
	for (i = 0; i < d->keybufsz; ) {
		while (isspace((int)d->keybuf[i]))
			i++;
		if (i == d->keybufsz)
			break;
		sz = 0;
		start = &d->keybuf[i];
		if ('{' == d->keybuf[i]) {
			start = &d->keybuf[++i];
			for ( ; i < d->keybufsz; i++, sz++)
				if ('}' == d->keybuf[i])
					break;
			if ('}' == d->keybuf[i])
				i++;
		} else
			for ( ; i < d->keybufsz; i++, sz++)
				if (isspace((int)d->keybuf[i]))
					break;
		if (0 == sz)
			continue;
		d->keys = realloc(d->keys,
			(d->keysz + 1) * sizeof(char *));
		if (NULL == d->keys)
			err(EXIT_FAILURE, "realloc");
		d->keys[d->keysz] = malloc(sz + 1);
		if (NULL == d->keys[d->keysz])
			err(EXIT_FAILURE, "malloc");
		memcpy(d->keys[d->keysz], start, sz);
		d->keys[d->keysz][sz] = '\0';
		d->keysz++;

		/* Hash the keyword. */
		ent.key = d->keys[d->keysz - 1];
		ent.data = d;
		(void)hsearch(ent, ENTER);
	}

	/*
	 * Now extract all `Nm' values for this document.
	 * We only use CPP and C references, and hope for the best when
	 * doing so.
	 * Enter each one of these as a searchable keyword.
	 */
	TAILQ_FOREACH(first, &d->dcqhead, entries) {
		if (DECLTYPE_CPP != first->type &&
		    DECLTYPE_C != first->type)
			continue;
		grok_name(first, &start, &sz);
		if (NULL == start)
			continue;
		d->nms = realloc(d->nms,
			(d->nmsz + 1) * sizeof(char *));
		if (NULL == d->nms)
			err(EXIT_FAILURE, "realloc");
		d->nms[d->nmsz] = malloc(sz + 1);
		if (NULL == d->nms[d->nmsz])
			err(EXIT_FAILURE, "malloc");
		memcpy(d->nms[d->nmsz], start, sz);
		d->nms[d->nmsz][sz] = '\0';
		d->nmsz++;

		/* Hash the name. */
		ent.key = d->nms[d->nmsz - 1];
		ent.data = d;
		(void)hsearch(ent, ENTER);
	}

	if (0 == d->nmsz) {
		warnx("%s:%zu: couldn't deduce "
			"any names", d->fn, d->ln);
		return;
	}

	/*
	 * Next, scan for all `Xr' values.
	 * We'll add more to this list later.
	 */
	for (i = 0; i < d->seealsosz; i++) {
		/*
		 * Find next value starting with `['.
		 * There's other stuff in there (whitespace or
		 * free text leading up to these) that we're ok
		 * to ignore.
		 */
		while (i < d->seealsosz && '[' != d->seealso[i])
			i++;
		if (i == d->seealsosz)
			break;

		/*
		 * Now scan for the matching `]'.
		 * We can also have a vertical bar if we're separating a
		 * keyword and its shown name.
		 */
		start = &d->seealso[++i];
		sz = 0;
		while (i < d->seealsosz &&
		      ']' != d->seealso[i] &&
		      '|' != d->seealso[i]) {
			i++;
			sz++;
		}
		if (i == d->seealsosz)
			break;
		if (0 == sz)
			continue;

		/*
		 * Continue on to the end-of-reference, if we weren't
		 * there to begin with.
		 */
		if (']' != d->seealso[i])
			while (i < d->seealsosz &&
			      ']' != d->seealso[i])
				i++;

		/* Strip trailing whitespace. */
		while (sz > 1 && ' ' == start[sz - 1])
			sz--;

		/* Strip trailing parenthesis. */
		if (sz > 2 &&
		    '(' == start[sz - 2] &&
	 	    ')' == start[sz - 1])
			sz -= 2;

		d->xrs = realloc(d->xrs,
			(d->xrsz + 1) * sizeof(char *));
		if (NULL == d->xrs)
			err(EXIT_FAILURE, "realloc");
		d->xrs[d->xrsz] = malloc(sz + 1);
		if (NULL == d->xrs[d->xrsz])
			err(EXIT_FAILURE, "malloc");
		memcpy(d->xrs[d->xrsz], start, sz);
		d->xrs[d->xrsz][sz] = '\0';
		d->xrsz++;
	}

	/*
	 * Next, extract all references.
	 * We'll accumulate these into a list of SEE ALSO tags, after.
	 * See how these are parsed above for a description: this is
	 * basically the same thing.
	 */
	for (i = 0; i < d->descsz; i++) {
		if ('[' != d->desc[i])
			continue;
		i++;
		if ('[' == d->desc[i])
			continue;

		start = &d->desc[i];
		for (sz = 0; i < d->descsz; i++, sz++)
			if (']' == d->desc[i] ||
			    '|' == d->desc[i])
				break;

		if (i == d->descsz)
			break;
		else if (sz == 0)
			continue;

		if (']' != d->desc[i])
			while (i < d->descsz &&
			      ']' != d->desc[i])
				i++;

		while (sz > 1 && ' ' == start[sz - 1])
			sz--;

		if (sz > 2 &&
		    '(' == start[sz - 2] &&
		    ')' == start[sz - 1])
			sz -= 2;

		d->xrs = realloc(d->xrs,
			(d->xrsz + 1) * sizeof(char *));
		if (NULL == d->xrs)
			err(EXIT_FAILURE, "realloc");
		d->xrs[d->xrsz] = malloc(sz + 1);
		if (NULL == d->xrs[d->xrsz])
			err(EXIT_FAILURE, "malloc");
		memcpy(d->xrs[d->xrsz], start, sz);
		d->xrs[d->xrsz][sz] = '\0';
		d->xrsz++;
	}

	qsort(d->xrs, d->xrsz, sizeof(char *), xrcmp);
	d->postprocessed = 1;
}

/*
 * Convenience function to look up a keyword.
 * Returns the keyword's file if found or NULL.
 */
static const char *
lookup(char *key)
{
	ENTRY		 ent;
	ENTRY		*res;
	struct defn	*d;

	ent.key = key;
	res = hsearch(ent, FIND);
	if (NULL == res)
		return(NULL);
	d = (struct defn *)res->data;
	if (0 == d->nmsz)
		return(NULL);
	assert(NULL != d->nms[0]);
	return(d->nms[0]);
}

/*
 * Emit a valid mdoc(7) document within the given prefix.
 */
static void
emit(const struct defn *d, const char *mdocdate)
{
	struct decl	*first;
	size_t		 sz, i, col, last, ns;
	FILE		*f;
	char		*cp;
	const char	*res, *lastres, *args, *str, *end;
	enum tag	 tag;
	enum preproc	 pre;

	if ( ! d->postprocessed) {
		warnx("%s:%zu: interface has errors, not "
			"producing manpage", d->fn, d->ln);
		return;
	}

	if (0 == nofile) {
		if (NULL == (f = fopen(d->fname, "w"))) {
			warn("%s: fopen", d->fname);
			return;
		}
	} else
		f = stdout;

	/* Begin by outputting the mdoc(7) header. */
#if 0
	fputs(".Dd $" "Mdocdate$\n", f);
#else
	fprintf(f, ".Dd %s\n", mdocdate);
#endif
	fprintf(f, ".Dt %s 3\n", d->dt);
	fputs(".Os\n", f);
	fputs(".Sh NAME\n", f);

	/* Now print the name bits of each declaration. */
	for (i = 0; i < d->nmsz; i++)
		fprintf(f, ".Nm %s%s\n", d->nms[i],
			i < d->nmsz - 1 ? " ," : "");

	fprintf(f, ".Nd %s\n", d->name);
	fputs(".Sh SYNOPSIS\n", f);

	TAILQ_FOREACH(first, &d->dcqhead, entries) {
		if (DECLTYPE_CPP != first->type &&
		    DECLTYPE_C != first->type)
			continue;

		/* Easy: just print the CPP name. */
		if (DECLTYPE_CPP == first->type) {
			fprintf(f, ".Fd #define %s\n",
				first->text);
			continue;
		}

		/* First, strip out the sqlite CPPs. */
		for (i = 0; i < first->textsz; ) {
			for (pre = 0; pre < PREPROC__MAX; pre++) {
				sz = strlen(preprocs[pre]);
				if (strncmp(preprocs[pre],
				    &first->text[i], sz))
					continue;
				i += sz;
				while (isspace((int)first->text[i]))
					i++;
				break;
			}
			if (pre == PREPROC__MAX)
				break;
		}

		/* If we're a typedef, immediately print Vt. */
		if (0 == strncmp(&first->text[i], "typedef", 7)) {
			fprintf(f, ".Vt %s\n", &first->text[i]);
			continue;
		}

		/* Are we a struct? */
		if (first->textsz > 2 &&
		    '}' == first->text[first->textsz - 2] &&
		    NULL != (cp = strchr(&first->text[i], '{'))) {
			*cp = '\0';
			fprintf(f, ".Vt %s;\n", &first->text[i]);
			/* Restore brace for later usage. */
			*cp = '{';
			continue;
		}

		/* Catch remaining non-functions. */
		if (first->textsz > 2 &&
		    ')' != first->text[first->textsz - 2]) {
			fprintf(f, ".Vt %s\n", &first->text[i]);
			continue;
		}

		str = &first->text[i];
		if (NULL == (args = strchr(str, '('))) {
			/* What is this? */
			fputs(".Bd -literal\n", f);
			fputs(&first->text[i], f);
			fputs("\n.Ed\n", f);
			continue;
		}

		/* Scroll back to end of function name. */
		end = args - 1;
		while (end > str && isspace((int)*end))
			end--;

		/* Scroll back to what comes before. */
		for ( ; end > str; end--)
			if (isspace((int)*end) || '*' == *end)
				break;

		/*
		 * If we can't find what came before, then the function
		 * has no type, which is odd... let's just call it void.
		 */
		if (end > str) {
			fprintf(f, ".Ft %.*s\n",
				(int)(end - str + 1), str);
			fprintf(f, ".Fo %.*s\n",
				(int)(args - end - 1), end + 1);
		} else {
			fputs(".Ft void\n", f);
			fprintf(f, ".Fo %.*s\n", (int)(args - end), end);
		}

		/*
		 * Convert function arguments into `Fa' clauses.
		 * This also handles nested function pointers, which
		 * would otherwise throw off the delimeters.
		 */
		for (;;) {
			str = ++args;
			while (isspace((int)*str))
				str++;
			fputs(".Fa \"", f);
			ns = 0;
			while ('\0' != *str &&
			       (ns || ',' != *str) &&
			       (ns || ')' != *str)) {
				if ('/' == str[0] && '*' == str[1]) {
					str += 2;
					for ( ; '\0' != str[0]; str++)
						if ('*' == str[0] && '/' == str[1])
							break;
					if ('\0' == *str)
						break;
					str += 2;
					while (isspace((int)*str))
						str++;
					if ('\0' == *str ||
					    (0 == ns && ',' == *str) ||
					    (0 == ns && ')' == *str))
						break;
				}
				if ('(' == *str)
					ns++;
				else if (')' == *str)
					ns--;
				fputc(*str, f);
				str++;
			}
			fputs("\"\n", f);
			if ('\0' == *str || ')' == *str)
				break;
			args = str;
		}

		fputs(".Fc\n", f);
	}

	fputs(".Sh DESCRIPTION\n", f);

	/*
	 * Strip the crap out of the description.
	 * "Crap" consists of things I don't understand that mess up
	 * parsing of the HTML, for instance,
	 *   <dl>[[foo bar]]<dt>foo bar</dt>...</dl>
	 * These are not well-formed HTML.
	 */
	for (i = 0; i < d->descsz; i++) {
		if ('^' == d->desc[i] &&
		    '(' == d->desc[i + 1]) {
			d->desc[i] = d->desc[i + 1] = ' ';
			i++;
			continue;
		} else if (')' == d->desc[i] &&
			   '^' == d->desc[i + 1]) {
			d->desc[i] = d->desc[i + 1] = ' ';
			i++;
			continue;
		} else if ('^' == d->desc[i]) {
			d->desc[i] = ' ';
			continue;
		} else if ('[' != d->desc[i] ||
			   '[' != d->desc[i + 1])
			continue;
		d->desc[i] = d->desc[i + 1] = ' ';
		for (i += 2; i < d->descsz; i++) {
			if (']' == d->desc[i] &&
			    ']' == d->desc[i + 1])
				break;
			d->desc[i] = ' ';
		}
		if (i == d->descsz)
			continue;
		d->desc[i] = d->desc[i + 1] = ' ';
		i++;
	}

	/*
	 * Here we go!
	 * Print out the description as best we can.
	 * Do on-the-fly processing of any HTML we encounter into
	 * mdoc(7) and try to break lines up.
	 */
	col = 0;
	for (i = 0; i < d->descsz; ) {
		/*
		 * Newlines are paragraph breaks.
		 * If we have multiple newlines, then keep to a single
		 * `Pp' to keep it clean.
		 * Only do this if we're not before a block-level HTML,
		 * as this would mean, for instance, a `Pp'-`Bd' pair.
		 */
		if ('\n' == d->desc[i]) {
			while (isspace((int)d->desc[i]))
				i++;
			for (tag = 0; tag < TAG__MAX; tag++) {
				sz = strlen(tags[tag].html);
				if (0 == strncmp(&d->desc[i], tags[tag].html, sz))
					break;
			}
			if (TAG__MAX == tag ||
			    TAGINFO_INLINE & tags[tag].flags) {
				if (col > 0)
					fputs("\n", f);
				fputs(".Pp\n", f);
				/* We're on a new line. */
				col = 0;
			}
			continue;
		}

		/*
		 * New sentence, new line.
		 * We guess whether this is the case by using the
		 * dumbest possible heuristic.
		 */
		if (' ' == d->desc[i] && i &&
		    '.' == d->desc[i - 1]) {
			while (' ' == d->desc[i])
				i++;
			fputs("\n", f);
			col = 0;
			continue;
		}
		/*
		 * After 65 characters, force a break when we encounter
		 * white-space to keep our lines more or less tidy.
		 */
		if (col > 65 && ' ' == d->desc[i]) {
			while (' ' == d->desc[i])
				i++;
			fputs("\n", f);
			col = 0;
			continue;
		}

		/*
		 * Parsing HTML tags.
		 * Why, sqlite guys, couldn't you have used something
		 * like markdown or something?
		 * Sheesh.
		 */
		if ('<' == d->desc[i]) {
			for (tag = 0; tag < TAG__MAX; tag++) {
				sz = strlen(tags[tag].html);
				if (strncmp(&d->desc[i],
				    tags[tag].html, sz))
					continue;
				/*
				 * NOOP tags don't do anything, such as
				 * the case of `</dd>', which only
				 * serves to end an `It' block that will
				 * be closed out by a subsequent `It' or
				 * end of clause `El' anyway.
				 * Skip the trailing space.
				 */
				if (TAGINFO_NOOP & tags[tag].flags) {
					i += sz;
					while (isspace((int)d->desc[i]))
						i++;
					break;
				} else if (TAGINFO_INLINE & tags[tag].flags) {
					fputs(tags[tag].mdoc, f);
					i += sz;
					break;
				}

				/*
				 * A breaking mdoc(7) statement.
				 * Break the current line, output the
				 * macro, and conditionally break
				 * following that (or we might do
				 * nothing at all).
				 */
				if (col > 0) {
					fputs("\n", f);
					col = 0;
				}
				fputs(tags[tag].mdoc, f);
				if ( ! (TAGINFO_NOBR & tags[tag].flags)) {
					fputs("\n", f);
					col = 0;
				} else if ( ! (TAGINFO_NOSP & tags[tag].flags)) {
					fputs(" ", f);
					col++;
				}
				i += sz;
				while (isspace((int)d->desc[i]))
					i++;
				break;
			}
			if (tag < TAG__MAX)
				continue;
		} else if ('[' == d->desc[i] &&
			   ']' != d->desc[i + 1]) {
			/* Do we start at the bracket or bar? */
			for (sz = i + 1; sz < d->descsz; sz++)
				if ('|' == d->desc[sz] ||
				    ']' == d->desc[sz])
					break;

			if (sz == d->descsz)
				continue;
			else if ('|' == d->desc[sz])
				i = sz + 1;
			else
				i = i + 1;

			/*
			 * Now handle in-page references.
			 * Print them out as-is: we've already
			 * accumulated them into our "SEE ALSO" values,
			 * which we'll use below.
			 */
			for ( ; i < d->descsz; i++, col++) {
				if (']' == d->desc[i]) {
					i++;
					break;
				}
				fputc(d->desc[i], f);
				col++;
			}
			continue;
		}

		if (' ' == d->desc[i] && 0 == col) {
			while (' ' == d->desc[i])
				i++;
			continue;
		}

		assert('\n' != d->desc[i]);

		/*
		 * Handle some oddities.
		 * The following HTML escapes exist in the output that I
		 * could find.
		 * There might be others...
		 */
		if (0 == strncmp(&d->desc[i], "&nbsp;", 6)) {
			i += 6;
			fputc(' ', f);
		} else if (0 == strncmp(&d->desc[i], "&lt;", 4)) {
			i += 4;
			fputc('<', f);
		} else if (0 == strncmp(&d->desc[i], "&gt;", 4)) {
			i += 4;
			fputc('>', f);
		} else if (0 == strncmp(&d->desc[i], "&#91;", 5)) {
			i += 5;
			fputc('[', f);
		} else {
			/* Make sure we don't trigger a macro. */
			if (0 == col && '.' == d->desc[i])
				fputs("\\&", f);
			fputc(d->desc[i], f);
			i++;
		}

		col++;
	}

	if (col > 0)
		fputs("\n", f);

	if (d->xrsz > 0) {
		/*
		 * Look up all of our keywords (which are in the xrs
		 * field) in the table of all known keywords.
		 * Don't print duplicates.
		 */
		lastres = NULL;
		for (last = 0, i = 0; i < d->xrsz; i++) {
			res = lookup(d->xrs[i]);
			/* Ignore self-reference. */
			if (res == d->nms[0] && verbose)
				warnx("%s:%zu: self-reference: %s",
					d->fn, d->ln, d->xrs[i]);
			if (res == d->nms[0] && verbose)
				continue;
			if (NULL == res && verbose)
				warnx("%s:%zu: ref not found: %s",
					d->fn, d->ln, d->xrs[i]);
			if (NULL == res)
				continue;

			/* Ignore duplicates. */
			if (NULL != lastres && lastres == res)
				continue;
			if (last)
				fputs(" ,\n", f);
			else
				fputs(".Sh SEE ALSO\n", f);
			fprintf(f, ".Xr %s 3", res);
			last = 1;
			lastres = res;
		}
		if (last)
			fputs("\n", f);
	}

	if (0 == nofile)
		fclose(f);
}

int
main(int argc, char *argv[])
{
	size_t		 i, len;
	FILE		*f;
	char		*cp;
	const char	*prefix;
	struct parse	 p;
	int		 rc, ch;
	struct defn	*d;
	struct decl	*e;

	rc = 0;
	prefix = ".";
	f = stdin;
	memset(&p, 0, sizeof(struct parse));
	p.fn = "<stdin>";
	p.ln = 0;
	p.phase = PHASE_INIT;
	TAILQ_INIT(&p.dqhead);

	while (-1 != (ch = getopt(argc, argv, "np:v")))
		switch (ch) {
		case ('n'):
			nofile = 1;
			break;
		case ('p'):
			prefix = optarg;
			break;
		case ('v'):
			verbose = 1;
			break;
		default:
			goto usage;
		}

	time_t now = time(NULL);
	struct tm tm;
	char mdocdate[256];
	if (gmtime_r(&now, &tm) == NULL)
		err(EXIT_FAILURE, "gmtime");
	strftime(mdocdate, sizeof(mdocdate), "%B %d, %Y", &tm);
	/*
	 * Read in line-by-line and process in the phase dictated by our
	 * finite state automaton.
	 */
	while (NULL != (cp = fgetln(f, &len))) {
		assert(len > 0);
		p.ln++;
		if ('\n' != cp[len - 1]) {
			warnx("%s:%zu: unterminated line", p.fn, p.ln);
			break;
		}
		cp[--len] = '\0';
		/* Lines are always nil-terminated. */
		switch (p.phase) {
		case (PHASE_INIT):
			init(&p, cp);
			break;
		case (PHASE_KEYS):
			keys(&p, cp, len);
			break;
		case (PHASE_DESC):
			desc(&p, cp, len);
			break;
		case (PHASE_SEEALSO):
			seealso(&p, cp, len);
			break;
		case (PHASE_DECL):
			decl(&p, cp, len);
			break;
		}
	}

	/*
	 * If we hit the last line, then try to process.
	 * Otherwise, we failed along the way.
	 */
	if (NULL == cp) {
		/*
		 * Allow us to be at the declarations or scanning for
		 * the next clause.
		 */
		if (PHASE_INIT == p.phase ||
		    PHASE_DECL == p.phase) {
			if (0 == hcreate(5000))
				err(EXIT_FAILURE, "hcreate");
			TAILQ_FOREACH(d, &p.dqhead, entries)
				postprocess(prefix, d);
			TAILQ_FOREACH(d, &p.dqhead, entries)
				emit(d, mdocdate);
			rc = 1;
		} else if (PHASE_DECL != p.phase)
			warnx("%s:%zu: exit when not in "
				"initial state", p.fn, p.ln);
	}

	while ( ! TAILQ_EMPTY(&p.dqhead)) {
		d = TAILQ_FIRST(&p.dqhead);
		TAILQ_REMOVE(&p.dqhead, d, entries);
		while ( ! TAILQ_EMPTY(&d->dcqhead)) {
			e = TAILQ_FIRST(&d->dcqhead);
			TAILQ_REMOVE(&d->dcqhead, e, entries);
			free(e->text);
			free(e);
		}
		free(d->name);
		free(d->desc);
		free(d->dt);
		for (i = 0; i < d->nmsz; i++)
			free(d->nms[i]);
		for (i = 0; i < d->xrsz; i++)
			free(d->xrs[i]);
		for (i = 0; i < d->keysz; i++)
			free(d->keys[i]);
		free(d->keys);
		free(d->nms);
		free(d->xrs);
		free(d->fname);
		free(d->seealso);
		free(d->keybuf);
		free(d);
	}

	return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
usage:
	fprintf(stderr, "usage: %s [-nv] [-p prefix]\n", getprogname());
	return(EXIT_FAILURE);
}