.. / download
#include <stdio.h>
#include <unistd.h>
#include <getopt.h>
#include <stdbool.h>
#include <string.h>

#include <libxml/tree.h>
#include <libxml/xpath.h>
#include <libxslt/transform.h>

#include "xslt.h"
#include "s1kd_tools.h"

/* Program name (used in usage text and message prefixes) and version string
 * (printed by --version).
 */
#define PROG_NAME "s1kd-index"
#define VERSION "1.10.0"

/* Path to text nodes where indexFlags may occur */
#define ELEMENTS_XPATH BAD_CAST "//para/text()"

/* Sets of characters accepted immediately before (PRE) and immediately
 * after (POST) a term for it to count as a match; see is_term().
 */
#define PRE_TERM_DELIM BAD_CAST " "
#define POST_TERM_DELIM BAD_CAST " .,"

/* Message prefixes and printf-style message formats written to stderr.
 * Each format takes a single string argument (a file name or path).
 */
#define ERR_PREFIX PROG_NAME ": ERROR: "
#define INF_PREFIX PROG_NAME ": INFO: "
#define E_NO_LIST ERR_PREFIX "Could not read index flags from %s\n"
#define E_BAD_LIST ERR_PREFIX "Could not read list: %s\n"
#define E_NO_FILE ERR_PREFIX "Could not read file: %s\n"
#define I_MARKUP INF_PREFIX "Adding index flags to %s...\n"
#define I_DELETE INF_PREFIX "Deleting index flags from %s...\n"
/* Exit status used when the index flags file cannot be read. */
#define EXIT_NO_LIST 1

/* Message verbosity. Errors are printed at NORMAL and above, informational
 * messages at VERBOSE and above. Adjusted by the -q and -v options.
 */
static enum verbosity { QUIET, NORMAL, VERBOSE } verbosity = NORMAL;

/* Help/usage message */
static void show_help(void)
{
	puts("Usage:");
	puts("  " PROG_NAME " -h?");
	puts("  " PROG_NAME " [-I <index>] [-filqv] [<module>...]");
	puts("  " PROG_NAME " -D [-filqv] [<module>...]");
	puts("");
	puts("Options:");
	puts("  -D, --delete              Delete current index flags.");
	puts("  -f, --overwrite           Overwrite input module(s).");
	puts("  -h, -?, --help            Show help/usage message.");
	puts("  -I, --indexflags <index>  Specify a custom .indexflags file");
	puts("  -i, --ignore-case         Ignore case when flagging terms.");
	puts("  -l, --list                Input is a list of file names.");
	puts("  -q, --quiet               Quiet mode.");
	puts("  -v, --verbose             Verbose output.");
	puts("  --version                 Show version information.");
	LIBXML2_PARSE_LONGOPT_HELP
}

/* Print the program version followed by the versions of libxml and libxslt
 * reported by those libraries.
 */
static void show_version(void)
{
	printf("%s (s1kd-tools) %s\n",
		PROG_NAME,
		VERSION);

	printf("Using libxml %s and libxslt %s\n",
		xmlParserVersion,
		xsltEngineVersion);
}

/* Return the value of the deepest index level attribute present on an
 * indexFlag (indexLevelFour down to indexLevelOne). This is the text that
 * is searched for when deciding where to insert the flag.
 *
 * The result is a newly allocated string that the caller releases with
 * xmlFree(), or NULL when the flag carries none of the level attributes.
 */
static xmlChar *last_level(xmlNodePtr flag)
{
	/* Ordered from the deepest level to the shallowest. */
	static const char *const level_attrs[] = {
		"indexLevelFour",
		"indexLevelThree",
		"indexLevelTwo",
		"indexLevelOne"
	};
	size_t n;

	for (n = 0; n < sizeof(level_attrs) / sizeof(level_attrs[0]); ++n) {
		xmlChar *value = xmlGetProp(flag, BAD_CAST level_attrs[n]);

		if (value) {
			return value;
		}
	}

	return NULL;
}

/* Test whether the term occurs as a whole word at offset i of content.
 *
 * content      Text being searched.
 * content_len  Length of content in bytes.
 * i            Offset in content to test; the caller guarantees that
 *              i + term_len <= content_len.
 * term         Term to look for.
 * term_len     Length of term in bytes.
 * ignorecase   Compare case-insensitively when true.
 *
 * A match requires the character before the term to be in PRE_TERM_DELIM
 * and the character after it to be in POST_TERM_DELIM. The start and the end
 * of the content both count as a space.
 */
static bool is_term(xmlChar *content, int content_len, int i, xmlChar *term, int term_len, bool ignorecase)
{
	const int end = i + term_len;
	xmlChar before, after;
	int diff;

	before = i == 0 ? ' ' : content[i - 1];

	/* Only substitute a space when the term really reaches the end of
	 * the content. Comparing against content_len - 1 would ignore the
	 * final character, so that e.g. "terms" at the end of the text
	 * would be accepted as a match for "term".
	 */
	after = end >= content_len ? ' ' : content[end];

	if (!xmlStrchr(PRE_TERM_DELIM, before)) {
		return false;
	}

	diff = ignorecase ?
		xmlStrncasecmp(content + i, term, term_len) :
		xmlStrncmp(content + i, term, term_len);

	return diff == 0 && xmlStrchr(POST_TERM_DELIM, after);
}

/* Insert indexFlag elements after matched terms.
 *
 * node        Text node to search.
 * flag        indexFlag element; a deep copy is inserted after each match.
 * ignorecase  Match the term case-insensitively when true.
 *
 * At each match the text node is truncated to end with the term, a copy of
 * the flag is added as its next sibling, and the remaining text is placed in
 * a new text node after the flag. The search then continues in that new
 * text node.
 */
static void gen_index_node(xmlNodePtr node, xmlNodePtr flag, bool ignorecase)
{
	xmlChar *content;
	xmlChar *term;
	int term_len, content_len;
	int i;

	term = last_level(flag);
	term_len = xmlStrlen(term);

	/* A flag without any index level (or with an empty one) has nothing
	 * to match. A zero-length term would match at offset 0 without ever
	 * consuming text, so the loop below would never terminate.
	 */
	if (term_len == 0) {
		xmlFree(term);
		return;
	}

	content = xmlNodeGetContent(node);
	content_len = xmlStrlen(content);

	i = 0;
	while (i + term_len <= content_len) {
		if (is_term(content, content_len, i, term, term_len, ignorecase)) {
			/* s1: text up to and including the term.
			 * s2: text after the term (NULL if nothing is left).
			 */
			xmlChar *s1 = xmlStrndup(content, i + term_len);
			xmlChar *s2 = xmlStrsub(content, i + term_len, content_len - (i + term_len));
			xmlNodePtr acr;

			xmlFree(content);

			xmlNodeSetContent(node, s1);
			xmlFree(s1);

			acr = xmlAddNextSibling(node, xmlCopyNode(flag, 1));
			node = xmlAddNextSibling(acr, xmlNewText(s2));

			/* Restart the search at the beginning of the remainder. */
			content = s2;
			content_len = xmlStrlen(s2);
			i = 0;
		} else {
			++i;
		}
	}

	xmlFree(term);
	xmlFree(content);
}

/* Flag an individual term in all applicable elements in a module.
 *
 * flag        indexFlag element to insert.
 * ctx         XPath context of the module being processed.
 * ignorecase  Match the term case-insensitively when true.
 *
 * The text nodes are selected with ELEMENTS_XPATH on every call, so text
 * nodes created while inserting earlier flags are included.
 */
static void gen_index_flag(xmlNodePtr flag, xmlXPathContextPtr ctx, bool ignorecase)
{
	xmlXPathObjectPtr obj;

	obj = xmlXPathEvalExpression(ELEMENTS_XPATH, ctx);

	/* Evaluation returns NULL on error (e.g. a NULL context). */
	if (!obj) {
		return;
	}

	if (!xmlXPathNodeSetIsEmpty(obj->nodesetval)) {
		int i;

		for (i = 0; i < obj->nodesetval->nodeNr; ++i) {
			gen_index_node(obj->nodesetval->nodeTab[i], flag, ignorecase);
		}
	}

	xmlXPathFreeObject(obj);
}

/* Apply every indexFlag in the given node set to the module behind ctx,
 * in node set order. The node set must not be NULL.
 */
static void gen_index_flags(xmlNodeSetPtr flags, xmlXPathContextPtr ctx, bool ignorecase)
{
	int n = 0;

	while (n < flags->nodeNr) {
		xmlNodePtr current = flags->nodeTab[n];

		gen_index_flag(current, ctx, ignorecase);
		++n;
	}
}

/* Apply a built-in XSLT transform to a doc in place.
 *
 * doc  Document to transform; its root element is replaced by a copy of
 *      the root element of the transformation result.
 * xsl  Stylesheet source held in memory.
 * len  Length of xsl in bytes.
 *
 * If the stylesheet cannot be compiled, or the transformation produces no
 * root element, doc is left unchanged.
 */
static void transform_doc(xmlDocPtr doc, unsigned char *xsl, unsigned int len)
{
	xmlDocPtr styledoc, src, res;
	xsltStylesheetPtr style;
	xmlNodePtr old;

	styledoc = read_xml_mem((const char *) xsl, len);
	style = xsltParseStylesheetDoc(styledoc);

	/* The stylesheet only takes ownership of styledoc when it is
	 * compiled successfully; on failure it must be freed here.
	 */
	if (!style) {
		xmlFreeDoc(styledoc);
		return;
	}

	/* Transform a copy so that doc itself is only touched when the root
	 * element is swapped below.
	 */
	src = xmlCopyDoc(doc, 1);

	res = xsltApplyStylesheet(style, src, NULL);

	old = xmlDocSetRootElement(doc, xmlCopyNode(xmlDocGetRootElement(res), 1));
	xmlFreeNode(old);

	xmlFreeDoc(src);
	xmlFreeDoc(res);
	xsltFreeStylesheet(style);
}

/* Convert index flags for older issues.
 *
 * Applies the built-in iss30_xsl stylesheet to doc in place. gen_index()
 * calls this when the first child element of the document root is named
 * "idstatus".
 */
static void convert_to_iss_30(xmlDocPtr doc)
{
	transform_doc(doc, iss30_xsl, iss30_xsl_len);
}

static void delete_index_flags(const char *path, bool overwrite)
{
	xmlDocPtr doc;

	if (verbosity >= VERBOSE) {
		fprintf(stderr, I_DELETE, path);
	}

	doc = read_xml_doc(path);

	transform_doc(doc, delete_xsl, delete_xsl_len);

	if (overwrite) {
		save_xml_doc(doc, path);
	} else {
		save_xml_doc(doc, "-");
	}
}

/* Insert indexFlag elements after matched terms in a document.
 *
 * path        Path of the module to read.
 * index_doc   Document containing the indexFlag elements to apply.
 * overwrite   Write the result back to path when true; otherwise the result
 *             is saved to "-".
 * ignorecase  Match terms case-insensitively when true.
 *
 * If the module cannot be read, an error is reported (unless quiet) and
 * nothing is written.
 */
static void gen_index(const char *path, xmlDocPtr index_doc, bool overwrite, bool ignorecase)
{
	xmlDocPtr doc;
	xmlXPathContextPtr doc_ctx, index_ctx;
	xmlXPathObjectPtr index_obj;
	xmlNodePtr first;

	if (verbosity >= VERBOSE) {
		fprintf(stderr, I_MARKUP, path);
	}

	if (!(doc = read_xml_doc(path))) {
		if (verbosity >= NORMAL) {
			fprintf(stderr, E_NO_FILE, path);
		}
		return;
	}

	index_ctx = xmlXPathNewContext(index_doc);
	index_obj = xmlXPathEvalExpression(BAD_CAST "//indexFlag", index_ctx);

	doc_ctx = xmlXPathNewContext(doc);

	/* Either XPath call may return NULL on failure; in that case no
	 * flags are applied and the module is written out as read.
	 */
	if (index_obj && doc_ctx && !xmlXPathNodeSetIsEmpty(index_obj->nodesetval)) {
		gen_index_flags(index_obj->nodesetval, doc_ctx, ignorecase);
	}

	xmlXPathFreeContext(doc_ctx);
	xmlXPathFreeObject(index_obj);
	xmlXPathFreeContext(index_ctx);

	/* The root may have no element children at all, so the first child
	 * must be checked before its name is read.
	 */
	first = xmlFirstElementChild(xmlDocGetRootElement(doc));

	if (first && xmlStrcmp(first->name, BAD_CAST "idstatus") == 0) {
		convert_to_iss_30(doc);
	}

	if (overwrite) {
		save_xml_doc(doc, path);
	} else {
		save_xml_doc(doc, "-");
	}

	xmlFreeDoc(doc);
}

/* Load the index flags document from fname.
 *
 * Returns the parsed document. If it cannot be read, an error is reported
 * (unless quiet) and the program exits with status EXIT_NO_LIST.
 */
static xmlDocPtr read_index_flags(const char *fname)
{
	xmlDocPtr loaded = read_xml_doc(fname);

	if (loaded) {
		return loaded;
	}

	if (verbosity >= NORMAL) {
		fprintf(stderr, E_NO_LIST, fname);
	}

	exit(EXIT_NO_LIST);
}

/* Process every module named in a list of file names, one per line.
 *
 * path        Path of the list file, or NULL to read the list from stdin.
 * delflags    Delete index flags when true; otherwise add them.
 * index_doc   Document containing the indexFlag elements to apply (only
 *             used when adding flags).
 * overwrite   Passed through to the per-module function.
 * ignorecase  Passed through to gen_index().
 *
 * Each line is cut at the first tab, carriage return or newline. Lines that
 * are empty after that are skipped.
 */
static void handle_list(const char *path, bool delflags, xmlDocPtr index_doc, bool overwrite, bool ignorecase)
{
	FILE *f;
	char line[PATH_MAX];

	if (path) {
		f = fopen(path, "r");
	} else {
		f = stdin;
	}

	if (!f) {
		if (verbosity >= NORMAL) {
			fprintf(stderr, E_BAD_LIST, path);
		}
		return;
	}

	while (fgets(line, PATH_MAX, f)) {
		/* Unlike strtok(), this also terminates the string when the
		 * line consists only of delimiters (e.g. a blank line).
		 */
		line[strcspn(line, "\t\r\n")] = '\0';

		if (line[0] == '\0') {
			continue;
		}

		if (delflags) {
			delete_index_flags(line, overwrite);
		} else {
			gen_index(line, index_doc, overwrite, ignorecase);
		}
	}

	/* Only close streams opened here. */
	if (f != stdin) {
		fclose(f);
	}
}

int main(int argc, char **argv)
{
	int i;
	bool overwrite = false;
	bool ignorecase = false;
	bool delflags = false;
	bool list = false;

	xmlDocPtr index_doc = NULL;

	const char *sopts = "DfI:liqvh?";
	struct option lopts[] = {
		{"version"    , no_argument      , 0, 0},
		{"help"       , no_argument      , 0, 'h'},
		{"delete"     , no_argument      , 0, 'D'},
		{"overwrite"  , no_argument      , 0, 'f'},
		{"indexflags" , required_argument, 0, 'I'},
		{"ignore-case", no_argument      , 0, 'i'},
		{"list"       , no_argument      , 0, 'l'},
		{"quiet"      , no_argument      , 0, 'q'},
		{"verbose"    , no_argument      , 0, 'v'},
		LIBXML2_PARSE_LONGOPT_DEFS
		{0, 0, 0, 0}
	};
	int loptind = 0;

	while ((i = getopt_long(argc, argv, sopts, lopts, &loptind)) != -1) {
		switch (i) {
			case 0:
				if (strcmp(lopts[loptind].name, "version") == 0) {
					show_version();
					return 0;
				}
				LIBXML2_PARSE_LONGOPT_HANDLE(lopts, loptind, optarg)
				break;
			case 'D':
				delflags = true;
				break;
			case 'f':
				overwrite = true;
				break;
			case 'I':
				if (!index_doc) {
					index_doc = read_index_flags(optarg);
				}
				break;
			case 'i':
				ignorecase = true;
				break;
			case 'l':
				list = true;
				break;
			case 'q':
				--verbosity;
				break;
			case 'v':
				++verbosity;
				break;
			case 'h':
			case '?':
				show_help();
				return 0;
		}
	}

	if (!index_doc && !delflags) {
		char fname[PATH_MAX];
		find_config(fname, DEFAULT_INDEXFLAGS_FNAME);
		index_doc = read_index_flags(fname);
	}

	if (optind < argc) {
		for (i = optind; i < argc; ++i) {
			if (list) {
				handle_list(argv[i], delflags, index_doc, overwrite, ignorecase);
			} else if (delflags) {
				delete_index_flags(argv[i], overwrite);
			} else {
				gen_index(argv[i], index_doc, overwrite, ignorecase);
			}
		}
	} else if (list) {
		handle_list(NULL, delflags, index_doc, overwrite, ignorecase);
	} else if (delflags) {
		delete_index_flags("-", false);
	} else {
		gen_index("-", index_doc, false, ignorecase);
	}

	xmlFreeDoc(index_doc);

	xsltCleanupGlobals();
	xmlCleanupParser();

	return 0;
}


gopher://khzae.net/0/s1kd/s1kd-tools/src/tools/s1kd-index/s1kd-index.c
Styles: Light Dark Classic