.. / download
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <unistd.h>
#include <getopt.h>
#include <stdbool.h>
#include <libxml/tree.h>
#include <libxml/xpath.h>
#include <libxml/xpathInternals.h>
#include "xml-utils.h"

#define PROG_NAME "xml-trim"
#define VERSION "3.5.1"

/* Test whether c is a whitespace character.
 *
 * The value is converted to unsigned char before the lookup because passing
 * a negative char value to isspace() is undefined. The argument is
 * parenthesized so that an expression argument is converted as a whole
 * rather than having the cast bind to its first operand only.
 */
#define is_space(c) isspace((unsigned char) (c))

/* Copy src into dst, dropping any whitespace at the start of the string.
 *
 * dst must have room for strlen(src) + 1 bytes and must not overlap src.
 * Returns dst.
 */
static char *strltrm(char *dst, const char *src)
{
	const char *first = src;

	/* Advance to the first non-whitespace character (or the terminator). */
	while (is_space(*first)) {
		++first;
	}

	strcpy(dst, first);
	return dst;
}

/* Copy src into dst, dropping any whitespace at the end of the string.
 *
 * dst must have room for strlen(src) + 1 bytes and must not overlap src.
 * An empty or all-whitespace src produces an empty string.
 * Returns dst.
 */
static char *strrtrm(char *dst, const char *src)
{
	size_t len = strlen(src);

	/* Shrink the kept length while its last character is whitespace.
	 * The len > 0 bound keeps the scan inside the string, so an empty
	 * or all-whitespace input never looks at memory before src[0]. */
	while (len > 0 && isspace((unsigned char) src[len - 1])) {
		--len;
	}

	memcpy(dst, src, len);
	dst[len] = '\0';
	return dst;
}

/* Normalize space by replacing every run of whitespace characters in src
 * with a single space, writing the result to dst.
 *
 * Leading and trailing runs are collapsed to one space as well; they are not
 * removed. dst must have room for strlen(src) + 1 bytes and must not overlap
 * src. The result is always NUL-terminated, so dst does not need to be
 * zero-filled beforehand.
 * Returns dst.
 */
static char *strnorm(char *dst, const char *src)
{
	size_t in, out = 0;

	for (in = 0; src[in]; ++in) {
		if (isspace((unsigned char) src[in])) {
			dst[out++] = ' ';
			/* Skip the rest of this whitespace run. */
			while (isspace((unsigned char) src[in + 1])) {
				++in;
			}
		} else {
			dst[out++] = src[in];
		}
	}

	dst[out] = '\0';
	return dst;
}

/* Register an XML namespace with the XPath context.
 *
 * optarg is split in place with strtok(): the first token delimited by '='
 * is used as the prefix and whatever follows that delimiter is used as the
 * namespace URI. Either part may come back NULL if it is missing; both are
 * handed to xmlXPathRegisterNs() as they are.
 */
static void register_ns(xmlXPathContextPtr ctx, char *optarg)
{
	char *const ns_prefix = strtok(optarg, "=");
	char *const ns_uri = strtok(NULL, "");

	xmlXPathRegisterNs(ctx, BAD_CAST ns_prefix, BAD_CAST ns_uri);
}

/* Trim space in a text node.
 *
 * node  Node whose content is rewritten.
 * f     Transformation to apply; it receives a destination buffer of
 *       strlen(content) + 1 bytes and the current content, and must write
 *       a string no longer than its input.
 *
 * If the content cannot be read or the work buffer cannot be allocated, the
 * node is left unchanged.
 */
static void trim(xmlNodePtr node, char *(*f)(char *, const char *)) {
	char *content, *trimmed;

	content = (char *) xmlNodeGetContent(node);
	if (!content) {
		return;
	}

	/* The buffer is zero-filled so the result is terminated even if f
	 * stops writing without adding its own terminator. */
	trimmed = calloc(strlen(content) + 1, 1);
	if (trimmed) {
		f(trimmed, content);
		xmlNodeSetContent(node, BAD_CAST trimmed);

		/* Allocated with calloc(), so released with free() rather
		 * than libxml2's xmlFree(). */
		free(trimmed);
	}

	xmlFree(content);
}

/* Trim the text content of every node in a node set.
 *
 * For each node that has children: leading whitespace is removed from the
 * first child if it is a text node, trailing whitespace is removed from the
 * last child if it is a text node, and, when normalize is set, runs of
 * whitespace in every text child are collapsed to a single space.
 */
static void trim_nodes(xmlNodeSetPtr nodes, bool normalize)
{
	int idx;

	for (idx = 0; idx < nodes->nodeNr; ++idx) {
		xmlNodePtr parent = nodes->nodeTab[idx];
		xmlNodePtr head, tail, child;

		/* Nothing to trim in a node without children. */
		if (!parent->children) {
			continue;
		}

		head = parent->children;
		if (head->type == XML_TEXT_NODE) {
			trim(head, strltrm);
		}

		tail = xmlGetLastChild(parent);
		if (tail->type == XML_TEXT_NODE) {
			trim(tail, strrtrm);
		}

		if (!normalize) {
			continue;
		}

		for (child = parent->children; child; child = child->next) {
			if (child->type == XML_TEXT_NODE) {
				trim(child, strnorm);
			}
		}
	}
}

/* Trim the selected elements of one XML document and write out the result.
 *
 * path       Document to read, passed to read_xml_doc().
 * ns         Node whose children each hold a "prefix=URI" namespace spec.
 * elems      Node whose children each hold an element name or an XPath
 *            expression.
 * normalize  Also collapse runs of whitespace inside the text.
 * overwrite  Save back to path; otherwise save to "-" (presumably standard
 *            output; this is decided by save_xml_doc(), defined elsewhere).
 *
 * Element specifiers that fail to evaluate as XPath are skipped.
 */
static void trim_nodes_in_file(const char *path, xmlNodePtr ns, xmlNodePtr elems, bool normalize, bool overwrite)
{
	xmlDocPtr doc;
	xmlXPathContextPtr ctx;
	xmlNodePtr cur;

	doc = read_xml_doc(path);

	/* NOTE(review): read_xml_doc() is defined elsewhere; this assumes it
	 * returns NULL when the document cannot be read or parsed. There is
	 * nothing to trim or save in that case. */
	if (!doc) {
		return;
	}

	ctx = xmlXPathNewContext(doc);
	if (!ctx) {
		xmlFreeDoc(doc);
		return;
	}

	for (cur = ns->children; cur; cur = cur->next) {
		xmlChar *n;
		n = xmlNodeGetContent(cur);
		/* register_ns() splits the string in place, so it is given
		 * this private copy rather than the stored option text. */
		register_ns(ctx, (char *) n);
		xmlFree(n);
	}

	for (cur = elems->children; cur; cur = cur->next) {
		xmlChar *xpath;
		xmlXPathObjectPtr obj;
		xmlChar *e;

		e = xmlNodeGetContent(cur);

		/* If the element specifier contains a /, treat it like a
		 * literal XPath expression.
		 *
		 * Otherwise, match all elements with the same name at any
		 * position.
		 */
		if (xmlStrchr(e, '/')) {
			xpath = xmlStrdup(e);
		} else {
			xpath = xmlStrdup(BAD_CAST "//");
			xpath = xmlStrcat(xpath, e);
		}

		xmlFree(e);

		obj = xmlXPathEvalExpression(xpath, ctx);

		xmlFree(xpath);

		/* Evaluation yields NULL for an expression that cannot be
		 * compiled or evaluated; skip it instead of dereferencing. */
		if (!obj) {
			continue;
		}

		if (!xmlXPathNodeSetIsEmpty(obj->nodesetval)) {
			trim_nodes(obj->nodesetval, normalize);
		}

		xmlXPathFreeObject(obj);
	}

	xmlXPathFreeContext(ctx);

	if (overwrite) {
		save_xml_doc(doc, path);
	} else {
		save_xml_doc(doc, "-");
	}

	xmlFreeDoc(doc);
}

/* Show usage message. */
static void show_help(void)
{
	puts("Usage: " PROG_NAME " [-e <elem> ...] [-N <ns=URL> ...] [-fnh?] [<src>...]");
	puts("");
	puts("Options:");
	puts("  -e, --element <elem>      Element to trim space on.");
	puts("  -f, --overwrite           Overwrite input files.");
	puts("  -h, -?, --help            Show usage message.");
	puts("  -N, --namespace <ns=URL>  Register a namespace.");
	puts("  -n, --normalize           Normalize space as well as trim.");
	puts("  --version                 Show version information.");
	puts("  <src>                     XML file to trim.");
	LIBXML2_PARSE_LONGOPT_HELP
}

/* Print the program name and version, then the libxml version string, to
 * standard output.
 */
static void show_version(void)
{
	const char *const name = PROG_NAME;
	const char *const version = VERSION;

	printf("%s (xml-utils) %s\n", name, version);
	printf("Using libxml %s\n", xmlParserVersion);
}

/* Entry point: parse the command line, then trim each named file, or "-"
 * when no file is named.
 *
 * The -e and -N option values are collected as children of two scratch nodes
 * (elems and ns) that serve as simple lists and are handed to
 * trim_nodes_in_file() for every input.
 *
 * Always returns 0, including after --version, --help and -?.
 */
int main(int argc, char **argv)
{
	int i;
	xmlNodePtr ns, elems;
	bool normalize = false;
	bool overwrite = false;

	const char *sopts = "e:fN:nh?";
	struct option lopts[] = {
		{"version"  , no_argument      , 0, 0},
		{"help"     , no_argument      , 0, 'h'},
		{"element"  , required_argument, 0, 'e'},
		{"overwrite", no_argument      , 0, 'f'},
		{"namespace", required_argument, 0, 'N'},
		{"normalize", no_argument      , 0, 'n'},
		LIBXML2_PARSE_LONGOPT_DEFS
		{0, 0, 0, 0}
	};
	int loptind = 0;

	ns = xmlNewNode(NULL, BAD_CAST "namespaces");
	elems = xmlNewNode(NULL, BAD_CAST "elems");

	while ((i = getopt_long(argc, argv, sopts, lopts, &loptind)) != -1) {
		switch (i) {
			case 0:
				if (strcmp(lopts[loptind].name, "version") == 0) {
					show_version();
					goto cleanup;
				}
				LIBXML2_PARSE_LONGOPT_HANDLE(lopts, loptind, optarg);
				break;
			case 'e':
				/* xmlNewTextChild() stores the text literally.
				 * xmlNewChild() would parse it for entity
				 * references and mangle any '&' in it. */
				xmlNewTextChild(elems, NULL, BAD_CAST "elem", BAD_CAST optarg);
				break;
			case 'f':
				overwrite = true;
				break;
			case 'N':
				/* Stored literally for the same reason; URIs
				 * may legitimately contain '&'. */
				xmlNewTextChild(ns, NULL, BAD_CAST "ns", BAD_CAST optarg);
				break;
			case 'n':
				normalize = true;
				break;
			case 'h':
			case '?':
				show_help();
				goto cleanup;
			default:
				break;
		}
	}

	if (optind < argc) {
		for (i = optind; i < argc; ++i) {
			trim_nodes_in_file(argv[i], ns, elems, normalize, overwrite);
		}
	} else {
		trim_nodes_in_file("-", ns, elems, normalize, overwrite);
	}

cleanup:
	/* Shared by the normal path and the --version/--help exits so the
	 * option lists are always released. */
	xmlFreeNode(ns);
	xmlFreeNode(elems);

	xmlCleanupParser();

	return 0;
}


gopher://khzae.net/0/s1000d/xml/xml-utils/src/utils/xml-trim/xml-trim.c
Styles: Light Dark Classic