From: Martin Mareš Date: Sun, 17 Aug 2025 13:28:34 +0000 (+0200) Subject: Switch to PCRE2 X-Git-Tag: v1.1~2 X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=86791d415b9bbe77871fd9711fa3318d1a0c84df;p=xsv.git Switch to PCRE2 Old PCRE is not packaged by Debian any longer. --- diff --git a/Makefile b/Makefile index cd99b87..fa274ef 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,10 @@ VERSION=1.0.1 ARCHIVE=xsv-$(VERSION).tar.gz -PCRE_CFLAGS:=$(shell pcre-config --cflags) -PCRE_LIBS:=$(shell pcre-config --libs) +PCRE_CFLAGS:=$(shell pcre2-config --cflags) +PCRE_LIBS:=$(shell pcre2-config --libs8) -CFLAGS=-O2 -Wall -W -Wno-parentheses -Wstrict-prototypes -Wmissing-prototypes -Wundef -Wredundant-decls -std=gnu99 $(PCRE_CFLAGS) -DVERSION='"$(VERSION)"' +CFLAGS=-O2 -Wall -W -Wno-parentheses -Wstrict-prototypes -Wmissing-prototypes -Wundef -Wredundant-decls -Wno-pointer-sign -std=gnu99 $(PCRE_CFLAGS) -DVERSION='"$(VERSION)"' LDLIBS=$(PCRE_LIBS) PREFIX=/usr/local diff --git a/README b/README index 2cb8e90..d918637 100644 --- a/README +++ b/README @@ -2,7 +2,7 @@ XSV -- Swiss-Army Knife for CSV-Like Files - (c) 2012--2013 Martin Mares + (c) 2012--2025 Martin Mares You can use and distribute this program under the terms of GPLv2. @@ -22,7 +22,7 @@ Compilation instructions: (or anywhere else if you override PREFIX). The program has been tested on Linux, but it should run on an arbitrary POSIX -system with a C99 compiler and the PCRE library. Building of the manual page +system with a C99 compiler and the PCRE2 library. Building of the manual page requires AsciiDoc (any reasonably recent version should work). All bug reports and suggestions are welcome, especially when accompanied by patches. diff --git a/xsv.1.txt b/xsv.1.txt index da38076..f2903ee 100644 --- a/xsv.1.txt +++ b/xsv.1.txt @@ -48,7 +48,7 @@ output. If no format is given, *--tsv* is assumed. exactly one space is used. 
*-r, --regex=*'regex':: The fields are separated by sequences of characters satisfying the given - Perl-compatible regular expression (see *pcrepattern*(3) for a full description + Perl-compatible regular expression (see *pcre2pattern*(3) for a full description of their syntax). For example, `--regex='#+'` separates fields by an arbitrary number of hashes. Leading or trailing separators are interpreted as empty fields (this can be overridden by *--sloppy*). This format can be used only diff --git a/xsv.c b/xsv.c index 0d2d972..39891d8 100644 --- a/xsv.c +++ b/xsv.c @@ -1,7 +1,7 @@ /* * The Swiss-Army Knife for CSV-like Files * - * (c) 2012 Martin Mares + * (c) 2012-2025 Martin Mares */ #include @@ -12,7 +12,8 @@ #include #include -#include <pcre.h> +#define PCRE2_CODE_UNIT_WIDTH 8 +#include <pcre2.h> #ifdef __GNUC__ #define NONRET __attribute__((noreturn)) @@ -115,8 +116,8 @@ struct format { int always_quote; // regex backend: - pcre *pcre; - pcre_extra *pcre_extra; + pcre2_code *pcre; + pcre2_match_data *pcre_mdata; // Temporary file backend: FILE *tmp_file; @@ -377,15 +378,19 @@ static int ws_read(struct format *fmt) static const char *regex_set(struct format *f, char *rx) { - const char *err; - int errpos; - f->pcre = pcre_compile(rx, PCRE_DOLLAR_ENDONLY, &err, &errpos, NULL); - if (!f->pcre) - return err; + int errcode; + PCRE2_SIZE errpos; + + f->pcre = pcre2_compile(rx, PCRE2_ZERO_TERMINATED, PCRE2_DOLLAR_ENDONLY, &errcode, &errpos, NULL); + if (!f->pcre) { + char *errmsg = xmalloc(256); + pcre2_get_error_message(errcode, errmsg, 256); + return errmsg; + } + + pcre2_jit_compile(f->pcre, PCRE2_JIT_COMPLETE); - f->pcre_extra = pcre_study(f->pcre, 0, &err); - if (!f->pcre_extra) - return err; + f->pcre_mdata = pcre2_match_data_create_from_pattern(f->pcre, NULL); return NULL; } @@ -402,10 +407,9 @@ static int regex_read(struct format *fmt) int i = 0; for (;;) { - int ovec[3]; - int err = pcre_exec(fmt->pcre, fmt->pcre_extra, (char *) c, n, i, 0, ovec, 3); + int err = 
pcre2_match(fmt->pcre, (char *) c, n, i, 0, fmt->pcre_mdata, NULL); if (err < 0) { - if (err != PCRE_ERROR_NOMATCH) + if (err != PCRE2_ERROR_NOMATCH) warn(fmt, "PCRE matching error %d", err); // No further occurrence of the separator: the rest is a single field if (!fmt->sloppy || i < n) { @@ -414,6 +418,7 @@ static int regex_read(struct format *fmt) } return 1; } + PCRE2_SIZE *ovec = pcre2_get_ovector_pointer(fmt->pcre_mdata); if (ovec[0] == ovec[1]) { warn(fmt, "Regular expression matched an empty separator."); new_field(i);