From 211e583ac0b447ce6dea74344905b494732dfb37 Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Tue, 24 Jul 2012 18:48:20 +0200 Subject: [PATCH] Added a manual page --- Makefile | 7 ++- xsv.1.txt | 139 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 144 insertions(+), 2 deletions(-) create mode 100644 xsv.1.txt diff --git a/Makefile b/Makefile index e92d225..db742f4 100644 --- a/Makefile +++ b/Makefile @@ -4,12 +4,15 @@ PCRE_LIBS:=$(shell pcre-config --libs) CFLAGS=-O2 -Wall -W -Wno-parentheses -Wstrict-prototypes -Wmissing-prototypes -Wundef -Wredundant-decls -std=gnu99 $(PCRE_CFLAGS) -g LDLIBS=$(PCRE_LIBS) -all: xsv +all: xsv xsv.1 tests: xsv ./run-tests +xsv.1: xsv.1.txt + a2x -f manpage $< + clean: rm -f `find . -name "*~" -or -name "*.[oa]" -or -name "\#*\#" -or -name TAGS -or -name core -or -name .depend -or -name .#*` - rm -f xsv + rm -f xsv xsv.1 rm -rf tmp diff --git a/xsv.1.txt b/xsv.1.txt new file mode 100644 index 0000000..921febf --- /dev/null +++ b/xsv.1.txt @@ -0,0 +1,139 @@ +XSV(1) +====== +:man source: XSV +:man version: 1.0 +:man manual: The Swiss-Army Knife for CSV + +NAME +---- +xsv - manipulate CSV-like text files + +SYNOPSIS +-------- +*xsv* 'input-format' ['output-format'] 'options' ['fields'] + +DESCRIPTION +----------- +*xsv* is a utility for manipulating text files, whose lines are divided into fields. +This includes popular formats for textual databases like CSV (Comma-Separated Values), +TSV (Tab-Separated Values), formats with other separators (like `/etc/passwd`), and +many other formats. + +*xsv* reads lines from the standard input. Each line is split into fields +according to a specified file format. The fields are then written to the +standard output in a possibly different format. Additionally, the fields can +be re-ordered, trimmed, and otherwise manipulated. + +FORMATS +------- +If a single file format is selected, it is used for both the input and the output. 
+If two file formats are given, the former applies to the input, the latter to the +output. If no format is given, *--tsv* is assumed. + +*-t, --tsv*:: + Tab-separated values, or more generally fields separated by a single + occurrence of a delimiter character. By default, the delimiter is the TAB + character, but it can be changed by the *-d* option. +*-c, --csv*:: + Comma-separated values -- the traditional CSV format as defined in RFC 4180. + Fields are separated by a single comma. When a field contains a comma, it is + enclosed in double quotes. When it contains double quotes, they are repeated. + The only deviations from the RFC are that we do not put a CR at the end of a line + (although we accept it on the input) and that each line can have a different + number of fields. +*-w, --ws*:: + The fields are separated by an arbitrary sequence of whitespace characters + (spaces, tabs and form-feeds). Leading or trailing whitespace is interpreted + as an empty field (this can be overridden by *--sloppy*). When used for output, + exactly one space is used. +*-r, --regex=*'regex':: + The fields are separated by sequences of characters satisfying the given + Perl-compatible regular expression (see *pcrepattern*(3) for a full description + of their syntax). For example, `--regex='#+'` separates fields by an arbitrary + number of hashes. Leading or trailing separators are interpreted as empty + fields (this can be overridden by *--sloppy*). This format can be used only + for input. +*--table*:: + An output-only format, which displays the data in the form of a table. Data in each + column are justified to the width of the longest item. With *--grid*, an ASCII-art + grid is added. Please note that this requires two passes over the data, + so pre-formatted data are stored in a temporary file.
+ +FORMAT PARAMETERS +----------------- +Each format option can be followed by parameters specific to that format: + +*-d, --fs=*'character':: + Use the specified character as a field separator (delimiter). + Applies to *--csv* and *--tsv*. +*-f, --fields=*'name'*,*'name'*,*...:: + Assign names to fields. The names can then be used to refer to fields + instead of numbers. +*-h, --header*:: + The file starts with a header line, which contains field names. + It can be combined with *--fields*, if you want to override the names. +*-q, --quiet*:: + By default, *xsv* prints warnings when something suspicious happens + (e.g., an unterminated quote in CSV, or when we attempt to print a field, + which contains the separator character). If the warnings are too noisy, + use *--quiet* to silence them. +*--always-quote*:: + When writing CSV files, quote all fields, even if it is not needed. +*--table-sep=*'n':: + Separate table columns by 'n' spaces. When not given, two spaces are used. + Applies to *--table* only. +*--grid*:: + Decorate the table by an ASCII-art grid of vertical lines. The lines sit + in the middle of inter-column spaces. + Applies to *--table* only. +*-s, --sloppy*:: + Ignore separators at the beginning or at the end of a line. Otherwise, + they are interpreted as empty fields. + Applies to *--ws* and *--regex*. + +OTHER OPTIONS +------------- +There are several options, which do not apply to the file format. Instead, they +specify how the data should be transformed between the input and the output. + +*--trim*:: + Delete leading and trailing spaces in each field. +*--equalize*:: + When different lines contain a different number of fields, pad the short + ones with empty fields. Please note that this requires two passes over the + data, possibly storing the data to a temporary file in between. + +SELECTION OF FIELDS +------------------- +By default, *xsv* copies all fields from the input to the output.
Instead of that, +you can specify a list of fields or field ranges to copy. Unlike *cut*(1), the fields +are copied in the given order. + +A field can be identified by its number (starting with 1), or by its name when +*--fields* or *--header* is given. A field range has the form 'field'-'field'; +either 'field' can be omitted, which refers to the first/last field of the line. + +EXAMPLES +-------- +`xsv . +It can be distributed and used under the terms of the GNU +General Public License version 2. -- 2.39.2