Files
bash/examples/loadables/lcut.c
T

524 lines
11 KiB
C

/* lcut - extract specified fields from a line and assign them to an array or
print them to the standard output */
/*
Copyright (C) 2020,2022,2023 Free Software Foundation, Inc.
Bash is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Bash is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Bash. If not, see <http://www.gnu.org/licenses/>.
*/
/* See Makefile for compilation details. */
#include <config.h>
#if defined (HAVE_UNISTD_H)
# include <unistd.h>
#endif
#include "bashansi.h"
#include <stdio.h>
#include "loadables.h"
#include "shmbutil.h"
#define CUT_ARRAY_DEFAULT "CUTFIELDS"
#define NOPOS -2 /* sentinel for unset startpos/endpos */
#define BOL 0
#define EOL INT_MAX
#define NORANGE -1 /* just a position, no range */
#define BFLAG (1 << 0)
#define CFLAG (1 << 1)
#define DFLAG (1 << 2)
#define FFLAG (1 << 3)
#define SFLAG (1 << 4)
struct cutpos
{
int startpos, endpos; /* zero-based, correction done in getlist() */
};
struct cutop
{
int flags;
int delim;
int npos;
struct cutpos *poslist;
};
static int
poscmp (const void *a, const void *b)
{
struct cutpos *p1, *p2;
p1 = (struct cutpos *)a;
p2 = (struct cutpos *)b;
return (p1->startpos - p2->startpos);
}
static int
getlist (char *arg, struct cutpos **opp)
{
char *ntok, *ltok, *larg;
int s, e;
intmax_t num;
struct cutpos *poslist;
int npos, nsize;
poslist = 0;
nsize = npos = 0;
s = e = 0;
larg = arg;
while (ltok = strsep (&larg, ","))
{
if (*ltok == 0)
continue;
ntok = strsep (&ltok, "-");
if (*ntok == 0)
s = BOL;
else
{
if (valid_number (ntok, &num) == 0 || (int)num != num || num <= 0)
{
builtin_error ("%s: invalid list value", ntok);
*opp = poslist;
return -1;
}
s = num;
s--; /* fields are 1-based */
}
if (ltok == 0)
e = NORANGE;
else if (*ltok == 0)
e = EOL;
else
{
if (valid_number (ltok, &num) == 0 || (int)num != num || num <= 0)
{
builtin_error ("%s: invalid list value", ltok);
*opp = poslist;
return -1;
}
e = num;
e--;
if (e == s)
e = NORANGE;
}
if (npos == nsize)
{
nsize += 4;
poslist = (struct cutpos *)xrealloc (poslist, nsize * sizeof (struct cutpos));
}
poslist[npos].startpos = s;
poslist[npos].endpos = e;
npos++;
}
if (npos == 0)
{
builtin_error ("missing list of positions");
*opp = poslist;
return -1;
}
qsort (poslist, npos, sizeof(poslist[0]), poscmp);
*opp = poslist;
return npos;
}
static int
cutbytes (SHELL_VAR *v, char *line, struct cutop *ops)
{
arrayind_t ind;
char *buf, *bmap;
size_t llen;
int i, b, n, s, e;
llen = strlen (line);
buf = xmalloc (llen + 1);
bmap = xmalloc (llen + 1);
memset (bmap, 0, llen);
for (n = 0; n < ops->npos; n++)
{
s = ops->poslist[n].startpos; /* no translation needed yet */
e = ops->poslist[n].endpos;
if (e == NORANGE)
e = s;
else if (e == EOL || e >= llen)
e = llen - 1;
/* even if a column is specified multiple times, it will only be printed
once */
for (i = s; i <= e; i++)
bmap[i] = 1;
}
b = 0;
for (i = 0; i < llen; i++)
if (bmap[i])
buf[b++] = line[i];
buf[b] = 0;
if (v)
{
ind = 0;
bind_array_element (v, ind, buf, 0);
ind++;
}
else
printf ("%s\n", buf);
free (buf);
free (bmap);
return ind;
}
static int
cutchars (SHELL_VAR *v, char *line, struct cutop *ops)
{
arrayind_t ind;
char *buf, *bmap;
wchar_t *wbuf, *wb2;
size_t llen, wlen;
int i, b, n, s, e;
if (MB_CUR_MAX == 1)
return (cutbytes (v, line, ops));
if (locale_utf8locale && utf8_mbsmbchar (line) == 0)
return (cutbytes (v, line, ops));
llen = strlen (line);
wbuf = (wchar_t *)xmalloc ((llen + 1) * sizeof (wchar_t));
wlen = mbstowcs (wbuf, line, llen);
if (MB_INVALIDCH (wlen))
{
free (wbuf);
return (cutbytes (v, line, ops));
}
bmap = xmalloc (llen + 1);
memset (bmap, 0, llen);
for (n = 0; n < ops->npos; n++)
{
s = ops->poslist[n].startpos; /* no translation needed yet */
e = ops->poslist[n].endpos;
if (e == NORANGE)
e = s;
else if (e == EOL || e >= wlen)
e = wlen - 1;
/* even if a column is specified multiple times, it will only be printed
once */
for (i = s; i <= e; i++)
bmap[i] = 1;
}
wb2 = (wchar_t *)xmalloc ((wlen + 1) * sizeof (wchar_t));
b = 0;
for (i = 0; i < wlen; i++)
if (bmap[i])
wb2[b++] = wbuf[i];
wb2[b] = 0;
free (wbuf);
buf = bmap;
n = wcstombs (buf, wb2, llen);
if (v)
{
ind = 0;
bind_array_element (v, ind, buf, 0);
ind++;
}
else
printf ("%s\n", buf);
free (buf);
free (wb2);
return ind;
}
/* The basic strategy is to cut the line into fields using strsep, populate
an array of fields from 0..nf, then select those fields using the same
bitmap approach as cut{bytes,chars} and assign them to the array variable
V or print them on stdout. This function obeys SFLAG. */
static int
cutfields (SHELL_VAR *v, char *line, struct cutop *ops)
{
arrayind_t ind;
char *buf, *bmap, *field, **fields, delim[2];
size_t llen, fsize;
int i, b, n, s, e, nf;
ind = 0;
delim[0] = ops->delim;
delim[1] = '\0';
fields = 0;
nf = 0;
fsize = 0;
field = buf = line;
do
{
field = strsep (&buf, delim); /* destructive */
if (nf == fsize)
{
fsize += 8;
fields = xrealloc (fields, fsize * sizeof (char *));
}
fields[nf] = field;
if (field)
nf++;
}
while (field);
if (nf == 1)
{
free (fields);
if (ops->flags & SFLAG)
return ind;
if (v)
{
bind_array_element (v, ind, line, 0);
ind++;
}
else
printf ("%s\n", line);
return ind;
}
bmap = xmalloc (nf + 1);
memset (bmap, 0, nf);
for (n = 0; n < ops->npos; n++)
{
s = ops->poslist[n].startpos; /* no translation needed yet */
e = ops->poslist[n].endpos;
if (e == NORANGE)
e = s;
else if (e == EOL || e >= nf)
e = nf - 1;
/* even if a column is specified multiple times, it will only be printed
once */
for (i = s; i <= e; i++)
bmap[i] = 1;
}
for (i = 1, b = 0; b < nf; b++)
{
if (bmap[b] == 0)
continue;
if (v)
{
bind_array_element (v, ind, fields[b], 0);
ind++;
}
else
{
if (i == 0)
putchar (ops->delim);
printf ("%s", fields[b]);
}
i = 0;
}
if (v == 0)
putchar ('\n');
return nf;
}
static int
cutline (SHELL_VAR *v, char *line, struct cutop *ops)
{
int rval;
if (ops->flags & BFLAG)
rval = cutbytes (v, line, ops);
else if (ops->flags & CFLAG)
rval = cutchars (v, line, ops);
else
rval = cutfields (v, line, ops);
return (rval >= 0 ? EXECUTION_SUCCESS : EXECUTION_FAILURE);
}
static int
cutfile (SHELL_VAR *v, WORD_LIST *list, struct cutop *ops)
{
}
#define OPTSET(x) ((cutflags & (x)) ? 1 : 0)
static int
cut_internal (int which, WORD_LIST *list)
{
int opt, rval, cutflags, delim, npos;
char *array_name, *cutstring, *list_arg;
SHELL_VAR *v;
struct cutop op;
struct cutpos *poslist;
v = 0;
rval = EXECUTION_SUCCESS;
cutflags = 0;
array_name = 0;
list_arg = 0;
delim = '\t';
reset_internal_getopt ();
while ((opt = internal_getopt (list, "a:b:c:d:f:sn")) != -1)
{
switch (opt)
{
case 'a':
array_name = list_optarg;
break;
case 'b':
cutflags |= BFLAG;
list_arg = list_optarg;
break;
case 'c':
cutflags |= CFLAG;
list_arg = list_optarg;
break;
case 'd':
cutflags |= DFLAG;
delim = list_optarg[0];
if (delim == 0 || list_optarg[1])
{
builtin_error ("delimiter must be a single non-null character");
return (EX_USAGE);
}
break;
case 'f':
cutflags |= FFLAG;
list_arg = list_optarg;
break;
case 'n':
break;
case 's':
cutflags |= SFLAG;
break;
CASE_HELPOPT;
default:
builtin_usage ();
return (EX_USAGE);
}
}
list = loptend;
if (array_name && (valid_identifier (array_name) == 0))
{
sh_invalidid (array_name);
return (EXECUTION_FAILURE);
}
if (list == 0 && which == 0)
{
builtin_error ("string argument required");
return (EX_USAGE);
}
/* options are mutually exclusive and one is required */
if ((OPTSET (BFLAG) + OPTSET (CFLAG) + OPTSET (FFLAG)) != 1)
{
builtin_usage ();
return (EX_USAGE);
}
if ((npos = getlist (list_arg, &poslist)) < 0)
{
free (poslist);
return (EXECUTION_FAILURE);
}
if (array_name)
{
v = find_or_make_array_variable (array_name, 1);
if (v == 0 || readonly_p (v) || noassign_p (v))
{
if (v && readonly_p (v))
err_readonly (array_name);
return (EXECUTION_FAILURE);
}
else if (array_p (v) == 0)
{
builtin_error ("%s: not an indexed array", array_name);
return (EXECUTION_FAILURE);
}
if (invisible_p (v))
VUNSETATTR (v, att_invisible);
array_flush (array_cell (v));
}
cutstring = list->word->word;
if (cutstring == 0 || *cutstring == 0)
{
free (poslist);
return (EXECUTION_SUCCESS);
}
op.flags = cutflags;
op.delim = delim;
op.npos = npos;
op.poslist = poslist;
if (which == 0)
rval = cutline (v, cutstring, &op);
return (rval);
}
int
lcut_builtin (WORD_LIST *list)
{
return (cut_internal (0, list));
}
char *lcut_doc[] = {
"Extract selected fields from a string.",
"",
"Select portions of LINE (as specified by LIST) and assign them to",
"elements of the indexed array ARRAY starting at index 0, or write",
"them to the standard output if -a is not specified.",
"",
"Items specified by LIST are either column positions or fields delimited",
"by a special character, and are described more completely in cut(1).",
"",
"Columns correspond to bytes (-b), characters (-c), or fields (-f). The",
"field delimiter is specified by -d (default TAB). Column numbering",
"starts at 1.",
(char *)NULL
};
struct builtin lcut_struct = {
"lcut", /* builtin name */
lcut_builtin, /* function implementing the builtin */
BUILTIN_ENABLED, /* initial flags for builtin */
lcut_doc, /* array of long documentation strings. */
"lcut [-a ARRAY] [-b LIST] [-c LIST] [-f LIST] [-d CHAR] [-sn] line", /* usage synopsis; becomes short_doc */
0 /* reserved for internal use */
};