mirror of
https://https.git.savannah.gnu.org/git/bash.git
synced 2026-06-22 05:17:59 +02:00
310 lines
8.9 KiB
C
310 lines
8.9 KiB
C
/* rev - reverse lines in a file or files character by character */
|
|
|
|
/*
|
|
* Copyright (c) 1987, 1992 The Regents of the University of California.
|
|
* Copyright (C) 2026 Free Software Foundation, Inc.
|
|
|
|
Bash is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
Bash is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with Bash. If not, see <http://www.gnu.org/licenses/>.
|
|
*
|
|
* Modified for Linux by Charles Hannum (mycroft@gnu.ai.mit.edu)
|
|
* and Brian Koehmstedt (bpk@gnu.ai.mit.edu)
|
|
*
|
|
* Wed Sep 14 22:26:00 1994: Patch from bjdouma <bjdouma@xs4all.nl> to handle
|
|
* last line that has no newline correctly.
|
|
* 3-Jun-1998: Patched by Nicolai Langfeldt to work better on Linux:
|
|
* Handle any-length-lines. Code copied from util-linux' setpwnam.c
|
|
* 1999-02-22 Arkadiusz Miśkiewicz <misiek@pld.ORG.PL>
|
|
* added Native Language Support
|
|
* 1999-09-19 Bruno Haible <haible@clisp.cons.org>
|
|
* modified to work correctly in multi-byte locales
|
|
* July 2010 - Davidlohr Bueso <dave@gnu.org>
|
|
* Fixed memory leaks (including Linux signal handling)
|
|
* Added some memory allocation error handling
|
|
* Lowered the default buffer size to 256, instead of 512 bytes
|
|
* Changed tab indentation to 8 chars for better reading the code
|
|
* 2026/03/24 02:17:26: Duncan Roe (duncan_roe@optusnet.com.au)
|
|
* Increase speed by using read(2) and processing
|
|
* multi-byte characters locally.
|
|
* Initial version only handles UTF-8 encoding.
|
|
* 2026/04/04 01:52:47: Duncan Roe (duncan_roe@optusnet.com.au)
|
|
* Convert into a bash loadable builtin.
|
|
*/
|
|
|
|
/* Headers */
|
|
#include <config.h>
|
|
|
|
#include <errno.h>
|
|
#include <fcntl.h>
|
|
#include <stdio.h>
|
|
#include <getopt.h>
|
|
#include <setjmp.h>
|
|
#include <stdint.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <unistd.h>
|
|
|
|
#include "shmbutil.h"
|
|
#include "loadables.h"
|
|
#include <array.h> /* Has to go after stdint & loadables (!) */
|
|
|
|
/* Macros */
|
|
|
|
/*
 * SYSCALL(x, y): evaluate Y, store the result in the lvalue X, and repeat
 * for as long as the result is -1 with errno set to EINTR.  Y is therefore
 * re-evaluated on every retry.  Expands to a single statement; the caller
 * supplies the trailing semicolon.
 */
#define SYSCALL(x, y) \
  do \
    (x) = (y); \
  while ((x) == -1 && errno == EINTR)

/*
 * PUTC(x): emit one byte.  This macro deliberately uses two names from the
 * calling scope: when `v' is non-null the byte is stored through `buf',
 * which is then advanced; otherwise the byte is written to stdout.  X is
 * evaluated exactly once.  Wrapped in do { } while (0) so that it behaves
 * as one statement wherever it is used.
 */
#define PUTC(x) \
  do \
    { \
      if (v) \
        *buf++ = (x); \
      else \
        fputc((x), stdout); \
    } \
  while (0)
|
|
|
|
/* ********************************* getlen ********************************* */
|
|
|
|
/*
 * Get the length of the UTF-8 sequence that ends at LAST_TRLG_BYTE.
 *
 * last_trlg_byte  points at the candidate final byte of a sequence.
 * num_bytes_left  number of bytes available in the buffer up to and
 *                 including *last_trlg_byte; the scan never looks further
 *                 back than that.
 *
 * Returns 2, 3 or 4 when *last_trlg_byte is a trailing byte ('10'b prefix)
 * preceded by exactly the right number of further trailing bytes and a
 * header byte announcing that total length.  Returns 1 in every other
 * case, so the caller treats the byte as a character on its own.
 *
 * Encoding recap: there can be up to 3 trailing bytes, each starting '10'b
 * and carrying 6 bits of data.  The header byte starts with as many 1 bits
 * as there are bytes in the sequence, followed by a 0 bit; the rest of the
 * header carries data.  A 4-byte sequence thus starts '11110'b and can
 * carry 3 + 3 * 6 = 21 bits, although Unicode stops at U+10FFFF so only a
 * small part of that range is in use.
 *
 * Only the prefix bits are examined: overlong forms and out-of-range
 * values are not rejected.
 */
static int
getlen(const char *last_trlg_byte, int num_bytes_left)
{
  /*
   * mask[n] selects the prefix bits of an n-byte header and mask[n - 1] is
   * the value those bits must have.  mask[1] / mask[0] play the same role
   * for a trailing byte.  Kept unsigned so that the comparisons do not
   * depend on whether plain char is signed.
   */
  static const unsigned char mask[5] = { 0200, 0300, 0340, 0360, 0370 };
  const unsigned char *last = (const unsigned char *)last_trlg_byte;
  int longest;                  /* Longest sequence that fits in the buffer */
  int n;                        /* Bytes in header + trailer(s) */

  if ((last[0] & mask[1]) != mask[0])
    return 1;                   /* Not a trailing byte at all */

  longest = num_bytes_left < 4 ? num_bytes_left : 4;

  /*
   * Walk backwards by index rather than by decrementing a pointer, so no
   * pointer to before the start of the buffer is ever formed.
   */
  for (n = 2; n <= longest; n++)
    {
      unsigned char c = *(last - (n - 1));

      if ((c & mask[1]) == mask[0])
        continue;               /* Another trailing byte: keep looking */

      /* First non-trailing byte: it must be the header of an n-byte seq */
      return ((c & mask[n]) == mask[n - 1]) ? n : 1;
    }

  return 1;                     /* Ran out of bytes without finding a header */
} /* getlen() */
|
|
|
|
/* ****************************** reverse_line ****************************** */
|
|
|
|
static void
|
|
reverse_line(SHELL_VAR *v, arrayind_t *ind, char *line, size_t len,
|
|
int outputsep, char sep)
|
|
{
|
|
char *p, *q;
|
|
char *buf;
|
|
int i, j;
|
|
#if defined (ARRAY_VARS)
|
|
if (v)
|
|
{
|
|
/*
|
|
* Bypass extra copies and malloc / free calls by getting a shell var
|
|
* with NULL value and putting an allocated buffer in it.
|
|
*/
|
|
bind_array_element (v, (*ind)++, (char *)NULL, 0);
|
|
buf = xmalloc(len + outputsep + 1); /* +1 for NUL */
|
|
(((ARRAY *)v->value)->lastref)->value = buf;
|
|
buf[len + outputsep] = '\0';
|
|
} /* if (v) */
|
|
#endif
|
|
|
|
if (locale_utf8locale)
|
|
{
|
|
for (i = len, p = line + len - 1; i > 0; i--, p--)
|
|
{
|
|
if (*p & 0200)
|
|
{
|
|
j = getlen(p, i);
|
|
p = q = p - (j - 1); /* p-> 1st byte of seq */
|
|
i -= (j - 1); /* Reduce num left by num trlg bytes */
|
|
for (; j > 0; j--)
|
|
PUTC(*q++);
|
|
} /* if (*p & 0200) */
|
|
else
|
|
PUTC(*p);
|
|
} /* for (i = len, p = line + len - 1; i > 0; i--) */
|
|
} /* if (locale_utf8locale) */
|
|
else
|
|
{
|
|
for (i = len, p = line + len - 1; i > 0; i--)
|
|
PUTC(*p--);
|
|
} /* if (locale_utf8locale) else */
|
|
if (outputsep)
|
|
PUTC(sep);
|
|
} /* reverse_line() */
|
|
|
|
/* ****************************** rev_internal ****************************** */
|
|
|
|
static int
|
|
rev_internal(WORD_LIST *list)
|
|
{
|
|
int unbuffered_read;
|
|
char *array_name;
|
|
arrayind_t ind;
|
|
int outputsep;
|
|
WORD_LIST *l;
|
|
SHELL_VAR *v;
|
|
size_t llen;
|
|
char *line;
|
|
size_t n;
|
|
int rval;
|
|
char sep;
|
|
int opt;
|
|
int fd, closefd;
|
|
|
|
v = 0;
|
|
rval = EXECUTION_SUCCESS;
|
|
|
|
array_name = 0;
|
|
sep = '\n';
|
|
ind = 0;
|
|
|
|
reset_internal_getopt();
|
|
while ((opt = internal_getopt(list, "0a:h")) != -1)
|
|
switch (opt)
|
|
{
|
|
case '0':
|
|
sep = '\0';
|
|
break;
|
|
case 'a':
|
|
#if defined (ARRAY_VARS)
|
|
array_name = list_optarg;
|
|
break;
|
|
#else
|
|
builtin_error("arrays not available");
|
|
return (EX_USAGE);
|
|
#endif
|
|
CASE_HELPOPT;
|
|
default:
|
|
builtin_usage();
|
|
return (EX_USAGE);
|
|
}
|
|
|
|
if (array_name && (valid_identifier(array_name) == 0))
|
|
{
|
|
sh_invalidid(array_name);
|
|
return (EXECUTION_FAILURE);
|
|
}
|
|
|
|
#if defined (ARRAY_VARS)
|
|
if (array_name)
|
|
{
|
|
v = builtin_find_indexed_array(array_name, 1);
|
|
if (v == 0)
|
|
return (EXECUTION_FAILURE);
|
|
}
|
|
#endif
|
|
|
|
l = loptend;
|
|
line = 0;
|
|
llen = 0;
|
|
|
|
do
|
|
{
|
|
/* for each file */
|
|
closefd = 0;
|
|
|
|
if (l == 0)
|
|
fd = 0;
|
|
else
|
|
{
|
|
SYSCALL(fd, open(l->word->word, O_RDONLY));
|
|
closefd = fd != -1;;
|
|
}
|
|
if (fd == -1)
|
|
{
|
|
file_error(l->word->word);
|
|
rval = EXECUTION_FAILURE;
|
|
goto next_file;
|
|
}
|
|
|
|
#ifndef __CYGWIN__
|
|
unbuffered_read = (lseek(fd, 0L, SEEK_CUR) < 0) && (errno == ESPIPE);
|
|
#else
|
|
unbuffered_read = 1;
|
|
#endif
|
|
|
|
/* Read from input */
|
|
while ((n = zgetline(fd, &line, &llen, sep, unbuffered_read)) != -1)
|
|
{
|
|
QUIT;
|
|
if (line[n] == sep)
|
|
outputsep = 1;
|
|
else
|
|
{
|
|
outputsep = 0;
|
|
n++; /* Work around zgetline behaviour on unterminated line */
|
|
}
|
|
reverse_line(v, &ind, line, n, outputsep, sep);
|
|
} /* while ((n = zgetline(...) !=-1) */
|
|
if (closefd)
|
|
close(fd);
|
|
|
|
next_file:
|
|
QUIT;
|
|
if (l)
|
|
l = l->next;
|
|
} /* do */
|
|
while (l);
|
|
|
|
free(line);
|
|
return rval;
|
|
} /* rev_internal() */
|
|
|
|
/* ****************************** rev_builtin ******************************* */
|
|
|
|
int
|
|
rev_builtin(WORD_LIST *list)
|
|
{
|
|
return rev_internal(list);
|
|
} /* main() */
|
|
|
|
/*
 * Long documentation for the builtin: one string per line of help text,
 * terminated by a null pointer.  Every entry needs its own trailing comma;
 * without one, adjacent string literals are concatenated into one line.
 */
char *rev_doc[] = {
  "Reverse lines characterwise.",
  "",
  "Copy the lines of the specified files to standard output,",
  "or assign them to the indexed array ARRAY starting at index 0,",
  "reversing the order of characters in every line.",
  "If no files are specified, standard input is read.",
  "",
  "When -0 is specified, use the byte '\\0' as line separator.",
  "",
  "When -a is specified, assign each reversed line",
  "to successive elements of ARRAY,",
  "beginning at 0.",
  "The lines rev assigns to ARRAY are identical to the lines it would",
  "write to the standard output if -a were not supplied.",
  "",
  "This utility processes UTF-8 without using a wide-character buffer.",
  (char *)NULL
};
|
|
|
|
struct builtin rev_struct = {
|
|
"rev", /* builtin name */
|
|
rev_builtin, /* function implementing the builtin */
|
|
BUILTIN_ENABLED, /* initial flags for builtin */
|
|
rev_doc, /* array of long documentation strings */
|
|
"rev [-0] [-a ARRAY] [file ...]", /* usage synopsis; becomes short_doc */
|
|
0 /* reserved for internal use */
|
|
};
|