Add Pod::Markdown and EPICS::PodMD Perl modules
Co-authored-by: Timo Korhonen <timo.korhonen@ess.eu>
This commit is contained in:
61
src/tools/EPICS/PodMD.pm
Normal file
61
src/tools/EPICS/PodMD.pm
Normal file
@ -0,0 +1,61 @@
|
||||
# SPDX-License-Identifier: EPICS
|
||||
|
||||
package EPICS::PodMD;
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use base 'Pod::Markdown';
|
||||
|
||||
# Translate L<link text|filename/Section name>
|
||||
# into <a href="filename.md#Section-name">link text</a>#
|
||||
# This is for Sphinx processing on Readthedocs. Sphinx converts
|
||||
# links to .md into .html automatically in the processing.
|
||||
|
||||
sub format_perldoc_url {
|
||||
my ($self, $name, $section) = @_;
|
||||
|
||||
my $url_prefix = $self->perldoc_url_prefix;
|
||||
if (
|
||||
defined($name)
|
||||
&& $self->is_local_module($name)
|
||||
&& defined($self->local_module_url_prefix)
|
||||
) {
|
||||
$url_prefix = $self->local_module_url_prefix;
|
||||
}
|
||||
|
||||
my $url = '';
|
||||
|
||||
# If the link is to another module (external link).
|
||||
if (defined($name)) {
|
||||
$name .= ".md" unless $name =~ m/\.md$/;
|
||||
$url = $url_prefix .
|
||||
($self->escape_url ? URI::Escape::uri_escape($name) : $name);
|
||||
}
|
||||
|
||||
# See https://rt.cpan.org/Ticket/Display.html?id=57776
|
||||
# for a discussion on the need to mangle the section.
|
||||
if ($section){
|
||||
|
||||
my $method = $url
|
||||
# If we already have a prefix on the url it's external.
|
||||
? $self->perldoc_fragment_format
|
||||
# Else an internal link points to this markdown doc.
|
||||
: $self->markdown_fragment_format;
|
||||
|
||||
$method = 'format_fragment_' . $method
|
||||
unless ref($method);
|
||||
|
||||
{
|
||||
# Set topic to enable code refs to be simple.
|
||||
local $_ = $section;
|
||||
$section = $self->$method($section);
|
||||
}
|
||||
|
||||
$url .= '#' . $section;
|
||||
}
|
||||
|
||||
return $url;
|
||||
}
|
||||
|
||||
1;
|
@ -17,7 +17,10 @@ PERL_MODULES += EPICS/Release.pm
|
||||
PERL_MODULES += EPICS/Readfile.pm
|
||||
PERL_MODULES += EPICS/Getopts.pm
|
||||
PERL_MODULES += EPICS/PodHtml.pm
|
||||
PERL_MODULES += EPICS/PodMD.pm
|
||||
PERL_MODULES += EPICS/PodXHtml.pm
|
||||
PERL_MODULES += Pod/Markdown.pm
|
||||
PERL_MODULES += URI/Escape.pm
|
||||
|
||||
# This goes into lib/perl, not bin/<host>
|
||||
PERL_MODULES += EpicsHostArch.pl
|
||||
|
1698
src/tools/Pod/Markdown.pm
Normal file
1698
src/tools/Pod/Markdown.pm
Normal file
File diff suppressed because it is too large
Load Diff
249
src/tools/URI/Escape.pm
Normal file
249
src/tools/URI/Escape.pm
Normal file
@ -0,0 +1,249 @@
|
||||
package URI::Escape;
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
=head1 NAME
|
||||
|
||||
URI::Escape - Percent-encode and percent-decode unsafe characters
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
use URI::Escape;
|
||||
$safe = uri_escape("10% is enough\n");
|
||||
$verysafe = uri_escape("foo", "\0-\377");
|
||||
$str = uri_unescape($safe);
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This module provides functions to percent-encode and percent-decode URI strings as
|
||||
defined by RFC 3986. Percent-encoding URI's is informally called "URI escaping".
|
||||
This is the terminology used by this module, which predates the formalization of the
|
||||
terms by the RFC by several years.
|
||||
|
||||
A URI consists of a restricted set of characters. The restricted set
|
||||
of characters consists of digits, letters, and a few graphic symbols
|
||||
chosen from those common to most of the character encodings and input
|
||||
facilities available to Internet users. They are made up of the
|
||||
"unreserved" and "reserved" character sets as defined in RFC 3986.
|
||||
|
||||
unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
|
||||
reserved = ":" / "/" / "?" / "#" / "[" / "]" / "@"
|
||||
"!" / "$" / "&" / "'" / "(" / ")"
|
||||
/ "*" / "+" / "," / ";" / "="
|
||||
|
||||
In addition, any byte (octet) can be represented in a URI by an escape
|
||||
sequence: a triplet consisting of the character "%" followed by two
|
||||
hexadecimal digits. A byte can also be represented directly by a
|
||||
character, using the US-ASCII character for that octet.
|
||||
|
||||
Some of the characters are I<reserved> for use as delimiters or as
|
||||
part of certain URI components. These must be escaped if they are to
|
||||
be treated as ordinary data. Read RFC 3986 for further details.
|
||||
|
||||
The functions provided (and exported by default) from this module are:
|
||||
|
||||
=over 4
|
||||
|
||||
=item uri_escape( $string )
|
||||
|
||||
=item uri_escape( $string, $unsafe )
|
||||
|
||||
Replaces each unsafe character in the $string with the corresponding
|
||||
escape sequence and returns the result. The $string argument should
|
||||
be a string of bytes. The uri_escape() function will croak if given a
|
||||
characters with code above 255. Use uri_escape_utf8() if you know you
|
||||
have such chars or/and want chars in the 128 .. 255 range treated as
|
||||
UTF-8.
|
||||
|
||||
The uri_escape() function takes an optional second argument that
|
||||
overrides the set of characters that are to be escaped. The set is
|
||||
specified as a string that can be used in a regular expression
|
||||
character class (between [ ]). E.g.:
|
||||
|
||||
"\x00-\x1f\x7f-\xff" # all control and hi-bit characters
|
||||
"a-z" # all lower case characters
|
||||
"^A-Za-z" # everything not a letter
|
||||
|
||||
The default set of characters to be escaped is all those which are
|
||||
I<not> part of the C<unreserved> character class shown above as well
|
||||
as the reserved characters. I.e. the default is:
|
||||
|
||||
"^A-Za-z0-9\-\._~"
|
||||
|
||||
The second argument can also be specified as a regular expression object:
|
||||
|
||||
qr/[^A-Za-z]/
|
||||
|
||||
Any strings matched by this regular expression will have all of their
|
||||
characters escaped.
|
||||
|
||||
=item uri_escape_utf8( $string )
|
||||
|
||||
=item uri_escape_utf8( $string, $unsafe )
|
||||
|
||||
Works like uri_escape(), but will encode chars as UTF-8 before
|
||||
escaping them. This makes this function able to deal with characters
|
||||
with code above 255 in $string. Note that chars in the 128 .. 255
|
||||
range will be escaped differently by this function compared to what
|
||||
uri_escape() would. For chars in the 0 .. 127 range there is no
|
||||
difference.
|
||||
|
||||
Equivalent to:
|
||||
|
||||
utf8::encode($string);
|
||||
my $uri = uri_escape($string);
|
||||
|
||||
Note: JavaScript has a function called escape() that produces the
|
||||
sequence "%uXXXX" for chars in the 256 .. 65535 range. This function
|
||||
has really nothing to do with URI escaping but some folks got confused
|
||||
since it "does the right thing" in the 0 .. 255 range. Because of
|
||||
this you sometimes see "URIs" with these kind of escapes. The
|
||||
JavaScript encodeURIComponent() function is similar to uri_escape_utf8().
|
||||
|
||||
=item uri_unescape($string,...)
|
||||
|
||||
Returns a string with each %XX sequence replaced with the actual byte
|
||||
(octet).
|
||||
|
||||
This does the same as:
|
||||
|
||||
$string =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
|
||||
|
||||
but does not modify the string in-place as this RE would. Using the
|
||||
uri_unescape() function instead of the RE might make the code look
|
||||
cleaner and is a few characters less to type.
|
||||
|
||||
In a simple benchmark test I did,
|
||||
calling the function (instead of the inline RE above) if a few chars
|
||||
were unescaped was something like 40% slower, and something like 700% slower if none were. If
|
||||
you are going to unescape a lot of times it might be a good idea to
|
||||
inline the RE.
|
||||
|
||||
If the uri_unescape() function is passed multiple strings, then each
|
||||
one is returned unescaped.
|
||||
|
||||
=back
|
||||
|
||||
The module can also export the C<%escapes> hash, which contains the
|
||||
mapping from all 256 bytes to the corresponding escape codes. Lookup
|
||||
in this hash is faster than evaluating C<sprintf("%%%02X", ord($byte))>
|
||||
each time.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
L<URI>
|
||||
|
||||
|
||||
=head1 COPYRIGHT
|
||||
|
||||
Copyright 1995-2004 Gisle Aas.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the same terms as Perl itself.
|
||||
|
||||
SPDX-License-Identifier: Artistic-2.0
|
||||
=cut
|
||||
|
||||
use Exporter 5.57 'import';
|
||||
our %escapes;
|
||||
our @EXPORT = qw(uri_escape uri_unescape uri_escape_utf8);
|
||||
our @EXPORT_OK = qw(%escapes);
|
||||
our $VERSION = '5.28';
|
||||
|
||||
use Carp ();
|
||||
|
||||
# Build a char->hex map
|
||||
for (0..255) {
|
||||
$escapes{chr($_)} = sprintf("%%%02X", $_);
|
||||
}
|
||||
|
||||
my %subst; # compiled patterns
|
||||
|
||||
my %Unsafe = (
|
||||
RFC2732 => qr/[^A-Za-z0-9\-_.!~*'()]/,
|
||||
RFC3986 => qr/[^A-Za-z0-9\-\._~]/,
|
||||
);
|
||||
|
||||
sub uri_escape {
|
||||
my($text, $patn) = @_;
|
||||
return undef unless defined $text;
|
||||
my $re;
|
||||
if (defined $patn){
|
||||
if (ref $patn eq 'Regexp') {
|
||||
$text =~ s{($patn)}{
|
||||
join('', map +($escapes{$_} || _fail_hi($_)), split //, "$1")
|
||||
}ge;
|
||||
return $text;
|
||||
}
|
||||
$re = $subst{$patn};
|
||||
if (!defined $re) {
|
||||
$re = $patn;
|
||||
# we need to escape the [] characters, except for those used in
|
||||
# posix classes. if they are prefixed by a backslash, allow them
|
||||
# through unmodified.
|
||||
$re =~ s{(\[:\w+:\])|(\\)?([\[\]]|\\\z)}{
|
||||
defined $1 ? $1 : defined $2 ? "$2$3" : "\\$3"
|
||||
}ge;
|
||||
eval {
|
||||
# disable the warnings here, since they will trigger later
|
||||
# when used, and we only want them to appear once per call,
|
||||
# but every time the same pattern is used.
|
||||
no warnings 'regexp';
|
||||
$re = $subst{$patn} = qr{[$re]};
|
||||
1;
|
||||
} or Carp::croak("uri_escape: $@");
|
||||
}
|
||||
}
|
||||
else {
|
||||
$re = $Unsafe{RFC3986};
|
||||
}
|
||||
$text =~ s/($re)/$escapes{$1} || _fail_hi($1)/ge;
|
||||
$text;
|
||||
}
|
||||
|
||||
sub _fail_hi {
|
||||
my $chr = shift;
|
||||
Carp::croak(sprintf "Can't escape \\x{%04X}, try uri_escape_utf8() instead", ord($chr));
|
||||
}
|
||||
|
||||
sub uri_escape_utf8 {
|
||||
my $text = shift;
|
||||
return undef unless defined $text;
|
||||
utf8::encode($text);
|
||||
return uri_escape($text, @_);
|
||||
}
|
||||
|
||||
sub uri_unescape {
|
||||
# Note from RFC1630: "Sequences which start with a percent sign
|
||||
# but are not followed by two hexadecimal characters are reserved
|
||||
# for future extension"
|
||||
my $str = shift;
|
||||
if (@_ && wantarray) {
|
||||
# not executed for the common case of a single argument
|
||||
my @str = ($str, @_); # need to copy
|
||||
for (@str) {
|
||||
s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
|
||||
}
|
||||
return @str;
|
||||
}
|
||||
$str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str;
|
||||
$str;
|
||||
}
|
||||
|
||||
# XXX FIXME escape_char is buggy as it assigns meaning to the string's storage format.
|
||||
sub escape_char {
|
||||
# Old versions of utf8::is_utf8() didn't properly handle magical vars (e.g. $1).
|
||||
# The following forces a fetch to occur beforehand.
|
||||
my $dummy = substr($_[0], 0, 0);
|
||||
|
||||
if (utf8::is_utf8($_[0])) {
|
||||
my $s = shift;
|
||||
utf8::encode($s);
|
||||
unshift(@_, $s);
|
||||
}
|
||||
|
||||
return join '', @URI::Escape::escapes{split //, $_[0]};
|
||||
}
|
||||
|
||||
1;
|
Reference in New Issue
Block a user