Add Pod::Markdown and EPICS::PodMD Perl modules

Co-authored-by: Timo Korhonen <timo.korhonen@ess.eu>
2024-08-17 15:01:44 -05:00
parent 60f6ba7b49
commit 4f6aa7ec29
4 changed files with 2011 additions and 0 deletions
--- a/src/tools/EPICS/PodMD.pm
+++ b/src/tools/EPICS/PodMD.pm
@ -0,0 +1,61 @@
+# SPDX-License-Identifier: EPICS
+
+package EPICS::PodMD;
+
+use strict;
+use warnings;
+
+use base 'Pod::Markdown';
+
+# Translate L<link text|filename/Section name>
+# into <a href="filename.md#Section-name">link text</a>#
+# This is for Sphinx processing on Readthedocs. Sphinx converts
+# links to .md into .html automatically in the processing.
+
+sub format_perldoc_url {
+  my ($self, $name, $section) = @_;
+
+  my $url_prefix = $self->perldoc_url_prefix;
+  if (
+    defined($name)
+    && $self->is_local_module($name)
+    && defined($self->local_module_url_prefix)
+  ) {
+    $url_prefix = $self->local_module_url_prefix;
+  }
+
+  my $url = '';
+
+  # If the link is to another module (external link).
+  if (defined($name)) {
+    $name .= ".md" unless $name =~ m/\.md$/; 
+    $url = $url_prefix .
+      ($self->escape_url ? URI::Escape::uri_escape($name) : $name);
+  }
+
+  # See https://rt.cpan.org/Ticket/Display.html?id=57776
+  # for a discussion on the need to mangle the section.
+  if ($section){
+
+    my $method = $url
+      # If we already have a prefix on the url it's external.
+      ? $self->perldoc_fragment_format
+      # Else an internal link points to this markdown doc.
+      : $self->markdown_fragment_format;
+
+    $method = 'format_fragment_' . $method
+      unless ref($method);
+
+    {
+      # Set topic to enable code refs to be simple.
+      local $_ = $section;
+      $section = $self->$method($section);
+    }
+
+    $url .= '#' . $section;
+  }
+
+  return $url;
+}
+
+1;
--- a/src/tools/Makefile
+++ b/src/tools/Makefile
@ -17,7 +17,10 @@ PERL_MODULES += EPICS/Release.pm
 PERL_MODULES += EPICS/Readfile.pm
 PERL_MODULES += EPICS/Getopts.pm
 PERL_MODULES += EPICS/PodHtml.pm
+PERL_MODULES += EPICS/PodMD.pm
 PERL_MODULES += EPICS/PodXHtml.pm
+PERL_MODULES += Pod/Markdown.pm
+PERL_MODULES += URI/Escape.pm

 # This goes into lib/perl, not bin/<host>
 PERL_MODULES += EpicsHostArch.pl
--- a/src/tools/Pod/Markdown.pm
+++ b/src/tools/Pod/Markdown.pm
--- a/src/tools/URI/Escape.pm
+++ b/src/tools/URI/Escape.pm
@ -0,0 +1,249 @@
+package URI::Escape;
+
+use strict;
+use warnings;
+
+=head1 NAME
+
+URI::Escape - Percent-encode and percent-decode unsafe characters
+
+=head1 SYNOPSIS
+
+ use URI::Escape;
+ $safe = uri_escape("10% is enough\n");
+ $verysafe = uri_escape("foo", "\0-\377");
+ $str  = uri_unescape($safe);
+
+=head1 DESCRIPTION
+
+This module provides functions to percent-encode and percent-decode URI strings as
+defined by RFC 3986. Percent-encoding URI's is informally called "URI escaping".
+This is the terminology used by this module, which predates the formalization of the
+terms by the RFC by several years.
+
+A URI consists of a restricted set of characters.  The restricted set
+of characters consists of digits, letters, and a few graphic symbols
+chosen from those common to most of the character encodings and input
+facilities available to Internet users.  They are made up of the
+"unreserved" and "reserved" character sets as defined in RFC 3986.
+
+   unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
+   reserved      = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+                   "!" / "$" / "&" / "'" / "(" / ")"
+                 / "*" / "+" / "," / ";" / "="
+
+In addition, any byte (octet) can be represented in a URI by an escape
+sequence: a triplet consisting of the character "%" followed by two
+hexadecimal digits.  A byte can also be represented directly by a
+character, using the US-ASCII character for that octet.
+
+Some of the characters are I<reserved> for use as delimiters or as
+part of certain URI components.  These must be escaped if they are to
+be treated as ordinary data.  Read RFC 3986 for further details.
+
+The functions provided (and exported by default) from this module are:
+
+=over 4
+
+=item uri_escape( $string )
+
+=item uri_escape( $string, $unsafe )
+
+Replaces each unsafe character in the $string with the corresponding
+escape sequence and returns the result.  The $string argument should
+be a string of bytes.  The uri_escape() function will croak if given a
+characters with code above 255.  Use uri_escape_utf8() if you know you
+have such chars or/and want chars in the 128 .. 255 range treated as
+UTF-8.
+
+The uri_escape() function takes an optional second argument that
+overrides the set of characters that are to be escaped.  The set is
+specified as a string that can be used in a regular expression
+character class (between [ ]).  E.g.:
+
+  "\x00-\x1f\x7f-\xff"          # all control and hi-bit characters
+  "a-z"                         # all lower case characters
+  "^A-Za-z"                     # everything not a letter
+
+The default set of characters to be escaped is all those which are
+I<not> part of the C<unreserved> character class shown above as well
+as the reserved characters.  I.e. the default is:
+
+    "^A-Za-z0-9\-\._~"
+
+The second argument can also be specified as a regular expression object:
+
+  qr/[^A-Za-z]/
+
+Any strings matched by this regular expression will have all of their
+characters escaped.
+
+=item uri_escape_utf8( $string )
+
+=item uri_escape_utf8( $string, $unsafe )
+
+Works like uri_escape(), but will encode chars as UTF-8 before
+escaping them.  This makes this function able to deal with characters
+with code above 255 in $string.  Note that chars in the 128 .. 255
+range will be escaped differently by this function compared to what
+uri_escape() would.  For chars in the 0 .. 127 range there is no
+difference.
+
+Equivalent to:
+
+    utf8::encode($string);
+    my $uri = uri_escape($string);
+
+Note: JavaScript has a function called escape() that produces the
+sequence "%uXXXX" for chars in the 256 .. 65535 range.  This function
+has really nothing to do with URI escaping but some folks got confused
+since it "does the right thing" in the 0 .. 255 range.  Because of
+this you sometimes see "URIs" with these kind of escapes.  The
+JavaScript encodeURIComponent() function is similar to uri_escape_utf8().
+
+=item uri_unescape($string,...)
+
+Returns a string with each %XX sequence replaced with the actual byte
+(octet).
+
+This does the same as:
+
+   $string =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
+
+but does not modify the string in-place as this RE would.  Using the
+uri_unescape() function instead of the RE might make the code look
+cleaner and is a few characters less to type.
+
+In a simple benchmark test I did,
+calling the function (instead of the inline RE above) if a few chars
+were unescaped was something like 40% slower, and something like 700% slower if none were.  If
+you are going to unescape a lot of times it might be a good idea to
+inline the RE.
+
+If the uri_unescape() function is passed multiple strings, then each
+one is returned unescaped.
+
+=back
+
+The module can also export the C<%escapes> hash, which contains the
+mapping from all 256 bytes to the corresponding escape codes.  Lookup
+in this hash is faster than evaluating C<sprintf("%%%02X", ord($byte))>
+each time.
+
+=head1 SEE ALSO
+
+L<URI>
+
+
+=head1 COPYRIGHT
+
+Copyright 1995-2004 Gisle Aas.
+
+This program is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+SPDX-License-Identifier: Artistic-2.0
+=cut
+
+use Exporter 5.57 'import';
+our %escapes;
+our @EXPORT = qw(uri_escape uri_unescape uri_escape_utf8);
+our @EXPORT_OK = qw(%escapes);
+our $VERSION = '5.28';
+
+use Carp ();
+
+# Build a char->hex map
+for (0..255) {
+    $escapes{chr($_)} = sprintf("%%%02X", $_);
+}
+
+my %subst;  # compiled patterns
+
+my %Unsafe = (
+    RFC2732 => qr/[^A-Za-z0-9\-_.!~*'()]/,
+    RFC3986 => qr/[^A-Za-z0-9\-\._~]/,
+);
+
+sub uri_escape {
+    my($text, $patn) = @_;
+    return undef unless defined $text;
+    my $re;
+    if (defined $patn){
+        if (ref $patn eq 'Regexp') {
+            $text =~ s{($patn)}{
+                join('', map +($escapes{$_} || _fail_hi($_)), split //, "$1")
+            }ge;
+            return $text;
+        }
+        $re = $subst{$patn};
+        if (!defined $re) {
+            $re = $patn;
+            # we need to escape the [] characters, except for those used in
+            # posix classes. if they are prefixed by a backslash, allow them
+            # through unmodified.
+            $re =~ s{(\[:\w+:\])|(\\)?([\[\]]|\\\z)}{
+                defined $1 ? $1 : defined $2 ? "$2$3" : "\\$3"
+            }ge;
+            eval {
+                # disable the warnings here, since they will trigger later
+                # when used, and we only want them to appear once per call,
+                # but every time the same pattern is used.
+                no warnings 'regexp';
+                $re = $subst{$patn} = qr{[$re]};
+                1;
+            } or Carp::croak("uri_escape: $@");
+        }
+    }
+    else {
+        $re = $Unsafe{RFC3986};
+    }
+    $text =~ s/($re)/$escapes{$1} || _fail_hi($1)/ge;
+    $text;
+}
+
+sub _fail_hi {
+    my $chr = shift;
+    Carp::croak(sprintf "Can't escape \\x{%04X}, try uri_escape_utf8() instead", ord($chr));
+}
+
+sub uri_escape_utf8 {
+    my $text = shift;
+    return undef unless defined $text;
+    utf8::encode($text);
+    return uri_escape($text, @_);
+}
+
+sub uri_unescape {
+    # Note from RFC1630:  "Sequences which start with a percent sign
+    # but are not followed by two hexadecimal characters are reserved
+    # for future extension"
+    my $str = shift;
+    if (@_ && wantarray) {
+        # not executed for the common case of a single argument
+        my @str = ($str, @_);  # need to copy
+        for (@str) {
+            s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
+        }
+        return @str;
+    }
+    $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str;
+    $str;
+}
+
+# XXX FIXME escape_char is buggy as it assigns meaning to the string's storage format.
+sub escape_char {
+    # Old versions of utf8::is_utf8() didn't properly handle magical vars (e.g. $1).
+    # The following forces a fetch to occur beforehand.
+    my $dummy = substr($_[0], 0, 0);
+
+    if (utf8::is_utf8($_[0])) {
+        my $s = shift;
+        utf8::encode($s);
+        unshift(@_, $s);
+    }
+
+    return join '', @URI::Escape::escapes{split //, $_[0]};
+}
+
+1;