Deploy site

This commit is contained in:
Gitea Actions
2025-06-09 03:00:56 +02:00
commit a96b026468
2329 changed files with 367195 additions and 0 deletions

View File

@ -0,0 +1,81 @@
"""
.. codeauthor:: Tsuyoshi Hombashi <tsuyoshi.hombashi@gmail.com>
"""
from .__version__ import __author__, __copyright__, __email__, __license__, __version__
from ._base import AbstractSanitizer, AbstractValidator
from ._common import (
ascii_symbols,
normalize_platform,
replace_ansi_escape,
replace_unprintable_char,
unprintable_ascii_chars,
validate_pathtype,
validate_unprintable_char,
)
from ._const import Platform
from ._filename import (
FileNameSanitizer,
FileNameValidator,
is_valid_filename,
sanitize_filename,
validate_filename,
)
from ._filepath import (
FilePathSanitizer,
FilePathValidator,
is_valid_filepath,
sanitize_filepath,
validate_filepath,
)
from ._ltsv import sanitize_ltsv_label, validate_ltsv_label
from ._symbol import replace_symbol, validate_symbol
from .error import (
ErrorReason,
InvalidCharError,
InvalidReservedNameError,
NullNameError,
ReservedNameError,
ValidationError,
ValidReservedNameError,
)
# Public API of the package. Every name listed here is imported at the top of
# this module; the tuple is kept as a plain literal of strings.
__all__ = (
    # package metadata (from .__version__)
    "__author__",
    "__copyright__",
    "__email__",
    "__license__",
    "__version__",
    # abstract base classes (from ._base)
    "AbstractSanitizer",
    "AbstractValidator",
    # platform enumeration (from ._const)
    "Platform",
    # shared helpers (from ._common)
    "ascii_symbols",
    "normalize_platform",
    "replace_ansi_escape",
    "replace_unprintable_char",
    "unprintable_ascii_chars",
    "validate_pathtype",
    "validate_unprintable_char",
    # file name API (from ._filename)
    "FileNameSanitizer",
    "FileNameValidator",
    "is_valid_filename",
    "sanitize_filename",
    "validate_filename",
    # file path API (from ._filepath)
    "FilePathSanitizer",
    "FilePathValidator",
    "is_valid_filepath",
    "sanitize_filepath",
    "validate_filepath",
    # LTSV label helpers (from ._ltsv)
    "sanitize_ltsv_label",
    "validate_ltsv_label",
    # symbol helpers (from ._symbol)
    "replace_symbol",
    "validate_symbol",
    # error types (from .error)
    "ErrorReason",
    "InvalidCharError",
    "InvalidReservedNameError",
    "NullNameError",
    "ReservedNameError",
    "ValidationError",
    "ValidReservedNameError",
)

View File

@ -0,0 +1,9 @@
from typing import Final
# Package metadata constants.
__author__: Final = "Tsuyoshi Hombashi"
# Copyright notice built from the author name defined above.
__copyright__: Final = f"Copyright 2016-2025, {__author__}"
__license__: Final = "MIT License"
# NOTE(review): the only value here without a ``Final`` annotation; presumably
# kept as a plain assignment for release tooling -- confirm before changing.
__version__ = "3.2.3"
# The maintainer is the same value as the author.
__maintainer__: Final = __author__
__email__: Final = "tsuyoshi.hombashi@gmail.com"

View File

@ -0,0 +1,252 @@
"""
.. codeauthor:: Tsuyoshi Hombashi <tsuyoshi.hombashi@gmail.com>
"""
import abc
import os
import re
import sys
from collections.abc import Sequence
from typing import Final, Optional
from ._common import normalize_platform, unprintable_ascii_chars
from ._const import DEFAULT_MIN_LEN, Platform
from ._types import PathType, PlatformType
from .error import ReservedNameError, ValidationError
from .handler import NullValueHandler, ReservedNameHandler, ValidationErrorHandler
class BaseFile:
    """Common state shared by the validator and sanitizer classes.

    An instance stores the normalized target platform, the effective maximum
    length, the file system encoding and the upper-cased additional reserved
    names.
    """

    # Unprintable ASCII characters are invalid in any path.
    _INVALID_PATH_CHARS: Final[str] = "".join(unprintable_ascii_chars)
    # A file name additionally must not contain the "/" separator.
    _INVALID_FILENAME_CHARS: Final[str] = _INVALID_PATH_CHARS + "/"
    # Extra characters rejected for Windows paths.
    _INVALID_WIN_PATH_CHARS: Final[str] = _INVALID_PATH_CHARS + ':*?"<>|\t\n\r\x0b\x0c'
    # Windows file names reject both separators on top of the sets above.
    _INVALID_WIN_FILENAME_CHARS: Final[str] = (
        _INVALID_FILENAME_CHARS + _INVALID_WIN_PATH_CHARS + "\\"
    )

    def __init__(
        self,
        max_len: int,
        fs_encoding: Optional[str],
        additional_reserved_names: Optional[Sequence[str]] = None,
        platform_max_len: Optional[int] = None,
        platform: Optional[PlatformType] = None,
    ) -> None:
        """Initialize the shared settings.

        Args:
            max_len:
                Requested maximum length. A value lower than or equal to zero
                selects the platform maximum.
            fs_encoding:
                File system encoding. A falsy value selects
                ``sys.getfilesystemencoding()``.
            additional_reserved_names:
                Extra reserved names; stored upper-cased.
            platform_max_len:
                Upper bound of ``max_len``. |None| selects the default of the
                target platform.
            platform:
                Target platform, normalized with ``normalize_platform``.
        """
        extra_names = () if additional_reserved_names is None else additional_reserved_names
        self._additional_reserved_names = tuple(word.upper() for word in extra_names)

        # The platform must be set first: the default limit below depends on it.
        self.__platform = normalize_platform(platform)

        upper_bound = (
            self._get_default_max_path_len() if platform_max_len is None else platform_max_len
        )
        requested = upper_bound if max_len <= 0 else max_len
        # The effective limit never exceeds the platform limit.
        self._max_len = min(requested, upper_bound)

        self._fs_encoding = fs_encoding if fs_encoding else sys.getfilesystemencoding()

    @property
    def platform(self) -> Platform:
        """Normalized target platform."""
        return self.__platform

    @property
    def reserved_keywords(self) -> tuple[str, ...]:
        """Upper-cased additional reserved names passed to the initializer."""
        return self._additional_reserved_names

    @property
    def max_len(self) -> int:
        """Effective maximum length."""
        return self._max_len

    def _is_posix(self) -> bool:
        return self.platform == Platform.POSIX

    def _is_universal(self) -> bool:
        return self.platform == Platform.UNIVERSAL

    def _is_linux(self, include_universal: bool = False) -> bool:
        accepted = {Platform.LINUX}
        if include_universal:
            accepted.add(Platform.UNIVERSAL)
        return self.platform in accepted

    def _is_windows(self, include_universal: bool = False) -> bool:
        accepted = {Platform.WINDOWS}
        if include_universal:
            accepted.add(Platform.UNIVERSAL)
        return self.platform in accepted

    def _is_macos(self, include_universal: bool = False) -> bool:
        accepted = {Platform.MACOS}
        if include_universal:
            accepted.add(Platform.UNIVERSAL)
        return self.platform in accepted

    def _get_default_max_path_len(self) -> int:
        """Return the default maximum path length of the target platform."""
        limits = {
            Platform.LINUX: 4096,
            Platform.WINDOWS: 260,
            Platform.POSIX: 1024,
            Platform.MACOS: 1024,
        }
        # Anything else (the universal platform) uses the most restrictive value.
        return limits.get(self.platform, 260)
class AbstractValidator(BaseFile, metaclass=abc.ABCMeta):
    """Abstract interface of validators.

    Concrete classes implement :py:attr:`min_len` and :py:meth:`validate`.
    """

    def __init__(
        self,
        max_len: int,
        fs_encoding: Optional[str],
        check_reserved: bool,
        additional_reserved_names: Optional[Sequence[str]] = None,
        platform_max_len: Optional[int] = None,
        platform: Optional[PlatformType] = None,
    ) -> None:
        # Assigned before the base initializer runs, same order as before.
        self._check_reserved = check_reserved

        super().__init__(
            max_len=max_len,
            fs_encoding=fs_encoding,
            additional_reserved_names=additional_reserved_names,
            platform_max_len=platform_max_len,
            platform=platform,
        )

    @property
    @abc.abstractmethod
    def min_len(self) -> int:  # pragma: no cover
        """Minimum length accepted by the validator."""

    @abc.abstractmethod
    def validate(self, value: PathType) -> None:  # pragma: no cover
        """Validate ``value``; implementations raise when it is invalid."""

    def is_valid(self, value: PathType) -> bool:
        """Return |True| when :py:meth:`validate` accepts ``value``.

        ``TypeError`` and ``ValidationError`` raised by :py:meth:`validate`
        are converted to |False|; other exceptions propagate.
        """
        try:
            self.validate(value)
        except (TypeError, ValidationError):
            return False
        else:
            return True

    def _is_reserved_keyword(self, value: str) -> bool:
        """Case-insensitive membership test against ``reserved_keywords``."""
        upper_value = value.upper()
        return upper_value in self.reserved_keywords
class AbstractSanitizer(BaseFile, metaclass=abc.ABCMeta):
    """Abstract interface of sanitizers.

    Stores the validator and the handlers that concrete ``sanitize``
    implementations use.
    """

    def __init__(
        self,
        validator: AbstractValidator,
        max_len: int,
        fs_encoding: Optional[str],
        validate_after_sanitize: bool,
        null_value_handler: Optional[ValidationErrorHandler] = None,
        reserved_name_handler: Optional[ValidationErrorHandler] = None,
        additional_reserved_names: Optional[Sequence[str]] = None,
        platform_max_len: Optional[int] = None,
        platform: Optional[PlatformType] = None,
    ) -> None:
        """Initialize the sanitizer.

        Args:
            validator: Validator kept on the instance for the concrete sanitizer.
            max_len: See :py:class:`BaseFile`.
            fs_encoding: See :py:class:`BaseFile`.
            validate_after_sanitize: Flag stored for the concrete sanitizer.
            null_value_handler:
                Defaults to ``NullValueHandler.return_null_string``.
            reserved_name_handler:
                Defaults to ``ReservedNameHandler.add_trailing_underscore``.
            additional_reserved_names: See :py:class:`BaseFile`.
            platform_max_len: See :py:class:`BaseFile`.
            platform: See :py:class:`BaseFile`.
        """
        super().__init__(
            max_len=max_len,
            fs_encoding=fs_encoding,
            additional_reserved_names=additional_reserved_names,
            platform_max_len=platform_max_len,
            platform=platform,
        )

        self._null_value_handler = (
            NullValueHandler.return_null_string
            if null_value_handler is None
            else null_value_handler
        )
        self._reserved_name_handler = (
            ReservedNameHandler.add_trailing_underscore
            if reserved_name_handler is None
            else reserved_name_handler
        )
        self._validate_after_sanitize = validate_after_sanitize
        self._validator = validator

    @abc.abstractmethod
    def sanitize(self, value: PathType, replacement_text: str = "") -> PathType:  # pragma: no cover
        """Return a sanitized version of ``value``."""
class BaseValidator(AbstractValidator):
    """Validator base class that adds the minimum length and reserved-name checks."""

    # Leading run of characters up to (not including) the first dot.
    __RE_ROOT_NAME: Final = re.compile(r"([^\.]+)")
    # Three or more dots at the start of the value.
    __RE_REPEAT_DOT: Final = re.compile(r"^\.{3,}")

    def __init__(
        self,
        min_len: int,
        max_len: int,
        fs_encoding: Optional[str],
        check_reserved: bool,
        additional_reserved_names: Optional[Sequence[str]] = None,
        platform_max_len: Optional[int] = None,
        platform: Optional[PlatformType] = None,
    ) -> None:
        """Initialize the validator.

        Args:
            min_len:
                Minimum length. A value lower than or equal to zero selects
                ``DEFAULT_MIN_LEN``; the stored value is never below one.
            max_len: See :py:class:`BaseFile`.
            fs_encoding: See :py:class:`BaseFile`.
            check_reserved: Enable or disable the reserved name check.
            additional_reserved_names: See :py:class:`BaseFile`.
            platform_max_len: See :py:class:`BaseFile`.
            platform: See :py:class:`BaseFile`.

        Raises:
            ValueError: If the resulting length bounds are inconsistent.
        """
        lower_bound = DEFAULT_MIN_LEN if min_len <= 0 else min_len
        self._min_len = max(lower_bound, 1)

        super().__init__(
            max_len=max_len,
            fs_encoding=fs_encoding,
            check_reserved=check_reserved,
            additional_reserved_names=additional_reserved_names,
            platform_max_len=platform_max_len,
            platform=platform,
        )

        self._validate_max_len()

    @property
    def min_len(self) -> int:
        """Effective minimum length."""
        return self._min_len

    def _validate_reserved_keywords(self, name: str) -> None:
        """Raise ``ReservedNameError`` when ``name`` hits a reserved keyword.

        Both the root name (the part before the first dot) and the base name
        are checked. The raised error always reports the root name.
        """
        if not self._check_reserved:
            return

        root_name = self.__extract_root_name(name)
        candidates = (root_name, os.path.basename(name))
        if not any(self._is_reserved_keyword(candidate) for candidate in candidates):
            return

        raise ReservedNameError(
            f"'{root_name}' is a reserved name",
            reusable_name=False,
            reserved_name=root_name,
            platform=self.platform,
        )

    def _validate_max_len(self) -> None:
        """Check that ``1 <= min_len <= max_len`` holds for the length bounds."""
        if self.max_len < 1:
            raise ValueError("max_len must be greater or equal to one")

        if self.min_len > self.max_len:
            raise ValueError("min_len must be lower than max_len")

    @classmethod
    def __extract_root_name(cls, path: str) -> str:
        """Return the part of the base name that precedes the first dot.

        ``"."``, ``".."`` and values starting with three or more dots are
        returned unchanged. An empty string is returned when nothing matches.
        """
        if path in (".", "..") or cls.__RE_REPEAT_DOT.search(path):
            return path

        matched = cls.__RE_ROOT_NAME.match(os.path.basename(path))
        return "" if matched is None else matched.group(1)

View File

@ -0,0 +1,162 @@
"""
.. codeauthor:: Tsuyoshi Hombashi <tsuyoshi.hombashi@gmail.com>
"""
import ntpath
import platform
import re
import string
import sys
from pathlib import PurePath
from typing import Any, Final, Optional
from ._const import Platform
from ._types import PathType, PlatformType
# Matches a value that consists only of whitespace characters.
_re_whitespaces: Final = re.compile(r"^[\s]+$")


def validate_pathtype(
    text: PathType, allow_whitespaces: bool = False, error_msg: Optional[str] = None
) -> None:
    """Check that ``text`` is a usable path-like value.

    Args:
        text:
            Value to check. A ``PurePath`` instance or a string that is not
            empty after stripping is accepted.
        allow_whitespaces:
            If |True|, a value made only of whitespace is accepted as well.
        error_msg:
            Not used by this function.

    Raises:
        ValidationError (ErrorReason.NULL_NAME):
            If ``text`` is |None| or empty after stripping.
        TypeError:
            If ``text`` is any other unsupported value.
    """
    from .error import ErrorReason, ValidationError

    if isinstance(text, PurePath) or _is_not_null_string(text):
        return

    whitespace_only = allow_whitespaces and _re_whitespaces.search(str(text))
    if whitespace_only:
        return

    if not is_null_string(text):
        raise TypeError(f"text must be a string: actual={type(text)}")

    raise ValidationError(reason=ErrorReason.NULL_NAME)
def to_str(name: PathType) -> str:
    """Convert a ``PurePath`` to ``str``; any other value is returned unchanged."""
    return str(name) if isinstance(name, PurePath) else name
def is_nt_abspath(value: str) -> bool:
    """Return whether ``value`` is an absolute path by Windows (``ntpath``) rules.

    The check branches on the running Python version:

    - 3.13 or newer: the result of ``ntpath.isabs`` is returned as is.
    - 3.10 or older: a value starting with two backslashes is absolute.
    - otherwise (and as the fallback for 3.10 or older): the value must satisfy
      ``ntpath.isabs`` and have a non-empty drive.
    """
    running_version = sys.version_info[:2]

    if running_version >= (3, 13):
        return ntpath.isabs(value)

    if running_version <= (3, 10) and value.startswith("\\\\"):
        return True

    drive = ntpath.splitdrive(value)[0]
    return ntpath.isabs(value) and len(drive) > 0
def is_null_string(value: Any) -> bool:
    """Return |True| for |None| or a value that is empty after ``strip()``.

    A value without a ``strip`` attribute is not considered a null string.
    """
    if value is None:
        return True

    try:
        remaining = len(value.strip())
    except AttributeError:
        return False

    return remaining == 0
def _is_not_null_string(value: Any) -> bool:
    """Return |True| when ``value`` is non-empty after ``strip()``.

    A value without a ``strip`` attribute (including |None|) yields |False|.
    """
    try:
        remaining = len(value.strip())
    except AttributeError:
        return False

    return remaining > 0
def _get_unprintable_ascii_chars() -> list[str]:
    """Return the ASCII characters (code 0-127) that are not in ``string.printable``."""
    return [char for char in map(chr, range(128)) if char not in string.printable]


unprintable_ascii_chars: Final = tuple(_get_unprintable_ascii_chars())
def _get_ascii_symbols() -> list[str]:
    """Return the ASCII characters that are neither unprintable, digits nor letters."""
    excluded = set(unprintable_ascii_chars)
    excluded.update(string.digits, string.ascii_letters)

    return [char for char in map(chr, range(128)) if char not in excluded]


ascii_symbols: Final = tuple(_get_ascii_symbols())
# Character class that matches any single character of ``unprintable_ascii_chars``.
__RE_UNPRINTABLE_CHARS: Final = re.compile(
    "[{}]".format(re.escape("".join(unprintable_ascii_chars))), re.UNICODE
)
# Three alternatives: ESC followed by one character in ``@-Z`` or ``\-_``;
# a single character in ``\x80-\x9A`` or ``\x9C-\x9F``; or a sequence opened by
# ``ESC [`` or ``\x9B`` with optional ``0-?`` and `` -/`` characters, terminated
# by one character in ``@-~``.
__RE_ANSI_ESCAPE: Final = re.compile(
    r"(?:\x1B[@-Z\\-_]|[\x80-\x9A\x9C-\x9F]|(?:\x1B\[|\x9B)[0-?]*[ -/]*[@-~])"
)
def validate_unprintable_char(text: str) -> None:
    """Raise ``InvalidCharError`` if ``text`` contains unprintable ASCII characters.

    Args:
        text: Value to inspect; converted with ``to_str`` before matching.

    Raises:
        InvalidCharError: If at least one unprintable character is found.
    """
    from .error import InvalidCharError

    found = __RE_UNPRINTABLE_CHARS.findall(to_str(text))
    if not found:
        return

    raise InvalidCharError(f"unprintable character found: {found}")
def replace_unprintable_char(text: str, replacement_text: str = "") -> str:
    """Replace every unprintable ASCII character in ``text``.

    Args:
        text: String to process.
        replacement_text: Text substituted for each match. Defaults to ``""``.

    Returns:
        ``text`` with the matches replaced.

    Raises:
        TypeError: If the substitution fails with ``TypeError`` or
            ``AttributeError``. The original exception is attached as the cause.
    """
    try:
        return __RE_UNPRINTABLE_CHARS.sub(replacement_text, text)
    except (TypeError, AttributeError) as err:
        # Chain explicitly so the traceback shows the original failure as the cause.
        raise TypeError("text must be a string") from err
def replace_ansi_escape(text: str, replacement_text: str = "") -> str:
    """Replace every ANSI escape sequence in ``text``.

    Args:
        text: String to process.
        replacement_text: Text substituted for each match. Defaults to ``""``.

    Returns:
        ``text`` with the matches replaced.

    Raises:
        TypeError: If the substitution fails with ``TypeError`` or
            ``AttributeError``. The original exception is attached as the cause.
    """
    try:
        return __RE_ANSI_ESCAPE.sub(replacement_text, text)
    except (TypeError, AttributeError) as err:
        # Chain explicitly so the traceback shows the original failure as the cause.
        raise TypeError("text must be a string") from err
def normalize_platform(name: Optional[PlatformType]) -> Platform:
    """Convert a platform specifier to a :py:class:`Platform` member.

    Args:
        name:
            A :py:class:`Platform` member (returned unchanged), a platform
            name, or a falsy value. Names are stripped and compared
            case-insensitively. ``"auto"`` is replaced by the value of
            ``platform.system()``.

    Returns:
        - ``Platform.POSIX`` for ``"posix"``
        - ``Platform.LINUX`` for ``"linux"``
        - ``Platform.WINDOWS`` for any name starting with ``"win"``
        - ``Platform.MACOS`` for ``"mac"``, ``"macos"`` or ``"darwin"``
        - ``Platform.UNIVERSAL`` for a falsy value or any other name
    """
    if isinstance(name, Platform):
        return name

    if not name:
        return Platform.UNIVERSAL

    key = name.strip().casefold()

    # "posix" is only honoured when given explicitly, i.e. before "auto" is resolved.
    if key == "posix":
        return Platform.POSIX

    if key == "auto":
        key = platform.system().casefold()

    if key == "linux":
        return Platform.LINUX
    if key.startswith("win"):
        return Platform.WINDOWS
    if key in ("mac", "macos", "darwin"):
        return Platform.MACOS

    return Platform.UNIVERSAL
def findall_to_str(match: list[Any]) -> str:
    """Join the ``repr()`` of each item in ``match`` with ``", "``."""
    return ", ".join(map(repr, match))
def truncate_str(text: str, encoding: str, max_bytes: int) -> str:
    """Cut ``text`` so that its encoded form is at most ``max_bytes`` bytes.

    Args:
        text: String to truncate.
        encoding: Encoding used to measure and cut the string.
        max_bytes: Number of leading encoded bytes to keep.

    Returns:
        The decoded prefix. A character split by the cut is dropped.
    """
    clipped = text.encode(encoding)[:max_bytes]

    # The cut may land inside a multi-byte character; "ignore" drops the fragment.
    return clipped.decode(encoding, "ignore")

View File

@ -0,0 +1,41 @@
import enum
from typing import Final
# Default minimum length used by the validators.
DEFAULT_MIN_LEN: Final = 1
# Template of invalid-character error messages; takes ``invalid`` and ``value``.
INVALID_CHAR_ERR_MSG_TMPL: Final = "invalids=({invalid}), value={value}"
# File names reserved by NTFS.
_NTFS_RESERVED_FILE_NAMES: Final = (
    "$Mft",
    "$MftMirr",
    "$LogFile",
    "$Volume",
    "$AttrDef",
    "$Bitmap",
    "$Boot",
    "$BadClus",
    "$Secure",
    "$Upcase",
    "$Extend",
    "$Quota",
    "$ObjId",
    "$Reparse",
)  # Only in root directory
@enum.unique
class Platform(enum.Enum):
    """Enumeration of the platform specifiers.

    ``enum.unique`` guarantees that no two members share a value.
    """

    #: POSIX compatible platform.
    POSIX = "POSIX"

    #: platform independent. note that absolute paths cannot specify this.
    UNIVERSAL = "universal"

    #: Linux platform.
    LINUX = "Linux"

    #: Windows platform.
    WINDOWS = "Windows"

    #: macOS platform.
    MACOS = "macOS"

View File

@ -0,0 +1,475 @@
"""
.. codeauthor:: Tsuyoshi Hombashi <tsuyoshi.hombashi@gmail.com>
"""
import itertools
import posixpath
import re
import warnings
from collections.abc import Sequence
from pathlib import Path, PurePath
from re import Pattern
from typing import Final, Optional
from ._base import AbstractSanitizer, AbstractValidator, BaseFile, BaseValidator
from ._common import findall_to_str, is_nt_abspath, to_str, truncate_str, validate_pathtype
from ._const import DEFAULT_MIN_LEN, INVALID_CHAR_ERR_MSG_TMPL, Platform
from ._types import PathType, PlatformType
from .error import ErrorAttrKey, ErrorReason, InvalidCharError, ValidationError
from .handler import ReservedNameHandler, ValidationErrorHandler
# Default maximum length of a file name.
_DEFAULT_MAX_FILENAME_LEN: Final = 255
# Matches any single character of ``BaseFile._INVALID_FILENAME_CHARS``.
_RE_INVALID_FILENAME: Final = re.compile(
    f"[{re.escape(BaseFile._INVALID_FILENAME_CHARS):s}]", re.UNICODE
)
# Matches any single character of ``BaseFile._INVALID_WIN_FILENAME_CHARS``.
_RE_INVALID_WIN_FILENAME: Final = re.compile(
    f"[{re.escape(BaseFile._INVALID_WIN_FILENAME_CHARS):s}]", re.UNICODE
)
class FileNameSanitizer(AbstractSanitizer):
    """Sanitizer that turns a value into a valid file name."""

    def __init__(
        self,
        max_len: int = _DEFAULT_MAX_FILENAME_LEN,
        fs_encoding: Optional[str] = None,
        platform: Optional[PlatformType] = None,
        null_value_handler: Optional[ValidationErrorHandler] = None,
        reserved_name_handler: Optional[ValidationErrorHandler] = None,
        additional_reserved_names: Optional[Sequence[str]] = None,
        validate_after_sanitize: bool = False,
        validator: Optional[AbstractValidator] = None,
    ) -> None:
        """Initialize the sanitizer.

        Args:
            max_len: Maximum byte length of a sanitized name.
            fs_encoding: Encoding used to measure the byte length.
            platform: Target platform.
            null_value_handler: Handler called for a null name.
            reserved_name_handler: Handler called for a reserved name.
            additional_reserved_names: Extra reserved names.
            validate_after_sanitize: Validate the result once more before returning.
            validator:
                Validator to use. When falsy, a :py:class:`FileNameValidator`
                is built from the other arguments with the reserved name check
                enabled.
        """
        if validator:
            fname_validator = validator
        else:
            fname_validator = FileNameValidator(
                min_len=DEFAULT_MIN_LEN,
                max_len=max_len,
                fs_encoding=fs_encoding,
                check_reserved=True,
                additional_reserved_names=additional_reserved_names,
                platform=platform,
            )

        super().__init__(
            max_len=max_len,
            fs_encoding=fs_encoding,
            null_value_handler=null_value_handler,
            reserved_name_handler=reserved_name_handler,
            additional_reserved_names=additional_reserved_names,
            platform=platform,
            validate_after_sanitize=validate_after_sanitize,
            validator=fname_validator,
        )

        self._sanitize_regexp = self._get_sanitize_regexp()

    def sanitize(self, value: PathType, replacement_text: str = "") -> PathType:
        """Return a sanitized file name made from ``value``.

        Invalid characters are replaced with ``replacement_text``, the result
        is truncated to ``max_len`` bytes and then validated. Depending on the
        reason of a validation failure, the reserved name is replaced using
        the reserved name handler, leading/trailing spaces and trailing
        periods are stripped (Windows and universal platforms), or the null
        value handler provides the result.

        Args:
            value: File name to sanitize.
            replacement_text: Replacement for invalid characters.

        Returns:
            The sanitized name; a ``Path`` when ``value`` is a ``PurePath``.

        Raises:
            ValidationError:
                If ``value`` is a null ``PurePath`` value, or (with reason
                ``INVALID_AFTER_SANITIZE``) if ``validate_after_sanitize`` is
                enabled and the sanitized name is still invalid.
            TypeError: If ``value`` has an unsupported type.
        """
        try:
            validate_pathtype(value, allow_whitespaces=not self._is_windows(include_universal=True))
        except ValidationError as e:
            if e.reason == ErrorReason.NULL_NAME:
                if isinstance(value, PurePath):
                    raise

                return self._null_value_handler(e)  # type: ignore
            raise

        sanitized_filename = self._sanitize_regexp.sub(replacement_text, str(value))
        sanitized_filename = truncate_str(sanitized_filename, self._fs_encoding, self.max_len)

        try:
            self._validator.validate(sanitized_filename)
        except ValidationError as e:
            if e.reason == ErrorReason.RESERVED_NAME:
                replacement_word = self._reserved_name_handler(e)
                if e.reserved_name != replacement_word:
                    # Replace literally. Passing the handler result to re.sub()
                    # as a replacement template would interpret backslashes
                    # and group references contained in it.
                    sanitized_filename = sanitized_filename.replace(
                        e.reserved_name, replacement_word
                    )
            elif e.reason == ErrorReason.INVALID_CHARACTER and self._is_windows(
                include_universal=True
            ):
                # Do not start a file or directory name with a space
                sanitized_filename = sanitized_filename.lstrip(" ")

                # Do not end a file or directory name with a space or a period
                sanitized_filename = sanitized_filename.rstrip(" ")
                if sanitized_filename not in (".", ".."):
                    sanitized_filename = sanitized_filename.rstrip(" .")
            elif e.reason == ErrorReason.NULL_NAME:
                sanitized_filename = self._null_value_handler(e)

        if self._validate_after_sanitize:
            try:
                self._validator.validate(sanitized_filename)
            except ValidationError as e:
                raise ValidationError(
                    description=str(e),
                    reason=ErrorReason.INVALID_AFTER_SANITIZE,
                    platform=self.platform,
                )

        if isinstance(value, PurePath):
            return Path(sanitized_filename)  # type: ignore

        return sanitized_filename  # type: ignore

    def _get_sanitize_regexp(self) -> Pattern[str]:
        """Return the invalid-character pattern for the target platform."""
        if self._is_windows(include_universal=True):
            return _RE_INVALID_WIN_FILENAME

        return _RE_INVALID_FILENAME
class FileNameValidator(BaseValidator):
    """Validator for a single file name."""

    # CON/PRN/AUX/CLOCK$/NUL, COM0-COM9, LPT0-LPT9 and the COM/LPT names that
    # end with a superscript one, two or three.
    _WINDOWS_RESERVED_FILE_NAMES: Final = (
        ("CON", "PRN", "AUX", "CLOCK$", "NUL")
        + tuple(f"{name:s}{num:d}" for name, num in itertools.product(("COM", "LPT"), range(0, 10)))
        + tuple(
            f"{name:s}{ssd:s}"
            for name, ssd in itertools.product(
                ("COM", "LPT"),
                ("\N{SUPERSCRIPT ONE}", "\N{SUPERSCRIPT TWO}", "\N{SUPERSCRIPT THREE}"),
            )
        )
    )
    _MACOS_RESERVED_FILE_NAMES: Final = (":",)

    def __init__(
        self,
        min_len: int = DEFAULT_MIN_LEN,
        max_len: int = _DEFAULT_MAX_FILENAME_LEN,
        fs_encoding: Optional[str] = None,
        platform: Optional[PlatformType] = None,
        check_reserved: bool = True,
        additional_reserved_names: Optional[Sequence[str]] = None,
    ) -> None:
        """Initialize the validator; all arguments are forwarded to the base class."""
        super().__init__(
            min_len=min_len,
            max_len=max_len,
            fs_encoding=fs_encoding,
            check_reserved=check_reserved,
            additional_reserved_names=additional_reserved_names,
            platform=platform,
        )

    @property
    def reserved_keywords(self) -> tuple[str, ...]:
        """Sorted, de-duplicated reserved names for the target platform.

        The additional reserved names are always included. Windows names are
        added for the Windows and universal platforms; the macOS names are
        added for the POSIX, macOS and universal platforms.
        """
        keywords = set(super().reserved_keywords)

        if self._is_universal():
            keywords.update(self._WINDOWS_RESERVED_FILE_NAMES)
            keywords.update(self._MACOS_RESERVED_FILE_NAMES)
        elif self._is_windows():
            keywords.update(self._WINDOWS_RESERVED_FILE_NAMES)
        elif self._is_posix() or self._is_macos():
            keywords.update(self._MACOS_RESERVED_FILE_NAMES)

        return tuple(sorted(keywords))

    def validate(self, value: PathType) -> None:
        """Validate ``value`` as a file name.

        Checks, in order: the value type, that it is not an absolute path, the
        encoded byte length against ``max_len``/``min_len``, the reserved
        names, and the invalid characters (plus the Windows specific rules for
        the Windows and universal platforms).

        Raises:
            ValidationError: If any of the checks fails.
            TypeError: If ``value`` has an unsupported type.
        """
        validate_pathtype(value, allow_whitespaces=not self._is_windows(include_universal=True))

        unicode_filename = to_str(value)
        # The length is measured in encoded bytes, not in characters.
        byte_ct = len(unicode_filename.encode(self._fs_encoding))

        self.validate_abspath(unicode_filename)

        err_kwargs = {
            ErrorAttrKey.REASON: ErrorReason.INVALID_LENGTH,
            ErrorAttrKey.PLATFORM: self.platform,
            ErrorAttrKey.FS_ENCODING: self._fs_encoding,
            ErrorAttrKey.BYTE_COUNT: byte_ct,
        }
        if byte_ct > self.max_len:
            raise ValidationError(
                [
                    f"filename is too long: expected<={self.max_len:d} bytes, actual={byte_ct:d} bytes"
                ],
                **err_kwargs,
            )
        if byte_ct < self.min_len:
            raise ValidationError(
                [
                    f"filename is too short: expected>={self.min_len:d} bytes, actual={byte_ct:d} bytes"
                ],
                **err_kwargs,
            )

        self._validate_reserved_keywords(unicode_filename)
        self.__validate_universal_filename(unicode_filename)

        if self._is_windows(include_universal=True):
            self.__validate_win_filename(unicode_filename)

    def validate_abspath(self, value: str) -> None:
        """Raise ``ValidationError`` when ``value`` is an absolute path.

        Windows rules are applied for the Windows and universal platforms;
        POSIX rules are applied for every platform.
        """
        looks_absolute = (
            self._is_windows(include_universal=True) and is_nt_abspath(value)
        ) or posixpath.isabs(value)

        if looks_absolute:
            raise ValidationError(
                description=f"found an absolute path ({value}), expected a filename",
                platform=self.platform,
                reason=ErrorReason.FOUND_ABS_PATH,
            )

    def __validate_universal_filename(self, unicode_filename: str) -> None:
        """Reject characters that are invalid in a file name on every platform."""
        invalid_chars = _RE_INVALID_FILENAME.findall(unicode_filename)
        if not invalid_chars:
            return

        raise InvalidCharError(
            INVALID_CHAR_ERR_MSG_TMPL.format(
                invalid=findall_to_str(invalid_chars), value=repr(unicode_filename)
            ),
            platform=Platform.UNIVERSAL,
        )

    def __validate_win_filename(self, unicode_filename: str) -> None:
        """Apply the Windows specific file name rules.

        Besides the invalid characters, a name other than ``"."``/``".."``
        must not end with a space or a period and must not start with a space.
        """
        invalid_chars = _RE_INVALID_WIN_FILENAME.findall(unicode_filename)
        if invalid_chars:
            raise InvalidCharError(
                INVALID_CHAR_ERR_MSG_TMPL.format(
                    invalid=findall_to_str(invalid_chars), value=repr(unicode_filename)
                ),
                platform=Platform.WINDOWS,
            )

        if unicode_filename in (".", ".."):
            return

        KB2829981_err_tmpl = "{}. Refer: https://learn.microsoft.com/en-us/troubleshoot/windows-client/shell-experience/file-folder-name-whitespace-characters"  # noqa: E501

        def edge_char_error(char: str, advice: str) -> InvalidCharError:
            # Both edge checks report the offending character the same way.
            return InvalidCharError(
                INVALID_CHAR_ERR_MSG_TMPL.format(
                    invalid=re.escape(char), value=repr(unicode_filename)
                ),
                platform=Platform.WINDOWS,
                description=KB2829981_err_tmpl.format(advice),
            )

        last_char = unicode_filename[-1]
        if last_char == " " or last_char == ".":
            raise edge_char_error(
                last_char, "Do not end a file or directory name with a space or a period"
            )

        first_char = unicode_filename[0]
        if first_char == " ":
            raise edge_char_error(
                first_char, "Do not start a file or directory name with a space"
            )
def validate_filename(
    filename: PathType,
    platform: Optional[PlatformType] = None,
    min_len: int = DEFAULT_MIN_LEN,
    max_len: int = _DEFAULT_MAX_FILENAME_LEN,
    fs_encoding: Optional[str] = None,
    check_reserved: bool = True,
    additional_reserved_names: Optional[Sequence[str]] = None,
) -> None:
    """Verify whether the ``filename`` is a valid file name or not.

    Args:
        filename:
            Filename to validate.
        platform:
            Target platform name of the filename.

            .. include:: platform.txt
        min_len:
            Minimum byte length of the ``filename``. The value must be greater or equal to one.
            Defaults to ``1``.
        max_len:
            Maximum byte length of the ``filename``. The value is capped to:

                - ``Linux``: 4096
                - ``macOS``: 1024
                - ``Windows``: 260
                - ``universal``: 260

            Defaults to ``255``.
        fs_encoding:
            Filesystem encoding that is used to calculate the byte length of the filename.
            If |None|, get the encoding from the execution environment.
        check_reserved:
            If |True|, check the reserved names of the ``platform``.
        additional_reserved_names:
            Additional reserved names to check.
            Case insensitive.

    Raises:
        ValidationError (ErrorReason.INVALID_LENGTH):
            If the encoded ``filename`` is longer than ``max_len`` bytes
            or shorter than ``min_len`` bytes.
        ValidationError (ErrorReason.INVALID_CHARACTER):
            If the ``filename`` includes invalid character(s) for a filename:
            |invalid_filename_chars|.
            The following characters are also invalid for Windows platforms:
            |invalid_win_filename_chars|.
        ValidationError (ErrorReason.RESERVED_NAME):
            If the ``filename`` equals the reserved name by OS.
            Windows reserved name is as follows:
            ``"CON"``, ``"PRN"``, ``"AUX"``, ``"CLOCK$"``, ``"NUL"``,
            ``"COM[0-9]"``, ``"LPT[0-9]"`` and the ``COM``/``LPT`` names that end with
            a superscript one, two or three.

    Example:
        :ref:`example-validate-filename`

    See Also:
        `Naming Files, Paths, and Namespaces - Win32 apps | Microsoft Docs
        <https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file>`__
    """
    validator = FileNameValidator(
        platform=platform,
        min_len=min_len,
        max_len=max_len,
        fs_encoding=fs_encoding,
        check_reserved=check_reserved,
        additional_reserved_names=additional_reserved_names,
    )
    validator.validate(filename)
def is_valid_filename(
    filename: PathType,
    platform: Optional[PlatformType] = None,
    min_len: int = DEFAULT_MIN_LEN,
    max_len: Optional[int] = None,
    fs_encoding: Optional[str] = None,
    check_reserved: bool = True,
    additional_reserved_names: Optional[Sequence[str]] = None,
) -> bool:
    """Check whether the ``filename`` is a valid name or not.

    Args:
        filename:
            A filename to be checked.
        platform:
            Target platform name of the filename.
        min_len:
            Minimum byte length of the ``filename``.
        max_len:
            Maximum byte length of the ``filename``.
            If |None|, ``-1`` is passed to the validator.
        fs_encoding:
            Filesystem encoding that is used to calculate the byte length of the filename.
        check_reserved:
            If |True|, check the reserved names of the ``platform``.
        additional_reserved_names:
            Additional reserved names to check.

    Returns:
        |True| if the validator accepts the ``filename``.

    Example:
        :ref:`example-is-valid-filename`

    See Also:
        :py:func:`.validate_filename()`
    """
    effective_max_len = -1 if max_len is None else max_len

    validator = FileNameValidator(
        platform=platform,
        min_len=min_len,
        max_len=effective_max_len,
        fs_encoding=fs_encoding,
        check_reserved=check_reserved,
        additional_reserved_names=additional_reserved_names,
    )
    return validator.is_valid(filename)
def sanitize_filename(
    filename: PathType,
    replacement_text: str = "",
    platform: Optional[PlatformType] = None,
    max_len: Optional[int] = _DEFAULT_MAX_FILENAME_LEN,
    fs_encoding: Optional[str] = None,
    check_reserved: Optional[bool] = None,
    null_value_handler: Optional[ValidationErrorHandler] = None,
    reserved_name_handler: Optional[ValidationErrorHandler] = None,
    additional_reserved_names: Optional[Sequence[str]] = None,
    validate_after_sanitize: bool = False,
) -> PathType:
    """Make a valid filename from a string.

    To make a valid filename, the function does the following:

        - Replace invalid characters as file names included in the ``filename``
          with the ``replacement_text``. Invalid characters are:

            - unprintable characters
            - |invalid_filename_chars|
            - for Windows (or universal) only: |invalid_win_filename_chars|

        - Replace a value if a sanitized value is a reserved name by operating systems
          with a specified handler by ``reserved_name_handler``.

    Args:
        filename: Filename to sanitize.
        replacement_text:
            Replacement text for invalid characters. Defaults to ``""``.
        platform:
            Target platform name of the filename.

            .. include:: platform.txt
        max_len:
            Maximum byte length of the ``filename``.
            Truncate the name length if the ``filename`` length exceeds this value.
            Defaults to ``255``.
        fs_encoding:
            Filesystem encoding that is used to calculate the byte length of the filename.
            If |None|, get the encoding from the execution environment.
        check_reserved:
            [Deprecated] Use 'reserved_name_handler' instead.
            Passing any value other than |None| emits a ``DeprecationWarning``;
            |False| selects :py:meth:`~.handler.ReservedNameHandler.as_is`.
        null_value_handler:
            Function called when a value after sanitization is an empty string.
            You can specify predefined handlers:

                - :py:func:`~.handler.NullValueHandler.return_null_string`
                - :py:func:`~.handler.NullValueHandler.return_timestamp`
                - :py:func:`~.handler.raise_error`

            Defaults to :py:func:`.handler.NullValueHandler.return_null_string` that just return ``""``.
        reserved_name_handler:
            Function called when a value after sanitization is a reserved name.
            You can specify predefined handlers:

                - :py:meth:`~.handler.ReservedNameHandler.add_leading_underscore`
                - :py:meth:`~.handler.ReservedNameHandler.add_trailing_underscore`
                - :py:meth:`~.handler.ReservedNameHandler.as_is`
                - :py:func:`~.handler.raise_error`

            Defaults to :py:meth:`.handler.ReservedNameHandler.add_trailing_underscore`.
        additional_reserved_names:
            Additional reserved names to sanitize.
            Case insensitive.
        validate_after_sanitize:
            Execute validation after sanitization to the file name.

    Returns:
        Same type as the ``filename`` (str or PathLike object):
            Sanitized filename.

    Raises:
        ValueError:
            If the ``filename`` is an invalid filename.

    Example:
        :ref:`example-sanitize-filename`
    """
    if check_reserved is not None:
        warnings.warn(
            "'check_reserved' is deprecated. Use 'reserved_name_handler' instead.",
            DeprecationWarning,
            # Attribute the warning to the caller of sanitize_filename().
            stacklevel=2,
        )

        if check_reserved is False:
            reserved_name_handler = ReservedNameHandler.as_is

    return FileNameSanitizer(
        platform=platform,
        max_len=-1 if max_len is None else max_len,
        fs_encoding=fs_encoding,
        null_value_handler=null_value_handler,
        reserved_name_handler=reserved_name_handler,
        additional_reserved_names=additional_reserved_names,
        validate_after_sanitize=validate_after_sanitize,
    ).sanitize(filename, replacement_text)

View File

@ -0,0 +1,519 @@
"""
.. codeauthor:: Tsuyoshi Hombashi <tsuyoshi.hombashi@gmail.com>
"""
import ntpath
import os.path
import posixpath
import re
import warnings
from collections.abc import Sequence
from pathlib import Path, PurePath
from re import Pattern
from typing import Final, Optional
from ._base import AbstractSanitizer, AbstractValidator, BaseFile, BaseValidator
from ._common import findall_to_str, is_nt_abspath, to_str, validate_pathtype
from ._const import _NTFS_RESERVED_FILE_NAMES, DEFAULT_MIN_LEN, INVALID_CHAR_ERR_MSG_TMPL, Platform
from ._filename import FileNameSanitizer, FileNameValidator
from ._types import PathType, PlatformType
from .error import ErrorAttrKey, ErrorReason, InvalidCharError, ReservedNameError, ValidationError
from .handler import ReservedNameHandler, ValidationErrorHandler
# Matches any single character of ``BaseFile._INVALID_PATH_CHARS``.
_RE_INVALID_PATH: Final = re.compile(f"[{re.escape(BaseFile._INVALID_PATH_CHARS):s}]", re.UNICODE)
# Matches any single character of ``BaseFile._INVALID_WIN_PATH_CHARS``.
_RE_INVALID_WIN_PATH: Final = re.compile(
    f"[{re.escape(BaseFile._INVALID_WIN_PATH_CHARS):s}]", re.UNICODE
)
class FilePathSanitizer(AbstractSanitizer):
    """Sanitizer that turns a value into a valid file path."""

    def __init__(
        self,
        max_len: int = -1,
        fs_encoding: Optional[str] = None,
        platform: Optional[PlatformType] = None,
        null_value_handler: Optional[ValidationErrorHandler] = None,
        reserved_name_handler: Optional[ValidationErrorHandler] = None,
        additional_reserved_names: Optional[Sequence[str]] = None,
        normalize: bool = True,
        validate_after_sanitize: bool = False,
        validator: Optional[AbstractValidator] = None,
    ) -> None:
        """Initialize the sanitizer.

        Args:
            max_len: Maximum length of a sanitized path.
            fs_encoding: Encoding used to measure the byte length.
            platform: Target platform.
            null_value_handler: Handler called for a null value.
            reserved_name_handler: Handler called for a reserved name.
            additional_reserved_names: Extra reserved names.
            normalize: Apply ``os.path.normpath`` to the path during sanitization.
            validate_after_sanitize: Validate the result once more before returning.
            validator:
                Validator to use. When falsy, a :py:class:`FilePathValidator`
                is built from the other arguments with the reserved name check
                enabled.
        """
        fpath_validator = validator or FilePathValidator(
            min_len=DEFAULT_MIN_LEN,
            max_len=max_len,
            fs_encoding=fs_encoding,
            check_reserved=True,
            additional_reserved_names=additional_reserved_names,
            platform=platform,
        )

        super().__init__(
            max_len=max_len,
            fs_encoding=fs_encoding,
            validator=fpath_validator,
            null_value_handler=null_value_handler,
            reserved_name_handler=reserved_name_handler,
            additional_reserved_names=additional_reserved_names,
            platform=platform,
            validate_after_sanitize=validate_after_sanitize,
        )

        self._sanitize_regexp = self._get_sanitize_regexp()

        # Each path entry is sanitized as a file name with the resolved settings.
        self.__fname_sanitizer = FileNameSanitizer(
            max_len=self.max_len,
            fs_encoding=fs_encoding,
            null_value_handler=null_value_handler,
            reserved_name_handler=reserved_name_handler,
            additional_reserved_names=additional_reserved_names,
            platform=self.platform,
            validate_after_sanitize=validate_after_sanitize,
        )
        self.__normalize = normalize
        self.__split_drive = (
            ntpath.splitdrive
            if self._is_windows(include_universal=True)
            else posixpath.splitdrive
        )

    def sanitize(self, value: PathType, replacement_text: str = "") -> PathType:
        """Return a sanitized file path made from ``value``.

        The drive part is split off and kept as is. In the rest, invalid
        characters are replaced with ``replacement_text``, the path is
        optionally normalized, and every entry is sanitized as a file name.
        The entries are joined with the separator of the target platform.

        Args:
            value: File path to sanitize.
            replacement_text: Replacement for invalid characters.

        Returns:
            The sanitized path; a ``Path`` when ``value`` is a ``PurePath``.

        Raises:
            ValidationError:
                If ``value`` is a null ``PurePath`` value, or if
                ``validate_after_sanitize`` is enabled and the sanitized path
                is still invalid.
            TypeError: If ``value`` has an unsupported type.
        """
        try:
            validate_pathtype(value, allow_whitespaces=not self._is_windows(include_universal=True))
        except ValidationError as e:
            if e.reason != ErrorReason.NULL_NAME or isinstance(value, PurePath):
                raise

            return self._null_value_handler(e)  # type: ignore

        drive, unicode_filepath = self.__split_drive(to_str(value))
        unicode_filepath = self._sanitize_regexp.sub(replacement_text, unicode_filepath)
        if self.__normalize and unicode_filepath:
            unicode_filepath = os.path.normpath(unicode_filepath)

        sanitized_entries: list[str] = [drive] if drive else []

        # Both separators split the path into entries.
        for entry in unicode_filepath.replace("\\", "/").split("/"):
            if entry in _NTFS_RESERVED_FILE_NAMES:
                sanitized_entries.append(f"{entry}_")
                continue

            sanitized_entry = str(
                self.__fname_sanitizer.sanitize(entry, replacement_text=replacement_text)
            )
            if sanitized_entry:
                sanitized_entries.append(sanitized_entry)
            elif not sanitized_entries:
                # An empty first entry is kept so that the joined path starts
                # with a separator.
                sanitized_entries.append("")

        sanitized_path = self.__get_path_separator().join(sanitized_entries)

        try:
            self._validator.validate(sanitized_path)
        except ValidationError as e:
            # Only a null result is repaired here; other failures are ignored
            # unless validate_after_sanitize is enabled.
            if e.reason == ErrorReason.NULL_NAME:
                sanitized_path = self._null_value_handler(e)

        if self._validate_after_sanitize:
            self._validator.validate(sanitized_path)

        if isinstance(value, PurePath):
            return Path(sanitized_path)  # type: ignore

        return sanitized_path  # type: ignore

    def _get_sanitize_regexp(self) -> Pattern[str]:
        """Return the invalid-character pattern for the target platform."""
        if self._is_windows(include_universal=True):
            return _RE_INVALID_WIN_PATH

        return _RE_INVALID_PATH

    def __get_path_separator(self) -> str:
        """Return a backslash for the Windows platform, otherwise a slash."""
        return "\\" if self._is_windows() else "/"
class FilePathValidator(BaseValidator):
# Case-insensitive pattern with one alternative per NTFS reserved file name;
# each alternative matches exactly "/" followed by that name.
_RE_NTFS_RESERVED: Final = re.compile(
    "|".join(f"^/{re.escape(pattern)}$" for pattern in _NTFS_RESERVED_FILE_NAMES),
    re.IGNORECASE,
)
# Paths added to the reserved keywords by ``reserved_keywords``.
_MACOS_RESERVED_FILE_PATHS: Final = ("/", ":")
@property
def reserved_keywords(self) -> tuple[str, ...]:
    """Reserved keywords for the target platform.

    The additional reserved names are always included.
    ``_MACOS_RESERVED_FILE_PATHS`` is appended for the universal, POSIX and
    macOS platforms; ``"/"`` is appended for the Linux platform.
    """
    keywords = super().reserved_keywords

    if self._is_universal() or self._is_posix() or self._is_macos():
        return keywords + self._MACOS_RESERVED_FILE_PATHS

    if self._is_linux():
        return keywords + ("/",)

    return keywords
def __init__(
    self,
    min_len: int = DEFAULT_MIN_LEN,
    max_len: int = -1,
    fs_encoding: Optional[str] = None,
    platform: Optional[PlatformType] = None,
    check_reserved: bool = True,
    additional_reserved_names: Optional[Sequence[str]] = None,
) -> None:
    """Initialize the validator.

    Args:
        min_len: Minimum byte length of a path.
        max_len: Maximum byte length of a path.
        fs_encoding: Encoding used to measure the byte length.
        platform: Target platform.
        check_reserved: Enable or disable the reserved name check.
        additional_reserved_names: Extra reserved names.
    """
    shared_options = {
        "fs_encoding": fs_encoding,
        "check_reserved": check_reserved,
        "additional_reserved_names": additional_reserved_names,
        "platform": platform,
    }

    super().__init__(min_len=min_len, max_len=max_len, **shared_options)

    # Path entries are validated as file names against the resolved max_len.
    self.__fname_validator = FileNameValidator(
        min_len=min_len, max_len=self.max_len, **shared_options
    )

    self.__split_drive = (
        ntpath.splitdrive
        if self._is_windows(include_universal=True)
        else posixpath.splitdrive
    )
def validate(self, value: PathType) -> None:
validate_pathtype(value, allow_whitespaces=not self._is_windows(include_universal=True))
self.validate_abspath(value)
_drive, tail = self.__split_drive(value)
if not tail:
return
unicode_filepath = to_str(tail)
byte_ct = len(unicode_filepath.encode(self._fs_encoding))
err_kwargs = {
ErrorAttrKey.REASON: ErrorReason.INVALID_LENGTH,
ErrorAttrKey.PLATFORM: self.platform,
ErrorAttrKey.FS_ENCODING: self._fs_encoding,
ErrorAttrKey.BYTE_COUNT: byte_ct,
}
if byte_ct > self.max_len:
raise ValidationError(
[
f"file path is too long: expected<={self.max_len:d} bytes, actual={byte_ct:d} bytes"
],
**err_kwargs,
)
if byte_ct < self.min_len:
raise ValidationError(
[
"file path is too short: expected>={:d} bytes, actual={:d} bytes".format(
self.min_len, byte_ct
)
],
**err_kwargs,
)
self._validate_reserved_keywords(unicode_filepath)
unicode_filepath = unicode_filepath.replace("\\", "/")
for entry in unicode_filepath.split("/"):
if not entry or entry in (".", ".."):
continue
self.__fname_validator.validate(entry)
if self._is_windows(include_universal=True):
self.__validate_win_filepath(unicode_filepath)
else:
self.__validate_unix_filepath(unicode_filepath)
def validate_abspath(self, value: PathType) -> None:
is_posix_abs = posixpath.isabs(value)
is_nt_abs = is_nt_abspath(to_str(value))
if any([self._is_windows() and is_nt_abs, self._is_posix() and is_posix_abs]):
return
if self._is_universal() and any([is_nt_abs, is_posix_abs]):
ValidationError(
"platform-independent absolute file path is not supported",
platform=self.platform,
reason=ErrorReason.MALFORMED_ABS_PATH,
)
err_object = ValidationError(
description=(
"an invalid absolute file path ({}) for the platform ({}).".format(
value, self.platform.value
)
+ " to avoid the error, specify an appropriate platform corresponding to"
+ " the path format or 'auto'."
),
platform=self.platform,
reason=ErrorReason.MALFORMED_ABS_PATH,
)
if self._is_windows(include_universal=True) and is_posix_abs:
raise err_object
if not self._is_windows():
drive, _tail = ntpath.splitdrive(value)
if drive and is_nt_abs:
raise err_object
def __validate_unix_filepath(self, unicode_filepath: str) -> None:
match = _RE_INVALID_PATH.findall(unicode_filepath)
if match:
raise InvalidCharError(
INVALID_CHAR_ERR_MSG_TMPL.format(
invalid=findall_to_str(match), value=repr(unicode_filepath)
)
)
def __validate_win_filepath(self, unicode_filepath: str) -> None:
match = _RE_INVALID_WIN_PATH.findall(unicode_filepath)
if match:
raise InvalidCharError(
INVALID_CHAR_ERR_MSG_TMPL.format(
invalid=findall_to_str(match), value=repr(unicode_filepath)
),
platform=Platform.WINDOWS,
)
_drive, value = self.__split_drive(unicode_filepath)
if value:
match_reserved = self._RE_NTFS_RESERVED.search(value)
if match_reserved:
reserved_name = match_reserved.group()
raise ReservedNameError(
f"'{reserved_name}' is a reserved name",
reusable_name=False,
reserved_name=reserved_name,
platform=self.platform,
)
def validate_filepath(
    file_path: PathType,
    platform: Optional[PlatformType] = None,
    min_len: int = DEFAULT_MIN_LEN,
    max_len: Optional[int] = None,
    fs_encoding: Optional[str] = None,
    check_reserved: bool = True,
    additional_reserved_names: Optional[Sequence[str]] = None,
) -> None:
    """Verify whether the ``file_path`` is a valid file path or not.

    Args:
        file_path (PathType):
            File path to be validated.
        platform (Optional[PlatformType], optional):
            Target platform name of the file path.

            .. include:: platform.txt
        min_len (int, optional):
            Minimum byte length of the ``file_path``. The value must be greater or equal to one.
            Defaults to ``1``.
        max_len (Optional[int], optional):
            Maximum byte length of the ``file_path``. If the value is |None| or minus,
            automatically determined by the ``platform``:

                - ``Linux``: 4096
                - ``macOS``: 1024
                - ``Windows``: 260
                - ``universal``: 260
        fs_encoding (Optional[str], optional):
            Filesystem encoding that is used to calculate the byte length of the file path.
            If |None|, get the encoding from the execution environment.
        check_reserved (bool, optional):
            If |True|, check the reserved names of the ``platform``.
            Defaults to |True|.
        additional_reserved_names (Optional[Sequence[str]], optional):
            Additional reserved names to check.

    Raises:
        ValidationError (ErrorReason.INVALID_CHARACTER):
            If the ``file_path`` includes invalid char(s):
            |invalid_file_path_chars|.
            The following characters are also invalid for Windows platforms:
            |invalid_win_file_path_chars|
        ValidationError (ErrorReason.INVALID_LENGTH):
            If the byte length of the ``file_path`` is outside the
            ``min_len`` to ``max_len`` range.
        ValidationError:
            If ``file_path`` includes invalid values.

    Example:
        :ref:`example-validate-file-path`

    See Also:
        `Naming Files, Paths, and Namespaces - Win32 apps | Microsoft Docs
        <https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file>`__
    """
    # ``None`` is translated to -1, the validator's "not specified" value.
    effective_max_len = -1 if max_len is None else max_len

    validator = FilePathValidator(
        platform=platform,
        min_len=min_len,
        max_len=effective_max_len,
        fs_encoding=fs_encoding,
        check_reserved=check_reserved,
        additional_reserved_names=additional_reserved_names,
    )
    validator.validate(file_path)
def is_valid_filepath(
    file_path: PathType,
    platform: Optional[PlatformType] = None,
    min_len: int = DEFAULT_MIN_LEN,
    max_len: Optional[int] = None,
    fs_encoding: Optional[str] = None,
    check_reserved: bool = True,
    additional_reserved_names: Optional[Sequence[str]] = None,
) -> bool:
    """Check whether the ``file_path`` is a valid file path or not.

    The arguments are forwarded to a ``FilePathValidator``; a ``max_len`` of
    |None| is passed on as ``-1``.

    Args:
        file_path:
            A filepath to be checked.
        platform:
            Target platform name of the file path.
        min_len:
            Minimum byte length of the ``file_path``.
        max_len:
            Maximum byte length of the ``file_path``.
        fs_encoding:
            Filesystem encoding that is used to calculate the byte length.
        check_reserved:
            Passed to the validator as ``check_reserved``.
        additional_reserved_names:
            Additional reserved names to check.

    Returns:
        The result of ``FilePathValidator.is_valid()`` for the ``file_path``.

    Example:
        :ref:`example-is-valid-filepath`

    See Also:
        :py:func:`.validate_filepath()`
    """
    effective_max_len = -1 if max_len is None else max_len

    validator = FilePathValidator(
        platform=platform,
        min_len=min_len,
        max_len=effective_max_len,
        fs_encoding=fs_encoding,
        check_reserved=check_reserved,
        additional_reserved_names=additional_reserved_names,
    )
    return validator.is_valid(file_path)
def sanitize_filepath(
    file_path: PathType,
    replacement_text: str = "",
    platform: Optional[PlatformType] = None,
    max_len: Optional[int] = None,
    fs_encoding: Optional[str] = None,
    check_reserved: Optional[bool] = None,
    null_value_handler: Optional[ValidationErrorHandler] = None,
    reserved_name_handler: Optional[ValidationErrorHandler] = None,
    additional_reserved_names: Optional[Sequence[str]] = None,
    normalize: bool = True,
    validate_after_sanitize: bool = False,
) -> PathType:
    """Make a valid file path from a string.

    To make a valid file path, the function does the following:

        - Replace invalid characters for a file path within the ``file_path``
          with the ``replacement_text``. Invalid characters are as follows:

            - unprintable characters
            - |invalid_file_path_chars|
            - for Windows (or universal) only: |invalid_win_file_path_chars|

        - Replace a value if a sanitized value is a reserved name by operating systems
          with a specified handler by ``reserved_name_handler``.

    Args:
        file_path:
            File path to sanitize.
        replacement_text:
            Replacement text for invalid characters.
            Defaults to ``""``.
        platform:
            Target platform name of the file path.

            .. include:: platform.txt
        max_len:
            Maximum byte length of the file path.
            Truncate the path if the value length exceeds the `max_len`.
            If the value is |None| or minus, ``max_len`` will be automatically
            determined by the ``platform``:

                - ``Linux``: 4096
                - ``macOS``: 1024
                - ``Windows``: 260
                - ``universal``: 260
        fs_encoding:
            Filesystem encoding that is used to calculate the byte length of the file path.
            If |None|, get the encoding from the execution environment.
        check_reserved:
            [Deprecated] Use 'reserved_name_handler' instead.
        null_value_handler:
            Function called when a value after sanitization is an empty string.
            You can specify predefined handlers:

                - :py:func:`.handler.NullValueHandler.return_null_string`
                - :py:func:`.handler.NullValueHandler.return_timestamp`
                - :py:func:`.handler.raise_error`

            Defaults to :py:func:`.handler.NullValueHandler.return_null_string` that just return ``""``.
        reserved_name_handler:
            Function called when a value after sanitization is one of the reserved names.
            You can specify predefined handlers:

                - :py:meth:`~.handler.ReservedNameHandler.add_leading_underscore`
                - :py:meth:`~.handler.ReservedNameHandler.add_trailing_underscore`
                - :py:meth:`~.handler.ReservedNameHandler.as_is`
                - :py:func:`~.handler.raise_error`

            Defaults to :py:func:`.handler.add_trailing_underscore`.
        additional_reserved_names:
            Additional reserved names to sanitize.
            Case insensitive.
        normalize:
            If |True|, normalize the file path.
        validate_after_sanitize:
            Execute validation after sanitization to the file path.

    Returns:
        Same type as the argument (str or PathLike object):
            Sanitized filepath.

    Raises:
        ValueError:
            If the ``file_path`` is an invalid file path.

    Example:
        :ref:`example-sanitize-file-path`
    """
    deprecated_flag_used = check_reserved is not None
    if deprecated_flag_used:
        warnings.warn(
            "'check_reserved' is deprecated. Use 'reserved_name_handler' instead.",
            DeprecationWarning,
        )

    # An explicit ``check_reserved=False`` overrides any handler passed in.
    if check_reserved is False:
        reserved_name_handler = ReservedNameHandler.as_is

    effective_max_len = -1 if max_len is None else max_len
    sanitizer = FilePathSanitizer(
        platform=platform,
        max_len=effective_max_len,
        fs_encoding=fs_encoding,
        normalize=normalize,
        null_value_handler=null_value_handler,
        reserved_name_handler=reserved_name_handler,
        additional_reserved_names=additional_reserved_names,
        validate_after_sanitize=validate_after_sanitize,
    )
    return sanitizer.sanitize(file_path, replacement_text)

View File

@ -0,0 +1,44 @@
"""
.. codeauthor:: Tsuyoshi Hombashi <tsuyoshi.hombashi@gmail.com>
"""
import re
from typing import Final
from ._common import to_str, validate_pathtype
from .error import InvalidCharError
# Matches any single character other than an ASCII letter, a digit, "_", "." or "-".
__RE_INVALID_LTSV_LABEL: Final = re.compile("[^0-9A-Za-z_.-]", re.UNICODE)
def validate_ltsv_label(label: str) -> None:
    """
    Verify whether ``label`` is a valid
    `Labeled Tab-separated Values (LTSV) <http://ltsv.org/>`__ label or not.

    The label is first checked with ``validate_pathtype`` (whitespaces are
    not allowed), then searched for characters other than ASCII letters,
    digits, ``_``, ``.`` and ``-``.

    :param label: Label to validate.
    :raises pathvalidate.ValidationError:
        If invalid character(s) found in the ``label`` for a LTSV format label.
    """
    validate_pathtype(label, allow_whitespaces=False)

    invalid_chars = __RE_INVALID_LTSV_LABEL.findall(to_str(label))
    if not invalid_chars:
        return

    raise InvalidCharError(f"invalid character found for a LTSV format label: {invalid_chars}")
def sanitize_ltsv_label(label: str, replacement_text: str = "") -> str:
    """
    Replace every character of ``label`` that is not valid in an LTSV label.

    The label is first checked with ``validate_pathtype`` (whitespaces are
    not allowed). Characters other than ASCII letters, digits, ``_``, ``.``
    and ``-`` are then replaced.

    :param label: Input text.
    :param replacement_text: Replacement text.
    :return: A replacement string.
    :rtype: str
    """
    validate_pathtype(label, allow_whitespaces=False)

    label_text = to_str(label)
    return __RE_INVALID_LTSV_LABEL.sub(replacement_text, label_text)

View File

@ -0,0 +1,93 @@
"""
.. codeauthor:: Tsuyoshi Hombashi <tsuyoshi.hombashi@gmail.com>
"""
import re
from collections.abc import Sequence
from typing import Final
from ._common import ascii_symbols, to_str, unprintable_ascii_chars
from .error import InvalidCharError
# Character class built from every entry of ``ascii_symbols`` and
# ``unprintable_ascii_chars`` (escaped so each is matched literally).
__RE_SYMBOL: Final = re.compile(
    "[{}]".format(re.escape("".join(ascii_symbols + unprintable_ascii_chars))), re.UNICODE
)
def validate_symbol(text: str) -> None:
    """
    Verify whether symbol(s) are included in the ``text`` or not.

    Args:
        text:
            Input text to validate.

    Raises:
        ValidationError (ErrorReason.INVALID_CHARACTER):
            If symbol(s) included in the ``text``.
    """
    found_symbols = __RE_SYMBOL.findall(to_str(text))
    if not found_symbols:
        return

    raise InvalidCharError(f"invalid symbols found: {found_symbols}")
def replace_symbol(
    text: str,
    replacement_text: str = "",
    exclude_symbols: Sequence[str] = (),
    is_replace_consecutive_chars: bool = False,
    is_strip: bool = False,
) -> str:
    """
    Replace all of the symbols in the ``text``.

    Args:
        text:
            Input text.
        replacement_text:
            Replacement text.
        exclude_symbols:
            Symbols that are excluded from the replacement.
        is_replace_consecutive_chars:
            If |True|, replace consecutive multiple ``replacement_text`` occurrences
            with a single ``replacement_text``.
        is_strip:
            If |True|, remove leading/trailing occurrences of the whole
            ``replacement_text`` from the result.

    Returns:
        A replacement string.

    Raises:
        TypeError:
            If the substitution fails with a ``TypeError``
            (reported as ``"text must be a string"``).

    Example:
        :ref:`example-sanitize-symbol`
    """
    if exclude_symbols:
        target_symbols = set(ascii_symbols + unprintable_ascii_chars) - set(exclude_symbols)
        regexp = re.compile("[{}]".format(re.escape("".join(target_symbols))), re.UNICODE)
    else:
        regexp = __RE_SYMBOL

    try:
        new_text = regexp.sub(replacement_text, to_str(text))
    except TypeError as e:
        raise TypeError("text must be a string") from e

    if not replacement_text:
        return new_text

    if is_replace_consecutive_chars:
        new_text = re.sub(f"{re.escape(replacement_text)}+", replacement_text, new_text)

    if is_strip:
        # str.strip() would treat a multi-character replacement as a set of
        # characters and could remove unrelated text; remove only whole
        # occurrences of the replacement from both ends instead.
        width = len(replacement_text)
        while new_text.startswith(replacement_text):
            new_text = new_text[width:]
        while new_text.endswith(replacement_text):
            new_text = new_text[:-width]

    return new_text

View File

@ -0,0 +1,8 @@
from pathlib import Path
from typing import TypeVar
from ._const import Platform
# Type variable for path arguments: constrained to ``str`` or ``pathlib.Path``.
PathType = TypeVar("PathType", str, Path)
# Type variable for platform arguments: constrained to ``str`` or ``Platform``.
PlatformType = TypeVar("PlatformType", str, Platform)

View File

@ -0,0 +1,47 @@
"""
.. codeauthor:: Tsuyoshi Hombashi <tsuyoshi.hombashi@gmail.com>
"""
from argparse import ArgumentTypeError
from ._filename import sanitize_filename, validate_filename
from ._filepath import sanitize_filepath, validate_filepath
from .error import ValidationError
def validate_filename_arg(value: str) -> str:
    """Validate a file name given as a command line argument.

    Args:
        value: Argument value to validate.

    Returns:
        ``value`` unchanged when it passes ``validate_filename``, or an empty
        string when ``value`` is falsy.

    Raises:
        ArgumentTypeError: If ``validate_filename`` raises ``ValidationError``.
    """
    if value:
        try:
            validate_filename(value)
        except ValidationError as e:
            raise ArgumentTypeError(e)
        return value

    return ""
def validate_filepath_arg(value: str) -> str:
    """Validate a file path given as a command line argument.

    The path is validated with ``platform="auto"``.

    Args:
        value: Argument value to validate.

    Returns:
        ``value`` unchanged when it passes ``validate_filepath``, or an empty
        string when ``value`` is falsy.

    Raises:
        ArgumentTypeError: If ``validate_filepath`` raises ``ValidationError``.
    """
    if value:
        try:
            validate_filepath(value, platform="auto")
        except ValidationError as e:
            raise ArgumentTypeError(e)
        return value

    return ""
def sanitize_filename_arg(value: str) -> str:
    """Sanitize a file name given as a command line argument.

    Args:
        value: Argument value to sanitize.

    Returns:
        The result of ``sanitize_filename(value)``, or an empty string when
        ``value`` is falsy.
    """
    return sanitize_filename(value) if value else ""
def sanitize_filepath_arg(value: str) -> str:
    """Sanitize a file path given as a command line argument.

    Args:
        value: Argument value to sanitize.

    Returns:
        The result of ``sanitize_filepath(value, platform="auto")``, or an
        empty string when ``value`` is falsy.
    """
    return sanitize_filepath(value, platform="auto") if value else ""

View File

@ -0,0 +1,50 @@
"""
.. codeauthor:: Tsuyoshi Hombashi <tsuyoshi.hombashi@gmail.com>
"""
from typing import Union
import click
from click import Context, Option, Parameter
from ._filename import sanitize_filename, validate_filename
from ._filepath import sanitize_filepath, validate_filepath
from .error import ValidationError
def validate_filename_arg(ctx: Context, param: Union[Option, Parameter], value: str) -> str:
    """Click callback that validates a file name.

    Args:
        ctx: Click context (not used by this callback).
        param: Click option/parameter (not used by this callback).
        value: Value to validate.

    Returns:
        ``value`` unchanged when it passes ``validate_filename``, or an empty
        string when ``value`` is falsy.

    Raises:
        click.BadParameter: If ``validate_filename`` raises ``ValidationError``.
    """
    if value:
        try:
            validate_filename(value)
        except ValidationError as e:
            raise click.BadParameter(str(e))
        return value

    return ""
def validate_filepath_arg(ctx: Context, param: Union[Option, Parameter], value: str) -> str:
    """Click callback that validates a file path.

    Args:
        ctx: Click context (not used by this callback).
        param: Click option/parameter (not used by this callback).
        value: Value to validate.

    Returns:
        ``value`` unchanged when it passes ``validate_filepath``, or an empty
        string when ``value`` is falsy.

    Raises:
        click.BadParameter: If ``validate_filepath`` raises ``ValidationError``.
    """
    if value:
        try:
            validate_filepath(value)
        except ValidationError as e:
            raise click.BadParameter(str(e))
        return value

    return ""
def sanitize_filename_arg(ctx: Context, param: Union[Option, Parameter], value: str) -> str:
    """Click callback that sanitizes a file name.

    Args:
        ctx: Click context (not used by this callback).
        param: Click option/parameter (not used by this callback).
        value: Value to sanitize.

    Returns:
        The result of ``sanitize_filename(value)``, or an empty string when
        ``value`` is falsy.
    """
    return sanitize_filename(value) if value else ""
def sanitize_filepath_arg(ctx: Context, param: Union[Option, Parameter], value: str) -> str:
    """Click callback that sanitizes a file path.

    Args:
        ctx: Click context (not used by this callback).
        param: Click option/parameter (not used by this callback).
        value: Value to sanitize.

    Returns:
        The result of ``sanitize_filepath(value)``, or an empty string when
        ``value`` is falsy.
    """
    return sanitize_filepath(value) if value else ""

View File

@ -0,0 +1,253 @@
"""
.. codeauthor:: Tsuyoshi Hombashi <tsuyoshi.hombashi@gmail.com>
"""
import enum
from typing import Final, Optional
from ._const import Platform
def _to_error_code(code: int) -> str:
    """Format ``code`` as ``"PV"`` followed by the number zero-padded to four digits."""
    return "PV" + format(code, "04d")
class ErrorAttrKey:
    """Names of the keyword arguments accepted by ``ValidationError``.

    The same strings are used as keys in ``ValidationError.as_slog()`` and as
    labels in ``ValidationError.__str__()``.
    """

    # Byte count of the validated value.
    BYTE_COUNT: Final = "byte_count"
    # Free-form error description.
    DESCRIPTION: Final = "description"
    # File system encoding.
    FS_ENCODING: Final = "fs_encoding"
    # Platform information.
    PLATFORM: Final = "platform"
    # ``ErrorReason`` of the error; the only key ``ValidationError`` requires.
    REASON: Final = "reason"
    # Reserved name that was matched.
    RESERVED_NAME: Final = "reserved_name"
    # Whether the reserved name is reusable.
    REUSABLE_NAME: Final = "reusable_name"
@enum.unique
class ErrorReason(enum.Enum):
    """
    Validation error reasons.

    Each member value is a ``(code, name, description)`` tuple that is
    unpacked into the member by ``__init__``.
    """

    NULL_NAME = (_to_error_code(1001), "NULL_NAME", "the value must not be an empty string")
    RESERVED_NAME = (_to_error_code(1002), "RESERVED_NAME", "found a reserved name by a platform")
    INVALID_CHARACTER = (_to_error_code(1100), "INVALID_CHARACTER", "invalid characters found")
    INVALID_LENGTH = (_to_error_code(1101), "INVALID_LENGTH", "found an invalid string length")
    FOUND_ABS_PATH = (
        _to_error_code(1200),
        "FOUND_ABS_PATH",
        "found an absolute path where must be a relative path",
    )
    MALFORMED_ABS_PATH = (
        _to_error_code(1201),
        "MALFORMED_ABS_PATH",
        "found a malformed absolute path",
    )
    INVALID_AFTER_SANITIZE = (
        _to_error_code(2000),
        "INVALID_AFTER_SANITIZE",
        "found invalid value after sanitizing",
    )

    def __init__(self, code: str, name: str, description: str) -> None:
        self.__code = code
        self.__name = name
        self.__description = description

    @property
    def code(self) -> str:
        """str: Error code."""
        return self.__code

    @property
    def name(self) -> str:
        """str: Error reason name."""
        return self.__name

    @property
    def description(self) -> str:
        """str: Error reason description."""
        return self.__description

    def __str__(self) -> str:
        return "[{}] {}".format(self.__code, self.__description)
class ValidationError(ValueError):
    """
    Exception class of validation errors.

    The message may be passed either as ordinary positional arguments or as a
    single sequence of message parts; both forms end up as the exception's
    ``args``. Extra information is passed with the keyword arguments named by
    ``ErrorAttrKey``; ``reason`` is required.
    """

    @property
    def platform(self) -> Optional[Platform]:
        """
        :py:class:`~pathvalidate.Platform`: Platform information.
        """
        return self.__platform

    @property
    def reason(self) -> ErrorReason:
        """
        :py:class:`~pathvalidate.error.ErrorReason`: The cause of the error.
        """
        return self.__reason

    @property
    def description(self) -> Optional[str]:
        """Optional[str]: Error description."""
        return self.__description

    @property
    def reserved_name(self) -> str:
        """str: Reserved name."""
        return self.__reserved_name

    @property
    def reusable_name(self) -> Optional[bool]:
        """Optional[bool]: Whether the name is reusable or not."""
        return self.__reusable_name

    @property
    def fs_encoding(self) -> Optional[str]:
        """Optional[str]: File system encoding."""
        return self.__fs_encoding

    @property
    def byte_count(self) -> Optional[int]:
        """Optional[int]: Byte count of the path."""
        return self.__byte_count

    def __init__(self, *args, **kwargs) -> None:  # type: ignore
        """Store the error attributes and initialize the base exception.

        Args:
            *args: Message parts, or one sequence containing the message parts.
            **kwargs: Attributes named by ``ErrorAttrKey``. Keyword arguments
                that are not consumed here are forwarded to
                ``ValueError.__init__``.

        Raises:
            ValueError: If the ``reason`` keyword argument is missing.
        """
        if ErrorAttrKey.REASON not in kwargs:
            raise ValueError(f"{ErrorAttrKey.REASON} must be specified")

        self.__reason: ErrorReason = kwargs.pop(ErrorAttrKey.REASON)
        self.__byte_count: Optional[int] = kwargs.pop(ErrorAttrKey.BYTE_COUNT, None)
        self.__platform: Optional[Platform] = kwargs.pop(ErrorAttrKey.PLATFORM, None)
        self.__description: Optional[str] = kwargs.pop(ErrorAttrKey.DESCRIPTION, None)
        self.__reserved_name: str = kwargs.pop(ErrorAttrKey.RESERVED_NAME, "")
        self.__reusable_name: Optional[bool] = kwargs.pop(ErrorAttrKey.REUSABLE_NAME, None)
        self.__fs_encoding: Optional[str] = kwargs.pop(ErrorAttrKey.FS_ENCODING, None)

        messages = args
        # A sequence given as the first argument carries the message parts.
        # A plain string is kept whole: star-unpacking it would turn every
        # character into a separate exception argument.
        if args and not isinstance(args[0], (str, bytes)):
            try:
                messages = tuple(args[0])
            except TypeError:
                # Not iterable: pass the positional arguments through as given.
                messages = args

        super().__init__(*messages, **kwargs)

    def as_slog(self) -> dict[str, str]:
        """Return a dictionary representation of the error.

        Returns:
            Dict[str, str]: A dictionary representation of the error.
        """
        slog: dict[str, str] = {
            "code": self.reason.code,
            ErrorAttrKey.DESCRIPTION: self.reason.description,
        }
        if self.platform:
            slog[ErrorAttrKey.PLATFORM] = self.platform.value
        if self.description:
            # A specific description replaces the generic one of the reason.
            slog[ErrorAttrKey.DESCRIPTION] = self.description
        if self.__reusable_name is not None:
            slog[ErrorAttrKey.REUSABLE_NAME] = str(self.__reusable_name)
        if self.__fs_encoding:
            slog[ErrorAttrKey.FS_ENCODING] = self.__fs_encoding
        # ``is not None`` keeps a byte count of zero, matching ``__str__``.
        if self.__byte_count is not None:
            slog[ErrorAttrKey.BYTE_COUNT] = str(self.__byte_count)

        return slog

    def __str__(self) -> str:
        item_list = []
        header = str(self.reason)

        if Exception.__str__(self):
            item_list.append(Exception.__str__(self))

        if self.platform:
            item_list.append(f"{ErrorAttrKey.PLATFORM}={self.platform.value}")
        if self.description:
            item_list.append(f"{ErrorAttrKey.DESCRIPTION}={self.description}")
        if self.__reusable_name is not None:
            item_list.append(f"{ErrorAttrKey.REUSABLE_NAME}={self.reusable_name}")
        if self.__fs_encoding:
            item_list.append(f"{ErrorAttrKey.FS_ENCODING}={self.__fs_encoding}")
        if self.__byte_count is not None:
            item_list.append(f"{ErrorAttrKey.BYTE_COUNT}={self.__byte_count:,d}")

        if item_list:
            header += ": "

        return header + ", ".join(item_list).strip()

    def __repr__(self) -> str:
        return self.__str__()
class NullNameError(ValidationError):
    """[Deprecated]
    Exception raised when a name is empty.

    The ``reason`` is always set to ``ErrorReason.NULL_NAME``.
    """

    def __init__(self, *args, **kwargs) -> None:  # type: ignore
        kwargs.update({ErrorAttrKey.REASON: ErrorReason.NULL_NAME})
        super().__init__(args, **kwargs)
class InvalidCharError(ValidationError):
    """
    Exception raised when a string includes invalid character(s).

    The ``reason`` is always set to ``ErrorReason.INVALID_CHARACTER``.
    """

    def __init__(self, *args, **kwargs) -> None:  # type: ignore[no-untyped-def]
        kwargs.update({ErrorAttrKey.REASON: ErrorReason.INVALID_CHARACTER})
        super().__init__(args, **kwargs)
class ReservedNameError(ValidationError):
    """
    Exception raised when a string matched a reserved name.

    The ``reason`` is always set to ``ErrorReason.RESERVED_NAME``.
    """

    def __init__(self, *args, **kwargs) -> None:  # type: ignore[no-untyped-def]
        kwargs.update({ErrorAttrKey.REASON: ErrorReason.RESERVED_NAME})
        super().__init__(args, **kwargs)
class ValidReservedNameError(ReservedNameError):
    """[Deprecated]
    Exception raised when a string matched a reserved name.
    However, it can be used as a name.

    ``reusable_name`` is always set to ``True``.
    """

    def __init__(self, *args, **kwargs) -> None:  # type: ignore[no-untyped-def]
        kwargs[ErrorAttrKey.REUSABLE_NAME] = True
        # Forward the message parts individually: ReservedNameError packs its
        # own positional arguments into a tuple, so passing ``args`` as one
        # object would nest the message inside a second tuple.
        super().__init__(*args, **kwargs)
class InvalidReservedNameError(ReservedNameError):
    """[Deprecated]
    Exception raised when a string matched a reserved name.
    Moreover, the reserved name is invalid as a name.

    ``reusable_name`` is always set to ``False``.
    """

    def __init__(self, *args, **kwargs) -> None:  # type: ignore[no-untyped-def]
        kwargs[ErrorAttrKey.REUSABLE_NAME] = False
        # Forward the message parts individually: ReservedNameError packs its
        # own positional arguments into a tuple, so passing ``args`` as one
        # object would nest the message inside a second tuple.
        super().__init__(*args, **kwargs)

View File

@ -0,0 +1,138 @@
"""
.. codeauthor:: Tsuyoshi Hombashi <tsuyoshi.hombashi@gmail.com>
"""
import warnings
from datetime import datetime
from typing import Callable
from .error import ValidationError
# Type of an error handler: a callable that takes a ValidationError and returns a str.
ValidationErrorHandler = Callable[[ValidationError], str]
def return_null_string(e: ValidationError) -> str:
    """Null value handler that always returns an empty string.

    Deprecated: a ``DeprecationWarning`` is emitted on every call.

    Args:
        e (ValidationError): A validation error.

    Returns:
        str: An empty string.
    """
    deprecation_message = (
        "'return_null_string' is deprecated. Use 'NullValueHandler.return_null_string' instead."
    )
    warnings.warn(deprecation_message, DeprecationWarning)

    return ""
def return_timestamp(e: ValidationError) -> str:
    """Null value handler that returns a timestamp of when the function was called.

    Deprecated: a ``DeprecationWarning`` is emitted on every call.

    Args:
        e (ValidationError): A validation error.

    Returns:
        str: A timestamp.
    """
    # The replacement is NullValueHandler.return_timestamp; the message used
    # to name a handler that does not exist on NullValueHandler.
    warnings.warn(
        "'return_timestamp' is deprecated. Use 'NullValueHandler.return_timestamp' instead.",
        DeprecationWarning,
    )

    return str(datetime.now().timestamp())
def raise_error(e: ValidationError) -> str:
    """Handler that re-raises the validation error it receives.

    Args:
        e (ValidationError): A validation error.

    Raises:
        ValidationError: Always raised; the function never returns.
    """
    raise e
class NullValueHandler:
    """Predefined handlers for values that are empty after sanitization."""

    @classmethod
    def return_null_string(cls, e: ValidationError) -> str:
        """Null value handler that always returns an empty string.

        Args:
            e (ValidationError): A validation error.

        Returns:
            str: An empty string.
        """
        return ""

    @classmethod
    def return_timestamp(cls, e: ValidationError) -> str:
        """Null value handler that returns a timestamp of when the function was called.

        Args:
            e (ValidationError): A validation error.

        Returns:
            str: A timestamp.
        """
        called_at = datetime.now()
        return str(called_at.timestamp())
class ReservedNameHandler:
    """Predefined handlers for values that match a reserved name."""

    @classmethod
    def add_leading_underscore(cls, e: ValidationError) -> str:
        """Reserved name handler that adds a leading underscore (``"_"``) to the name
        except for ``"."`` and ``".."``.

        Names flagged as reusable are also returned unchanged.

        Args:
            e (ValidationError): A reserved name error.

        Returns:
            str: The converted name.
        """
        name = e.reserved_name
        if name not in (".", "..") and not e.reusable_name:
            return f"_{name}"

        return name

    @classmethod
    def add_trailing_underscore(cls, e: ValidationError) -> str:
        """Reserved name handler that adds a trailing underscore (``"_"``) to the name
        except for ``"."`` and ``".."``.

        Names flagged as reusable are also returned unchanged.

        Args:
            e (ValidationError): A reserved name error.

        Returns:
            str: The converted name.
        """
        name = e.reserved_name
        if name not in (".", "..") and not e.reusable_name:
            return f"{name}_"

        return name

    @classmethod
    def as_is(cls, e: ValidationError) -> str:
        """Reserved name handler that returns the name as is.

        Args:
            e (ValidationError): A reserved name error.

        Returns:
            str: The name as is.
        """
        name = e.reserved_name
        return name