Source code for eko.io.raw

"""Utilities to manipulate unstructured IO.

The content is treated independently on the particular data content, but
as generic uknown data in an abstract file format, e.g. a tar archive or
YAML data file, as opposed to structured YAML representing a specific
runcard.
"""

import os
from pathlib import Path
from tarfile import TarFile, TarInfo
from typing import Optional, Sequence


[docs] def is_within_directory(directory: os.PathLike, target: os.PathLike) -> bool: """Check if target path is contained in directory. Thanks to TrellixVulnTeam for the `idea <https://github.com/NNPDF/eko/pull/154>`_. Parameters ---------- directory: the directory where the target is supposed to be contained target: the target file to check """ abs_dir = Path(directory).absolute() abs_target = Path(target).absolute() return abs_dir == abs_target or abs_dir in abs_target.parents
[docs] def safe_extractall( tar: TarFile, path: Optional[os.PathLike] = None, members: Optional[Sequence[TarInfo]] = None, *, numeric_owner: bool = False, ): """Extract a tar archive avoiding CVE-2007-4559 issue. Thanks to TrellixVulnTeam for the `contribution <https://github.com/NNPDF/eko/pull/154>`_. All undocumented parameters have the same meaning of the analogue ones in :meth:`TarFile.extractall`. Parameters ---------- tar: the tar archive object to be extracted path: the path to extract to, if not specified the current directory is used """ if path is None: path = Path.cwd() path = Path(path) for member in tar.getmembers(): member_path = path / member.name if not is_within_directory(path, member_path): raise Exception("Attempted Path Traversal in Tar File") tar.extractall(path, members, numeric_owner=numeric_owner)