diff options
-rw-r--r-- | Pipfile | 2 | ||||
-rw-r--r-- | packaging-tools/sdkcomponent.py | 114 | ||||
-rw-r--r-- | packaging-tools/tests/test_sdkcomponent.py | 98 |
3 files changed, 182 insertions, 32 deletions
@@ -18,6 +18,8 @@ configparser = "==5.2.0" dataclasses = {version = "==0.8", markers="(python_version >= '3.6' and python_version < '3.7')"} asyncio-backport = {version = "==0.1.1", markers="(python_version >= '3.6' and python_version < '3.7')"} requests = "==2.27.1" +htmllistparse = "==0.6.0" +urlpath = "==1.2.0" [dev-packages] ddt = "==1.6.0" diff --git a/packaging-tools/sdkcomponent.py b/packaging-tools/sdkcomponent.py index a57d0b2f5..ddc58cd25 100644 --- a/packaging-tools/sdkcomponent.py +++ b/packaging-tools/sdkcomponent.py @@ -30,11 +30,15 @@ ############################################################################# import os +import re from configparser import ConfigParser from dataclasses import dataclass, field +from fnmatch import fnmatch from pathlib import Path from typing import Dict, List, Optional, Tuple -from urllib.parse import urlparse + +import htmllistparse # type: ignore +from urlpath import URL # type: ignore from bldinstallercommon import uri_exists from logging_util import init_logger @@ -210,13 +214,60 @@ class ArchiveResolver: self.file_share_base_url = file_share_base_url self.pkg_template_folder = pkg_template_folder - def resolve_payload_uri(self, unresolved_archive_uri: str) -> str: + def absolute_url(self, url: str) -> str: + """ + Append the given URL to resolver's file_share_base_url if doesn't start with http + + Args: + url: A URL to check and convert if necessary + + Returns: + An absolute URL starting with the file_share_base_url or the original absolute URL + """ + if not url.startswith("http"): + return self.file_share_base_url.rstrip("/") + "/" + url.lstrip("/") + return url + + def resolve_uri_pattern(self, pattern: str, base_url: Optional[URL] = None) -> List[URL]: + """ + Return payload URIs from remote tree, fnmatch pattern match for given arguments. + Patterns will match arbitrary number of '/' allowing recursive search. + + Args: + pattern: A fnmatch pattern starting with an absolute URL e.g. "http://foo.bar/dir/*" + base_url: For recursive runs, specify the child directory to crawl + + Returns: + The final list of matching URIs + """ + # split base pattern from pattern (fnmatch chars *,[,],?) + base_pattern = re.split(r'[\*\[\]\?]', pattern)[0] + # base_url from base_pattern if not specified + base_url = base_url or URL(base_pattern.rsplit("/", 1)[0]) + # get links from base_url + log.info("Crawl: %s", base_url) + links = htmllistparse.fetch_listing(base_url, timeout=30)[1] + # get fnmatch pattern matches from links recursively + uri_list = [] + for link in links: + if link.name.endswith("/"): + # match the directory with base_pattern + if fnmatch(base_url / link.name, base_pattern + "*"): + # recursively look for pattern matches inside the matching directory + uri_list.extend(self.resolve_uri_pattern(pattern, base_url / link.name)) + else: + if fnmatch(base_url / link.name, pattern): + uri_list.append(base_url / link.name) + return uri_list + + def resolve_payload_uri(self, unresolved_archive_uri: str) -> List[str]: """ Resolves the given archive URI and resolves it based on the type of URI given - Available URI types: - - file system string paths, file system URIs - - network locations e.g. HTTP URLs + Available URI types, in the order of priority: + - fnmatch pattern for absolute or relative URL (recursive) + - file system string paths, file system URIs and network locations e.g. HTTP URLs - file system string paths relative to data folder under package template root + - network locations relative to file_share_base_url Args: unresolved_archive_uri: Original URI to resolve @@ -224,15 +275,19 @@ class ArchiveResolver: Returns: A resolved URI location for the payload """ + # is it a URL containing a fnmatch pattern + if any(char in unresolved_archive_uri for char in ("*", "[", "]", "?")): + pattern = self.absolute_url(unresolved_archive_uri) + return [str(url) for url in self.resolve_uri_pattern(pattern)] # is it a file system path or an absolute URL which can be downloaded - if os.path.exists(unresolved_archive_uri) or urlparse(unresolved_archive_uri).netloc: - return unresolved_archive_uri + if os.path.exists(unresolved_archive_uri) or URL(unresolved_archive_uri).netloc: + return [unresolved_archive_uri] # is it relative to pkg template root dir, under the 'data' directory pkg_data_dir = os.path.join(self.pkg_template_folder, "data", unresolved_archive_uri) if os.path.exists(pkg_data_dir): - return pkg_data_dir + return [pkg_data_dir] # ok, we assume this is a URL which can be downloaded - return self.file_share_base_url.rstrip("/") + "/" + unresolved_archive_uri.lstrip("/") + return [self.absolute_url(unresolved_archive_uri)] @dataclass @@ -256,9 +311,11 @@ class IfwSdkComponent: archive_skip: bool = False def __post_init__(self) -> None: - """Post init: convert component sha1 uri to resolved uri if it exists""" + """Post init: resolve component sha1 uri if it exists""" if self.comp_sha1_uri: - self.comp_sha1_uri = self.archive_resolver.resolve_payload_uri(self.comp_sha1_uri) + match_uris = self.archive_resolver.resolve_payload_uri(self.comp_sha1_uri) + assert len(match_uris) == 1, f"More than one match for component sha: {match_uris}" + self.comp_sha1_uri = match_uris.pop() def validate(self, uri_check: bool = True, ignore_errors: bool = False) -> bool: """ @@ -449,7 +506,7 @@ def parse_ifw_sdk_archives( for arch_section_name in archive_sections: config_subst = ConfigSubst(config, arch_section_name, substitutions) unresolved_archive_uri = config_subst.get("archive_uri") - resolved_archive_uri = archive_resolver.resolve_payload_uri(unresolved_archive_uri) + resolved_uris = archive_resolver.resolve_payload_uri(unresolved_archive_uri) archive_action_string = config_subst.get("archive_action", "") archive_action: Optional[Tuple[Path, str]] = None if archive_action_string: @@ -472,20 +529,21 @@ def parse_ifw_sdk_archives( rpath_target = os.sep + rpath_target component_sha1_file = config_subst.get("component_sha1_file") archive_name = config_subst.get("archive_name") - payload = IfwPayloadItem( - package_name=arch_section_name, - archive_uri=resolved_archive_uri, - archive_action=archive_action, - disable_extract_archive=disable_extract_archive, - package_strip_dirs=package_strip_dirs, - package_finalize_items=package_finalize_items, - parent_target_install_base=parent_target_install_base, - arch_target_install_base=target_install_base, - arch_target_install_dir=target_install_dir, - rpath_target=rpath_target, - component_sha1=component_sha1_file, - archive_name=archive_name, - ) - payload.validate() - parsed_archives.append(payload) + for resolved_archive_uri in resolved_uris: + payload = IfwPayloadItem( + package_name=arch_section_name, + archive_uri=resolved_archive_uri, + archive_action=archive_action, + disable_extract_archive=disable_extract_archive, + package_strip_dirs=package_strip_dirs, + package_finalize_items=package_finalize_items, + parent_target_install_base=parent_target_install_base, + arch_target_install_base=target_install_base, + arch_target_install_dir=target_install_dir, + rpath_target=rpath_target, + component_sha1=component_sha1_file, + archive_name=archive_name, + ) + payload.validate() + parsed_archives.append(payload) return parsed_archives diff --git a/packaging-tools/tests/test_sdkcomponent.py b/packaging-tools/tests/test_sdkcomponent.py index af5e534b1..49ed97144 100644 --- a/packaging-tools/tests/test_sdkcomponent.py +++ b/packaging-tools/tests/test_sdkcomponent.py @@ -33,9 +33,11 @@ import tempfile import unittest from configparser import ConfigParser, ExtendedInterpolation from pathlib import Path -from typing import Dict, List, Tuple +from typing import Any, Dict, List, Optional, Tuple from ddt import data, ddt, unpack # type: ignore +from htmllistparse import FileEntry # type: ignore +from urlpath import URL # type: ignore from sdkcomponent import ( ArchiveResolver, @@ -102,6 +104,32 @@ def create_paths(root_folder: str, paths: List[str]) -> List[str]: return ret +def create_listing(url: URL, timeout: int) -> Tuple[Optional[Any], List[FileEntry]]: + _ = timeout + uri = url.as_uri().rstrip("/") + if not uri: + return (None, []) + tree_dict: Dict[str, List[FileEntry]] = { + "http://fileshare.intra/base": [FileEntry("path/", "", "", "")], + "http://fileshare.intra/base/path": [ + FileEntry("a.7z", "", "", ""), + FileEntry("b.7z", "", "", ""), + FileEntry("c.tar.gz", "", "", ""), + FileEntry("d.tar.xz", "", "", ""), + FileEntry("child/", "", "", ""), + ], + "http://fileshare.intra/base/path/child": [ + FileEntry("a.7z", "", "", ""), + FileEntry("b.7z", "", "", ""), + FileEntry("c.tar.gz", "", "", ""), + FileEntry("d.txt", "", "", ""), + FileEntry("child/", "", "", ""), + ], + "http://fileshare.intra/base/path/child/child": [FileEntry("a.txt", "", "", "")] + } + return url.path if uri in tree_dict else None, tree_dict.get(uri, []) + + @ddt class TestRunner(unittest.TestCase): def test_parse_ifw_sdk_archives(self) -> None: @@ -239,12 +267,74 @@ class TestRunner(unittest.TestCase): pass resolver = ArchiveResolver("http://intranet.local.it/artifacts", template_folder) - self.assertEqual(resolver.resolve_payload_uri("readme.txt"), payload_file) + self.assertEqual(resolver.resolve_payload_uri("readme.txt").pop(), payload_file) self.assertEqual( - resolver.resolve_payload_uri("qt/qtsvg/qtsvg-RHEL_7_4.7z"), + resolver.resolve_payload_uri("qt/qtsvg/qtsvg-RHEL_7_4.7z").pop(), "http://intranet.local.it/artifacts/qt/qtsvg/qtsvg-RHEL_7_4.7z", ) - self.assertEqual(resolver.resolve_payload_uri(__file__), __file__) + self.assertEqual(resolver.resolve_payload_uri(__file__).pop(), __file__) + + @data( # type: ignore + ( + "http://fileshare.intra/base/path/*", + [ + URL("http://fileshare.intra/base/path/a.7z"), + URL("http://fileshare.intra/base/path/b.7z"), + URL('http://fileshare.intra/base/path/c.tar.gz'), + URL('http://fileshare.intra/base/path/d.tar.xz'), + URL('http://fileshare.intra/base/path/child/a.7z'), + URL('http://fileshare.intra/base/path/child/b.7z'), + URL('http://fileshare.intra/base/path/child/c.tar.gz'), + URL('http://fileshare.intra/base/path/child/d.txt'), + URL('http://fileshare.intra/base/path/child/child/a.txt'), + ], + ), + ( + "http://fileshare.intra/base/path/*.7z", + [ + URL("http://fileshare.intra/base/path/a.7z"), + URL("http://fileshare.intra/base/path/b.7z"), + URL('http://fileshare.intra/base/path/child/a.7z'), + URL('http://fileshare.intra/base/path/child/b.7z'), + ], + ), + ( + "http://fileshare.intra/base/path/*.tar.*", + [ + URL('http://fileshare.intra/base/path/c.tar.gz'), + URL('http://fileshare.intra/base/path/d.tar.xz'), + URL('http://fileshare.intra/base/path/child/c.tar.gz'), + ], + ), + ( + "http://fileshare.intra/base/path/*.t*", + [ + URL('http://fileshare.intra/base/path/c.tar.gz'), + URL('http://fileshare.intra/base/path/d.tar.xz'), + URL('http://fileshare.intra/base/path/child/c.tar.gz'), + URL('http://fileshare.intra/base/path/child/d.txt'), + URL('http://fileshare.intra/base/path/child/child/a.txt'), + ], + ), + ( + "http://fileshare.intra/base/path/chi?d/child/[a-f].txt", + [ + URL('http://fileshare.intra/base/path/child/child/a.txt'), + ], + ), + ( + "http://fileshare.intra/base/path/*[b-f].7z", + [ + URL('http://fileshare.intra/base/path/b.7z'), + URL('http://fileshare.intra/base/path/child/b.7z'), + ], + ), + ) + @unpack # type: ignore + @unittest.mock.patch("htmllistparse.fetch_listing", side_effect=create_listing) # type: ignore + def test_pattern_archive_resolver(self, pattern: str, expected: List[str], _: Any) -> None: + resolver = ArchiveResolver("", "") + self.assertCountEqual(resolver.resolve_uri_pattern(pattern, None), expected) def test_locate_pkg_templ_dir_invalid(self) -> None: with tempfile.TemporaryDirectory(dir=os.getcwd()) as tmp_base_dir: |