aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Patrik Teivonen <patrik.teivonen@qt.io> 2022-10-25 13:31:12 +0300
committer: Patrik Teivonen <patrik.teivonen@qt.io> 2022-12-29 14:55:12 +0000
commit: 4e3ab8c3ce2973850a9bbcafb0636c386db97a45 (patch)
tree: 02fbbbf10116973896d79e5034f6d3c440f74d7e
parent: 5e0b0bdbf08df923f2b74e1c8d6d29e90663142c (diff)
Make resolve_wildcard_uri() crawl urls asynchronously
Speed up the URI resolution by resolving child dirs concurrently. Task-number: QTBUG-105693 Change-Id: I44fadc6de4097f43c57a5dd6df275bf94e0fc5e3 Reviewed-by: Iikka Eklund <iikka.eklund@qt.io>
-rw-r--r-- packaging-tools/sdkcomponent.py 35
-rw-r--r-- packaging-tools/tests/test_sdkcomponent.py 8
2 files changed, 35 insertions, 8 deletions
diff --git a/packaging-tools/sdkcomponent.py b/packaging-tools/sdkcomponent.py
index ddc58cd25..be013a9a2 100644
--- a/packaging-tools/sdkcomponent.py
+++ b/packaging-tools/sdkcomponent.py
@@ -29,13 +29,15 @@
#
#############################################################################
+import asyncio
import os
import re
+import sys
from configparser import ConfigParser
from dataclasses import dataclass, field
from fnmatch import fnmatch
from pathlib import Path
-from typing import Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple
import htmllistparse # type: ignore
from urlpath import URL # type: ignore
@@ -43,6 +45,12 @@ from urlpath import URL # type: ignore
from bldinstallercommon import uri_exists
from logging_util import init_logger
+if sys.version_info < (3, 7):
+ from asyncio_backport import run as asyncio_run
+else:
+ from asyncio import run as asyncio_run
+
+
log = init_logger(__name__, debug_mode=False)
@@ -228,7 +236,16 @@ class ArchiveResolver:
return self.file_share_base_url.rstrip("/") + "/" + url.lstrip("/")
return url
- def resolve_uri_pattern(self, pattern: str, base_url: Optional[URL] = None) -> List[URL]:
+ async def fetch_in_executor(self, url: str) -> Tuple[Any, List[Any]]:
+ """Wrap fetch_listing in a Future and return it"""
+ if sys.version_info < (3, 7):
+ loop = asyncio.get_event_loop() # keep for Python 3.6 compatibility
+ else:
+ loop = asyncio.get_running_loop()
+ log.info("Crawl: %s", url)
+ return await loop.run_in_executor(None, htmllistparse.fetch_listing, url, 30)
+
+ async def resolve_uri_pattern(self, pattern: str, base_url: Optional[URL] = None) -> List[URL]:
"""
Return payload URIs from remote tree, fnmatch pattern match for given arguments.
Patterns will match arbitrary number of '/' allowing recursive search.
@@ -245,19 +262,23 @@ class ArchiveResolver:
# base_url from base_pattern if not specified
base_url = base_url or URL(base_pattern.rsplit("/", 1)[0])
# get links from base_url
- log.info("Crawl: %s", base_url)
- links = htmllistparse.fetch_listing(base_url, timeout=30)[1]
+ _, links = await self.fetch_in_executor(base_url)
# get fnmatch pattern matches from links recursively
uri_list = []
+ child_list = []
for link in links:
if link.name.endswith("/"):
# match the directory with base_pattern
if fnmatch(base_url / link.name, base_pattern + "*"):
- # recursively look for pattern matches inside the matching directory
- uri_list.extend(self.resolve_uri_pattern(pattern, base_url / link.name))
+ child_list.append(base_url / link.name)
else:
if fnmatch(base_url / link.name, pattern):
uri_list.append(base_url / link.name)
+ # recursively look for pattern matches inside the matching child directories
+ coros = [self.resolve_uri_pattern(pattern, url) for url in child_list]
+ results = await asyncio.gather(*coros)
+ for item in results:
+ uri_list.extend(item)
return uri_list
def resolve_payload_uri(self, unresolved_archive_uri: str) -> List[str]:
@@ -278,7 +299,7 @@ class ArchiveResolver:
# is it a URL containing a fnmatch pattern
if any(char in unresolved_archive_uri for char in ("*", "[", "]", "?")):
pattern = self.absolute_url(unresolved_archive_uri)
- return [str(url) for url in self.resolve_uri_pattern(pattern)]
+ return [str(url) for url in asyncio_run(self.resolve_uri_pattern(pattern))]
# is it a file system path or an absolute URL which can be downloaded
if os.path.exists(unresolved_archive_uri) or URL(unresolved_archive_uri).netloc:
return [unresolved_archive_uri]
diff --git a/packaging-tools/tests/test_sdkcomponent.py b/packaging-tools/tests/test_sdkcomponent.py
index 49ed97144..8bc6b6f49 100644
--- a/packaging-tools/tests/test_sdkcomponent.py
+++ b/packaging-tools/tests/test_sdkcomponent.py
@@ -29,6 +29,7 @@
#############################################################################
import os
+import sys
import tempfile
import unittest
from configparser import ConfigParser, ExtendedInterpolation
@@ -47,6 +48,11 @@ from sdkcomponent import (
parse_ifw_sdk_comp,
)
+if sys.version_info < (3, 7):
+ import asyncio_backport as asyncio
+else:
+ import asyncio
+
def ifw_sdk_config_valid(section_name: str) -> ConfigParser:
conf = ConfigParser(interpolation=ExtendedInterpolation())
@@ -334,7 +340,7 @@ class TestRunner(unittest.TestCase):
@unittest.mock.patch("htmllistparse.fetch_listing", side_effect=create_listing) # type: ignore
def test_pattern_archive_resolver(self, pattern: str, expected: List[str], _: Any) -> None:
resolver = ArchiveResolver("", "")
- self.assertCountEqual(resolver.resolve_uri_pattern(pattern, None), expected)
+ self.assertCountEqual(asyncio.run(resolver.resolve_uri_pattern(pattern, None)), expected)
def test_locate_pkg_templ_dir_invalid(self) -> None:
with tempfile.TemporaryDirectory(dir=os.getcwd()) as tmp_base_dir: