Diffstat (limited to 'sources/pyside2/tests')
-rw-r--r--  sources/pyside2/tests/registry/scrape_testresults.py  341
1 file changed, 341 insertions, 0 deletions
diff --git a/sources/pyside2/tests/registry/scrape_testresults.py b/sources/pyside2/tests/registry/scrape_testresults.py
new file mode 100644
index 000000000..128269fd5
--- /dev/null
+++ b/sources/pyside2/tests/registry/scrape_testresults.py
@@ -0,0 +1,341 @@
+#############################################################################
+##
+## Copyright (C) 2018 The Qt Company Ltd.
+## Contact: https://www.qt.io/licensing/
+##
+## This file is part of Qt for Python.
+##
+## $QT_BEGIN_LICENSE:LGPL$
+## Commercial License Usage
+## Licensees holding valid commercial Qt licenses may use this file in
+## accordance with the commercial license agreement provided with the
+## Software or, alternatively, in accordance with the terms contained in
+## a written agreement between you and The Qt Company. For licensing terms
+## and conditions see https://www.qt.io/terms-conditions. For further
+## information use the contact form at https://www.qt.io/contact-us.
+##
+## GNU Lesser General Public License Usage
+## Alternatively, this file may be used under the terms of the GNU Lesser
+## General Public License version 3 as published by the Free Software
+## Foundation and appearing in the file LICENSE.LGPL3 included in the
+## packaging of this file. Please review the following information to
+## ensure the GNU Lesser General Public License version 3 requirements
+## will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
+##
+## GNU General Public License Usage
+## Alternatively, this file may be used under the terms of the GNU
+## General Public License version 2.0 or (at your option) the GNU General
+## Public license version 3 or any later version approved by the KDE Free
+## Qt Foundation. The licenses are as published by the Free Software
+## Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
+## included in the packaging of this file. Please review the following
+## information to ensure the GNU General Public License requirements will
+## be met: https://www.gnu.org/licenses/gpl-2.0.html and
+## https://www.gnu.org/licenses/gpl-3.0.html.
+##
+## $QT_END_LICENSE$
+##
+#############################################################################
+
+"""
+scrape_testresults.py
+
+Read the testresults website of COIN and find the pages that contain an
+embedded exists_{platform}_{version}_ci.py .
+
+The found pages will then be sorted by date/time and put into the registry.
+
+This program utilizes the multiprocessing package for speedy access to
+the web pages. The program works well in typically less than half an hour.
+
+After the cache has been created, the runtime is substantially smaller.
+
+"""
+
+import sys
+if sys.version_info[:2] < (3, 6):
+    print("This program is written for Python 3.6 or higher.")
+    sys.exit(1)
+
+DEMO_URL = ("https://testresults.qt.io/coin/api/results/pyside/pyside-setup/"
+            # The above URL part is fixed.
+            "30c1193ec56a86b8d0920c325185b9870f96941e/"
+            "MacOSMacOS_10_12x86_64MacOSMacOS_10_12x86_64Clangqtci-macos-"
+            "10.12-x86_64-8-425364DebugAndRelease_Release/"
+            "d80c5d4547ea2b3d74188bd458955aae39cb32b4/"
+            "test_1535865484/"
+            "log.txt.gz")
+
+from bs4 import BeautifulSoup
+from datetime import datetime
+from multiprocessing import Pool
+from textwrap import dedent
+import requests
+import os
+import time
+import re
+import json
+import argparse
+
+my_name = __file__ if __file__.endswith(".py") else __file__[:-1]  # strip a trailing "c" from ".pyc"
+test_path = os.path.join(os.path.dirname(__file__), "testresults", "embedded")
+if not os.path.exists(test_path):
+    os.makedirs(test_path)
+cache_path = os.path.dirname(test_path)
+target_path = os.path.dirname(__file__)
+start_time = time.time()
+
+def get_name(url):
+    """
+    Return the last piece of a URL, keeping a trailing slash if present.
+
+    In effect, this undoes the accumulation of URL pieces.
+    """
+    name = url.rstrip("/").rsplit("/", 1)[-1]
+    if url.endswith("/"):
+        name += "/"
+    return name
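+
+# For illustration (hypothetical URLs):
+#   get_name("https://host/a/b/")    returns "b/"
+#   get_name("https://host/a/b.txt") returns "b.txt"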
+
+def rel_url(url):
+    """
+    Return the URL with the leading top_url part removed.
+    """
+    return url[len(top_url):]
+
+stop_all = False
+
+def find_all_links(text, url, ignore=()):
+    """
+    Find all links in a page.
+
+    Only simple links are allowed. That means safe characters and
+    at most one "/" at the end for directories.
+    """
+    global stop_all
+    soup = BeautifulSoup(text, "html.parser")
+    lis = soup.find_all("a")
+    names = list(row["href"] for row in lis)
+    names = list(name for name in names if name not in ignore)
+    for name in names:
+        if not re.match(r"^[A-Za-z0-9_\-.]+/?$", name):
+            print("Unexpected character in link:", name)
+            # Not clear how to terminate the pool quickly and cleanly.
+            # We crash badly in handle_suburl_tup; ugly, but it works.
+            stop_all = True
+            return []
+    urls = list(url + name for name in names)
+    return urls
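+
+# For illustration (hypothetical hrefs): "test_1535865484/" and "log.txt.gz"
+# pass the filter above, while "a/b/" or "?page=2" would stop the whole scan.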
+
+def read_url(url):
+    # We intentionally let things fail, because we re-run things on failure.
+    try:
+        response = requests.get(url)
+    except requests.exceptions.ContentDecodingError as e:
+        # This is a permanent error which is in the data. We ignore that.
+        print(os.getpid(), "Decoding Error:", e)
+        print(os.getpid(), "Cannot fix this, ignored.")
+        return None
+    except requests.exceptions.RequestException as e:
+        print("Read error:", e)
+        raise
+    else:
+        return response
+
+def get_timestamp(text):
+    # The log starts with a line like "agent:2018/06/29 15:02:15".
+    global stop_all
+    ts = text[6 : 6 + 19]
+    ts = re.sub('[^0-9]', '_', ts)
+    # Check that it is a valid time stamp.
+    try:
+        datetime.strptime(ts, "%Y_%m_%d_%H_%M_%S")
+    except ValueError as e:
+        print("Unexpected time stamp", e)
+        stop_all = True
+        raise
+    return ts
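+
+# For illustration: a log whose first line is "agent:2018/06/29 15:02:15"
+# yields the sortable time stamp "2018_06_29_15_02_15".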
+
+def write_data(name, text):
+    ts = get_timestamp(text)
+    lines = text.split("\n")
+    for idx, line in enumerate(lines):
+        if "BEGIN_FILE" in line:
+            start = idx + 1
+            offset = line.index("BEGIN_FILE")
+        if "END_FILE" in line:
+            stop = idx
+    lines = lines[start : stop]
+    if offset:
+        lines = list(line[offset:] for line in lines)
+    # Fix the lines - the original has no empty line after "# eof".
+    while lines[-1] == "":
+        lines.pop()
+    text = "\n".join(lines) + "\n"
+    modname = re.search("'(..*?)'", text).group(1)
+    fn = os.path.join(test_path, f"{ts}-{name}-{modname}.py")
+    if os.path.exists(fn):
+        # Do not change the file, we want to skip it.
+        return
+    with open(fn, "w") as f:
+        f.write(text)
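+
+# A rough sketch of the embedded block that write_data() extracts (the
+# surrounding log text is an assumption; only the BEGIN_FILE/END_FILE markers
+# and the first single-quoted module name are used by the code above):
+#
+#   BEGIN_FILE
+#   # Registry data for module 'exists_{platform}_{version}_ci'
+#   ...
+#   # eof
+#   END_FILE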
+
+def update_license(text):
+    # Replace the license header of the scraped file with the header of this script.
+    end_license = text.index("\n\n")
+    with open(my_name) as fi:
+        my_text = fi.read()
+    my_end_license = my_text.index("\n\n")
+    text = my_text[:my_end_license] + text[end_license:]
+    return text
+
+def eval_data():
+    """
+    Read all found files, sort them and keep the latest version.
+    """
+    files = []
+    for entry in os.scandir(test_path):
+        if "exists_" in entry.name and entry.name.endswith(".py"):
+            if os.path.getmtime(entry.path) >= start_time:
+                # This file was newly created by the current run.
+                files.append(entry.path)
+    files.sort()
+    # Read the files and update in chronological order.
+    results = {}
+    for fn in files:
+        with open(fn) as f:
+            text = f.read()
+        modname = re.search("'(..*?)'", text).group(1)
+        results[modname] = text
+    for fn in results:
+        name = os.path.join(target_path, fn + ".py")
+        with open(name, "w") as f:
+            f.write(update_license(results[fn]))
+    return len(results)
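+
+# For illustration: each surviving module is rewritten next to this script as
+# exists_{platform}_{version}_ci.py, with its license header replaced by
+# update_license() above.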
+
+def handle_suburl(idx, n, url, level):
+    if level == 1:
+        print(os.getpid(), "Reading", idx + 1, "of", n, rel_url(url))
+    response = read_url(url)
+    if response is None:
+        # read_url() swallows permanent decoding errors; nothing to scan here.
+        return
+    urls = find_all_links(response.text, url)
+    for sub_url in urls:
+        name = get_name(sub_url)
+        if name.endswith("/"):
+            if name.startswith("build_"):
+                continue
+            if name == "tasks/":
+                continue
+            handle_suburl(0, 0, sub_url, level + 1)
+        else:
+            if name.startswith("log.txt"):
+                test_name = sub_url.split("/")[-2]
+                print(os.getpid(), test_name)
+                response = read_url(sub_url)
+                if response and "BEGIN_FILE" in response.text:
+                    print(os.getpid(), test_name, "FOUND!")
+                    write_data(test_name, response.text)
+                else:
+                    print(os.getpid(), test_name)
+
+
+def handle_suburl_tup(idx_n_url_level):
+    if stop_all:
+        return  # Bad solution, but it stops fast.
+    idx, n, url, level = idx_n_url_level
+    try:
+        handle_suburl(idx, n, url, level)
+        return url, None
+    except requests.exceptions.RequestException as e:
+        return url, e
+
+def handle_batch(urls, level):
+    n = len(urls)
+    args = ((idx, n, url, level) for (idx, url) in enumerate(urls))
+    with Pool(10) as p:
+        records = list(p.imap_unordered(handle_suburl_tup, args))
+    # re-read the failed ones
+    runs = [n]
+    for idx in range(10):
+        urls = list(x[0] for x in records if x[-1])
+        if not urls:
+            break
+        print("Pausing 5 seconds")
+        time.sleep(5)
+        n = len(urls)
+        runs.append(n)
+        args = ((idx, n, url, level) for (idx, url) in enumerate(urls))
+        with Pool(10) as p:
+            records = list(p.imap_unordered(handle_suburl_tup, args))
+    # Return success when the remaining URLs are empty.
+    print("Runs:", ", ".join(map(str, runs)))
+    return not urls
+
+def handle_topurl(url):
+    """
+    Find all links to directories.
+
+    We maintain a cache of these links. The cache is only updated
+    when all URLs have been successfully processed.
+    """
+    try:
+        response = requests.get(url)
+    except requests.exceptions.RequestException as e:
+        print("Skipped", e)
+        return
+    global top_url
+    top_url = url
+    urls = find_all_links(response.text, url, ignore=("tasks/",))
+    work_urls = set(urls)
+    cache_file = os.path.join(cache_path, "known_urls.json")
+    if os.path.exists(cache_file):
+        with open(cache_file, 'r') as fp:
+            known_urls = json.load(fp)
+        work_urls -= set(known_urls)
+    level = 1
+    for sub_url in work_urls:
+        name = get_name(sub_url)
+        if name.endswith("/"):
+            if name.startswith("build_"):
+                continue
+            work_urls.add(sub_url)
+    success = handle_batch(work_urls, 1)
+    if success:
+        with open(cache_file, 'w') as fp:
+            json.dump(urls, fp, sort_keys=True, indent=4)
+    return success
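+
+# For illustration: known_urls.json is simply a JSON list of the already
+# processed top-level URLs, e.g. (hypothetical entry)
+#   ["https://testresults.qt.io/coin/api/results/pyside/pyside-setup/<sha1>/"]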
+
+def get_test_results(starturl):
+    ok = handle_topurl(starturl)
+    stop_time = time.time()
+    runtime = stop_time - start_time
+    hours, remainder = divmod(runtime, 3600)
+    minutes, seconds = divmod(remainder, 60)
+
+    runtime_formatted = '%d:%02d:%06.3f' % (hours, minutes, seconds)
+    print(f"Run time: {runtime_formatted}")
+    if ok:
+        found = eval_data()
+        print(f"Successful scan, {found} new files.")
+        if found:
+            print("Please check if a git push is necessary.")
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ usage=dedent(f"""\
+ {os.path.basename(my_name)} [-h] scan
+
+ Scan the COIN testresults website for embedded exists_{{platf}}_{{version}}_ci.py files.
+
+ Warning: On the first call, this script may take almost 30 minutes to run.
+ Subsequent calls are *much* faster due to caching.
+
+ For more information, see the file
+ sources/shiboken2/libshiboken/signature_doc.rst
+ """))
+ subparsers = parser.add_subparsers(dest="command", metavar="", title="required argument")
+ # create the parser for the "scan" command
+ parser_test = subparsers.add_parser("scan", help="run the scan")
+ args = parser.parse_args()
+ if not args.command:
+ parser.print_usage()
+ exit(1)
+ get_test_results("https://testresults.qt.io/coin/api/results/pyside/pyside-setup/")