From acb3f68a65d47843bbad25117804d8dec868376d Mon Sep 17 00:00:00 2001
From: deadc0de6
Date: Tue, 31 Jan 2023 22:15:37 +0100
Subject: [PATCH] add retry on link check

---
 scripts/check_links.py | 31 ++++++++++++++++++++++++++-----
 1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/scripts/check_links.py b/scripts/check_links.py
index 08c5694..1193dbc 100755
--- a/scripts/check_links.py
+++ b/scripts/check_links.py
@@ -9,7 +9,9 @@ URL checking script
 import sys
 import re
 from urllib.parse import urlparse
+from urllib3 import Retry
 import requests
+from requests.adapters import HTTPAdapter
 
 
 RED = '\033[91m'
@@ -19,7 +21,7 @@ BLUE = '\033[94m'
 MAGENTA = '\033[95m'
 RESET = '\033[0m'
 
-TIMEOUT = 3
+TIMEOUT = 10
 VALID_RET = [
     200,
     302,
@@ -55,6 +57,24 @@ def get_links(path):
     return urls
 
 
+def get_session():
+    """get a session that retries failed requests"""
+    session = requests.Session()
+    retry_on = [404, 429, 500, 502, 503, 504]
+    # allowed_methods=False retries whatever the HTTP verb;
+    # raise_on_status=False returns the last response once retries are
+    # exhausted instead of raising, so callers can still read status_code
+    retry = Retry(total=3,
+                  backoff_factor=1,
+                  allowed_methods=False,
+                  status_forcelist=retry_on,
+                  raise_on_status=False)
+    adapter = HTTPAdapter(max_retries=retry)
+    session.mount('http://', adapter)
+    session.mount('https://', adapter)
+    return session
+
+
 def check_links(urls):
     """check urls"""
     cnt = 0
@@ -93,10 +113,11 @@ def check_links(urls):
             )
             print(msg)
             verb = 'get'
-            ret = requests.get(url,
-                               timeout=TIMEOUT,
-                               allow_redirects=True,
-                               headers=HEADERS).status_code
+            sess = get_session()
+            ret = sess.get(url,
+                           timeout=TIMEOUT,
+                           allow_redirects=True,
+                           headers=HEADERS).status_code
             if ret not in VALID_RET:
                 print(f' {RED}[ERROR]{RESET} {url} returned {ret}')
                 return False