From 71507a260450730da3abb11e3bc1f6a49511b3e6 Mon Sep 17 00:00:00 2001
From: deadc0de6
Date: Thu, 19 Jan 2023 23:02:31 +0100
Subject: [PATCH] adding doc links tests (ext and int)

---
 scripts/check_links.py | 94 ++++++++++++++++++++++++++++++++++++++++++
 test-doc.sh            | 47 ++++++--------------
 tests-requirements.txt |  3 +-
 3 files changed, 108 insertions(+), 36 deletions(-)
 create mode 100755 scripts/check_links.py

diff --git a/scripts/check_links.py b/scripts/check_links.py
new file mode 100755
index 0000000..ba6799b
--- /dev/null
+++ b/scripts/check_links.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python3
+"""
+author: deadc0de6 (https://github.com/deadc0de6)
+Copyright (c) 2023, deadc0de6
+
+URL checking script
+"""
+
+import sys
+import re
+from urllib.parse import urlparse
+import requests
+
+
+TIMEOUT = 3
+VALID_RET = [
+    200,
+    302,
+]
+IGNORES = [
+    'badgen.net',
+]
+USER_AGENT = (
+    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
+    'AppleWebKit/537.36 (KHTML, like Gecko) '
+    'Chrome/58.0.3029.110 Safari/537.36'
+)
+HEADERS = {
+    'User-Agent': USER_AGENT,
+}
+PATTERN = (
+    r"https?://[a-zA-Z0-9][a-zA-Z0-9-]{1,61}"
+    r"[a-zA-Z0-9]\.[=a-zA-Z0-9\_\/\?\&\%\+\#\.\-]+"
+)
+
+
+def get_links(path):
+    """return the unique URLs matching PATTERN in the file at path"""
+    with open(path, encoding='utf-8') as file:
+        content = file.read()
+    entries = re.findall(PATTERN, content)
+    urls = list(set(entries))
+    return urls
+
+
+def check_links(urls):
+    """return False on the first url not answering with a VALID_RET status"""
+    cnt = 0
+    ign = 0
+    for url in urls:
+        cnt += 1
+        hostname = urlparse(url).hostname
+        if hostname in IGNORES:
+            print(f' [IGN] {url}')
+            ign += 1
+            continue
+
+        verb = 'head'
+        ret = requests.head(url,
+                            timeout=TIMEOUT,
+                            allow_redirects=True,
+                            headers=HEADERS).status_code
+        if ret not in VALID_RET:
+            verb = 'get'
+            ret = requests.get(url,
+                               timeout=TIMEOUT,
+                               allow_redirects=True,
+                               headers=HEADERS).status_code
+        if ret not in VALID_RET:
+            print(f' [ERROR] {url} returned {ret}')
+            return False
+        print(f' [OK-{verb}-{ret}] {url}')
+    print(f'OK - total {cnt} links checked ({ign} ignored)')
+    return True
+
+
+if __name__ == '__main__':
+    if len(sys.argv) < 2:
+        print(f'usage: {sys.argv[0]} <path>')
+        sys.exit(1)
+
+    print(f'checking {sys.argv[1]} for links...')
+    links = get_links(sys.argv[1])
+    print(f' found {len(links)} links')
+    try:
+        if not check_links(links):
+            sys.exit(1)
+    except ValueError as exc:
+        print(f'error {exc}')
+        sys.exit(1)
+    except requests.exceptions.RequestException as exc:
+        print(f'requests error {exc}')
+        sys.exit(1)
+    sys.exit(0)
diff --git a/test-doc.sh b/test-doc.sh
index 898b2a4..d2aa2dc 100755
--- a/test-doc.sh
+++ b/test-doc.sh
@@ -2,46 +2,25 @@
 # author: deadc0de6 (https://github.com/deadc0de6)
 # Copyright (c) 2022, deadc0de6
 
-## test the doc with linkcheckMarkdown
-## pip install --user linkcheckmd
+## test doc external links
+find . -type f -iname '*.md' | while read -r line; do
+  ./scripts/check_links.py "${line}"
+done
+
+## test the doc internal links
+## https://github.com/remarkjs/remark-validate-links
+## https://github.com/tcort/markdown-link-check
 set +e
-which linkcheckMarkdown >/dev/null 2>&1
+which remark >/dev/null 2>&1
 r="$?"
 set -e
 if [ "$r" != "0" ]; then
-  echo "[ERROR] install \"linkcheckMarkdown\" to test for dead links"
+  echo "[WARNING] install \"remark\" to test the doc"
   exit 1
 fi
-find . -type f -iname '*.md' | while read line; do
-  echo "checking links in \"${line}\""
-  linkcheckMarkdown ${line}
+
+find . -type f -iname '*.md' | while read -r line; do
+  remark -f -u validate-links "${line}"
 done
-
-### test the doc with remark
-### https://github.com/remarkjs/remark-validate-links
-#set +e
-#which remark >/dev/null 2>&1
-#r="$?"
-#set -e
-#if [ "$r" != "0" ]; then
-# echo "[WARNING] install \"remark\" to test the doc"
-#else
-# remark -f -u validate-links docs/
-# remark -f -u validate-links *.md
-#fi
-
-### test the doc with markdown-link-check
-### https://github.com/tcort/markdown-link-check
-#set +e
-#which markdown-link-check >/dev/null 2>&1
-#r="$?"
-#set -e
-#if [ "$r" != "0" ]; then
-# echo "[WARNING] install \"markdown-link-check\" to test the doc"
-#else
-# for i in `find docs -iname '*.md'`; do markdown-link-check $i; done
-# markdown-link-check README.md
-#fi
-
 echo "documentation OK"
\ No newline at end of file
diff --git a/tests-requirements.txt b/tests-requirements.txt
index 65e131d..b1f2e91 100644
--- a/tests-requirements.txt
+++ b/tests-requirements.txt
@@ -5,5 +5,4 @@ coveralls; python_version > '3.5'
 pyflakes; python_version > '3.5'
 pylint; python_version > '3.5'
 halo; python_version > '3.5'
-distro; python_version > '3.5'
-linkcheckmd; python_version > '3.5'
\ No newline at end of file
+distro; python_version > '3.5'
\ No newline at end of file