1
0
mirror of https://github.com/deadc0de6/dotdrop.git synced 2026-02-04 15:39:43 +00:00

adding doc links tests (ext and int)

This commit is contained in:
deadc0de6
2023-01-19 23:02:31 +01:00
parent 08bd0b9969
commit 71507a2604
3 changed files with 111 additions and 36 deletions

97
scripts/check_links.py Executable file
View File

@@ -0,0 +1,97 @@
#!/usr/bin/env python3
"""
author: deadc0de6 (https://github.com/deadc0de6)
Copyright (c) 2023, deadc0de6
URL checking script
"""
import sys
import re
from urllib.parse import urlparse
import requests
# timeout (in seconds) handed to every HTTP request
TIMEOUT = 3

# HTTP status codes accepted as "link is alive"
VALID_RET = [200, 302]

# hostnames that are reported as ignored instead of being requested
IGNORES = ['badgen.net']

# browser-like user agent sent with every request
USER_AGENT = ' '.join([
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    'AppleWebKit/537.36 (KHTML, like Gecko)',
    'Chrome/58.0.3029.110 Safari/537.36',
])
HEADERS = {'User-Agent': USER_AGENT}

# regular expression used to extract http(s) URLs from text:
# scheme, an alphanumeric/hyphen host label, a dot, then the
# remaining URL characters
PATTERN = (
    r"https?://[a-zA-Z0-9][a-zA-Z0-9-]{1,61}"
    r"[a-zA-Z0-9]\.[=a-zA-Z0-9\_\/\?\&\%\+\#\.\-]+"
)
def get_links(path):
    """
    return the unique URLs found in the file at <path>

    The file is read as UTF-8 and scanned with PATTERN; duplicates
    are dropped through a set, so the order of the returned list is
    not the order of appearance in the file.
    """
    with open(path, encoding='utf-8') as handle:
        found = re.findall(PATTERN, handle.read())
    return list(set(found))
def check_links(urls):
    """
    check that every url in <urls> is reachable

    URLs whose hostname is listed in IGNORES are reported and skipped.
    Every other URL is requested with HEAD first and, if the returned
    status is not in VALID_RET, once more with GET.

    Returns False as soon as one URL fails both requests and
    True when all URLs were either ignored or answered with a
    valid status. Exceptions raised by the requests are not caught here.
    """
    total = 0
    ignored = 0
    for total, url in enumerate(urls, start=1):
        if urlparse(url).hostname in IGNORES:
            print(f' [IGN] {url}')
            ignored += 1
            continue

        # try the cheap HEAD first, fall back to a full GET
        for verb, fetch in (('head', requests.head), ('get', requests.get)):
            status = fetch(url,
                           timeout=TIMEOUT,
                           allow_redirects=True,
                           headers=HEADERS).status_code
            if status in VALID_RET:
                break
        else:
            # neither verb returned an accepted status
            print(f' [ERROR] {url} returned {status}')
            return False
        print(f' [OK-{verb}-{status}] {url}')

    print(f'OK - total {total} links checked ({ignored} ignored)')
    return True
# script entry point: check_links.py <path>
# exits with 0 when every link found in <path> is valid, 1 otherwise
if __name__ == '__main__':
    if len(sys.argv) < 2:
        print(f'usage: {sys.argv[0]} <path>')
        sys.exit(1)

    print(f'checking {sys.argv[1]} for links...')
    links = get_links(sys.argv[1])
    print(f' found {len(links)} links')
    try:
        if not check_links(links):
            sys.exit(1)
    except ValueError as exc:
        # urlparse() signals malformed URLs with ValueError
        print(f'error {exc}')
        sys.exit(1)
    # no "except urlparse.URLError" here: urlparse is a function without
    # such an attribute, so evaluating that clause raised AttributeError
    # and hid the real error whenever a request failed
    except requests.exceptions.RequestException as exc:
        print(f'requests error {exc}')
        sys.exit(1)
    sys.exit(0)

47
test-doc.sh vendored
View File

@@ -2,46 +2,25 @@
# author: deadc0de6 (https://github.com/deadc0de6)
# Copyright (c) 2022, deadc0de6
## test the doc with linkcheckMarkdown
## pip install --user linkcheckmd
## test doc external links
find . -type f -iname '*.md' | while read -r line; do
./scripts/check_links.py "${line}"
done
## test the doc internal links
## https://github.com/remarkjs/remark-validate-links
## https://github.com/tcort/markdown-link-check
set +e
which linkcheckMarkdown >/dev/null 2>&1
which remark >/dev/null 2>&1
r="$?"
set -e
if [ "$r" != "0" ]; then
echo "[ERROR] install \"linkcheckMarkdown\" to test for dead links"
echo "[WARNING] install \"remark\" to test the doc"
exit 1
fi
find . -type f -iname '*.md' | while read line; do
echo "checking links in \"${line}\""
linkcheckMarkdown ${line}
find . -type f -iname '*.md' | while read -r line; do
remark -f -u validate-links "${line}"
done
### test the doc with remark
### https://github.com/remarkjs/remark-validate-links
#set +e
#which remark >/dev/null 2>&1
#r="$?"
#set -e
#if [ "$r" != "0" ]; then
# echo "[WARNING] install \"remark\" to test the doc"
#else
# remark -f -u validate-links docs/
# remark -f -u validate-links *.md
#fi
### test the doc with markdown-link-check
### https://github.com/tcort/markdown-link-check
#set +e
#which markdown-link-check >/dev/null 2>&1
#r="$?"
#set -e
#if [ "$r" != "0" ]; then
# echo "[WARNING] install \"markdown-link-check\" to test the doc"
#else
# for i in `find docs -iname '*.md'`; do markdown-link-check $i; done
# markdown-link-check README.md
#fi
echo "documentation OK"

View File

@@ -5,5 +5,4 @@ coveralls; python_version > '3.5'
pyflakes; python_version > '3.5'
pylint; python_version > '3.5'
halo; python_version > '3.5'
distro; python_version > '3.5'
linkcheckmd; python_version > '3.5'
distro; python_version > '3.5'