|
| 1 | +#!/usr/bin/env python3 |
| 2 | + |
| 3 | +# Simple verifier for qubes-translated repository content sanity. |
| 4 | +# This script looks at the frontmatter of each file and mostly checks that: |
| 5 | +# - lang: key is present and matches the directory name |
| 6 | +# - ref: key is present |
| 7 | +# - url-related parameters (redirect_from, permalink) are properly language-scoped |
| 8 | +# - no unexpected entries in the frontmatter are present |
| 9 | +# |
| 10 | +# Besides that, it verifies that no unexpected files are present. |
| 11 | +# |
| 12 | +# Usage: |
| 13 | +# $0 <qubes-translated dir> |
| 14 | +# Supported env variables: |
| 15 | +# - TRANSLATED_LANGS - whitespace-separated list of expected languages (default: de) |
| 16 | + |
| 17 | +import argparse |
| 18 | +import os |
| 19 | +import string |
| 20 | +import yaml |
| 21 | + |
# Languages expected to have a top-level directory. The built-in default can
# be replaced with a whitespace-separated TRANSLATED_LANGS env variable.
TRANSLATED_LANGS = os.environ.get('TRANSLATED_LANGS', 'de').split()

# Characters considered safe in a path.
SAFE_PATH_CHARS = ''.join((string.ascii_letters, string.digits, '/_-.'))

# Every key that may appear in a page's frontmatter.
ALLOWED_FRONTMATTER_KEYS = [
    'title',
    'lang',
    'ref',
    'permalink',
    'redirect_from',
    'redirect_to',
    'layout',
    'model',
]

# Keys that every page's frontmatter must contain.
MANDATORY_FRONTMATTER_KEYS = [
    'title',
    'lang',
    'ref',
    'layout',
]

# URL prefixes outside of the language scope that redirect_to may point at.
ALLOWED_EXTERNAL_REDIRECT_TARGETS = [
    'https://github.com/Qubes-Community/Contents/blob/master/',
]

# Accepted values of the 'layout' frontmatter key.
ALLOWED_LAYOUTS = [
    'doc',
    'doc-index',
    'doc-full',
    'hcl',
    'downloads',
    'news',
    'team',
    'home',
    'default',
    'experts',
    'sidebar',
]

# TODO: consider allowing some layouts only on some pages (and consider their
# redirect_from too)

# Command line: a single positional argument, the directory to verify.
parser = argparse.ArgumentParser()
parser.add_argument('directory')
| 40 | + |
class VerificationError(Exception):
    """Raised when something in the translated repository fails verification.

    The exception message has the form ``<path>: <msg>``. The path comes from
    the repository being verified, so every character of it that is not in
    SAFE_PATH_CHARS is replaced with ``?`` before it is put in the message.

    Args:
        path: path (or name) of the offending file or directory.
        msg: short description of what is wrong with it.
    """

    def __init__(self, path, msg):
        safe_name = ''.join(
            char if char in SAFE_PATH_CHARS else '?' for char in path)
        # Use the sanitised name, not the raw path, in the message.
        super().__init__('{}: {}'.format(safe_name, msg))
| 45 | + |
def verify_readme(path):
    """Check that the top-level README contains plain text only.

    The file is rejected if it contains anything that looks like frontmatter
    (``---``), HTML (``<``) or a liquid template (``{``). These are plain
    substring tests, so they are stricter than an actual parser would be.

    Args:
        path: path of the README file.

    Raises:
        VerificationError: if one of the forbidden markers is present.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the locale
    # of the machine running the check.
    with open(path, encoding='utf-8') as f:
        readme_text = f.read()

    # (marker, complaint) pairs, checked in this order.
    forbidden = (
        ('---', 'may not contain frontmatter'),
        ('<', 'may not contain HTML'),
        ('{', 'may not contain liquid templates'),
    )
    for marker, complaint in forbidden:
        if marker in readme_text:
            raise VerificationError(path, complaint)
| 56 | + |
| 57 | + |
def verify_md_file(lang, path):
    """Extract the frontmatter of a page and verify it.

    The file must start with a ``---`` line. Everything up to the next
    ``---`` line is loaded as YAML and handed to verify_frontmatter().

    Args:
        lang: language the file belongs to.
        path: path of the file to check.

    Raises:
        VerificationError: if the file is not valid UTF-8, has no
            frontmatter, the frontmatter cannot be loaded, or
            verify_frontmatter() rejects it.
    """
    # Decode explicitly as UTF-8 (not whatever the locale says) and report
    # undecodable files like any other verification failure.
    try:
        with open(path, encoding='utf-8') as f:
            file_content = f.read()
    except UnicodeDecodeError as e:
        raise VerificationError(
            path, 'not valid UTF-8: {!s}'.format(e)) from e

    # there must be frontmatter
    if not file_content.startswith('---\n'):
        raise VerificationError(path, 'missing frontmatter')

    # better be more strict - may catch too much (if another separator is
    # used), but then loading yaml will detect multiple documents
    frontmatter_text = file_content[4:].split('\n---\n')[0]
    # there could be _just_ frontmatter too (closing '---' without a
    # trailing newline at the very end of the file)
    if frontmatter_text.endswith('\n---'):
        frontmatter_text = frontmatter_text[:-4]

    # Deliberately broad: any failure to load the frontmatter is reported as
    # a verification failure of this file.
    try:
        frontmatter = yaml.safe_load(frontmatter_text)
    except Exception as e:
        raise VerificationError(
            path, 'failed to parse frontmatter: {!s}'.format(e)) from e

    verify_frontmatter(lang, path, frontmatter)
| 77 | + |
| 78 | + |
def verify_frontmatter(lang, path, frontmatter):
    """Verify the loaded frontmatter of a single page.

    Checks that only allowed keys are used, that all mandatory keys are
    present, and that every value has an acceptable type and content. URLs
    (permalink, redirect_from, redirect_to) must stay below ``/<lang>/``;
    redirect_to may additionally point at one of
    ALLOWED_EXTERNAL_REDIRECT_TARGETS.

    Args:
        lang: language the page belongs to.
        path: path of the page, used for error reporting only.
        frontmatter: the value loaded from the page's YAML frontmatter.

    Raises:
        VerificationError: on the first problem found.
    """
    # The loader may return something other than a mapping (for example
    # None for an empty document); reject that before looking at keys.
    if not isinstance(frontmatter, dict):
        raise VerificationError(path, 'frontmatter is not a mapping')

    # double check if all entries were verified
    verified = []

    url_prefix = '/{}/'.format(lang)
    if any(key not in ALLOWED_FRONTMATTER_KEYS for key in frontmatter):
        raise VerificationError(path, 'unexpected frontmatter key')

    for key in MANDATORY_FRONTMATTER_KEYS:
        if key not in frontmatter:
            raise VerificationError(path, key + ' missing in frontmatter')

    if lang != frontmatter['lang']:
        raise VerificationError(path, 'lang mismatch')

    verified.append('lang')

    # bool is a subclass of int, so exclude it explicitly - otherwise
    # 'ref: true' would be accepted as a number
    ref = frontmatter['ref']
    if isinstance(ref, bool) or not isinstance(ref, int):
        raise VerificationError(path, 'invalid ref format')

    verified.append('ref')

    if 'permalink' in frontmatter:
        permalink = frontmatter['permalink']
        if not isinstance(permalink, str):
            raise VerificationError(path, 'invalid permalink')
        # same rule as for the redirect URLs: '/<lang>/../x' starts with
        # the right prefix but is not language-scoped
        if '/..' in permalink:
            raise VerificationError(path, '.. in permalink')
        if not permalink.startswith(url_prefix):
            raise VerificationError(path, 'invalid permalink')

        verified.append('permalink')

    if 'redirect_from' in frontmatter:
        if isinstance(frontmatter['redirect_from'], str):
            redirect_from = [frontmatter['redirect_from']]
        elif isinstance(frontmatter['redirect_from'], list):
            redirect_from = frontmatter['redirect_from']
        else:
            raise VerificationError(path, 'invalid redirect_from format')

        for url in redirect_from:
            # list entries may be of any YAML type
            if not isinstance(url, str):
                raise VerificationError(path, 'invalid redirect_from format')
            if '/..' in url:
                raise VerificationError(path, '.. in url')
            if not url.startswith(url_prefix):
                raise VerificationError(path, 'invalid redirect_from')

        verified.append('redirect_from')

    if 'redirect_to' in frontmatter:
        url = frontmatter['redirect_to']
        if isinstance(url, list):
            if len(url) != 1:
                raise VerificationError(
                    path, 'if redirect_to is a list, must be 1-element')
            url = url[0]
        if not isinstance(url, str):
            raise VerificationError(path, 'invalid redirect_to format')
        if '/..' in url:
            raise VerificationError(path, '.. in redirect_to')
        allowed_prefixes = ALLOWED_EXTERNAL_REDIRECT_TARGETS + [url_prefix]
        if not any(url.startswith(prefix) for prefix in allowed_prefixes):
            raise VerificationError(path, 'forbidden redirect_to target')

        verified.append('redirect_to')

    if frontmatter['layout'] not in ALLOWED_LAYOUTS:
        raise VerificationError(path, 'forbidden layout')

    verified.append('layout')

    title = frontmatter['title']
    if not isinstance(title, str):
        raise VerificationError(path, 'invalid title format')

    # avoid HTML in title
    if '<' in title or '%' in title:
        raise VerificationError(path, 'invalid character in title')

    verified.append('title')

    # if 'model' is present, must have 'all' value
    if 'model' in frontmatter:
        if frontmatter['model'] != 'all':
            raise VerificationError(path, 'invalid model value')
        verified.append('model')

    # intentionally compare lists, not sets, to catch duplicates too
    if sorted(verified) != sorted(frontmatter):
        raise VerificationError(
            path, 'BUG, some frontmatter entries were not verified')

    # all is ok
| 162 | + |
| 163 | + # all is ok |
| 164 | + |
| 165 | + |
def verify_lang(lang, path):
    """Verify every file below the directory of one language.

    Walks the tree rooted at ``path``. Directory and file names may only
    consist of SAFE_PATH_CHARS. ``.md`` and ``.html`` files get their
    frontmatter verified, ``.yml`` files are accepted as they are, and any
    other file is an error.

    Args:
        lang: language the directory belongs to.
        path: path of the language directory.

    Raises:
        VerificationError: on the first offending directory or file.
    """
    for dirpath, dirnames, filenames in os.walk(path):
        # Directory names end up in paths just like file names do, so hold
        # them to the same character set.
        for dirname in dirnames:
            if any(c not in SAFE_PATH_CHARS for c in dirname):
                raise VerificationError(
                    os.path.join(dirpath, dirname),
                    'unsafe characters in directory name')

        for filename in filenames:
            filepath = os.path.join(dirpath, filename)
            if any(c not in SAFE_PATH_CHARS for c in filename):
                raise VerificationError(
                    filepath, 'unsafe characters in filename')

            if filename.endswith('.yml'):
                # those are loaded scoped anyway, so can mess only own
                # language
                continue
            if not filename.endswith(('.md', '.html')):
                raise VerificationError(filepath, 'unexpected file type')

            # the frontmatter of .html files is expected to be the same as
            # that of .md files
            verify_md_file(lang, filepath)
| 183 | + |
| 184 | + |
| 185 | + |
def main():
    """Verify every top-level entry of the directory given on the command line.

    ``.git`` is ignored, ``README.md`` is checked with verify_readme(), and
    entries named after one of TRANSLATED_LANGS are checked with
    verify_lang(). Anything else raises VerificationError.
    """
    args = parser.parse_args()
    root = args.directory

    for entry in os.listdir(root):
        if entry == '.git':
            continue

        entry_path = os.path.join(root, entry)
        if entry == 'README.md':
            verify_readme(entry_path)
        elif entry in TRANSLATED_LANGS:
            verify_lang(entry, entry_path)
        else:
            raise VerificationError(entry, 'unexpected language dir')


if __name__ == '__main__':
    main()
0 commit comments