Skip to content

Commit 02a168a

Browse files
committed
Add scripts to fetch+update qubes-translated submodule
Fetch the updates made by the _utils/transifex-* scripts. The transifex-pull script performs quite detailed inspections and applies fixups. At this stage, apply only a basic sanity check with a single purpose: do not allow translated content to subvert the original English content.
1 parent d1f7fa9 commit 02a168a

File tree

2 files changed

+225
-0
lines changed

2 files changed

+225
-0
lines changed

_utils/update-translated

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#!/bin/bash
#
# Fetch the latest content of the _translated submodule, run the sanity
# check on it and, only if the check passes, fast-forward the submodule,
# commit the new submodule pointer, create a signed tag and push both.
#
# Must be run from the top-level directory of the repository (it refers to
# _translated and _utils/verify-translated by relative path).

set -e

# Fetch into a uniquely named temporary branch so the submodule's checked
# out state is untouched until the content has been verified.
tmpbranch="new-$$"
git -C _translated fetch origin master:"$tmpbranch"
tmpdir=$(mktemp -d)
# Always remove the scratch clone and the temporary branch on exit.
# Note: use the real "branch" subcommand; "br" only works if the user
# happens to have such an alias configured.
trap 'rm -rf "$tmpdir"; git -C _translated branch -D "$tmpbranch"' EXIT
# Verify a separate checkout of the fetched branch, not the live submodule.
git clone --shared "$PWD/_translated" -b "$tmpbranch" "$tmpdir/translated"

if ! _utils/verify-translated "$tmpdir/translated"; then
    echo "Translated content did not pass sanity check, not updating" >&2
    # TODO: consider some louder alert? email? issue on github?
    exit 1
fi

# Only fast-forward: never create merge commits in the submodule.
git -C _translated merge --ff-only "$tmpbranch"
git add _translated

git commit -m 'autoupdate: _translated'
commit_id=$(git rev-parse HEAD)
tag_name="auto_${commit_id:0:8}"
git tag -s -m "Automatic tag for commit $commit_id" "$tag_name"
git push origin master "$tag_name"

_utils/verify-translated

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
#!/usr/bin/env python3
2+
3+
# Simple verifier for qubes-translated repository content sanity.
4+
# This script looks at the frontmatter of each file and mostly checks that:
5+
# - lang: key is present and matches the directory name
6+
# - ref: key is present
7+
# - url-related parameters (redirect_from, permalink) are properly language-scoped
8+
# - no unexpected entries in the frontmatter are present
9+
#
10+
# Besides that, it verifies that no unexpected files are present.
11+
#
12+
# Usage:
13+
# $0 <qubes-translated dir>
14+
# Supported env variables:
15+
# - TRANSLATED_LANGS - list of expected languages
16+
17+
import argparse
18+
import os
19+
import string
20+
import yaml
21+
22+
# Expected language directories; the TRANSLATED_LANGS environment variable
# (whitespace-separated) replaces the built-in default when it is set.
TRANSLATED_LANGS = os.environ.get('TRANSLATED_LANGS', 'de').split()

# Characters accepted in file names (and kept verbatim in error messages).
SAFE_PATH_CHARS = ''.join((string.ascii_letters, string.digits, '/_-.'))

# Every key that may appear in a page's frontmatter.
ALLOWED_FRONTMATTER_KEYS = [
    'title',
    'lang',
    'ref',
    'permalink',
    'redirect_from',
    'redirect_to',
    'layout',
    'model',
]

# Keys that every page's frontmatter must contain.
MANDATORY_FRONTMATTER_KEYS = [
    'title',
    'lang',
    'ref',
    'layout',
]

# URL prefixes accepted as redirect_to targets in addition to the
# language-scoped prefix.
ALLOWED_EXTERNAL_REDIRECT_TARGETS = [
    'https://github.com/Qubes-Community/Contents/blob/master/',
]

# Accepted values of the 'layout' frontmatter key.
ALLOWED_LAYOUTS = [
    'doc',
    'doc-index',
    'doc-full',
    'hcl',
    'downloads',
    'news',
    'team',
    'home',
    'default',
    'experts',
    'sidebar',
]

# TODO: consider allowing some layouts only on some pages (and consider
# their redirect_from too)

# Command line: a single positional argument, the directory to verify.
parser = argparse.ArgumentParser()
parser.add_argument('directory')
40+
41+
class VerificationError(Exception):
    """Raised when the verified content fails a sanity check.

    The exception message has the form ``<path>: <msg>``. Because the path
    comes from the content being verified, every character outside
    SAFE_PATH_CHARS is replaced with ``?`` before it is put into the
    message.
    """

    def __init__(self, path, msg):
        """Build the message from the offending *path* and a description.

        Args:
            path: path (or directory entry name) the error is about.
            msg: human-readable description of the failed check.
        """
        safe_name = ''.join(
            char if char in SAFE_PATH_CHARS else '?' for char in path)
        # Use the sanitized name, not the raw path, in the message.
        super().__init__('{}: {}'.format(safe_name, msg))
45+
46+
def verify_readme(path):
    """Check that the README contains nothing but plain text.

    The file is rejected when it contains ``---`` (a frontmatter marker),
    ``<`` (HTML) or ``{`` (liquid templates) anywhere in its text.

    Args:
        path: path of the README file to check.

    Raises:
        VerificationError: if the file is not valid UTF-8 or contains any of
            the forbidden sequences.
    """
    try:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # locale of the environment the script runs in.
        with open(path, encoding='utf-8') as f:
            readme_text = f.read()
    except UnicodeDecodeError as e:
        raise VerificationError(path, 'not valid UTF-8: {!s}'.format(e)) from e

    if '---' in readme_text:
        raise VerificationError(path, 'may not contain frontmatter')
    if '<' in readme_text:
        raise VerificationError(path, 'may not contain HTML')
    if '{' in readme_text:
        raise VerificationError(path, 'may not contain liquid templates')
56+
57+
58+
def verify_md_file(lang, path):
    """Extract the frontmatter of a page and verify it.

    The file must start with a ``---`` line. The text up to the next
    ``---`` line is parsed as YAML and passed to verify_frontmatter().

    Args:
        lang: language code the file belongs to.
        path: path of the file to check.

    Raises:
        VerificationError: if the file is not valid UTF-8, has no
            frontmatter, the frontmatter cannot be parsed, or
            verify_frontmatter() rejects it.
    """
    try:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # locale of the environment the script runs in.
        with open(path, encoding='utf-8') as f:
            file_content = f.read()
    except UnicodeDecodeError as e:
        raise VerificationError(path, 'not valid UTF-8: {!s}'.format(e)) from e

    # there must be frontmatter
    if not file_content.startswith('---\n'):
        raise VerificationError(path, 'missing frontmatter')

    # better be more strict - may catch too much (if another separator is
    # used), but then loading yaml will detect multiple documents
    frontmatter_text = file_content[4:].split('\n---\n')[0]
    # there could be _just_ frontmatter too
    if frontmatter_text.endswith('\n---'):
        frontmatter_text = frontmatter_text[:-4]
    try:
        frontmatter = yaml.safe_load(frontmatter_text)
    except Exception as e:
        # Deliberately broad: any failure to load the frontmatter, whatever
        # its exception type, must be reported as a verification failure.
        raise VerificationError(path, 'failed to parse frontmatter: {!s}'.format(e)) from e
    verify_frontmatter(lang, path, frontmatter)
77+
78+
79+
def verify_frontmatter(lang, path, frontmatter):
    """Verify every entry of a page's parsed frontmatter.

    Checks performed:
      - the frontmatter is a mapping with only keys from
        ALLOWED_FRONTMATTER_KEYS and all of MANDATORY_FRONTMATTER_KEYS;
      - ``lang`` equals *lang*;
      - ``ref`` is an integer;
      - ``permalink`` and every ``redirect_from`` URL is a string starting
        with ``/<lang>/`` and not containing ``/..``;
      - ``redirect_to`` is a single URL (string or 1-element list) without
        ``/..`` that starts with ``/<lang>/`` or one of
        ALLOWED_EXTERNAL_REDIRECT_TARGETS;
      - ``layout`` is one of ALLOWED_LAYOUTS;
      - ``title`` is a string without ``<`` or ``%``;
      - ``model``, if present, equals ``'all'``.

    Args:
        lang: language code the page belongs to.
        path: path of the page, used only in error messages.
        frontmatter: the object loaded from the page's YAML frontmatter.

    Raises:
        VerificationError: if any of the checks fails.
    """
    # The YAML loader may return None (empty frontmatter), a scalar or a
    # list; report those as a verification failure rather than crashing
    # with an unrelated exception further down.
    if not isinstance(frontmatter, dict):
        raise VerificationError(path, 'frontmatter is not a mapping')

    # double check if all entries were verified
    verified = []

    url_prefix = '/{}/'.format(lang)
    if any(key not in ALLOWED_FRONTMATTER_KEYS for key in frontmatter):
        raise VerificationError(path, 'unexpected frontmatter key')

    for key in MANDATORY_FRONTMATTER_KEYS:
        if key not in frontmatter:
            raise VerificationError(path, key + ' missing in frontmatter')

    if lang != frontmatter['lang']:
        raise VerificationError(path, 'lang mismatch')

    verified.append('lang')

    # bool is a subclass of int, so exclude it explicitly
    ref = frontmatter['ref']
    if isinstance(ref, bool) or not isinstance(ref, int):
        raise VerificationError(path, 'invalid ref format')

    verified.append('ref')

    if 'permalink' in frontmatter:
        permalink = frontmatter['permalink']
        if not isinstance(permalink, str):
            raise VerificationError(path, 'invalid permalink format')
        # same traversal check as for redirect_from / redirect_to: the
        # prefix check alone would accept e.g. '/<lang>/../something'
        if '/..' in permalink:
            raise VerificationError(path, '.. in permalink')
        if not permalink.startswith(url_prefix):
            raise VerificationError(path, 'invalid permalink')

        verified.append('permalink')

    if 'redirect_from' in frontmatter:
        if isinstance(frontmatter['redirect_from'], str):
            redirect_from = [frontmatter['redirect_from']]
        elif isinstance(frontmatter['redirect_from'], list):
            redirect_from = frontmatter['redirect_from']
        else:
            raise VerificationError(path, 'invalid redirect_from format')

        for url in redirect_from:
            if not isinstance(url, str):
                raise VerificationError(path, 'invalid redirect_from format')
            if '/..' in url:
                raise VerificationError(path, '.. in url')
            if not url.startswith(url_prefix):
                raise VerificationError(path, 'invalid redirect_from')

        verified.append('redirect_from')

    if 'redirect_to' in frontmatter:
        url = frontmatter['redirect_to']
        if isinstance(url, list):
            if len(url) != 1:
                raise VerificationError(path, 'if redirect_to is a list, must be 1-element')
            url = url[0]
        if not isinstance(url, str):
            raise VerificationError(path, 'invalid redirect_to format')
        if '/..' in url:
            raise VerificationError(path, '.. in redirect_to')
        if not any(url.startswith(prefix) for prefix in ALLOWED_EXTERNAL_REDIRECT_TARGETS + [url_prefix]):
            raise VerificationError(path, 'forbidden redirect_to target')

        verified.append('redirect_to')

    if frontmatter['layout'] not in ALLOWED_LAYOUTS:
        raise VerificationError(path, 'forbidden layout')

    verified.append('layout')

    title = frontmatter['title']
    if not isinstance(title, str):
        raise VerificationError(path, 'invalid title format')

    # avoid HTML in title
    if '<' in title or '%' in title:
        raise VerificationError(path, 'invalid character in title')

    verified.append('title')

    # if 'model' is present, must have 'all' value
    if 'model' in frontmatter:
        if frontmatter['model'] != 'all':
            raise VerificationError(path, 'invalid model value')
        verified.append('model')

    # intentionally compare lists, not sets, to catch duplicates too
    if sorted(verified) != sorted(frontmatter):
        raise VerificationError(path, 'BUG, some frontmatter entries were not verified')

    # all is ok
164+
165+
166+
def verify_lang(lang, path):
    """Verify every file found under one language directory.

    Walks *path* recursively. Each file name may consist only of
    SAFE_PATH_CHARS; ``.md`` and ``.html`` files are passed to
    verify_md_file(), ``.yml`` files are accepted without inspection and
    any other file is rejected.

    Args:
        lang: language code of the directory.
        path: path of the language directory.

    Raises:
        VerificationError: on an unsafe file name, an unexpected file type,
            or when verify_md_file() rejects a file.
    """
    # .html files are expected to carry the same frontmatter as .md files
    frontmatter_suffixes = ('.md', '.html')

    for root, _subdirs, names in os.walk(path):
        for name in names:
            full_path = os.path.join(root, name)
            if not all(char in SAFE_PATH_CHARS for char in name):
                raise VerificationError(full_path, 'unsafe characters in filename')

            if name.endswith(frontmatter_suffixes):
                verify_md_file(lang, full_path)
                continue
            if name.endswith('.yml'):
                # those are loaded scoped anyway, so can mess only own language
                continue
            raise VerificationError(full_path, 'unexpected file type')
183+
184+
185+
186+
def main():
    """Verify every top-level entry of the directory given on the command line.

    ``.git`` is skipped, ``README.md`` is checked with verify_readme(),
    entries listed in TRANSLATED_LANGS are checked with verify_lang() and
    anything else is rejected.

    Raises:
        VerificationError: on an unexpected top-level entry or when any of
            the per-entry checks fails.
    """
    options = parser.parse_args()
    base_dir = options.directory

    for entry in os.listdir(base_dir):
        if entry == '.git':
            continue

        entry_path = os.path.join(base_dir, entry)
        if entry == 'README.md':
            verify_readme(entry_path)
        elif entry in TRANSLATED_LANGS:
            verify_lang(entry, entry_path)
        else:
            raise VerificationError(entry, 'unexpected language dir')


if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)