summary | refs | log | tree | commit | diff
path: root/scrapers
diff options
context:
space:
mode:
author Martin Fischer <martin@push-f.com> 2021-03-03 16:38:37 +0100
committer Martin Fischer <martin@push-f.com> 2021-03-03 16:41:28 +0100
commit 30099b5d9b616b820341b70582f26685597cebe5 (patch)
tree a86d86629b7bf260bf056e6e9c8c3e024aab0d04 /scrapers
parent 294ec05490f639c08d69d010f769b8d60aa3650d (diff)
differentiate between abbreviations and redirects
Diffstat (limited to 'scrapers')
-rwxr-xr-x scrapers/at.py 10
-rwxr-xr-x scrapers/de.py 1
2 files changed, 11 insertions, 0 deletions
diff --git a/scrapers/at.py b/scrapers/at.py
index bc4d874..1af9894 100755
--- a/scrapers/at.py
+++ b/scrapers/at.py
@@ -50,6 +50,16 @@ for page in pages:
)
if 'Abkuerzung' in info:
data['abbr'] = info['Abkuerzung'].strip()
+ data['redir'] = data['abbr'].lower()\
+ .replace(')', '')\
+ .replace('(', '')\
+ .replace(' – ', '-')\
+ .replace(' ', '-')\
+ .replace('\xa0', '-')\
+ .replace('ä', 'ae')\
+ .replace('ü', 'ue')\
+ .replace('ö', 'oe')\
+ .replace('ß', 'ss')
normen[info['Gesetzesnummer']] = data
with open('laws/at.json', 'w') as f:
diff --git a/scrapers/de.py b/scrapers/de.py
index 7e0cd1f..9450143 100755
--- a/scrapers/de.py
+++ b/scrapers/de.py
@@ -21,6 +21,7 @@ for idx, l in enumerate(LETTERS, 1):
title = el[1].tail.strip(),
url = urllib.parse.urljoin(url, target),
abbr = abbr,
+ redir = abbr,
))
with open('laws/de.json', 'w') as f:
json.dump(sorted(laws, key=lambda l: l['title']), f, indent=2, ensure_ascii=False)