diff options
author | Martin Fischer <martin@push-f.com> | 2025-04-08 19:25:36 +0200 |
---|---|---|
committer | Martin Fischer <martin@push-f.com> | 2025-04-14 07:04:45 +0200 |
commit | e29d27533725819ec3f6d05a27048d3d2627b53e (patch) | |
tree | 5afba50408b25179edb4ea6445acfe1d3e051488 /scrapers/de.py | |
parent | 96236c9d80cea2d6ba83591a7d08a8cc096fd8d3 (diff) |
refactor: port fetchers to Go
* Austria: upgraded to RIS API v2.6 because v2.5 has been turned off
Diffstat (limited to 'scrapers/de.py')
-rwxr-xr-x | scrapers/de.py | 27 |
1 file changed, 0 insertions, 27 deletions
```diff
diff --git a/scrapers/de.py b/scrapers/de.py
deleted file mode 100755
index 9450143..0000000
--- a/scrapers/de.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/usr/bin/env python3
-import json
-import urllib.parse
-
-import lxml.html
-import requests
-
-laws = []
-
-LETTERS = 'ABCDEFGHIJKLMNOPQRSTUVWYZ123456789'
-
-for idx, l in enumerate(LETTERS, 1):
-    print(f'fetching {idx}/{len(LETTERS)}')
-    url = 'https://www.gesetze-im-internet.de/Teilliste_{}.html'.format(l)
-    req = requests.get(url)
-    root = lxml.html.fromstring(req.text)
-    for el in root.get_element_by_id('paddingLR12'):
-        target = el[0].get('href').replace('index.html', '')
-        abbr = target.strip('/.')
-        laws.append(dict(
-            title = el[1].tail.strip(),
-            url = urllib.parse.urljoin(url, target),
-            abbr = abbr,
-            redir = abbr,
-        ))
-with open('laws/de.json', 'w') as f:
-    json.dump(sorted(laws, key=lambda l: l['title']), f, indent=2, ensure_ascii=False)
```