From e29d27533725819ec3f6d05a27048d3d2627b53e Mon Sep 17 00:00:00 2001 From: Martin Fischer Date: Tue, 8 Apr 2025 19:25:36 +0200 Subject: refactor: port fetchers to Go * Austria: upgraded to RIS API v2.6 because v2.5 has been turned off --- scrapers/de.py | 27 --------------------------- 1 file changed, 27 deletions(-) delete mode 100755 scrapers/de.py (limited to 'scrapers/de.py') diff --git a/scrapers/de.py b/scrapers/de.py deleted file mode 100755 index 9450143..0000000 --- a/scrapers/de.py +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env python3 -import json -import urllib.parse - -import lxml.html -import requests - -laws = [] - -LETTERS = 'ABCDEFGHIJKLMNOPQRSTUVWYZ123456789' - -for idx, l in enumerate(LETTERS, 1): - print(f'fetching {idx}/{len(LETTERS)}') - url = 'https://www.gesetze-im-internet.de/Teilliste_{}.html'.format(l) - req = requests.get(url) - root = lxml.html.fromstring(req.text) - for el in root.get_element_by_id('paddingLR12'): - target = el[0].get('href').replace('index.html', '') - abbr = target.strip('/.') - laws.append(dict( - title = el[1].tail.strip(), - url = urllib.parse.urljoin(url, target), - abbr = abbr, - redir = abbr, - )) -with open('laws/de.json', 'w') as f: - json.dump(sorted(laws, key=lambda l: l['title']), f, indent=2, ensure_ascii=False) -- cgit v1.2.3