summary | refs | log | tree | commit | diff
path: root/scrapers/de.py
diff options
context:
space:
mode:
Diffstat (limited to 'scrapers/de.py')
-rwxr-xr-x scrapers/de.py 27
1 files changed, 0 insertions, 27 deletions
diff --git a/scrapers/de.py b/scrapers/de.py
deleted file mode 100755
index 9450143..0000000
--- a/scrapers/de.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/usr/bin/env python3
-import json
-import urllib.parse
-
-import lxml.html
-import requests
-
# Scrape the alphabetical index pages of gesetze-im-internet.de and write one
# record per listed law to laws/de.json, sorted by title.

# Accumulates one dict per law across all index pages.
laws = []

# One index page (Teilliste_<char>.html) is fetched per character below.
# NOTE(review): 'X' is absent from this list -- presumably the site has no
# index page for it; confirm before adding it.
LETTERS = 'ABCDEFGHIJKLMNOPQRSTUVWYZ123456789'

for idx, letter in enumerate(LETTERS, 1):
    print(f'fetching {idx}/{len(LETTERS)}')
    url = f'https://www.gesetze-im-internet.de/Teilliste_{letter}.html'
    # Bound the wait so a stalled connection cannot hang the scraper, and
    # fail loudly on an HTTP error instead of trying to parse an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    root = lxml.html.fromstring(response.text)
    # Each child of the 'paddingLR12' element is treated as one law entry:
    # its first child carries the link, and the text trailing its second
    # child is used as the title.
    for el in root.get_element_by_id('paddingLR12'):
        target = el[0].get('href').replace('index.html', '')
        # Drop leading/trailing '/' and '.' characters so only the bare
        # path component remains as the abbreviation.
        abbr = target.strip('/.')
        laws.append({
            'title': el[1].tail.strip(),
            'url': urllib.parse.urljoin(url, target),
            'abbr': abbr,
            'redir': abbr,
        })

# ensure_ascii=False emits non-ASCII characters verbatim, so the file encoding
# is pinned to UTF-8 rather than left to the platform's locale default.
with open('laws/de.json', 'w', encoding='utf-8') as f:
    json.dump(
        sorted(laws, key=lambda law: law['title']),
        f,
        indent=2,
        ensure_ascii=False,
    )