blob: 7e0cd1f19edd77a515a629688da9ddf207067fe0 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
|
#!/usr/bin/env python3
import json
import urllib.parse
import lxml.html
import requests
# Suffixes of the per-letter index pages ("Teilliste_<suffix>.html") to fetch.
# NOTE(review): 'X' is absent from this list; presumably no such index page
# exists on the site -- confirm before adding it.
LETTERS = 'ABCDEFGHIJKLMNOPQRSTUVWYZ123456789'

LIST_URL = 'https://www.gesetze-im-internet.de/Teilliste_{}.html'
OUTPUT_PATH = 'laws/de.json'
# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the script forever.
REQUEST_TIMEOUT = 30


def build_law_entry(list_url: str, href: str, title: str) -> dict:
    """Build the JSON record for one law from its raw index-page fields.

    Args:
        list_url: URL of the index page the link was found on; relative
            links are resolved against it.
        href: The ``href`` of the law's link as it appears in the page.
        title: The raw title text; surrounding whitespace is removed.

    Returns:
        A dict with the keys ``title``, ``url`` and ``abbr`` (in that order).
    """
    # Link to the law's directory rather than to its index.html.
    target = href.replace('index.html', '')
    return {
        'title': title.strip(),
        'url': urllib.parse.urljoin(list_url, target),
        # Remove leading/trailing '/' and '.' characters (e.g. './abc/' -> 'abc').
        'abbr': target.strip('/.'),
    }


def fetch_laws() -> list:
    """Download every index page and return the law records found on them.

    Raises:
        requests.RequestException: On connection problems, timeouts or a
            non-success HTTP status.
        KeyError: If a page has no element with the id ``paddingLR12``.
    """
    laws = []
    # One session for all pages so the underlying connection is reused.
    with requests.Session() as session:
        for idx, letter in enumerate(LETTERS, 1):
            print(f'fetching {idx}/{len(LETTERS)}')
            url = LIST_URL.format(letter)
            response = session.get(url, timeout=REQUEST_TIMEOUT)
            # Fail loudly instead of trying to parse an error page.
            response.raise_for_status()
            root = lxml.html.fromstring(response.text)
            for el in root.get_element_by_id('paddingLR12'):
                # The first child carries the link; the title is the text
                # that follows the second child.
                # NOTE(review): relies on the site's current markup -- confirm
                # if the page layout changes.
                laws.append(build_law_entry(url, el[0].get('href'), el[1].tail))
    return laws


def write_laws(laws, path: str = OUTPUT_PATH) -> None:
    """Write the records to ``path`` as indented JSON, sorted by title."""
    # ensure_ascii=False emits non-ASCII characters verbatim, so the file
    # encoding must be pinned instead of depending on the platform default.
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(
            sorted(laws, key=lambda law: law['title']),
            f,
            indent=2,
            ensure_ascii=False,
        )


def main() -> None:
    """Scrape all index pages and write the result to ``OUTPUT_PATH``."""
    write_laws(fetch_laws())


if __name__ == '__main__':
    main()
|