diff options
Diffstat (limited to 'scrapers/at.py')
-rwxr-xr-x | scrapers/at.py | 54 |
1 files changed, 54 insertions, 0 deletions
#!/usr/bin/env python3
"""Scrape the index of federal norms from the RIS API into ``laws/at.json``.

The script pages through the ``bundesnormen`` search endpoint, keeps one
entry per ``Gesetzesnummer`` and writes the resulting list (title, URL and
abbreviation) as JSON.
"""
import datetime
import json
import math
from multiprocessing.dummy import Pool as ThreadPool

import requests

# API documentation:
# https://data.bka.gv.at/ris/api/v2.5/applications/bundesnormen
API_URL = 'https://data.bka.gv.at/ris/api/v2.5/bundesnormen'

# These two must stay in sync: DOCS_PER_PAGE is the page-size value sent to
# the API, PAGE_SIZE is the number used to derive the page count from the
# total hit count.
DOCS_PER_PAGE = 'OneHundred'
PAGE_SIZE = 100

# Seconds to wait for the server before giving up on a request; without a
# timeout a stalled connection would block a worker thread forever.
REQUEST_TIMEOUT = 60

# Values of the 'Typ' field that are left out of the output.
# NOTE(review): the meaning of these type codes is not visible here --
# confirm against the API documentation before changing them.
SKIPPED_TYPES = ('K', 'K (Geltungsbereich)')
SKIPPED_TYPE_PREFIX = 'Vertrag -'

OUTPUT_PATH = 'laws/at.json'

sess = requests.session()


def fetch_page(page):
    """Fetch one page of search results and return its document results.

    Args:
        page: 1-based page number, sent as ``Seitennummer``.

    Returns:
        The ``OgdDocumentResults`` object of the response.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        RuntimeError: if the response body carries an ``Error`` entry.
    """
    res = sess.get(
        API_URL,
        params=dict(
            Seitennummer=page,
            DokumenteProSeite=DOCS_PER_PAGE,
            FassungVom=datetime.datetime.today().strftime('%Y-%m-%d'),
            Abschnitt_Von=1
        ),
        timeout=REQUEST_TIMEOUT,
    )
    print(res.request.url)
    res.raise_for_status()
    data = res.json()['OgdSearchResult']

    if 'Error' in data:
        # Fail loudly here: returning None would only postpone the failure
        # to an opaque TypeError in the code that consumes the page.
        raise RuntimeError(
            'RIS API returned an error for page {}: {!r}'.format(page, data)
        )

    return data['OgdDocumentResults']


def collect_normen(pages):
    """Build the ``Gesetzesnummer -> entry`` mapping from fetched pages.

    Documents whose ``Typ`` is in ``SKIPPED_TYPES`` or starts with
    ``SKIPPED_TYPE_PREFIX`` are ignored. Several documents may share one
    ``Gesetzesnummer``; the last one seen wins.

    Args:
        pages: iterable of ``OgdDocumentResults`` objects as returned by
            ``fetch_page``.

    Returns:
        dict mapping ``Gesetzesnummer`` to a dict with ``title``, ``url``
        and ``abbr`` (``None`` when the document has no ``Abkuerzung``).
    """
    collected = {}
    for page in pages:
        for result in page['OgdDocumentReference']:
            info = result['Data']['Metadaten']['Bundes-Landesnormen']
            typ = info['Typ']
            if typ in SKIPPED_TYPES or typ.startswith(SKIPPED_TYPE_PREFIX):
                continue
            collected[info['Gesetzesnummer']] = dict(
                title=info['Kurztitel'],
                url=info['GesamteRechtsvorschriftUrl'],
                abbr=info.get('Abkuerzung')
            )
    return collected


# The first page is fetched on its own because its hit count determines how
# many further pages exist.
first = fetch_page(1)
page_count = math.ceil(int(first['Hits']['#text']) / PAGE_SIZE)

# map() blocks until every page has arrived, so leaving the ``with`` block
# (which shuts the pool down) cannot cut off pending requests.
with ThreadPool(8) as pool:
    pages = [first] + pool.map(fetch_page, range(2, page_count + 1))

normen = collect_normen(pages)

# Explicit UTF-8: ensure_ascii=False emits non-ASCII characters verbatim, so
# the file must not depend on the locale's default encoding.
with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
    json.dump(list(normen.values()), f, indent=2, ensure_ascii=False)