Diffstat (limited to 'scrapers/at.py')
-rwxr-xr-x  scrapers/at.py  62
1 file changed, 62 insertions, 0 deletions
diff --git a/scrapers/at.py b/scrapers/at.py
new file mode 100755
index 0000000..54b2402
--- /dev/null
+++ b/scrapers/at.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+import datetime
+import json
+import math
+from multiprocessing.dummy import Pool as ThreadPool
+
+import requests
+
+sess = requests.Session()
+
+# API documentation:
+# https://data.bka.gv.at/ris/api/v2.5/applications/bundesnormen
+
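+# Each request asks for one page of 100 results; Seitennummer selects the page,
+# FassungVom requests the version of each norm in force today.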
+def fetch_page(page):
+    res = sess.get('https://data.bka.gv.at/ris/api/v2.5/bundesnormen', params=dict(
+        Seitennummer=page,
+        DokumenteProSeite='OneHundred',
+        FassungVom=datetime.datetime.today().strftime('%Y-%m-%d'),
+        Abschnitt_Von=1
+    ))
+    print(res.request.url)
+    data = res.json()['OgdSearchResult']
+
+    if 'Error' in data:
+        print(data)
+        return
+
+    return data['OgdDocumentResults']
+
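+# The first page is fetched synchronously to learn the total hit count; the
+# remaining pages are then fetched concurrently, dropping any that failed (None).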
+pages = []
+first = fetch_page(1)
+pages.append(first)
+page_count = math.ceil(int(first['Hits']['#text']) / 100)
+
+for page in filter(None, ThreadPool(8).map(fetch_page, range(2, page_count+1))):
+    pages.append(page)
+
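+# Key the results by Gesetzesnummer so that duplicate references to the same
+# law collapse into a single entry.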
+normen = {}
+
+for page in pages:
+    for result in page['OgdDocumentReference']:
+        info = result['Data']['Metadaten']['Bundes-Landesnormen']
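+        # skip 'K' entries and international treaties ('Vertrag - ...')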
+        if info['Typ'] in ('K', 'K (Geltungsbereich)'):
+            continue
+        if info['Typ'].startswith('Vertrag -'):
+            continue
+        normen[info['Gesetzesnummer']] = dict(
+            title=info['Kurztitel'],
+            url=info['GesamteRechtsvorschriftUrl'],
+            abbr=info.get('Abkuerzung')
+        )
+
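+# Write the collected laws as a JSON array, keeping non-ASCII characters unescaped.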
+with open('laws/at.json', 'w') as f:
+    json.dump(list(normen.values()), f, indent=2, ensure_ascii=False)