summary | refs | log | tree | commit | diff
path: root/scrapers/at.py
diff options
context:
space:
mode:
author Martin Fischer <martin@push-f.com> 2025-04-08 19:25:36 +0200
committer Martin Fischer <martin@push-f.com> 2025-04-14 07:04:45 +0200
commit e29d27533725819ec3f6d05a27048d3d2627b53e (patch)
tree 5afba50408b25179edb4ea6445acfe1d3e051488 /scrapers/at.py
parent 96236c9d80cea2d6ba83591a7d08a8cc096fd8d3 (diff)
refactor: port fetchers to Go
* Austria: upgraded to RIS API v2.6 because v2.5 has been turned off
Diffstat (limited to 'scrapers/at.py')
-rwxr-xr-x scrapers/at.py 66
1 files changed, 0 insertions, 66 deletions
diff --git a/scrapers/at.py b/scrapers/at.py
deleted file mode 100755
index 1af9894..0000000
--- a/scrapers/at.py
+++ /dev/null
@@ -1,66 +0,0 @@
-#!/usr/bin/env python3
-import datetime
-import json
-import math
-from multiprocessing.dummy import Pool as ThreadPool
-
-import requests
-
-sess = requests.session()
-
-# API documentation:
-# https://data.bka.gv.at/ris/api/v2.5/applications/bundesnormen
-
-def fetch_page(page):
- res = sess.get('https://data.bka.gv.at/ris/api/v2.5/bundesnormen', params=dict(
- Seitennummer=page,
- DokumenteProSeite='OneHundred',
- FassungVom=datetime.datetime.today().strftime('%Y-%m-%d'),
- Abschnitt_Von=1
- ))
- print(res.request.url)
- data = res.json()['OgdSearchResult']
-
- if 'Error' in data:
- print(data)
- return
-
- return data['OgdDocumentResults']
-
-pages = []
-first = fetch_page(1)
-pages.append(first)
-page_count = math.ceil(int(first['Hits']['#text']) / 100)
-
-for page in ThreadPool(8).map(fetch_page, range(2, page_count+1)):
- pages.append(page)
-
-normen = {}
-
-for page in pages:
- for result in page['OgdDocumentReference']:
- info = result['Data']['Metadaten']['Bundes-Landesnormen']
- if info['Typ'] in ('K', 'K (Geltungsbereich)'):
- continue
- if info['Typ'].startswith('Vertrag -'):
- continue
- data = dict(
- title=info['Kurztitel'].strip(),
- url=info['GesamteRechtsvorschriftUrl'],
- )
- if 'Abkuerzung' in info:
- data['abbr'] = info['Abkuerzung'].strip()
- data['redir'] = data['abbr'].lower()\
- .replace(')', '')\
- .replace('(', '')\
- .replace(' – ', '-')\
- .replace(' ', '-')\
- .replace('\xa0', '-')\
- .replace('ä', 'ae')\
- .replace('ü', 'ue')\
- .replace('ö', 'oe')\
- .replace('ß', 'ss')
- normen[info['Gesetzesnummer']] = data
-
-with open('laws/at.json', 'w') as f:
- json.dump(list(normen.values()), f, indent=2, ensure_ascii=False)