diff options
author | Martin Fischer <martin@push-f.com> | 2022-06-24 12:13:33 +0200 |
---|---|---|
committer | Martin Fischer <martin@push-f.com> | 2022-06-24 18:20:53 +0200 |
commit | 5cb14b038a4c6e19b5e9192b3ba459f0dbf02d70 (patch) | |
tree | 515699c40db88eac34fef3aa90d6befeed3961f3 |
initial commit
-rw-r--r-- | .gitignore | 1 | ||||
-rwxr-xr-x | fetch_data.py | 98 |
2 files changed, 99 insertions, 0 deletions
```diff
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c13fa2a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+credentials.toml
diff --git a/fetch_data.py b/fetch_data.py
new file mode 100755
index 0000000..f3c63a3
--- /dev/null
+++ b/fetch_data.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+import argparse
+import sys
+import json
+
+import pywikiapi
+import mwparserfromhell
+import requests
+import luadata
+import tomli
+
+OSMWIKI_ENDPOINT = 'https://wiki.openstreetmap.org/w/api.php'
+OVERPASS_ENDPOINT = 'https://lz4.overpass-api.de/api/interpreter'
+
+osmwiki = pywikiapi.Site(OSMWIKI_ENDPOINT)
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--update', action='store_true')
+args = parser.parse_args()
+
+
+def find_template_calls(template_name):
+    for page in osmwiki.query_pages(
+        generator='embeddedin',
+        geititle='Template:' + template_name,
+        prop='revisions',
+        rvprop='content',
+        rvslots='main',
+    ):
+        doc = mwparserfromhell.parse(page['revisions'][0]['slots']['main']['content'])
+        for template in doc.filter_templates():
+            if template.name.matches(template_name):
+                yield page, template.params
+
+
+# step 1: find relation ids by looking for template calls
+
+relation_ids = set()
+
+for page, targs in find_template_calls('list relations'):
+    if len(targs) == 0:
+        continue
+    for arg in targs[0].split():
+        if arg.isdigit():
+            relation_ids.add(arg)
+        else:
+            # TODO: only if verbose output is enabled
+            print(
+                f'[warning] found unexpected argument "{arg}" in {page["title"]}',
+                file=sys.stderr,
+            )
+
+# step 2: query overpass turbo about relations
+
+sess = requests.session()
+sess.headers['user-agent'] = 'osmwiki-overpass-bridge'
+
+relations = {}
+
+# TODO: split query if it gets too large
+
+query = '[out:json]; ('
+for rel_id in relation_ids:
+    query += f'relation({rel_id});'
+query += ');'
+query += 'out body;'
+
+res = sess.get(OVERPASS_ENDPOINT, params=dict(data=query))
+res.raise_for_status()
+
+KEYS = ('name', 'name:en', 'wikidata', 'wikipedia', 'website')
+
+res = res.json()
+
+for rel in res['elements']:
+    relations[rel['id']] = dict(
+        tags={k: v for k, v in rel['tags'].items() if k in KEYS}
+    )
+
+# step 3: serialize data as Lua
+
+text = f'''\
+-- This page is automatically generated by a Python script using the Overpass API.
+-- {res['osm3s']['copyright']}
+-- osm_base = {res['osm3s']['timestamp_osm_base']}
+
+return ''' + luadata.serialize(
+    dict(relations=relations)
+)
+
+if args.update:
+    with open('credentials.toml', 'rb') as f:
+        creds = tomli.load(f)
+    osmwiki.login(creds['username'], creds['password'])
+    csrf_token = osmwiki('query', meta='tokens')['query']['tokens']['csrftoken']
+    osmwiki('edit', title='Module:Report/data', text=text, token=csrf_token)
+else:
+    print(text)
```