summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Martin Fischer <martin@push-f.com> 2022-06-24 12:13:33 +0200
committer Martin Fischer <martin@push-f.com> 2022-06-24 18:20:53 +0200
commit 5cb14b038a4c6e19b5e9192b3ba459f0dbf02d70 (patch)
tree 515699c40db88eac34fef3aa90d6befeed3961f3
initial commit
-rw-r--r-- .gitignore 1
-rwxr-xr-x fetch_data.py 98
2 files changed, 99 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c13fa2a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+credentials.toml
diff --git a/fetch_data.py b/fetch_data.py
new file mode 100755
index 0000000..f3c63a3
--- /dev/null
+++ b/fetch_data.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+import argparse
+import sys
+import json
+
+import pywikiapi
+import mwparserfromhell
+import requests
+import luadata
+import tomli
+
+OSMWIKI_ENDPOINT = 'https://wiki.openstreetmap.org/w/api.php'
+OVERPASS_ENDPOINT = 'https://lz4.overpass-api.de/api/interpreter'
+
+# API client for the OpenStreetMap wiki. It is used to look up template calls
+# and, when --update is given, to log in and write the generated page.
+osmwiki = pywikiapi.Site(OSMWIKI_ENDPOINT)
+
+parser = argparse.ArgumentParser(
+    description=(
+        'Collect the relation ids passed to the "list relations" template '
+        'on the OpenStreetMap wiki, fetch selected tags of those relations '
+        'from the Overpass API and emit them as a Lua data table.'
+    ),
+)
+parser.add_argument(
+    '--update',
+    action='store_true',
+    help=(
+        'log in with the credentials from credentials.toml and write the '
+        'result to Module:Report/data instead of printing it'
+    ),
+)
+args = parser.parse_args()
+
+
+def find_template_calls(template_name):
+    """Yield ``(page, params)`` for every call of a template on the wiki.
+
+    Asks the wiki for the pages that embed ``Template:<template_name>``,
+    requesting the main-slot revision content along with each page. The
+    content of the first revision returned for a page is parsed as wikitext,
+    and for each template call in it whose name matches ``template_name``
+    the page is yielded together with that call's parameter list.
+    """
+    embedding_pages = osmwiki.query_pages(
+        generator='embeddedin',
+        geititle='Template:' + template_name,
+        prop='revisions',
+        rvprop='content',
+        rvslots='main',
+    )
+    for page in embedding_pages:
+        wikitext = page['revisions'][0]['slots']['main']['content']
+        for call in mwparserfromhell.parse(wikitext).filter_templates():
+            if not call.name.matches(template_name):
+                continue
+            yield page, call.params
+
+
+# step 1: find relation ids by looking for template calls
+
+# Ids are stored as ints so that differently written forms of the same number
+# (e.g. "0123" and "123") end up as a single entry.
+relation_ids = set()
+
+for page, targs in find_template_calls('list relations'):
+    if not targs:
+        continue
+    # The first template parameter is read as a whitespace-separated id list.
+    for arg in targs[0].split():
+        # str.isdigit() on its own also accepts non-ASCII digit characters
+        # such as "²" or "١"; requiring ASCII keeps those out of the Overpass
+        # query and guarantees that int() below cannot fail.
+        if arg.isascii() and arg.isdigit():
+            relation_ids.add(int(arg))
+        else:
+            # TODO: only if verbose output is enabled
+            print(
+                f'[warning] found unexpected argument "{arg}" in {page["title"]}',
+                file=sys.stderr,
+            )
+
+# step 2: query the Overpass API about relations
+
+sess = requests.session()
+sess.headers['user-agent'] = 'osmwiki-overpass-bridge'
+
+# TODO: split query if it gets too large
+
+# One union statement containing every collected relation id.
+query = '[out:json]; ('
+query += ''.join(f'relation({rel_id});' for rel_id in relation_ids)
+query += ');'
+query += 'out body;'
+
+# The query is sent as a form-encoded POST body instead of a URL parameter so
+# that a long id list cannot run into URL length limits. requests applies no
+# timeout unless one is given, so set one to avoid hanging indefinitely.
+res = sess.post(OVERPASS_ENDPOINT, data=dict(data=query), timeout=300)
+res.raise_for_status()
+
+# The only tags that are copied into the generated data.
+KEYS = ('name', 'name:en', 'wikidata', 'wikipedia', 'website')
+
+# From here on `res` is the decoded JSON document, not the HTTP response.
+res = res.json()
+
+# Overpass leaves out the "tags" member for elements without tags, so fall
+# back to an empty mapping instead of raising KeyError.
+relations = {
+    rel['id']: dict(
+        tags={k: v for k, v in rel.get('tags', {}).items() if k in KEYS}
+    )
+    for rel in res['elements']
+}
+
+# step 3: serialize data as Lua
+
+# Lua comment header with the copyright notice and data timestamp taken from
+# the Overpass response, ending in the `return` that precedes the table.
+header = f'''\
+-- This page is automatically generated by a Python script using the Overpass API.
+-- {res['osm3s']['copyright']}
+-- osm_base = {res['osm3s']['timestamp_osm_base']}
+
+return '''
+text = header + luadata.serialize(dict(relations=relations))
+
+if not args.update:
+    # Without --update the generated module is only printed.
+    print(text)
+else:
+    # With --update: log in using credentials.toml, fetch a CSRF token and
+    # submit the text as the content of Module:Report/data.
+    with open('credentials.toml', 'rb') as creds_file:
+        credentials = tomli.load(creds_file)
+    osmwiki.login(credentials['username'], credentials['password'])
+    token_response = osmwiki('query', meta='tokens')
+    csrf_token = token_response['query']['tokens']['csrftoken']
+    osmwiki('edit', title='Module:Report/data', text=text, token=csrf_token)