diff options
| author | Martin Fischer <martin@push-f.com> | 2022-06-24 12:13:33 +0200 | 
|---|---|---|
| committer | Martin Fischer <martin@push-f.com> | 2022-06-24 18:20:53 +0200 | 
| commit | 5cb14b038a4c6e19b5e9192b3ba459f0dbf02d70 (patch) | |
| tree | 515699c40db88eac34fef3aa90d6befeed3961f3 | |
initial commit
| -rw-r--r-- | .gitignore | 1 | ||||
| -rwxr-xr-x | fetch_data.py | 98 | 
2 files changed, 99 insertions, 0 deletions
#!/usr/bin/env python3
"""Publish tags of OSM relations referenced on the OSM wiki as a Lua module.

The script runs three steps:

1. Find every call of the ``list relations`` template on the OpenStreetMap
   wiki and collect the numeric relation ids from its first argument.
2. Query the Overpass API for those relations.
3. Serialize a whitelisted subset of their tags (see ``KEYS``) as Lua source.

Without ``--update`` the generated Lua source is printed to stdout.  With
``--update`` it is saved to the wiki page ``Module:Report/data``, logging in
with the ``username`` and ``password`` read from ``credentials.toml``.  That
file is listed in ``.gitignore`` and must not be committed.
"""
import argparse
import sys
import json

import pywikiapi
import mwparserfromhell
import requests
import luadata
import tomli

OSMWIKI_ENDPOINT = 'https://wiki.openstreetmap.org/w/api.php'
OVERPASS_ENDPOINT = 'https://lz4.overpass-api.de/api/interpreter'

# Only these tag keys are copied into the generated Lua module.
KEYS = ('name', 'name:en', 'wikidata', 'wikipedia', 'website')

osmwiki = pywikiapi.Site(OSMWIKI_ENDPOINT)

parser = argparse.ArgumentParser()
parser.add_argument('--update', action='store_true')
args = parser.parse_args()


def find_template_calls(template_name):
    """Yield ``(page, params)`` for each call of *template_name* on the wiki.

    Pages are taken from the ``embeddedin`` generator for
    ``Template:<template_name>``; the main-slot content of the first returned
    revision of each page is parsed with mwparserfromhell.

    ``page`` is the item yielded by ``osmwiki.query_pages`` and ``params`` is
    the ``params`` attribute of the matching template node.  A page that calls
    the template several times is yielded once per call.
    """
    for page in osmwiki.query_pages(
        generator='embeddedin',
        geititle='Template:' + template_name,
        prop='revisions',
        rvprop='content',
        rvslots='main',
    ):
        doc = mwparserfromhell.parse(page['revisions'][0]['slots']['main']['content'])
        for template in doc.filter_templates():
            if template.name.matches(template_name):
                yield page, template.params


# step 1: find relation ids by looking for template calls

relation_ids = set()

for page, targs in find_template_calls('list relations'):
    if not targs:
        continue
    # The first argument is a whitespace-separated list of relation ids.
    for arg in targs[0].split():
        if arg.isdigit():
            relation_ids.add(arg)
        else:
            # TODO: only if verbose output is enabled
            print(
                f'[warning] found unexpected argument "{arg}" in {page["title"]}',
                file=sys.stderr,
            )

# step 2: query overpass turbo about relations

sess = requests.Session()
sess.headers['user-agent'] = 'osmwiki-overpass-bridge'

# TODO: split query if it gets too large

# Sort the ids so that the query text is reproducible between runs (set
# iteration order of strings is not).
query = (
    '[out:json]; ('
    + ''.join(f'relation({rel_id});' for rel_id in sorted(relation_ids))
    + ');'
    + 'out body;'
)

# Send the query in the request body rather than the URL: one statement per
# relation quickly exceeds the URL length that servers accept for GET.
response = sess.post(OVERPASS_ENDPOINT, data={'data': query})
response.raise_for_status()

result = response.json()

# Overpass omits the "tags" member for elements without tags, hence .get().
relations = {
    rel['id']: dict(
        tags={k: v for k, v in rel.get('tags', {}).items() if k in KEYS}
    )
    for rel in result['elements']
}

# step 3: serialize data as Lua

osm3s = result['osm3s']

text = f'''\
-- This page is automatically generated by a Python script using the Overpass API.
-- {osm3s['copyright']}
-- osm_base = {osm3s['timestamp_osm_base']}

return ''' + luadata.serialize(
    dict(relations=relations)
)

if args.update:
    with open('credentials.toml', 'rb') as f:
        creds = tomli.load(f)
    osmwiki.login(creds['username'], creds['password'])
    # MediaWiki requires a CSRF token for the edit action.
    csrf_token = osmwiki('query', meta='tokens')['query']['tokens']['csrftoken']
    osmwiki('edit', title='Module:Report/data', text=text, token=csrf_token)
else:
    print(text)
