summaryrefslogtreecommitdiff
path: root/fetch_data.py
blob: f3c63a388c0d6a7ef4722decda6782cd2d090ff7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/env python3
import argparse
import sys
import json

import pywikiapi
import mwparserfromhell
import requests
import luadata
import tomli

# API endpoint of the OpenStreetMap wiki, handed to the pywikiapi client below.
OSMWIKI_ENDPOINT = 'https://wiki.openstreetmap.org/w/api.php'
# Overpass API interpreter endpoint that receives the relation query.
OVERPASS_ENDPOINT = 'https://lz4.overpass-api.de/api/interpreter'

# Shared wiki client: used to look up template calls and, with --update,
# to log in and write the generated page.
osmwiki = pywikiapi.Site(OSMWIKI_ENDPOINT)

# Command line: with --update the generated Lua text is written to the wiki;
# without it the text is only printed to stdout.
parser = argparse.ArgumentParser()
parser.add_argument('--update', action='store_true')
args = parser.parse_args()


def find_template_calls(template_name):
    """Yield every call of a template found on the pages that embed it.

    Queries the wiki for the pages embedding ``Template:<template_name>``
    together with the content of their main slot, parses that content as
    wikitext and yields one item per matching template call.

    Args:
        template_name: Template title without the ``Template:`` prefix.

    Yields:
        ``(page, params)`` tuples, where ``page`` is the page record returned
        by the API and ``params`` is the parameter list of one template call.
    """
    for page in osmwiki.query_pages(
        generator='embeddedin',
        geititle='Template:' + template_name,
        prop='revisions',
        rvprop='content',
        rvslots='main',
    ):
        # A page record can arrive without revision data (the API may spread
        # page content over several continuation responses), so skip such
        # records instead of failing with a KeyError.
        revisions = page.get('revisions')
        if not revisions:
            continue
        doc = mwparserfromhell.parse(revisions[0]['slots']['main']['content'])
        for template in doc.filter_templates():
            if template.name.matches(template_name):
                yield page, template.params


# step 1: find relation ids by looking for template calls

# Ids are kept as digit strings; the set drops ids listed on several pages.
relation_ids = set()

for page, targs in find_template_calls('list relations'):
    if not targs:
        continue
    # The first template parameter holds whitespace-separated relation ids.
    for arg in targs[0].split():
        # str.isdigit() alone also accepts non-ASCII digits (e.g. '²' or '٣').
        # The wiki text is user-editable and the value ends up inside the
        # Overpass query, so only plain ASCII digits are accepted.
        if arg.isascii() and arg.isdigit():
            relation_ids.add(arg)
        else:
            # TODO: only if verbose output is enabled
            print(
                f'[warning] found unexpected argument "{arg}" in {page["title"]}',
                file=sys.stderr,
            )

# step 2: query the Overpass API about relations

# Seconds to wait on the Overpass connection before giving up. Chosen to be
# generous (Overpass' documented default query timeout is 180 s) while still
# preventing the script from hanging forever on a stalled connection.
OVERPASS_TIMEOUT = 300

sess = requests.session()
sess.headers['user-agent'] = 'osmwiki-overpass-bridge'

relations = {}

# TODO: split query if it gets too large

# One union statement covering every relation. Ids are sorted numerically so
# the same set of ids always produces the same query text.
union = ''.join(
    f'relation({rel_id});' for rel_id in sorted(relation_ids, key=int)
)
query = f'[out:json]; ({union});out body;'

# POST keeps the query out of the URL, so a long id list cannot run into
# URL length limits.
res = sess.post(
    OVERPASS_ENDPOINT,
    data=dict(data=query),
    timeout=OVERPASS_TIMEOUT,
)
res.raise_for_status()

# Only these tags are copied into the generated data.
KEYS = ('name', 'name:en', 'wikidata', 'wikipedia', 'website')

# From here on `res` is the decoded JSON document, not the HTTP response.
res = res.json()

for rel in res['elements']:
    # Overpass leaves out the "tags" member for elements without tags.
    tags = rel.get('tags', {})
    relations[rel['id']] = dict(
        tags={k: v for k, v in tags.items() if k in KEYS}
    )

# step 3: serialize data as Lua

# Values for the Lua comment header, taken from the Overpass response.
attribution = res['osm3s']['copyright']
osm_base = res['osm3s']['timestamp_osm_base']

# The collected relations, rendered as a Lua table literal.
lua_table = luadata.serialize(dict(relations=relations))

header = f'''\
-- This page is automatically generated by a Python script using the Overpass API.
-- {attribution}
-- osm_base = {osm_base}

return '''

text = header + lua_table

if not args.update:
    # Default mode: just show the generated module source.
    print(text)
else:
    # Update mode: log in with the account stored in credentials.toml
    # (looked up relative to the working directory) and overwrite the page.
    with open('credentials.toml', 'rb') as creds_file:
        credentials = tomli.load(creds_file)
    osmwiki.login(credentials['username'], credentials['password'])

    # The edit call is given a CSRF token, fetched with a tokens query.
    token_response = osmwiki('query', meta='tokens')
    edit_token = token_response['query']['tokens']['csrftoken']
    osmwiki('edit', title='Module:Report/data', text=text, token=edit_token)