summaryrefslogtreecommitdiff
path: root/fetch_data.py
blob: d17fff262c31e2c0870386d058d885b98c63fb13 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/usr/bin/env python3
import argparse
import sys
import json
from collections.abc import Iterable

import pywikiapi
import mwparserfromhell
import requests
import luadata
import tomli

# MediaWiki API endpoint of the OpenStreetMap wiki.
OSMWIKI_ENDPOINT = 'https://wiki.openstreetmap.org/w/api.php'
# Overpass API interpreter endpoint that the relation queries are sent to.
OVERPASS_ENDPOINT = 'https://lz4.overpass-api.de/api/interpreter'

# Shared client for every OSM wiki API call in this script.
osmwiki = pywikiapi.Site(OSMWIKI_ENDPOINT)

parser = argparse.ArgumentParser(
    description=(
        'Collect the relations referenced by "list relations" template calls '
        'on the OpenStreetMap wiki, look them up via the Overpass API and '
        'emit the result as a Lua data module.'
    ),
)
parser.add_argument(
    '--update',
    action='store_true',
    help=(
        'log in with the account from credentials.toml and write the '
        'generated module to Module:Report/data instead of printing it'
    ),
)
args = parser.parse_args()


def find_template_calls(template_name):
    """Yield ``(page, template)`` for each call of a template on the OSM wiki.

    Pages are obtained from the wiki API's ``embeddedin`` generator for
    ``Template:<template_name>``, together with the content of the ``main``
    slot of the returned revision.  That content is parsed with
    mwparserfromhell, and every template node whose name matches
    ``template_name`` is yielded along with the page dict it was found on.

    A page that calls the template several times is yielded once per call.
    """
    embedding_pages = osmwiki.query_pages(
        generator='embeddedin',
        geititle='Template:' + template_name,
        prop='revisions',
        rvprop='content',
        rvslots='main',
    )
    for page in embedding_pages:
        wikitext = page['revisions'][0]['slots']['main']['content']
        parsed = mwparserfromhell.parse(wikitext)
        for candidate in parsed.filter_templates():
            # The page may use other templates too; keep only the requested one.
            if not candidate.name.matches(template_name):
                continue
            yield page, candidate


# step 1: find relation ids by looking for template calls

# Maps a relation id (kept as a digit string) to whether at least one template
# call that lists it also passes a `length` parameter.
relation_ids = {}

for page, template in find_template_calls('list relations'):
    if not template.has(1):
        continue
    # The same for every id listed in this call, so look it up only once.
    wants_length = template.has('length')
    # Split the parameter's *value*, not the Parameter itself: a Parameter
    # written with an explicit key stringifies as "1=123 456", which would turn
    # the first id into the bogus token "1=123".
    for arg in template.get(1).value.split():
        # str.isdigit() alone also accepts non-ASCII digits (e.g. "²"), which
        # are not valid relation ids and would break the Overpass query.
        if arg.isascii() and arg.isdigit():
            relation_ids[arg] = relation_ids.get(arg, False) or wants_length
        else:
            # TODO: only if verbose output is enabled
            print(
                f'[warning] found unexpected argument "{arg}" in {page["title"]}',
                file=sys.stderr,
            )

# step 2: query the Overpass API about relations

# One HTTP session, with an identifying user agent, shared by all Overpass
# requests.
sess = requests.Session()
sess.headers.update({'user-agent': 'osmwiki-overpass-bridge'})

# Both of these are filled in by query_overpass():
# `overpass_info` receives the `osm3s` metadata of the latest response and
# `relations` collects the per-relation data keyed by relation id.
overpass_info = None
relations = {}


def query_overpass(relation_ids: Iterable[int], with_length: bool):
    """Fetch relations from Overpass and record them in ``relations``.

    Builds one Overpass QL query covering all ``relation_ids``, sends it to
    ``OVERPASS_ENDPOINT`` through the shared ``sess`` session and stores one
    entry per returned element in the module-level ``relations`` dict, keyed
    by the element's ``id``.  Each entry has a ``tags`` dict restricted to a
    fixed set of keys and, when ``with_length`` is true, a ``length`` float.
    The ``osm3s`` member of the response is saved in the module-level
    ``overpass_info``.

    Args:
        relation_ids: Ids of the relations to fetch.  Each id is interpolated
            verbatim into the query, so digit strings work as well as ints.
        with_length: Whether to add a ``convert`` statement that makes
            Overpass report a ``length`` tag for each relation.

    Raises:
        requests.HTTPError: If Overpass answers with an HTTP error status.
        KeyError: If ``with_length`` is true and an element lacks ``length``.
    """
    # TODO: split query if it gets too large
    # TODO: calculate length for superrelations (not handled by the length Overpass operator)
    global overpass_info

    KEYS = ('name', 'name:en', 'wikidata', 'wikipedia', 'website')

    selectors = ''.join(f'relation({rel_id});' for rel_id in relation_ids)
    query = f'[out:json]; ({selectors});'
    if with_length:
        query += 'convert result ::=::, ::id=id(), length=length();'
    query += 'out body;'

    res = sess.get(OVERPASS_ENDPOINT, params=dict(data=query))
    res.raise_for_status()
    payload = res.json()

    for rel in payload['elements']:
        # Overpass leaves out the "tags" member for elements without any tag,
        # so an untagged relation must not abort the whole run.
        tags = rel.get('tags', {})
        data = dict(tags={k: v for k, v in tags.items() if k in KEYS})

        if with_length:
            data['length'] = float(tags['length'])

        relations[rel['id']] = data

    overpass_info = payload['osm3s']


# Relations without a requested length are fetched first, then the ones that
# need Overpass to compute it.
plain_ids = [rel_id for rel_id, wants_length in relation_ids.items() if not wants_length]
measured_ids = [rel_id for rel_id, wants_length in relation_ids.items() if wants_length]
query_overpass(plain_ids, False)
query_overpass(measured_ids, True)

# step 3: serialize data as Lua

# Lua comment header carrying the Overpass copyright notice and data timestamp;
# the trailing empty entry produces the blank line before `return`.
header_lines = [
    '-- This page is automatically generated by a Python script using the Overpass API.',
    f'-- {overpass_info["copyright"]}',
    f'-- osm_base = {overpass_info["timestamp_osm_base"]}',
    '',
]
lua_table = luadata.serialize({'relations': relations})
text = '\n'.join(header_lines) + '\nreturn ' + lua_table

if not args.update:
    # Default mode: just show the generated module.
    print(text)
else:
    # Update mode: log in and overwrite the wiki page with the generated module.
    with open('credentials.toml', 'rb') as creds_file:
        creds = tomli.load(creds_file)
    osmwiki.login(creds['username'], creds['password'])
    token_response = osmwiki('query', meta='tokens')
    csrf_token = token_response['query']['tokens']['csrftoken']
    osmwiki('edit', title='Module:Report/data', text=text, token=csrf_token)