diff options
-rwxr-xr-x | fetch_data.py | 62 |
1 files changed, 40 insertions, 22 deletions
diff --git a/fetch_data.py b/fetch_data.py index f3c63a3..d17fff2 100755 --- a/fetch_data.py +++ b/fetch_data.py @@ -2,6 +2,7 @@ import argparse import sys import json +from collections.abc import Iterable import pywikiapi import mwparserfromhell @@ -30,19 +31,19 @@ def find_template_calls(template_name): doc = mwparserfromhell.parse(page['revisions'][0]['slots']['main']['content']) for template in doc.filter_templates(): if template.name.matches(template_name): - yield page, template.params + yield page, template # step 1: find relation ids by looking for template calls -relation_ids = set() +relation_ids = {} -for page, targs in find_template_calls('list relations'): - if len(targs) == 0: +for page, template in find_template_calls('list relations'): + if not template.has(1): continue - for arg in targs[0].split(): + for arg in template.get(1).split(): if arg.isdigit(): - relation_ids.add(arg) + relation_ids[arg] = relation_ids.get(arg, False) or template.has('length') else: # TODO: only if verbose output is enabled print( @@ -57,32 +58,49 @@ sess.headers['user-agent'] = 'osmwiki-overpass-bridge' relations = {} -# TODO: split query if it gets too large +overpass_info = None -query = '[out:json]; (' -for rel_id in relation_ids: - query += f'relation({rel_id});' -query += ');' -query += 'out body;' -res = sess.get(OVERPASS_ENDPOINT, params=dict(data=query)) -res.raise_for_status() +def query_overpass(relation_ids: Iterable[int], with_length: bool): + # TODO: split query if it gets too large + # TODO: calculate length for superrelations (not handled by the length Overpass operator) -KEYS = ('name', 'name:en', 'wikidata', 'wikipedia', 'website') + query = '[out:json]; (' + for rel_id in relation_ids: + query += f'relation({rel_id});' + query += ');' + if with_length: + query += 'convert result ::=::, ::id=id(), length=length();' + query += 'out body;' -res = res.json() + res = sess.get(OVERPASS_ENDPOINT, params=dict(data=query)) + res.raise_for_status() -for rel in res['elements']: - relations[rel['id']] = dict( - tags={k: v for k, v in rel['tags'].items() if k in KEYS} - ) + KEYS = ('name', 'name:en', 'wikidata', 'wikipedia', 'website') + + res = res.json() + + for rel in res['elements']: + data = dict(tags={k: v for k, v in rel['tags'].items() if k in KEYS}) + + if with_length: + data['length'] = float(rel['tags']['length']) + + relations[rel['id']] = data + + global overpass_info + overpass_info = res['osm3s'] + + +query_overpass((rel_id for rel_id, length in relation_ids.items() if not length), False) +query_overpass((rel_id for rel_id, length in relation_ids.items() if length), True) # step 3: serialize data as Lua text = f'''\ -- This page is automatically generated by a Python script using the Overpass API. --- {res['osm3s']['copyright']} --- osm_base = {res['osm3s']['timestamp_osm_base']} +-- {overpass_info['copyright']} +-- osm_base = {overpass_info['timestamp_osm_base']} return ''' + luadata.serialize( dict(relations=relations) |