diff options
Diffstat (limited to 'fetch_data.py')
-rwxr-xr-x | fetch_data.py | 59 |
1 file changed, 46 insertions, 13 deletions
diff --git a/fetch_data.py b/fetch_data.py index d17fff2..8739fbf 100755 --- a/fetch_data.py +++ b/fetch_data.py @@ -2,7 +2,7 @@ import argparse import sys import json -from collections.abc import Iterable +from collections.abc import Collection import pywikiapi import mwparserfromhell @@ -38,12 +38,12 @@ def find_template_calls(template_name): relation_ids = {} -for page, template in find_template_calls('list relations'): - if not template.has(1): +for page, tpl in find_template_calls('list relations'): + if not tpl.has(1): continue - for arg in template.get(1).split(): + for arg in tpl.get(1).split(): if arg.isdigit(): - relation_ids[arg] = relation_ids.get(arg, False) or template.has('length') + relation_ids[int(arg)] = relation_ids.get(arg, False) or tpl.has('length') else: # TODO: only if verbose output is enabled print( @@ -61,16 +61,36 @@ relations = {} overpass_info = None -def query_overpass(relation_ids: Iterable[int], with_length: bool): +def get_length(route, relations): + """ + The length operator of Overpass QL does not support superrelations + (relations containing relations) and just returns 0 for those, + so we need to implement our own recursion. + """ + if route['members']: + # If there are any members of type relation we recurse, + # ensuring that we still report a sensible result when + # ways are mistakenly added directly to superrelations. + + # TODO: gracefully handle KeyError exception + return sum(get_length(relations[m], relations) for m in route['members']) + else: + return float(route['tags']['length']) + + +def query_overpass(relation_ids: Collection[int], with_length: bool): # TODO: split query if it gets too large - # TODO: calculate length for superrelations (not handled by the length Overpass operator) query = '[out:json]; (' for rel_id in relation_ids: query += f'relation({rel_id});' + if with_length: + query += 'rel(r);' # 1. recursion + query += 'rel(r);' # 2. recursion + query += 'rel(r);' # 3. 
recursion query += ');' if with_length: - query += 'convert result ::=::, ::id=id(), length=length();' + query += 'convert result ::=::, ::id=id(), length=length(), member_ids=per_member(ref()), member_types=per_member(mtype());' query += 'out body;' res = sess.get(OVERPASS_ENDPOINT, params=dict(data=query)) @@ -80,20 +100,33 @@ def query_overpass(relation_ids: Iterable[int], with_length: bool): res = res.json() - for rel in res['elements']: + all_relations = {rel['id']: rel for rel in res['elements']} + + if with_length: + for rel in all_relations.values(): + member_ids = [int(x) for x in rel['tags']['member_ids'].split(';')] + member_types = rel['tags']['member_types'].split(';') + rel['members'] = [ + m_id + for m_id, m_type in zip(member_ids, member_types) + if m_type == 'relation' + ] + + for rel_id in relation_ids: + rel = all_relations[rel_id] data = dict(tags={k: v for k, v in rel['tags'].items() if k in KEYS}) if with_length: - data['length'] = float(rel['tags']['length']) + data['length'] = get_length(rel, all_relations) - relations[rel['id']] = data + relations[rel_id] = data global overpass_info overpass_info = res['osm3s'] -query_overpass((rel_id for rel_id, length in relation_ids.items() if not length), False) -query_overpass((rel_id for rel_id, length in relation_ids.items() if length), True) +query_overpass([rel_id for rel_id, length in relation_ids.items() if not length], False) +query_overpass([rel_id for rel_id, length in relation_ids.items() if length], True) # step 3: serialize data as Lua |