summary | refs | log | tree | commit | diff
path: root/fetch_data.py
diff options
context:
space:
mode:
author Martin Fischer <martin@push-f.com> 2022-06-24 18:55:08 +0200
committer Martin Fischer <martin@push-f.com> 2022-06-24 19:21:59 +0200
commit c04faae8f0029ad905c0b55139e8ab5cb35ead93 (patch)
tree b739c81e44b156c915d6adaf16a782202f6973c3 /fetch_data.py
parent 5cb14b038a4c6e19b5e9192b3ba459f0dbf02d70 (diff)
support length calculation for relations that aren't superrelations
Diffstat (limited to 'fetch_data.py')
-rwxr-xr-x fetch_data.py 62
1 files changed, 40 insertions, 22 deletions
diff --git a/fetch_data.py b/fetch_data.py
index f3c63a3..d17fff2 100755
--- a/fetch_data.py
+++ b/fetch_data.py
@@ -2,6 +2,7 @@
import argparse
import sys
import json
+from collections.abc import Iterable
import pywikiapi
import mwparserfromhell
@@ -30,19 +31,19 @@ def find_template_calls(template_name):
doc = mwparserfromhell.parse(page['revisions'][0]['slots']['main']['content'])
for template in doc.filter_templates():
if template.name.matches(template_name):
- yield page, template.params
+ yield page, template
# step 1: find relation ids by looking for template calls
-relation_ids = set()
+relation_ids = {}
-for page, targs in find_template_calls('list relations'):
- if len(targs) == 0:
+for page, template in find_template_calls('list relations'):
+ if not template.has(1):
continue
- for arg in targs[0].split():
+ for arg in template.get(1).split():
if arg.isdigit():
- relation_ids.add(arg)
+ relation_ids[arg] = relation_ids.get(arg, False) or template.has('length')
else:
# TODO: only if verbose output is enabled
print(
@@ -57,32 +58,49 @@ sess.headers['user-agent'] = 'osmwiki-overpass-bridge'
relations = {}
-# TODO: split query if it gets too large
+overpass_info = None
-query = '[out:json]; ('
-for rel_id in relation_ids:
- query += f'relation({rel_id});'
-query += ');'
-query += 'out body;'
-res = sess.get(OVERPASS_ENDPOINT, params=dict(data=query))
-res.raise_for_status()
+def query_overpass(relation_ids: Iterable[int], with_length: bool):
+ # TODO: split query if it gets too large
+ # TODO: calculate length for superrelations (not handled by the length Overpass operator)
-KEYS = ('name', 'name:en', 'wikidata', 'wikipedia', 'website')
+ query = '[out:json]; ('
+ for rel_id in relation_ids:
+ query += f'relation({rel_id});'
+ query += ');'
+ if with_length:
+ query += 'convert result ::=::, ::id=id(), length=length();'
+ query += 'out body;'
-res = res.json()
+ res = sess.get(OVERPASS_ENDPOINT, params=dict(data=query))
+ res.raise_for_status()
-for rel in res['elements']:
- relations[rel['id']] = dict(
- tags={k: v for k, v in rel['tags'].items() if k in KEYS}
- )
+ KEYS = ('name', 'name:en', 'wikidata', 'wikipedia', 'website')
+
+ res = res.json()
+
+ for rel in res['elements']:
+ data = dict(tags={k: v for k, v in rel['tags'].items() if k in KEYS})
+
+ if with_length:
+ data['length'] = float(rel['tags']['length'])
+
+ relations[rel['id']] = data
+
+ global overpass_info
+ overpass_info = res['osm3s']
+
+
+query_overpass((rel_id for rel_id, length in relation_ids.items() if not length), False)
+query_overpass((rel_id for rel_id, length in relation_ids.items() if length), True)
# step 3: serialize data as Lua
text = f'''\
-- This page is automatically generated by a Python script using the Overpass API.
--- {res['osm3s']['copyright']}
--- osm_base = {res['osm3s']['timestamp_osm_base']}
+-- {overpass_info['copyright']}
+-- osm_base = {overpass_info['timestamp_osm_base']}
return ''' + luadata.serialize(
dict(relations=relations)