From 3df9bdd164b956d3dff2ecd5b5852eea9b0c9000 Mon Sep 17 00:00:00 2001 From: Martin Fischer Date: Fri, 24 Jun 2022 21:18:26 +0200 Subject: also query length from wikidata --- fetch_data.py | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) (limited to 'fetch_data.py') diff --git a/fetch_data.py b/fetch_data.py index bf96ee9..3c77c78 100755 --- a/fetch_data.py +++ b/fetch_data.py @@ -9,9 +9,12 @@ import mwparserfromhell import requests import luadata import tomli +from SPARQLWrapper import SPARQLWrapper, JSON +USER_AGENT = 'osmwiki-overpass-import' OSMWIKI_ENDPOINT = 'https://wiki.openstreetmap.org/w/api.php' OVERPASS_ENDPOINT = 'https://lz4.overpass-api.de/api/interpreter' +WIKIDATA_ENDPOINT = 'https://query.wikidata.org/sparql' osmwiki = pywikiapi.Site(OSMWIKI_ENDPOINT) @@ -54,7 +57,7 @@ for page, tpl in find_template_calls('list relations'): # step 2: query overpass turbo about relations sess = requests.session() -sess.headers['user-agent'] = 'osmwiki-overpass-bridge' +sess.headers['user-agent'] = USER_AGENT relations = {} @@ -78,6 +81,23 @@ def get_length(route, relations): return float(route['tags']['length']) +def query_lengths_from_wikidata(wikidata_ids): + query = """SELECT ?id ?length WHERE { + VALUES ?id { %s } + ?id p:P2043/psn:P2043/wikibase:quantityAmount ?length . + }""" % ' '.join( + f'wd:' + x for x in wikidata_ids + ) + + sparql = SPARQLWrapper(WIKIDATA_ENDPOINT, agent=USER_AGENT) + sparql.setQuery(query) + sparql.setReturnFormat(JSON) + return { + res['id']['value'].rsplit('/', maxsplit=1)[1]: float(res['length']['value']) + for res in sparql.query().convert()["results"]["bindings"] + } + + def query_overpass(relation_ids: Collection[int], with_length: bool): # TODO: split query if it gets too large @@ -112,12 +132,23 @@ def query_overpass(relation_ids: Collection[int], with_length: bool): if m_type == 'relation' ] + def wikidata_ids(): + for rel_id in relation_ids: + wikidata_id = all_relations[rel_id]['tags'].get('wikidata') + if wikidata_id: + yield wikidata_id + + wikidata_lengths = query_lengths_from_wikidata(wikidata_ids()) + for rel_id in relation_ids: rel = all_relations[rel_id] data = dict(tags={k: v for k, v in rel['tags'].items() if k in KEYS}) if with_length: data['mapped_length'] = get_length(rel, all_relations) + wikidata_id = rel['tags'].get('wikidata') + if wikidata_id in wikidata_lengths: + data['wikidata_length'] = wikidata_lengths[wikidata_id] relations[rel_id] = data @@ -133,6 +164,7 @@ query_overpass([rel_id for rel_id, length in relation_ids.items() if length], Tr text = f'''\ -- This page is automatically generated by a Python script using the Overpass API. -- {overpass_info['copyright']} +-- The wikidata_length data is queried from www.wikidata.org and available under Creative Commons CC0 License. -- osm_base = {overpass_info['timestamp_osm_base']} return ''' + luadata.serialize( -- cgit v1.2.3