diff options
author | Martin Fischer <martin@push-f.com> | 2022-06-24 21:18:26 +0200 |
---|---|---|
committer | Martin Fischer <martin@push-f.com> | 2022-06-24 22:57:03 +0200 |
commit | 3df9bdd164b956d3dff2ecd5b5852eea9b0c9000 (patch) | |
tree | 72628e5004679f501e9a79816f106679fe075a7e | |
parent | 4836c2b42f6eb6c99b27375d00492b63e4500892 (diff) |
also query length from wikidata
-rwxr-xr-x | fetch_data.py | 34 |
1 files changed, 33 insertions, 1 deletions
diff --git a/fetch_data.py b/fetch_data.py index bf96ee9..3c77c78 100755 --- a/fetch_data.py +++ b/fetch_data.py @@ -9,9 +9,12 @@ import mwparserfromhell import requests import luadata import tomli +from SPARQLWrapper import SPARQLWrapper, JSON +USER_AGENT = 'osmwiki-overpass-import' OSMWIKI_ENDPOINT = 'https://wiki.openstreetmap.org/w/api.php' OVERPASS_ENDPOINT = 'https://lz4.overpass-api.de/api/interpreter' +WIKIDATA_ENDPOINT = 'https://query.wikidata.org/sparql' osmwiki = pywikiapi.Site(OSMWIKI_ENDPOINT) @@ -54,7 +57,7 @@ for page, tpl in find_template_calls('list relations'): # step 2: query overpass turbo about relations sess = requests.session() -sess.headers['user-agent'] = 'osmwiki-overpass-bridge' +sess.headers['user-agent'] = USER_AGENT relations = {} @@ -78,6 +81,23 @@ def get_length(route, relations): return float(route['tags']['length']) +def query_lengths_from_wikidata(wikidata_ids): + query = """SELECT ?id ?length WHERE { + VALUES ?id { %s } + ?id p:P2043/psn:P2043/wikibase:quantityAmount ?length . + }""" % ' '.join( + f'wd:' + x for x in wikidata_ids + ) + + sparql = SPARQLWrapper(WIKIDATA_ENDPOINT, agent=USER_AGENT) + sparql.setQuery(query) + sparql.setReturnFormat(JSON) + return { + res['id']['value'].rsplit('/', maxsplit=1)[1]: float(res['length']['value']) + for res in sparql.query().convert()["results"]["bindings"] + } + + def query_overpass(relation_ids: Collection[int], with_length: bool): # TODO: split query if it gets too large @@ -112,12 +132,23 @@ def query_overpass(relation_ids: Collection[int], with_length: bool): if m_type == 'relation' ] + def wikidata_ids(): + for rel_id in relation_ids: + wikidata_id = all_relations[rel_id]['tags'].get('wikidata') + if wikidata_id: + yield wikidata_id + + wikidata_lengths = query_lengths_from_wikidata(wikidata_ids()) + for rel_id in relation_ids: rel = all_relations[rel_id] data = dict(tags={k: v for k, v in rel['tags'].items() if k in KEYS}) if with_length: data['mapped_length'] = get_length(rel, all_relations) + wikidata_id = rel['tags'].get('wikidata') + if wikidata_id in wikidata_lengths: + data['wikidata_length'] = wikidata_lengths[wikidata_id] relations[rel_id] = data @@ -133,6 +164,7 @@ query_overpass([rel_id for rel_id, length in relation_ids.items() if length], Tr text = f'''\ -- This page is automatically generated by a Python script using the Overpass API. -- {overpass_info['copyright']} +-- The wikidata_length data is queried from www.wikidata.org and available under Creative Commons CC0 License. -- osm_base = {overpass_info['timestamp_osm_base']} return ''' + luadata.serialize( |