summary | refs | log | tree | commit | diff
path: root/fetch_data.py
diff options
context:
space:
mode:
author Martin Fischer <martin@push-f.com> 2022-06-24 21:18:26 +0200
committer Martin Fischer <martin@push-f.com> 2022-06-24 22:57:03 +0200
commit 3df9bdd164b956d3dff2ecd5b5852eea9b0c9000 (patch)
tree 72628e5004679f501e9a79816f106679fe075a7e /fetch_data.py
parent 4836c2b42f6eb6c99b27375d00492b63e4500892 (diff)
also query length from wikidata
Diffstat (limited to 'fetch_data.py')
-rwxr-xr-x fetch_data.py 34
1 files changed, 33 insertions, 1 deletions
diff --git a/fetch_data.py b/fetch_data.py
index bf96ee9..3c77c78 100755
--- a/fetch_data.py
+++ b/fetch_data.py
@@ -9,9 +9,12 @@ import mwparserfromhell
import requests
import luadata
import tomli
+from SPARQLWrapper import SPARQLWrapper, JSON
+USER_AGENT = 'osmwiki-overpass-import'
OSMWIKI_ENDPOINT = 'https://wiki.openstreetmap.org/w/api.php'
OVERPASS_ENDPOINT = 'https://lz4.overpass-api.de/api/interpreter'
+WIKIDATA_ENDPOINT = 'https://query.wikidata.org/sparql'
osmwiki = pywikiapi.Site(OSMWIKI_ENDPOINT)
@@ -54,7 +57,7 @@ for page, tpl in find_template_calls('list relations'):
# step 2: query overpass turbo about relations
sess = requests.session()
-sess.headers['user-agent'] = 'osmwiki-overpass-bridge'
+sess.headers['user-agent'] = USER_AGENT
relations = {}
@@ -78,6 +81,23 @@ def get_length(route, relations):
return float(route['tags']['length'])
+def query_lengths_from_wikidata(wikidata_ids):
+    """Look up the length (property P2043) of Wikidata items via SPARQL.
+
+    Args:
+        wikidata_ids: iterable of Wikidata item IDs as strings (e.g. 'Q42').
+            Values that are not a plain item ID are skipped and duplicates
+            are collapsed, so one bad value cannot break the whole request.
+
+    Returns:
+        dict mapping item ID to its length as a float. Items for which the
+        endpoint returns no length are absent. The query reads the
+        normalized value (psn:), so amounts are presumably in metres --
+        TODO confirm against the Wikidata query service documentation.
+    """
+    import re
+
+    # The IDs are spliced verbatim into the query text. Callers pass raw
+    # tag values, so a single malformed entry (e.g. 'Q1;Q2') would otherwise
+    # turn the whole query into a syntax error. Keep well-formed IDs only.
+    item_ids = list(dict.fromkeys(
+        x for x in wikidata_ids if re.fullmatch(r'Q[1-9][0-9]*', x)
+    ))
+    if not item_ids:
+        # Nothing to ask for: avoid a pointless network round trip.
+        return {}
+
+    query = """SELECT ?id ?length WHERE {
+        VALUES ?id { %s }
+        ?id p:P2043/psn:P2043/wikibase:quantityAmount ?length .
+    }""" % ' '.join('wd:' + x for x in item_ids)
+
+    sparql = SPARQLWrapper(WIKIDATA_ENDPOINT, agent=USER_AGENT)
+    sparql.setQuery(query)
+    sparql.setReturnFormat(JSON)
+    return {
+        # ?id comes back as a full entity URI; its last path segment is the
+        # item ID. If an item yields several bindings, the last one wins.
+        res['id']['value'].rsplit('/', maxsplit=1)[1]: float(res['length']['value'])
+        for res in sparql.query().convert()["results"]["bindings"]
+    }
+
+
def query_overpass(relation_ids: Collection[int], with_length: bool):
# TODO: split query if it gets too large
@@ -112,12 +132,23 @@ def query_overpass(relation_ids: Collection[int], with_length: bool):
if m_type == 'relation'
]
+ def wikidata_ids():
+ for rel_id in relation_ids:
+ wikidata_id = all_relations[rel_id]['tags'].get('wikidata')
+ if wikidata_id:
+ yield wikidata_id
+
+ wikidata_lengths = query_lengths_from_wikidata(wikidata_ids())
+
for rel_id in relation_ids:
rel = all_relations[rel_id]
data = dict(tags={k: v for k, v in rel['tags'].items() if k in KEYS})
if with_length:
data['mapped_length'] = get_length(rel, all_relations)
+ wikidata_id = rel['tags'].get('wikidata')
+ if wikidata_id in wikidata_lengths:
+ data['wikidata_length'] = wikidata_lengths[wikidata_id]
relations[rel_id] = data
@@ -133,6 +164,7 @@ query_overpass([rel_id for rel_id, length in relation_ids.items() if length], Tr
text = f'''\
-- This page is automatically generated by a Python script using the Overpass API.
-- {overpass_info['copyright']}
+-- The wikidata_length data is queried from www.wikidata.org and available under Creative Commons CC0 License.
-- osm_base = {overpass_info['timestamp_osm_base']}
return ''' + luadata.serialize(