diff options
| author | Martin Fischer <martin@push-f.com> | 2022-06-24 21:18:26 +0200 | 
|---|---|---|
| committer | Martin Fischer <martin@push-f.com> | 2022-06-24 22:57:03 +0200 | 
| commit | 3df9bdd164b956d3dff2ecd5b5852eea9b0c9000 (patch) | |
| tree | 72628e5004679f501e9a79816f106679fe075a7e | |
| parent | 4836c2b42f6eb6c99b27375d00492b63e4500892 (diff) | |
also query length from wikidata
| -rwxr-xr-x | fetch_data.py | 34 | 
1 file changed, 33 insertions, 1 deletion
| diff --git a/fetch_data.py b/fetch_data.py index bf96ee9..3c77c78 100755 --- a/fetch_data.py +++ b/fetch_data.py @@ -9,9 +9,12 @@ import mwparserfromhell  import requests  import luadata  import tomli +from SPARQLWrapper import SPARQLWrapper, JSON +USER_AGENT = 'osmwiki-overpass-import'  OSMWIKI_ENDPOINT = 'https://wiki.openstreetmap.org/w/api.php'  OVERPASS_ENDPOINT = 'https://lz4.overpass-api.de/api/interpreter' +WIKIDATA_ENDPOINT = 'https://query.wikidata.org/sparql'  osmwiki = pywikiapi.Site(OSMWIKI_ENDPOINT) @@ -54,7 +57,7 @@ for page, tpl in find_template_calls('list relations'):  # step 2: query overpass turbo about relations  sess = requests.session() -sess.headers['user-agent'] = 'osmwiki-overpass-bridge' +sess.headers['user-agent'] = USER_AGENT  relations = {} @@ -78,6 +81,23 @@ def get_length(route, relations):          return float(route['tags']['length']) +def query_lengths_from_wikidata(wikidata_ids): +    query = """SELECT ?id ?length WHERE { +      VALUES ?id { %s } +      ?id p:P2043/psn:P2043/wikibase:quantityAmount ?length  . 
+    }""" % ' '.join( +        f'wd:' + x for x in wikidata_ids +    ) + +    sparql = SPARQLWrapper(WIKIDATA_ENDPOINT, agent=USER_AGENT) +    sparql.setQuery(query) +    sparql.setReturnFormat(JSON) +    return { +        res['id']['value'].rsplit('/', maxsplit=1)[1]: float(res['length']['value']) +        for res in sparql.query().convert()["results"]["bindings"] +    } + +  def query_overpass(relation_ids: Collection[int], with_length: bool):      # TODO: split query if it gets too large @@ -112,12 +132,23 @@ def query_overpass(relation_ids: Collection[int], with_length: bool):                  if m_type == 'relation'              ] +        def wikidata_ids(): +            for rel_id in relation_ids: +                wikidata_id = all_relations[rel_id]['tags'].get('wikidata') +                if wikidata_id: +                    yield wikidata_id + +        wikidata_lengths = query_lengths_from_wikidata(wikidata_ids()) +      for rel_id in relation_ids:          rel = all_relations[rel_id]          data = dict(tags={k: v for k, v in rel['tags'].items() if k in KEYS})          if with_length:              data['mapped_length'] = get_length(rel, all_relations) +            wikidata_id = rel['tags'].get('wikidata') +            if wikidata_id in wikidata_lengths: +                data['wikidata_length'] = wikidata_lengths[wikidata_id]          relations[rel_id] = data @@ -133,6 +164,7 @@ query_overpass([rel_id for rel_id, length in relation_ids.items() if length], Tr  text = f'''\  -- This page is automatically generated by a Python script using the Overpass API.  -- {overpass_info['copyright']} +-- The wikidata_length data is queried from www.wikidata.org and available under Creative Commons CC0 License.  -- osm_base = {overpass_info['timestamp_osm_base']}  return ''' + luadata.serialize( | 
