diff options
Diffstat (limited to 'osm_proposals')
-rwxr-xr-x | osm_proposals/archived_without_template.py | 34 | ||||
-rwxr-xr-x | osm_proposals/proposals.py | 187 |
2 files changed, 221 insertions, 0 deletions
# ---------------------------------------------------------------------------
# diff --git a/osm_proposals/archived_without_template.py b/osm_proposals/archived_without_template.py
# (new file, mode 100755, originally added as blob ea026d3)
# ---------------------------------------------------------------------------
#!/usr/bin/env python3
"""
Queries wiki.openstreetmap.org for archived proposal pages without the {{Proposal page}} template.

Sometimes when archiving a page people accidentally also replace the
{{Proposal page}} template, which however means that proposals.py
won't find the page anymore. This script lists such pages so that the
template can be manually restored.
"""
import argparse

import pywikiapi
import mwparserfromhell

OSMWIKI_ENDPOINT = 'https://wiki.openstreetmap.org/w/api.php'


def run():
    """Print the title of every archived proposal page lacking {{Proposal page}}.

    Takes no command line options; the argument parser is only set up so that
    ``--help`` shows the module docstring and unknown arguments are rejected.
    """
    arg_parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    arg_parser.parse_args()

    osmwiki = pywikiapi.Site(OSMWIKI_ENDPOINT)

    for page in osmwiki.query_pages(
        generator='categorymembers',
        gcmtitle='Category:Archived proposals',
        gcmlimit='max',
        prop='templates',
        # Restrict the listed templates to the one we care about, so that the
        # mere presence of the 'templates' key answers "does the page use it?".
        tltemplates='Template:Proposal page',
    ):
        if 'templates' not in page:
            print(page['title'])


# The file is committed executable (mode 100755) with a shebang, so running it
# directly should actually do something. Importing it stays side-effect free.
if __name__ == '__main__':
    run()


# ---------------------------------------------------------------------------
# diff --git a/osm_proposals/proposals.py b/osm_proposals/proposals.py
# (new file, mode 100755, originally added as blob c111699)
# ---------------------------------------------------------------------------
#!/usr/bin/env python3
"""Queries wiki.openstreetmap.org for proposals and writes a JSON list of them to the given file."""
import argparse
import html
import json
import sys
from collections.abc import Container

import pywikiapi
import mwparserfromhell
import requests

OSMWIKI_ENDPOINT = 'https://wiki.openstreetmap.org/w/api.php'

# https://wiki.openstreetmap.org/w/index.php?title=Template:Proposal_page&action=edit


def run():
    """Collect all proposals from the wiki and dump them as JSON to ``out_file``.

    Steps:

    1. Fetch the wiki's language table by expanding
       ``{{#invoke:languages/table|json}}``; its keys are used to recognise
       language prefixes in page titles.
    2. Parse every page that embeds ``Template:Proposal page``.
    3. Sort the results with ``sort_key`` (reversed) and write them to the
       file named on the command line, omitting fields that are None.

    Raises:
        requests.HTTPError: if the language table request fails.
    """
    arg_parser = argparse.ArgumentParser(description=__doc__)
    arg_parser.add_argument("out_file")
    args = arg_parser.parse_args()

    res = requests.get(
        OSMWIKI_ENDPOINT,
        params=dict(
            action='expandtemplates',
            prop='wikitext',
            format='json',
            text='{{#invoke:languages/table|json}}',
        ),
        # requests never times out by default; do not hang forever
        timeout=60,
    )
    # fail with a clear HTTP error instead of a KeyError/JSON error further down
    res.raise_for_status()
    langs: dict[str, dict] = json.loads(res.json()['expandtemplates']['wikitext'])

    osmwiki = pywikiapi.Site(OSMWIKI_ENDPOINT)

    proposals = []
    for page in osmwiki.query_pages(
        generator='embeddedin',
        geititle='Template:Proposal page',
        geilimit='max',
        prop='revisions',
        rvprop='content',
        rvslots='main',
    ):
        proposal = parse_proposal(page, langs)
        if proposal:
            proposals.append(proposal)

    proposals.sort(key=sort_key, reverse=True)

    with open(args.out_file, 'w', encoding='utf-8') as f:
        json.dump([{k: v for k, v in p.items() if v is not None} for p in proposals], f)


def get_template_val(tpl, name):
    """Return the stripped value of parameter ``name`` of template ``tpl``.

    Returns None when the template has no such parameter or when its value is
    empty / whitespace-only.
    """
    param = tpl.get(name, None)
    if param is None:
        return None
    # turn empty strings into None
    return param.value.strip() or None


def _get_date_val(tpl, name):
    """Like ``get_template_val`` but also map the placeholders '*' and '-' to None."""
    value = get_template_val(tpl, name)
    if value in ('*', '-'):
        return None
    return value


def eprint(*args):
    """Print ``args`` to stderr, keeping stdout free of diagnostics."""
    print(*args, file=sys.stderr)


def is_stub(doc):
    """Return True if the parsed page ``doc`` looks like an empty placeholder.

    A page containing an {{Archived proposal}} template is never a stub.
    Otherwise the page is a stub if it has no headings, or if none of its
    top-level text nodes contains anything besides whitespace and the
    fragments of the boilerplate "Please comment on the ... ." sentence.
    """
    if any(
        doc.ifilter_templates(matches=lambda t: t.name.matches('Archived proposal'))
    ):
        return False

    if not any(doc.ifilter_headings()):
        # detect proposals without headings as stubs
        return True

    if not any(
        n
        for n in doc.nodes
        # any text
        if isinstance(n, mwparserfromhell.nodes.text.Text)
        # other than newlines
        and n.strip()
        # and "Please comment on the [[{{TALKPAGENAME}}|discussion page]]."
        and n.strip() not in ('Please comment on the', '.')
    ):
        # detect proposals without text as stubs
        return True

    return False


def parse_proposal(page: dict, langs: Container[str]) -> dict | None:
    """Extract the proposal metadata from one wiki page.

    Args:
        page: a page object from the API query; its title is read from
            ``page['title']`` and its wikitext from
            ``page['revisions'][0]['slots']['main']['content']``.
        langs: the known language codes; a lower-cased title prefix (the part
            before the first ':') found in here becomes the proposal's ``lang``.

    Returns:
        A dict with the keys ``page_title``, ``lang``, ``name``, ``status``,
        ``definition``, ``draft_start``, ``rfc_start``, ``vote_start`` and
        ``authors`` (values may be None), or None if the page has no
        {{Proposal page}} template or is skipped as a stub.
    """
    page_title = page['title']
    text = page['revisions'][0]['slots']['main']['content']
    doc = mwparserfromhell.parse(text)
    proposal_page_templates = doc.filter_templates(
        matches=lambda t: t.name.matches('Proposal page')
        or t.name.matches('Proposal Page')
    )

    if not proposal_page_templates:
        eprint('{{Proposal Page}} not found in', page_title)
        return None

    # Take a list snapshot (filter_*, not ifilter_*) so that the tree is not
    # mutated while a lazy generator is still walking it.
    for comment in doc.filter_comments():
        # remove comments like <!-- Date the RFC email is sent to the Tagging list: YYYY-MM-DD -->
        doc.remove(comment)

    tpl = proposal_page_templates[0]

    status = get_template_val(tpl, 'status')
    if status:
        status = status.lower()

    if is_stub(doc):
        # NOTE(review): the source's indentation was lost; this assumes that
        # approved/rejected stubs are only warned about and still included,
        # while all other stubs are skipped -- confirm against the repository.
        if status in ('approved', 'rejected'):
            eprint(f'WARNING {status} proposal is a stub', page_title)
        else:
            eprint('skipping stub', page_title)
            return None

    name = get_template_val(tpl, 'name')
    if name:
        name = html.unescape(name)

    draft_start = _get_date_val(tpl, 'draftStartDate')
    rfc_start = _get_date_val(tpl, 'rfcStartDate')
    vote_start = _get_date_val(tpl, 'voteStartDate')

    definition = get_template_val(tpl, 'definition')
    users = get_template_val(tpl, 'users') or get_template_val(tpl, 'user')

    # Only a title that really has a "prefix:" part can carry a language
    # prefix; a colon-less title must not be matched against the codes.
    prefix, separator, _ = page_title.partition(':')
    prefix = prefix.lower()
    lang = prefix if separator and prefix in langs else None

    return dict(
        page_title=page_title,
        lang=lang,
        name=name,
        status=status,
        definition=definition,
        draft_start=draft_start,
        rfc_start=rfc_start,
        vote_start=vote_start,
        authors=users,
    )


# Rank of each known proposal status. sort_key() negates the rank and run()
# sorts in reverse, so a lower rank ends up earlier in the output.
STATUSES = {
    'voting': 0,
    'post-vote': 1,
    'proposed': 2,
    'draft': 3,
    'approved': 4,
    'inactive': 5,
    'rejected': 6,
    'abandoned': 7,
    'canceled': 8,
    'obsoleted': 9,
}


def sort_key(proposal):
    """Return the sort key ``(-status_rank, date)`` for a proposal dict.

    The date is the one relevant for the proposal's status: the vote start for
    voting/approved/rejected, the RFC start for proposed, and the draft start
    for everything else; a missing date becomes ''. Statuses not listed in
    ``STATUSES`` (including None) get rank 10, i.e. they sort after all known
    ones. Dates are compared as plain strings (presumably YYYY-MM-DD).
    """
    status = proposal['status']

    if status in ('voting', 'approved', 'rejected'):
        date = proposal['vote_start'] or ''
    elif status == 'proposed':
        date = proposal['rfc_start'] or ''
    else:
        date = proposal['draft_start'] or ''

    return (-STATUSES.get(status, 10), date)


# The file is committed executable (mode 100755) with a shebang, so running it
# directly should actually do something. Importing it stays side-effect free.
if __name__ == '__main__':
    run()