2 files changed, 221 insertions, 0 deletions
diff --git a/osm_proposals/archived_without_template.py b/osm_proposals/archived_without_template.py
new file mode 100755
index 0000000..ea026d3
--- /dev/null
+++ b/osm_proposals/archived_without_template.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python3
+"""
+Queries wiki.openstreetmap.org for archived proposal pages without the {{Proposal page}} template.
+
+Sometimes when archiving a page people accidentally also replace the
+{{Proposal page}} template, which however means that proposals.py
+won't find the page anymore. This script lists such pages so that the
+template can be manually restored.
+"""
+import argparse
+
+import pywikiapi
+import mwparserfromhell
+
+OSMWIKI_ENDPOINT = 'https://wiki.openstreetmap.org/w/api.php'
+
+
+def run():
+    """Print the titles of archived proposals lacking the {{Proposal page}} template."""
+    arg_parser = argparse.ArgumentParser(
+        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
+    )
+    arg_parser.parse_args()  # no arguments are defined; this only provides --help
+
+    osmwiki = pywikiapi.Site(OSMWIKI_ENDPOINT)
+    for page in osmwiki.query_pages(
+        generator='categorymembers',
+        gcmtitle='Category:Archived proposals',
+        gcmlimit='max',
+        prop='templates',
+        tltemplates='Template:Proposal page',
+    ):
+        if 'templates' not in page:  # the tltemplates filter matched nothing
+            print(page['title'])
diff --git a/osm_proposals/proposals.py b/osm_proposals/proposals.py
new file mode 100755
index 0000000..c111699
--- /dev/null
+++ b/osm_proposals/proposals.py
@@ -0,0 +1,187 @@
+#!/usr/bin/env python3
+"""Queries wiki.openstreetmap.org for proposals and writes a JSON list of them to the given file."""
+import argparse
+import html
+import json
+import sys
+from collections.abc import Container
+
+import pywikiapi
+import mwparserfromhell
+import requests
+
+OSMWIKI_ENDPOINT = 'https://wiki.openstreetmap.org/w/api.php'
+
+# https://wiki.openstreetmap.org/w/index.php?title=Template:Proposal_page&action=edit
+
+
+def run():
+    """Query the OSM wiki for pages using {{Proposal page}} and write them as JSON."""
+    arg_parser = argparse.ArgumentParser(description=__doc__)
+    arg_parser.add_argument("out_file")
+    args = arg_parser.parse_args()
+
+    # language table of the wiki; parse_proposal checks title prefixes against its keys
+    res = requests.get(
+        OSMWIKI_ENDPOINT,
+        params=dict(
+            action='expandtemplates',
+            prop='wikitext',
+            format='json',
+            text='{{#invoke:languages/table|json}}',
+        ),
+        timeout=60,  # requests waits forever by default
+    )
+    res.raise_for_status()  # fail clearly on an HTTP error instead of on JSON parsing
+    langs: dict[str, dict] = json.loads(res.json()['expandtemplates']['wikitext'])
+
+    osmwiki = pywikiapi.Site(OSMWIKI_ENDPOINT)
+    pages = osmwiki.query_pages(
+        generator='embeddedin',
+        geititle='Template:Proposal page',
+        geilimit='max',
+        prop='revisions',
+        rvprop='content',
+        rvslots='main',
+    )
+    # parse_proposal returns None for pages that are to be skipped
+    proposals = [p for p in (parse_proposal(page, langs) for page in pages) if p]
+    proposals.sort(key=sort_key, reverse=True)
+    # fields whose value is None are left out of the written objects
+    with open(args.out_file, 'w', encoding='utf-8') as f:
+        json.dump([{k: v for k, v in p.items() if v is not None} for p in proposals], f)
+
+
+def get_template_val(tpl, name):
+    """Return the stripped value of parameter `name` of `tpl`, or None if unset/blank."""
+    param = tpl.get(name, None)
+    if not param:
+        return None
+    # turn empty strings into None
+    return param.value.strip() or None
+
+
+def eprint(*args):
+    print(*args, file=sys.stderr)  # same as print(), but the output goes to stderr
+
+
+def is_stub(doc):
+    """Return True if the parsed page `doc` looks like a stub.
+
+    Pages containing {{Archived proposal}} are never reported as stubs.
+    Otherwise a page is a stub if it has no headings, or if its top-level
+    text is nothing but whitespace and the fragments left over from
+    "Please comment on the [[{{TALKPAGENAME}}|discussion page]]."
+    """
+    archived = doc.ifilter_templates(
+        matches=lambda t: t.name.matches('Archived proposal')
+    )
+    if any(archived):
+        return False
+
+    if not any(doc.ifilter_headings()):
+        return True
+
+    boilerplate = ('Please comment on the', '.')
+    for node in doc.nodes:
+        if not isinstance(node, mwparserfromhell.nodes.text.Text):
+            continue
+        if node.strip() and node.strip() not in boilerplate:
+            return False  # found actual text
+    return True
+
+
+def parse_proposal(page: dict, langs: Container[str]) -> dict | None:
+    """Extract the proposal metadata from one wiki page.
+
+    `page` is an API page object carrying its title and main-slot revision
+    content; `langs` is the collection of language codes that a title
+    prefix is checked against (in lowercase). Returns None, after logging
+    to stderr, if the page has no {{Proposal page}} template or is a stub
+    whose status is neither approved nor rejected.
+    """
+    page_title = page['title']
+    text = page['revisions'][0]['slots']['main']['content']
+    doc = mwparserfromhell.parse(text)
+    proposal_page_templates = doc.filter_templates(
+        matches=lambda t: t.name.matches('Proposal page')
+        or t.name.matches('Proposal Page')
+    )
+
+    if not proposal_page_templates:
+        eprint('{{Proposal Page}} not found in', page_title)
+        return None
+
+    # remove comments like <!-- Date the RFC email is sent to the Tagging list: YYYY-MM-DD -->
+    # (collected into a list first so the tree isn't mutated while being iterated lazily)
+    for comment in doc.filter_comments():
+        doc.remove(comment)
+
+    tpl = proposal_page_templates[0]
+
+    status = get_template_val(tpl, 'status')
+    if status:
+        status = status.lower()
+
+    if is_stub(doc):
+        if status in ('approved', 'rejected'):
+            eprint(f'WARNING {status} proposal is a stub', page_title)
+        else:
+            eprint('skipping stub', page_title)
+            return None
+
+    name = get_template_val(tpl, 'name')
+    if name:
+        name = html.unescape(name)
+
+    # '*' and '-' are treated as "no date"
+    dates = {}
+    for key, param in (
+        ('draft_start', 'draftStartDate'),
+        ('rfc_start', 'rfcStartDate'),
+        ('vote_start', 'voteStartDate'),
+    ):
+        value = get_template_val(tpl, param)
+        dates[key] = None if value in ('*', '-') else value
+
+    # only a "xx:" title prefix can denote a language; a title without a
+    # colon must not be mistaken for a language code as a whole
+    prefix, colon, _ = page_title.partition(':')
+    lang = prefix.lower() if colon and prefix.lower() in langs else None
+
+    return dict(
+        page_title=page_title,
+        lang=lang,
+        name=name,
+        status=status,
+        definition=get_template_val(tpl, 'definition'),
+        **dates,
+        authors=get_template_val(tpl, 'users') or get_template_val(tpl, 'user'),
+    )
+
+
+# rank per status; sort_key() negates it so a descending sort lists low ranks first
+STATUSES = {
+    status: rank
+    for rank, status in enumerate((
+        'voting', 'post-vote', 'proposed', 'draft', 'approved',
+        'inactive', 'rejected', 'abandoned', 'canceled', 'obsoleted',
+    ))
+}
+
+
+def sort_key(proposal):
+    """Return the (negated status rank, date) sort key of a proposal dict.
+
+    The date is vote_start for voting/approved/rejected proposals, rfc_start
+    for proposed ones and draft_start otherwise; a missing date becomes ''.
+    """
+    status = proposal['status']
+    if status == 'proposed':
+        date_field = 'rfc_start'
+    elif status in ('voting', 'approved', 'rejected'):
+        date_field = 'vote_start'
+    else:
+        date_field = 'draft_start'
+    # statuses missing from STATUSES rank after all known ones
+    return (-STATUSES.get(status, 10), proposal[date_field] or '')