From f1b0f9b6c9404cca0a5cd230279dd82254b18378 Mon Sep 17 00:00:00 2001
From: Martin Fischer <martin@push-f.com>
Date: Sun, 13 Apr 2025 09:44:11 +0200
Subject: remove: scraper for Mexico

I cannot reach www.diputados.gob.mx anymore.
---
 scrapers/mx.py | 28 ----------------------------
 1 file changed, 28 deletions(-)
 delete mode 100755 scrapers/mx.py

(limited to 'scrapers/mx.py')

diff --git a/scrapers/mx.py b/scrapers/mx.py
deleted file mode 100755
index ec6a274..0000000
--- a/scrapers/mx.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/usr/bin/env python3
-import json
-import re
-import urllib.parse
-
-import lxml.html
-import requests
-
-URL = 'http://www.diputados.gob.mx/LeyesBiblio/index.htm'
-
-req = requests.get(URL)
-root = lxml.html.fromstring(req.text)
-# /following-sibling::span/table
-table = root.xpath(".//*[contains(text(), 'LEYES FEDERALES VIGENTES')]/ancestor::table[1]/following::table[1]")[0]
-
-laws = []
-
-for link in table.iterfind('.//tr//td[2]//a'):
-    title = lxml.html.tostring(link, method='text', encoding='unicode')
-    href = link.get('href')
-    laws.append(dict(
-        title = re.sub(r'\s+', ' ', title.strip()),
-        url = urllib.parse.urljoin(URL, link.get('href')),
-        redir = re.match('ref/(.*)\.htm', href).group(1)
-    ))
-
-with open('laws/mx.json', 'w') as f:
-    json.dump(sorted(laws, key=lambda l: l['title']), f, indent=2, ensure_ascii=False)
\ No newline at end of file
-- 
cgit v1.2.3