author     Martin Fischer <martin@push-f.com>    2025-04-08 19:25:36 +0200
committer  Martin Fischer <martin@push-f.com>    2025-04-14 07:04:45 +0200
commit     e29d27533725819ec3f6d05a27048d3d2627b53e (patch)
tree       5afba50408b25179edb4ea6445acfe1d3e051488 /lex-fetch/uk/uk.go
parent     96236c9d80cea2d6ba83591a7d08a8cc096fd8d3 (diff)
refactor: port fetchers to Go
* Austria: upgraded to RIS API v2.6 because v2.5 has been turned off
Diffstat (limited to 'lex-fetch/uk/uk.go')
-rw-r--r--   lex-fetch/uk/uk.go   100
1 file changed, 100 insertions, 0 deletions
diff --git a/lex-fetch/uk/uk.go b/lex-fetch/uk/uk.go
new file mode 100644
index 0000000..f8e1510
--- /dev/null
+++ b/lex-fetch/uk/uk.go
@@ -0,0 +1,100 @@
+package uk
+
+import (
+ "fmt"
+ "log/slog"
+ "net/http"
+ "regexp"
+ "strconv"
+
+ "github.com/antchfx/xmlquery"
+ "push-f.com/lex-surf/internal/lex"
+ "push-f.com/lex-surf/lex-fetch/progress"
+)
+
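+// Fetcher fetches laws from legislation.gov.uk (UK Public General Acts).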
+type Fetcher struct{}
+
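+// Fetch downloads every page of the ukpga Atom feed and returns the laws that are not marked as repealed.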
+func (s *Fetcher) Fetch(log *slog.Logger, client *http.Client, progress *progress.Reporter) ([]lex.Law, error) {
+ firstPage, err := fetchPage(client, 1)
+ if err != nil {
+ return nil, fmt.Errorf("failed to fetch first page: %w", err)
+ }
+ // Round up so that a trailing partial page is not skipped.
+ progress.Total = (firstPage.totalItems + firstPage.itemsPerPage - 1) / firstPage.itemsPerPage
+
+ laws := firstPage.entries
+
+ for page := 2; page <= progress.Total; page++ {
+ feed, err := fetchPage(client, page)
+ if err != nil {
+ return nil, fmt.Errorf("failed to fetch page: %w", err)
+ }
+ laws = append(laws, feed.entries...)
+ progress.ReportProgress(page)
+ }
+ return laws, nil
+}
+
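+// fetchPage downloads and parses one page of https://www.legislation.gov.uk/ukpga/data.feed.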
+func fetchPage(client *http.Client, page int) (*feed, error) {
+ resp, err := client.Get(fmt.Sprintf("https://www.legislation.gov.uk/ukpga/data.feed?page=%d", page))
+ if err != nil {
+ return nil, fmt.Errorf("failed to fetch page: %w", err)
+ }
+ defer resp.Body.Close()
+ if resp.StatusCode != 200 {
+ return nil, fmt.Errorf("unexpected status code %d", resp.StatusCode)
+ }
+
+ doc, err := xmlquery.Parse(resp.Body)
+ if err != nil {
+ return nil, fmt.Errorf("failed to HTML parse: %w", err)
+ }
+ itemsPerPageEl := xmlquery.FindOne(doc, "//openSearch:itemsPerPage")
+ if itemsPerPageEl == nil {
+ return nil, fmt.Errorf("couldn't find openSearch:itemsPerPage")
+ }
+ itemsPerPage, err := strconv.Atoi(itemsPerPageEl.InnerText())
+ if err != nil || itemsPerPage == 0 {
+ return nil, fmt.Errorf("invalid openSearch:itemsPerPage value %q", itemsPerPageEl.InnerText())
+ }
+
+ facetTypesEl := xmlquery.FindOne(doc, "//leg:facetType")
+ if facetTypesEl == nil {
+ return nil, fmt.Errorf("couldn't find leg:facetType")
+ }
+ totalItems, _ := strconv.Atoi(facetTypesEl.SelectAttr("value"))
+ if totalItems == 0 {
+ return nil, fmt.Errorf("detected 0 total results")
+ }
+
+ var laws []lex.Law
+
+ for _, entry := range xmlquery.Find(doc, "//entry") {
+ title := xmlquery.FindOne(entry, "./title").InnerText()
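+ // Skip acts whose title marks them as repealed.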
+ repealed, _ := regexp.MatchString("(?i)\\(repealed( .+)?\\)$", title)
+ if repealed {
+ continue
+ }
+ laws = append(laws, lex.Law{
+ Title: title,
+ URL: xmlquery.FindOne(entry, "./link[@rel='self']").SelectAttr("href"),
+ })
+ }
+
+ return &feed{
+ totalItems: totalItems,
+ itemsPerPage: itemsPerPage,
+ entries: laws,
+ }, nil
+}
+
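+// feed holds the pagination metadata and the law entries parsed from one feed page.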
+type feed struct {
+ itemsPerPage int
+ totalItems int
+ entries []lex.Law
+}