diff options
Diffstat (limited to 'lex-fetch/uk/uk.go')
-rw-r--r-- | lex-fetch/uk/uk.go | 90 |
1 files changed, 90 insertions, 0 deletions
diff --git a/lex-fetch/uk/uk.go b/lex-fetch/uk/uk.go new file mode 100644 index 0000000..f8e1510 --- /dev/null +++ b/lex-fetch/uk/uk.go @@ -0,0 +1,90 @@ +package uk + +import ( + "fmt" + "log/slog" + "net/http" + "regexp" + "strconv" + + "github.com/antchfx/xmlquery" + "push-f.com/lex-surf/internal/lex" + "push-f.com/lex-surf/lex-fetch/progress" +) + +type Fetcher struct{} + +func (s *Fetcher) Fetch(log *slog.Logger, client *http.Client, progress *progress.Reporter) ([]lex.Law, error) { + firstPage, err := fetchPage(client, 1) + if err != nil { + return nil, fmt.Errorf("failed to fetch first page: %w", err) + } + progress.Total = firstPage.totalItems / firstPage.itemsPerPage + + laws := firstPage.entries + + for page := 2; page <= progress.Total; page++ { + feed, err := fetchPage(client, page) + if err != nil { + return nil, fmt.Errorf("failed to fetch page: %w", err) + } + laws = append(laws, feed.entries...) + progress.ReportProgress(page) + } + return laws, nil +} + +func fetchPage(client *http.Client, page int) (*feed, error) { + resp, err := client.Get(fmt.Sprintf("https://www.legislation.gov.uk/ukpga/data.feed?page=%d", page)) + if err != nil { + return nil, fmt.Errorf("failed to fetch page: %w", err) + } + if resp.StatusCode != 200 { + return nil, fmt.Errorf("unexpected status code %d", resp.StatusCode) + } + + doc, err := xmlquery.Parse(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to HTML parse: %w", err) + } + itemsPerPageEl := xmlquery.FindOne(doc, "//openSearch:itemsPerPage") + if itemsPerPageEl == nil { + return nil, fmt.Errorf("couldn't find openSearch:itemsPerPage") + } + itemsPerPage, _ := strconv.Atoi(itemsPerPageEl.InnerText()) + + facetTypesEl := xmlquery.FindOne(doc, "//leg:facetType") + if facetTypesEl == nil { + return nil, fmt.Errorf("couldn't find leg:facetType") + } + totalItems, _ := strconv.Atoi(facetTypesEl.SelectAttr("value")) + if totalItems == 0 { + return nil, fmt.Errorf("detected 0 total results") + } + + var laws []lex.Law + + for _, entry := range xmlquery.Find(doc, "//entry") { + title := xmlquery.FindOne(entry, "./title").InnerText() + repealed, _ := regexp.MatchString("(?i)\\(repealed( .+)?\\)$", title) + if repealed { + continue + } + laws = append(laws, lex.Law{ + Title: title, + URL: xmlquery.FindOne(entry, "./link[@rel='self']").SelectAttr("href"), + }) + } + + return &feed{ + totalItems: totalItems, + itemsPerPage: itemsPerPage, + entries: laws, + }, nil +} + +type feed struct { + itemsPerPage int + totalItems int + entries []lex.Law +} |