// Package uk fetches laws from the ukpga data feed of legislation.gov.uk.
package uk

import (
	"fmt"
	"log/slog"
	"net/http"
	"regexp"
	"strconv"

	"github.com/antchfx/xmlquery"
	"push-f.com/lex-surf/internal/lex"
	"push-f.com/lex-surf/lex-fetch/progress"
)

// repealedTitleRe matches titles ending in "(repealed)" or "(repealed ...)",
// case-insensitively. It is compiled once instead of once per feed entry.
var repealedTitleRe = regexp.MustCompile(`(?i)\(repealed( .+)?\)$`)

// Fetcher fetches laws from the paginated ukpga feed of legislation.gov.uk.
type Fetcher struct{}

// Fetch downloads every page of the feed and returns the collected laws.
//
// The first page is fetched to learn the total number of items and the page
// size; the number of pages derived from them is stored in progress.Total.
// After each subsequent page, progress.ReportProgress is called with the
// page number. Entries whose title marks them as repealed are omitted (see
// fetchPage). The log parameter is currently unused.
//
// The first error encountered aborts the fetch and is returned; no partial
// result is returned in that case.
func (s *Fetcher) Fetch(log *slog.Logger, client *http.Client, progress *progress.Reporter) ([]lex.Law, error) {
	firstPage, err := fetchPage(client, 1)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch first page: %w", err)
	}

	// Round up so that a final, partially filled page is fetched as well.
	// fetchPage guarantees itemsPerPage > 0, so the division cannot panic.
	progress.Total = (firstPage.totalItems + firstPage.itemsPerPage - 1) / firstPage.itemsPerPage

	laws := firstPage.entries
	for page := 2; page <= progress.Total; page++ {
		next, err := fetchPage(client, page)
		if err != nil {
			return nil, fmt.Errorf("failed to fetch page %d: %w", page, err)
		}
		laws = append(laws, next.entries...)
		progress.ReportProgress(page)
	}
	return laws, nil
}

// fetchPage requests the given page of the feed and parses it.
//
// It returns the page's non-repealed entries together with the page size
// (openSearch:itemsPerPage) and the total item count (the value attribute of
// the first leg:facetType element). An error is returned if the request
// fails, the status is not 200, the XML cannot be parsed, the pagination
// metadata is missing or invalid, or a non-repealed entry lacks a title or
// a rel="self" link.
func fetchPage(client *http.Client, page int) (*feed, error) {
	resp, err := client.Get(fmt.Sprintf("https://www.legislation.gov.uk/ukpga/data.feed?page=%d", page))
	if err != nil {
		return nil, fmt.Errorf("failed to fetch page: %w", err)
	}
	// The body must be closed on every path, including the error returns
	// below, or the underlying connection is leaked.
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unexpected status code %d", resp.StatusCode)
	}

	doc, err := xmlquery.Parse(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to parse XML: %w", err)
	}

	itemsPerPageEl := xmlquery.FindOne(doc, "//openSearch:itemsPerPage")
	if itemsPerPageEl == nil {
		return nil, fmt.Errorf("couldn't find openSearch:itemsPerPage")
	}
	itemsPerPage, err := strconv.Atoi(itemsPerPageEl.InnerText())
	if err != nil {
		return nil, fmt.Errorf("invalid openSearch:itemsPerPage: %w", err)
	}
	if itemsPerPage <= 0 {
		// The caller divides by this value to compute the page count.
		return nil, fmt.Errorf("invalid openSearch:itemsPerPage %d", itemsPerPage)
	}

	facetTypesEl := xmlquery.FindOne(doc, "//leg:facetType")
	if facetTypesEl == nil {
		return nil, fmt.Errorf("couldn't find leg:facetType")
	}
	totalItems, err := strconv.Atoi(facetTypesEl.SelectAttr("value"))
	if err != nil {
		return nil, fmt.Errorf("invalid leg:facetType value: %w", err)
	}
	if totalItems == 0 {
		return nil, fmt.Errorf("detected 0 total results")
	}

	var laws []lex.Law
	for _, entry := range xmlquery.Find(doc, "//entry") {
		titleEl := xmlquery.FindOne(entry, "./title")
		if titleEl == nil {
			return nil, fmt.Errorf("couldn't find title of entry")
		}
		title := titleEl.InnerText()
		if repealedTitleRe.MatchString(title) {
			continue
		}

		linkEl := xmlquery.FindOne(entry, "./link[@rel='self']")
		if linkEl == nil {
			return nil, fmt.Errorf("couldn't find self link of entry %q", title)
		}
		laws = append(laws, lex.Law{
			Title: title,
			URL:   linkEl.SelectAttr("href"),
		})
	}

	return &feed{
		totalItems:   totalItems,
		itemsPerPage: itemsPerPage,
		entries:      laws,
	}, nil
}

// feed is the parsed content of a single feed page.
type feed struct {
	// itemsPerPage is the page size reported by the feed; always > 0 when
	// produced by fetchPage.
	itemsPerPage int
	// totalItems is the item count read from the feed's leg:facetType
	// element; always non-zero when produced by fetchPage.
	totalItems int
	// entries holds the page's laws, excluding those marked as repealed.
	entries []lex.Law
}