From e29d27533725819ec3f6d05a27048d3d2627b53e Mon Sep 17 00:00:00 2001 From: Martin Fischer Date: Tue, 8 Apr 2025 19:25:36 +0200 Subject: refactor: port fetchers to Go * Austria: upgraded to RIS API v2.6 because v2.5 has been turned off --- lex-fetch/at/at.go | 181 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 181 insertions(+) create mode 100644 lex-fetch/at/at.go (limited to 'lex-fetch/at') diff --git a/lex-fetch/at/at.go b/lex-fetch/at/at.go new file mode 100644 index 0000000..44efb27 --- /dev/null +++ b/lex-fetch/at/at.go @@ -0,0 +1,181 @@ +package at + +import ( + "encoding/json" + "fmt" + "log/slog" + "maps" + "math" + "net/http" + "net/url" + "slices" + "strconv" + "strings" + "sync" + "time" + + "push-f.com/lex-surf/internal/lex" + "push-f.com/lex-surf/lex-fetch/progress" +) + +type Fetcher struct{} + +const concurrentRequests = 4 + +func (s *Fetcher) Fetch(log *slog.Logger, client *http.Client, progress *progress.Reporter) ([]lex.Law, error) { + // The API is documented in https://data.bka.gv.at/ris/ogd/v2.6/Documents/Dokumentation_OGD-RIS_API.pdf. + + // Consolidated laws can only be queried via the Bundesrecht endpoint which returns individual paragraphs. + // Since we'll get multiple results for each law, we're saving results into a hash map. + lawsMap := make(map[string]lex.Law) + + // TODO: also query laws from the past and future + date := time.Now().Format("2006-01-02") + + data, err := fetchPage(client, date, 1) + if err != nil { + return nil, fmt.Errorf("failed to fetch first page: %w", err) + } + totalResults, _ := strconv.Atoi(data.Hits.Text) + if totalResults == 0 { + return nil, fmt.Errorf("API returned 0 results") + } + totalPages := int(math.Ceil(float64(totalResults) / 100)) + progress.Total = totalPages + assign(lawsMap, data.OgdDocumentReference) + + semaphore := make(chan struct{}, concurrentRequests) + var wg sync.WaitGroup + var mu sync.Mutex + var retErr error + var retErrPage int + var errOnce sync.Once + + for page := 2; page <= totalPages; page++ { + if retErr != nil { + return nil, fmt.Errorf("failed to fetch page %d: %w", retErrPage, retErr) + } + wg.Add(1) + semaphore <- struct{}{} + + go func(p int) { + defer wg.Done() + defer func() { <-semaphore }() + + data, err := fetchPage(client, date, page) + progress.ReportProgress(page) + if err != nil { + errOnce.Do(func() { + retErr = err + retErrPage = p + }) + } + mu.Lock() + assign(lawsMap, data.OgdDocumentReference) + mu.Unlock() + }(page) + } + laws := slices.SortedFunc(maps.Values(lawsMap), func(a, b lex.Law) int { + if a.Title > b.Title { + return 1 + } else if a.Title < b.Title { + return -1 + } + return 0 + }) + return laws, nil +} + +func fetchPage(client *http.Client, date string, page int) (*ogdDocumentResults, error) { + req, err := http.NewRequest("GET", "https://data.bka.gv.at/ris/api/v2.6/Bundesrecht", nil) + if err != nil { + return nil, fmt.Errorf("build request: %w", err) + } + req.URL.RawQuery = url.Values{ + "Appl": {"BrKons"}, // Bundesrecht konsolidiert + "Typ": {"BG oder BVG"}, // Bundesgesetz or Bundesverfassungsgesetz + "DokumenteProSeite": {"OneHundred"}, + "Seitennummer": {strconv.Itoa(page)}, + "FassungVom": {date}, + }.Encode() + resp, err := client.Do(req) + if err != nil { + return nil, fmt.Errorf("send request: %w", err) + } + + if resp.StatusCode != 200 { + return nil, fmt.Errorf("unexpected status code %d", resp.StatusCode) + } + + var data brKonsResult + + err = json.NewDecoder(resp.Body).Decode(&data) + if err != nil { + return nil, fmt.Errorf("JSON decode: %w", err) + } + + result := data.OgdSearchResult + + if result.Error != nil { + return nil, fmt.Errorf("error response: %s", result.Error) + } + + return result.OgdDocumentResults, nil +} + +func assign(laws map[string]lex.Law, paraDocs []document) { + for _, paraDoc := range paraDocs { + para := paraDoc.Data.Metadaten.Bundesrecht + law := lex.Law{ + Title: para.Kurztitel, + URL: para.BrKons.GesamteRechtsvorschriftUrl, + } + if para.BrKons.Abkuerzung != nil { + law.Abbr = *para.BrKons.Abkuerzung + redir := strings.ToLower(*para.BrKons.Abkuerzung) + redir = strings.ReplaceAll(redir, ")", "") + redir = strings.ReplaceAll(redir, "(", "") + redir = strings.ReplaceAll(redir, " – ", "-") + redir = strings.ReplaceAll(redir, " ", "-") + redir = strings.ReplaceAll(redir, "\u00A0", "-") + redir = strings.ReplaceAll(redir, "ä", "ae") + redir = strings.ReplaceAll(redir, "ü", "ue") + redir = strings.ReplaceAll(redir, "ö", "oe") + redir = strings.ReplaceAll(redir, "ß", "ss") + law.Redir = redir + } + laws[para.BrKons.Gesetzesnummer] = law + } +} + +type brKonsResult struct { + OgdSearchResult struct { + Error *struct { + Applikation string + Message string + } + OgdDocumentResults *ogdDocumentResults + } +} + +type ogdDocumentResults struct { + Hits struct { + Text string `json:"#text"` + } + OgdDocumentReference []document +} + +type document struct { + Data struct { + Metadaten struct { + Bundesrecht struct { + Kurztitel string + BrKons struct { + GesamteRechtsvorschriftUrl string + Abkuerzung *string + Gesetzesnummer string + } + } + } + } +} -- cgit v1.2.3