From e29d27533725819ec3f6d05a27048d3d2627b53e Mon Sep 17 00:00:00 2001
From: Martin Fischer <martin@push-f.com>
Date: Tue, 8 Apr 2025 19:25:36 +0200
Subject: refactor: port fetchers to Go

* Austria: upgraded to RIS API v2.6 because v2.5 has been turned off
---
 lex-fetch/de/de.go | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 lex-fetch/de/de.go

(limited to 'lex-fetch/de')

diff --git a/lex-fetch/de/de.go b/lex-fetch/de/de.go
new file mode 100644
index 0000000..2c0a033
--- /dev/null
+++ b/lex-fetch/de/de.go
@@ -0,0 +1,66 @@
+package de
+
+import (
+	"fmt"
+	"log/slog"
+	"net/http"
+	"net/url"
+	"strings"
+
+	"github.com/antchfx/htmlquery"
+	"golang.org/x/text/encoding/charmap"
+	"golang.org/x/text/transform"
+	"push-f.com/lex-surf/internal/lex"
+	"push-f.com/lex-surf/lex-fetch/progress"
+)
+
+// Fetcher retrieves the list of laws published on
+// www.gesetze-im-internet.de. It carries no state.
+type Fetcher struct{}
+
+// pages holds the one-character suffixes of the index pages requested by
+// Fetch; each rune is substituted into the Teilliste_%c.html URL.
+// NOTE(review): 'X' and '0' are absent from this list — presumably the site
+// has no index page for them; confirm against the site before "fixing" it.
+var pages = []rune("ABCDEFGHIJKLMNOPQRSTUVWYZ123456789")
+
+// Fetch downloads every index page listed in pages from
+// www.gesetze-im-internet.de and returns the laws linked from them, in page
+// order.
+//
+// progress.Total is set to the number of index pages and ReportProgress is
+// called after each page has been processed. The log parameter is currently
+// unused.
+//
+// Fetch stops at the first error and returns nil laws together with that
+// error: a failed request, a non-200 status, unparsable HTML, or markup that
+// does not have the expected structure.
+func (s *Fetcher) Fetch(log *slog.Logger, client *http.Client, progress *progress.Reporter) ([]lex.Law, error) {
+	progress.Total = len(pages)
+	var laws []lex.Law
+	for i, page := range pages {
+		pageLaws, err := fetchPage(client, page)
+		if err != nil {
+			return nil, err
+		}
+		laws = append(laws, pageLaws...)
+		progress.ReportProgress(i + 1)
+	}
+	return laws, nil
+}
+
+// fetchPage downloads the index page for the given suffix and extracts one
+// lex.Law per link entry found in it.
+//
+// It is a separate function so that the deferred Body.Close runs once per
+// page instead of piling up until Fetch returns.
+func fetchPage(client *http.Client, page rune) ([]lex.Law, error) {
+	resp, err := client.Get(fmt.Sprintf("https://www.gesetze-im-internet.de/Teilliste_%c.html", page))
+	if err != nil {
+		return nil, fmt.Errorf("failed to fetch page: %w", err)
+	}
+	// Close the body on every path so the connection is released.
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("unexpected status code %d", resp.StatusCode)
+	}
+
+	// The body is decoded as ISO 8859-1 before it is handed to the HTML parser.
+	reader := transform.NewReader(resp.Body, charmap.ISO8859_1.NewDecoder())
+	doc, err := htmlquery.Parse(reader)
+	if err != nil {
+		return nil, fmt.Errorf("failed to HTML parse: %w", err)
+	}
+	div := htmlquery.FindOne(doc, "//div[@id='paddingLR12']")
+	if div == nil {
+		return nil, fmt.Errorf("didn't find expected HTML div")
+	}
+
+	var laws []lex.Law
+	for child := range div.ChildNodes() {
+		link := child.FirstChild
+		if link == nil {
+			// Nodes without children (e.g. text between entries) carry no link.
+			continue
+		}
+
+		// The href must look like "./<id>/index.html"; <id> is used for both
+		// Redir and Abbr.
+		href := htmlquery.SelectAttr(link, "href")
+		redir, ok := strings.CutPrefix(href, "./")
+		if !ok {
+			return nil, fmt.Errorf("expected href to start with ./ but found %s", href)
+		}
+		redir, ok = strings.CutSuffix(redir, "/index.html")
+		if !ok {
+			return nil, fmt.Errorf("expected href to end with /index.html but found %s", href)
+		}
+
+		hrefURL, err := url.Parse(href)
+		if err != nil {
+			return nil, fmt.Errorf("failed to parse href: %w", err)
+		}
+
+		// The title is taken from the second sibling after the link. Guard
+		// both hops so unexpected markup yields an error rather than a panic.
+		titleNode := link.NextSibling
+		if titleNode != nil {
+			titleNode = titleNode.NextSibling
+		}
+		if titleNode == nil {
+			return nil, fmt.Errorf("didn't find expected title node after link %s", href)
+		}
+
+		laws = append(laws, lex.Law{
+			URL:   resp.Request.URL.ResolveReference(hrefURL).String(),
+			Title: strings.TrimSpace(titleNode.Data),
+			Redir: redir,
+			Abbr:  redir,
+		})
+	}
+	return laws, nil
+}
-- 
cgit v1.2.3