diff options
-rw-r--r-- | chardet/chardet.go | 23 | ||||
-rw-r--r-- | go.mod | 4 | ||||
-rw-r--r-- | go.sum | 6 | ||||
-rw-r--r-- | main.go | 12 |
4 files changed, 40 insertions, 5 deletions
diff --git a/chardet/chardet.go b/chardet/chardet.go
new file mode 100644
index 0000000..9fb68bc
--- /dev/null
+++ b/chardet/chardet.go
@@ -0,0 +1,23 @@
+package chardet
+
+import (
+	"bytes"
+	"io"
+	"golang.org/x/net/html/charset"
+)
+
+func DetectAndDecode(r io.Reader) (*bytes.Reader, error) {
+	decoded, err := charset.NewReader(r, "text/html")
+	if err != nil {
+		return nil, err
+	}
+
+	buf := new(bytes.Buffer)
+	_, err = io.Copy(buf, decoded)
+	if err != nil {
+		return nil, err
+	}
+
+	return bytes.NewReader(buf.Bytes()), nil
+}
+
diff --git a/go.mod b/go.mod
--- a/go.mod
+++ b/go.mod
@@ -2,4 +2,6 @@ module ght
 
 go 1.23.3
 
-require golang.org/x/net v0.31.0
+require golang.org/x/text v0.21.0 // indirect
+
+require golang.org/x/net v0.32.0
diff --git a/go.sum b/go.sum
--- a/go.sum
+++ b/go.sum
@@ -1,2 +1,4 @@
-golang.org/x/net v0.31.0 h1:68CPQngjLL0r2AlUKiSxtQFKvzRVbnzLwMUn5SzcLHo=
-golang.org/x/net v0.31.0/go.mod h1:P4fl1q7dY2hnZFxEk4pPSkDHF+QqjitcnDjUQyMM+pM=
+golang.org/x/net v0.32.0 h1:ZqPmj8Kzc+Y6e0+skZsuACbx+wzMgo5MQsJh9Qd6aYI=
+golang.org/x/net v0.32.0/go.mod h1:CwU0IoeOlnQQWJ6ioyFrfRuomB8GKF6KbYXZVyeXNfs=
+golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
+golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
diff --git a/main.go b/main.go
--- a/main.go
+++ b/main.go
@@ -7,6 +7,8 @@ import (
 	"os"
 	"time"
 
+	"ght/chardet"
+
 	"golang.org/x/net/html"
 )
 
@@ -39,7 +41,13 @@ func fetchAndParse(client *http.Client, url string, useRange bool) (string, erro
 	}
 	defer resp.Body.Close()
 
-	doc, err := html.Parse(resp.Body)
+	// encoding and decode
+	body, err := chardet.DetectAndDecode(resp.Body)
+	if err != nil {
+		return "", fmt.Errorf("failed to decode response body: %w", err)
+	}
+
+	doc, err := html.Parse(body)
 	if err != nil {
 		return "", fmt.Errorf("failed to parse HTML: %w", err)
 	}
@@ -64,7 +72,7 @@ func fetchTitle(url string) (string, error) {
 		return title, nil
 	}
 
-	// no range limit : get reqest
+	// no range limit : get request
 	title, err = fetchAndParse(client, url, false)
 	if err != nil {
 		return "", err