summary refs log tree commit diff
diff options
context:
space:
mode:
author haturatu <taro@eyes4you.org> 2024-12-15 00:54:08 +0900
committer haturatu <taro@eyes4you.org> 2024-12-15 00:54:08 +0900
commit df23b04506e6b4cdfa5c40ce52172728bfe1d063 (patch)
tree d428d461896a2e33e1ec884e671e96b64617b6b6
parent 97109aaf2b00830e36cc778c0b76be9c3c36898f (diff)
add charset
-rw-r--r-- chardet/chardet.go 23
-rw-r--r-- go.mod 4
-rw-r--r-- go.sum 6
-rw-r--r-- main.go 12
4 files changed, 40 insertions, 5 deletions
diff --git a/chardet/chardet.go b/chardet/chardet.go
new file mode 100644
index 0000000..9fb68bc
--- /dev/null
+++ b/chardet/chardet.go
@@ -0,0 +1,23 @@
+package chardet
+
+import (
+ "bytes"
+ "io"
+ "golang.org/x/net/html/charset"
+)
+
+func DetectAndDecode(r io.Reader) (*bytes.Reader, error) {
+ decoded, err := charset.NewReader(r, "text/html")
+ if err != nil {
+ return nil, err
+ }
+
+ buf := new(bytes.Buffer)
+ _, err = io.Copy(buf, decoded)
+ if err != nil {
+ return nil, err
+ }
+
+ return bytes.NewReader(buf.Bytes()), nil
+}
+
diff --git a/go.mod b/go.mod
index 188e966..9e6c573 100644
--- a/go.mod
+++ b/go.mod
@@ -2,4 +2,6 @@ module ght
go 1.23.3
-require golang.org/x/net v0.31.0
+require golang.org/x/text v0.21.0 // indirect
+
+require golang.org/x/net v0.32.0
diff --git a/go.sum b/go.sum
index 26142d0..704655a 100644
--- a/go.sum
+++ b/go.sum
@@ -1,2 +1,4 @@
-golang.org/x/net v0.31.0 h1:68CPQngjLL0r2AlUKiSxtQFKvzRVbnzLwMUn5SzcLHo=
-golang.org/x/net v0.31.0/go.mod h1:P4fl1q7dY2hnZFxEk4pPSkDHF+QqjitcnDjUQyMM+pM=
+golang.org/x/net v0.32.0 h1:ZqPmj8Kzc+Y6e0+skZsuACbx+wzMgo5MQsJh9Qd6aYI=
+golang.org/x/net v0.32.0/go.mod h1:CwU0IoeOlnQQWJ6ioyFrfRuomB8GKF6KbYXZVyeXNfs=
+golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
+golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
diff --git a/main.go b/main.go
index 903abc4..221783f 100644
--- a/main.go
+++ b/main.go
@@ -7,6 +7,8 @@ import (
"os"
"time"
+ "ght/chardet"
+
"golang.org/x/net/html"
)
@@ -39,7 +41,13 @@ func fetchAndParse(client *http.Client, url string, useRange bool) (string, erro
}
defer resp.Body.Close()
- doc, err := html.Parse(resp.Body)
+ // encoding and decode
+ body, err := chardet.DetectAndDecode(resp.Body)
+ if err != nil {
+ return "", fmt.Errorf("failed to decode response body: %w", err)
+ }
+
+ doc, err := html.Parse(body)
if err != nil {
return "", fmt.Errorf("failed to parse HTML: %w", err)
}
@@ -64,7 +72,7 @@ func fetchTitle(url string) (string, error) {
return title, nil
}
- // no range limit : get reqest
+ // no range limit : get request
title, err = fetchAndParse(client, url, false)
if err != nil {
return "", err