Skip to content

Commit 062c13a

Browse files
committed
Add html content sanitizer
1 parent 304f29a commit 062c13a

File tree

3 files changed

+39
-0
lines changed

3 files changed

+39
-0
lines changed

go.mod

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,19 +13,23 @@ require (
1313
)
1414

1515
require (
16+
github.com/aymerick/douceur v0.2.0 // indirect
1617
github.com/bahlo/generic-list-go v0.2.0 // indirect
1718
github.com/buger/jsonparser v1.1.1 // indirect
1819
github.com/go-openapi/jsonpointer v0.19.5 // indirect
1920
github.com/go-openapi/swag v0.21.1 // indirect
2021
github.com/google/go-github/v71 v71.0.0 // indirect
22+
github.com/gorilla/css v1.0.1 // indirect
2123
github.com/gorilla/mux v1.8.0 // indirect
2224
github.com/invopop/jsonschema v0.13.0 // indirect
2325
github.com/josharian/intern v1.0.0 // indirect
2426
github.com/mailru/easyjson v0.7.7 // indirect
27+
github.com/microcosm-cc/bluemonday v1.0.27 // indirect
2528
github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect
2629
github.com/yudai/golcs v0.0.0-20170316035057-ecda9a501e82 // indirect
2730
go.yaml.in/yaml/v3 v3.0.4 // indirect
2831
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect
32+
golang.org/x/net v0.26.0 // indirect
2933
gopkg.in/yaml.v2 v2.4.0 // indirect
3034
)
3135

go.sum

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk=
2+
github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4=
13
github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk=
24
github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg=
35
github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
@@ -32,6 +34,8 @@ github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD
3234
github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU=
3335
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
3436
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
37+
github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8=
38+
github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0=
3539
github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI=
3640
github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So=
3741
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
@@ -57,6 +61,8 @@ github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0
5761
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
5862
github.com/mark3labs/mcp-go v0.36.0 h1:rIZaijrRYPeSbJG8/qNDe0hWlGrCJ7FWHNMz2SQpTis=
5963
github.com/mark3labs/mcp-go v0.36.0/go.mod h1:T7tUa2jO6MavG+3P25Oy/jR7iCeJPHImCZHRymCn39g=
64+
github.com/microcosm-cc/bluemonday v1.0.27 h1:MpEUotklkwCSLeH+Qdx1VJgNqLlpY2KXwXFM08ygZfk=
65+
github.com/microcosm-cc/bluemonday v1.0.27/go.mod h1:jFi9vgW+H7c3V0lb6nR74Ib/DIB5OBs92Dimizgw2cA=
6066
github.com/migueleliasweb/go-github-mock v1.3.0 h1:2sVP9JEMB2ubQw1IKto3/fzF51oFC6eVWOOFDgQoq88=
6167
github.com/migueleliasweb/go-github-mock v1.3.0/go.mod h1:ipQhV8fTcj/G6m7BKzin08GaJ/3B5/SonRAkgrk0zCY=
6268
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
@@ -104,6 +110,8 @@ go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
104110
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
105111
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8=
106112
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY=
113+
golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ=
114+
golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE=
107115
golang.org/x/oauth2 v0.29.0 h1:WdYw2tdTK1S8olAzWHdgeqfy+Mtm9XNhv/xJsY65d98=
108116
golang.org/x/oauth2 v0.29.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8=
109117
golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik=

pkg/sanitize/sanitize.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,25 @@
11
package sanitize
22

3+
import (
4+
"github.com/microcosm-cc/bluemonday"
5+
)
6+
7+
type ContentFilter struct {
8+
HTMLPolicy *bluemonday.Policy
9+
}
10+
11+
func NewContentFilter() *ContentFilter {
12+
p := bluemonday.NewPolicy()
13+
p.AllowElements("b", "blockquote", "br", "code", "em", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "li", "ol", "p", "pre", "strong", "sub", "sup", "table", "tbody", "td", "th", "thead", "tr", "ul")
14+
p.AllowAttrs("img", "a")
15+
p.AllowAttrs()
16+
p.AllowURLSchemes("https")
17+
18+
return &ContentFilter{
19+
HTMLPolicy: p,
20+
}
21+
}
22+
323
// FilterInvisibleCharacters removes invisible or control characters that should not appear
424
// in user-facing titles or bodies. This includes:
525
// - Unicode tag characters: U+E0001, U+E0020–U+E007F
@@ -20,6 +40,13 @@ func FilterInvisibleCharacters(input string) string {
2040
return string(out)
2141
}
2242

43+
func (cf *ContentFilter) FilterHtmlTags(input string) string {
44+
if input == "" {
45+
return input
46+
}
47+
return cf.HTMLPolicy.Sanitize(input)
48+
}
49+
2350
func shouldRemoveRune(r rune) bool {
2451
switch r {
2552
case 0x200B, // ZERO WIDTH SPACE

0 commit comments

Comments
 (0)