feat: add setting to block disposable emails

- Add a new setting `EMAIL_DOMAIN_BLOCK_DISPOSABLE` that, when enabled, appends
  to the email domain blocklist a list of domains known to be used by temporary
  or disposable email services.

- Add a utility to automatically download and format the list of domains from
  the disposable-email-domains project on GitHub.

  (https://github.com/disposable-email-domains/disposable-email-domains)
  license: CC0 1.0 Universal (CC0 1.0) [Public Domain]

  from README:
  """
  This repo contains a list of disposable and temporary email address domains often used to register dummy users in order to spam or abuse some services.

  We cannot guarantee all of these can still be considered disposable but we do basic checking so chances are they were disposable at one point in time.
  """
This commit is contained in:
James Hatfield 2024-11-03 10:47:25 -06:00
parent 7015bdfa48
commit 16d06705b3
5 changed files with 4173 additions and 11 deletions

View file

@ -4,6 +4,9 @@
package setting
import (
"fmt"
"sort"
"strings"
"testing"
"code.gitea.io/gitea/modules/structs"
@ -11,8 +14,18 @@ import (
"github.com/gobwas/glob"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"golang.org/x/net/publicsuffix"
)
// match reports whether s is matched by at least one of the given globs.
// An empty (or nil) slice never matches.
func match(globs []glob.Glob, s string) bool {
	found := false
	for i := 0; i < len(globs) && !found; i++ {
		found = globs[i].Match(s)
	}
	return found
}
func TestLoadServices(t *testing.T) {
oldService := Service
defer func() {
@ -28,15 +41,6 @@ EMAIL_DOMAIN_BLOCKLIST = d3, *.b
require.NoError(t, err)
loadServiceFrom(cfg)
match := func(globs []glob.Glob, s string) bool {
for _, g := range globs {
if g.Match(s) {
return true
}
}
return false
}
assert.True(t, match(Service.EmailDomainAllowList, "d1"))
assert.True(t, match(Service.EmailDomainAllowList, "foo.w"))
assert.True(t, match(Service.EmailDomainAllowList, "d2"))
@ -48,6 +52,121 @@ EMAIL_DOMAIN_BLOCKLIST = d3, *.b
assert.False(t, match(Service.EmailDomainBlockList, "d1"))
}
// TestLoadServiceBlockDisposable loads a config that only enables
// EMAIL_DOMAIN_BLOCK_DISPOSABLE and verifies that every disposable domain is
// matched by the resulting block list, that the block list has exactly as many
// entries as the disposable list, and that a handful of mainstream providers
// are not matched.
func TestLoadServiceBlockDisposable(t *testing.T) {
	// Service is package-level state; put it back once the test finishes.
	savedService := Service
	defer func() { Service = savedService }()

	const iniData = `
[service]
EMAIL_DOMAIN_BLOCK_DISPOSABLE = true
`
	provider, err := NewConfigProviderFromData(iniData)
	require.NoError(t, err)
	loadServiceFrom(provider)

	for _, disposable := range DisposableEmailDomains() {
		blocked := match(Service.EmailDomainBlockList, disposable)
		require.True(t, blocked)
	}

	// No other block list entries are configured, so the sizes must agree.
	wantLen := len(DisposableEmailDomains())
	require.Len(t, Service.EmailDomainBlockList, wantLen)

	// Providers that must never end up matched by the block list.
	mainstream := []string{
		"aol.com",
		"gmx.com",
		"mail.com",
		"zoho.com",
		"proton.me",
		"gmail.com",
		"yahoo.com",
		"icloud.com",
		"outlook.com",
		"protonmail.com",
	}
	for _, provider := range mainstream {
		blocked := match(Service.EmailDomainBlockList, provider)
		require.False(t, blocked)
	}
}
// TestLoadServiceBlockDisposableWithExistingGlobs configures
// EMAIL_DOMAIN_BLOCKLIST with wildcard globs for the two public suffixes that
// occur most often in the disposable list, enables
// EMAIL_DOMAIN_BLOCK_DISPOSABLE, and then verifies that
//
//   - every disposable domain is matched by the resulting block list, and
//   - the block list length equals the two configured globs plus the number of
//     disposable domains not already covered by those globs.
func TestLoadServiceBlockDisposableWithExistingGlobs(t *testing.T) {
	// Service is package-level state; restore it when the test ends.
	oldService := Service
	defer func() {
		Service = oldService
	}()

	domains := DisposableEmailDomains()

	// Tally the disposable domains per public suffix.
	tldCounts := make(map[string]int)
	for _, domain := range domains {
		// The second result only reports whether the suffix is ICANN-managed;
		// it is not an error value, so dropping it is safe.
		tld, _ := publicsuffix.PublicSuffix(domain)
		tldCounts[tld]++
	}

	type tldkv struct {
		Tld   string
		Count int
	}

	sortedTldCounts := make([]tldkv, 0, len(tldCounts))
	for tld, count := range tldCounts {
		sortedTldCounts = append(sortedTldCounts, tldkv{tld, count})
	}

	// Most frequent suffix first. Map iteration order is random and sort.Slice
	// is not stable, so ties are broken on the suffix itself to keep the chosen
	// pair identical from run to run.
	sort.Slice(sortedTldCounts, func(i, j int) bool {
		a, b := sortedTldCounts[i], sortedTldCounts[j]
		if a.Count != b.Count {
			return a.Count > b.Count
		}
		return a.Tld < b.Tld
	})

	require.GreaterOrEqual(t, len(sortedTldCounts), 2)

	first, second := sortedTldCounts[0].Tld, sortedTldCounts[1].Tld
	blockString := fmt.Sprintf("*.%s,*.%s", first, second)

	cfg, err := NewConfigProviderFromData(fmt.Sprintf(`
[service]
EMAIL_DOMAIN_BLOCKLIST = %s
EMAIL_DOMAIN_BLOCK_DISPOSABLE = true
`, blockString))
	require.NoError(t, err)
	loadServiceFrom(cfg)

	for _, domain := range domains {
		require.True(t, match(Service.EmailDomainBlockList, domain),
			"disposable domain %q is not matched by the block list", domain)
	}

	// A domain is already covered by a configured `*.<suffix>` glob only when
	// it ends in a dot followed by that suffix. Comparing against the bare
	// suffix would also count names that merely end in the same letters (or
	// that equal the suffix), which the glob does not match.
	firstSuffix, secondSuffix := "."+first, "."+second
	redundant := 0
	for _, domain := range domains {
		if strings.HasSuffix(domain, firstSuffix) ||
			strings.HasSuffix(domain, secondSuffix) {
			redundant++
		}
	}

	// Two configured globs plus every disposable domain they do not cover.
	expected := len(domains) - redundant + 2
	require.Len(t, Service.EmailDomainBlockList, expected)
}
// TestLoadServiceBlockDisposableWithComplementGlobs combines an
// EMAIL_DOMAIN_BLOCKLIST entry (`*.random`) with
// EMAIL_DOMAIN_BLOCK_DISPOSABLE and verifies that every disposable domain is
// matched by the resulting block list and that the block list holds one entry
// more than the disposable list.
func TestLoadServiceBlockDisposableWithComplementGlobs(t *testing.T) {
	// Service is package-level state; put it back once the test finishes.
	savedService := Service
	defer func() { Service = savedService }()

	const iniData = `
[service]
EMAIL_DOMAIN_BLOCKLIST = *.random
EMAIL_DOMAIN_BLOCK_DISPOSABLE = true
`
	provider, err := NewConfigProviderFromData(iniData)
	require.NoError(t, err)
	loadServiceFrom(provider)

	for _, disposable := range DisposableEmailDomains() {
		blocked := match(Service.EmailDomainBlockList, disposable)
		require.True(t, blocked)
	}

	// NOTE(review): the extra entry is presumably the configured `*.random`
	// glob, which overlaps none of the disposable domains — confirm.
	wantLen := 1 + len(DisposableEmailDomains())
	require.Len(t, Service.EmailDomainBlockList, wantLen)
}
func TestLoadServiceVisibilityModes(t *testing.T) {
oldService := Service
defer func() {