fix(translate): seed cookie jar from www.deepl.com on first call

A real chrome-extension fetch() to oneshot-free.www.deepl.com inherits
whatever cookies the browser has on .deepl.com — at minimum
`userCountry=<iso2>` and `verifiedBot=false`, both of which the
deepl.com server sets on any page load. Our outbound bytes were
otherwise extension-identical but went out cookieless, which is a
distinguishable signal.

Wire a process-wide net/http/cookiejar onto the req.Client and trigger
a single warmup GET to https://www.deepl.com/translator on the first
translate call (sync.Once). The Set-Cookie response (userCountry,
verifiedBot) lands on .deepl.com, which the jar then automatically
echoes back on every subsequent POST to oneshot-free.www.deepl.com
(cookies set on .deepl.com match any *.deepl.com subdomain).

Verified outbound:
  Cookie: userCountry=JP; verifiedBot=false

Latency cost: first call after process start pays one extra HTTP GET
(~1s warmup); subsequent calls are unaffected (sync.Once + connection
keep-alive).

Note: we cannot replicate the _ga / _ga_<id> cookies a real user
would also carry — those are set client-side by GA's JS, which a
non-browser HTTP client can't execute. The userCountry+verifiedBot
pair already matches the "first-time visitor with JS disabled" profile,
which is the closest plausible non-browser approximation.
This commit is contained in:
Vincent Young 2026-05-22 12:03:55 +08:00
parent 3acec2c252
commit a43aba64c1
No known key found for this signature in database
GPG Key ID: 070D9CD629BC1AAE

View File

@ -21,8 +21,10 @@ import (
"fmt" "fmt"
"io" "io"
"net/http" "net/http"
"net/http/cookiejar"
"net/url" "net/url"
"strings" "strings"
"sync"
"time" "time"
"github.com/andybalholm/brotli" "github.com/andybalholm/brotli"
@ -61,6 +63,36 @@ const (
// Rotating it per-request would be a far stronger signal than reusing one. // Rotating it per-request would be a far stronger signal than reusing one.
var instanceID = newInstanceID() var instanceID = newInstanceID()
// Cookie state shared by every oneshot client in this process.
//
// A browser extension's fetch() sends whatever cookies the browser holds
// for .deepl.com: a cold visit to www.deepl.com yields userCountry=<iso2>
// and verifiedBot=false, and users who have opened the site before also
// carry the _ga / _ga_<id> cookies written by analytics JS. Keeping a
// single jar for the whole process lets each oneshot POST send back
// whatever the warmup GET collected.
var (
	cookieJar     http.CookieJar // created lazily by sharedCookieJar
	cookieJarOnce sync.Once      // guards the one-time creation of cookieJar
	cookieWarmer  sync.Once      // one-shot gate for the cookie warmup request
)

// sharedCookieJar returns the process-wide cookie jar, creating it on the
// first call. Every call yields the same jar.
func sharedCookieJar() http.CookieJar {
	cookieJarOnce.Do(initCookieJar)
	return cookieJar
}

// initCookieJar builds the jar stored in cookieJar. It is meant to run
// only through cookieJarOnce.
func initCookieJar() {
	// The error is deliberately discarded: cookiejar.New has no failing
	// path for nil options in current Go releases.
	jar, _ := cookiejar.New(nil)
	cookieJar = jar
}
// maxCookieWarmAttempts caps how many times warmCookies will try the
// warmup GET before giving up for the rest of the process, so that a
// permanently unreachable www.deepl.com cannot add latency to every call.
const maxCookieWarmAttempts = 3

// cookieWarmState records the outcome of the warmup GET. A sync.Once is
// not enough here: Once marks itself done even when the request fails, so
// one transient error on the very first call would leave the jar empty
// for the lifetime of the process.
var cookieWarmState struct {
	mu       sync.Mutex
	done     bool // a warmup GET has completed without a transport error
	attempts int  // warmup GETs issued so far
}

// warmCookies primes the client's cookie jar by GETting
// https://www.deepl.com/translator. The response is expected to set
// userCountry / verifiedBot on .deepl.com, the parent domain of
// oneshot-free.www.deepl.com, so later POSTs to the oneshot endpoint
// carry those cookies automatically.
//
// The request is best-effort: a failure is not reported to the caller.
// After the first successful GET (or after maxCookieWarmAttempts failed
// ones) the function is a no-op. Concurrent callers block until the
// in-flight attempt finishes, as they would with sync.Once.
func warmCookies(client *req.Client) {
	cookieWarmState.mu.Lock()
	defer cookieWarmState.mu.Unlock()

	if cookieWarmState.done || cookieWarmState.attempts >= maxCookieWarmAttempts {
		return
	}
	cookieWarmState.attempts++

	if _, err := client.R().Get("https://www.deepl.com/translator"); err != nil {
		// Leave done unset so the next translate call retries the warmup.
		return
	}
	cookieWarmState.done = true
}
func newInstanceID() string { func newInstanceID() string {
b := make([]byte, 16) b := make([]byte, 16)
if _, err := rand.Read(b); err != nil { if _, err := rand.Read(b); err != nil {
@ -125,7 +157,7 @@ type oneshotRequest struct {
// that a fetch() never emits — wipe those so the WAF cannot tell us apart // that a fetch() never emits — wipe those so the WAF cannot tell us apart
// on that axis. // on that axis.
func newOneshotClient(proxyURL string) (*req.Client, error) { func newOneshotClient(proxyURL string) (*req.Client, error) {
client := req.C().ImpersonateChrome() client := req.C().ImpersonateChrome().SetCookieJar(sharedCookieJar())
for _, h := range []string{ for _, h := range []string{
"Pragma", "Pragma",
"Cache-Control", "Cache-Control",
@ -159,6 +191,7 @@ func callOneshot(endpoint string, body []byte, bearerToken, proxyURL string) (gj
if err != nil { if err != nil {
return gjson.Result{}, 0, err return gjson.Result{}, 0, err
} }
warmCookies(client) // no-op after the first translation in the process
authValue := "None" authValue := "None"
if bearerToken != "" { if bearerToken != "" {