From a43aba64c1445d6db6678d3ce0450a2aa57ab349 Mon Sep 17 00:00:00 2001 From: Vincent Young Date: Fri, 22 May 2026 12:03:55 +0800 Subject: [PATCH] fix(translate): seed cookie jar from www.deepl.com on first call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A real chrome-extension fetch() to oneshot-free.www.deepl.com inherits whatever cookies the browser has on .deepl.com — at minimum `userCountry=XX` (XX being the visitor's country code) and `verifiedBot=false`, both of which the deepl.com server sets on any page load. Our outbound bytes were otherwise extension-identical but went out cookieless, which is a distinguishable signal. Wire a process-wide net/http/cookiejar onto the req.Client and trigger a single warmup GET to https://www.deepl.com/translator on the first translate call (sync.Once). The Set-Cookie response (userCountry, verifiedBot) lands on .deepl.com, which the jar then automatically echoes back on every subsequent POST to oneshot-free.www.deepl.com (cookies set on .deepl.com match any *.deepl.com subdomain). Verified outbound: Cookie: userCountry=JP; verifiedBot=false Latency cost: first call after process start pays one extra HTTP GET (~1s warmup); subsequent calls are unaffected (sync.Once + connection keep-alive). Note: we cannot replicate the _ga / _ga_* (per-property analytics) cookies a real user would also carry — those are set client-side by GA's JS, which a non-browser HTTP client can't execute. The userCountry+verifiedBot pair already matches the "first-time visitor with JS disabled" profile, which is the closest plausible non-browser approximation. 
---
 translate/translate.go | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/translate/translate.go b/translate/translate.go
index 556552b..99a8af0 100644
--- a/translate/translate.go
+++ b/translate/translate.go
@@ -21,8 +21,10 @@ import (
 	"fmt"
 	"io"
 	"net/http"
+	"net/http/cookiejar"
 	"net/url"
 	"strings"
+	"sync"
 	"time"
 
 	"github.com/andybalholm/brotli"
@@ -61,6 +63,36 @@ const (
 // Rotating it per-request would be a far stronger signal than reusing one.
 var instanceID = newInstanceID()
 
+// A real extension fetch() inherits whatever cookies the browser has
+// accumulated on .deepl.com. Every oneshot client shares this process-wide
+// jar, so each POST carries whatever the warmup GET picked up.
+var (
+	cookieJar     http.CookieJar
+	cookieJarOnce sync.Once
+	warmMu        sync.Mutex // guards warmed
+	warmed        bool       // set only once a warmup GET has succeeded
+)
+
+// sharedCookieJar lazily creates the process-wide jar and returns it.
+func sharedCookieJar() http.CookieJar {
+	cookieJarOnce.Do(func() {
+		cookieJar, _ = cookiejar.New(nil) // never fails for nil Options
+	})
+	return cookieJar
+}
+
+// warmCookies primes the shared jar with one GET to www.deepl.com so cookies
+// set on .deepl.com ride along on later oneshot POSTs. Best effort: errors are
+// not returned, but a failed GET leaves warmed false so the next call retries.
+func warmCookies(client *req.Client) {
+	warmMu.Lock()
+	defer warmMu.Unlock()
+	if !warmed {
+		_, err := client.R().Get("https://www.deepl.com/translator")
+		warmed = err == nil
+	}
+}
+
 func newInstanceID() string {
 	b := make([]byte, 16)
 	if _, err := rand.Read(b); err != nil {
@@ -125,7 +157,7 @@ type oneshotRequest struct {
 // that a fetch() never emits — wipe those so the WAF cannot tell us apart
 // on that axis.
func newOneshotClient(proxyURL string) (*req.Client, error) { - client := req.C().ImpersonateChrome() + client := req.C().ImpersonateChrome().SetCookieJar(sharedCookieJar()) for _, h := range []string{ "Pragma", "Cache-Control", @@ -159,6 +191,7 @@ func callOneshot(endpoint string, body []byte, bearerToken, proxyURL string) (gj if err != nil { return gjson.Result{}, 0, err } + warmCookies(client) // no-op after the first translation in the process authValue := "None" if bearerToken != "" {