fix(translate): enforce 1500-char text limit + add request timeout (#220)

* fix(translate): enforce 1500-char limit upfront and add request timeout Two related stability issues hit during real-world use: 1. **Hung requests** — without an explicit timeout the upstream HTTP call could dangle indefinitely on a stuck connection. Browser extensions calling /translate would sit on a spinner forever with no error to surface to the user (reported in the field). 2. **No-feedback on oversized input** — the oneshot endpoint caps the total text length at 1500 characters (matches the extension's own \`G.notLoggedIn = 1500\` constant). We were forwarding the request anyway and letting DeepL 400 it, which a) wasted an upstream round trip and b) the caller had no way to distinguish from other 400s. Changes: - Pre-validate \`text\` length in characters (utf8.RuneCountInString, not byte length — verified the cap is rune-based: 1500 Chinese characters / 4500 bytes is accepted, 1501 is rejected). Return HTTP 413 Payload Too Large with a clear message naming both the observed length and the limit. - Set a 20s timeout on the oneshot HTTP client (req.SetTimeout). On timeout return HTTP 504 Gateway Timeout — distinguishes a slow DeepL from other 503 failure modes (DNS, TLS, etc.). The check catches both context.DeadlineExceeded and url.Error{Timeout()=true}. - Set a separate 5s timeout on the cookie-jar warmup GET to www.deepl.com. Warmup is best-effort; we'd rather a slow warmup (cookies still seed eventually next time) than block the very first translation behind a hung GET. Behaviour verified against the live oneshot endpoint: - 1500 ASCII chars → 200 - 1501 ASCII chars → 413 (upstream not contacted) - 1500 Chinese chars (4500 bytes) → 200 - 1501 Chinese chars → 413 - Pathological "your"*1500 → 504 at 20s (was hanging without timeout) - Realistic 245-char Chinese → 200 in ~13s * perf(translate): share oneshot req.Client across requests + eager warmup Each TranslateByDeepLX call was building a brand-new req.Client via newOneshotClient(), which meant a fresh TLS handshake + HTTP/2 SETTINGS negotiation per request — ~200-400ms of pure overhead on top of DeepL's own ~1.5s processing latency. Share one client per proxy URL (sync.Map) so subsequent requests reuse the kept-alive HTTP/2 connection in the underlying http.Transport's pool. Also flip the cookie-jar warmup from synchronous-on-first-call to fire-and-forget at first client creation. Same sync.Once semantics (runs exactly once per process), but in a background goroutine so the first translate request runs in parallel with the TLS handshake to www.deepl.com rather than serially behind it. Measured against the live oneshot endpoint (Tokyo → Frankfurt): before, 5 sequential requests: 3.19s, 2.05s, 2.07s, 2.89s, 2.22s after, 5 sequential requests: 2.20s, 1.27s, 1.26s, 1.42s, 1.34s └─ first └────────── warm path ─────┘ The warm-path 1.3s is also faster than a bare \`curl\` to oneshot (~1.9s, every call doing its own TLS handshake) — proof the connection-pool reuse is now actually paying off.
2026-07-27 14:21:01 +00:00 · 2026-05-22 13:03:15 +08:00 · 2026-05-22 13:03:15 +08:00 · 432c0a223c
commit 432c0a223c
parent 1a06baec6f
1 changed files with 82 additions and 5 deletions
--- a/translate/translate.go
+++ b/translate/translate.go
@ -15,9 +15,11 @@ package translate
 import (
 	"compress/flate"
 	"compress/gzip"
 	"context"
 	"crypto/rand"
 	"encoding/hex"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"io"
 	"net/http"
@ -27,6 +29,7 @@ import (
 	"strings"
 	"sync"
 	"time"
 	"unicode/utf8"
 	"github.com/andybalholm/brotli"
 	"github.com/imroc/req/v3"
@ -57,6 +60,26 @@ const (
 	impersonatedChromeMajor = "120"
 	chromeExtensionVersion  = "1.86.0"
 	chromeExtensionID       = "cofdbpoegempjloogbagkncekinflcnj"
 	// oneshot enforces a 1500-character hard cap on the total length of
 	// the `text` array (sum across all items). Source: the extension's
 	// own `G.notLoggedIn = 1500` constant in background.js. The server
 	// returns 400 `{"errors":{"text":["text exceeds maximum length"]}}`
 	// past this; bail early to spare the upstream and give the caller a
 	// faster, less ambiguous error.
 	maxFreeTextLength = 1500
 	// oneshotTimeout caps how long we wait on a single translate request.
 	// Without an explicit timeout, a hung upstream connection would
 	// dangle indefinitely and the caller (e.g. browser extension) would
 	// sit on a spinner forever — observed in the field.
 	oneshotTimeout = 20 * time.Second
 	// warmupTimeout caps the initial GET to www.deepl.com that seeds the
 	// cookie jar. Shorter than oneshotTimeout because warmup typically
 	// completes in well under a second; we'd rather skip a slow warmup
 	// (cookies are best-effort anyway) than block the first translation.
 	warmupTimeout = 5 * time.Second
 )
 // instanceID mirrors the UUID the extension persists in chrome.storage on
@ -76,6 +99,14 @@ var (
 	cookieWarmer  sync.Once
 )
 // oneshotClients caches one req.Client per proxy URL so all translate
 // calls share the underlying TCP / TLS / HTTP/2 connection pool.
 // Creating a fresh req.Client per request meant a brand-new TLS
 // handshake every time (~200-400ms of overhead on top of DeepL's own
 // ~1.5s processing latency). Reusing the client lets keep-alive +
 // session tickets cut that to near zero on the warm path.
 var oneshotClients sync.Map // map[string]*req.Client
 func sharedCookieJar() http.CookieJar {
 	cookieJarOnce.Do(func() {
 		j, _ := cookiejar.New(nil)
@ -87,10 +118,15 @@ func sharedCookieJar() http.CookieJar {
 // warmCookies primes the shared jar by GETting www.deepl.com once.
 // The Set-Cookie response (userCountry / verifiedBot) lands on .deepl.com,
 // which is the eTLD+1 of oneshot-free.www.deepl.com, so subsequent POSTs
-// to the oneshot endpoint will carry those cookies automatically.
+// to the oneshot endpoint will carry those cookies automatically. The
 // same request doubles as a TLS-handshake warmup: it leaves a live
 // HTTP/2 connection to www.deepl.com in the client pool, which the
 // first oneshot POST then resumes via TLS session tickets.
 func warmCookies(client *req.Client) {
 	cookieWarmer.Do(func() {
-		_, _ = client.R().Get("https://www.deepl.com/translator")
+		ctx, cancel := context.WithTimeout(context.Background(), warmupTimeout)
 		defer cancel()
 		_, _ = client.R().SetContext(ctx).Get("https://www.deepl.com/translator")
 	})
 }
@ -239,8 +275,33 @@ type oneshotRequest struct {
 // headers (pragma, cache-control, upgrade-insecure-requests, sec-fetch-user)
 // that a fetch() never emits — wipe those so the WAF cannot tell us apart
 // on that axis.
 // getOneshotClient returns a process-wide cached client for the given
 // proxy URL, creating it on first use. Sharing the client across
 // requests is the single biggest latency win we have on the warm path:
 // it keeps the TLS / HTTP/2 connection in the pool so subsequent
 // requests skip the handshake entirely. Kicks off cookie-jar warmup
 // in the background on first creation so that the first real translate
 // call lands on an already-established connection.
 func getOneshotClient(proxyURL string) (*req.Client, error) {
 	if c, ok := oneshotClients.Load(proxyURL); ok {
 		return c.(*req.Client), nil
 	}
 	c, err := newOneshotClient(proxyURL)
 	if err != nil {
 		return nil, err
 	}
 	if actual, loaded := oneshotClients.LoadOrStore(proxyURL, c); loaded {
 		return actual.(*req.Client), nil
 	}
 	// First time we've seen this proxy. Kick warmup off in the
 	// background so the very first translate call can run in parallel
 	// with the TLS handshake to www.deepl.com.
 	go warmCookies(c)
 	return c, nil
 }
 func newOneshotClient(proxyURL string) (*req.Client, error) {
-	client := req.C().ImpersonateChrome().SetCookieJar(sharedCookieJar())
+	client := req.C().ImpersonateChrome().SetCookieJar(sharedCookieJar()).SetTimeout(oneshotTimeout)
 	for _, h := range []string{
 		"Pragma",
 		"Cache-Control",
@ -270,11 +331,10 @@ func newOneshotClient(proxyURL string) (*req.Client, error) {
 // exactly. Omitting that header instead would put the request on a
 // different server-side auth branch.
 func callOneshot(endpoint string, body []byte, bearerToken, proxyURL string) (gjson.Result, int, error) {
-	client, err := newOneshotClient(proxyURL)
+	client, err := getOneshotClient(proxyURL)
 	if err != nil {
 		return gjson.Result{}, 0, err
 	}
 	warmCookies(client) // no-op after the first translation in the process
 	authValue := "None"
 	if bearerToken != "" {
@ -349,6 +409,13 @@ func TranslateByDeepLX(sourceLang, targetLang, text string, tagHandling string,
 		}, nil
 	}
 	if n := utf8.RuneCountInString(text); n > maxFreeTextLength {
 		return DeepLXTranslationResult{
 			Code:    http.StatusRequestEntityTooLarge,
 			Message: fmt.Sprintf("text exceeds maximum length: %d characters (anonymous oneshot limit is %d)", n, maxFreeTextLength),
 		}, nil
 	}
 	reqStruct := oneshotRequest{
 		Text:       []string{text},
 		TargetLang: resolvedTarget,
@ -372,6 +439,16 @@ func TranslateByDeepLX(sourceLang, targetLang, text string, tagHandling string,
 	id := time.Now().UnixMilli()
 	result, status, err := callOneshot(endpoint, bodyBytes, dlSession, proxyURL)
 	if err != nil {
 		// Map upstream timeouts to 504 so callers can distinguish "DeepL
 		// took too long" from other 503 failure modes (DNS, TLS, etc.).
 		var ue *url.Error
 		if errors.Is(err, context.DeadlineExceeded) || (errors.As(err, &ue) && ue.Timeout()) {
 			return DeepLXTranslationResult{
 				ID:      id,
 				Code:    http.StatusGatewayTimeout,
 				Message: fmt.Sprintf("upstream DeepL request timed out after %s", oneshotTimeout),
 			}, nil
 		}
 		return DeepLXTranslationResult{
 			ID:      id,
 			Code:    http.StatusServiceUnavailable,