mirror of
https://github.com/OwO-Network/DeepLX.git
synced 2026-06-11 15:28:50 +00:00
fix(translate): enforce 1500-char text limit + add request timeout (#220)
* fix(translate): enforce 1500-char limit upfront and add request timeout
Two related stability issues hit during real-world use:
1. **Hung requests** — without an explicit timeout the upstream HTTP
call could dangle indefinitely on a stuck connection. Browser
extensions calling /translate would sit on a spinner forever with
no error to surface to the user (reported in the field).
2. **No feedback on oversized input** — the oneshot endpoint caps the
total text length at 1500 characters (matches the extension's own
`G.notLoggedIn = 1500` constant). We were forwarding the request
anyway and letting DeepL reject it with a 400, which a) wasted an
upstream round trip and b) gave the caller no way to distinguish it
from other 400s.
Changes:
- Pre-validate `text` length in characters (utf8.RuneCountInString,
not byte length — verified the cap is rune-based: 1500 Chinese
characters / 4500 bytes are accepted, 1501 are rejected). Return
HTTP 413 Payload Too Large with a clear message naming both the
observed length and the limit.
- Set a 20s timeout on the oneshot HTTP client (req.SetTimeout).
On timeout return HTTP 504 Gateway Timeout — distinguishes a slow
DeepL from other 503 failure modes (DNS, TLS, etc.). The check
catches both context.DeadlineExceeded and url.Error{Timeout()=true}.
- Set a separate 5s timeout on the cookie-jar warmup GET to
www.deepl.com. Warmup is best-effort; we'd rather abandon a slow
warmup (cookies still seed eventually next time) than block the very
first translation behind a hung GET.
Behaviour verified against the live oneshot endpoint:
- 1500 ASCII chars → 200
- 1501 ASCII chars → 413 (upstream not contacted)
- 1500 Chinese chars (4500 bytes) → 200
- 1501 Chinese chars → 413
- Pathological "your"*1500 → 504 at 20s (was hanging without timeout)
- Realistic 245-char Chinese → 200 in ~13s
* perf(translate): share oneshot req.Client across requests + eager warmup
Each TranslateByDeepLX call was building a brand-new req.Client via
newOneshotClient(), which meant a fresh TLS handshake + HTTP/2 SETTINGS
negotiation per request — ~200-400ms of pure overhead on top of DeepL's
own ~1.5s processing latency. Share one client per proxy URL
(sync.Map) so subsequent requests reuse the kept-alive HTTP/2
connection in the underlying http.Transport's pool.
Also flip the cookie-jar warmup from synchronous-on-first-call to
fire-and-forget at first client creation. Same sync.Once semantics
(runs exactly once per process), but in a background goroutine so the
first translate request runs in parallel with the TLS handshake to
www.deepl.com rather than serially behind it.
Measured against the live oneshot endpoint (Tokyo → Frankfurt):
before, 5 sequential requests: 3.19s, 2.05s, 2.07s, 2.89s, 2.22s
after, 5 sequential requests: 2.20s, 1.27s, 1.26s, 1.42s, 1.34s
└─ first └────────── warm path ─────┘
The warm-path 1.3s is also faster than a bare `curl` to oneshot
(~1.9s, every call doing its own TLS handshake) — proof the
connection-pool reuse is now actually paying off.
This commit is contained in:
parent
1a06baec6f
commit
432c0a223c
@ -15,9 +15,11 @@ package translate
|
|||||||
import (
|
import (
|
||||||
"compress/flate"
|
"compress/flate"
|
||||||
"compress/gzip"
|
"compress/gzip"
|
||||||
|
"context"
|
||||||
"crypto/rand"
|
"crypto/rand"
|
||||||
"encoding/hex"
|
"encoding/hex"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
@ -27,6 +29,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
"github.com/andybalholm/brotli"
|
"github.com/andybalholm/brotli"
|
||||||
"github.com/imroc/req/v3"
|
"github.com/imroc/req/v3"
|
||||||
@ -57,6 +60,26 @@ const (
|
|||||||
impersonatedChromeMajor = "120"
|
impersonatedChromeMajor = "120"
|
||||||
chromeExtensionVersion = "1.86.0"
|
chromeExtensionVersion = "1.86.0"
|
||||||
chromeExtensionID = "cofdbpoegempjloogbagkncekinflcnj"
|
chromeExtensionID = "cofdbpoegempjloogbagkncekinflcnj"
|
||||||
|
|
||||||
|
// oneshot enforces a 1500-character hard cap on the total length of
|
||||||
|
// the `text` array (sum across all items). Source: the extension's
|
||||||
|
// own `G.notLoggedIn = 1500` constant in background.js. The server
|
||||||
|
// returns 400 `{"errors":{"text":["text exceeds maximum length"]}}`
|
||||||
|
// past this; bail early to spare the upstream and give the caller a
|
||||||
|
// faster, less ambiguous error.
|
||||||
|
maxFreeTextLength = 1500
|
||||||
|
|
||||||
|
// oneshotTimeout caps how long we wait on a single translate request.
|
||||||
|
// Without an explicit timeout, a hung upstream connection would
|
||||||
|
// dangle indefinitely and the caller (e.g. browser extension) would
|
||||||
|
// sit on a spinner forever — observed in the field.
|
||||||
|
oneshotTimeout = 20 * time.Second
|
||||||
|
|
||||||
|
// warmupTimeout caps the initial GET to www.deepl.com that seeds the
|
||||||
|
// cookie jar. Shorter than oneshotTimeout because warmup typically
|
||||||
|
// completes in well under a second; we'd rather skip a slow warmup
|
||||||
|
// (cookies are best-effort anyway) than block the first translation.
|
||||||
|
warmupTimeout = 5 * time.Second
|
||||||
)
|
)
|
||||||
|
|
||||||
// instanceID mirrors the UUID the extension persists in chrome.storage on
|
// instanceID mirrors the UUID the extension persists in chrome.storage on
|
||||||
@ -76,6 +99,14 @@ var (
|
|||||||
cookieWarmer sync.Once
|
cookieWarmer sync.Once
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// oneshotClients caches one req.Client per proxy URL so all translate
|
||||||
|
// calls share the underlying TCP / TLS / HTTP/2 connection pool.
|
||||||
|
// Creating a fresh req.Client per request meant a brand-new TLS
|
||||||
|
// handshake every time (~200-400ms of overhead on top of DeepL's own
|
||||||
|
// ~1.5s processing latency). Reusing the client lets keep-alive +
|
||||||
|
// session tickets cut that to near zero on the warm path.
|
||||||
|
var oneshotClients sync.Map // map[string]*req.Client
|
||||||
|
|
||||||
func sharedCookieJar() http.CookieJar {
|
func sharedCookieJar() http.CookieJar {
|
||||||
cookieJarOnce.Do(func() {
|
cookieJarOnce.Do(func() {
|
||||||
j, _ := cookiejar.New(nil)
|
j, _ := cookiejar.New(nil)
|
||||||
@ -87,10 +118,15 @@ func sharedCookieJar() http.CookieJar {
|
|||||||
// warmCookies primes the shared jar by GETting www.deepl.com once.
|
// warmCookies primes the shared jar by GETting www.deepl.com once.
|
||||||
// The Set-Cookie response (userCountry / verifiedBot) lands on .deepl.com,
|
// The Set-Cookie response (userCountry / verifiedBot) lands on .deepl.com,
|
||||||
// which is the eTLD+1 of oneshot-free.www.deepl.com, so subsequent POSTs
|
// which is the eTLD+1 of oneshot-free.www.deepl.com, so subsequent POSTs
|
||||||
// to the oneshot endpoint will carry those cookies automatically.
|
// to the oneshot endpoint will carry those cookies automatically. The
|
||||||
|
// same request doubles as a TLS-handshake warmup: it leaves a live
|
||||||
|
// HTTP/2 connection to www.deepl.com in the client pool, which the
|
||||||
|
// first oneshot POST then resumes via TLS session tickets.
|
||||||
func warmCookies(client *req.Client) {
|
func warmCookies(client *req.Client) {
|
||||||
cookieWarmer.Do(func() {
|
cookieWarmer.Do(func() {
|
||||||
_, _ = client.R().Get("https://www.deepl.com/translator")
|
ctx, cancel := context.WithTimeout(context.Background(), warmupTimeout)
|
||||||
|
defer cancel()
|
||||||
|
_, _ = client.R().SetContext(ctx).Get("https://www.deepl.com/translator")
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -239,8 +275,33 @@ type oneshotRequest struct {
|
|||||||
// headers (pragma, cache-control, upgrade-insecure-requests, sec-fetch-user)
|
// headers (pragma, cache-control, upgrade-insecure-requests, sec-fetch-user)
|
||||||
// that a fetch() never emits — wipe those so the WAF cannot tell us apart
|
// that a fetch() never emits — wipe those so the WAF cannot tell us apart
|
||||||
// on that axis.
|
// on that axis.
|
||||||
|
// getOneshotClient returns a process-wide cached client for the given
|
||||||
|
// proxy URL, creating it on first use. Sharing the client across
|
||||||
|
// requests is the single biggest latency win we have on the warm path:
|
||||||
|
// it keeps the TLS / HTTP/2 connection in the pool so subsequent
|
||||||
|
// requests skip the handshake entirely. Kicks off cookie-jar warmup
|
||||||
|
// in the background on first creation so that the first real translate
|
||||||
|
// call lands on an already-established connection.
|
||||||
|
func getOneshotClient(proxyURL string) (*req.Client, error) {
|
||||||
|
if c, ok := oneshotClients.Load(proxyURL); ok {
|
||||||
|
return c.(*req.Client), nil
|
||||||
|
}
|
||||||
|
c, err := newOneshotClient(proxyURL)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if actual, loaded := oneshotClients.LoadOrStore(proxyURL, c); loaded {
|
||||||
|
return actual.(*req.Client), nil
|
||||||
|
}
|
||||||
|
// First time we've seen this proxy. Kick warmup off in the
|
||||||
|
// background so the very first translate call can run in parallel
|
||||||
|
// with the TLS handshake to www.deepl.com.
|
||||||
|
go warmCookies(c)
|
||||||
|
return c, nil
|
||||||
|
}
|
||||||
|
|
||||||
func newOneshotClient(proxyURL string) (*req.Client, error) {
|
func newOneshotClient(proxyURL string) (*req.Client, error) {
|
||||||
client := req.C().ImpersonateChrome().SetCookieJar(sharedCookieJar())
|
client := req.C().ImpersonateChrome().SetCookieJar(sharedCookieJar()).SetTimeout(oneshotTimeout)
|
||||||
for _, h := range []string{
|
for _, h := range []string{
|
||||||
"Pragma",
|
"Pragma",
|
||||||
"Cache-Control",
|
"Cache-Control",
|
||||||
@ -270,11 +331,10 @@ func newOneshotClient(proxyURL string) (*req.Client, error) {
|
|||||||
// exactly. Omitting that header instead would put the request on a
|
// exactly. Omitting that header instead would put the request on a
|
||||||
// different server-side auth branch.
|
// different server-side auth branch.
|
||||||
func callOneshot(endpoint string, body []byte, bearerToken, proxyURL string) (gjson.Result, int, error) {
|
func callOneshot(endpoint string, body []byte, bearerToken, proxyURL string) (gjson.Result, int, error) {
|
||||||
client, err := newOneshotClient(proxyURL)
|
client, err := getOneshotClient(proxyURL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return gjson.Result{}, 0, err
|
return gjson.Result{}, 0, err
|
||||||
}
|
}
|
||||||
warmCookies(client) // no-op after the first translation in the process
|
|
||||||
|
|
||||||
authValue := "None"
|
authValue := "None"
|
||||||
if bearerToken != "" {
|
if bearerToken != "" {
|
||||||
@ -349,6 +409,13 @@ func TranslateByDeepLX(sourceLang, targetLang, text string, tagHandling string,
|
|||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if n := utf8.RuneCountInString(text); n > maxFreeTextLength {
|
||||||
|
return DeepLXTranslationResult{
|
||||||
|
Code: http.StatusRequestEntityTooLarge,
|
||||||
|
Message: fmt.Sprintf("text exceeds maximum length: %d characters (anonymous oneshot limit is %d)", n, maxFreeTextLength),
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
reqStruct := oneshotRequest{
|
reqStruct := oneshotRequest{
|
||||||
Text: []string{text},
|
Text: []string{text},
|
||||||
TargetLang: resolvedTarget,
|
TargetLang: resolvedTarget,
|
||||||
@ -372,6 +439,16 @@ func TranslateByDeepLX(sourceLang, targetLang, text string, tagHandling string,
|
|||||||
id := time.Now().UnixMilli()
|
id := time.Now().UnixMilli()
|
||||||
result, status, err := callOneshot(endpoint, bodyBytes, dlSession, proxyURL)
|
result, status, err := callOneshot(endpoint, bodyBytes, dlSession, proxyURL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
// Map upstream timeouts to 504 so callers can distinguish "DeepL
|
||||||
|
// took too long" from other 503 failure modes (DNS, TLS, etc.).
|
||||||
|
var ue *url.Error
|
||||||
|
if errors.Is(err, context.DeadlineExceeded) || (errors.As(err, &ue) && ue.Timeout()) {
|
||||||
|
return DeepLXTranslationResult{
|
||||||
|
ID: id,
|
||||||
|
Code: http.StatusGatewayTimeout,
|
||||||
|
Message: fmt.Sprintf("upstream DeepL request timed out after %s", oneshotTimeout),
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
return DeepLXTranslationResult{
|
return DeepLXTranslationResult{
|
||||||
ID: id,
|
ID: id,
|
||||||
Code: http.StatusServiceUnavailable,
|
Code: http.StatusServiceUnavailable,
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user