mirror of
https://github.com/OwO-Network/DeepLX.git
synced 2026-06-11 15:28:50 +00:00
fix(translate): enforce 1500-char limit upfront and add request timeout
Two related stability issues hit during real-world use:
1. **Hung requests** — without an explicit timeout the upstream HTTP
call could dangle indefinitely on a stuck connection. Browser
extensions calling /translate would sit on a spinner forever with
no error to surface to the user (reported in the field).
2. **No feedback on oversized input** — the oneshot endpoint caps the
total text length at 1500 characters (matches the extension's own
`G.notLoggedIn = 1500` constant). We were forwarding the request
anyway and letting DeepL 400 it, which a) wasted an upstream round
trip and b) gave the caller no way to distinguish it from other 400s.
Changes:
- Pre-validate `text` length in characters (utf8.RuneCountInString,
not byte length — verified the cap is rune-based: 1500 Chinese
characters / 4500 bytes is accepted, 1501 is rejected). Return
HTTP 413 Payload Too Large with a clear message naming both the
observed length and the limit.
- Set a 20s timeout on the oneshot HTTP client (req.SetTimeout).
On timeout return HTTP 504 Gateway Timeout — distinguishes a slow
DeepL from other 503 failure modes (DNS, TLS, etc.). The check
catches both context.DeadlineExceeded and url.Error{Timeout()=true}.
- Set a separate 5s timeout on the cookie-jar warmup GET to
www.deepl.com. Warmup is best-effort; we'd rather give up on a slow
warmup (cookies still seed eventually next time) than block the very
first translation behind a hung GET.
Behaviour verified against the live oneshot endpoint:
- 1500 ASCII chars → 200
- 1501 ASCII chars → 413 (upstream not contacted)
- 1500 Chinese chars (4500 bytes) → 200
- 1501 Chinese chars → 413
- Pathological "your"*1500 → 504 at 20s (was hanging without timeout)
- Realistic 245-char Chinese → 200 in ~13s
This commit is contained in:
parent
1a06baec6f
commit
e74d34e7ab
@ -15,9 +15,11 @@ package translate
|
|||||||
import (
|
import (
|
||||||
"compress/flate"
|
"compress/flate"
|
||||||
"compress/gzip"
|
"compress/gzip"
|
||||||
|
"context"
|
||||||
"crypto/rand"
|
"crypto/rand"
|
||||||
"encoding/hex"
|
"encoding/hex"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
@ -27,6 +29,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
"github.com/andybalholm/brotli"
|
"github.com/andybalholm/brotli"
|
||||||
"github.com/imroc/req/v3"
|
"github.com/imroc/req/v3"
|
||||||
@ -57,6 +60,26 @@ const (
|
|||||||
impersonatedChromeMajor = "120"
|
impersonatedChromeMajor = "120"
|
||||||
chromeExtensionVersion = "1.86.0"
|
chromeExtensionVersion = "1.86.0"
|
||||||
chromeExtensionID = "cofdbpoegempjloogbagkncekinflcnj"
|
chromeExtensionID = "cofdbpoegempjloogbagkncekinflcnj"
|
||||||
|
|
||||||
|
// oneshot enforces a 1500-character hard cap on the total length of
|
||||||
|
// the `text` array (sum across all items). Source: the extension's
|
||||||
|
// own `G.notLoggedIn = 1500` constant in background.js. The server
|
||||||
|
// returns 400 `{"errors":{"text":["text exceeds maximum length"]}}`
|
||||||
|
// past this; bail early to spare the upstream and give the caller a
|
||||||
|
// faster, less ambiguous error.
|
||||||
|
maxFreeTextLength = 1500
|
||||||
|
|
||||||
|
// oneshotTimeout caps how long we wait on a single translate request.
|
||||||
|
// Without an explicit timeout, a hung upstream connection would
|
||||||
|
// dangle indefinitely and the caller (e.g. browser extension) would
|
||||||
|
// sit on a spinner forever — observed in the field.
|
||||||
|
oneshotTimeout = 20 * time.Second
|
||||||
|
|
||||||
|
// warmupTimeout caps the initial GET to www.deepl.com that seeds the
|
||||||
|
// cookie jar. Shorter than oneshotTimeout because warmup typically
|
||||||
|
// completes in well under a second; we'd rather skip a slow warmup
|
||||||
|
// (cookies are best-effort anyway) than block the first translation.
|
||||||
|
warmupTimeout = 5 * time.Second
|
||||||
)
|
)
|
||||||
|
|
||||||
// instanceID mirrors the UUID the extension persists in chrome.storage on
|
// instanceID mirrors the UUID the extension persists in chrome.storage on
|
||||||
@ -90,7 +113,9 @@ func sharedCookieJar() http.CookieJar {
|
|||||||
// to the oneshot endpoint will carry those cookies automatically.
|
// to the oneshot endpoint will carry those cookies automatically.
|
||||||
func warmCookies(client *req.Client) {
|
func warmCookies(client *req.Client) {
|
||||||
cookieWarmer.Do(func() {
|
cookieWarmer.Do(func() {
|
||||||
_, _ = client.R().Get("https://www.deepl.com/translator")
|
ctx, cancel := context.WithTimeout(context.Background(), warmupTimeout)
|
||||||
|
defer cancel()
|
||||||
|
_, _ = client.R().SetContext(ctx).Get("https://www.deepl.com/translator")
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -240,7 +265,7 @@ type oneshotRequest struct {
|
|||||||
// that a fetch() never emits — wipe those so the WAF cannot tell us apart
|
// that a fetch() never emits — wipe those so the WAF cannot tell us apart
|
||||||
// on that axis.
|
// on that axis.
|
||||||
func newOneshotClient(proxyURL string) (*req.Client, error) {
|
func newOneshotClient(proxyURL string) (*req.Client, error) {
|
||||||
client := req.C().ImpersonateChrome().SetCookieJar(sharedCookieJar())
|
client := req.C().ImpersonateChrome().SetCookieJar(sharedCookieJar()).SetTimeout(oneshotTimeout)
|
||||||
for _, h := range []string{
|
for _, h := range []string{
|
||||||
"Pragma",
|
"Pragma",
|
||||||
"Cache-Control",
|
"Cache-Control",
|
||||||
@ -349,6 +374,13 @@ func TranslateByDeepLX(sourceLang, targetLang, text string, tagHandling string,
|
|||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if n := utf8.RuneCountInString(text); n > maxFreeTextLength {
|
||||||
|
return DeepLXTranslationResult{
|
||||||
|
Code: http.StatusRequestEntityTooLarge,
|
||||||
|
Message: fmt.Sprintf("text exceeds maximum length: %d characters (anonymous oneshot limit is %d)", n, maxFreeTextLength),
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
reqStruct := oneshotRequest{
|
reqStruct := oneshotRequest{
|
||||||
Text: []string{text},
|
Text: []string{text},
|
||||||
TargetLang: resolvedTarget,
|
TargetLang: resolvedTarget,
|
||||||
@ -372,6 +404,16 @@ func TranslateByDeepLX(sourceLang, targetLang, text string, tagHandling string,
|
|||||||
id := time.Now().UnixMilli()
|
id := time.Now().UnixMilli()
|
||||||
result, status, err := callOneshot(endpoint, bodyBytes, dlSession, proxyURL)
|
result, status, err := callOneshot(endpoint, bodyBytes, dlSession, proxyURL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
// Map upstream timeouts to 504 so callers can distinguish "DeepL
|
||||||
|
// took too long" from other 503 failure modes (DNS, TLS, etc.).
|
||||||
|
var ue *url.Error
|
||||||
|
if errors.Is(err, context.DeadlineExceeded) || (errors.As(err, &ue) && ue.Timeout()) {
|
||||||
|
return DeepLXTranslationResult{
|
||||||
|
ID: id,
|
||||||
|
Code: http.StatusGatewayTimeout,
|
||||||
|
Message: fmt.Sprintf("upstream DeepL request timed out after %s", oneshotTimeout),
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
return DeepLXTranslationResult{
|
return DeepLXTranslationResult{
|
||||||
ID: id,
|
ID: id,
|
||||||
Code: http.StatusServiceUnavailable,
|
Code: http.StatusServiceUnavailable,
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user