feat(translate): validate source/target language codes

Previously TranslateByDeepLX silently mapped any caller-supplied code
through toOneshotLang(), falling back to a lowercased pass-through for
unknown codes. The oneshot endpoint accepts unknown codes with a 200
but echoes the source text back untranslated, leaving callers to
distinguish "translated, identical to source" from "language not
supported" without a clear signal.

Validate strictly against the language table the extension bundles in
background.js (array `y` for target-capable codes, `A` for the
source-only EN / PT aliases) and return HTTP 400 with a list of
supported codes on mismatch. This also catches:

  - target_lang = ""        → "target_lang is required"
  - target_lang = "auto"    → "target_lang cannot be \"auto\"; pick one of: ..."
  - source_lang = ""/"auto" → allowed, server autodetects
  - case-insensitive       → strings.ToUpper before lookup

Pick up languages the previous map missed:

  + ES-419 (Latin American Spanish)
  + HE     (Hebrew)
  + VI     (Vietnamese)

Fix the EN / PT source-lang mapping: the extension's `A` array maps
both to the generic langCodeForIta ("en"/"pt"), not the regional
default. As a target they continue to resolve to en-US / pt-BR for
backward compat with callers that historically passed "EN" / "PT".

Verified end-to-end:
  - 5 valid codes (DE, ZH-HANT, HE, VI, ES-419) → 200 + translated text
  - Invalid target "XX"  → 400, message lists 38 supported codes
  - Invalid source "ZZ"  → 400, message lists 38 codes + "auto"
  - target_lang "auto"   → 400
  - source autodetect (empty / "auto") + valid target → 200
  - Lowercase input "de" → 200 (case-insensitive)
This commit is contained in:
Vincent Young 2026-05-22 12:12:27 +08:00
parent 98918dd9f6
commit 0dd935ba7e
No known key found for this signature in database
GPG Key ID: 070D9CD629BC1AAE

View File

@ -23,6 +23,7 @@ import (
"net/http" "net/http"
"net/http/cookiejar" "net/http/cookiejar"
"net/url" "net/url"
"sort"
"strings" "strings"
"sync" "sync"
"time" "time"
@ -104,26 +105,108 @@ func newInstanceID() string {
return fmt.Sprintf("%s-%s-%s-%s-%s", s[0:8], s[8:12], s[12:16], s[16:20], s[20:32]) return fmt.Sprintf("%s-%s-%s-%s-%s", s[0:8], s[8:12], s[12:16], s[16:20], s[20:32])
} }
// langCodeToOneshot translates DeepL's uppercase codes (DE, EN, ZH, ...) // Language code tables mirror the bundled list in the extension's
// to the lowercase BCP-47-ish codes the oneshot endpoint requires (de, // background.js (arrays `y` ~offset 6000 for the full target-capable
// en-US, zh-Hans, ...). Unknown codes fall through lowercased. // set, `A` for source-only aliases). Keys are the uppercase forms
var langCodeToOneshot = map[string]string{ // callers pass; values are the lowercase BCP-47-ish forms the oneshot
// endpoint expects ("de", "en-US", "zh-Hans", ...).
//
// targetLangMap is what the API accepts as `target_lang`. EN and PT
// are intentionally absent — DeepL deprecated them as target codes in
// favour of EN-US/EN-GB and PT-BR/PT-PT, and the extension's y array
// reflects that. We accept EN/PT as a backward-compat convenience and
// resolve them to the regional default (en-US, pt-BR).
var targetLangMap = map[string]string{
"AR": "ar", "BG": "bg", "CS": "cs", "DA": "da", "DE": "de", "EL": "el", "AR": "ar", "BG": "bg", "CS": "cs", "DA": "da", "DE": "de", "EL": "el",
"EN": "en-US", "EN-GB": "en-GB", "EN-US": "en-US", "EN-GB": "en-GB", "EN-US": "en-US",
"ES": "es", "ET": "et", "FI": "fi", "FR": "fr", "HU": "hu", "ES": "es", "ES-419": "es-419", "ET": "et", "FI": "fi", "FR": "fr",
"ID": "id", "IT": "it", "JA": "ja", "KO": "ko", "LT": "lt", "LV": "lv", "HE": "he", "HU": "hu", "ID": "id", "IT": "it", "JA": "ja", "KO": "ko",
"NB": "nb", "NL": "nl", "PL": "pl", "LT": "lt", "LV": "lv", "NB": "nb", "NL": "nl", "PL": "pl",
"PT": "pt-BR", "PT-BR": "pt-BR", "PT-PT": "pt-PT", "PT-BR": "pt-BR", "PT-PT": "pt-PT",
"RO": "ro", "RU": "ru", "SK": "sk", "SL": "sl", "SV": "sv", "RO": "ro", "RU": "ru", "SK": "sk", "SL": "sl", "SV": "sv",
"TR": "tr", "UK": "uk", "TR": "tr", "UK": "uk", "VI": "vi",
"ZH": "zh-Hans", "ZH-HANS": "zh-Hans", "ZH-HANT": "zh-Hant", "ZH": "zh-Hans", "ZH-HANS": "zh-Hans", "ZH-HANT": "zh-Hant",
// Convenience aliases for legacy callers.
"EN": "en-US",
"PT": "pt-BR",
} }
func toOneshotLang(code string) string { // sourceLangMap is what the API accepts as `source_lang`. It is a
if v, ok := langCodeToOneshot[strings.ToUpper(code)]; ok { // superset of targetLangMap: EN and PT are first-class source codes
return v // (extension array `A`) mapping to the generic "en"/"pt" — used when
// the caller knows the input is English/Portuguese but does not want
// to commit to a regional variant.
var sourceLangMap = func() map[string]string {
m := make(map[string]string, len(targetLangMap)+2)
for k, v := range targetLangMap {
m[k] = v
} }
return strings.ToLower(code) m["EN"] = "en"
m["PT"] = "pt"
return m
}()
// resolveTargetLang maps a caller-supplied target language code onto
// the value stored for it in targetLangMap. The lookup is
// case-insensitive: the code is uppercased before it is used as a key.
//
// It returns "" and a non-nil error when code is empty, when code is
// "auto" in any letter case, or when code has no entry in
// targetLangMap. The latter two error messages include the list of
// supported target codes.
func resolveTargetLang(code string) (string, error) {
	// Reject the two inputs that can never name a concrete target.
	switch {
	case code == "":
		return "", fmt.Errorf("target_lang is required")
	case strings.EqualFold(code, "auto"):
		return "", fmt.Errorf("target_lang cannot be \"auto\"; pick one of: %s", supportedTargetLangsList())
	}

	normalized, known := targetLangMap[strings.ToUpper(code)]
	if !known {
		// Echo the caller's original spelling (quoted) so the bad
		// value is easy to spot in the response.
		return "", fmt.Errorf("unsupported target_lang %q; valid codes: %s", code, supportedTargetLangsList())
	}
	return normalized, nil
}
// resolveSourceLang maps a caller-supplied source language code onto
// the value stored for it in sourceLangMap. The lookup is
// case-insensitive: the code is uppercased before it is used as a key.
//
// Unlike the target language, the source language is optional: an
// empty code or "auto" (any letter case) yields ("", nil), which the
// caller treats as "omit source_lang and let the server autodetect".
// Any other code without an entry in sourceLangMap yields "" and an
// error whose message lists the supported source codes.
func resolveSourceLang(code string) (string, error) {
	autodetect := code == "" || strings.EqualFold(code, "auto")
	if autodetect {
		return "", nil
	}

	normalized, known := sourceLangMap[strings.ToUpper(code)]
	if !known {
		// Echo the caller's original spelling (quoted) and remind
		// them that "auto" is also accepted.
		return "", fmt.Errorf("unsupported source_lang %q; valid codes: %s (or \"auto\")", code, supportedSourceLangsList())
	}
	return normalized, nil
}
// Cached renderings of the supported language codes, used when
// building error messages. Each string is assigned at most once, under
// the protection of its companion sync.Once, and is meant to be read
// only through the accessor functions that follow.
var (
	targetLangsListOnce sync.Once
	targetLangsList     string

	sourceLangsListOnce sync.Once
	sourceLangsList     string
)

// supportedTargetLangsList returns the keys of targetLangMap rendered
// by sortedKeys (sorted, comma-separated). The string is computed on
// the first call and reused afterwards.
func supportedTargetLangsList() string {
	build := func() { targetLangsList = sortedKeys(targetLangMap) }
	targetLangsListOnce.Do(build)
	return targetLangsList
}

// supportedSourceLangsList returns the keys of sourceLangMap rendered
// by sortedKeys (sorted, comma-separated). The string is computed on
// the first call and reused afterwards.
func supportedSourceLangsList() string {
	build := func() { sourceLangsList = sortedKeys(sourceLangMap) }
	sourceLangsListOnce.Do(build)
	return sourceLangsList
}
// sortedKeys returns the keys of m in ascending order, joined with
// ", ". The map's values are ignored. An empty or nil map yields "".
func sortedKeys(m map[string]string) string {
// Capacity is set to len(m) up front so the appends below never grow
// the slice.
keys := make([]string, 0, len(m))
for k := range m {
keys = append(keys, k)
}
// Map iteration order is not stable; sort so the rendered list is
// deterministic.
sort.Strings(keys)
return strings.Join(keys, ", ")
} }
// appInformation matches the snake_case shape produced by background.js // appInformation matches the snake_case shape produced by background.js
@ -251,9 +334,25 @@ func TranslateByDeepLX(sourceLang, targetLang, text string, tagHandling string,
}, nil }, nil
} }
resolvedTarget, err := resolveTargetLang(targetLang)
if err != nil {
return DeepLXTranslationResult{
Code: http.StatusBadRequest,
Message: err.Error(),
}, nil
}
resolvedSource, err := resolveSourceLang(sourceLang)
if err != nil {
return DeepLXTranslationResult{
Code: http.StatusBadRequest,
Message: err.Error(),
}, nil
}
reqStruct := oneshotRequest{ reqStruct := oneshotRequest{
Text: []string{text}, Text: []string{text},
TargetLang: toOneshotLang(targetLang), TargetLang: resolvedTarget,
SourceLang: resolvedSource, // empty = autodetect; omitempty drops the field
UsageType: "Translate", UsageType: "Translate",
AppInformation: appInformation{ AppInformation: appInformation{
OS: "brex_macOS", OS: "brex_macOS",
@ -263,9 +362,6 @@ func TranslateByDeepLX(sourceLang, targetLang, text string, tagHandling string,
InstanceID: instanceID, InstanceID: instanceID,
}, },
} }
if sourceLang != "" && !strings.EqualFold(sourceLang, "auto") {
reqStruct.SourceLang = toOneshotLang(sourceLang)
}
bodyBytes, _ := json.Marshal(reqStruct) bodyBytes, _ := json.Marshal(reqStruct)
endpoint := oneshotFreeEndpoint endpoint := oneshotFreeEndpoint