mirror of
https://github.com/OwO-Network/DeepLX.git
synced 2026-06-11 15:28:50 +00:00
feat(translate): validate source/target language codes (#218)
Previously TranslateByDeepLX silently mapped any caller-supplied code
through toOneshotLang(), falling back to a lowercased pass-through for
unknown codes. The oneshot endpoint accepts unknown codes with a 200
but echoes the source text back untranslated, leaving callers to
distinguish "translated, identical to source" from "language not
supported" without a clear signal.
Validate strictly against the language table the extension bundles in
background.js (array `y` for target-capable codes, `A` for the
source-only EN / PT aliases) and return HTTP 400 with a list of
supported codes on mismatch. This also catches:
- target_lang = "" → "target_lang is required"
- target_lang = "auto" → "target_lang cannot be \"auto\"; pick one of: ..."
- source_lang = ""/"auto" → allowed, server autodetects
- case-insensitive → strings.ToUpper before lookup
Pick up languages the previous map missed:
+ ES-419 (Latin American Spanish)
+ HE (Hebrew)
+ VI (Vietnamese)
Fix the EN / PT source-lang mapping: the extension's `A` array maps
both to the generic langCodeForIta ("en"/"pt"), not the regional
default. As a target they continue to resolve to en-US / pt-BR for
backward compat with callers that historically passed "EN" / "PT".
Verified end-to-end:
- 5 valid codes (DE, ZH-HANT, HE, VI, ES-419) → 200 + translated text
- Invalid target "XX" → 400, message lists 38 supported codes
- Invalid source "ZZ" → 400, message lists 38 codes + "auto"
- target_lang "auto" → 400
- source autodetect (empty / "auto") + valid target → 200
- Lowercase input "de" → 200 (case-insensitive)
This commit is contained in:
parent
98918dd9f6
commit
057387c957
@ -23,6 +23,7 @@ import (
|
||||
"net/http"
|
||||
"net/http/cookiejar"
|
||||
"net/url"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@ -104,26 +105,108 @@ func newInstanceID() string {
|
||||
return fmt.Sprintf("%s-%s-%s-%s-%s", s[0:8], s[8:12], s[12:16], s[16:20], s[20:32])
|
||||
}
|
||||
|
||||
// langCodeToOneshot translates DeepL's uppercase codes (DE, EN, ZH, ...)
|
||||
// to the lowercase BCP-47-ish codes the oneshot endpoint requires (de,
|
||||
// en-US, zh-Hans, ...). Unknown codes fall through lowercased.
|
||||
var langCodeToOneshot = map[string]string{
|
||||
// Language code tables mirror the bundled list in the extension's
// background.js (arrays `y` ~offset 6000 for the full target-capable
// set, `A` for source-only aliases). Keys are the uppercase forms
// callers pass; values are the lowercase BCP-47-ish forms the oneshot
// endpoint expects ("de", "en-US", "zh-Hans", ...).
//
// targetLangMap is what the API accepts as `target_lang`. EN and PT
// are not part of the extension's y array — DeepL deprecated them as
// target codes in favour of EN-US/EN-GB and PT-BR/PT-PT. They are
// nevertheless listed at the end of this map as a backward-compat
// convenience and resolve to the regional default (en-US, pt-BR).
var targetLangMap = map[string]string{
	"AR": "ar", "BG": "bg", "CS": "cs", "DA": "da", "DE": "de", "EL": "el",
	"EN-GB": "en-GB", "EN-US": "en-US",
	"ES": "es", "ES-419": "es-419", "ET": "et", "FI": "fi", "FR": "fr",
	"HE": "he", "HU": "hu", "ID": "id", "IT": "it", "JA": "ja", "KO": "ko",
	"LT": "lt", "LV": "lv", "NB": "nb", "NL": "nl", "PL": "pl",
	"PT-BR": "pt-BR", "PT-PT": "pt-PT",
	"RO": "ro", "RU": "ru", "SK": "sk", "SL": "sl", "SV": "sv",
	"TR": "tr", "UK": "uk", "VI": "vi",
	"ZH": "zh-Hans", "ZH-HANS": "zh-Hans", "ZH-HANT": "zh-Hant",
	// Convenience aliases for legacy callers.
	"EN": "en-US",
	"PT": "pt-BR",
}
|
||||
|
||||
func toOneshotLang(code string) string {
|
||||
if v, ok := langCodeToOneshot[strings.ToUpper(code)]; ok {
|
||||
return v
|
||||
// sourceLangMap is what the API accepts as `source_lang`. It is a
|
||||
// superset of targetLangMap: EN and PT are first-class source codes
|
||||
// (extension array `A`) mapping to the generic "en"/"pt" — used when
|
||||
// the caller knows the input is English/Portuguese but does not want
|
||||
// to commit to a regional variant.
|
||||
var sourceLangMap = func() map[string]string {
|
||||
m := make(map[string]string, len(targetLangMap)+2)
|
||||
for k, v := range targetLangMap {
|
||||
m[k] = v
|
||||
}
|
||||
return strings.ToLower(code)
|
||||
m["EN"] = "en"
|
||||
m["PT"] = "pt"
|
||||
return m
|
||||
}()
|
||||
|
||||
// resolveTargetLang validates and normalizes a user-supplied target
|
||||
// language code. Returns "" and a non-nil error if the code is empty,
|
||||
// "auto", or otherwise not in the supported set.
|
||||
func resolveTargetLang(code string) (string, error) {
|
||||
if code == "" {
|
||||
return "", fmt.Errorf("target_lang is required")
|
||||
}
|
||||
if strings.EqualFold(code, "auto") {
|
||||
return "", fmt.Errorf("target_lang cannot be \"auto\"; pick one of: %s", supportedTargetLangsList())
|
||||
}
|
||||
if v, ok := targetLangMap[strings.ToUpper(code)]; ok {
|
||||
return v, nil
|
||||
}
|
||||
return "", fmt.Errorf("unsupported target_lang %q; valid codes: %s", code, supportedTargetLangsList())
|
||||
}
|
||||
|
||||
// resolveSourceLang validates and normalizes a user-supplied source
|
||||
// language code. An empty string or "auto" is allowed and returns
|
||||
// ("", nil) so the caller omits source_lang and lets the server
|
||||
// autodetect.
|
||||
func resolveSourceLang(code string) (string, error) {
|
||||
if code == "" || strings.EqualFold(code, "auto") {
|
||||
return "", nil
|
||||
}
|
||||
if v, ok := sourceLangMap[strings.ToUpper(code)]; ok {
|
||||
return v, nil
|
||||
}
|
||||
return "", fmt.Errorf("unsupported source_lang %q; valid codes: %s (or \"auto\")", code, supportedSourceLangsList())
|
||||
}
|
||||
|
||||
// supportedTargetLangsList / supportedSourceLangsList return a sorted,
|
||||
// comma-separated rendering of the supported codes for use in error
|
||||
// messages. Cached at first call.
|
||||
var (
|
||||
targetLangsListOnce sync.Once
|
||||
targetLangsList string
|
||||
sourceLangsListOnce sync.Once
|
||||
sourceLangsList string
|
||||
)
|
||||
|
||||
func supportedTargetLangsList() string {
|
||||
targetLangsListOnce.Do(func() {
|
||||
targetLangsList = sortedKeys(targetLangMap)
|
||||
})
|
||||
return targetLangsList
|
||||
}
|
||||
|
||||
func supportedSourceLangsList() string {
|
||||
sourceLangsListOnce.Do(func() {
|
||||
sourceLangsList = sortedKeys(sourceLangMap)
|
||||
})
|
||||
return sourceLangsList
|
||||
}
|
||||
|
||||
// sortedKeys returns the keys of m sorted in increasing order and
// joined with ", ". Map iteration order is random, so sorting is what
// keeps the rendered list stable between runs. A nil or empty map
// yields "".
func sortedKeys(m map[string]string) string {
	keys := make([]string, 0, len(m))
	for k := range m {
		keys = append(keys, k)
	}
	sort.Strings(keys)
	return strings.Join(keys, ", ")
}
|
||||
|
||||
// appInformation matches the snake_case shape produced by background.js
|
||||
@ -251,9 +334,25 @@ func TranslateByDeepLX(sourceLang, targetLang, text string, tagHandling string,
|
||||
}, nil
|
||||
}
|
||||
|
||||
resolvedTarget, err := resolveTargetLang(targetLang)
|
||||
if err != nil {
|
||||
return DeepLXTranslationResult{
|
||||
Code: http.StatusBadRequest,
|
||||
Message: err.Error(),
|
||||
}, nil
|
||||
}
|
||||
resolvedSource, err := resolveSourceLang(sourceLang)
|
||||
if err != nil {
|
||||
return DeepLXTranslationResult{
|
||||
Code: http.StatusBadRequest,
|
||||
Message: err.Error(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
reqStruct := oneshotRequest{
|
||||
Text: []string{text},
|
||||
TargetLang: toOneshotLang(targetLang),
|
||||
TargetLang: resolvedTarget,
|
||||
SourceLang: resolvedSource, // empty = autodetect; omitempty drops the field
|
||||
UsageType: "Translate",
|
||||
AppInformation: appInformation{
|
||||
OS: "brex_macOS",
|
||||
@ -263,9 +362,6 @@ func TranslateByDeepLX(sourceLang, targetLang, text string, tagHandling string,
|
||||
InstanceID: instanceID,
|
||||
},
|
||||
}
|
||||
if sourceLang != "" && !strings.EqualFold(sourceLang, "auto") {
|
||||
reqStruct.SourceLang = toOneshotLang(sourceLang)
|
||||
}
|
||||
bodyBytes, _ := json.Marshal(reqStruct)
|
||||
|
||||
endpoint := oneshotFreeEndpoint
|
||||
|
||||
Loading…
Reference in New Issue
Block a user