mirror of
https://github.com/OwO-Network/DeepLX.git
synced 2026-06-11 15:28:50 +00:00
feat(translate): validate source/target language codes (#218)
Previously TranslateByDeepLX silently mapped any caller-supplied code
through toOneshotLang(), falling back to a lowercased pass-through for
unknown codes. The oneshot endpoint accepts unknown codes with a 200
but echoes the source text back untranslated, leaving callers with no
clear signal to tell "translated, identical to source" apart from
"language not supported".
Validate strictly against the language table the extension bundles in
background.js (array `y` for target-capable codes, `A` for the
source-only EN / PT aliases) and return HTTP 400 with a list of
supported codes on mismatch. Resulting behaviour per input:
- target_lang = "" → "target_lang is required"
- target_lang = "auto" → "target_lang cannot be \"auto\"; pick one of: ..."
- source_lang = ""/"auto" → allowed, server autodetects
- codes are case-insensitive → normalized with strings.ToUpper before lookup
Pick up languages the previous map missed:
+ ES-419 (Latin American Spanish)
+ HE (Hebrew)
+ VI (Vietnamese)
Fix the EN / PT source-lang mapping: the extension's `A` array maps
both to the generic langCodeForIta ("en"/"pt"), not the regional
default. As targets they continue to resolve to en-US / pt-BR for
backward compatibility with callers that historically passed "EN" / "PT".
Verified end-to-end:
- 5 valid codes (DE, ZH-HANT, HE, VI, ES-419) → 200 + translated text
- Invalid target "XX" → 400, message lists 38 supported codes
- Invalid source "ZZ" → 400, message lists 38 codes + "auto"
- target_lang "auto" → 400
- source autodetect (empty / "auto") + valid target → 200
- Lowercase input "de" → 200 (case-insensitive)
This commit is contained in:
parent
98918dd9f6
commit
057387c957
@ -23,6 +23,7 @@ import (
|
|||||||
"net/http"
|
"net/http"
|
||||||
"net/http/cookiejar"
|
"net/http/cookiejar"
|
||||||
"net/url"
|
"net/url"
|
||||||
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
@ -104,26 +105,108 @@ func newInstanceID() string {
|
|||||||
return fmt.Sprintf("%s-%s-%s-%s-%s", s[0:8], s[8:12], s[12:16], s[16:20], s[20:32])
|
return fmt.Sprintf("%s-%s-%s-%s-%s", s[0:8], s[8:12], s[12:16], s[16:20], s[20:32])
|
||||||
}
|
}
|
||||||
|
|
||||||
// langCodeToOneshot translates DeepL's uppercase codes (DE, EN, ZH, ...)
|
// Language code tables mirror the bundled list in the extension's
|
||||||
// to the lowercase BCP-47-ish codes the oneshot endpoint requires (de,
|
// background.js (arrays `y` ~offset 6000 for the full target-capable
|
||||||
// en-US, zh-Hans, ...). Unknown codes fall through lowercased.
|
// set, `A` for source-only aliases). Keys are the uppercase forms
|
||||||
var langCodeToOneshot = map[string]string{
|
// callers pass; values are the lowercase BCP-47-ish forms the oneshot
|
||||||
|
// endpoint expects ("de", "en-US", "zh-Hans", ...).
|
||||||
|
//
|
||||||
|
// targetLangMap is what the API accepts as `target_lang`. EN and PT
|
||||||
|
// are intentionally absent — DeepL deprecated them as target codes in
|
||||||
|
// favour of EN-US/EN-GB and PT-BR/PT-PT, and the extension's y array
|
||||||
|
// reflects that. We accept EN/PT as a backward-compat convenience and
|
||||||
|
// resolve them to the regional default (en-US, pt-BR).
|
||||||
|
var targetLangMap = map[string]string{
|
||||||
"AR": "ar", "BG": "bg", "CS": "cs", "DA": "da", "DE": "de", "EL": "el",
|
"AR": "ar", "BG": "bg", "CS": "cs", "DA": "da", "DE": "de", "EL": "el",
|
||||||
"EN": "en-US", "EN-GB": "en-GB", "EN-US": "en-US",
|
"EN-GB": "en-GB", "EN-US": "en-US",
|
||||||
"ES": "es", "ET": "et", "FI": "fi", "FR": "fr", "HU": "hu",
|
"ES": "es", "ES-419": "es-419", "ET": "et", "FI": "fi", "FR": "fr",
|
||||||
"ID": "id", "IT": "it", "JA": "ja", "KO": "ko", "LT": "lt", "LV": "lv",
|
"HE": "he", "HU": "hu", "ID": "id", "IT": "it", "JA": "ja", "KO": "ko",
|
||||||
"NB": "nb", "NL": "nl", "PL": "pl",
|
"LT": "lt", "LV": "lv", "NB": "nb", "NL": "nl", "PL": "pl",
|
||||||
"PT": "pt-BR", "PT-BR": "pt-BR", "PT-PT": "pt-PT",
|
"PT-BR": "pt-BR", "PT-PT": "pt-PT",
|
||||||
"RO": "ro", "RU": "ru", "SK": "sk", "SL": "sl", "SV": "sv",
|
"RO": "ro", "RU": "ru", "SK": "sk", "SL": "sl", "SV": "sv",
|
||||||
"TR": "tr", "UK": "uk",
|
"TR": "tr", "UK": "uk", "VI": "vi",
|
||||||
"ZH": "zh-Hans", "ZH-HANS": "zh-Hans", "ZH-HANT": "zh-Hant",
|
"ZH": "zh-Hans", "ZH-HANS": "zh-Hans", "ZH-HANT": "zh-Hant",
|
||||||
|
// Convenience aliases for legacy callers.
|
||||||
|
"EN": "en-US",
|
||||||
|
"PT": "pt-BR",
|
||||||
}
|
}
|
||||||
|
|
||||||
func toOneshotLang(code string) string {
|
// sourceLangMap is what the API accepts as `source_lang`. It is a
|
||||||
if v, ok := langCodeToOneshot[strings.ToUpper(code)]; ok {
|
// superset of targetLangMap: EN and PT are first-class source codes
|
||||||
return v
|
// (extension array `A`) mapping to the generic "en"/"pt" — used when
|
||||||
|
// the caller knows the input is English/Portuguese but does not want
|
||||||
|
// to commit to a regional variant.
|
||||||
|
var sourceLangMap = func() map[string]string {
|
||||||
|
m := make(map[string]string, len(targetLangMap)+2)
|
||||||
|
for k, v := range targetLangMap {
|
||||||
|
m[k] = v
|
||||||
}
|
}
|
||||||
return strings.ToLower(code)
|
m["EN"] = "en"
|
||||||
|
m["PT"] = "pt"
|
||||||
|
return m
|
||||||
|
}()
|
||||||
|
|
||||||
|
// resolveTargetLang validates and normalizes a user-supplied target
|
||||||
|
// language code. Returns "" and a non-nil error if the code is empty,
|
||||||
|
// "auto", or otherwise not in the supported set.
|
||||||
|
func resolveTargetLang(code string) (string, error) {
|
||||||
|
if code == "" {
|
||||||
|
return "", fmt.Errorf("target_lang is required")
|
||||||
|
}
|
||||||
|
if strings.EqualFold(code, "auto") {
|
||||||
|
return "", fmt.Errorf("target_lang cannot be \"auto\"; pick one of: %s", supportedTargetLangsList())
|
||||||
|
}
|
||||||
|
if v, ok := targetLangMap[strings.ToUpper(code)]; ok {
|
||||||
|
return v, nil
|
||||||
|
}
|
||||||
|
return "", fmt.Errorf("unsupported target_lang %q; valid codes: %s", code, supportedTargetLangsList())
|
||||||
|
}
|
||||||
|
|
||||||
|
// resolveSourceLang validates and normalizes a user-supplied source
|
||||||
|
// language code. An empty string or "auto" is allowed and returns
|
||||||
|
// ("", nil) so the caller omits source_lang and lets the server
|
||||||
|
// autodetect.
|
||||||
|
func resolveSourceLang(code string) (string, error) {
|
||||||
|
if code == "" || strings.EqualFold(code, "auto") {
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
if v, ok := sourceLangMap[strings.ToUpper(code)]; ok {
|
||||||
|
return v, nil
|
||||||
|
}
|
||||||
|
return "", fmt.Errorf("unsupported source_lang %q; valid codes: %s (or \"auto\")", code, supportedSourceLangsList())
|
||||||
|
}
|
||||||
|
|
||||||
|
// supportedTargetLangsList / supportedSourceLangsList return a sorted,
|
||||||
|
// comma-separated rendering of the supported codes for use in error
|
||||||
|
// messages. Cached at first call.
|
||||||
|
var (
|
||||||
|
targetLangsListOnce sync.Once
|
||||||
|
targetLangsList string
|
||||||
|
sourceLangsListOnce sync.Once
|
||||||
|
sourceLangsList string
|
||||||
|
)
|
||||||
|
|
||||||
|
func supportedTargetLangsList() string {
|
||||||
|
targetLangsListOnce.Do(func() {
|
||||||
|
targetLangsList = sortedKeys(targetLangMap)
|
||||||
|
})
|
||||||
|
return targetLangsList
|
||||||
|
}
|
||||||
|
|
||||||
|
func supportedSourceLangsList() string {
|
||||||
|
sourceLangsListOnce.Do(func() {
|
||||||
|
sourceLangsList = sortedKeys(sourceLangMap)
|
||||||
|
})
|
||||||
|
return sourceLangsList
|
||||||
|
}
|
||||||
|
|
||||||
|
func sortedKeys(m map[string]string) string {
|
||||||
|
keys := make([]string, 0, len(m))
|
||||||
|
for k := range m {
|
||||||
|
keys = append(keys, k)
|
||||||
|
}
|
||||||
|
sort.Strings(keys)
|
||||||
|
return strings.Join(keys, ", ")
|
||||||
}
|
}
|
||||||
|
|
||||||
// appInformation matches the snake_case shape produced by background.js
|
// appInformation matches the snake_case shape produced by background.js
|
||||||
@ -251,9 +334,25 @@ func TranslateByDeepLX(sourceLang, targetLang, text string, tagHandling string,
|
|||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
resolvedTarget, err := resolveTargetLang(targetLang)
|
||||||
|
if err != nil {
|
||||||
|
return DeepLXTranslationResult{
|
||||||
|
Code: http.StatusBadRequest,
|
||||||
|
Message: err.Error(),
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
resolvedSource, err := resolveSourceLang(sourceLang)
|
||||||
|
if err != nil {
|
||||||
|
return DeepLXTranslationResult{
|
||||||
|
Code: http.StatusBadRequest,
|
||||||
|
Message: err.Error(),
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
reqStruct := oneshotRequest{
|
reqStruct := oneshotRequest{
|
||||||
Text: []string{text},
|
Text: []string{text},
|
||||||
TargetLang: toOneshotLang(targetLang),
|
TargetLang: resolvedTarget,
|
||||||
|
SourceLang: resolvedSource, // empty = autodetect; omitempty drops the field
|
||||||
UsageType: "Translate",
|
UsageType: "Translate",
|
||||||
AppInformation: appInformation{
|
AppInformation: appInformation{
|
||||||
OS: "brex_macOS",
|
OS: "brex_macOS",
|
||||||
@ -263,9 +362,6 @@ func TranslateByDeepLX(sourceLang, targetLang, text string, tagHandling string,
|
|||||||
InstanceID: instanceID,
|
InstanceID: instanceID,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
if sourceLang != "" && !strings.EqualFold(sourceLang, "auto") {
|
|
||||||
reqStruct.SourceLang = toOneshotLang(sourceLang)
|
|
||||||
}
|
|
||||||
bodyBytes, _ := json.Marshal(reqStruct)
|
bodyBytes, _ := json.Marshal(reqStruct)
|
||||||
|
|
||||||
endpoint := oneshotFreeEndpoint
|
endpoint := oneshotFreeEndpoint
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user