fix: use parallel processing to improve translation speed

Vincent Yang 2025-02-01 03:24:44 -05:00
parent de9888ca5f
commit b6d7e96db7
No known key found for this signature in database
GPG Key ID: 55F1635E821BF0E8
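
The change replaces the sequential per-line loop in TranslateByDeepLX with a fan-out/fan-in pattern: each newline-separated part of the input text is translated in its own goroutine, every goroutine sends an indexed result on a buffered channel, a sync.WaitGroup plus a small closer goroutine ends the collection loop, and results are written back into a slice by index so the output keeps the original line order. The sketch below illustrates only that pattern; it is not the DeepLX code, and translateParallel/translatePart are made-up names, with translatePart standing in for the real split-and-request work done per part.

package main

import (
	"fmt"
	"strings"
	"sync"
)

// translationResult carries the index of the input part so the caller can
// reassemble the output in the original order, whatever order the
// goroutines finish in.
type translationResult struct {
	index       int
	translation string
	err         error
}

// translatePart is a stand-in for the real per-part work (splitText plus the
// LMT_handle_jobs request in DeepLX); here it just upper-cases the input.
func translatePart(part string) (string, error) {
	return strings.ToUpper(part), nil
}

// translateParallel translates each newline-separated part concurrently and
// joins the results back together in input order.
func translateParallel(text string) (string, error) {
	textParts := strings.Split(text, "\n")

	// Buffered so every goroutine can send its result without blocking,
	// even if the collector below has already returned on an earlier error.
	results := make(chan translationResult, len(textParts))

	var wg sync.WaitGroup
	for i := range textParts {
		wg.Add(1)
		go func(index int, part string) {
			defer wg.Done()
			if strings.TrimSpace(part) == "" {
				results <- translationResult{index: index} // keep empty lines
				return
			}
			translated, err := translatePart(part)
			results <- translationResult{index: index, translation: translated, err: err}
		}(i, textParts[i])
	}

	// Close the channel once every worker is done so the range below terminates.
	go func() {
		wg.Wait()
		close(results)
	}()

	// Collect results, writing each one back to its original position.
	translatedParts := make([]string, len(textParts))
	for r := range results {
		if r.err != nil {
			return "", r.err
		}
		translatedParts[r.index] = r.translation
	}
	return strings.Join(translatedParts, "\n"), nil
}

func main() {
	out, err := translateParallel("first line\n\nthird line")
	if err != nil {
		fmt.Println("error:", err)
		return
	}
	fmt.Println(out)
}

Sizing the channel to len(textParts) is what lets the collector return on the first error without stranding the remaining goroutines on a blocked send; they finish, the closer goroutine closes the channel, and nothing leaks. The actual diff to translate/translate.go follows.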

translate/translate.go

@@ -2,7 +2,7 @@
  * @Author: Vincent Young
  * @Date: 2024-09-16 11:59:24
  * @LastEditors: Vincent Yang
- * @LastEditTime: 2025-01-20 17:09:59
+ * @LastEditTime: 2025-02-01 03:21:41
  * @FilePath: /DeepLX/translate/translate.go
  * @Telegram: https://t.me/missuo
  * @GitHub: https://github.com/missuo
@@ -19,6 +19,7 @@ import (
 	"net/http"
 	"net/url"
 	"strings"
+	"sync"
 
 	"github.com/abadojack/whatlanggo"
 	"github.com/imroc/req/v3"
@@ -126,92 +127,89 @@ func TranslateByDeepLX(sourceLang, targetLang, text string, tagHandling string,
 		}, nil
 	}
 
-	// Split text by newlines and store them for later reconstruction
+	// Split text by newlines
 	textParts := strings.Split(text, "\n")
-	var translatedParts []string
-	var allAlternatives [][]string // Store alternatives for each part
-
-	for _, part := range textParts {
-		if strings.TrimSpace(part) == "" {
-			translatedParts = append(translatedParts, "")
-			allAlternatives = append(allAlternatives, []string{""})
-			continue
-		}
-
-		// Split text first
-		splitResult, err := splitText(part, tagHandling == "html" || tagHandling == "xml", proxyURL, dlSession)
-		if err != nil {
-			return DeepLXTranslationResult{
-				Code:    http.StatusServiceUnavailable,
-				Message: err.Error(),
-			}, nil
-		}
-
-		// Get detected language if source language is auto
-		if sourceLang == "auto" || sourceLang == "" {
-			sourceLang = strings.ToUpper(whatlanggo.DetectLang(part).Iso6391())
-		}
-
-		// Prepare jobs from split result
-		var jobs []Job
-		chunks := splitResult.Get("result.texts.0.chunks").Array()
-		for idx, chunk := range chunks {
-			sentence := chunk.Get("sentences.0")
-
-			// Handle context
-			contextBefore := []string{}
-			contextAfter := []string{}
-			if idx > 0 {
-				contextBefore = []string{chunks[idx-1].Get("sentences.0.text").String()}
-			}
-			if idx < len(chunks)-1 {
-				contextAfter = []string{chunks[idx+1].Get("sentences.0.text").String()}
-			}
-
-			jobs = append(jobs, Job{
-				Kind:               "default",
-				PreferredNumBeams:  4,
-				RawEnContextBefore: contextBefore,
-				RawEnContextAfter:  contextAfter,
-				Sentences: []Sentence{{
-					Prefix: sentence.Get("prefix").String(),
-					Text:   sentence.Get("text").String(),
-					ID:     idx + 1,
-				}},
-			})
-		}
-
-		hasRegionalVariant := false
-		targetLangCode := targetLang
-		targetLangParts := strings.Split(targetLang, "-")
-		if len(targetLangParts) > 1 {
-			targetLangCode = targetLangParts[0]
-			hasRegionalVariant = true
-		}
-
-		// Prepare translation request
-		id := getRandomNumber()
-
-		postData := &PostData{
-			Jsonrpc: "2.0",
-			Method:  "LMT_handle_jobs",
-			ID:      id,
-			Params: Params{
-				CommonJobParams: CommonJobParams{
-					Mode: "translate",
-				},
-				Lang: Lang{
-					SourceLangComputed: strings.ToUpper(sourceLang),
-					TargetLang:         strings.ToUpper(targetLangCode),
-				},
-				Jobs:      jobs,
-				Priority:  1,
-				Timestamp: getTimeStamp(getICount(part)),
-			},
-		}
-
-		if hasRegionalVariant {
-			postData = &PostData{
+
+	// Create channels for results
+	type translationResult struct {
+		index        int
+		translation  string
+		alternatives []string
+		err          error
+	}
+
+	results := make(chan translationResult, len(textParts))
+
+	// Create a wait group to track all goroutines
+	var wg sync.WaitGroup
+
+	// Launch goroutines for each text part
+	for i := range textParts {
+		wg.Add(1)
+		go func(index int, text string) {
+			defer wg.Done()
+
+			if strings.TrimSpace(text) == "" {
+				results <- translationResult{
+					index:        index,
+					translation:  "",
+					alternatives: []string{""},
+				}
+				return
+			}
+
+			// Split text first
+			splitResult, err := splitText(text, tagHandling == "html" || tagHandling == "xml", proxyURL, dlSession)
+			if err != nil {
+				results <- translationResult{index: index, err: err}
+				return
+			}
+
+			// Get detected language if source language is auto
+			currentSourceLang := sourceLang
+			if currentSourceLang == "auto" || currentSourceLang == "" {
+				currentSourceLang = strings.ToUpper(whatlanggo.DetectLang(text).Iso6391())
+			}
+
+			// Prepare jobs from split result
+			var jobs []Job
+			chunks := splitResult.Get("result.texts.0.chunks").Array()
+			for idx, chunk := range chunks {
+				sentence := chunk.Get("sentences.0")
+
+				// Handle context
+				contextBefore := []string{}
+				contextAfter := []string{}
+				if idx > 0 {
+					contextBefore = []string{chunks[idx-1].Get("sentences.0.text").String()}
+				}
+				if idx < len(chunks)-1 {
+					contextAfter = []string{chunks[idx+1].Get("sentences.0.text").String()}
+				}
+
+				jobs = append(jobs, Job{
+					Kind:               "default",
+					PreferredNumBeams:  4,
+					RawEnContextBefore: contextBefore,
+					RawEnContextAfter:  contextAfter,
+					Sentences: []Sentence{{
+						Prefix: sentence.Get("prefix").String(),
+						Text:   sentence.Get("text").String(),
+						ID:     idx + 1,
+					}},
+				})
+			}
+
+			hasRegionalVariant := false
+			targetLangCode := targetLang
+			targetLangParts := strings.Split(targetLang, "-")
+			if len(targetLangParts) > 1 {
+				targetLangCode = targetLangParts[0]
+				hasRegionalVariant = true
+			}
+
+			// Prepare translation request
+			id := getRandomNumber()
+
+			postData := &PostData{
 				Jsonrpc: "2.0",
 				Method:  "LMT_handle_jobs",
 				ID:      id,
@@ -221,62 +219,82 @@ func TranslateByDeepLX(sourceLang, targetLang, text string, tagHandling string,
 						RegionalVariant: map[bool]string{true: targetLang, false: ""}[hasRegionalVariant],
 					},
 					Lang: Lang{
-						SourceLangComputed: strings.ToUpper(sourceLang),
+						SourceLangComputed: strings.ToUpper(currentSourceLang),
 						TargetLang:         strings.ToUpper(targetLangCode),
 					},
 					Jobs:      jobs,
 					Priority:  1,
-					Timestamp: getTimeStamp(getICount(part)),
+					Timestamp: getTimeStamp(getICount(text)),
 				},
 			}
-		}
-
-		// Make translation request
-		result, err := makeRequest(postData, "LMT_handle_jobs", proxyURL, dlSession)
-		if err != nil {
-			return DeepLXTranslationResult{
-				Code:    http.StatusServiceUnavailable,
-				Message: err.Error(),
-			}, nil
-		}
-
-		// Process translation results
-		var partTranslation string
-		var partAlternatives []string
-
-		translations := result.Get("result.translations").Array()
-		if len(translations) > 0 {
-			// Process main translation
-			for _, translation := range translations {
-				partTranslation += translation.Get("beams.0.sentences.0.text").String() + " "
-			}
-			partTranslation = strings.TrimSpace(partTranslation)
-
-			// Process alternatives
-			numBeams := len(translations[0].Get("beams").Array())
-			for i := 1; i < numBeams; i++ { // Start from 1 since 0 is the main translation
-				var altText string
-				for _, translation := range translations {
-					beams := translation.Get("beams").Array()
-					if i < len(beams) {
-						altText += beams[i].Get("sentences.0.text").String() + " "
-					}
-				}
-				if altText != "" {
-					partAlternatives = append(partAlternatives, strings.TrimSpace(altText))
-				}
-			}
-		}
-
-		if partTranslation == "" {
+
+			// Make translation request
+			result, err := makeRequest(postData, "LMT_handle_jobs", proxyURL, dlSession)
+			if err != nil {
+				results <- translationResult{index: index, err: err}
+				return
+			}
+
+			// Process translation results
+			var partTranslation string
+			var partAlternatives []string
+
+			translations := result.Get("result.translations").Array()
+			if len(translations) > 0 {
+				// Process main translation
+				for _, translation := range translations {
+					partTranslation += translation.Get("beams.0.sentences.0.text").String() + " "
+				}
+				partTranslation = strings.TrimSpace(partTranslation)
+
+				// Process alternatives
+				numBeams := len(translations[0].Get("beams").Array())
+				for i := 1; i < numBeams; i++ {
+					var altText string
+					for _, translation := range translations {
+						beams := translation.Get("beams").Array()
+						if i < len(beams) {
+							altText += beams[i].Get("sentences.0.text").String() + " "
+						}
+					}
+					if altText != "" {
+						partAlternatives = append(partAlternatives, strings.TrimSpace(altText))
+					}
+				}
+			}
+
+			if partTranslation == "" {
+				results <- translationResult{index: index, err: fmt.Errorf("translation failed")}
+				return
+			}
+
+			results <- translationResult{
+				index:        index,
+				translation:  partTranslation,
+				alternatives: partAlternatives,
+			}
+		}(i, textParts[i])
+	}
+
+	// Close results channel when all goroutines are done
+	go func() {
+		wg.Wait()
+		close(results)
+	}()
+
+	// Collect results maintaining original order
+	translatedParts := make([]string, len(textParts))
+	allAlternatives := make([][]string, len(textParts))
+
+	for result := range results {
+		if result.err != nil {
 			return DeepLXTranslationResult{
 				Code:    http.StatusServiceUnavailable,
-				Message: "Translation failed",
+				Message: result.err.Error(),
 			}, nil
 		}
 
-		translatedParts = append(translatedParts, partTranslation)
-		allAlternatives = append(allAlternatives, partAlternatives)
+		translatedParts[result.index] = result.translation
+		allAlternatives[result.index] = result.alternatives
 	}
 
 	// Join all translated parts with newlines
@@ -298,9 +316,9 @@ func TranslateByDeepLX(sourceLang, targetLang, text string, tagHandling string,
 			if i < len(alts) {
 				altParts = append(altParts, alts[i])
 			} else if len(translatedParts[j]) == 0 {
-				altParts = append(altParts, "") // Keep empty lines
+				altParts = append(altParts, "")
 			} else {
-				altParts = append(altParts, translatedParts[j]) // Use main translation if no alternative
+				altParts = append(altParts, translatedParts[j])
 			}
 		}
 		combinedAlternatives = append(combinedAlternatives, strings.Join(altParts, "\n"))
@@ -308,7 +326,7 @@ func TranslateByDeepLX(sourceLang, targetLang, text string, tagHandling string,
 	return DeepLXTranslationResult{
 		Code:         http.StatusOK,
-		ID:           getRandomNumber(), // Using new ID for the complete translation
+		ID:           getRandomNumber(),
 		Data:         translatedText,
 		Alternatives: combinedAlternatives,
 		SourceLang:   sourceLang,