Skip to content

Commit 5408b83

Browse files
author
bram
committed
Fixed pylint
1 parent f9ec8af commit 5408b83

File tree

1 file changed

+80
-49
lines changed

1 file changed

+80
-49
lines changed

python_gpt_po/services/translation_service.py

Lines changed: 80 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,77 @@ def _get_provider_response(self, content: str) -> str:
290290
return ""
291291
return provider_instance.translate(self.config.provider_clients, self.config.model, content)
292292

293+
@staticmethod
294+
def _fix_json_quotes(json_text: str) -> str:
295+
"""Fix non-standard quotes in JSON response.
296+
297+
Args:
298+
json_text: JSON text with potentially non-standard quotes
299+
300+
Returns:
301+
JSON text with normalized quotes
302+
"""
303+
quote_fixes = [
304+
('"', '"'), # Left double quotation mark
305+
('"', '"'), # Right double quotation mark
306+
('„', '"'), # Double low-9 quotation mark (Lithuanian, German)
307+
('"', '"'), # Left double quotation mark (alternative)
308+
(''', "'"), # Left single quotation mark
309+
(''', "'"), # Right single quotation mark
310+
('‚', "'"), # Single low-9 quotation mark
311+
('«', '"'), # Left-pointing double angle quotation mark
312+
('»', '"'), # Right-pointing double angle quotation mark
313+
('‹', "'"), # Left-pointing single angle quotation mark
314+
('›', "'"), # Right-pointing single angle quotation mark
315+
]
316+
317+
fixed_text = json_text
318+
for old_quote, new_quote in quote_fixes:
319+
fixed_text = fixed_text.replace(old_quote, new_quote)
320+
321+
# Apply regex fix to handle quotes inside strings
322+
fixed_text = re.sub(
323+
r'"([^"\\]*(\\.[^"\\]*)*)"',
324+
lambda m: f'"{m.group(1).replace(chr(92) + chr(34), chr(34))}"',
325+
fixed_text
326+
)
327+
return fixed_text
328+
329+
def _extract_translations_from_malformed_json(
330+
self,
331+
json_text: str,
332+
expected_count: int) -> List[str]:
333+
"""Extract translations from malformed JSON as a fallback.
334+
335+
Args:
336+
json_text: Malformed JSON text
337+
expected_count: Expected number of translations
338+
339+
Returns:
340+
List of extracted translations
341+
342+
Raises:
343+
ValueError: If extraction fails or count mismatch
344+
"""
345+
if '[' not in json_text or ']' not in json_text:
346+
raise ValueError("No array structure found in malformed JSON")
347+
348+
# Extract content between first [ and last ]
349+
start_idx = json_text.find('[')
350+
end_idx = json_text.rfind(']') + 1
351+
array_content = json_text[start_idx:end_idx]
352+
353+
# Try to extract quoted strings
354+
matches = re.findall(r'"([^"]*(?:\\.[^"]*)*)"', array_content)
355+
if not matches or len(matches) != expected_count:
356+
raise ValueError(
357+
f"Could not extract expected number of translations "
358+
f"(expected {expected_count}, got {len(matches) if matches else 0})"
359+
)
360+
361+
# Unescape the extracted strings
362+
return [match.replace('\\"', '"').replace("\\'", "'") for match in matches]
363+
293364
def _process_bulk_response(
294365
self,
295366
response_text: str,
@@ -307,64 +378,24 @@ def _process_bulk_response(
307378
# Note: _stripped_texts parameter kept for future validation features
308379
# Current validation happens per-entry using original_texts
309380
try:
310-
# Clean the response text for formatting issues
311381
clean_response = self._clean_json_response(response_text)
312382
logging.debug("Cleaned JSON response: %s...", clean_response[:100])
313383

314384
# First attempt: try parsing as-is
315385
try:
316386
translated_texts = json.loads(clean_response)
317387
except json.JSONDecodeError:
318-
# Second attempt: fix various quote types that break JSON
319-
# First, normalize all quote types to standard quotes
320-
# Handle different languages' quotation marks
321-
quote_fixes = [
322-
('"', '"'), # Left double quotation mark
323-
('"', '"'), # Right double quotation mark
324-
('„', '"'), # Double low-9 quotation mark (Lithuanian, German)
325-
('"', '"'), # Left double quotation mark (alternative)
326-
(''', "'"), # Left single quotation mark
327-
(''', "'"), # Right single quotation mark
328-
('‚', "'"), # Single low-9 quotation mark
329-
('«', '"'), # Left-pointing double angle quotation mark
330-
('»', '"'), # Right-pointing double angle quotation mark
331-
('‹', "'"), # Left-pointing single angle quotation mark
332-
('›', "'"), # Right-pointing single angle quotation mark
333-
]
334-
335-
fixed_response = clean_response
336-
for old_quote, new_quote in quote_fixes:
337-
fixed_response = fixed_response.replace(old_quote, new_quote)
338-
339-
# Apply fix to all JSON strings (but not the JSON structure quotes)
388+
# Second attempt: fix non-standard quotes
389+
fixed_response = self._fix_json_quotes(clean_response)
340390
try:
341-
# More sophisticated regex to handle quotes inside strings
342-
fixed_response = re.sub(
343-
r'"([^"\\]*(\\.[^"\\]*)*)"',
344-
lambda m: f'"{m.group(1).replace(chr(92) + chr(34), chr(34))}"',
345-
fixed_response)
346391
translated_texts = json.loads(fixed_response)
347-
except json.JSONDecodeError as e:
348-
# Final attempt: try to extract array elements manually
349-
# This is a fallback for severely malformed JSON
350-
logging.warning("API returned malformed JSON, attempting to extract translations manually")
351-
352-
# Try to find array-like structure and extract elements
353-
if '[' in fixed_response and ']' in fixed_response:
354-
# Extract content between first [ and last ]
355-
start_idx = fixed_response.find('[')
356-
end_idx = fixed_response.rfind(']') + 1
357-
array_content = fixed_response[start_idx:end_idx]
358-
359-
# Try to extract quoted strings
360-
matches = re.findall(r'"([^"]*(?:\\.[^"]*)*)"', array_content)
361-
if matches and len(matches) == len(original_texts):
362-
# Unescape the extracted strings
363-
translated_texts = [match.replace('\\"', '"').replace("\\'", "'") for match in matches]
364-
else:
365-
raise ValueError("Could not extract expected number of translations") from e
366-
else:
367-
raise
392+
except json.JSONDecodeError:
393+
# Final attempt: extract from malformed JSON
394+
logging.warning("API returned malformed JSON, extracting translations manually")
395+
translated_texts = self._extract_translations_from_malformed_json(
396+
fixed_response,
397+
len(original_texts)
398+
)
368399

369400
# Validate the format
370401
if not isinstance(translated_texts, list) or len(translated_texts) != len(original_texts):

0 commit comments

Comments
 (0)