@@ -290,6 +290,77 @@ def _get_provider_response(self, content: str) -> str:
290290 return ""
291291 return provider_instance .translate (self .config .provider_clients , self .config .model , content )
292292
293+ @staticmethod
294+ def _fix_json_quotes (json_text : str ) -> str :
295+ """Fix non-standard quotes in JSON response.
296+
297+ Args:
298+ json_text: JSON text with potentially non-standard quotes
299+
300+ Returns:
301+ JSON text with normalized quotes
302+ """
303+ quote_fixes = [
304+ ('"' , '"' ), # Left double quotation mark
305+ ('"' , '"' ), # Right double quotation mark
306+ ('„' , '"' ), # Double low-9 quotation mark (Lithuanian, German)
307+ ('"' , '"' ), # Left double quotation mark (alternative)
308+ (''', "'"), # Left single quotation mark
309+ (''' , "'" ), # Right single quotation mark
310+ ('‚' , "'" ), # Single low-9 quotation mark
311+ ('«' , '"' ), # Left-pointing double angle quotation mark
312+ ('»' , '"' ), # Right-pointing double angle quotation mark
313+ ('‹' , "'" ), # Left-pointing single angle quotation mark
314+ ('›' , "'" ), # Right-pointing single angle quotation mark
315+ ]
316+
317+ fixed_text = json_text
318+ for old_quote , new_quote in quote_fixes :
319+ fixed_text = fixed_text .replace (old_quote , new_quote )
320+
321+ # Apply regex fix to handle quotes inside strings
322+ fixed_text = re .sub (
323+ r'"([^"\\]*(\\.[^"\\]*)*)"' ,
324+ lambda m : f'"{ m .group (1 ).replace (chr (92 ) + chr (34 ), chr (34 ))} "' ,
325+ fixed_text
326+ )
327+ return fixed_text
328+
329+ def _extract_translations_from_malformed_json (
330+ self ,
331+ json_text : str ,
332+ expected_count : int ) -> List [str ]:
333+ """Extract translations from malformed JSON as a fallback.
334+
335+ Args:
336+ json_text: Malformed JSON text
337+ expected_count: Expected number of translations
338+
339+ Returns:
340+ List of extracted translations
341+
342+ Raises:
343+ ValueError: If extraction fails or count mismatch
344+ """
345+ if '[' not in json_text or ']' not in json_text :
346+ raise ValueError ("No array structure found in malformed JSON" )
347+
348+ # Extract content between first [ and last ]
349+ start_idx = json_text .find ('[' )
350+ end_idx = json_text .rfind (']' ) + 1
351+ array_content = json_text [start_idx :end_idx ]
352+
353+ # Try to extract quoted strings
354+ matches = re .findall (r'"([^"]*(?:\\.[^"]*)*)"' , array_content )
355+ if not matches or len (matches ) != expected_count :
356+ raise ValueError (
357+ f"Could not extract expected number of translations "
358+ f"(expected { expected_count } , got { len (matches ) if matches else 0 } )"
359+ )
360+
361+ # Unescape the extracted strings
362+ return [match .replace ('\\ "' , '"' ).replace ("\\ '" , "'" ) for match in matches ]
363+
293364 def _process_bulk_response (
294365 self ,
295366 response_text : str ,
@@ -307,64 +378,24 @@ def _process_bulk_response(
307378 # Note: _stripped_texts parameter kept for future validation features
308379 # Current validation happens per-entry using original_texts
309380 try :
310- # Clean the response text for formatting issues
311381 clean_response = self ._clean_json_response (response_text )
312382 logging .debug ("Cleaned JSON response: %s..." , clean_response [:100 ])
313383
314384 # First attempt: try parsing as-is
315385 try :
316386 translated_texts = json .loads (clean_response )
317387 except json .JSONDecodeError :
318- # Second attempt: fix various quote types that break JSON
319- # First, normalize all quote types to standard quotes
320- # Handle different languages' quotation marks
321- quote_fixes = [
322- ('"' , '"' ), # Left double quotation mark
323- ('"' , '"' ), # Right double quotation mark
324- ('„' , '"' ), # Double low-9 quotation mark (Lithuanian, German)
325- ('"' , '"' ), # Left double quotation mark (alternative)
326- (''', "'"), # Left single quotation mark
327- (''' , "'" ), # Right single quotation mark
328- ('‚' , "'" ), # Single low-9 quotation mark
329- ('«' , '"' ), # Left-pointing double angle quotation mark
330- ('»' , '"' ), # Right-pointing double angle quotation mark
331- ('‹' , "'" ), # Left-pointing single angle quotation mark
332- ('›' , "'" ), # Right-pointing single angle quotation mark
333- ]
334-
335- fixed_response = clean_response
336- for old_quote , new_quote in quote_fixes :
337- fixed_response = fixed_response .replace (old_quote , new_quote )
338-
339- # Apply fix to all JSON strings (but not the JSON structure quotes)
388+ # Second attempt: fix non-standard quotes
389+ fixed_response = self ._fix_json_quotes (clean_response )
340390 try :
341- # More sophisticated regex to handle quotes inside strings
342- fixed_response = re .sub (
343- r'"([^"\\]*(\\.[^"\\]*)*)"' ,
344- lambda m : f'"{ m .group (1 ).replace (chr (92 ) + chr (34 ), chr (34 ))} "' ,
345- fixed_response )
346391 translated_texts = json .loads (fixed_response )
347- except json .JSONDecodeError as e :
348- # Final attempt: try to extract array elements manually
349- # This is a fallback for severely malformed JSON
350- logging .warning ("API returned malformed JSON, attempting to extract translations manually" )
351-
352- # Try to find array-like structure and extract elements
353- if '[' in fixed_response and ']' in fixed_response :
354- # Extract content between first [ and last ]
355- start_idx = fixed_response .find ('[' )
356- end_idx = fixed_response .rfind (']' ) + 1
357- array_content = fixed_response [start_idx :end_idx ]
358-
359- # Try to extract quoted strings
360- matches = re .findall (r'"([^"]*(?:\\.[^"]*)*)"' , array_content )
361- if matches and len (matches ) == len (original_texts ):
362- # Unescape the extracted strings
363- translated_texts = [match .replace ('\\ "' , '"' ).replace ("\\ '" , "'" ) for match in matches ]
364- else :
365- raise ValueError ("Could not extract expected number of translations" ) from e
366- else :
367- raise
392+ except json .JSONDecodeError :
393+ # Final attempt: extract from malformed JSON
394+ logging .warning ("API returned malformed JSON, extracting translations manually" )
395+ translated_texts = self ._extract_translations_from_malformed_json (
396+ fixed_response ,
397+ len (original_texts )
398+ )
368399
369400 # Validate the format
370401 if not isinstance (translated_texts , list ) or len (translated_texts ) != len (original_texts ):
0 commit comments