|
21 | 21 | from django.contrib.auth.models import User |
22 | 22 | from django.contrib.auth.password_validation import validate_password |
23 | 23 | from django.http import HttpRequest |
24 | | - |
25 | 24 | # Third Party |
| 25 | +from lingua import LanguageDetectorBuilder |
26 | 26 | from rest_framework import serializers |
27 | 27 | from rest_framework.fields import empty |
28 | 28 | from rest_framework.validators import UniqueValidator |
|
36 | 36 | WeightUnit, |
37 | 37 | ) |
38 | 38 |
|
39 | | - |
40 | 39 | logger = logging.getLogger(__name__) |
41 | 40 |
|
42 | 41 |
|
@@ -197,3 +196,66 @@ class RoutineWeightUnitSerializer(serializers.ModelSerializer): |
197 | 196 | class Meta: |
198 | 197 | model = WeightUnit |
199 | 198 | fields = ['id', 'name'] |
| 199 | + |
| 200 | + |
class LanguageCheckSerializer(serializers.Serializer):
    """
    Serializer for language check

    Verifies that the language detected in ``input`` matches the language
    selected by the client. The selected language is given either as a
    primary key (``language``) or as a two-letter code (``language_code``);
    when both are supplied, ``language`` takes precedence and the code is
    ignored.
    """

    language = serializers.PrimaryKeyRelatedField(queryset=Language.objects.all(), required=False)
    language_code = serializers.CharField(required=False, min_length=2, max_length=2)
    input = serializers.CharField(min_length=10)

    def validate(self, data):
        """
        Check that the detected language of the description corresponds with the
        provided language.

        Raises ``serializers.ValidationError`` when neither a language nor a
        language code is provided, when the language code is unknown, when no
        language can be detected in the input, or when the detected language
        differs from the selected one.
        """
        language = data.get('language')
        language_code = data.get('language_code')

        if not language and not language_code:
            raise serializers.ValidationError(
                {'language': 'Either a language ID or a language code must be provided.'}
            )

        if not language:
            try:
                language = Language.objects.get(short_name=language_code)
            except Language.DoesNotExist:
                # The lookup failure is fully described by the validation
                # error; suppress the chained ORM exception.
                raise serializers.ValidationError(
                    {'language': f'Language with code "{language_code}" does not exist.'}
                ) from None

        # Try to detect the language
        # NOTE(review): the detector is rebuilt on every call; lingua's docs
        # state that loaded models are shared between instances - confirm
        # before moving this to a module-level singleton.
        detector = (
            LanguageDetectorBuilder.from_all_languages().with_preloaded_language_models().build()
        )
        input_str = data.get('input')

        # detect_language_of() returns None when no language can be determined
        # (e.g. the input contains only digits or punctuation).
        detected_language = detector.detect_language_of(input_str)
        detected_language_code = (
            detected_language.iso_code_639_1.name.lower()
            if detected_language is not None
            else None
        )

        # Confidence values are only needed for the debug log, so skip the
        # extra detection pass unless debug logging is actually enabled.
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(
                'Detected language: %s, confidence values: %s, input: %s',
                detected_language_code,
                detector.compute_language_confidence_values(input_str),
                input_str,
            )

        if detected_language is None:
            raise serializers.ValidationError(
                {
                    'check': {
                        'result': False,
                        'detected_language': None,
                        'message': 'The language of your text could not be detected. Try adding '
                        'more content or rephrasing your text, as language detection works '
                        'better with longer or more complete sentences.',
                    }
                }
            )

        if detected_language_code != language.short_name.lower():
            raise serializers.ValidationError(
                {
                    'check': {
                        'result': False,
                        'detected_language': detected_language_code,
                        'message': f'The detected language is "{detected_language.name.capitalize()}" ({detected_language_code}), '
                        f'which does not match your selected language "{language.full_name.capitalize()}" '
                        f'({language.short_name}). If you believe this is incorrect, try adding more content '
                        f'or rephrasing your text, as language detection works better with longer or more '
                        f'complete sentences.',
                    }
                }
            )

        return super().validate(data)
0 commit comments