📝 Add docstrings to fix-wordcoherencyrule

coderabbitai[bot] · web-flow · commit 91cefbfc8387 · 2025-10-15T01:40:27.000Z
Docstrings generation was requested by @minizhiren. * #11568 (comment) The following files were modified: * `languagetool-language-modules/en/src/main/java/org/languagetool/rules/en/WordCoherencyRule.java`
diff --git a/languagetool-language-modules/en/src/main/java/org/languagetool/rules/en/WordCoherencyRule.java b/languagetool-language-modules/en/src/main/java/org/languagetool/rules/en/WordCoherencyRule.java
@@ -19,12 +19,21 @@
 package org.languagetool.rules.en;
 
 import java.io.IOException;
+import java.util.Objects;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.ResourceBundle;
 import java.util.Set;
 
+import org.languagetool.AnalyzedSentence;
+import org.languagetool.AnalyzedToken;
+import org.languagetool.AnalyzedTokenReadings;
 import org.languagetool.rules.AbstractWordCoherencyRule;
 import org.languagetool.rules.Example;
+import org.languagetool.rules.RuleMatch;
 import org.languagetool.rules.WordCoherencyDataLoader;
 
 /**
@@ -34,22 +43,118 @@ public class WordCoherencyRule extends AbstractWordCoherencyRule {
 
   private static final Map<String, Set<String>> wordMap = new WordCoherencyDataLoader().loadWords("/en/coherency.txt");
 
+  /**
+   * Detects occurrences of mixed spelling variants of the same word within a text and produces matches.
+   *
+   * Scans each sentence token-by-token for known variant forms (loaded from the rule's word map).
+   * If two different variants of the same lemma appear in the same text, produces a RuleMatch for the later occurrence
+   * with a short message and, when applicable, a suggested replacement that preserves initial capitalization.
+   *
+   * @param sentences the analyzed sentences of the document to check
+   * @return an array of RuleMatch objects for each detected mixed-variant occurrence; each match may include a single suggested replacement
+   */
+  @Override
+  public RuleMatch[] match(List<AnalyzedSentence> sentences) {
+    List<RuleMatch> ruleMatches = new ArrayList<>();
+    Map<String, String> shouldNotAppearWord = new HashMap<>(); // e.g. doggie -> doggy
+    int pos = 0;
+
+    for (AnalyzedSentence sentence : sentences) {
+      AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace();
+      for (AnalyzedTokenReadings atr : tokens) {
+        String surface = atr.getToken();
+        String surfaceLc = surface.toLowerCase(Locale.ROOT);
+
+        // Check whether this token is one of the variant candidates
+        Set<String> variants = getWordMap().get(surfaceLc);
+        if (variants == null || variants.isEmpty()) {
+          continue;
+        }
+
+        // ====== Key: lemma immunity to prevent false positives (e.g. doggies/doggier/doggiest) ======
+        Set<String> lemmasLc = atr.getReadings().stream()
+            .map(AnalyzedToken::getLemma)
+            .filter(Objects::nonNull)
+            .map(s -> s.toLowerCase(Locale.ROOT))
+            .collect(java.util.stream.Collectors.toSet());
+        if (!java.util.Collections.disjoint(lemmasLc, variants)) {
+           // The lemma itself is one of the coherent variants → inflected form → skip reporting
+          continue;
+        }
+        // ========================================================================
+
+        int fromPos = pos + atr.getStartPos();
+        int toPos = pos + atr.getEndPos();
+
+        // If an alternative spelling has already been encountered, this word is the opposite variant → create a match
+        if (shouldNotAppearWord.containsKey(surfaceLc)) {
+          String other = shouldNotAppearWord.get(surfaceLc);
+          String msg = getMessage(surface, other);
+          RuleMatch rm = new RuleMatch(this, sentence, fromPos, toPos, msg);
+
+          String marked = sentence.getText().substring(atr.getStartPos(), atr.getEndPos());
+          String replacement = createReplacement(marked, surfaceLc, other, atr);
+          if (org.languagetool.tools.StringTools.startsWithUppercase(surface)) {
+            replacement = org.languagetool.tools.StringTools.uppercaseFirstChar(replacement);
+          }
+          if (!marked.equalsIgnoreCase(replacement)) {
+            rm.setSuggestedReplacement(replacement);
+            ruleMatches.add(rm);
+          }
+          rm.setShortMessage(getShortMessage());
+        } else {
+          // Record the variant spelling so that later occurrences of the opposite form can be detected
+          for (String v : variants) {
+            shouldNotAppearWord.put(v, surfaceLc);
+          }
+        }
+      }
+      pos += sentence.getCorrectedTextLength();
+    }
+    return toRuleMatchArray(ruleMatches);
+  }
+
+  /**
+   * Creates a WordCoherencyRule configured with the provided resource bundle.
+   *
+   * Registers an example demonstrating the incorrect mixing of variants ("archeology" vs "archaeology").
+   *
+   * @param messages the ResourceBundle containing localized messages for this rule
+   * @throws IOException if loading the coherency data fails
+   */
   public WordCoherencyRule(ResourceBundle messages) throws IOException {
     super(messages);
     addExamplePair(Example.wrong("He likes archaeology. Really? She likes <marker>archeology</marker>, too."),
-                   Example.fixed("He likes archaeology. Really? She likes <marker>archaeology</marker>, too."));
+        Example.fixed("He likes archaeology. Really? She likes <marker>archaeology</marker>, too."));
   }
 
+  /**
+   * Provides the mapping of normalized (lowercase) word variants to their admitted variants used for coherency checks.
+   *
+   * @return the map whose keys are lowercase variant forms and whose values are sets of admitted variant strings
+   */
   @Override
   protected Map<String, Set<String>> getWordMap() {
     return wordMap;
   }
 
+  /**
+   * Produce the user-facing message advising against mixing two spelling variants in the same text.
+   *
+   * @param word1 the first spelling variant to mention in the message
+   * @param word2 the second spelling variant to mention in the message
+   * @return a message telling the user not to mix the two variants, containing `word1` and `word2`
+   */
   @Override
   protected String getMessage(String word1, String word2) {
     return "Do not mix variants of the same word ('" + word1 + "' and '" + word2 + "') within a single text.";
   }
-  
+
+  /**
+   * Provides the unique identifier for this word coherency rule.
+   *
+   * @return the rule identifier "EN_WORD_COHERENCY"
+   */
   @Override
   public String getId() {
     return "EN_WORD_COHERENCY";
@@ -60,4 +165,4 @@ public String getDescription() {
     return "Coherent spelling of words with two admitted variants.";
   }
 
-}
+}