languagetool-org · fabrichter · Nov 26, 2025 · Nov 20, 2025 · Nov 24, 2025 · Nov 25, 2025
diff --git a/languagetool-core/src/main/java/org/languagetool/JLanguageTool.java b/languagetool-core/src/main/java/org/languagetool/JLanguageTool.java
@@ -298,7 +298,8 @@ public JLanguageTool(Language language, List<Language> altLanguages, Language mo
                        GlobalConfig globalConfig, UserConfig userConfig, boolean inputLogging) {
     this(language, altLanguages, motherTongue, cache, globalConfig, userConfig, true, false);
   }
-
+
+
   /**
    * Create a JLanguageTool and setup the built-in rules for the
    * given language and false friend rules for the text language / mother tongue pair.
@@ -317,25 +318,51 @@ public JLanguageTool(Language language, List<Language> altLanguages, Language mo
    * @since 6.6
    */
   public JLanguageTool(Language language, List<Language> altLanguages, Language motherTongue, ResultCache cache, GlobalConfig globalConfig, UserConfig userConfig, boolean inputLogging, boolean withLanguageModel) {
+    this(language, altLanguages, motherTongue, cache, globalConfig, userConfig, inputLogging, withLanguageModel, null); // null customRules: set up the built-in rules
+  }
+
+  /**
+   * Create a JLanguageTool and setup the built-in rules for the
+   * given language and false friend rules for the text language / mother tongue pair.
+   *
+   * @param language     the language of the text to be checked
+   * @param altLanguages The languages that are accepted as alternative languages - currently this means
+   *                     words are accepted if they are in an alternative language and not similar to
+   *                     a word from {@code language}. If there's a similar word in {@code language},
+   *                     there will be an error of type {@link RuleMatch.Type#Hint} (EXPERIMENTAL)
+   * @param motherTongue the user's mother tongue, used for false friend rules, or <code>null</code>.
+   *          The mother tongue may also be used as a source language for checking bilingual texts.
+   * @param cache a cache to speed up checking if the same sentences get checked more than once,
+   *              e.g. when LT is running as a server and texts are re-checked due to changes
+   * @param inputLogging allow inclusion of input in logs on exceptions
+   * @param withLanguageModel will not call updateOptionalLanguageModelRules(null) if this is true
+   * @param customRules rules to use for the JLanguageTool instance instead of the built-in ones (the list is copied and no default rule activation takes place), or null to use the built-in rules
+   * @since 6.8
+   */
+  public JLanguageTool(Language language, List<Language> altLanguages, Language motherTongue, ResultCache cache, GlobalConfig globalConfig, UserConfig userConfig, boolean inputLogging, boolean withLanguageModel, List<Rule> customRules) {
     this.language = Objects.requireNonNull(language, "language cannot be null");
     this.altLanguages = Objects.requireNonNull(altLanguages, "altLanguages cannot be null (but empty)");
     this.motherTongue = motherTongue;
     this.userConfig = Objects.requireNonNullElseGet(userConfig, UserConfig::new);
     this.globalConfig = globalConfig;
-    ResourceBundle messages = ResourceBundleTools.getMessageBundle(language);
-    builtinRules = getAllBuiltinRules(language, messages, userConfig, globalConfig);
     this.cleanOverlappingMatches = true;
-    try {
-      activateDefaultPatternRules();
-      if (!language.hasNGramFalseFriendRule(motherTongue)) {
-        // use the old false friends, which always match, not depending on context
-        activateDefaultFalseFriendRules();
-      }
-      if (!withLanguageModel) {
-        updateOptionalLanguageModelRules(null); // start out with rules without language model
+    ResourceBundle messages = ResourceBundleTools.getMessageBundle(language);
+    if (customRules != null) {
+      builtinRules = new ArrayList<>(customRules);
+    } else {
+      builtinRules = getAllBuiltinRules(language, messages, userConfig, globalConfig);
+      try {
+        activateDefaultPatternRules();
+        if (!language.hasNGramFalseFriendRule(motherTongue)) {
+          // use the old false friends, which always match, not depending on context
+          activateDefaultFalseFriendRules();
+        }
+        if (!withLanguageModel) {
+          updateOptionalLanguageModelRules(null); // start out with rules without language model
+        }
+      } catch (Exception e) {
+        throw new RuntimeException("Could not activate rules", e);
       }
-    } catch (Exception e) {
-      throw new RuntimeException("Could not activate rules", e);
     }
     this.cache = cache;
     descProvider = new ShortDescriptionProvider();
@@ -775,6 +802,20 @@ public void disableRules(List<String> ruleIds) {
     ruleSetCache.clear();
   }
 
+  /**
+   * Replaces all rules of this instance: clears the built-in rules, the user rules and the cached
+   * rule set, and uses the given rules as the new user rules.
+   * @param rules the rules to use from now on; must not be null (checked before any state is modified)
+   * @since 6.8
+   */
+  public void setRules(List<Rule> rules) {
+    Objects.requireNonNull(rules, "rules cannot be null");
+    builtinRules.clear();
+    userRules.clear();
+    userRules.addAll(rules);
+    ruleSetCache.clear();
+  }
+
   /**
    * Disable the given rule category so the check methods like {@link #check(String)} won't use it.
    *
@@ -1038,9 +1079,15 @@ protected CheckResults checkInternal(AnnotatedText annotatedText, ParagraphHandl
   }
 
   protected CheckResults checkInternal(AnnotatedText annotatedText, ParagraphHandling paraMode, RuleMatchListener listener,
+                                       Mode mode, Level level, @NotNull Set<ToneTag> toneTags,
+                                       @Nullable Long textSessionID, List<String> sentences, List<AnalyzedSentence> analyzedSentences) throws IOException {
+    return checkInternalWithCustomRules(getActiveRulesForLevelAndToneTags(level, toneTags), // rules active for this level and these tone tags
+      annotatedText, paraMode, listener, mode, level, toneTags, textSessionID, sentences, analyzedSentences);
+  }
+
+  public CheckResults checkInternalWithCustomRules(RuleSet rules, AnnotatedText annotatedText, ParagraphHandling paraMode, RuleMatchListener listener,
                                      Mode mode, Level level, @NotNull Set<ToneTag> toneTags,
                                      @Nullable Long textSessionID, List<String> sentences, List<AnalyzedSentence> analyzedSentences) throws IOException {
-    RuleSet rules = getActiveRulesForLevelAndToneTags(level, toneTags);
     if (printStream != null) {
       printIfVerbose(rules.allRules().size() + " rules activated for language " + language);
     }
@@ -1304,7 +1351,7 @@ private RemoteRuleResult fetchResults(long deadlineStartNanos, Mode mode, Level
       if (matches == null) {
         continue;
       }
-      if (cache != null && result.isSuccess()) {
+      if (cache != null && result.isSuccess() && result.adjustOffsets()) {
         // store in cache
         InputSentence cacheKey = new InputSentence(
           sentence, language, motherTongue, disabledRules, disabledRuleCategories,
@@ -1318,8 +1365,10 @@ private RemoteRuleResult fetchResults(long deadlineStartNanos, Mode mode, Level
       // clone matches before adjusting offsets
       // match objects could be relevant to multiple (duplicate) sentences at different offsets
       List<RuleMatch> adjustedMatches = matches.stream().map(RuleMatch::new).collect(Collectors.toList());
-      for (RuleMatch match : adjustedMatches) {
-        adjustOffset(annotatedText, offset, match);
+      if (result.adjustOffsets()) {
+        for (RuleMatch match : adjustedMatches) {
+          adjustOffset(annotatedText, offset, match);
+        }
       }
       remoteMatches.addAll(adjustedMatches);
     }

diff --git a/languagetool-core/src/main/java/org/languagetool/rules/RemoteRule.java b/languagetool-core/src/main/java/org/languagetool/rules/RemoteRule.java
@@ -210,7 +210,7 @@ public FutureTask<RemoteRuleResult> run(List<AnalyzedSentence> sentences, @Nulla
             filteredMatches.addAll(filteredSentenceMatches);
           }
         }
-        result = new RemoteRuleResult(result.isRemote(), result.isSuccess(), filteredMatches, sentences);
+        result = new RemoteRuleResult(result.isRemote(), result.isSuccess(), result.adjustOffsets(), filteredMatches, sentences);
       }
 
       List<RuleMatch> filteredMatches = new ArrayList<>();
@@ -221,7 +221,7 @@ public FutureTask<RemoteRuleResult> run(List<AnalyzedSentence> sentences, @Nulla
           filteredMatches.addAll(filteredSentenceMatches);
         }
       }
-      result = new RemoteRuleResult(result.isRemote(), result.isSuccess(), filteredMatches, sentences);
+      result = new RemoteRuleResult(result.isRemote(), result.isSuccess(), result.adjustOffsets(), filteredMatches, sentences);
       return result;
     });
   }

diff --git a/languagetool-core/src/main/java/org/languagetool/rules/RemoteRuleResult.java b/languagetool-core/src/main/java/org/languagetool/rules/RemoteRuleResult.java
@@ -29,16 +29,19 @@
 public class RemoteRuleResult {
   private final boolean remote; // was remote needed/involved? rules may filter input sentences and only call remote on some; for metrics
   private final boolean success; // successful -> for caching, so that we can cache: remote not needed for this sentence
+  private final boolean adjustOffsets; // whether rule matches are relative to each sentence and need to be adjusted further
+  // or already use the positions from the analyzed sentence and don't need to be adjusted
   private final List<RuleMatch> matches;
   private final Set<AnalyzedSentence> processedSentences;
   // which sentences were processed? to distinguish between no matches because not processed (e.g. cached)
   // and no errors/corrections found
 
   private final Map<AnalyzedSentence, List<RuleMatch>> sentenceMatches = new HashMap<>();
 
-  public RemoteRuleResult(boolean remote, boolean success, List<RuleMatch> matches, List<AnalyzedSentence> processedSentences) {
+  public RemoteRuleResult(boolean remote, boolean success, boolean adjustOffsets, List<RuleMatch> matches, List<AnalyzedSentence> processedSentences) {
     this.remote = remote;
     this.success = success;
+    this.adjustOffsets = adjustOffsets;
     this.matches = matches;
     this.processedSentences = Collections.unmodifiableSet(new HashSet<>(processedSentences));
 
@@ -54,6 +57,10 @@ public RemoteRuleResult(boolean remote, boolean success, List<RuleMatch> matches
     }
   }
 
+  public RemoteRuleResult(boolean remote, boolean success, List<RuleMatch> matches, List<AnalyzedSentence> processedSentences) {
+    this(remote, success, true, matches, processedSentences); // signature from before adjustOffsets existed; passes adjustOffsets = true
+  }
+
   public boolean isRemote() {
     return remote;
   }
@@ -62,6 +69,10 @@ public boolean isSuccess() {
     return success;
   }
 
+  public boolean adjustOffsets() {
+    return adjustOffsets; // the flag given to the constructor: whether the matches' offsets still need to be adjusted
+  }
+
   public List<RuleMatch> getMatches() {
     return matches;
   }

diff --git a/languagetool-server/src/main/java/org/languagetool/server/TextChecker.java b/languagetool-server/src/main/java/org/languagetool/server/TextChecker.java
@@ -44,9 +44,13 @@
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.net.HttpURLConnection;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
 import java.util.*;
 import java.util.concurrent.*;
 import java.util.regex.Pattern;
@@ -262,6 +266,27 @@ protected static Language parseLanguage(String code) throws BadRequestException
     }
   }
 
+  /**
+   * Hash the session key deterministically into a 64-bit signed long (the first 8 bytes of its SHA-256 digest).
+   * @param textSessionIdParam the textSessionId request parameter; used as the key unless it is null or empty
+   * @param ip the client IP, used as the key if textSessionIdParam is not usable; may be null
+   * @return the hashed session id, or null if no key is available
+   */
+  protected static Long computeTextSessionID(String textSessionIdParam, String ip) {
+    boolean hasParam = textSessionIdParam != null && !textSessionIdParam.isEmpty();
+    String input = hasParam ? textSessionIdParam : ip; // an empty parameter must not map all clients to one id
+    if (input == null) {
+      return null;
+    }
+    try {
+      MessageDigest md = MessageDigest.getInstance("SHA-256");
+      byte[] bytes = md.digest(input.getBytes(StandardCharsets.UTF_8));
+      return ByteBuffer.wrap(bytes).getLong(); // big-endian read of the first 8 digest bytes
+    } catch (NoSuchAlgorithmException e) {
+      throw new RuntimeException("SHA-256 not supported", e); // should not happen for SHA-256
+    }
+  }
+
   private void prewarmPipelinePool() {
     // setting + number of pipelines
     // typical addon settings at the moment (2018-11-05)
@@ -433,38 +458,7 @@ public RuleMatch[] match(AnalyzedSentence sentence) throws IOException {
 
     boolean filterDictionaryMatches = "true".equals(params.getOrDefault("filterDictionaryMatches", "true"));
 
-    Long textSessionId = null;
-    try {
-      if (params.containsKey("textSessionId")) {
-        String textSessionIdStr = params.get("textSessionId");
-        if (textSessionIdStr.startsWith("user:")) {
-          int sepPos = textSessionIdStr.indexOf(':');
-          String sessionId = textSessionIdStr.substring(sepPos + 1);
-          textSessionId = Long.valueOf(sessionId);
-        } else if (textSessionIdStr.contains(":")) { // transitioning to new format used in chrome addon
-          // format: "{random number in 0..99999}:{unix time}"
-          long random, timestamp;
-          int sepPos = textSessionIdStr.indexOf(':');
-          random = Long.parseLong(textSessionIdStr.substring(0, sepPos));
-          timestamp = Long.parseLong(textSessionIdStr.substring(sepPos + 1));
-          // use random number to choose a slice in possible range of values
-          // then choose position in slice by timestamp
-          long maxRandom = 100000;
-          long randomSegmentSize = (Long.MAX_VALUE - maxRandom) / maxRandom;
-          long segmentOffset = random * randomSegmentSize;
-          if (timestamp > randomSegmentSize) {
-            log.warn(String.format("Could not transform textSessionId '%s'", textSessionIdStr));
-          }
-          textSessionId = segmentOffset + timestamp;
-        } else {
-          textSessionId = Long.valueOf(textSessionIdStr);
-        }
-      }
-    } catch (NumberFormatException ex) {
-      log.info("Could not parse textSessionId '" + params.get("textSessionId") + "' as long: " + ex.getMessage() +
-        ", user agent: " + params.get("useragent") + ", version: " + params.get("v") +
-        ", HTTP user agent: " + getHttpUserAgent(httpExchange) + ", referrer: " + getHttpReferrer(httpExchange));
-    }
+    Long textSessionId = computeTextSessionID(params.get("textSessionId"), remoteAddress);
 
     List<String> abTest = AB_TEST_SERVICE.getActiveAbTestForClient(params, config);