fix extendedSentenceRange issue with leading whitespaces (#11706)

This commit is contained in:
Stefan Viol 2026-04-14 11:44:01 +02:00 committed by GitHub
parent b9d405eeb1
commit fab4cc5f97
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 435 additions and 248 deletions

View file

@ -46,7 +46,7 @@ public class CheckResults {
public CheckResults(List<RuleMatch> ruleMatches, List<Range> ignoredRanges, List<ExtendedSentenceRange> extendedSentenceRanges) {
this.ruleMatches = Objects.requireNonNull(ruleMatches);
this.ignoredRanges = Objects.requireNonNull(ignoredRanges);
this.extendedSentenceRanges = Objects.requireNonNull(extendedSentenceRanges.stream().sorted().collect(Collectors.toList()));
this.extendedSentenceRanges = Objects.requireNonNull(extendedSentenceRanges.stream().sorted().toList());
//TODO: use this later, when we are sure the sentenceRanges (from extendedSentenceRange) are correct.
// Right now the sentenceRanges are calculated differently from those in extendedSentenceRange.
// extendedSentenceRanges.forEach(extendedSentenceRange -> this.sentenceRanges.add(new SentenceRange(extendedSentenceRange.getFromPos(), extendedSentenceRange.getToPos())));

View file

@ -2186,7 +2186,13 @@ public class JLanguageTool {
for (int i = 0, sentencesSize = sentences.size(); i < sentencesSize; i++) {
SentenceData sentence = sentences.get(i);
wordCounter += sentence.wordCount;
ExtendedSentenceRange extendedSentenceRange = new ExtendedSentenceRange(sentence.startOffset, sentence.startOffset + sentence.text.trim().length(), language.getShortCode());
int whitespaceFix = 0;
// If the character at index 1 of the sentence text is a space, treat the sentence as having extra leading whitespace and shift the range by the number of stripped leading characters so that it matches the regular sentenceRange
if (sentence.text.startsWith(" ", 1)) {
String sentenceStripLeading = sentence.text.stripLeading();
whitespaceFix = (sentence.text.length() - sentenceStripLeading.length());
}
ExtendedSentenceRange extendedSentenceRange = new ExtendedSentenceRange(sentence.startOffset + whitespaceFix, sentence.startOffset + whitespaceFix + sentence.text.trim().length(), language.getShortCode());
extendedSentenceRanges.add(extendedSentenceRange);
try {
//comment in to trigger an exception via input text:

View file

@ -116,308 +116,308 @@ public class SentenceRangeTest {
public void testGermanSentenceRange() throws IOException {
JLanguageTool jLanguageTool = new JLanguageTool(new NoRulesGerman());
String text = "\n" +
"\n" +
"\n" +
"\n" +
"LanguageTool\n" +
"\n" +
"\n" +
"\n" +
"Unsere Grammatik-, Stil- und Rechtschreibprüfung ist in vielen Sprachen verfügbar und wird von Millionen Menschen weltweit genutzt\n" +
"\n" +
"\n" +
"\n" +
"\uFEFF\uFEFF\uFEFF\uFEFF\n" +
"\n" +
"Probieren Sie den LanguageTool-Editor aus.\n" +
"\n" +
"\n" +
"\n" +
"Bekommen Sie Tipps zur Verbesserung Ihrer Rechtschreibung (inklusive Kommasetzung u.v.m.) während Sie E-Mails schreiben, bloggen oder einfach nur twittern. LanguageTool erkennt automatisch, in welcher Sprache Sie schreiben. Um Ihre Daten zu schützen, werden vom Browser-Add-on keine Texte gespeichert.\n" +
"\n" +
"\n" +
"\n" +
"\n" +
"\n" +
"\n" +
"Holen Sie alles aus Ihren Dokumenten heraus und liefern Sie fehlerfreie Ergebnisse ab. Egal, ob Sie an einer Dissertation arbeiten, einen Aufsatz oder ein Buch schreiben oder einfach nur Notizen machen.\n" +
"\n" +
"\n" +
"\n" +
"\uFEFF\u2063\n" +
"\n" +
"\n" +
"\n" +
"\uFEFFProfessionalisieren Sie die Kommunikation Ihres Teams mit der Grammatik- und Stilprüfung von LanguageTool.\n" +
"\n" +
"\n" +
"\n" +
"\n" +
"\n" +
"Voll unterstützt (Rechtschreibung, Grammatik- und Stilhinweise):\n" +
"\n" +
"\n" +
"\n" +
"Englisch\n" +
"\n" +
"\n" +
"\n" +
"Deutsch\n" +
"\n" +
"\n" +
"\n" +
"Französisch\n" +
"\n" +
"\n" +
"\n" +
"Spanisch\n" +
"\n" +
"\n" +
"\n" +
"Niederländisch\n" +
"\n" +
"\n" +
"\n" +
"\n" +
"\n" +
"Danke, dass Sie es ausprobieren!\n";
"\n" +
"\n" +
"\n" +
"LanguageTool\n" +
"\n" +
"\n" +
"\n" +
"Unsere Grammatik-, Stil- und Rechtschreibprüfung ist in vielen Sprachen verfügbar und wird von Millionen Menschen weltweit genutzt\n" +
"\n" +
"\n" +
"\n" +
"\uFEFF\uFEFF\uFEFF\uFEFF\n" +
"\n" +
"Probieren Sie den LanguageTool-Editor aus.\n" +
"\n" +
"\n" +
"\n" +
"Bekommen Sie Tipps zur Verbesserung Ihrer Rechtschreibung (inklusive Kommasetzung u.v.m.) während Sie E-Mails schreiben, bloggen oder einfach nur twittern. LanguageTool erkennt automatisch, in welcher Sprache Sie schreiben. Um Ihre Daten zu schützen, werden vom Browser-Add-on keine Texte gespeichert.\n" +
"\n" +
"\n" +
"\n" +
"\n" +
"\n" +
"\n" +
"Holen Sie alles aus Ihren Dokumenten heraus und liefern Sie fehlerfreie Ergebnisse ab. Egal, ob Sie an einer Dissertation arbeiten, einen Aufsatz oder ein Buch schreiben oder einfach nur Notizen machen.\n" +
"\n" +
"\n" +
"\n" +
"\uFEFF\u2063\n" +
"\n" +
"\n" +
"\n" +
"\uFEFFProfessionalisieren Sie die Kommunikation Ihres Teams mit der Grammatik- und Stilprüfung von LanguageTool.\n" +
"\n" +
"\n" +
"\n" +
"\n" +
"\n" +
"Voll unterstützt (Rechtschreibung, Grammatik- und Stilhinweise):\n" +
"\n" +
"\n" +
"\n" +
"Englisch\n" +
"\n" +
"\n" +
"\n" +
"Deutsch\n" +
"\n" +
"\n" +
"\n" +
"Französisch\n" +
"\n" +
"\n" +
"\n" +
"Spanisch\n" +
"\n" +
"\n" +
"\n" +
"Niederländisch\n" +
"\n" +
"\n" +
"\n" +
"\n" +
"\n" +
"Danke, dass Sie es ausprobieren!\n";
AnnotatedText annotatedText = new AnnotatedTextBuilder().addText(text).build();
CheckResults checkResults = jLanguageTool.check2(annotatedText,
true,
JLanguageTool.ParagraphHandling.NORMAL,
ruleMatch -> {
},
JLanguageTool.Mode.ALL_BUT_TEXTLEVEL_ONLY,
JLanguageTool.Level.DEFAULT,
null);
true,
JLanguageTool.ParagraphHandling.NORMAL,
ruleMatch -> {
},
JLanguageTool.Mode.ALL_BUT_TEXTLEVEL_ONLY,
JLanguageTool.Level.DEFAULT,
null);
List<SentenceRange> sentenceRanges = checkResults.getSentenceRanges();
assertEquals(17, sentenceRanges.size());
assertEquals("LanguageTool",
text.substring(
sentenceRanges.get(0).getFromPos(),
sentenceRanges.get(0).getToPos()));
text.substring(
sentenceRanges.get(0).getFromPos(),
sentenceRanges.get(0).getToPos()));
assertEquals("Unsere Grammatik-, Stil- und Rechtschreibprüfung ist in vielen Sprachen verfügbar und wird von Millionen Menschen weltweit genutzt",
text.substring(
sentenceRanges.get(1).getFromPos(),
sentenceRanges.get(1).getToPos()));
text.substring(
sentenceRanges.get(1).getFromPos(),
sentenceRanges.get(1).getToPos()));
assertEquals("Probieren Sie den LanguageTool-Editor aus.",
text.substring(
sentenceRanges.get(2).getFromPos(),
sentenceRanges.get(2).getToPos()));
text.substring(
sentenceRanges.get(2).getFromPos(),
sentenceRanges.get(2).getToPos()));
assertEquals("Bekommen Sie Tipps zur Verbesserung Ihrer Rechtschreibung (inklusive Kommasetzung u.v.m.) während Sie E-Mails schreiben, bloggen oder einfach nur twittern.",
text.substring(
sentenceRanges.get(3).getFromPos(),
sentenceRanges.get(3).getToPos()));
text.substring(
sentenceRanges.get(3).getFromPos(),
sentenceRanges.get(3).getToPos()));
assertEquals("LanguageTool erkennt automatisch, in welcher Sprache Sie schreiben.",
text.substring(
sentenceRanges.get(4).getFromPos(),
sentenceRanges.get(4).getToPos()));
text.substring(
sentenceRanges.get(4).getFromPos(),
sentenceRanges.get(4).getToPos()));
assertEquals("Um Ihre Daten zu schützen, werden vom Browser-Add-on keine Texte gespeichert.",
text.substring(
sentenceRanges.get(5).getFromPos(),
sentenceRanges.get(5).getToPos()));
text.substring(
sentenceRanges.get(5).getFromPos(),
sentenceRanges.get(5).getToPos()));
assertEquals("Holen Sie alles aus Ihren Dokumenten heraus und liefern Sie fehlerfreie Ergebnisse ab.",
text.substring(
sentenceRanges.get(6).getFromPos(),
sentenceRanges.get(6).getToPos()));
text.substring(
sentenceRanges.get(6).getFromPos(),
sentenceRanges.get(6).getToPos()));
assertEquals("Egal, ob Sie an einer Dissertation arbeiten, einen Aufsatz oder ein Buch schreiben oder einfach nur Notizen machen.",
text.substring(
sentenceRanges.get(7).getFromPos(),
sentenceRanges.get(7).getToPos()));
text.substring(
sentenceRanges.get(7).getFromPos(),
sentenceRanges.get(7).getToPos()));
assertEquals("Professionalisieren Sie die Kommunikation Ihres Teams mit der Grammatik- und Stilprüfung von LanguageTool.",
text.substring(
sentenceRanges.get(9).getFromPos(),
sentenceRanges.get(9).getToPos()));
text.substring(
sentenceRanges.get(9).getFromPos(),
sentenceRanges.get(9).getToPos()));
assertEquals("Voll unterstützt (Rechtschreibung, Grammatik- und Stilhinweise):",
text.substring(
sentenceRanges.get(10).getFromPos(),
sentenceRanges.get(10).getToPos()));
text.substring(
sentenceRanges.get(10).getFromPos(),
sentenceRanges.get(10).getToPos()));
assertEquals("Englisch",
text.substring(
sentenceRanges.get(11).getFromPos(),
sentenceRanges.get(11).getToPos()));
text.substring(
sentenceRanges.get(11).getFromPos(),
sentenceRanges.get(11).getToPos()));
assertEquals("Deutsch",
text.substring(
sentenceRanges.get(12).getFromPos(),
sentenceRanges.get(12).getToPos()));
text.substring(
sentenceRanges.get(12).getFromPos(),
sentenceRanges.get(12).getToPos()));
assertEquals("Französisch",
text.substring(
sentenceRanges.get(13).getFromPos(),
sentenceRanges.get(13).getToPos()));
text.substring(
sentenceRanges.get(13).getFromPos(),
sentenceRanges.get(13).getToPos()));
assertEquals("Spanisch",
text.substring(
sentenceRanges.get(14).getFromPos(),
sentenceRanges.get(14).getToPos()));
text.substring(
sentenceRanges.get(14).getFromPos(),
sentenceRanges.get(14).getToPos()));
assertEquals("Niederländisch",
text.substring(
sentenceRanges.get(15).getFromPos(),
sentenceRanges.get(15).getToPos()));
text.substring(
sentenceRanges.get(15).getFromPos(),
sentenceRanges.get(15).getToPos()));
assertEquals("Danke, dass Sie es ausprobieren!",
text.substring(
sentenceRanges.get(16).getFromPos(),
sentenceRanges.get(16).getToPos()));
text.substring(
sentenceRanges.get(16).getFromPos(),
sentenceRanges.get(16).getToPos()));
}
@Test
public void testEnglishSentenceRange() throws IOException {
JLanguageTool jLanguageTool = new JLanguageTool(new NoRulesEnglish());
String text = "\n" +
"\n" +
"\n" +
"\n" +
"LanguageTool\n" +
"\n" +
"\n" +
"\n" +
"LanguageTools multilingual grammar, style, and spell checker is used by millions of people around the world.\n" +
"\n" +
"\n" +
"\n" +
"\uFEFF\uFEFF\uFEFF\uFEFF\n" +
"\n" +
"Trusted by our partners and customers\n" +
"\n" +
"\n" +
"\n" +
"Receive tips on how to improve your text (including punctuation advice etc.) while typing an e-mail, a blog post or just a simple tweet. Whatever language you're using, LanguageTool will automatically detect it and provide suggestions. To respect your privacy, no text is stored by the browser add-on.\n" +
"\n" +
"\n" +
"\n" +
"\n" +
"\n" +
"\n" +
"Get the best out of your docs and deliver error-free results. No matter whether you're working on a dissertation, an essay, or a book, or you just want to note down something.\n" +
"\n" +
"\n" +
"\n" +
"\uFEFF\u2063\n" +
"\n" +
"\n" +
"\n" +
"\uFEFFProfessionalize your team's communication with LanguageTool's grammar and style checker.\n" +
"\n" +
"\n" +
"\n" +
"\n" +
"\n" +
"Fully supported (spelling, grammar, style hints):\n" +
"\n" +
"\n" +
"\n" +
"English\n" +
"\n" +
"\n" +
"\n" +
"German\n" +
"\n" +
"\n" +
"\n" +
"French\n" +
"\n" +
"\n" +
"\n" +
"Spanish\n" +
"\n" +
"\n" +
"\n" +
"Dutch\n" +
"\n" +
"\n" +
"\n" +
"\n" +
"\n" +
"Thanks for checking it out!\n";
"\n" +
"\n" +
"\n" +
"LanguageTool\n" +
"\n" +
"\n" +
"\n" +
"LanguageTools multilingual grammar, style, and spell checker is used by millions of people around the world.\n" +
"\n" +
"\n" +
"\n" +
"\uFEFF\uFEFF\uFEFF\uFEFF\n" +
"\n" +
"Trusted by our partners and customers\n" +
"\n" +
"\n" +
"\n" +
"Receive tips on how to improve your text (including punctuation advice etc.) while typing an e-mail, a blog post or just a simple tweet. Whatever language you're using, LanguageTool will automatically detect it and provide suggestions. To respect your privacy, no text is stored by the browser add-on.\n" +
"\n" +
"\n" +
"\n" +
"\n" +
"\n" +
"\n" +
"Get the best out of your docs and deliver error-free results. No matter whether you're working on a dissertation, an essay, or a book, or you just want to note down something.\n" +
"\n" +
"\n" +
"\n" +
"\uFEFF\u2063\n" +
"\n" +
"\n" +
"\n" +
"\uFEFFProfessionalize your team's communication with LanguageTool's grammar and style checker.\n" +
"\n" +
"\n" +
"\n" +
"\n" +
"\n" +
"Fully supported (spelling, grammar, style hints):\n" +
"\n" +
"\n" +
"\n" +
"English\n" +
"\n" +
"\n" +
"\n" +
"German\n" +
"\n" +
"\n" +
"\n" +
"French\n" +
"\n" +
"\n" +
"\n" +
"Spanish\n" +
"\n" +
"\n" +
"\n" +
"Dutch\n" +
"\n" +
"\n" +
"\n" +
"\n" +
"\n" +
"Thanks for checking it out!\n";
AnnotatedText annotatedText = new AnnotatedTextBuilder().addText(text).build();
CheckResults checkResults = jLanguageTool.check2(annotatedText,
true,
JLanguageTool.ParagraphHandling.NORMAL,
ruleMatch -> {
},
JLanguageTool.Mode.ALL_BUT_TEXTLEVEL_ONLY,
JLanguageTool.Level.DEFAULT,
null);
true,
JLanguageTool.ParagraphHandling.NORMAL,
ruleMatch -> {
},
JLanguageTool.Mode.ALL_BUT_TEXTLEVEL_ONLY,
JLanguageTool.Level.DEFAULT,
null);
List<SentenceRange> sentenceRanges = checkResults.getSentenceRanges();
assertEquals(17, sentenceRanges.size());
assertEquals("LanguageTool",
text.substring(
sentenceRanges.get(0).getFromPos(),
sentenceRanges.get(0).getToPos()));
text.substring(
sentenceRanges.get(0).getFromPos(),
sentenceRanges.get(0).getToPos()));
assertEquals("LanguageTools multilingual grammar, style, and spell checker is used by millions of people around the world.",
text.substring(
sentenceRanges.get(1).getFromPos(),
sentenceRanges.get(1).getToPos()));
text.substring(
sentenceRanges.get(1).getFromPos(),
sentenceRanges.get(1).getToPos()));
assertEquals("Trusted by our partners and customers",
text.substring(
sentenceRanges.get(2).getFromPos(),
sentenceRanges.get(2).getToPos()));
text.substring(
sentenceRanges.get(2).getFromPos(),
sentenceRanges.get(2).getToPos()));
assertEquals("Receive tips on how to improve your text (including punctuation advice etc.) while typing an e-mail, a blog post or just a simple tweet.",
text.substring(
sentenceRanges.get(3).getFromPos(),
sentenceRanges.get(3).getToPos()));
text.substring(
sentenceRanges.get(3).getFromPos(),
sentenceRanges.get(3).getToPos()));
assertEquals("Whatever language you're using, LanguageTool will automatically detect it and provide suggestions.",
text.substring(
sentenceRanges.get(4).getFromPos(),
sentenceRanges.get(4).getToPos()));
text.substring(
sentenceRanges.get(4).getFromPos(),
sentenceRanges.get(4).getToPos()));
assertEquals("To respect your privacy, no text is stored by the browser add-on.",
text.substring(
sentenceRanges.get(5).getFromPos(),
sentenceRanges.get(5).getToPos()));
text.substring(
sentenceRanges.get(5).getFromPos(),
sentenceRanges.get(5).getToPos()));
assertEquals("Get the best out of your docs and deliver error-free results.",
text.substring(
sentenceRanges.get(6).getFromPos(),
sentenceRanges.get(6).getToPos()));
text.substring(
sentenceRanges.get(6).getFromPos(),
sentenceRanges.get(6).getToPos()));
assertEquals("No matter whether you're working on a dissertation, an essay, or a book, or you just want to note down something.",
text.substring(
sentenceRanges.get(7).getFromPos(),
sentenceRanges.get(7).getToPos()));
text.substring(
sentenceRanges.get(7).getFromPos(),
sentenceRanges.get(7).getToPos()));
assertEquals("Professionalize your team's communication with LanguageTool's grammar and style checker.",
text.substring(
sentenceRanges.get(9).getFromPos(),
sentenceRanges.get(9).getToPos()));
text.substring(
sentenceRanges.get(9).getFromPos(),
sentenceRanges.get(9).getToPos()));
assertEquals("Fully supported (spelling, grammar, style hints):",
text.substring(
sentenceRanges.get(10).getFromPos(),
sentenceRanges.get(10).getToPos()));
text.substring(
sentenceRanges.get(10).getFromPos(),
sentenceRanges.get(10).getToPos()));
assertEquals("English",
text.substring(
sentenceRanges.get(11).getFromPos(),
sentenceRanges.get(11).getToPos()));
text.substring(
sentenceRanges.get(11).getFromPos(),
sentenceRanges.get(11).getToPos()));
assertEquals("German",
text.substring(
sentenceRanges.get(12).getFromPos(),
sentenceRanges.get(12).getToPos()));
text.substring(
sentenceRanges.get(12).getFromPos(),
sentenceRanges.get(12).getToPos()));
assertEquals("French",
text.substring(
sentenceRanges.get(13).getFromPos(),
sentenceRanges.get(13).getToPos()));
text.substring(
sentenceRanges.get(13).getFromPos(),
sentenceRanges.get(13).getToPos()));
assertEquals("Spanish",
text.substring(
sentenceRanges.get(14).getFromPos(),
sentenceRanges.get(14).getToPos()));
text.substring(
sentenceRanges.get(14).getFromPos(),
sentenceRanges.get(14).getToPos()));
assertEquals("Dutch",
text.substring(
sentenceRanges.get(15).getFromPos(),
sentenceRanges.get(15).getToPos()));
text.substring(
sentenceRanges.get(15).getFromPos(),
sentenceRanges.get(15).getToPos()));
assertEquals("Thanks for checking it out!",
text.substring(
sentenceRanges.get(16).getFromPos(),
sentenceRanges.get(16).getToPos()));
text.substring(
sentenceRanges.get(16).getFromPos(),
sentenceRanges.get(16).getToPos()));
}
@Test
public void testCorrectSentenceRange() {
// A sentence list as it would come from a sentenceTokenizer
List<String> sentences = Arrays.asList(
"Hallo,\n\n",
"Das ist ein neuer Satz.",
"\n\nEin Satz mit \uFEFFSonderzeichen.",
"\n\n\n\n\nSatz mehreren Leerzeichen.",
" Hier sind die Zeichen mal am Ende.\n\n\n",
"\n\n\n\uFeFFNoch ein Satz.\n\n\n\n");
"Hallo,\n\n",
"Das ist ein neuer Satz.",
"\n\nEin Satz mit \uFEFFSonderzeichen.",
"\n\n\n\n\nSatz mehreren Leerzeichen.",
" Hier sind die Zeichen mal am Ende.\n\n\n",
"\n\n\n\uFeFFNoch ein Satz.\n\n\n\n");
String text = String.join("", sentences);
AnnotatedText annotatedText = new AnnotatedTextBuilder().addText(text).build();
List<SentenceRange> ranges = SentenceRange.getRangesFromSentences(annotatedText, sentences);
@ -471,165 +471,346 @@ public class SentenceRangeTest {
JLanguageTool.Level.PICKY,
null);
List<SentenceRange> sentenceRanges = checkResults.getSentenceRanges();
List<ExtendedSentenceRange> extendedSentenceRanges = checkResults.getExtendedSentenceRanges();
assertEquals(51, sentenceRanges.size());
SentenceRange testSentence1 = sentenceRanges.get(0);
assertEquals(0, testSentence1.getFromPos());
assertEquals(29, testSentence1.getToPos());
assertEquals(0, extendedSentenceRanges.get(0).getFromPos());
assertEquals(29, extendedSentenceRanges.get(0).getToPos());
assertEquals("\"This\"+is+Mr.+Pigfat+calling.", text.substring(testSentence1.getFromPos(), testSentence1.getToPos()));
assertEquals(33, sentenceRanges.get(1).getFromPos());
assertEquals(34, sentenceRanges.get(1).getToPos());
assertEquals(33, extendedSentenceRanges.get(1).getFromPos());
assertEquals(34, extendedSentenceRanges.get(1).getToPos());
assertEquals(38, sentenceRanges.get(2).getFromPos());
assertEquals(39, sentenceRanges.get(2).getToPos());
assertEquals(38, extendedSentenceRanges.get(2).getFromPos());
assertEquals(39, extendedSentenceRanges.get(2).getToPos());
assertEquals(43, sentenceRanges.get(3).getFromPos());
assertEquals(44, sentenceRanges.get(3).getToPos());
assertEquals(43, extendedSentenceRanges.get(3).getFromPos());
assertEquals(44, extendedSentenceRanges.get(3).getToPos());
assertEquals(48, sentenceRanges.get(4).getFromPos());
assertEquals(49, sentenceRanges.get(4).getToPos());
assertEquals(48, extendedSentenceRanges.get(4).getFromPos());
assertEquals(49, extendedSentenceRanges.get(4).getToPos());
assertEquals(53, sentenceRanges.get(5).getFromPos());
assertEquals(54, sentenceRanges.get(5).getToPos());
assertEquals(53, extendedSentenceRanges.get(5).getFromPos());
assertEquals(54, extendedSentenceRanges.get(5).getToPos());
assertEquals(58, sentenceRanges.get(6).getFromPos());
assertEquals(59, sentenceRanges.get(6).getToPos());
assertEquals(58, extendedSentenceRanges.get(6).getFromPos());
assertEquals(59, extendedSentenceRanges.get(6).getToPos());
assertEquals(63, sentenceRanges.get(7).getFromPos());
assertEquals(64, sentenceRanges.get(7).getToPos());
assertEquals(63, extendedSentenceRanges.get(7).getFromPos());
assertEquals(64, extendedSentenceRanges.get(7).getToPos());
assertEquals(68, sentenceRanges.get(8).getFromPos());
assertEquals(69, sentenceRanges.get(8).getToPos());
assertEquals(68, extendedSentenceRanges.get(8).getFromPos());
assertEquals(69, extendedSentenceRanges.get(8).getToPos());
assertEquals(73, sentenceRanges.get(9).getFromPos());
assertEquals(74, sentenceRanges.get(9).getToPos());
assertEquals(73, extendedSentenceRanges.get(9).getFromPos());
assertEquals(74, extendedSentenceRanges.get(9).getToPos());
assertEquals(78, sentenceRanges.get(10).getFromPos());
assertEquals(79, sentenceRanges.get(10).getToPos());
assertEquals(78, extendedSentenceRanges.get(10).getFromPos());
assertEquals(79, extendedSentenceRanges.get(10).getToPos());
assertEquals(83, sentenceRanges.get(11).getFromPos());
assertEquals(84, sentenceRanges.get(11).getToPos());
assertEquals(83, extendedSentenceRanges.get(11).getFromPos());
assertEquals(84, extendedSentenceRanges.get(11).getToPos());
assertEquals(88, sentenceRanges.get(12).getFromPos());
assertEquals(89, sentenceRanges.get(12).getToPos());
assertEquals(88, extendedSentenceRanges.get(12).getFromPos());
assertEquals(89, extendedSentenceRanges.get(12).getToPos());
assertEquals(93, sentenceRanges.get(13).getFromPos());
assertEquals(94, sentenceRanges.get(13).getToPos());
assertEquals(93, extendedSentenceRanges.get(13).getFromPos());
assertEquals(94, extendedSentenceRanges.get(13).getToPos());
assertEquals(98, sentenceRanges.get(14).getFromPos());
assertEquals(99, sentenceRanges.get(14).getToPos());
assertEquals(98, extendedSentenceRanges.get(14).getFromPos());
assertEquals(99, extendedSentenceRanges.get(14).getToPos());
assertEquals(103, sentenceRanges.get(15).getFromPos());
assertEquals(104, sentenceRanges.get(15).getToPos());
assertEquals(103, extendedSentenceRanges.get(15).getFromPos());
assertEquals(104, extendedSentenceRanges.get(15).getToPos());
assertEquals(108, sentenceRanges.get(16).getFromPos());
assertEquals(109, sentenceRanges.get(16).getToPos());
assertEquals(108, extendedSentenceRanges.get(16).getFromPos());
assertEquals(109, extendedSentenceRanges.get(16).getToPos());
assertEquals(113, sentenceRanges.get(17).getFromPos());
assertEquals(114, sentenceRanges.get(17).getToPos());
assertEquals(113, extendedSentenceRanges.get(17).getFromPos());
assertEquals(114, extendedSentenceRanges.get(17).getToPos());
assertEquals(118, sentenceRanges.get(18).getFromPos());
assertEquals(119, sentenceRanges.get(18).getToPos());
assertEquals(118, extendedSentenceRanges.get(18).getFromPos());
assertEquals(119, extendedSentenceRanges.get(18).getToPos());
assertEquals(123, sentenceRanges.get(19).getFromPos());
assertEquals(124, sentenceRanges.get(19).getToPos());
assertEquals(123, extendedSentenceRanges.get(19).getFromPos());
assertEquals(124, extendedSentenceRanges.get(19).getToPos());
assertEquals(128, sentenceRanges.get(20).getFromPos());
assertEquals(129, sentenceRanges.get(20).getToPos());
assertEquals(128, extendedSentenceRanges.get(20).getFromPos());
assertEquals(129, extendedSentenceRanges.get(20).getToPos());
assertEquals(133, sentenceRanges.get(21).getFromPos());
assertEquals(134, sentenceRanges.get(21).getToPos());
assertEquals(133, extendedSentenceRanges.get(21).getFromPos());
assertEquals(134, extendedSentenceRanges.get(21).getToPos());
assertEquals(138, sentenceRanges.get(22).getFromPos());
assertEquals(139, sentenceRanges.get(22).getToPos());
assertEquals(138, extendedSentenceRanges.get(22).getFromPos());
assertEquals(139, extendedSentenceRanges.get(22).getToPos());
assertEquals(143, sentenceRanges.get(23).getFromPos());
assertEquals(144, sentenceRanges.get(23).getToPos());
assertEquals(143, extendedSentenceRanges.get(23).getFromPos());
assertEquals(144, extendedSentenceRanges.get(23).getToPos());
assertEquals(148, sentenceRanges.get(24).getFromPos());
assertEquals(149, sentenceRanges.get(24).getToPos());
assertEquals(148, extendedSentenceRanges.get(24).getFromPos());
assertEquals(149, extendedSentenceRanges.get(24).getToPos());
SentenceRange testSentence2 = sentenceRanges.get(25);
assertEquals(153, testSentence2.getFromPos());
assertEquals(171, testSentence2.getToPos());
assertEquals(153, extendedSentenceRanges.get(25).getFromPos());
assertEquals(171, extendedSentenceRanges.get(25).getToPos());
assertEquals("This+is+an+\"test\".", text.substring(testSentence2.getFromPos(), testSentence2.getToPos()));
assertEquals(175, sentenceRanges.get(26).getFromPos());
assertEquals(176, sentenceRanges.get(26).getToPos());
assertEquals(175, extendedSentenceRanges.get(26).getFromPos());
assertEquals(176, extendedSentenceRanges.get(26).getToPos());
assertEquals(180, sentenceRanges.get(27).getFromPos());
assertEquals(181, sentenceRanges.get(27).getToPos());
assertEquals(180, extendedSentenceRanges.get(27).getFromPos());
assertEquals(181, extendedSentenceRanges.get(27).getToPos());
assertEquals(185, sentenceRanges.get(28).getFromPos());
assertEquals(186, sentenceRanges.get(28).getToPos());
assertEquals(185, extendedSentenceRanges.get(28).getFromPos());
assertEquals(186, extendedSentenceRanges.get(28).getToPos());
assertEquals(190, sentenceRanges.get(29).getFromPos());
assertEquals(191, sentenceRanges.get(29).getToPos());
assertEquals(190, extendedSentenceRanges.get(29).getFromPos());
assertEquals(191, extendedSentenceRanges.get(29).getToPos());
assertEquals(195, sentenceRanges.get(30).getFromPos());
assertEquals(196, sentenceRanges.get(30).getToPos());
assertEquals(195, extendedSentenceRanges.get(30).getFromPos());
assertEquals(196, extendedSentenceRanges.get(30).getToPos());
assertEquals(200, sentenceRanges.get(31).getFromPos());
assertEquals(201, sentenceRanges.get(31).getToPos());
assertEquals(200, extendedSentenceRanges.get(31).getFromPos());
assertEquals(201, extendedSentenceRanges.get(31).getToPos());
assertEquals(205, sentenceRanges.get(32).getFromPos());
assertEquals(206, sentenceRanges.get(32).getToPos());
assertEquals(205, extendedSentenceRanges.get(32).getFromPos());
assertEquals(206, extendedSentenceRanges.get(32).getToPos());
assertEquals(210, sentenceRanges.get(33).getFromPos());
assertEquals(211, sentenceRanges.get(33).getToPos());
assertEquals(210, extendedSentenceRanges.get(33).getFromPos());
assertEquals(211, extendedSentenceRanges.get(33).getToPos());
assertEquals(215, sentenceRanges.get(34).getFromPos());
assertEquals(216, sentenceRanges.get(34).getToPos());
assertEquals(215, extendedSentenceRanges.get(34).getFromPos());
assertEquals(216, extendedSentenceRanges.get(34).getToPos());
assertEquals(220, sentenceRanges.get(35).getFromPos());
assertEquals(221, sentenceRanges.get(35).getToPos());
assertEquals(220, extendedSentenceRanges.get(35).getFromPos());
assertEquals(221, extendedSentenceRanges.get(35).getToPos());
assertEquals(225, sentenceRanges.get(36).getFromPos());
assertEquals(226, sentenceRanges.get(36).getToPos());
assertEquals(225, extendedSentenceRanges.get(36).getFromPos());
assertEquals(226, extendedSentenceRanges.get(36).getToPos());
assertEquals(230, sentenceRanges.get(37).getFromPos());
assertEquals(231, sentenceRanges.get(37).getToPos());
assertEquals(230, extendedSentenceRanges.get(37).getFromPos());
assertEquals(231, extendedSentenceRanges.get(37).getToPos());
assertEquals(235, sentenceRanges.get(38).getFromPos());
assertEquals(236, sentenceRanges.get(38).getToPos());
assertEquals(235, extendedSentenceRanges.get(38).getFromPos());
assertEquals(236, extendedSentenceRanges.get(38).getToPos());
assertEquals(240, sentenceRanges.get(39).getFromPos());
assertEquals(241, sentenceRanges.get(39).getToPos());
assertEquals(240, extendedSentenceRanges.get(39).getFromPos());
assertEquals(241, extendedSentenceRanges.get(39).getToPos());
assertEquals(245, sentenceRanges.get(40).getFromPos());
assertEquals(246, sentenceRanges.get(40).getToPos());
assertEquals(245, extendedSentenceRanges.get(40).getFromPos());
assertEquals(246, extendedSentenceRanges.get(40).getToPos());
assertEquals(250, sentenceRanges.get(41).getFromPos());
assertEquals(251, sentenceRanges.get(41).getToPos());
assertEquals(250, extendedSentenceRanges.get(41).getFromPos());
assertEquals(251, extendedSentenceRanges.get(41).getToPos());
assertEquals(255, sentenceRanges.get(42).getFromPos());
assertEquals(256, sentenceRanges.get(42).getToPos());
assertEquals(255, extendedSentenceRanges.get(42).getFromPos());
assertEquals(256, extendedSentenceRanges.get(42).getToPos());
assertEquals(260, sentenceRanges.get(43).getFromPos());
assertEquals(261, sentenceRanges.get(43).getToPos());
assertEquals(260, extendedSentenceRanges.get(43).getFromPos());
assertEquals(261, extendedSentenceRanges.get(43).getToPos());
assertEquals(265, sentenceRanges.get(44).getFromPos());
assertEquals(266, sentenceRanges.get(44).getToPos());
assertEquals(265, extendedSentenceRanges.get(44).getFromPos());
assertEquals(266, extendedSentenceRanges.get(44).getToPos());
assertEquals(270, sentenceRanges.get(45).getFromPos());
assertEquals(271, sentenceRanges.get(45).getToPos());
assertEquals(270, extendedSentenceRanges.get(45).getFromPos());
assertEquals(271, extendedSentenceRanges.get(45).getToPos());
assertEquals(275, sentenceRanges.get(46).getFromPos());
assertEquals(276, sentenceRanges.get(46).getToPos());
assertEquals(275, extendedSentenceRanges.get(46).getFromPos());
assertEquals(276, extendedSentenceRanges.get(46).getToPos());
assertEquals(280, sentenceRanges.get(47).getFromPos());
assertEquals(281, sentenceRanges.get(47).getToPos());
assertEquals(280, extendedSentenceRanges.get(47).getFromPos());
assertEquals(281, extendedSentenceRanges.get(47).getToPos());
assertEquals(285, sentenceRanges.get(48).getFromPos());
assertEquals(286, sentenceRanges.get(48).getToPos());
assertEquals(285, extendedSentenceRanges.get(48).getFromPos());
assertEquals(286, extendedSentenceRanges.get(48).getToPos());
assertEquals(290, sentenceRanges.get(49).getFromPos());
assertEquals(291, sentenceRanges.get(49).getToPos());
assertEquals(290, extendedSentenceRanges.get(49).getFromPos());
assertEquals(291, extendedSentenceRanges.get(49).getToPos());
SentenceRange testSentence3 = sentenceRanges.get(50);
assertEquals(295, testSentence3.getFromPos());
assertEquals(336, testSentence3.getToPos());
assertEquals(295, extendedSentenceRanges.get(50).getFromPos());
assertEquals(336, extendedSentenceRanges.get(50).getToPos());
assertEquals("He+was+very+\"afraid\"+of+the+consequeces  ", text.substring(testSentence3.getFromPos(), testSentence3.getToPos()));
}
/**
 * Checks that the positions reported by {@code getSentenceRanges()} and
 * {@code getExtendedSentenceRanges()} agree, first for two sentences separated by a
 * single space and then for the same two sentences separated by several spaces.
 *
 * @throws IOException if {@code check2} fails
 */
@Test
public void testExtraWhitespaceCase() throws IOException {
  JLanguageTool jLanguageTool = new JLanguageTool(new NoRulesEnglish());

  // --- Case 1: sentences separated by exactly one space -------------------------------
  String textWithoutExtraWhitespace = "Hello, how are you? This is an test.";
  AnnotatedText annotatedTextWithoutExtraWhitespace = new AnnotatedTextBuilder().addText(textWithoutExtraWhitespace).build();
  CheckResults checkResultsWithoutExtraWhitespace = jLanguageTool.check2(annotatedTextWithoutExtraWhitespace,
    true,
    JLanguageTool.ParagraphHandling.NORMAL,
    ruleMatch -> {
    },
    JLanguageTool.Mode.ALL_BUT_TEXTLEVEL_ONLY,
    JLanguageTool.Level.PICKY,
    null);
  List<SentenceRange> sentenceRangesWithoutExtraWhitespace = checkResultsWithoutExtraWhitespace.getSentenceRanges();
  assertEquals(2, sentenceRangesWithoutExtraWhitespace.size());
  List<ExtendedSentenceRange> extendedSentenceRangesWithoutExtraWhiteSpace = checkResultsWithoutExtraWhitespace.getExtendedSentenceRanges();
  // Guard the index accesses below, mirroring the size check done in case 2.
  assertEquals(2, extendedSentenceRangesWithoutExtraWhiteSpace.size());

  // First sentence: characters 0..19.
  int fpWithoutExtraWhitespaceSR1 = sentenceRangesWithoutExtraWhitespace.get(0).getFromPos();
  int tpWithoutExtraWhitespaceSR1 = sentenceRangesWithoutExtraWhitespace.get(0).getToPos();
  assertEquals(0, fpWithoutExtraWhitespaceSR1);
  assertEquals(19, tpWithoutExtraWhitespaceSR1);
  assertEquals("Hello, how are you?", textWithoutExtraWhitespace.substring(fpWithoutExtraWhitespaceSR1, tpWithoutExtraWhitespaceSR1));
  int fpWithoutExtraWhitespaceESR1 = extendedSentenceRangesWithoutExtraWhiteSpace.get(0).getFromPos();
  assertEquals(fpWithoutExtraWhitespaceSR1, fpWithoutExtraWhitespaceESR1);
  int tpWithoutExtraWhitespaceESR1 = extendedSentenceRangesWithoutExtraWhiteSpace.get(0).getToPos();
  assertEquals(tpWithoutExtraWhitespaceSR1, tpWithoutExtraWhitespaceESR1);

  // Second sentence: starts right after the single separating space.
  int fpWithoutExtraWhitespaceSR2 = sentenceRangesWithoutExtraWhitespace.get(1).getFromPos();
  int tpWithoutExtraWhitespaceSR2 = sentenceRangesWithoutExtraWhitespace.get(1).getToPos();
  assertEquals(20, fpWithoutExtraWhitespaceSR2);
  assertEquals(36, tpWithoutExtraWhitespaceSR2);
  assertEquals("This is an test.", textWithoutExtraWhitespace.substring(fpWithoutExtraWhitespaceSR2, tpWithoutExtraWhitespaceSR2));
  int fpWithoutExtraWhitespaceESR2 = extendedSentenceRangesWithoutExtraWhiteSpace.get(1).getFromPos();
  assertEquals(fpWithoutExtraWhitespaceSR2, fpWithoutExtraWhitespaceESR2);
  int tpWithoutExtraWhitespaceESR2 = extendedSentenceRangesWithoutExtraWhiteSpace.get(1).getToPos();
  assertEquals(tpWithoutExtraWhitespaceSR2, tpWithoutExtraWhitespaceESR2);

  // --- Case 2: sentences separated by several spaces ----------------------------------
  // Five spaces separate the sentences, so the second one starts at offset 24
  // (19 characters + 5 spaces) instead of 20; the assertions below depend on this.
  String textWithExtraWhitespace = "Hello, how are you?     This is an test.";
  AnnotatedText annotatedTextWithExtraWhitespace = new AnnotatedTextBuilder().addText(textWithExtraWhitespace).build();
  CheckResults checkResultsWithExtraWhitespace = jLanguageTool.check2(annotatedTextWithExtraWhitespace,
    true,
    JLanguageTool.ParagraphHandling.NORMAL,
    ruleMatch -> {
    },
    JLanguageTool.Mode.ALL_BUT_TEXTLEVEL_ONLY,
    JLanguageTool.Level.PICKY,
    null);
  List<SentenceRange> sentenceRangesWithExtraWhitespace = checkResultsWithExtraWhitespace.getSentenceRanges();
  assertEquals(2, sentenceRangesWithExtraWhitespace.size());
  List<ExtendedSentenceRange> extendedSentenceRangesWithExtraWhitespace = checkResultsWithExtraWhitespace.getExtendedSentenceRanges();
  assertEquals(2, extendedSentenceRangesWithExtraWhitespace.size());

  // First sentence is unaffected by the extra whitespace that follows it.
  int fpWithExtraWhitespaceSR1 = sentenceRangesWithExtraWhitespace.get(0).getFromPos();
  int tpWithExtraWhitespaceSR1 = sentenceRangesWithExtraWhitespace.get(0).getToPos();
  assertEquals(0, fpWithExtraWhitespaceSR1);
  assertEquals(19, tpWithExtraWhitespaceSR1);
  assertEquals("Hello, how are you?", textWithExtraWhitespace.substring(fpWithExtraWhitespaceSR1, tpWithExtraWhitespaceSR1));
  int fpWithExtraWhitespaceESR1 = extendedSentenceRangesWithExtraWhitespace.get(0).getFromPos();
  assertEquals(fpWithExtraWhitespaceSR1, fpWithExtraWhitespaceESR1);
  int tpWithExtraWhitespaceESR1 = extendedSentenceRangesWithExtraWhitespace.get(0).getToPos();
  assertEquals(tpWithExtraWhitespaceSR1, tpWithExtraWhitespaceESR1);

  // Second sentence: both range types must skip the leading whitespace.
  int fpWithExtraWhitespaceSR2 = sentenceRangesWithExtraWhitespace.get(1).getFromPos();
  int tpWithExtraWhitespaceSR2 = sentenceRangesWithExtraWhitespace.get(1).getToPos();
  assertEquals(24, fpWithExtraWhitespaceSR2);
  assertEquals(40, tpWithExtraWhitespaceSR2);
  assertEquals("This is an test.", textWithExtraWhitespace.substring(fpWithExtraWhitespaceSR2, tpWithExtraWhitespaceSR2));
  int fpWithExtraWhitespaceESR2 = extendedSentenceRangesWithExtraWhitespace.get(1).getFromPos();
  assertEquals(fpWithExtraWhitespaceSR2, fpWithExtraWhitespaceESR2);
  int tpWithExtraWhitespaceESR2 = extendedSentenceRangesWithExtraWhitespace.get(1).getToPos();
  assertEquals(tpWithExtraWhitespaceSR2, tpWithExtraWhitespaceESR2);
}
}