diff --git a/languagetool-core/src/main/resources/org/languagetool/resource/segment.srx b/languagetool-core/src/main/resources/org/languagetool/resource/segment.srx index 21848742816..b72fd40872e 100644 --- a/languagetool-core/src/main/resources/org/languagetool/resource/segment.srx +++ b/languagetool-core/src/main/resources/org/languagetool/resource/segment.srx @@ -5164,7 +5164,7 @@ -[\[\(]?\.\.\.[\]\)]?[\u00A0\s]{1,2} +[\[\(]?(\.\.\.|…)[\]\)]?[\u00A0\s]{1,2} \p{Ll} @@ -5245,7 +5245,7 @@ -\b(betr|Geb|Stk|ggü|Mag|mtl|Flgh?|[Pp]arl|Bsp|versch|[Dd]iesbzgl|[Zz]ykl|[Dd]bzgl[Ss]tellv|d|Übers|[Bb]zw|Ab[hkst]|[Ee]ig|[Aa]bzü?gl|\d+-tlg|tlg|[Gg]gfls|[Ff]achspr|[Ll]tda|[Ee]inschl|[Vv]mtl|[Ss]tellv|Ev|[Bb]ezgl|lit|Abzw|[Vv]sl|ahd|Akk|aktual|[Öö]ffentl|prof|allg|alltagsspr|altdt|alttest|amerikan|Anh|Ank|Anm|Art|[Aa]utom|Auftragsnr|Az|Bat|bayr|Bde?|bearb|Bed|Bem|bes|bez|wsl|vsl|Bez|Bhf|Blvd|[Bb]spw|btto|bw|Dtl|[Gg]esetzl|[Ee]lektr|Dez|[Jj]gdfr|[Ee]ff)\.[\u00A0\s]{1,2} +\b(betr|Geb|Stk|ggü|Mag|mtl|Flgh?|[Pp]arl|Bsp|versch|[Dd]iesbzgl|[Zz]ykl|[Dd]bzgl[Ss]tellv|[Ss]tv|d|Übers|[Bb]zw|Ab[hkst]|[Ee]ig|[Aa]bzü?gl|\d+-tlg|tlg|[Gg]gfls|[Ff]achspr|[Ll]tda|[Ee]inschl|[Vv]mtl|[Ss]tellv|Ev|[Bb]ezgl|lit|Abzw|[Vv]sl|ahd|Akk|aktual|[Öö]ffentl|prof|allg|alltagsspr|altdt|alttest|amerikan|Anh|Ank|Anm|Art|[Aa]utom|Auftragsnr|Az|Bat|bayr|Bde?|bearb|Bed|Bem|bes|bez|wsl|vsl|Bez|Bhf|Blvd|[Bb]spw|btto|bw|Dtl|[Gg]esetzl|[Ee]lektr|Dez|[Jj]gdfr|[Ee]ff|M)\.[\u00A0\s]{1,2} @@ -5257,6 +5257,11 @@ \b([Ee]tc)\.[\u00A0\s]{1,2} \p{Lu} + + + \bo\.[\u00A0\s]{1,2}Ä\. + + \b(cts?|[Cc]a|chem|chin|Chr|cresc|[Dd]at|desgl|ders|dgl|Dipl|Dir?|Doz?|durchg|durchges|Dr|[Dd]t|ebd|Ed|[Ee]igt?l|akt|[Ee]ngl|Erg|al|et[cw]|Etw|ev|[Ee]vtl?|[Ee]xkl|Expl|Exz)\.[\u00A0\s]{1,2} @@ -5299,15 +5304,15 @@ -[\.!?…][\u0002|'|"|“|«|‹|\)|\]|\}¹²³]?[\u00A0\s]+ +[\.!?][\u0002|'|"|“|«|‹|\)|\]|\}¹²³]?[\u00A0\s]+ -[\.!?…]['"“\p{Pe}\u00BB\u201D]? +[\.!?]['"“\p{Pe}\u00BB\u201D]? \p{Lu}[^\p{Lu}] -[\u00A0\s]\p{L}[\.!?…][\u00A0\s]{1,2} +[\u00A0\s]\p{L}[\.!?][\u00A0\s]{1,2} \p{Lu}\p{Ll} @@ -6936,9 +6941,9 @@ [\.!?] \S*@ - + -Yahoo![\s\u00A0] +(Yahoo|NAU)![\s\u00A0] \p{Ll} diff --git a/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/CaseRule.java b/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/CaseRule.java index a95645a27b7..8886ad0c9f2 100644 --- a/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/CaseRule.java +++ b/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/CaseRule.java @@ -83,12 +83,21 @@ public class CaseRule extends Rule { private static final String[] UNDEFINED_QUANTIFIERS = {"viel", "nichts", "nix", "wenig", "allerlei"}; + // Used for subordinate clause detection after verbs (e.g. "Überlegen, wie man...") + private static final String[] INTERROGATIVE_PARTICLES = {"was", "wodurch", "wofür", "womit", "woran", "worauf", "woraus", "wovon", "wie"}; private static final String[] POSSESSIVE_INDICATORS = {"einer", "eines", "der", "des", "dieser", "dieses"}; private static final String[] DAS_VERB_EXCEPTIONS = {"nur", "sogar", "auch", "die", "alle", "viele", "zu"}; + // Used for short question fragments after a colon (e.g. "Ich frage mich: Warum?") + // Intentionally different from INTERROGATIVE_PARTICLES, which targets subordinate clauses. + + private static final String[] COLON_QUESTION_WORDS = {"warum", "wieso", "weshalb", "wer", "was", "wann", "wo", "wie", "wozu"}; + + private static final String[] COLON_QUESTION_CONJUNCTIONS = {"und", "oder", "aber", "denn"}; + /* * These are words that Morphy only knows as non-nouns (or not at all). * The proper solution is to add all those to our Morphy data, but as a simple @@ -1042,10 +1051,15 @@ public class CaseRule extends Rule { !isInvisibleSeparator(i-1, tokens) && !language.getDefaultSpellingRule().isMisspelled(lcWord)) { if (":".equals(tokens[i - 1].getToken())) { + // allow short question sentences like "Warum? Und warum?" after colon + if (isQuestionEquivalentAfterColon(i, tokens)) { + return; + } + AnalyzedTokenReadings[] subarray = new AnalyzedTokenReadings[i]; System.arraycopy(tokens, 0, subarray, 0, i); + if (isVerbFollowing(i, tokens, lowercaseReadings) || getTokensWithPosTagStartingWithCount(subarray, "VER") == 0) { - // no error } else { addRuleMatch(ruleMatches, sentence, COLON_MESSAGE, tokens[i], lcWord); } @@ -1073,14 +1087,14 @@ public class CaseRule extends Rule { private boolean isNounWithVerbReading(int i, AnalyzedTokenReadings[] tokens) { return tokens[i].hasPosTagStartingWith("SUB") && - tokens[i].hasPosTagStartingWith("VER:INF"); - } + tokens[i].hasPosTagStartingWith("VER:INF"); + } private boolean isInvisibleSeparator(int i, AnalyzedTokenReadings[] tokens) { // u2063 is used internally by our browser add-on return i >= 0 && i < tokens.length && tokens[i].getToken().length() > 0 && tokens[i].getToken().charAt(0) == '\u2063'; } - private boolean isVerbFollowing(int i, AnalyzedTokenReadings[] tokens, AnalyzedTokenReadings lowercaseReadings) { + private boolean isVerbFollowing(int i, AnalyzedTokenReadings[] tokens, AnalyzedTokenReadings lowercaseReadings) { AnalyzedTokenReadings[] subarray = new AnalyzedTokenReadings[ tokens.length - i ]; System.arraycopy(tokens, i, subarray, 0, subarray.length); if (lowercaseReadings != null) { @@ -1091,6 +1105,10 @@ public class CaseRule extends Rule { return getTokensWithPosTagStartingWithCount(subarray, "VER:") != 0; } + private boolean isColonQuestionWord(String word) { + return StringUtils.equalsAnyIgnoreCase(word, COLON_QUESTION_WORDS); + } + private void addRuleMatch(List ruleMatches, AnalyzedSentence sentence, String msg, AnalyzedTokenReadings tokenReadings, String fixedWord) { RuleMatch ruleMatch = new RuleMatch(this, sentence, tokenReadings.getStartPos(), tokenReadings.getEndPos(), msg); ruleMatch.setSuggestedReplacement(fixedWord); @@ -1295,6 +1313,33 @@ public class CaseRule extends Rule { return true; } + /** + * Returns true if the token at position i starts a short question fragment + * that forms a valid Satzäquivalent after a colon, per Duden rules. + * Handles: + * single word: "Ich frage mich: Warum?" + * with conjunction: "Ich frage mich: Und warum?" + */ + @VisibleForTesting + boolean isQuestionEquivalentAfterColon(int i, AnalyzedTokenReadings[] tokens) { + if (i < tokens.length - 1) { + String word = tokens[i].getToken(); + String next = tokens[i + 1].getToken(); + // "Warum?" + if (isColonQuestionWord(word) && "?".equals(next)) { + return true; + } + // "Und warum?" + if (StringUtils.equalsAnyIgnoreCase(word, COLON_QUESTION_CONJUNCTIONS) + && i < tokens.length - 2 + && isColonQuestionWord(tokens[i + 1].getToken()) + && "?".equals(tokens[i + 2].getToken())) { + return true; + } + } + return false; + } + private AnalyzedTokenReadings lookup(String word) { try { return ((GermanTagger) language.getTagger()).lookup(word); @@ -1303,3 +1348,4 @@ public class CaseRule extends Rule { } } } + diff --git a/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/case_rule_exceptions.txt b/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/case_rule_exceptions.txt index 3c71c8da3a3..c63e57ac2f9 100644 --- a/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/case_rule_exceptions.txt +++ b/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/case_rule_exceptions.txt @@ -50,6 +50,7 @@ Alpine[ns] Museums? (der|Münchens?) Agile Coach(e?|ing)s? Amnesty Internationals? Andreas Scheuers? +Ankleiden? Anlage Sonstiges Angewandte[mnr]? Kunst Angewandte[mnr]? [A-ZÖÄÜ][a-zäöü]+(wissenschaft|forschung)(en)? @@ -59,6 +60,7 @@ Angewandte[mnr]? (Optik|Informatik|Ethik|Geophysik|Physik|Makroökonomie|Linguis Anne Will Anarchistischen? Pogo-Partei Anthony Blinkens? +Annies? Apokalyptische[nrm]? Reiters? Apostolische[nrm]? Palast(s|es)? Aqua Vitals? @@ -195,6 +197,7 @@ Director Legal Director of Legal Die Toten Hosen Die Leiden des jungen Werthers +Die Mumie [Dd]ie Schöne und das Biest Digital Assets?|IQ|Garage|Pushs?|Traces?|Pushing|Cockpits?|Ads?|Signals?|Designs?|Designer[ns]?|Engineerings?|Innovation|Media|Content|Community|Detox|Analyst|Leader[ns]?|Consultants?|Equipments?|Onboarding|Transformations?|Learnings?|Events?|Hubs?|Masterings?|Marketers?|Radios?|Experiences?|Kits?|Wallets?|Turbines?|Workspaces?|Leaderships?|Profiles?|News?|Managements?|Economy|Summits?|Fitness Digital Film Productions? @@ -549,11 +552,13 @@ Kaukasische[nsm] Eichhörnchens? Kubanischen? Revolution kaum Neues Kessel Buntes +KfW(-\d+)?(-[A-ZÄÖÜa-zäöüß]+)? Kitchen Impossible Klein (Nordendes?|Borstels?) Kleine[rnm]? Sauerampfers? Kleinen? Weser|Donau Knollige[rnm]? Hahnenfu(ß|ss) +Kommunale[nms]? Integrationszentrum Lost (Places?|Islands?) Lost and Found Landesverband(e?s)? Baye?rischer? Omnibusunternehmen @@ -772,6 +777,7 @@ Polar 8 Potemkinschen? Dörfern? Politische[rn]? Direktors? Polytechnischen? (.*schule|Instituts?) +Polytechnische[nr]? Universität Polnische[rnm]? (Korridors?|Fremdenverkehrsamt(e?s)?) Praktische[mns]? Jahr Present Progressive @@ -782,6 +788,7 @@ Prima Terras? Product[/-]Market Fit Progressive Web Apps? Progressive Rocks?|Metals? +ProMinent Dosiertechnik AG Psychologie Heute Psychologische[nsm]? Institute?s? Psychiatrische[rn]? Klinik ([A-ZÖÄÜ][a-zäöüß\-]+) @@ -847,6 +854,7 @@ Schiefe[nr]? Turms? Schiefen? Ebene Schiefen? Schlachtordnung Schlesische[rnm]? Kriege?s? +Schlesische[rn]? Universität Schlesische[snm]? Tors? Schnellen? Eingreiftruppe Schön Klinik @@ -866,6 +874,7 @@ Schweizerischen? Konferenz Schweizerische[nrm]? Verband für Schweizerische[nrm]? Gehörlosenbund Schweizerische[nrm]? Fu(ss|ß)ballverband +Schweizerische[rn]? Trassenvergabestelle(-[A-ZÄÖÜ][a-zäöüß]+)? Schweizer Illustrierten? Schweizerischen? Akademischen? Gesellschaft für Germanistik Seiner? Exzellenz @@ -956,6 +965,7 @@ Theologischen? Fakultät Thüringischen? Landeszeitung Thüringer Allgemeinen? Thorsten Lahm +THYMUSKIN Sensitive Shampoo Tiger Global Tote[nsm] Meer Total Gross|Values?|Blocking @@ -1011,6 +1021,7 @@ Wei(ß|ss)e[nrm]? Zwerg(e|s|en|es)?|Sonntags?|Tod|Rings?|Burgunder[ns]? (WELT|Welt) Digital Wenn und Aber Werd Verlag +Western Union International Bank Westliche[nrm]? Gorillas? Westfälische[nrm]? Friede(ns?)?|Pforte|Bucht|Nachrichten|Wilhelms-Universität|Hochschule|Anzeigers? Wilde[nrm]? Westens?|Kaisers?|Berge?s? diff --git a/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/disambiguation.xml b/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/disambiguation.xml index f7f9a7bf6a2..1cb74e23e17 100644 --- a/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/disambiguation.xml +++ b/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/disambiguation.xml @@ -1327,6 +1327,11 @@ Copyright © 2013 Markus Brenneis, Daniel Naber, Jan Schreiber + + + bereitet?|bereiten + + diff --git a/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/CaseRuleTest.java b/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/CaseRuleTest.java index 90e2fac67b4..f2df040e87a 100644 --- a/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/CaseRuleTest.java +++ b/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/CaseRuleTest.java @@ -383,6 +383,14 @@ public class CaseRuleTest { assertBad("Das ist es: Kein Satz."); assertBad("Wen magst du lieber: Die Giants oder die Dragons?"); + assertGood("Ich frage mich: Warum?"); + assertGood("Ich frage mich: Wieso?"); + assertGood("Ich frage mich: Weshalb?"); + assertGood("Ich frage mich: Und warum?"); + assertGood("Ich frage mich: Oder wieso?"); + assertGood("Ich frage mich: Aber warum?"); + assertBad("Ich frage mich: Warum Das so ist."); + assertGood("Das wirklich Wichtige ist dies:"); assertGood("Das wirklich wichtige Verfahren ist dies:"); //assertBad("Das wirklich wichtige ist dies:"); diff --git a/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/UppercaseSentenceStartRuleTest.java b/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/UppercaseSentenceStartRuleTest.java index c88559186e8..66ffea81eba 100644 --- a/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/UppercaseSentenceStartRuleTest.java +++ b/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/UppercaseSentenceStartRuleTest.java @@ -79,6 +79,9 @@ public class UppercaseSentenceStartRuleTest { assertEquals(1, matches3.size()); assertEquals(19, matches3.get(0).getFromPos()); assertEquals(22, matches3.get(0).getToPos()); + + assertEquals(0, lt.check("Willkommen… im Berlin.").size()); + assertEquals(0, lt.check("Die neue Kollektion von NAU! ist jetzt online.").size()); } }