From 4a6d9b179030bd9dc07009526531df2782b36e3c Mon Sep 17 00:00:00 2001 From: Fabian Engelniederhammer Date: Thu, 2 Apr 2026 09:19:16 +0200 Subject: [PATCH 1/6] 1603 vibe fix --- .../advancedqueryparser/AdvancedQuery.g4 | 3 +- .../lapis/model/AdvancedQueryFacadeTest.kt | 40 +++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/lapis/src/main/antlr/org/genspectrum/lapis/model/advancedqueryparser/AdvancedQuery.g4 b/lapis/src/main/antlr/org/genspectrum/lapis/model/advancedqueryparser/AdvancedQuery.g4 index 9e2dbe183..41ccf2666 100644 --- a/lapis/src/main/antlr/org/genspectrum/lapis/model/advancedqueryparser/AdvancedQuery.g4 +++ b/lapis/src/main/antlr/org/genspectrum/lapis/model/advancedqueryparser/AdvancedQuery.g4 @@ -84,7 +84,7 @@ value: name | QUOTED_STRING; dateOrNumber: digit+; digit: NUMBER | MINUS | DOT; name: charOrNumber+; -charOrNumber: A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | X | Y | Z | NUMBER | MINUS | UNDERSCORE | DOT | ASTERISK; +charOrNumber: A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | X | Y | Z | NUMBER | MINUS | UNDERSCORE | DOT | ASTERISK | UNICODE_LETTER; isNullQuery: isnull_ '(' name ')'; isnull_: I S N U L L ; @@ -122,6 +122,7 @@ UNDERSCORE: '_'; DOT: '.'; ASTERISK: '*'; QUOTED_STRING: '\'' ( '\\' . | ~['\\] )* '\''; // matches all strings with quotes, supports backslash escaping (e.g. \' for a literal single quote, \\ for a literal backslash) +UNICODE_LETTER: [\p{L}\p{M}] ; // matches non-ASCII Unicode letters and combining marks (e.g. 
umlauts, accented characters) AND: ' ' A N D ' '; // space is important here, otherwise metadataNames with 'AND' in them would be misinterpreted OR: ' ' O R ' '; NOT: N O T ' '; diff --git a/lapis/src/test/kotlin/org/genspectrum/lapis/model/AdvancedQueryFacadeTest.kt b/lapis/src/test/kotlin/org/genspectrum/lapis/model/AdvancedQueryFacadeTest.kt index 6c376e897..a60e2dbe7 100644 --- a/lapis/src/test/kotlin/org/genspectrum/lapis/model/AdvancedQueryFacadeTest.kt +++ b/lapis/src/test/kotlin/org/genspectrum/lapis/model/AdvancedQueryFacadeTest.kt @@ -341,6 +341,11 @@ class AdvancedQueryFacadeTest { query = "some_metadata.regex='it\\'s'", expected = StringSearch("some_metadata", "it's"), ), + ValidTestCase( + description = "unquoted regex with non-ASCII characters", + query = "some_metadata.regex=Graubünden", + expected = StringSearch("some_metadata", "Graubünden"), + ), ), invalid = listOf( InvalidTestCase( @@ -625,6 +630,41 @@ class AdvancedQueryFacadeTest { query = "some_metadata='Côte d\\'Ivoire'", expected = StringEquals("some_metadata", "Côte d'Ivoire"), ), + ValidTestCase( + description = "string equals with unquoted umlaut (ü)", + query = "some_metadata=Zürich", + expected = StringEquals("some_metadata", "Zürich"), + ), + ValidTestCase( + description = "string equals with unquoted accented character (â)", + query = "some_metadata=Neuchâtel", + expected = StringEquals("some_metadata", "Neuchâtel"), + ), + ValidTestCase( + description = "string equals with unquoted cedilla (ç)", + query = "some_metadata=Français", + expected = StringEquals("some_metadata", "Français"), + ), + ValidTestCase( + description = "string equals with unquoted tilde-n (ñ)", + query = "some_metadata=España", + expected = StringEquals("some_metadata", "España"), + ), + ValidTestCase( + description = "string equals with unquoted Cyrillic characters", + query = "some_metadata=Москва", + expected = StringEquals("some_metadata", "Москва"), + ), + ValidTestCase( + description = "string equals with 
unquoted Chinese characters", + query = "some_metadata=北京", + expected = StringEquals("some_metadata", "北京"), + ), + ValidTestCase( + description = "string equals with unquoted mixed ASCII and non-ASCII", + query = "some_metadata=Graubünden", + expected = StringEquals("some_metadata", "Graubünden"), + ), ValidTestCase( description = "string equals with escaped backslash in value", query = "some_metadata='back\\\\slash'", From 34c15febb64b136ad62c40c1bed54c0a634fd75c Mon Sep 17 00:00:00 2001 From: Fabian Engelniederhammer Date: Thu, 2 Apr 2026 09:20:21 +0200 Subject: [PATCH 2/6] 1603 apparently we don't need this --- .../lapis/model/advancedqueryparser/AdvancedQuery.g4 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lapis/src/main/antlr/org/genspectrum/lapis/model/advancedqueryparser/AdvancedQuery.g4 b/lapis/src/main/antlr/org/genspectrum/lapis/model/advancedqueryparser/AdvancedQuery.g4 index 41ccf2666..3f452061f 100644 --- a/lapis/src/main/antlr/org/genspectrum/lapis/model/advancedqueryparser/AdvancedQuery.g4 +++ b/lapis/src/main/antlr/org/genspectrum/lapis/model/advancedqueryparser/AdvancedQuery.g4 @@ -122,7 +122,7 @@ UNDERSCORE: '_'; DOT: '.'; ASTERISK: '*'; QUOTED_STRING: '\'' ( '\\' . | ~['\\] )* '\''; // matches all strings with quotes, supports backslash escaping (e.g. \' for a literal single quote, \\ for a literal backslash) -UNICODE_LETTER: [\p{L}\p{M}] ; // matches non-ASCII Unicode letters and combining marks (e.g. umlauts, accented characters) +UNICODE_LETTER: [\p{L}] ; // matches non-ASCII Unicode letters and combining marks (e.g. 
umlauts, accented characters) AND: ' ' A N D ' '; // space is important here, otherwise metadataNames with 'AND' in them would be misinterpreted OR: ' ' O R ' '; NOT: N O T ' '; From 0d47ec86b0f2a9af2ee01786a6765ae2e14a49c6 Mon Sep 17 00:00:00 2001 From: Fabian Engelniederhammer Date: Thu, 2 Apr 2026 09:45:29 +0200 Subject: [PATCH 3/6] 1603 don't abbreviate --- .../lapis/model/advancedqueryparser/AdvancedQuery.g4 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lapis/src/main/antlr/org/genspectrum/lapis/model/advancedqueryparser/AdvancedQuery.g4 b/lapis/src/main/antlr/org/genspectrum/lapis/model/advancedqueryparser/AdvancedQuery.g4 index 3f452061f..f2cab6ac3 100644 --- a/lapis/src/main/antlr/org/genspectrum/lapis/model/advancedqueryparser/AdvancedQuery.g4 +++ b/lapis/src/main/antlr/org/genspectrum/lapis/model/advancedqueryparser/AdvancedQuery.g4 @@ -122,7 +122,7 @@ UNDERSCORE: '_'; DOT: '.'; ASTERISK: '*'; QUOTED_STRING: '\'' ( '\\' . | ~['\\] )* '\''; // matches all strings with quotes, supports backslash escaping (e.g. \' for a literal single quote, \\ for a literal backslash) -UNICODE_LETTER: [\p{L}] ; // matches non-ASCII Unicode letters and combining marks (e.g. umlauts, accented characters) +UNICODE_LETTER: [\p{Letter}] ; // matches non-ASCII Unicode letters (e.g. 
umlauts, accented characters) AND: ' ' A N D ' '; // space is important here, otherwise metadataNames with 'AND' in them would be misinterpreted OR: ' ' O R ' '; NOT: N O T ' '; From 0c45111e27b355aacc2568d70290d8697e2787d4 Mon Sep 17 00:00:00 2001 From: Fabian Engelniederhammer Date: Thu, 2 Apr 2026 10:30:27 +0200 Subject: [PATCH 4/6] 1603 more tests --- .../lapis/model/AdvancedQueryFacadeTest.kt | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/lapis/src/test/kotlin/org/genspectrum/lapis/model/AdvancedQueryFacadeTest.kt b/lapis/src/test/kotlin/org/genspectrum/lapis/model/AdvancedQueryFacadeTest.kt index a60e2dbe7..4d1322f12 100644 --- a/lapis/src/test/kotlin/org/genspectrum/lapis/model/AdvancedQueryFacadeTest.kt +++ b/lapis/src/test/kotlin/org/genspectrum/lapis/model/AdvancedQueryFacadeTest.kt @@ -766,6 +766,16 @@ class AdvancedQueryFacadeTest { "floatField=notAFloat", "'notAFloat' is not a valid float", ), + InvalidTestCase( + description = "non-ASCII field name", + query = "divïsion=Bern", + expected = "Metadata field divïsion does not exist", + ), + InvalidTestCase( + description = "non-ASCII field name with regex suffix", + query = "divïsion.regex=Bern", + expected = "Metadata field divïsion does not exist", + ), ), ) @@ -837,6 +847,11 @@ class AdvancedQueryFacadeTest { query = "invalidGene:501Y", expected = "invalidGene is not a known segment or gene", ), + InvalidTestCase( + description = "named mutation with non-ASCII gene/segment name", + query = "Ñ:123A", + expected = "Ñ is not a known segment or gene", + ), InvalidTestCase( description = "'-' in nucleotide 'from' position is invalid", query = "-300A", From 0e1fb6f56021c0c1809ef956b1f6a8afba075219 Mon Sep 17 00:00:00 2001 From: Fabian Engelniederhammer Date: Thu, 2 Apr 2026 10:39:01 +0200 Subject: [PATCH 5/6] 1603 fix unrelated test --- lapis-e2e/test/aminoAcidSequence.spec.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lapis-e2e/test/aminoAcidSequence.spec.ts 
b/lapis-e2e/test/aminoAcidSequence.spec.ts index 0f833fb5f..8320ed147 100644 --- a/lapis-e2e/test/aminoAcidSequence.spec.ts +++ b/lapis-e2e/test/aminoAcidSequence.spec.ts @@ -190,7 +190,7 @@ describe('The /alignedAminoAcidSequence endpoint', () => { const errorResponse = await response.json(); expect(errorResponse.error.detail).to.match( - /Error from SILO: The table does not contain the SequenceColumn 'unknownGene'/ + /Error from SILO: The table does not contain the field 'unknownGene'/ ); }); From 2c9afd538eb4940995c89b1c3c99819c51b0bcc3 Mon Sep 17 00:00:00 2001 From: Fabian Engelniederhammer Date: Thu, 2 Apr 2026 10:51:07 +0200 Subject: [PATCH 6/6] 1603 fix unrelated test --- lapis-e2e/test/aminoAcidSequence.spec.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lapis-e2e/test/aminoAcidSequence.spec.ts b/lapis-e2e/test/aminoAcidSequence.spec.ts index 8320ed147..fe2c58a09 100644 --- a/lapis-e2e/test/aminoAcidSequence.spec.ts +++ b/lapis-e2e/test/aminoAcidSequence.spec.ts @@ -190,7 +190,7 @@ describe('The /alignedAminoAcidSequence endpoint', () => { const errorResponse = await response.json(); expect(errorResponse.error.detail).to.match( - /Error from SILO: The table does not contain the field 'unknownGene'/ + /Error from SILO: The table does not contain the field unknownGene/ ); });