diff --git a/lapis-e2e/test/aminoAcidSequence.spec.ts b/lapis-e2e/test/aminoAcidSequence.spec.ts index 0f833fb5f..fe2c58a09 100644 --- a/lapis-e2e/test/aminoAcidSequence.spec.ts +++ b/lapis-e2e/test/aminoAcidSequence.spec.ts @@ -190,7 +190,7 @@ describe('The /alignedAminoAcidSequence endpoint', () => { const errorResponse = await response.json(); expect(errorResponse.error.detail).to.match( - /Error from SILO: The table does not contain the SequenceColumn 'unknownGene'/ + /Error from SILO: The table does not contain the field unknownGene/ ); }); diff --git a/lapis/src/main/antlr/org/genspectrum/lapis/model/advancedqueryparser/AdvancedQuery.g4 b/lapis/src/main/antlr/org/genspectrum/lapis/model/advancedqueryparser/AdvancedQuery.g4 index 9e2dbe183..f2cab6ac3 100644 --- a/lapis/src/main/antlr/org/genspectrum/lapis/model/advancedqueryparser/AdvancedQuery.g4 +++ b/lapis/src/main/antlr/org/genspectrum/lapis/model/advancedqueryparser/AdvancedQuery.g4 @@ -84,7 +84,7 @@ value: name | QUOTED_STRING; dateOrNumber: digit+; digit: NUMBER | MINUS | DOT; name: charOrNumber+; -charOrNumber: A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | X | Y | Z | NUMBER | MINUS | UNDERSCORE | DOT | ASTERISK; +charOrNumber: A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | X | Y | Z | NUMBER | MINUS | UNDERSCORE | DOT | ASTERISK | UNICODE_LETTER; isNullQuery: isnull_ '(' name ')'; isnull_: I S N U L L ; @@ -122,6 +122,7 @@ UNDERSCORE: '_'; DOT: '.'; ASTERISK: '*'; QUOTED_STRING: '\'' ( '\\' . | ~['\\] )* '\''; // matches all strings with quotes, supports backslash escaping (e.g. \' for a literal single quote, \\ for a literal backslash) +UNICODE_LETTER: [\p{Letter}] ; // matches a single Unicode letter (general category L); standalone combining marks (category M) are not matched, so only precomposed non-ASCII letters are accepted (e.g. 
umlauts, accented characters) AND: ' ' A N D ' '; // space is important here, otherwise metadataNames with 'AND' in them would be misinterpreted OR: ' ' O R ' '; NOT: N O T ' '; diff --git a/lapis/src/test/kotlin/org/genspectrum/lapis/model/AdvancedQueryFacadeTest.kt b/lapis/src/test/kotlin/org/genspectrum/lapis/model/AdvancedQueryFacadeTest.kt index 6c376e897..4d1322f12 100644 --- a/lapis/src/test/kotlin/org/genspectrum/lapis/model/AdvancedQueryFacadeTest.kt +++ b/lapis/src/test/kotlin/org/genspectrum/lapis/model/AdvancedQueryFacadeTest.kt @@ -341,6 +341,11 @@ class AdvancedQueryFacadeTest { query = "some_metadata.regex='it\\'s'", expected = StringSearch("some_metadata", "it's"), ), + ValidTestCase( + description = "unquoted regex with non-ASCII characters", + query = "some_metadata.regex=Graubünden", + expected = StringSearch("some_metadata", "Graubünden"), + ), ), invalid = listOf( InvalidTestCase( @@ -625,6 +630,41 @@ class AdvancedQueryFacadeTest { query = "some_metadata='Côte d\\'Ivoire'", expected = StringEquals("some_metadata", "Côte d'Ivoire"), ), + ValidTestCase( + description = "string equals with unquoted umlaut (ü)", + query = "some_metadata=Zürich", + expected = StringEquals("some_metadata", "Zürich"), + ), + ValidTestCase( + description = "string equals with unquoted accented character (â)", + query = "some_metadata=Neuchâtel", + expected = StringEquals("some_metadata", "Neuchâtel"), + ), + ValidTestCase( + description = "string equals with unquoted cedilla (ç)", + query = "some_metadata=Français", + expected = StringEquals("some_metadata", "Français"), + ), + ValidTestCase( + description = "string equals with unquoted tilde-n (ñ)", + query = "some_metadata=España", + expected = StringEquals("some_metadata", "España"), + ), + ValidTestCase( + description = "string equals with unquoted Cyrillic characters", + query = "some_metadata=Москва", + expected = StringEquals("some_metadata", "Москва"), + ), + ValidTestCase( + description = "string equals with 
unquoted Chinese characters", + query = "some_metadata=北京", + expected = StringEquals("some_metadata", "北京"), + ), + ValidTestCase( + description = "string equals with unquoted mixed ASCII and non-ASCII", + query = "some_metadata=Graubünden", + expected = StringEquals("some_metadata", "Graubünden"), + ), ValidTestCase( description = "string equals with escaped backslash in value", query = "some_metadata='back\\\\slash'", @@ -726,6 +766,16 @@ class AdvancedQueryFacadeTest { "floatField=notAFloat", "'notAFloat' is not a valid float", ), + InvalidTestCase( + description = "non-ASCII field name", + query = "divïsion=Bern", + expected = "Metadata field divïsion does not exist", + ), + InvalidTestCase( + description = "non-ASCII field name with regex suffix", + query = "divïsion.regex=Bern", + expected = "Metadata field divïsion does not exist", + ), ), ) @@ -797,6 +847,11 @@ class AdvancedQueryFacadeTest { query = "invalidGene:501Y", expected = "invalidGene is not a known segment or gene", ), + InvalidTestCase( + description = "named mutation with non-ASCII gene/segment name", + query = "Ñ:123A", + expected = "Ñ is not a known segment or gene", + ), InvalidTestCase( description = "'-' in nucleotide 'from' position is invalid", query = "-300A",