@@ -108,6 +108,7 @@ def emit_rust_file(path: str, generator: Callable[[IO[str]], None]):
108108 f .write (FILE_HEADER )
109109 generator (f )
110110
111+
111112Codepoint = int
112113BitPos = int
113114
@@ -1311,281 +1312,6 @@ def lookup_fns(
13111312 None
13121313 }}
13131314}}
1314-
1315- /// Returns the [UAX #11](https://www.unicode.org/reports/tr11/) based width of `c`.
1316- /// Ambiguous width characters are treated as { ambig } .
1317- { cfg } #[inline]
1318- pub(crate) fn width_in_str{ cjk_lo } (c: char, mut next_info: WidthInfo) -> (i8, WidthInfo) {{
1319- if next_info.is_emoji_presentation() {{
1320- if starts_emoji_presentation_seq(c) {{
1321- let width = if next_info.is_zwj_emoji_presentation() {{
1322- 0
1323- }} else {{
1324- 2
1325- }};
1326- return (width, WidthInfo::EMOJI_PRESENTATION);
1327- }} else {{
1328- next_info = next_info.unset_emoji_presentation();
1329- }}
1330- }}"""
1331-
1332- if is_cjk :
1333- s += """
1334- if (matches!(
1335- next_info,
1336- WidthInfo::COMBINING_LONG_SOLIDUS_OVERLAY | WidthInfo::SOLIDUS_OVERLAY_ALEF
1337- ) && matches!(c, '<' | '=' | '>'))
1338- {
1339- return (2, WidthInfo::DEFAULT);
1340- }"""
1341-
1342- s += """
1343- if c <= '\\ u{A0}' {
1344- match c {
1345- '\\ n' => (1, WidthInfo::LINE_FEED),
1346- '\\ r' if next_info == WidthInfo::LINE_FEED => (0, WidthInfo::DEFAULT),
1347- _ => (1, WidthInfo::DEFAULT),
1348- }
1349- } else {
1350- // Fast path
1351- if next_info != WidthInfo::DEFAULT {
1352- if c == '\\ u{FE0F}' {
1353- return (0, next_info.set_emoji_presentation());
1354- }"""
1355-
1356- if is_cjk :
1357- s += """
1358- if matches!(c, '\\ u{FE00}' | '\\ u{FE02}') {
1359- return (0, next_info.set_vs1_2_3());
1360- }
1361- """
1362- else :
1363- s += """
1364- if c == '\\ u{FE01}' {
1365- return (0, next_info.set_vs1_2_3());
1366- }
1367- if c == '\\ u{FE0E}' {
1368- return (0, next_info.set_text_presentation());
1369- }
1370- if next_info.is_text_presentation() {
1371- if starts_non_ideographic_text_presentation_seq(c) {
1372- return (1, WidthInfo::DEFAULT);
1373- } else {
1374- next_info = next_info.unset_text_presentation();
1375- }
1376- } else """
1377-
1378- s += """if next_info.is_vs1_2_3() {
1379- if matches!(c, '\\ u{2018}' | '\\ u{2019}' | '\\ u{201C}' | '\\ u{201D}') {
1380- return ("""
1381-
1382- s += str (2 - is_cjk )
1383-
1384- s += """, WidthInfo::DEFAULT);
1385- } else {
1386- next_info = next_info.unset_vs1_2_3();
1387- }
1388- }
1389- if next_info.is_ligature_transparent() {
1390- if c == '\\ u{200D}' {
1391- return (0, next_info.set_zwj_bit());
1392- } else if is_ligature_transparent(c) {
1393- return (0, next_info);
1394- }
1395- }
1396-
1397- match (next_info, c) {"""
1398- if is_cjk :
1399- s += """
1400- (WidthInfo::COMBINING_LONG_SOLIDUS_OVERLAY, _) if is_solidus_transparent(c) => {
1401- return (
1402- lookup_width_cjk(c).0 as i8,
1403- WidthInfo::COMBINING_LONG_SOLIDUS_OVERLAY,
1404- );
1405- }
1406- (WidthInfo::JOINING_GROUP_ALEF, '\\ u{0338}') => {
1407- return (0, WidthInfo::SOLIDUS_OVERLAY_ALEF);
1408- }
1409- // Arabic Lam-Alef ligature
1410- (WidthInfo::JOINING_GROUP_ALEF | WidthInfo::SOLIDUS_OVERLAY_ALEF, _)
1411- if is_joining_group_lam(c) =>
1412- {
1413- return (0, WidthInfo::DEFAULT)
1414- }
1415- (WidthInfo::JOINING_GROUP_ALEF, _) if is_transparent_zero_width(c) => {
1416- return (0, WidthInfo::JOINING_GROUP_ALEF);
1417- }
1418- """
1419- else :
1420- s += """
1421- // Arabic Lam-Alef ligature
1422- (WidthInfo::JOINING_GROUP_ALEF, _) if is_joining_group_lam(c) => {
1423- return (0, WidthInfo::DEFAULT)
1424- }
1425- (WidthInfo::JOINING_GROUP_ALEF, _) if is_transparent_zero_width(c) => {
1426- return (0, WidthInfo::JOINING_GROUP_ALEF);
1427- }
1428- """
1429-
1430- s += """
1431- // Hebrew Alef-ZWJ-Lamed ligature
1432- (WidthInfo::ZWJ_HEBREW_LETTER_LAMED, '\\ u{05D0}') => {
1433- return (0, WidthInfo::DEFAULT);
1434- }
1435-
1436- // Khmer coeng signs
1437- (WidthInfo::KHMER_COENG_ELIGIBLE_LETTER, '\\ u{17D2}') => {
1438- return (-1, WidthInfo::DEFAULT);
1439- }
1440-
1441- // Buginese <a, -i> ZWJ ya ligature
1442- (WidthInfo::ZWJ_BUGINESE_LETTER_YA, '\\ u{1A17}') => {
1443- return (0, WidthInfo::BUGINESE_VOWEL_SIGN_I_ZWJ_LETTER_YA)
1444- }
1445- (WidthInfo::BUGINESE_VOWEL_SIGN_I_ZWJ_LETTER_YA, '\\ u{1A15}') => {
1446- return (0, WidthInfo::DEFAULT)
1447- }
1448-
1449- // Tifinagh bi-consonants
1450- (WidthInfo::TIFINAGH_CONSONANT | WidthInfo::ZWJ_TIFINAGH_CONSONANT, '\\ u{2D7F}') => {
1451- return (1, WidthInfo::TIFINAGH_JOINER_CONSONANT);
1452- }
1453- (WidthInfo::ZWJ_TIFINAGH_CONSONANT, '\\ u{2D31}'..='\\ u{2D65}' | '\\ u{2D6F}') => {
1454- return (0, WidthInfo::DEFAULT);
1455- }
1456- (WidthInfo::TIFINAGH_JOINER_CONSONANT, '\\ u{2D31}'..='\\ u{2D65}' | '\\ u{2D6F}') => {
1457- return (-1, WidthInfo::DEFAULT);
1458- }
1459-
1460- // Lisu tone letter combinations
1461- (WidthInfo::LISU_TONE_LETTER_MYA_NA_JEU, '\\ u{A4F8}'..='\\ u{A4FB}') => {
1462- return (0, WidthInfo::DEFAULT);
1463- }
1464-
1465- // Old Turkic ligature
1466- (WidthInfo::ZWJ_OLD_TURKIC_LETTER_ORKHON_I, '\\ u{10C32}') => {
1467- return (0, WidthInfo::DEFAULT);
1468- }"""
1469-
1470- s += f"""
1471- // Emoji modifier
1472- (WidthInfo::EMOJI_MODIFIER, _) if is_emoji_modifier_base(c) => {{
1473- return (0, WidthInfo::EMOJI_PRESENTATION);
1474- }}
1475-
1476- // Regional indicator
1477- (
1478- WidthInfo::REGIONAL_INDICATOR | WidthInfo::SEVERAL_REGIONAL_INDICATOR,
1479- '\\ u{{1F1E6}}'..='\\ u{{1F1FF}}',
1480- ) => return (1, WidthInfo::SEVERAL_REGIONAL_INDICATOR),
1481-
1482- // ZWJ emoji
1483- (
1484- WidthInfo::EMOJI_PRESENTATION
1485- | WidthInfo::SEVERAL_REGIONAL_INDICATOR
1486- | WidthInfo::EVEN_REGIONAL_INDICATOR_ZWJ_PRESENTATION
1487- | WidthInfo::ODD_REGIONAL_INDICATOR_ZWJ_PRESENTATION
1488- | WidthInfo::EMOJI_MODIFIER,
1489- '\\ u{{200D}}',
1490- ) => return (0, WidthInfo::ZWJ_EMOJI_PRESENTATION),
1491- (WidthInfo::ZWJ_EMOJI_PRESENTATION, '\\ u{{20E3}}') => {{
1492- return (0, WidthInfo::KEYCAP_ZWJ_EMOJI_PRESENTATION);
1493- }}
1494- (WidthInfo::VS16_ZWJ_EMOJI_PRESENTATION, _) if starts_emoji_presentation_seq(c) => {{
1495- return (0, WidthInfo::EMOJI_PRESENTATION)
1496- }}
1497- (WidthInfo::VS16_KEYCAP_ZWJ_EMOJI_PRESENTATION, '0'..='9' | '#' | '*') => {{
1498- return (0, WidthInfo::EMOJI_PRESENTATION)
1499- }}
1500- (WidthInfo::ZWJ_EMOJI_PRESENTATION, '\\ u{{1F1E6}}'..='\\ u{{1F1FF}}') => {{
1501- return (1, WidthInfo::REGIONAL_INDICATOR_ZWJ_PRESENTATION);
1502- }}
1503- (
1504- WidthInfo::REGIONAL_INDICATOR_ZWJ_PRESENTATION
1505- | WidthInfo::ODD_REGIONAL_INDICATOR_ZWJ_PRESENTATION,
1506- '\\ u{{1F1E6}}'..='\\ u{{1F1FF}}',
1507- ) => return (-1, WidthInfo::EVEN_REGIONAL_INDICATOR_ZWJ_PRESENTATION),
1508- (
1509- WidthInfo::EVEN_REGIONAL_INDICATOR_ZWJ_PRESENTATION,
1510- '\\ u{{1F1E6}}'..='\\ u{{1F1FF}}',
1511- ) => return (3, WidthInfo::ODD_REGIONAL_INDICATOR_ZWJ_PRESENTATION),
1512- (WidthInfo::ZWJ_EMOJI_PRESENTATION, '\\ u{{1F3FB}}'..='\\ u{{1F3FF}}') => {{
1513- return (0, WidthInfo::EMOJI_MODIFIER);
1514- }}
1515- (WidthInfo::ZWJ_EMOJI_PRESENTATION, '\\ u{{E007F}}') => {{
1516- return (0, WidthInfo::TAG_END_ZWJ_EMOJI_PRESENTATION);
1517- }}
1518- (WidthInfo::TAG_END_ZWJ_EMOJI_PRESENTATION, '\\ u{{E0061}}'..='\\ u{{E007A}}') => {{
1519- return (0, WidthInfo::TAG_A1_END_ZWJ_EMOJI_PRESENTATION);
1520- }}
1521- (WidthInfo::TAG_A1_END_ZWJ_EMOJI_PRESENTATION, '\\ u{{E0061}}'..='\\ u{{E007A}}') => {{
1522- return (0, WidthInfo::TAG_A2_END_ZWJ_EMOJI_PRESENTATION)
1523- }}
1524- (WidthInfo::TAG_A2_END_ZWJ_EMOJI_PRESENTATION, '\\ u{{E0061}}'..='\\ u{{E007A}}') => {{
1525- return (0, WidthInfo::TAG_A3_END_ZWJ_EMOJI_PRESENTATION)
1526- }}
1527- (WidthInfo::TAG_A3_END_ZWJ_EMOJI_PRESENTATION, '\\ u{{E0061}}'..='\\ u{{E007A}}') => {{
1528- return (0, WidthInfo::TAG_A4_END_ZWJ_EMOJI_PRESENTATION)
1529- }}
1530- (WidthInfo::TAG_A4_END_ZWJ_EMOJI_PRESENTATION, '\\ u{{E0061}}'..='\\ u{{E007A}}') => {{
1531- return (0, WidthInfo::TAG_A5_END_ZWJ_EMOJI_PRESENTATION)
1532- }}
1533- (WidthInfo::TAG_A5_END_ZWJ_EMOJI_PRESENTATION, '\\ u{{E0061}}'..='\\ u{{E007A}}') => {{
1534- return (0, WidthInfo::TAG_A6_END_ZWJ_EMOJI_PRESENTATION)
1535- }}
1536- (
1537- WidthInfo::TAG_END_ZWJ_EMOJI_PRESENTATION
1538- | WidthInfo::TAG_A1_END_ZWJ_EMOJI_PRESENTATION
1539- | WidthInfo::TAG_A2_END_ZWJ_EMOJI_PRESENTATION
1540- | WidthInfo::TAG_A3_END_ZWJ_EMOJI_PRESENTATION
1541- | WidthInfo::TAG_A4_END_ZWJ_EMOJI_PRESENTATION,
1542- '\\ u{{E0030}}'..='\\ u{{E0039}}',
1543- ) => return (0, WidthInfo::TAG_D1_END_ZWJ_EMOJI_PRESENTATION),
1544- (WidthInfo::TAG_D1_END_ZWJ_EMOJI_PRESENTATION, '\\ u{{E0030}}'..='\\ u{{E0039}}') => {{
1545- return (0, WidthInfo::TAG_D2_END_ZWJ_EMOJI_PRESENTATION);
1546- }}
1547- (WidthInfo::TAG_D2_END_ZWJ_EMOJI_PRESENTATION, '\\ u{{E0030}}'..='\\ u{{E0039}}') => {{
1548- return (0, WidthInfo::TAG_D3_END_ZWJ_EMOJI_PRESENTATION);
1549- }}
1550- (
1551- WidthInfo::TAG_A3_END_ZWJ_EMOJI_PRESENTATION
1552- | WidthInfo::TAG_A4_END_ZWJ_EMOJI_PRESENTATION
1553- | WidthInfo::TAG_A5_END_ZWJ_EMOJI_PRESENTATION
1554- | WidthInfo::TAG_A6_END_ZWJ_EMOJI_PRESENTATION
1555- | WidthInfo::TAG_D3_END_ZWJ_EMOJI_PRESENTATION,
1556- '\\ u{{1F3F4}}',
1557- ) => return (0, WidthInfo::EMOJI_PRESENTATION),
1558- (WidthInfo::ZWJ_EMOJI_PRESENTATION, _)
1559- if lookup_width{ cjk_lo } (c).1 == WidthInfo::EMOJI_PRESENTATION =>
1560- {{
1561- return (0, WidthInfo::EMOJI_PRESENTATION)
1562- }}
1563-
1564- (WidthInfo::KIRAT_RAI_VOWEL_SIGN_E, '\\ u{{16D63}}') => {{
1565- return (0, WidthInfo::DEFAULT);
1566- }}
1567- (WidthInfo::KIRAT_RAI_VOWEL_SIGN_E, '\\ u{{16D67}}') => {{
1568- return (0, WidthInfo::KIRAT_RAI_VOWEL_SIGN_AI);
1569- }}
1570- (WidthInfo::KIRAT_RAI_VOWEL_SIGN_E, '\\ u{{16D68}}') => {{
1571- return (1, WidthInfo::KIRAT_RAI_VOWEL_SIGN_E);
1572- }}
1573- (WidthInfo::KIRAT_RAI_VOWEL_SIGN_E, '\\ u{{16D69}}') => {{
1574- return (0, WidthInfo::DEFAULT);
1575- }}
1576- (WidthInfo::KIRAT_RAI_VOWEL_SIGN_AI, '\\ u{{16D63}}') => {{
1577- return (0, WidthInfo::DEFAULT);
1578- }}
1579-
1580- // Fallback
1581- _ => {{}}
1582- }}
1583- }}
1584-
1585- let ret = lookup_width{ cjk_lo } (c);
1586- (ret.0 as i8, ret.1)
1587- }}
1588- }}
15891315"""
15901316
15911317 return s
@@ -1627,7 +1353,6 @@ def emit_props(
16271353
16281354 module .write (
16291355 """/// Whether this character has Joining_Group=Lam.
1630- #[rustfmt::skip]
16311356pub fn is_joining_group_lam(c: char) -> bool {
16321357 matches!(
16331358 c,
@@ -1650,7 +1375,6 @@ def emit_props(
16501375
16511376/// Whether this character is a default-ignorable combining mark
16521377/// or ZWJ. These characters won't interrupt non-Arabic ligatures.
1653- #[rustfmt::skip]
16541378pub fn is_ligature_transparent(c: char) -> bool {
16551379 matches!(
16561380 c,
@@ -1774,7 +1498,6 @@ def emit_lookup(
17741498 joining_group_lam : list [tuple [Codepoint , Codepoint ]],
17751499):
17761500 """Outputs a Rust module to `module` containing generated lookup functions."""
1777- module .write ("use crate::props::*;\n " )
17781501 module .write ("use crate::tables::*;\n " )
17791502 module .write ("use crate::width_info::WidthInfo;\n \n " )
17801503
0 commit comments