@@ -360,6 +360,46 @@ namespace
360
360
}
361
361
};
362
362
363
+ inline uint8_t toCharWidth (codepoint_properties const & properties) noexcept
364
+ {
365
+ switch (properties.general_category )
366
+ {
367
+ case General_Category::Control: // XXX really?
368
+ case General_Category::Enclosing_Mark:
369
+ case General_Category::Format:
370
+ case General_Category::Line_Separator:
371
+ // case General_Category::Modifier_Symbol:
372
+ case General_Category::Nonspacing_Mark:
373
+ case General_Category::Paragraph_Separator:
374
+ case General_Category::Spacing_Mark:
375
+ case General_Category::Surrogate: return 0 ;
376
+ default : break ;
377
+ }
378
+
379
+ if (properties.emoji_presentation ())
380
+ // UAX #11 §5 Recommendations:
381
+ // [UTS51] emoji presentation sequences behave as though they were East Asian Wide,
382
+ // regardless of their assigned East_Asian_Width property value.
383
+ return 2 ;
384
+
385
+ switch (properties.east_asian_width )
386
+ {
387
+ case East_Asian_Width::Narrow:
388
+ case East_Asian_Width::Ambiguous:
389
+ case East_Asian_Width::Halfwidth:
390
+ case East_Asian_Width::Neutral:
391
+ // .
392
+ return 1 ;
393
+ case East_Asian_Width::Wide:
394
+ case East_Asian_Width::Fullwidth:
395
+ // .
396
+ return 2 ;
397
+ }
398
+
399
+ // Should never be reached.
400
+ return 1 ;
401
+ }
402
+
363
403
inline EmojiSegmentationCategory toEmojiSegmentationCategory (char32_t codepoint,
364
404
codepoint_properties const & props) noexcept
365
405
{
@@ -397,6 +437,7 @@ namespace
397
437
398
438
return EmojiSegmentationCategory::Invalid;
399
439
}
440
+
400
441
class codepoint_properties_loader
401
442
{
402
443
public:
@@ -493,7 +534,6 @@ namespace
493
534
properties (codepoint).flags |= i->second ;
494
535
});
495
536
496
-
497
537
process_properties (" extracted/DerivedGeneralCategory.txt" ,
498
538
[&](char32_t codepoint, string_view value) {
499
539
(void ) codepoint;
@@ -552,6 +592,14 @@ namespace
552
592
toEmojiSegmentationCategory (codepoint, properties (codepoint));
553
593
}
554
594
// }}}
595
+
596
+ // {{{ assign char_width
597
+ {
598
+ auto const _ = scoped_timer { _log, " Assigning char_width" };
599
+ for (char32_t codepoint = 0 ; codepoint < 0x110'000 ; ++codepoint)
600
+ properties (codepoint).char_width = toCharWidth (properties (codepoint));
601
+ }
602
+ // }}}
555
603
}
556
604
557
605
codepoint_properties_table codepoint_properties_loader::load_from_directory (
0 commit comments