Skip to content

Commit 8e726f2

Browse files
authored
Add support for Grapheme_Cluster_Break and Sentence_Break (#96)
1 parent e13ec20 commit 8e726f2

File tree

6 files changed: 4635 additions and 42 deletions.

data/17.0.0-grapheme-cluster-break.txt

Lines changed: 1513 additions & 0 deletions
Large diffs are not rendered by default.

data/17.0.0-sentence-break.txt

Lines changed: 3019 additions & 0 deletions
Large diffs are not rendered by default.

data/resources.js

Lines changed: 84 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -176,8 +176,12 @@ const resources = [
176176
'special-casing': 'https://unicode.org/Public/4.1.0/ucd/SpecialCasing.txt',
177177
'bidi-mirroring': 'https://unicode.org/Public/4.1.0/ucd/BidiMirroring.txt',
178178
'line-break': 'https://unicode.org/Public/4.1.0/ucd/LineBreak.txt',
179+
'grapheme-cluster-break':
180+
'https://unicode.org/Public/4.1.0/ucd/auxiliary/GraphemeBreakProperty.txt',
179181
'word-break':
180182
'https://unicode.org/Public/4.1.0/ucd/auxiliary/WordBreakProperty.txt',
183+
'sentence-break':
184+
'https://unicode.org/Public/4.1.0/ucd/auxiliary/SentenceBreakProperty.txt',
181185
},
182186
{
183187
version: '5.0.0',
@@ -199,8 +203,12 @@ const resources = [
199203
'special-casing': 'https://unicode.org/Public/5.0.0/ucd/SpecialCasing.txt',
200204
'bidi-mirroring': 'https://unicode.org/Public/5.0.0/ucd/BidiMirroring.txt',
201205
'line-break': 'https://unicode.org/Public/5.0.0/ucd/LineBreak.txt',
206+
'grapheme-cluster-break':
207+
'https://unicode.org/Public/5.0.0/ucd/auxiliary/GraphemeBreakProperty.txt',
202208
'word-break':
203209
'https://unicode.org/Public/5.0.0/ucd/auxiliary/WordBreakProperty.txt',
210+
'sentence-break':
211+
'https://unicode.org/Public/5.0.0/ucd/auxiliary/SentenceBreakProperty.txt',
204212
},
205213
{
206214
version: '5.1.0',
@@ -222,8 +230,12 @@ const resources = [
222230
'special-casing': 'https://unicode.org/Public/5.1.0/ucd/SpecialCasing.txt',
223231
'bidi-mirroring': 'https://unicode.org/Public/5.1.0/ucd/BidiMirroring.txt',
224232
'line-break': 'https://unicode.org/Public/5.1.0/ucd/LineBreak.txt',
233+
'grapheme-cluster-break':
234+
'https://unicode.org/Public/5.1.0/ucd/auxiliary/GraphemeBreakProperty.txt',
225235
'word-break':
226236
'https://unicode.org/Public/5.1.0/ucd/auxiliary/WordBreakProperty.txt',
237+
'sentence-break':
238+
'https://unicode.org/Public/5.1.0/ucd/auxiliary/SentenceBreakProperty.txt',
227239
},
228240
{
229241
version: '5.2.0',
@@ -245,8 +257,12 @@ const resources = [
245257
'special-casing': 'https://unicode.org/Public/5.2.0/ucd/SpecialCasing.txt',
246258
'bidi-mirroring': 'https://unicode.org/Public/5.2.0/ucd/BidiMirroring.txt',
247259
'line-break': 'https://unicode.org/Public/5.2.0/ucd/LineBreak.txt',
260+
'grapheme-cluster-break':
261+
'https://unicode.org/Public/5.2.0/ucd/auxiliary/GraphemeBreakProperty.txt',
248262
'word-break':
249263
'https://unicode.org/Public/5.2.0/ucd/auxiliary/WordBreakProperty.txt',
264+
'sentence-break':
265+
'https://unicode.org/Public/5.2.0/ucd/auxiliary/SentenceBreakProperty.txt',
250266
},
251267
{
252268
version: '6.0.0',
@@ -270,8 +286,12 @@ const resources = [
270286
'special-casing': 'https://unicode.org/Public/6.0.0/ucd/SpecialCasing.txt',
271287
'bidi-mirroring': 'https://unicode.org/Public/6.0.0/ucd/BidiMirroring.txt',
272288
'line-break': 'https://unicode.org/Public/6.0.0/ucd/LineBreak.txt',
289+
'grapheme-cluster-break':
290+
'https://unicode.org/Public/6.0.0/ucd/auxiliary/GraphemeBreakProperty.txt',
273291
'word-break':
274292
'https://unicode.org/Public/6.0.0/ucd/auxiliary/WordBreakProperty.txt',
293+
'sentence-break':
294+
'https://unicode.org/Public/6.0.0/ucd/auxiliary/SentenceBreakProperty.txt',
275295
},
276296
{
277297
version: '6.1.0',
@@ -296,8 +316,12 @@ const resources = [
296316
'special-casing': 'https://unicode.org/Public/6.1.0/ucd/SpecialCasing.txt',
297317
'bidi-mirroring': 'https://unicode.org/Public/6.1.0/ucd/BidiMirroring.txt',
298318
'line-break': 'https://unicode.org/Public/6.1.0/ucd/LineBreak.txt',
319+
'grapheme-cluster-break':
320+
'https://unicode.org/Public/6.1.0/ucd/auxiliary/GraphemeBreakProperty.txt',
299321
'word-break':
300322
'https://unicode.org/Public/6.1.0/ucd/auxiliary/WordBreakProperty.txt',
323+
'sentence-break':
324+
'https://unicode.org/Public/6.1.0/ucd/auxiliary/SentenceBreakProperty.txt',
301325
},
302326
{
303327
version: '6.2.0',
@@ -322,8 +346,12 @@ const resources = [
322346
'special-casing': 'https://unicode.org/Public/6.2.0/ucd/SpecialCasing.txt',
323347
'bidi-mirroring': 'https://unicode.org/Public/6.2.0/ucd/BidiMirroring.txt',
324348
'line-break': 'https://unicode.org/Public/6.2.0/ucd/LineBreak.txt',
349+
'grapheme-cluster-break':
350+
'https://unicode.org/Public/6.2.0/ucd/auxiliary/GraphemeBreakProperty.txt',
325351
'word-break':
326352
'https://unicode.org/Public/6.2.0/ucd/auxiliary/WordBreakProperty.txt',
353+
'sentence-break':
354+
'https://unicode.org/Public/6.2.0/ucd/auxiliary/SentenceBreakProperty.txt',
327355
},
328356
{
329357
version: '6.3.0',
@@ -349,8 +377,12 @@ const resources = [
349377
'bidi-mirroring': 'https://unicode.org/Public/6.3.0/ucd/BidiMirroring.txt',
350378
'bidi-brackets': 'https://unicode.org/Public/6.3.0/ucd/BidiBrackets.txt',
351379
'line-break': 'https://unicode.org/Public/6.3.0/ucd/LineBreak.txt',
380+
'grapheme-cluster-break':
381+
'https://unicode.org/Public/6.3.0/ucd/auxiliary/GraphemeBreakProperty.txt',
352382
'word-break':
353383
'https://unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt',
384+
'sentence-break':
385+
'https://unicode.org/Public/6.3.0/ucd/auxiliary/SentenceBreakProperty.txt',
354386
},
355387
{
356388
version: '7.0.0',
@@ -376,8 +408,12 @@ const resources = [
376408
'bidi-mirroring': 'https://unicode.org/Public/7.0.0/ucd/BidiMirroring.txt',
377409
'bidi-brackets': 'https://unicode.org/Public/7.0.0/ucd/BidiBrackets.txt',
378410
'line-break': 'https://unicode.org/Public/7.0.0/ucd/LineBreak.txt',
411+
'grapheme-cluster-break':
412+
'https://unicode.org/Public/7.0.0/ucd/auxiliary/GraphemeBreakProperty.txt',
379413
'word-break':
380414
'https://unicode.org/Public/7.0.0/ucd/auxiliary/WordBreakProperty.txt',
415+
'sentence-break':
416+
'https://unicode.org/Public/7.0.0/ucd/auxiliary/SentenceBreakProperty.txt',
381417
},
382418
{
383419
version: '8.0.0',
@@ -403,8 +439,12 @@ const resources = [
403439
'bidi-mirroring': 'https://unicode.org/Public/8.0.0/ucd/BidiMirroring.txt',
404440
'bidi-brackets': 'https://unicode.org/Public/8.0.0/ucd/BidiBrackets.txt',
405441
'line-break': 'https://unicode.org/Public/8.0.0/ucd/LineBreak.txt',
442+
'grapheme-cluster-break':
443+
'https://unicode.org/Public/8.0.0/ucd/auxiliary/GraphemeBreakProperty.txt',
406444
'word-break':
407445
'https://unicode.org/Public/8.0.0/ucd/auxiliary/WordBreakProperty.txt',
446+
'sentence-break':
447+
'https://unicode.org/Public/8.0.0/ucd/auxiliary/SentenceBreakProperty.txt',
408448
},
409449
{
410450
version: '9.0.0',
@@ -430,8 +470,12 @@ const resources = [
430470
'bidi-mirroring': 'https://unicode.org/Public/9.0.0/ucd/BidiMirroring.txt',
431471
'bidi-brackets': 'https://unicode.org/Public/9.0.0/ucd/BidiBrackets.txt',
432472
'line-break': 'https://unicode.org/Public/9.0.0/ucd/LineBreak.txt',
473+
'grapheme-cluster-break':
474+
'https://unicode.org/Public/9.0.0/ucd/auxiliary/GraphemeBreakProperty.txt',
433475
'word-break':
434476
'https://unicode.org/Public/9.0.0/ucd/auxiliary/WordBreakProperty.txt',
477+
'sentence-break':
478+
'https://unicode.org/Public/9.0.0/ucd/auxiliary/SentenceBreakProperty.txt',
435479
},
436480
{
437481
version: '10.0.0',
@@ -457,8 +501,12 @@ const resources = [
457501
'bidi-mirroring': 'https://unicode.org/Public/10.0.0/ucd/BidiMirroring.txt',
458502
'bidi-brackets': 'https://unicode.org/Public/10.0.0/ucd/BidiBrackets.txt',
459503
'line-break': 'https://unicode.org/Public/10.0.0/ucd/LineBreak.txt',
504+
'grapheme-cluster-break':
505+
'https://unicode.org/Public/10.0.0/ucd/auxiliary/GraphemeBreakProperty.txt',
460506
'word-break':
461507
'https://unicode.org/Public/10.0.0/ucd/auxiliary/WordBreakProperty.txt',
508+
'sentence-break':
509+
'https://unicode.org/Public/10.0.0/ucd/auxiliary/SentenceBreakProperty.txt',
462510
},
463511
{
464512
version: '11.0.0',
@@ -484,8 +532,12 @@ const resources = [
484532
'bidi-mirroring': 'https://unicode.org/Public/11.0.0/ucd/BidiMirroring.txt',
485533
'bidi-brackets': 'https://unicode.org/Public/11.0.0/ucd/BidiBrackets.txt',
486534
'line-break': 'https://unicode.org/Public/11.0.0/ucd/LineBreak.txt',
535+
'grapheme-cluster-break':
536+
'https://unicode.org/Public/11.0.0/ucd/auxiliary/GraphemeBreakProperty.txt',
487537
'word-break':
488538
'https://unicode.org/Public/11.0.0/ucd/auxiliary/WordBreakProperty.txt',
539+
'sentence-break':
540+
'https://unicode.org/Public/11.0.0/ucd/auxiliary/SentenceBreakProperty.txt',
489541
// Emoji, Emoji_Presentation, Emoji_Modifier, Emoji_Modifier_Base, Emoji_Component, Extended_Pictographic
490542
emoji: 'https://unicode.org/Public/emoji/11.0/emoji-data.txt',
491543
// Emoji_Keycap_Sequence, Emoji_Flag_Sequence, Emoji_Modifier_Sequence
@@ -521,8 +573,12 @@ const resources = [
521573
'bidi-mirroring': 'https://unicode.org/Public/12.0.0/ucd/BidiMirroring.txt',
522574
'bidi-brackets': 'https://unicode.org/Public/12.0.0/ucd/BidiBrackets.txt',
523575
'line-break': 'https://unicode.org/Public/12.0.0/ucd/LineBreak.txt',
576+
'grapheme-cluster-break':
577+
'https://unicode.org/Public/12.0.0/ucd/auxiliary/GraphemeBreakProperty.txt',
524578
'word-break':
525579
'https://unicode.org/Public/12.0.0/ucd/auxiliary/WordBreakProperty.txt',
580+
'sentence-break':
581+
'https://unicode.org/Public/12.0.0/ucd/auxiliary/SentenceBreakProperty.txt',
526582
// Emoji, Emoji_Presentation, Emoji_Modifier, Emoji_Modifier_Base, Emoji_Component, Extended_Pictographic
527583
emoji: 'https://unicode.org/Public/emoji/12.0/emoji-data.txt',
528584
// Emoji_Keycap_Sequence, Emoji_Flag_Sequence, Emoji_Modifier_Sequence
@@ -558,8 +614,12 @@ const resources = [
558614
'bidi-mirroring': 'https://unicode.org/Public/12.1.0/ucd/BidiMirroring.txt',
559615
'bidi-brackets': 'https://unicode.org/Public/12.1.0/ucd/BidiBrackets.txt',
560616
'line-break': 'https://unicode.org/Public/12.1.0/ucd/LineBreak.txt',
617+
'grapheme-cluster-break':
618+
'https://unicode.org/Public/12.1.0/ucd/auxiliary/GraphemeBreakProperty.txt',
561619
'word-break':
562620
'https://unicode.org/Public/12.1.0/ucd/auxiliary/WordBreakProperty.txt',
621+
'sentence-break':
622+
'https://unicode.org/Public/12.1.0/ucd/auxiliary/SentenceBreakProperty.txt',
563623
// Emoji, Emoji_Presentation, Emoji_Modifier, Emoji_Modifier_Base, Emoji_Component, Extended_Pictographic
564624
emoji: 'https://unicode.org/Public/emoji/12.1/emoji-data.txt',
565625
// Emoji_Keycap_Sequence, Emoji_Flag_Sequence, Emoji_Modifier_Sequence
@@ -595,8 +655,12 @@ const resources = [
595655
'bidi-mirroring': 'https://unicode.org/Public/13.0.0/ucd/BidiMirroring.txt',
596656
'bidi-brackets': 'https://unicode.org/Public/13.0.0/ucd/BidiBrackets.txt',
597657
'line-break': 'https://unicode.org/Public/13.0.0/ucd/LineBreak.txt',
658+
'grapheme-cluster-break':
659+
'https://unicode.org/Public/13.0.0/ucd/auxiliary/GraphemeBreakProperty.txt',
598660
'word-break':
599661
'https://unicode.org/Public/13.0.0/ucd/auxiliary/WordBreakProperty.txt',
662+
'sentence-break':
663+
'https://unicode.org/Public/13.0.0/ucd/auxiliary/SentenceBreakProperty.txt',
600664
// Emoji, Emoji_Presentation, Emoji_Modifier, Emoji_Modifier_Base, Emoji_Component, Extended_Pictographic
601665
emoji: 'https://unicode.org/Public/13.0.0/ucd/emoji/emoji-data.txt',
602666
// Emoji_Keycap_Sequence, Emoji_Flag_Sequence, Emoji_Modifier_Sequence
@@ -632,8 +696,12 @@ const resources = [
632696
'bidi-mirroring': 'https://unicode.org/Public/14.0.0/ucd/BidiMirroring.txt',
633697
'bidi-brackets': 'https://unicode.org/Public/14.0.0/ucd/BidiBrackets.txt',
634698
'line-break': 'https://unicode.org/Public/14.0.0/ucd/LineBreak.txt',
699+
'grapheme-cluster-break':
700+
'https://unicode.org/Public/14.0.0/ucd/auxiliary/GraphemeBreakProperty.txt',
635701
'word-break':
636702
'https://unicode.org/Public/14.0.0/ucd/auxiliary/WordBreakProperty.txt',
703+
'sentence-break':
704+
'https://unicode.org/Public/14.0.0/ucd/auxiliary/SentenceBreakProperty.txt',
637705
// Emoji, Emoji_Presentation, Emoji_Modifier, Emoji_Modifier_Base, Emoji_Component, Extended_Pictographic
638706
emoji: 'https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt',
639707
// Emoji_Keycap_Sequence, Emoji_Flag_Sequence, Emoji_Modifier_Sequence
@@ -669,8 +737,12 @@ const resources = [
669737
'bidi-mirroring': 'https://unicode.org/Public/15.0.0/ucd/BidiMirroring.txt',
670738
'bidi-brackets': 'https://unicode.org/Public/15.0.0/ucd/BidiBrackets.txt',
671739
'line-break': 'https://unicode.org/Public/15.0.0/ucd/LineBreak.txt',
740+
'grapheme-cluster-break':
741+
'https://unicode.org/Public/15.0.0/ucd/auxiliary/GraphemeBreakProperty.txt',
672742
'word-break':
673743
'https://unicode.org/Public/15.0.0/ucd/auxiliary/WordBreakProperty.txt',
744+
'sentence-break':
745+
'https://unicode.org/Public/15.0.0/ucd/auxiliary/SentenceBreakProperty.txt',
674746
// Emoji, Emoji_Presentation, Emoji_Modifier, Emoji_Modifier_Base, Emoji_Component, Extended_Pictographic
675747
emoji: 'https://unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt',
676748
// Emoji_Keycap_Sequence, Emoji_Flag_Sequence, Emoji_Modifier_Sequence
@@ -706,8 +778,12 @@ const resources = [
706778
'bidi-mirroring': 'https://unicode.org/Public/15.1.0/ucd/BidiMirroring.txt',
707779
'bidi-brackets': 'https://unicode.org/Public/15.1.0/ucd/BidiBrackets.txt',
708780
'line-break': 'https://unicode.org/Public/15.1.0/ucd/LineBreak.txt',
781+
'grapheme-cluster-break':
782+
'https://unicode.org/Public/15.1.0/ucd/auxiliary/GraphemeBreakProperty.txt',
709783
'word-break':
710784
'https://unicode.org/Public/15.1.0/ucd/auxiliary/WordBreakProperty.txt',
785+
'sentence-break':
786+
'https://unicode.org/Public/15.1.0/ucd/auxiliary/SentenceBreakProperty.txt',
711787
// Emoji, Emoji_Presentation, Emoji_Modifier, Emoji_Modifier_Base, Emoji_Component, Extended_Pictographic
712788
emoji: 'https://unicode.org/Public/15.1.0/ucd/emoji/emoji-data.txt',
713789
// Emoji_Keycap_Sequence, Emoji_Flag_Sequence, Emoji_Modifier_Sequence
@@ -743,8 +819,12 @@ const resources = [
743819
'bidi-mirroring': 'https://unicode.org/Public/16.0.0/ucd/BidiMirroring.txt',
744820
'bidi-brackets': 'https://unicode.org/Public/16.0.0/ucd/BidiBrackets.txt',
745821
'line-break': 'https://unicode.org/Public/16.0.0/ucd/LineBreak.txt',
822+
'grapheme-cluster-break':
823+
'https://unicode.org/Public/16.0.0/ucd/auxiliary/GraphemeBreakProperty.txt',
746824
'word-break':
747825
'https://unicode.org/Public/16.0.0/ucd/auxiliary/WordBreakProperty.txt',
826+
'sentence-break':
827+
'https://unicode.org/Public/16.0.0/ucd/auxiliary/SentenceBreakProperty.txt',
748828
// Emoji, Emoji_Presentation, Emoji_Modifier, Emoji_Modifier_Base, Emoji_Component, Extended_Pictographic
749829
emoji: 'https://unicode.org/Public/16.0.0/ucd/emoji/emoji-data.txt',
750830
// Emoji_Keycap_Sequence, Emoji_Flag_Sequence, Emoji_Modifier_Sequence
@@ -780,45 +860,12 @@ const resources = [
780860
'bidi-mirroring': 'https://unicode.org/Public/17.0.0/ucd/BidiMirroring.txt',
781861
'bidi-brackets': 'https://unicode.org/Public/17.0.0/ucd/BidiBrackets.txt',
782862
'line-break': 'https://unicode.org/Public/17.0.0/ucd/LineBreak.txt',
863+
'grapheme-cluster-break':
864+
'https://unicode.org/Public/17.0.0/ucd/auxiliary/GraphemeBreakProperty.txt',
783865
'word-break':
784866
'https://unicode.org/Public/17.0.0/ucd/auxiliary/WordBreakProperty.txt',
785-
// Emoji, Emoji_Presentation, Emoji_Modifier, Emoji_Modifier_Base, Emoji_Component, Extended_Pictographic
786-
emoji: 'https://unicode.org/Public/17.0.0/ucd/emoji/emoji-data.txt',
787-
// Emoji_Keycap_Sequence, Emoji_Flag_Sequence, Emoji_Modifier_Sequence
788-
'emoji-sequences':
789-
'https://unicode.org/Public/emoji/17.0/emoji-sequences.txt',
790-
// Emoji_ZWJ_Sequence
791-
'emoji-zwj-sequences':
792-
'https://unicode.org/Public/emoji/17.0/emoji-zwj-sequences.txt',
793-
// Emoji_Test (not an official property)
794-
'emoji-test': 'https://unicode.org/Public/emoji/17.0/emoji-test.txt',
795-
},
796-
{
797-
version: '17.0.0',
798-
main: 'https://unicode.org/Public/17.0.0/ucd/UnicodeData.txt',
799-
scripts: 'https://unicode.org/Public/17.0.0/ucd/Scripts.txt',
800-
'script-extensions':
801-
'https://unicode.org/Public/17.0.0/ucd/ScriptExtensions.txt',
802-
blocks: 'https://unicode.org/Public/17.0.0/ucd/Blocks.txt',
803-
properties: 'https://unicode.org/Public/17.0.0/ucd/PropList.txt',
804-
'name-aliases': 'https://unicode.org/Public/17.0.0/ucd/NameAliases.txt',
805-
'derived-binary-properties':
806-
'https://unicode.org/Public/17.0.0/ucd/extracted/DerivedBinaryProperties.txt',
807-
'derived-core-properties':
808-
'https://unicode.org/Public/17.0.0/ucd/DerivedCoreProperties.txt',
809-
'derived-general-category':
810-
'https://unicode.org/Public/17.0.0/ucd/extracted/DerivedGeneralCategory.txt',
811-
'derived-normalization-properties':
812-
'https://unicode.org/Public/17.0.0/ucd/DerivedNormalizationProps.txt',
813-
'composition-exclusions':
814-
'https://unicode.org/Public/17.0.0/ucd/CompositionExclusions.txt',
815-
'case-folding': 'https://unicode.org/Public/17.0.0/ucd/CaseFolding.txt',
816-
'special-casing': 'https://unicode.org/Public/17.0.0/ucd/SpecialCasing.txt',
817-
'bidi-mirroring': 'https://unicode.org/Public/17.0.0/ucd/BidiMirroring.txt',
818-
'bidi-brackets': 'https://unicode.org/Public/17.0.0/ucd/BidiBrackets.txt',
819-
'line-break': 'https://unicode.org/Public/17.0.0/ucd/LineBreak.txt',
820-
'word-break':
821-
'https://unicode.org/Public/17.0.0/ucd/auxiliary/WordBreakProperty.txt',
867+
'sentence-break':
868+
'https://unicode.org/Public/17.0.0/ucd/auxiliary/SentenceBreakProperty.txt',
822869
// Emoji, Emoji_Presentation, Emoji_Modifier, Emoji_Modifier_Base, Emoji_Component, Extended_Pictographic
823870
emoji: 'https://unicode.org/Public/17.0.0/ucd/emoji/emoji-data.txt',
824871
// Emoji_Keycap_Sequence, Emoji_Flag_Sequence, Emoji_Modifier_Sequence

index.js

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ parsers.parseLineBreak = require('./scripts/parse-line-break.js');
1212
parsers.parseScriptExtensions = require('./scripts/parse-script-extensions.js');
1313
parsers.parseSpecialCasing = require('./scripts/parse-special-casing.js');
1414
parsers.parseSimpleCaseMapping = require('./scripts/parse-simple-case-mapping.js');
15-
parsers.parseWordBreak = require('./scripts/parse-word-break.js');
15+
parsers.parseGraphemeWordSentenceBreak = require('./scripts/parse-grapheme-word-sentence-break.js');
1616
parsers.parseEmoji = require('./scripts/parse-emoji.js');
1717
parsers.parseEmojiSequences = require('./scripts/parse-emoji-sequences.js');
1818
parsers.parseNames = require('./scripts/parse-names.js');
@@ -126,12 +126,24 @@ const generateData = function(version) {
126126
'map': parsers.parseLineBreak(version),
127127
'type': 'Line_Break'
128128
}));
129+
console.log('Parsing Unicode v%s `Grapheme_Cluster_Break`…', version);
130+
extend(dirMap, utils.writeFiles({
131+
'version': version,
132+
'map': parsers.parseGraphemeWordSentenceBreak(version, 'grapheme-cluster-break'),
133+
'type': 'Grapheme_Cluster_Break'
134+
}));
129135
console.log('Parsing Unicode v%s `Word_Break`…', version);
130136
extend(dirMap, utils.writeFiles({
131137
'version': version,
132-
'map': parsers.parseWordBreak(version),
138+
'map': parsers.parseGraphemeWordSentenceBreak(version, 'word-break'),
133139
'type': 'Word_Break'
134140
}));
141+
console.log('Parsing Unicode v%s `Sentence_Break`…', version);
142+
extend(dirMap, utils.writeFiles({
143+
'version': version,
144+
'map': parsers.parseGraphemeWordSentenceBreak(version, 'sentence-break'),
145+
'type': 'Sentence_Break'
146+
}));
135147
console.log('Parsing Unicode v%s binary emoji properties…', version);
136148
extend(dirMap, utils.writeFiles({
137149
'version': version,

scripts/download.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,9 @@ const TYPES = [
4242
'bidi-mirroring',
4343
'bidi-brackets',
4444
'line-break',
45+
'grapheme-cluster-break',
4546
'word-break',
47+
'sentence-break',
4648
'emoji',
4749
'emoji-sequences',
4850
'emoji-test',

scripts/parse-word-break.js renamed to scripts/parse-grapheme-word-sentence-break.js

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
const utils = require('./utils.js');
44
const regenerate = require('regenerate');
55

6-
const parseWordBreak = function(version) {
7-
const source = utils.readDataFile(version, 'word-break');
6+
const parseGraphemeWordSentenceBreak = function(version, kind) {
7+
const source = utils.readDataFile(version, kind);
88
if (!source) {
99
return;
1010
}
@@ -39,4 +39,4 @@ const parseWordBreak = function(version) {
3939
return map;
4040
};
4141

42-
module.exports = parseWordBreak;
42+
module.exports = parseGraphemeWordSentenceBreak;

0 commit comments

Comments
 (0)