# The following Ragel file was autogenerated with unicode2ragel.rb
# from: http://www.unicode.org/Public/9.0.0/ucd/auxiliary/GraphemeBreakProperty.txt
#
# It defines ["Prepend", "CR", "LF", "Control", "Extend", "Regional_Indicator", "SpacingMark", "L", "V", "T", "LV", "LVT", "E_Base", "E_Modifier", "ZWJ", "Glue_After_Zwj", "E_Base_GAZ"].
#
# To use this, make sure that your alphtype is set to byte,
# and that your input is in utf8.
#
# Layout: each definition is a union of UTF-8 byte sequences, one alternative
# per row. The trailing comment on a row gives the general category, the
# number of code points in the range (in brackets), and the character name(s).
# A code point range that spans several lead/continuation bytes is split
# across consecutive rows; the continuation rows carry an empty "#" comment.
# Those rows use loose trailing-byte ranges (0x80..0xFF, 0x00..0xFF,
# 0x00..0xBF), which is only sound because the input is required to be UTF-8.

%%{
    machine GraphemeCluster;

    # Grapheme_Cluster_Break = Prepend
    Prepend =
        0xD8 0x80..0x85               #Cf   [6] ARABIC NUMBER SIGN..ARABIC NUMBER ...
      | 0xDB 0x9D                     #Cf       ARABIC END OF AYAH
      | 0xDC 0x8F                     #Cf       SYRIAC ABBREVIATION MARK
      | 0xE0 0xA3 0xA2                #Cf       ARABIC DISPUTED END OF AYAH
      | 0xE0 0xB5 0x8E                #Lo       MALAYALAM LETTER DOT REPH
      | 0xF0 0x91 0x82 0xBD           #Cf       KAITHI NUMBER SIGN
      | 0xF0 0x91 0x87 0x82..0x83     #Lo   [2] SHARADA SIGN JIHVAMULIYA..SHARA...
      ;

    # Grapheme_Cluster_Break = CR (U+000D)
    CR =
        0x0D                          #Cc       <control-000D>
      ;

    # Grapheme_Cluster_Break = LF (U+000A)
    LF =
        0x0A                          #Cc       <control-000A>
      ;

    # Grapheme_Cluster_Break = Control
    Control =
        0x00..0x09                    #Cc  [10] <control-0000>..<control-0009>
      | 0x0B..0x0C                    #Cc   [2] <control-000B>..<control-000C>
      | 0x0E..0x1F                    #Cc  [18] <control-000E>..<control-001F>
      | 0x7F                          #Cc  [33] <control-007F>..<control-009F>
      | 0xC2 0x80..0x9F               #
      | 0xC2 0xAD                     #Cf       SOFT HYPHEN
      | 0xD8 0x9C                     #Cf       ARABIC LETTER MARK
      | 0xE1 0xA0 0x8E                #Cf       MONGOLIAN VOWEL SEPARATOR
      | 0xE2 0x80 0x8B                #Cf       ZERO WIDTH SPACE
      | 0xE2 0x80 0x8E..0x8F          #Cf   [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT ...
      | 0xE2 0x80 0xA8                #Zl       LINE SEPARATOR
      | 0xE2 0x80 0xA9                #Zp       PARAGRAPH SEPARATOR
      | 0xE2 0x80 0xAA..0xAE          #Cf   [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-...
      | 0xE2 0x81 0xA0..0xA4          #Cf   [5] WORD JOINER..INVISIBLE PLUS
      | 0xE2 0x81 0xA5                #Cn       <reserved-2065>
      | 0xE2 0x81 0xA6..0xAF          #Cf  [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIG...
      | 0xED 0xA0 0x80..0xFF          #Cs [2048] <surrogate-D800>..<surrogate-DFFF>
      | 0xED 0xA1..0xBE 0x00..0xFF    #
      | 0xED 0xBF 0x00..0xBF          #
      | 0xEF 0xBB 0xBF                #Cf       ZERO WIDTH NO-BREAK SPACE
      | 0xEF 0xBF 0xB0..0xB8          #Cn   [9] <reserved-FFF0>..<reserved-FFF8>
      | 0xEF 0xBF 0xB9..0xBB          #Cf   [3] INTERLINEAR ANNOTATION ANCHOR..INT...
      | 0xF0 0x9B 0xB2 0xA0..0xA3     #Cf   [4] SHORTHAND FORMAT LETTER OVERLAP...
      | 0xF0 0x9D 0x85 0xB3..0xBA     #Cf   [8] MUSICAL SYMBOL BEGIN BEAM..MUSI...
      | 0xF3 0xA0 0x80 0x80           #Cn       <reserved-E0000>
      | 0xF3 0xA0 0x80 0x81           #Cf       LANGUAGE TAG
      | 0xF3 0xA0 0x80 0x82..0x9F     #Cn  [30] <reserved-E0002>..<reserved-E001F>
      | 0xF3 0xA0 0x82 0x80..0xFF     #Cn [128] <reserved-E0080>..<reserved-E00FF>
      | 0xF3 0xA0 0x83 0x00..0xBF     #
      | 0xF3 0xA0 0x87 0xB0..0xFF     #Cn [3600] <reserved-E01F0>..<reserved-E0FFF>