package mahonia import ( "sync" ) // Converters for GB18030 encoding. func init() { RegisterCharset(&Charset{ Name: "GB18030", NewDecoder: func() Decoder { gb18030Once.Do(buildGB18030Tables) return decodeGB18030Rune }, NewEncoder: func() Encoder { gb18030Once.Do(buildGB18030Tables) return encodeGB18030Rune }, }) } func decodeGB18030Rune(p []byte) (r rune, size int, status Status) { if len(p) == 0 { status = NO_ROOM return } b := p[0] if b < 128 { return rune(b), 1, SUCCESS } if len(p) < 2 { status = NO_ROOM return } if p[0] < 0x81 || p[0] > 0xfe { return 0xfffd, 1, INVALID_CHAR } if p[1] >= 0x40 { // 2-byte character c := uint16(p[0])<<8 + uint16(p[1]) r = rune(gbkToUnicode[c]) if r == 0 { r = gbkToUnicodeExtra[c] } if r != 0 { return r, 2, SUCCESS } } else if p[1] >= 0x30 { // 4-byte character if len(p) < 4 { return 0, 0, NO_ROOM } if p[2] < 0x81 || p[2] > 0xfe || p[3] < 0x30 || p[3] > 0x39 { return 0xfffd, 1, INVALID_CHAR } code := uint32(p[0])<<24 + uint32(p[1])<<16 + uint32(p[2])<<8 + uint32(p[3]) lin := gb18030Linear(code) if lin <= maxGB18030Linear { r = rune(gb18030LinearToUnicode[lin]) if r != 0 { return r, 4, SUCCESS } } for _, rng := range gb18030Ranges { if lin >= rng.firstGB && lin <= rng.lastGB { return rng.firstRune + rune(lin) - rune(rng.firstGB), 4, SUCCESS } } } return 0xfffd, 1, INVALID_CHAR } func encodeGB18030Rune(p []byte, r rune) (size int, status Status) { if len(p) == 0 { status = NO_ROOM return } if r < 128 { p[0] = byte(r) return 1, SUCCESS } if len(p) < 2 { status = NO_ROOM return } var c uint16 if r < 0x10000 { c = unicodeToGBK[r] } else { c = unicodeToGBKExtra[r] } if c != 0 { p[0] = byte(c >> 8) p[1] = byte(c) return 2, SUCCESS } if len(p) < 4 { return 0, NO_ROOM } if r < 0x10000 { f := unicodeToGB18030[r] if f != 0 { p[0] = byte(f >> 24) p[1] = byte(f >> 16) p[2] = byte(f >> 8) p[3] = byte(f) return 4, SUCCESS } } for _, rng := range gb18030Ranges { if r >= rng.firstRune && r <= rng.lastRune { lin := rng.firstGB + uint32(r) - uint32(rng.firstRune) p[0] = byte(lin/(10*126*10)) + 0x81 p[1] = byte(lin/(126*10)%10) + 0x30 p[2] = byte(lin/10%126) + 0x81 p[3] = byte(lin%10) + 0x30 return 4, SUCCESS } } p[0] = 0x1a return 1, INVALID_CHAR } var gb18030Once sync.Once // Mapping from gb18039Linear values to Unicode. var gb18030LinearToUnicode []uint16 var unicodeToGB18030 []uint32 func buildGB18030Tables() { gb18030LinearToUnicode = make([]uint16, maxGB18030Linear+1) unicodeToGB18030 = make([]uint32, 65536) for _, data := range gb18030Data { gb18030LinearToUnicode[gb18030Linear(data.gb18030)] = data.unicode unicodeToGB18030[data.unicode] = data.gb18030 } }