|
4 | 4 |
|
5 | 5 | //go:generate go run gen.go |
6 | 6 |
|
7 | | -// Package identifier defines the contract between implementations of Encoding |
8 | | -// and Index by defining identifiers that uniquely identify standardized coded |
9 | | -// character sets (CCS) and character encoding schemes (CES), which we will |
10 | | -// together refer to as encodings, for which Encoding implementations provide |
11 | | -// converters to and from UTF-8. This package is typically only of concern to |
12 | | -// implementers of Indexes and Encodings. |
13 | | -// |
14 | | -// One part of the identifier is the MIB code, which is defined by IANA and |
15 | | -// uniquely identifies a CCS or CES. Each code is associated with data that |
16 | | -// references authorities, official documentation as well as aliases and MIME |
17 | | -// names. |
18 | | -// |
19 | | -// Not all CESs are covered by the IANA registry. The "other" string that is |
20 | | -// returned by ID can be used to identify other character sets or versions of |
21 | | -// existing ones. |
22 | | -// |
23 | | -// It is recommended that each package that provides a set of Encodings provide |
24 | | -// the All and Common variables to reference all supported encodings and |
25 | | -// a commonly used subset. This allows Index implementations to include all |
26 | | -// available encodings without explicitly referencing or knowing about them. |
27 | 7 | package identifier |
28 | 8 |
|
29 | | -// Note: this package is internal, but could be made public if there is a need |
30 | | -// for writing third-party Indexes and Encodings. |
31 | | - |
32 | | -// References: |
33 | | -// - http://source.icu-project.org/repos/icu/icu/trunk/source/data/mappings/convrtrs.txt |
34 | | -// - http://www.iana.org/assignments/character-sets/character-sets.xhtml |
35 | | -// - http://www.iana.org/assignments/ianacharset-mib/ianacharset-mib |
36 | | -// - http://www.ietf.org/rfc/rfc2978.txt |
37 | | -// - https://www.unicode.org/reports/tr22/ |
38 | | -// - http://www.w3.org/TR/encoding/ |
39 | | -// - https://encoding.spec.whatwg.org/ |
40 | | -// - https://encoding.spec.whatwg.org/encodings.json |
41 | | -// - https://tools.ietf.org/html/rfc6657#section-5 |
42 | | - |
43 | | -// Interface can be implemented by Encodings to define the CCS or CES for which |
44 | | -// they implement conversions. |
45 | | -type Interface interface { |
46 | | - // ID returns an encoding identifier. Exactly one of the mib and other |
47 | | - // values should be non-zero. |
48 | | - // |
49 | | - // In the usual case it is only necessary to indicate the MIB code. The |
50 | | - // other string can be used to specify encodings for which there is no MIB, |
51 | | - // such as "x-mac-dingbat". |
52 | | - // |
53 | | - // The other string may only contain the characters a-z, A-Z, 0-9, - and _. |
54 | | - ID() (mib MIB, other string) |
55 | | - |
56 | | - // NOTE: the restrictions on the encoding are to allow extending the syntax |
57 | | - // with additional information such as versions, vendors and other variants. |
58 | | -} |
59 | | - |
60 | | -// A MIB identifies an encoding. It is derived from the IANA MIB codes and adds |
61 | | -// some identifiers for some encodings that are not covered by the IANA |
62 | | -// standard. |
63 | | -// |
64 | | -// See http://www.iana.org/assignments/ianacharset-mib. |
65 | | -type MIB uint16 |
66 | | - |
67 | | -// These additional MIB types are not defined in IANA. They are added because |
68 | | -// they are common and defined within the text repo. |
69 | | -const ( |
70 | | - // Unofficial marks the start of encodings not registered by IANA. |
71 | | - Unofficial MIB = 10000 + iota |
| 9 | +import ( |
| 10 | + "golang.org/x/text/encoding/identifier" |
| 11 | +) |
72 | 12 |
|
73 | | - // Replacement is the WhatWG replacement encoding. |
74 | | - Replacement |
| 13 | +var Replacement = identifier.Replacement |
75 | 14 |
|
76 | | - // XUserDefined is the code for x-user-defined. |
77 | | - XUserDefined |
| 15 | +type Interface = identifier.Interface |
| 16 | +type MIB = identifier.MIB |
78 | 17 |
|
79 | | - // MacintoshCyrillic is the code for x-mac-cyrillic. |
80 | | - MacintoshCyrillic |
81 | | -) |
| 18 | +var Unofficial = identifier.Unofficial |
| 19 | +var MacintoshCyrillic = identifier.MacintoshCyrillic |
| 20 | +var XUserDefined = identifier.XUserDefined |
0 commit comments