Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
ExtraCharsetsProvider |
|
| 4.5;4,5 |
1 | /* | |
2 | * Copyright (c) 2009 - 2011 Michael Schierl | |
3 | * | |
4 | * All rights reserved. | |
5 | * | |
6 | * Redistribution and use in source and binary forms, with or without | |
7 | * modification, are permitted provided that the following conditions | |
8 | * are met: | |
9 | * | |
10 | * - Redistributions of source code must retain the above copyright notice, | |
11 | * this list of conditions and the following disclaimer. | |
12 | * | |
13 | * - Redistributions in binary form must reproduce the above copyright | |
14 | * notice, this list of conditions and the following disclaimer in the | |
15 | * documentation and/or other materials provided with the distribution. | |
16 | * | |
17 | * - Neither name of the copyright holders nor the names of its | |
18 | * contributors may be used to endorse or promote products derived from | |
19 | * this software without specific prior written permission. | |
20 | * | |
21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND THE CONTRIBUTORS | |
22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
25 | * HOLDERS OR THE CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | |
26 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, | |
27 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS | |
28 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | |
29 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR | |
30 | * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |
31 | * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
32 | */ | |
33 | package net.sf.jmatchparser.util.charset; | |
34 | ||
35 | import java.nio.charset.Charset; | |
36 | import java.nio.charset.spi.CharsetProvider; | |
37 | import java.util.Arrays; | |
38 | import java.util.HashMap; | |
39 | import java.util.HashSet; | |
40 | import java.util.Iterator; | |
41 | import java.util.Map; | |
42 | import java.util.Set; | |
43 | ||
44 | /** | |
45 | * Charset provider that provides over 150 extra character sets. | |
46 | * | |
47 | * <p> | |
48 | * It also provides more aliases for existing charsets, based on the <a | |
49 | * href="http://www.iana.org/assignments/character-sets">IANA Character Set | |
50 | * Database</a>. | |
51 | * | |
52 | * <p> | |
53 | * This class is loaded automatically via SPI when it is in the class path. | |
54 | * | |
55 | * <h2>Character sets added by this provider</h2> | |
56 | * | |
57 | * <h3>From <a href="http://www.ietf.org/rfc/rfc1345.txt">RFC 1345</a></h3> | |
58 | * <ul> | |
59 | * <li>ISO_646.basic:1983</li> | |
60 | * <li>INVARIANT</li> | |
61 | * <li>BS_4730</li> | |
62 | * <li>NATS-SEFI</li> | |
63 | * <li>NATS-SEFI-ADD</li> | |
64 | * <li>NATS-DANO</li> | |
65 | * <li>NATS-DANO-ADD</li> | |
66 | * <li>SEN_850200_B</li> | |
67 | * <li>SEN_850200_C</li> | |
68 | * <li>JIS_C6220-1969-jp</li> | |
69 | * <li>JIS_C6220-1969-ro</li> | |
70 | * <li>IT</li> | |
71 | * <li>PT</li> | |
72 | * <li>ES</li> | |
73 | * <li>greek7-old</li> | |
74 | * <li>latin-greek</li> | |
75 | * <li>DIN_66003</li> | |
76 | * <li>iso-ir-25</li> | |
77 | * <li>Latin-greek-1</li> | |
78 | * <li>ISO_5427</li> | |
79 | * <li>BS_viewdata</li> | |
80 | * <li>INIS</li> | |
81 | * <li>INIS-8</li> | |
82 | * <li>INIS-cyrillic</li> | |
83 | * <li>ISO_5427:1981</li> | |
84 | * <li>ISO_5428:1980</li> | |
85 | * <li>GB_1988-80</li> | |
86 | * <li>NS_4551-1</li> | |
87 | * <li>NS_4551-2</li> | |
88 | * <li>NF_Z_62-010</li> | |
89 | * <li>PT2</li> | |
90 | * <li>ES2</li> | |
91 | * <li>MSZ_7795.3</li> | |
92 | * <li>greek7</li> | |
93 | * <li>ASMO_449</li> | |
94 | * <li>JIS_C6229-1984-a</li> | |
95 | * <li>JIS_C6229-1984-b</li> | |
96 | * <li>JIS_C6229-1984-b-add</li> | |
97 | * <li>JIS_C6229-1984-hand</li> | |
98 | * <li>JIS_C6229-1984-hand-add</li> | |
99 | * <li>JIS_C6229-1984-kana</li> | |
100 | * <li>ISO_2033-1983</li> | |
101 | * <li>T.61-7bit</li> | |
102 | * <li>ECMA-cyrillic</li> | |
103 | * <li>CSA_Z243.4-1985-1</li> | |
104 | * <li>CSA_Z243.4-1985-2</li> | |
105 | * <li>CSA_Z243.4-1985-gr</li> | |
106 | * <li>CSN_369103</li> | |
107 | * <li>JUS_I.B1.002</li> | |
108 | * <li>IEC_P27-1</li> | |
109 | * <li>JUS_I.B1.003-serb</li> | |
110 | * <li>JUS_I.B1.003-mac</li> | |
111 | * <li>greek-ccitt</li> | |
112 | * <li>NC_NC00-10:81</li> | |
113 | * <li>ISO_6937-2-25</li> | |
114 | * <li>ISO_8859-supp</li> | |
115 | * <li>ISO_10367-box</li> | |
116 | * <li>latin-lap</li> | |
117 | * <li>DS_2089</li> | |
118 | * <li>KSC5636</li> | |
119 | * <li>DEC-MCS</li> | |
120 | * <li>hp-roman8</li> | |
121 | * <li>macintosh</li> | |
122 | * <li>IBM038</li> | |
123 | * <li>IBM274</li> | |
124 | * <li>IBM275</li> | |
125 | * <li>IBM281</li> | |
126 | * <li>IBM290</li> | |
127 | * <li>IBM423</li> | |
128 | * <li>IBM851</li> | |
129 | * <li>IBM880</li> | |
130 | * <li>IBM891</li> | |
131 | * <li>IBM903</li> | |
132 | * <li>IBM904</li> | |
133 | * <li>IBM905</li> | |
134 | * <li>EBCDIC-AT-DE</li> | |
135 | * <li>EBCDIC-AT-DE-A</li> | |
136 | * <li>EBCDIC-CA-FR</li> | |
137 | * <li>EBCDIC-DK-NO</li> | |
138 | * <li>EBCDIC-DK-NO-A</li> | |
139 | * <li>EBCDIC-FI-SE</li> | |
140 | * <li>EBCDIC-FI-SE-A</li> | |
141 | * <li>EBCDIC-FR</li> | |
142 | * <li>EBCDIC-IT</li> | |
143 | * <li>EBCDIC-PT</li> | |
144 | * <li>EBCDIC-ES</li> | |
145 | * <li>EBCDIC-ES-A</li> | |
146 | * <li>EBCDIC-ES-S</li> | |
147 | * <li>EBCDIC-UK</li> | |
148 | * <li>EBCDIC-US</li> | |
149 | * <li>videotex-suppl</li> | |
150 | * <li>iso-ir-90</li> | |
151 | * <li>ANSI_X3.110-1983</li> | |
152 | * <li>T.61-8bit</li> | |
153 | * <li>T.101-G2</li> | |
154 | * <li>ISO_6937-2-add</li> | |
155 | * <li>us-dk</li> | |
156 | * <li>dk-us</li> | |
157 | * </ul> | |
158 | * | |
159 | * <h3>From <a href="http://www.unicode.org/Public/MAPPINGS/">Unicode.org</a></h3> | |
160 | * <ul> | |
161 | * <li>ISO-8859-1:1998</li> | |
162 | * <li>ISO-8859-10:1998</li> | |
163 | * <li>ISO-8859-11:2001</li> | |
164 | * <li>ISO-8859-13:1998</li> | |
165 | * <li>ISO-8859-14:1998</li> | |
166 | * <li>ISO-8859-15:1999</li> | |
167 | * <li>ISO-8859-16:2001</li> | |
168 | * <li>ISO-8859-2:1999</li> | |
169 | * <li>ISO-8859-3:1998</li> | |
170 | * <li>ISO-8859-4:1998</li> | |
171 | * <li>ISO-8859-5:1999</li> | |
172 | * <li>ISO-8859-6:1999</li> | |
173 | * <li>ISO-8859-7:1987a</li> | |
174 | * <li>ISO-8859-7:1987b</li> | |
175 | * <li>ISO-8859-7:2003</li> | |
176 | * <li>ISO-8859-8:1999</li> | |
177 | * <li>ISO-8859-9:1999</li> | |
178 | * <li>ISO-8859-10</li> | |
179 | * <li>ISO-8859-14</li> | |
180 | * <li>ISO-8859-16</li> | |
181 | * <li>MacCeltic</li> | |
182 | * <li>MacCenteuro</li> | |
183 | * <li>Apple-MacCroatian</li> | |
184 | * <li>Apple-MacCyrillic</li> | |
185 | * <li>MacDingbats</li> | |
186 | * <li>MacGaelic</li> | |
187 | * <li>Apple-MacGreek</li> | |
188 | * <li>MacIcelandic</li> | |
189 | * <li>MacInuit</li> | |
190 | * <li>Apple-MacRoman</li> | |
191 | * <li>MacRomanian</li> | |
192 | * <li>Apple-MacTurkish</li> | |
193 | * <li>Microsoft-MacIcelandic</li> | |
194 | * <li>Microsoft-MacLatin2</li> | |
195 | * <li>AtariST</li> | |
196 | * <li>KZ-1048</li> | |
197 | * <li>US-ASCII-QUOTES</li> | |
198 | * <li>NextStep</li> | |
199 | * <li>Adobe-Standard-Encoding</li> | |
200 | * <li>Adobe-Symbol-Encoding</li> | |
201 | * <li>Adobe-Zapf-Dingbats-Encoding</li> | |
202 | * <li>windows-1250-bestfit</li> | |
203 | * <li>windows-1251-bestfit</li> | |
204 | * <li>windows-1252-bestfit</li> | |
205 | * <li>windows-1253-bestfit</li> | |
206 | * <li>windows-1254-bestfit</li> | |
207 | * <li>windows-1255-bestfit</li> | |
208 | * <li>windows-1256-bestfit</li> | |
209 | * <li>windows-1257-bestfit</li> | |
210 | * <li>windows-1258-bestfit</li> | |
211 | * <li>windows-874-bestfit</li> | |
212 | * </ul> | |
213 | * | |
214 | * <h3>Bestfit charset derived from the charsets above</h3> | |
215 | * <ul> | |
216 | * <li>US-ASCII-bestfit</li> | |
217 | * </ul> | |
218 | * | |
219 | * <h3>Bestfit charset derived from the charsets above and from <a | |
220 | * href="http://unicode.org/Public/UNIDATA/UnicodeData.txt">The Unicode | |
221 | * Database</a>, see {@link ToAsciiMapping}</h3> | |
222 | * <ul> | |
223 | * <li>US-ASCII-bestfit-2</li> | |
224 | * </ul> | |
225 | */ | |
226 | 6093 | public class ExtraCharsetsProvider extends CharsetProvider { |
227 | ||
228 | 1 | private static volatile Map<String, Charset> allCharsets = null; |
229 | ||
230 | 1 | private static volatile Set<String> allCharsetNames = null; |
231 | ||
232 | @Override | |
233 | public Charset charsetForName(String charsetName) { | |
234 | 6093 | if (allCharsets == null) { |
235 | 1 | if (allCharsetNames == null) { |
236 | 1 | allCharsetNames = new HashSet<String>(); |
237 | 1 | allCharsetNames.addAll(Arrays.asList(CharsetNameList.CHARSET_NAMES)); |
238 | } | |
239 | 1 | if (!allCharsetNames.contains(charsetName.toLowerCase())) |
240 | 0 | return null; |
241 | 1 | if (allCharsets == null) { |
242 | 1 | allCharsets = new HashMap<String, Charset>(); |
243 | 169 | for (Charset cs : CharsetList.ALL_CHARSETS) { |
244 | 168 | allCharsets.put(cs.name().toLowerCase(), cs); |
245 | 168 | for (String alias : cs.aliases()) { |
246 | 271 | allCharsets.put(alias.toLowerCase(), cs); |
247 | } | |
248 | } | |
249 | } | |
250 | 1 | allCharsetNames = null; |
251 | } | |
252 | 6093 | return allCharsets.get(charsetName.toLowerCase()); |
253 | } | |
254 | ||
255 | @Override | |
256 | public Iterator<Charset> charsets() { | |
257 | 0 | return Arrays.asList(CharsetList.ALL_CHARSETS).iterator(); |
258 | } | |
259 | } |