001/*
002 *    GeoAPI - Java interfaces for OGC/ISO standards
003 *    Copyright © 2007-2024 Open Geospatial Consortium, Inc.
004 *    http://www.geoapi.org
005 *
006 *    Licensed under the Apache License, Version 2.0 (the "License");
007 *    you may not use this file except in compliance with the License.
008 *    You may obtain a copy of the License at
009 *
010 *        http://www.apache.org/licenses/LICENSE-2.0
011 *
012 *    Unless required by applicable law or agreed to in writing, software
013 *    distributed under the License is distributed on an "AS IS" BASIS,
014 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 *    See the License for the specific language governing permissions and
016 *    limitations under the License.
017 */
018package org.opengis.metadata.identification;
019
020import java.nio.charset.Charset;
021import java.nio.charset.UnsupportedCharsetException;
022
023import org.opengis.util.CodeList;
024import org.opengis.annotation.UML;
025import org.opengis.geoapi.internal.Vocabulary;
026
027import static org.opengis.annotation.Specification.ISO_19115;
028import static org.opengis.annotation.Obligation.CONDITIONAL;
029
030
031/**
032 * Name of the character coding standard used for the resource.
033 * The UML identifiers were defined in ISO 19115:2003, but removed from ISO 19115:2014.
034 * Mapping from legacy UML identifiers to the IANA names is as below:
035 *
036 * <blockquote><pre> # From ISO 19115:2003 to java.nio.charset
037 * ucs2       = UCS-2
038 * ucs4       = UCS-4
039 * utf7       = UTF-7
040 * utf8       = UTF-8
041 * utf16      = UTF-16
042 * 8859part1  = ISO-8859-1
043 * 8859part2  = ISO-8859-2
044 * 8859part3  = ISO-8859-3
045 * 8859part4  = ISO-8859-4
046 * 8859part5  = ISO-8859-5
047 * 8859part6  = ISO-8859-6
048 * 8859part7  = ISO-8859-7
049 * 8859part8  = ISO-8859-8
050 * 8859part9  = ISO-8859-9
051 * 8859part10 = ISO-8859-10
052 * 8859part11 = ISO-8859-11
053 * 8859part12 = ISO-8859-12
054 * 8859part13 = ISO-8859-13
055 * 8859part14 = ISO-8859-14
056 * 8859part15 = ISO-8859-15
057 * 8859part16 = ISO-8859-16
058 * jis        = JIS_X0201
059 * shiftJIS   = Shift_JIS
060 * eucJP      = EUC-JP
061 * usAscii    = US-ASCII
062 * ebcdic     = EBCDIC
063 * eucKR      = EUC-KR
064 * big5       = Big5
065 * GB2312     = GB2312</pre></blockquote>
066 *
067 * This mapping is provided by GeoAPI in the {@code org/opengis/metadata/2003/charset-codes.properties} file.
068 * This file can be read as below:
069 *
070 * {@snippet lang="java" :
071 * final Properties codes = new Properties();
072 * try (InputStream in = Metadata.class.getResourceAsStream("2003/charset-codes.properties")) {
073 *     codes.load(in);
074 * }}
075 *
076 * <!-- We do not provide convenience method here for above snippet because this class is deprecated.
077 *      Above snippet works with user's named module because "2003" is not a valid package name. -->
078 *
079 * @author  Ely Conn (Leica Geosystems Geospatial Imaging, LLC)
080 * @version 3.1
081 * @since   2.1
082 *
083 * @deprecated As of ISO 19115:2014, replaced by a reference to the
084 * <a href="http://www.iana.org/assignments/character-sets">IANA Character Set register</a>.
085 * Represented in Java by {@link java.nio.charset.Charset}.
086 */
087@Vocabulary(capacity=29)
088@Deprecated(since="3.1")
089@UML(identifier="MD_CharacterSetCode", specification=ISO_19115)
090public final class CharacterSet extends CodeList<CharacterSet> {
091    /**
092     * Serial number for compatibility with different versions.
093     */
094    private static final long serialVersionUID = -4726629268456735927L;
095
096    /**
097     * 16-bit fixed size Universal Character Set, based on ISO/IEC 10646.
098     */
099    @UML(identifier="ucs2", obligation=CONDITIONAL, specification=ISO_19115, version=2003)
100    public static final CharacterSet UCS_2 = new CharacterSet("UCS_2", "UCS-2");
101
102    /**
103     * 32-bit fixed size Universal Character Set, based on ISO/IEC 10646.
104     */
105    @UML(identifier="ucs4", obligation=CONDITIONAL, specification=ISO_19115, version=2003)
106    public static final CharacterSet UCS_4 = new CharacterSet("UCS_4", "UCS-4");
107
108    /**
109     * 7-bit variable size UCS Transfer Format, based on ISO/IEC 10646.
110     */
111    @UML(identifier="utf7", obligation=CONDITIONAL, specification=ISO_19115, version=2003)
112    public static final CharacterSet UTF_7 = new CharacterSet("UTF_7", "UTF-7");
113
114    /**
115     * 8-bit variable size UCS Transfer Format, based on ISO/IEC 10646.
116     */
117    @UML(identifier="utf8", obligation=CONDITIONAL, specification=ISO_19115, version=2003)
118    public static final CharacterSet UTF_8 = new CharacterSet("UTF_8", "UTF-8");
119
120    /**
121     * 16-bit variable size UCS Transfer Format, based on ISO/IEC 10646.
122     */
123    @UML(identifier="utf16", obligation=CONDITIONAL, specification=ISO_19115, version=2003)
124    public static final CharacterSet UTF_16 = new CharacterSet("UTF_16", "UTF-16");
125
126    /**
127     * ISO/IEC 8859-1, Information technology.
128     * 8-bit single-byte coded graphic character sets - Part 1: Latin alphabet No. 1.
129     */
130    @UML(identifier="8859part1", obligation=CONDITIONAL, specification=ISO_19115, version=2003)
131    public static final CharacterSet ISO_8859_1 = new CharacterSet("ISO_8859_1", "ISO-8859-1");
132
133    /**
134     * ISO/IEC 8859-2, Information technology.
135     * 8-bit single-byte coded graphic character sets - Part 2: Latin alphabet No. 2.
136     */
137    @UML(identifier="8859part2", obligation=CONDITIONAL, specification=ISO_19115, version=2003)
138    public static final CharacterSet ISO_8859_2 = new CharacterSet("ISO_8859_2", "ISO-8859-2");
139
140    /**
141     * ISO/IEC 8859-3, Information technology.
142     * 8-bit single-byte coded graphic character sets - Part 3: Latin alphabet No. 3.
143     */
144    @UML(identifier="8859part3", obligation=CONDITIONAL, specification=ISO_19115, version=2003)
145    public static final CharacterSet ISO_8859_3 = new CharacterSet("ISO_8859_3", "ISO-8859-3");
146
147    /**
148     * ISO/IEC 8859-4, Information technology.
149     * 8-bit single-byte coded graphic character sets - Part 4: Latin alphabet No. 4.
150     */
151    @UML(identifier="8859part4", obligation=CONDITIONAL, specification=ISO_19115, version=2003)
152    public static final CharacterSet ISO_8859_4 = new CharacterSet("ISO_8859_4", "ISO-8859-4");
153
154    /**
155     * ISO/IEC 8859-5, Information technology.
156     * 8-bit single-byte coded graphic character sets - Part 5: Latin/Cyrillic alphabet.
157     */
158    @UML(identifier="8859part5", obligation=CONDITIONAL, specification=ISO_19115, version=2003)
159    public static final CharacterSet ISO_8859_5 = new CharacterSet("ISO_8859_5", "ISO-8859-5");
160
161    /**
162     * ISO/IEC 8859-6, Information technology.
163     * 8-bit single-byte coded graphic character sets - Part 6: Latin/Arabic alphabet.
164     */
165    @UML(identifier="8859part6", obligation=CONDITIONAL, specification=ISO_19115, version=2003)
166    public static final CharacterSet ISO_8859_6 = new CharacterSet("ISO_8859_6", "ISO-8859-6");
167
168    /**
169     * ISO/IEC 8859-7, Information technology.
170     * 8-bit single-byte coded graphic character sets - Part 7: Latin/Greek alphabet.
171     */
172    @UML(identifier="8859part7", obligation=CONDITIONAL, specification=ISO_19115, version=2003)
173    public static final CharacterSet ISO_8859_7 = new CharacterSet("ISO_8859_7", "ISO-8859-7");
174
175    /**
176     * ISO/IEC 8859-8, Information technology.
177     * 8-bit single-byte coded graphic character sets - Part 8: Latin/Hebrew alphabet.
178     */
179    @UML(identifier="8859part8", obligation=CONDITIONAL, specification=ISO_19115, version=2003)
180    public static final CharacterSet ISO_8859_8 = new CharacterSet("ISO_8859_8", "ISO-8859-8");
181
182    /**
183     * ISO/IEC 8859-9, Information technology.
184     * 8-bit single-byte coded graphic character sets - Part 9: Latin alphabet No. 5.
185     */
186    @UML(identifier="8859part9", obligation=CONDITIONAL, specification=ISO_19115, version=2003)
187    public static final CharacterSet ISO_8859_9 = new CharacterSet("ISO_8859_9", "ISO-8859-9");
188
189    /**
190     * ISO/IEC 8859-10, Information technology.
191     * 8-bit single-byte coded graphic character sets - Part 10: Latin alphabet No. 6.
192     */
193    @UML(identifier="8859part10", obligation=CONDITIONAL, specification=ISO_19115, version=2003)
194    public static final CharacterSet ISO_8859_10 = new CharacterSet("ISO_8859_10", "ISO-8859-10");
195
196    /**
197     * ISO/IEC 8859-11, Information technology.
198     * 8-bit single-byte coded graphic character sets - Part 11: Latin/Thai alphabet.
199     */
200    @UML(identifier="8859part11", obligation=CONDITIONAL, specification=ISO_19115, version=2003)
201    public static final CharacterSet ISO_8859_11 = new CharacterSet("ISO_8859_11", "ISO-8859-11");
202
203    /**
204     * A future ISO/IEC 8-bit single-byte coded graphic character set.
205     */
206    @UML(identifier="8859part12", obligation=CONDITIONAL, specification=ISO_19115, version=2003)
207    public static final CharacterSet ISO_8859_12 = new CharacterSet("ISO_8859_12", "ISO-8859-12");
208
209    /**
210     * ISO/IEC 8859-13, Information technology.
211     * 8-bit single-byte coded graphic character sets - Part 13: Latin alphabet No. 7.
212     */
213    @UML(identifier="8859part13", obligation=CONDITIONAL, specification=ISO_19115, version=2003)
214    public static final CharacterSet ISO_8859_13 = new CharacterSet("ISO_8859_13", "ISO-8859-13");
215
216    /**
217     * ISO/IEC 8859-14, Information technology.
218     * 8-bit single-byte coded graphic character sets - Part 14: Latin alphabet No. 8 (Celtic).
219     */
220    @UML(identifier="8859part14", obligation=CONDITIONAL, specification=ISO_19115, version=2003)
221    public static final CharacterSet ISO_8859_14 = new CharacterSet("ISO_8859_14", "ISO-8859-14");
222
223    /**
224     * ISO/IEC 8859-15, Information technology.
225     * 8-bit single-byte coded graphic character sets - Part 15: Latin alphabet No. 9.
226     */
227    @UML(identifier="8859part15", obligation=CONDITIONAL, specification=ISO_19115, version=2003)
228    public static final CharacterSet ISO_8859_15 = new CharacterSet("ISO_8859_15", "ISO-8859-15");
229
230    /**
231     * ISO/IEC 8859-16, Information technology.
232     * 8-bit single-byte coded graphic character sets - Part 16: Latin alphabet No. 10.
233     */
234    @UML(identifier="8859part16", obligation=CONDITIONAL, specification=ISO_19115, version=2003)
235    public static final CharacterSet ISO_8859_16 = new CharacterSet("ISO_8859_16", "ISO-8859-16");
236
237    /**
238     * Japanese code set used for electronic transmission.
239     */
240    @UML(identifier="jis", obligation=CONDITIONAL, specification=ISO_19115, version=2003)
241    public static final CharacterSet JIS = new CharacterSet("JIS", "JIS_X0201");
242
243    /**
244     * Japanese code set used on MS-DOS based machines.
245     */
246    @UML(identifier="shiftJIS", obligation=CONDITIONAL, specification=ISO_19115, version=2003)
247    public static final CharacterSet SHIFT_JIS = new CharacterSet("SHIFT_JIS", "Shift_JIS");
248
249    /**
250     * Japanese code set used on UNIX based machines.
251     */
252    @UML(identifier="eucJP", obligation=CONDITIONAL, specification=ISO_19115, version=2003)
253    public static final CharacterSet EUC_JP = new CharacterSet("EUC_JP", "EUC-JP");
254
255    /**
256     * United States ASCII code set (ISO 646 US).
257     */
258    @UML(identifier="usAscii", obligation=CONDITIONAL, specification=ISO_19115, version=2003)
259    public static final CharacterSet US_ASCII = new CharacterSet("US_ASCII", "US-ASCII");
260
261    /**
262     * IBM mainframe code set.
263     */
264    @UML(identifier="ebcdic", obligation=CONDITIONAL, specification=ISO_19115, version=2003)
265    public static final CharacterSet EBCDIC = new CharacterSet("EBCDIC", null);
266
267    /**
268     * Korean code set.
269     */
270    @UML(identifier="eucKR", obligation=CONDITIONAL, specification=ISO_19115, version=2003)
271    public static final CharacterSet EUC_KR = new CharacterSet("EUC_KR", "EUC-KR");
272
273    /**
274     * Traditional Chinese code set used in Taiwan, Hong Kong, and other areas.
275     */
276    @UML(identifier="big5", obligation=CONDITIONAL, specification=ISO_19115, version=2003)
277    public static final CharacterSet BIG_5 = new CharacterSet("BIG_5", "Big5");
278
279    /**
280     * Simplified Chinese code set.
281     */
282    @UML(identifier="GB2312", obligation=CONDITIONAL, specification=ISO_19115, version=2003)
283    public static final CharacterSet GB2312 = new CharacterSet("GB2312", "GB2312");
284
285    /**
286     * The Java {@link Charset} name (never {@code null}).
287     */
288    private final String charset;
289
290    /**
291     * Constructs an element of the given name.
292     *
293     * @param name     the name of the new element. This name shall not be in use by another element of this type.
294     * @param charset  the Java {@link Charset} name, or {@code null} if none.
295     */
296    private CharacterSet(final String name, final String charset) {
297        super(name);
298        this.charset = (charset != null) ? charset : name;
299    }
300
301    /**
302     * Constructs an enum with identical name and charset.
303     * This is needed for {@link CodeList#valueOf(Class, String)} reflection.
304     *
305     * @param name  the name of the new element.
306     */
307    private CharacterSet(final String name) {
308        this(name, name);
309    }
310
311    /**
312     * Converts the given Java Character Set to {@code CharacterSet}.
313     *
314     * @param  cs  the character set, or {@code null}.
315     * @return a code list for the given character set, or {@code null} if the given {@code cs} was null.
316     *
317     * @since 3.1
318     */
319    public static CharacterSet fromCharset(final Charset cs) {
320        if (cs == null) {
321            return null;
322        }
323        final String name = cs.name();
324        for (final CharacterSet candidate : CharacterSet.values()) {
325            for (final String n : candidate.names()) {
326                if (name.equals(n)) {
327                    return candidate;
328                }
329            }
330        }
331        return valueOf(name);
332    }
333
334    /**
335     * Converts the Character Set to a java Charset, if it can.
336     * This method is provided for migration from this legacy code lists to {@link Charset}.
337     *
338     * @return the Java Charset.
339     * @throws UnsupportedCharsetException if no support for the charset is available.
340     *
341     * @see <a href="https://docs.oracle.com/en/java/javase/11/intl/supported-encodings.html">Supported encodings</a>
342     */
343    public Charset toCharset() throws UnsupportedCharsetException {
344        return Charset.forName(charset);
345    }
346
347    /**
348     * Returns all the names of this code. The returned array contains the
349     * following elements, with duplicated values and null values removed:
350     *
351     * <ul>
352     *   <li>The programmatic {@linkplain #name() name}</li>
353     *   <li>The UML {@linkplain #identifier() identifier}</li>
354     *   <li>The {@linkplain #toCharset() charset} name</li>
355     * </ul>
356     *
357     * Those names are typically equal except for the case (programmatic names are upper case
358     * while UML names are lower case) and special characters like {@code '-'}.
359     *
360     * @return all names of this code constant. This array is never null and never empty.
361     */
362    @Override
363    public String[] names() {
364        final String name = name();
365        if (charset.equals(name)) {
366            return super.names();
367        }
368        final String identifier = identifier().orElse(null);
369        if (identifier != null && !identifier.equals(name)) {
370            return new String[] {name, identifier, charset};
371        } else {
372            return new String[] {name, charset};
373        }
374    }
375
376    /**
377     * Returns the list of {@code CharacterSet}s.
378     *
379     * @return the list of codes declared in the current JVM.
380     */
381    public static CharacterSet[] values() {
382        return values(CharacterSet.class);
383    }
384
385    /**
386     * Returns the list of codes of the same kind as this code list element.
387     * Invoking this method is equivalent to invoking {@link #values()}, except that
388     * this method can be invoked on an instance of the parent {@code CodeList} class.
389     *
390     * @return all code {@linkplain #values() values} for this code list.
391     */
392    @Override
393    public CharacterSet[] family() {
394        return values();
395    }
396
397    /**
398     * Returns the character set that matches the given string, or returns a new one if none match it.
399     * This methods returns the first instance (in declaration order) for which the {@linkplain #name() name}
400     * is {@linkplain String#equalsIgnoreCase(String) equals, ignoring case}, to the given name.
401     * If no existing instance is found, then a new one is created for the given name.
402     *
403     * @param  code  the name of the code to fetch or to create.
404     * @return a code matching the given name.
405     */
406    public static CharacterSet valueOf(String code) {
407        return valueOf(CharacterSet.class, code, CharacterSet::new).get();
408    }
409}