001/* 002 * GeoAPI - Java interfaces for OGC/ISO standards 003 * Copyright © 2007-2024 Open Geospatial Consortium, Inc. 004 * http://www.geoapi.org 005 * 006 * Licensed under the Apache License, Version 2.0 (the "License"); 007 * you may not use this file except in compliance with the License. 008 * You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.opengis.metadata.identification; 019 020import java.nio.charset.Charset; 021import java.nio.charset.UnsupportedCharsetException; 022 023import org.opengis.util.CodeList; 024import org.opengis.annotation.UML; 025import org.opengis.geoapi.internal.Vocabulary; 026 027import static org.opengis.annotation.Specification.ISO_19115; 028import static org.opengis.annotation.Obligation.CONDITIONAL; 029 030 031/** 032 * Name of the character coding standard used for the resource. 033 * The UML identifiers were defined in ISO 19115:2003, but removed from ISO 19115:2014. 034 * Mapping from legacy UML identifiers to the IANA names is as below: 035 * 036 * <blockquote><pre> # From ISO 19115:2003 to java.nio.charset 037 * ucs2 = UCS-2 038 * ucs4 = UCS-4 039 * utf7 = UTF-7 040 * utf8 = UTF-8 041 * utf16 = UTF-16 042 * 8859part1 = ISO-8859-1 043 * 8859part2 = ISO-8859-2 044 * 8859part3 = ISO-8859-3 045 * 8859part4 = ISO-8859-4 046 * 8859part5 = ISO-8859-5 047 * 8859part6 = ISO-8859-6 048 * 8859part7 = ISO-8859-7 049 * 8859part8 = ISO-8859-8 050 * 8859part9 = ISO-8859-9 051 * 8859part10 = ISO-8859-10 052 * 8859part11 = ISO-8859-11 053 * 8859part12 = ISO-8859-12 054 * 8859part13 = ISO-8859-13 055 * 8859part14 = ISO-8859-14 056 * 8859part15 = ISO-8859-15 057 * 8859part16 = ISO-8859-16 058 * jis = JIS_X0201 059 * shiftJIS = Shift_JIS 060 * eucJP = EUC-JP 061 * usAscii = US-ASCII 062 * ebcdic = EBCDIC 063 * eucKR = EUC-KR 064 * big5 = Big5 065 * GB2312 = GB2312</pre></blockquote> 066 * 067 * This mapping is provided by GeoAPI in the {@code org/opengis/metadata/2003/charset-codes.properties} file. 068 * This file can be read as below: 069 * 070 * {@snippet lang="java" : 071 * final Properties codes = new Properties(); 072 * try (InputStream in = Metadata.class.getResourceAsStream("2003/charset-codes.properties")) { 073 * codes.load(in); 074 * }} 075 * 076 * <!-- We do not provide convenience method here for above snippet because this class is deprecated. 077 * Above snippet works with user's named module because "2003" is not a valid package name. --> 078 * 079 * @author Ely Conn (Leica Geosystems Geospatial Imaging, LLC) 080 * @version 3.1 081 * @since 2.1 082 * 083 * @deprecated As of ISO 19115:2014, replaced by a reference to the 084 * <a href="http://www.iana.org/assignments/character-sets">IANA Character Set register</a>. 085 * Represented in Java by {@link java.nio.charset.Charset}. 086 */ 087@Vocabulary(capacity=29) 088@Deprecated(since="3.1") 089@UML(identifier="MD_CharacterSetCode", specification=ISO_19115) 090public final class CharacterSet extends CodeList<CharacterSet> { 091 /** 092 * Serial number for compatibility with different versions. 093 */ 094 private static final long serialVersionUID = -4726629268456735927L; 095 096 /** 097 * 16-bit fixed size Universal Character Set, based on ISO/IEC 10646. 098 */ 099 @UML(identifier="ucs2", obligation=CONDITIONAL, specification=ISO_19115, version=2003) 100 public static final CharacterSet UCS_2 = new CharacterSet("UCS_2", "UCS-2"); 101 102 /** 103 * 32-bit fixed size Universal Character Set, based on ISO/IEC 10646. 104 */ 105 @UML(identifier="ucs4", obligation=CONDITIONAL, specification=ISO_19115, version=2003) 106 public static final CharacterSet UCS_4 = new CharacterSet("UCS_4", "UCS-4"); 107 108 /** 109 * 7-bit variable size UCS Transfer Format, based on ISO/IEC 10646. 110 */ 111 @UML(identifier="utf7", obligation=CONDITIONAL, specification=ISO_19115, version=2003) 112 public static final CharacterSet UTF_7 = new CharacterSet("UTF_7", "UTF-7"); 113 114 /** 115 * 8-bit variable size UCS Transfer Format, based on ISO/IEC 10646. 116 */ 117 @UML(identifier="utf8", obligation=CONDITIONAL, specification=ISO_19115, version=2003) 118 public static final CharacterSet UTF_8 = new CharacterSet("UTF_8", "UTF-8"); 119 120 /** 121 * 16-bit variable size UCS Transfer Format, based on ISO/IEC 10646. 122 */ 123 @UML(identifier="utf16", obligation=CONDITIONAL, specification=ISO_19115, version=2003) 124 public static final CharacterSet UTF_16 = new CharacterSet("UTF_16", "UTF-16"); 125 126 /** 127 * ISO/IEC 8859-1, Information technology. 128 * 8-bit single-byte coded graphic character sets - Part 1: Latin alphabet No. 1. 129 */ 130 @UML(identifier="8859part1", obligation=CONDITIONAL, specification=ISO_19115, version=2003) 131 public static final CharacterSet ISO_8859_1 = new CharacterSet("ISO_8859_1", "ISO-8859-1"); 132 133 /** 134 * ISO/IEC 8859-2, Information technology. 135 * 8-bit single-byte coded graphic character sets - Part 2: Latin alphabet No. 2. 136 */ 137 @UML(identifier="8859part2", obligation=CONDITIONAL, specification=ISO_19115, version=2003) 138 public static final CharacterSet ISO_8859_2 = new CharacterSet("ISO_8859_2", "ISO-8859-2"); 139 140 /** 141 * ISO/IEC 8859-3, Information technology. 142 * 8-bit single-byte coded graphic character sets - Part 3: Latin alphabet No. 3. 143 */ 144 @UML(identifier="8859part3", obligation=CONDITIONAL, specification=ISO_19115, version=2003) 145 public static final CharacterSet ISO_8859_3 = new CharacterSet("ISO_8859_3", "ISO-8859-3"); 146 147 /** 148 * ISO/IEC 8859-4, Information technology. 149 * 8-bit single-byte coded graphic character sets - Part 4: Latin alphabet No. 4. 150 */ 151 @UML(identifier="8859part4", obligation=CONDITIONAL, specification=ISO_19115, version=2003) 152 public static final CharacterSet ISO_8859_4 = new CharacterSet("ISO_8859_4", "ISO-8859-4"); 153 154 /** 155 * ISO/IEC 8859-5, Information technology. 156 * 8-bit single-byte coded graphic character sets - Part 5: Latin/Cyrillic alphabet. 157 */ 158 @UML(identifier="8859part5", obligation=CONDITIONAL, specification=ISO_19115, version=2003) 159 public static final CharacterSet ISO_8859_5 = new CharacterSet("ISO_8859_5", "ISO-8859-5"); 160 161 /** 162 * ISO/IEC 8859-6, Information technology. 163 * 8-bit single-byte coded graphic character sets - Part 6: Latin/Arabic alphabet. 164 */ 165 @UML(identifier="8859part6", obligation=CONDITIONAL, specification=ISO_19115, version=2003) 166 public static final CharacterSet ISO_8859_6 = new CharacterSet("ISO_8859_6", "ISO-8859-6"); 167 168 /** 169 * ISO/IEC 8859-7, Information technology. 170 * 8-bit single-byte coded graphic character sets - Part 7: Latin/Greek alphabet. 171 */ 172 @UML(identifier="8859part7", obligation=CONDITIONAL, specification=ISO_19115, version=2003) 173 public static final CharacterSet ISO_8859_7 = new CharacterSet("ISO_8859_7", "ISO-8859-7"); 174 175 /** 176 * ISO/IEC 8859-8, Information technology. 177 * 8-bit single-byte coded graphic character sets - Part 8: Latin/Hebrew alphabet. 178 */ 179 @UML(identifier="8859part8", obligation=CONDITIONAL, specification=ISO_19115, version=2003) 180 public static final CharacterSet ISO_8859_8 = new CharacterSet("ISO_8859_8", "ISO-8859-8"); 181 182 /** 183 * ISO/IEC 8859-9, Information technology. 184 * 8-bit single-byte coded graphic character sets - Part 9: Latin alphabet No. 5. 185 */ 186 @UML(identifier="8859part9", obligation=CONDITIONAL, specification=ISO_19115, version=2003) 187 public static final CharacterSet ISO_8859_9 = new CharacterSet("ISO_8859_9", "ISO-8859-9"); 188 189 /** 190 * ISO/IEC 8859-10, Information technology. 191 * 8-bit single-byte coded graphic character sets - Part 10: Latin alphabet No. 6. 192 */ 193 @UML(identifier="8859part10", obligation=CONDITIONAL, specification=ISO_19115, version=2003) 194 public static final CharacterSet ISO_8859_10 = new CharacterSet("ISO_8859_10", "ISO-8859-10"); 195 196 /** 197 * ISO/IEC 8859-11, Information technology. 198 * 8-bit single-byte coded graphic character sets - Part 11: Latin/Thai alphabet. 199 */ 200 @UML(identifier="8859part11", obligation=CONDITIONAL, specification=ISO_19115, version=2003) 201 public static final CharacterSet ISO_8859_11 = new CharacterSet("ISO_8859_11", "ISO-8859-11"); 202 203 /** 204 * A future ISO/IEC 8-bit single-byte coded graphic character set. 205 */ 206 @UML(identifier="8859part12", obligation=CONDITIONAL, specification=ISO_19115, version=2003) 207 public static final CharacterSet ISO_8859_12 = new CharacterSet("ISO_8859_12", "ISO-8859-12"); 208 209 /** 210 * ISO/IEC 8859-13, Information technology. 211 * 8-bit single-byte coded graphic character sets - Part 13: Latin alphabet No. 7. 212 */ 213 @UML(identifier="8859part13", obligation=CONDITIONAL, specification=ISO_19115, version=2003) 214 public static final CharacterSet ISO_8859_13 = new CharacterSet("ISO_8859_13", "ISO-8859-13"); 215 216 /** 217 * ISO/IEC 8859-14, Information technology. 218 * 8-bit single-byte coded graphic character sets - Part 14: Latin alphabet No. 8 (Celtic). 219 */ 220 @UML(identifier="8859part14", obligation=CONDITIONAL, specification=ISO_19115, version=2003) 221 public static final CharacterSet ISO_8859_14 = new CharacterSet("ISO_8859_14", "ISO-8859-14"); 222 223 /** 224 * ISO/IEC 8859-15, Information technology. 225 * 8-bit single-byte coded graphic character sets - Part 15: Latin alphabet No. 9. 226 */ 227 @UML(identifier="8859part15", obligation=CONDITIONAL, specification=ISO_19115, version=2003) 228 public static final CharacterSet ISO_8859_15 = new CharacterSet("ISO_8859_15", "ISO-8859-15"); 229 230 /** 231 * ISO/IEC 8859-16, Information technology. 232 * 8-bit single-byte coded graphic character sets - Part 16: Latin alphabet No. 10. 233 */ 234 @UML(identifier="8859part16", obligation=CONDITIONAL, specification=ISO_19115, version=2003) 235 public static final CharacterSet ISO_8859_16 = new CharacterSet("ISO_8859_16", "ISO-8859-16"); 236 237 /** 238 * Japanese code set used for electronic transmission. 239 */ 240 @UML(identifier="jis", obligation=CONDITIONAL, specification=ISO_19115, version=2003) 241 public static final CharacterSet JIS = new CharacterSet("JIS", "JIS_X0201"); 242 243 /** 244 * Japanese code set used on MS-DOS based machines. 245 */ 246 @UML(identifier="shiftJIS", obligation=CONDITIONAL, specification=ISO_19115, version=2003) 247 public static final CharacterSet SHIFT_JIS = new CharacterSet("SHIFT_JIS", "Shift_JIS"); 248 249 /** 250 * Japanese code set used on UNIX based machines. 251 */ 252 @UML(identifier="eucJP", obligation=CONDITIONAL, specification=ISO_19115, version=2003) 253 public static final CharacterSet EUC_JP = new CharacterSet("EUC_JP", "EUC-JP"); 254 255 /** 256 * United States ASCII code set (ISO 646 US). 257 */ 258 @UML(identifier="usAscii", obligation=CONDITIONAL, specification=ISO_19115, version=2003) 259 public static final CharacterSet US_ASCII = new CharacterSet("US_ASCII", "US-ASCII"); 260 261 /** 262 * IBM mainframe code set. 263 */ 264 @UML(identifier="ebcdic", obligation=CONDITIONAL, specification=ISO_19115, version=2003) 265 public static final CharacterSet EBCDIC = new CharacterSet("EBCDIC", null); 266 267 /** 268 * Korean code set. 269 */ 270 @UML(identifier="eucKR", obligation=CONDITIONAL, specification=ISO_19115, version=2003) 271 public static final CharacterSet EUC_KR = new CharacterSet("EUC_KR", "EUC-KR"); 272 273 /** 274 * Traditional Chinese code set used in Taiwan, Hong Kong, and other areas. 275 */ 276 @UML(identifier="big5", obligation=CONDITIONAL, specification=ISO_19115, version=2003) 277 public static final CharacterSet BIG_5 = new CharacterSet("BIG_5", "Big5"); 278 279 /** 280 * Simplified Chinese code set. 281 */ 282 @UML(identifier="GB2312", obligation=CONDITIONAL, specification=ISO_19115, version=2003) 283 public static final CharacterSet GB2312 = new CharacterSet("GB2312", "GB2312"); 284 285 /** 286 * The Java {@link Charset} name (never {@code null}). 287 */ 288 private final String charset; 289 290 /** 291 * Constructs an element of the given name. 292 * 293 * @param name the name of the new element. This name shall not be in use by another element of this type. 294 * @param charset the Java {@link Charset} name, or {@code null} if none. 295 */ 296 private CharacterSet(final String name, final String charset) { 297 super(name); 298 this.charset = (charset != null) ? charset : name; 299 } 300 301 /** 302 * Constructs an enum with identical name and charset. 303 * This is needed for {@link CodeList#valueOf(Class, String)} reflection. 304 * 305 * @param name the name of the new element. 306 */ 307 private CharacterSet(final String name) { 308 this(name, name); 309 } 310 311 /** 312 * Converts the given Java Character Set to {@code CharacterSet}. 313 * 314 * @param cs the character set, or {@code null}. 315 * @return a code list for the given character set, or {@code null} if the given {@code cs} was null. 316 * 317 * @since 3.1 318 */ 319 public static CharacterSet fromCharset(final Charset cs) { 320 if (cs == null) { 321 return null; 322 } 323 final String name = cs.name(); 324 for (final CharacterSet candidate : CharacterSet.values()) { 325 for (final String n : candidate.names()) { 326 if (name.equals(n)) { 327 return candidate; 328 } 329 } 330 } 331 return valueOf(name); 332 } 333 334 /** 335 * Converts the Character Set to a java Charset, if it can. 336 * This method is provided for migration from this legacy code lists to {@link Charset}. 337 * 338 * @return the Java Charset. 339 * @throws UnsupportedCharsetException if no support for the charset is available. 340 * 341 * @see <a href="https://docs.oracle.com/en/java/javase/11/intl/supported-encodings.html">Supported encodings</a> 342 */ 343 public Charset toCharset() throws UnsupportedCharsetException { 344 return Charset.forName(charset); 345 } 346 347 /** 348 * Returns all the names of this code. The returned array contains the 349 * following elements, with duplicated values and null values removed: 350 * 351 * <ul> 352 * <li>The programmatic {@linkplain #name() name}</li> 353 * <li>The UML {@linkplain #identifier() identifier}</li> 354 * <li>The {@linkplain #toCharset() charset} name</li> 355 * </ul> 356 * 357 * Those names are typically equal except for the case (programmatic names are upper case 358 * while UML names are lower case) and special characters like {@code '-'}. 359 * 360 * @return all names of this code constant. This array is never null and never empty. 361 */ 362 @Override 363 public String[] names() { 364 final String name = name(); 365 if (charset.equals(name)) { 366 return super.names(); 367 } 368 final String identifier = identifier().orElse(null); 369 if (identifier != null && !identifier.equals(name)) { 370 return new String[] {name, identifier, charset}; 371 } else { 372 return new String[] {name, charset}; 373 } 374 } 375 376 /** 377 * Returns the list of {@code CharacterSet}s. 378 * 379 * @return the list of codes declared in the current JVM. 380 */ 381 public static CharacterSet[] values() { 382 return values(CharacterSet.class); 383 } 384 385 /** 386 * Returns the list of codes of the same kind as this code list element. 387 * Invoking this method is equivalent to invoking {@link #values()}, except that 388 * this method can be invoked on an instance of the parent {@code CodeList} class. 389 * 390 * @return all code {@linkplain #values() values} for this code list. 391 */ 392 @Override 393 public CharacterSet[] family() { 394 return values(); 395 } 396 397 /** 398 * Returns the character set that matches the given string, or returns a new one if none match it. 399 * This methods returns the first instance (in declaration order) for which the {@linkplain #name() name} 400 * is {@linkplain String#equalsIgnoreCase(String) equals, ignoring case}, to the given name. 401 * If no existing instance is found, then a new one is created for the given name. 402 * 403 * @param code the name of the code to fetch or to create. 404 * @return a code matching the given name. 405 */ 406 public static CharacterSet valueOf(String code) { 407 return valueOf(CharacterSet.class, code, CharacterSet::new).get(); 408 } 409}