/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.util.Hashtable;
import java.util.Locale;
/**
* Provides information about encodings. Depends on the Java runtime to provide
* writers for the different encodings, but can be used to override encoding
* names and provide the last printable character for each encoding.
*
*
* @version $Id: Encodings.java 476047 2006-11-17 04:27:45Z mrglavas $
* @author Assaf Arkin
*/
public class Encodings {
    /**
     * The last printable character for unknown encodings.
     */
    static final int DEFAULT_LAST_PRINTABLE = 0x7F;

    /** The last printable character for Unicode-compatible encodings. */
    static final int LAST_PRINTABLE_UNICODE = 0xffff;

    /** Java names of the encodings treated as able to express all of plane 0. */
    static final String[] UNICODE_ENCODINGS = { "Unicode", "UnicodeBig", "UnicodeLittle", "GB2312",
            "UTF8", "UTF-16", };

    /** Java encoding name used when the caller does not supply one. */
    static final String DEFAULT_ENCODING = "UTF8";

    // Cache of EncodingInfo objects keyed by Java encoding name. An entry is
    // only added for a name that EncodingMap resolves or that passes
    // EncodingInfo.testJavaEncodingName, so the table cannot grow without bound.
    static Hashtable _encodings = new Hashtable();

    /**
     * Returns the cached {@link EncodingInfo} for an encoding, creating and
     * caching it on first use.
     *
     * @param encoding
     *            a MIME charset name, or null to request the default encoding
     *            ({@link #DEFAULT_ENCODING}). The name is converted to upper
     *            case before any lookup.
     * @param allowJavaNames
     *            when true, a name that EncodingMap does not know as an IANA
     *            name is tried as a Java encoding name instead
     * @return the shared EncodingInfo for the encoding
     * @throws UnsupportedEncodingException
     *             if the name is not a known IANA name and either Java names
     *             are not allowed or the runtime rejects it as a Java name
     */
    static EncodingInfo getEncodingInfo(String encoding, boolean allowJavaNames)
            throws UnsupportedEncodingException {
        // No name given: hand out the (lazily created) default entry.
        if (encoding == null) {
            EncodingInfo fallback = (EncodingInfo) _encodings.get(DEFAULT_ENCODING);
            if (fallback == null) {
                fallback = new EncodingInfo(EncodingMap.getJava2IANAMapping(DEFAULT_ENCODING),
                        DEFAULT_ENCODING, LAST_PRINTABLE_UNICODE);
                _encodings.put(DEFAULT_ENCODING, fallback);
            }
            return fallback;
        }

        // The IANA table is looked up with the upper-cased name.
        final String upperName = encoding.toUpperCase(Locale.ENGLISH);
        final String javaName = EncodingMap.getIANA2JavaMapping(upperName);

        if (javaName != null) {
            // Known IANA name: the cache is keyed by the mapped Java name.
            final EncodingInfo known = (EncodingInfo) _encodings.get(javaName);
            if (known != null) {
                return known;
            }
            final EncodingInfo created = new EncodingInfo(upperName, javaName,
                    lastPrintableFor(javaName));
            _encodings.put(javaName, created);
            return created;
        }

        if (!allowJavaNames) {
            throw new UnsupportedEncodingException(upperName);
        }

        // Not an IANA name: see whether the runtime accepts it as a Java
        // encoding name (throws UnsupportedEncodingException otherwise).
        EncodingInfo.testJavaEncodingName(upperName);
        final EncodingInfo cached = (EncodingInfo) _encodings.get(upperName);
        if (cached != null) {
            return cached;
        }
        // NOTE(review): getJava2IANAMapping may return null here when the Java
        // name has no reverse mapping; confirm EncodingInfo copes with that.
        final EncodingInfo created = new EncodingInfo(EncodingMap.getJava2IANAMapping(upperName),
                upperName, lastPrintableFor(upperName));
        _encodings.put(upperName, created);
        return created;
    }

    /**
     * Picks the last-printable limit for a Java encoding name:
     * {@link #LAST_PRINTABLE_UNICODE} when the name matches an entry of
     * {@link #UNICODE_ENCODINGS} ignoring case, otherwise
     * {@link #DEFAULT_LAST_PRINTABLE}.
     */
    private static int lastPrintableFor(String javaName) {
        for (int idx = 0; idx < UNICODE_ENCODINGS.length; idx++) {
            if (UNICODE_ENCODINGS[idx].equalsIgnoreCase(javaName)) {
                return LAST_PRINTABLE_UNICODE;
            }
        }
        return DEFAULT_LAST_PRINTABLE;
    }

    /** Characters singled out as problematic for the JIS family of encodings. */
    static final String JIS_DANGER_CHARS = "\\\u007e\u007f\u00a2\u00a3\u00a5\u00ac"
            + "\u2014\u2015\u2016\u2026\u203e\u203e\u2225\u222f\u301c"
            + "\uff3c\uff5e\uffe0\uffe1\uffe2\uffe3";
}
/**
* This class represents an encoding.
*
* @version $Id: EncodingInfo.java 476047 2006-11-17 04:27:45Z mrglavas $
*/
class EncodingInfo {
    // One-element argument array reused for every reflective call made by
    // isPrintable0 (Charset, CharsetEncoder or CharToByteConverter methods).
    private Object[] fArgsForMethod = null;
    // name of encoding as registered with IANA;
    // preferably a MIME name, but aliases are fine too.
    String ianaName;
    // Java encoding name handed to OutputStreamWriter and to the reflective
    // charset lookups.
    String javaName;
    // Characters up to and including this value are reported printable
    // without consulting an encoder.
    int lastPrintable;
    // The CharsetEncoder with which we test unusual characters.
    Object fCharsetEncoder = null;
    // The CharToByteConverter with which we test unusual characters.
    Object fCharToByteConverter = null;
    // Is the converter null because it can't be instantiated
    // for some reason (perhaps we're running with insufficient authority as
    // an applet)?
    boolean fHaveTriedCToB = false;
    // Set once creating a CharsetEncoder has failed, so it is not retried.
    boolean fHaveTriedCharsetEncoder = false;

    /**
     * Creates a new EncodingInfo instance.
     *
     * @param ianaName
     *            the IANA (MIME) name of the encoding; may be null when the
     *            encoding is only known by its Java name
     * @param javaName
     *            the Java encoding name; used when {@code ianaName} is null or
     *            has no entry in {@link EncodingMap}
     * @param lastPrintable
     *            the highest character value assumed printable without asking
     *            an encoder
     */
    public EncodingInfo(String ianaName, String javaName, int lastPrintable) {
        this.ianaName = ianaName;
        // The mapping table is a Hashtable, which rejects null keys, so only
        // consult it when an IANA name was actually supplied.
        String mapped = null;
        if (ianaName != null) {
            mapped = EncodingMap.getIANA2JavaMapping(ianaName);
        }
        // Prefer the table's answer (unchanged behaviour for mapped names) and
        // fall back to the caller's Java name instead of discarding it.
        this.javaName = (mapped != null) ? mapped : javaName;
        this.lastPrintable = lastPrintable;
    }

    /**
     * Returns a MIME charset name of this encoding.
     *
     * @return the IANA name given at construction; may be null
     */
    public String getIANAName() {
        return this.ianaName;
    }

    /**
     * Returns a writer for this encoding based on an output stream. If no Java
     * encoding name is known, the IANA name is looked up again and, failing
     * that, a UTF-8 writer is returned.
     *
     * @param output
     *            the stream the writer encodes into
     * @return A suitable writer
     * @exception UnsupportedEncodingException
     *                There is no convertor to support this encoding
     */
    public Writer getWriter(OutputStream output) throws UnsupportedEncodingException {
        // this should always be true!
        if (javaName != null)
            return new OutputStreamWriter(output, javaName);
        javaName = EncodingMap.getIANA2JavaMapping(ianaName);
        if (javaName == null)
            // use UTF-8 as preferred encoding
            return new OutputStreamWriter(output, "UTF8");
        return new OutputStreamWriter(output, javaName);
    }

    /**
     * Checks whether the specified character is printable or not in this
     * encoding. Characters not above {@code lastPrintable} are accepted
     * immediately; anything else is checked against an encoder.
     *
     * @param ch
     *            a UTF-16 code unit (0-0xffff)
     * @return true if the character can be written in this encoding
     */
    public boolean isPrintable(char ch) {
        if (ch <= this.lastPrintable) {
            return true;
        }
        return isPrintable0(ch);
    }

    /**
     * Checks whether the specified character is printable or not in this
     * encoding. This method accomplishes this using a java.nio.CharsetEncoder. If
     * NIO isn't available it will attempt use a sun.io.CharToByteConverter.
     * Both are reached through reflection, so neither is required to exist.
     *
     * @param ch
     *            a UTF-16 code unit (0-0xffff)
     * @return true if an encoder or converter reports the character as
     *         encodable; false if it does not or if none can be used
     */
    private boolean isPrintable0(char ch) {
        // Attempt to get a CharsetEncoder for this encoding.
        if (fCharsetEncoder == null && CharsetMethods.fgNIOCharsetAvailable
                && !fHaveTriedCharsetEncoder) {
            if (fArgsForMethod == null) {
                fArgsForMethod = new Object[1];
            }
            // try and create the CharsetEncoder
            try {
                fArgsForMethod[0] = javaName;
                Object charset = CharsetMethods.fgCharsetForNameMethod.invoke(null, fArgsForMethod);
                if (((Boolean) CharsetMethods.fgCharsetCanEncodeMethod.invoke(charset, (Object[]) null))
                        .booleanValue()) {
                    fCharsetEncoder = CharsetMethods.fgCharsetNewEncoderMethod.invoke(charset,
                            (Object[]) null);
                }
                // This charset cannot be used for encoding, don't try it again...
                else {
                    fHaveTriedCharsetEncoder = true;
                }
            } catch (Exception e) {
                // don't try it again...
                fHaveTriedCharsetEncoder = true;
            }
        }
        // Attempt to use the CharsetEncoder to determine whether the character is
        // printable.
        if (fCharsetEncoder != null) {
            try {
                fArgsForMethod[0] = new Character(ch);
                return ((Boolean) CharsetMethods.fgCharsetEncoderCanEncodeMethod.invoke(fCharsetEncoder,
                        fArgsForMethod)).booleanValue();
            } catch (Exception e) {
                // obviously can't use this charset encoder; possibly a JDK bug
                fCharsetEncoder = null;
                // NOTE(review): resetting the flag to false means the encoder is
                // re-created on the next call; confirm that retry is intended.
                fHaveTriedCharsetEncoder = false;
            }
        }
        // As a last resort try to use a sun.io.CharToByteConverter to
        // determine whether this character is printable. We will always
        // reach here on JDK 1.3 or below.
        if (fCharToByteConverter == null) {
            if (fHaveTriedCToB || !CharToByteConverterMethods.fgConvertersAvailable) {
                // forget it; nothing we can do...
                return false;
            }
            if (fArgsForMethod == null) {
                fArgsForMethod = new Object[1];
            }
            // try and create the CharToByteConverter
            try {
                fArgsForMethod[0] = javaName;
                fCharToByteConverter = CharToByteConverterMethods.fgGetConverterMethod.invoke(null,
                        fArgsForMethod);
            } catch (Exception e) {
                // don't try it again...
                fHaveTriedCToB = true;
                return false;
            }
        }
        try {
            fArgsForMethod[0] = new Character(ch);
            return ((Boolean) CharToByteConverterMethods.fgCanConvertMethod.invoke(fCharToByteConverter,
                    fArgsForMethod)).booleanValue();
        } catch (Exception e) {
            // obviously can't use this converter; probably some kind of
            // security restriction
            fCharToByteConverter = null;
            // NOTE(review): as above, false here allows another attempt later.
            fHaveTriedCToB = false;
            return false;
        }
    }

    /**
     * Tests whether a name is an encoding name recognized by this JDK by
     * decoding a few ASCII bytes with it.
     *
     * @param name
     *            the Java encoding name to test
     * @throws UnsupportedEncodingException
     *             if the runtime does not support the named encoding
     */
    public static void testJavaEncodingName(String name) throws UnsupportedEncodingException {
        final byte[] bTest = { (byte) 'v', (byte) 'a', (byte) 'l', (byte) 'i', (byte) 'd' };
        // Only the side effect matters: the constructor throws for an unknown
        // encoding, and the decoded string itself is not needed.
        new String(bTest, name);
    }

    /**
     * Holder of methods from java.nio.charset.Charset and
     * java.nio.charset.CharsetEncoder.
     */
    static class CharsetMethods {
        // Method: java.nio.charset.Charset.forName(java.lang.String)
        private static java.lang.reflect.Method fgCharsetForNameMethod = null;
        // Method: java.nio.charset.Charset.canEncode()
        private static java.lang.reflect.Method fgCharsetCanEncodeMethod = null;
        // Method: java.nio.charset.Charset.newEncoder()
        private static java.lang.reflect.Method fgCharsetNewEncoderMethod = null;
        // Method: java.nio.charset.CharsetEncoder.canEncode(char)
        private static java.lang.reflect.Method fgCharsetEncoderCanEncodeMethod = null;
        // Flag indicating whether or not java.nio.charset.* is available.
        private static boolean fgNIOCharsetAvailable = false;

        // Static holder; never instantiated.
        private CharsetMethods() {
        }

        // Attempt to get methods for Charset and CharsetEncoder on class
        // initialization.
        static {
            try {
                Class charsetClass = Class.forName("java.nio.charset.Charset");
                Class charsetEncoderClass = Class.forName("java.nio.charset.CharsetEncoder");
                fgCharsetForNameMethod = charsetClass.getMethod("forName", new Class[] { String.class });
                fgCharsetCanEncodeMethod = charsetClass.getMethod("canEncode", new Class[] {});
                fgCharsetNewEncoderMethod = charsetClass.getMethod("newEncoder", new Class[] {});
                fgCharsetEncoderCanEncodeMethod = charsetEncoderClass.getMethod("canEncode",
                        new Class[] { Character.TYPE });
                fgNIOCharsetAvailable = true;
            }
            // ClassNotFoundException, NoSuchMethodException or SecurityException
            // Whatever the case, we cannot use java.nio.charset.*.
            catch (Exception exc) {
                fgCharsetForNameMethod = null;
                fgCharsetCanEncodeMethod = null;
                fgCharsetEncoderCanEncodeMethod = null;
                fgCharsetNewEncoderMethod = null;
                fgNIOCharsetAvailable = false;
            }
        }
    }

    /**
     * Holder of methods from sun.io.CharToByteConverter.
     */
    static class CharToByteConverterMethods {
        // Method: sun.io.CharToByteConverter.getConverter(java.lang.String)
        private static java.lang.reflect.Method fgGetConverterMethod = null;
        // Method: sun.io.CharToByteConverter.canConvert(char)
        private static java.lang.reflect.Method fgCanConvertMethod = null;
        // Flag indicating whether or not sun.io.CharToByteConverter is available.
        private static boolean fgConvertersAvailable = false;

        // Static holder; never instantiated.
        private CharToByteConverterMethods() {
        }

        // Attempt to get methods for char to byte converter on class
        // initialization.
        static {
            try {
                Class clazz = Class.forName("sun.io.CharToByteConverter");
                fgGetConverterMethod = clazz.getMethod("getConverter", new Class[] { String.class });
                fgCanConvertMethod = clazz.getMethod("canConvert", new Class[] { Character.TYPE });
                fgConvertersAvailable = true;
            }
            // ClassNotFoundException, NoSuchMethodException or SecurityException
            // Whatever the case, we cannot use sun.io.CharToByteConverter.
            catch (Exception exc) {
                fgGetConverterMethod = null;
                fgCanConvertMethod = null;
                fgConvertersAvailable = false;
            }
        }
    }
}
/**
* EncodingMap is a convenience class which handles conversions between IANA
* encoding names and Java encoding names, and vice versa. The encoding names
* used in XML instance documents must be the IANA encoding
* names specified or one of the aliases for those names which IANA defines.
* <p>
* <table border="1">
* <tr>
* <th>Common Name</th>
* <th>Use this name in XML files</th>
* <th>Name Type</th>
* <th>Xerces converts to this Java Encoder Name</th>
* </tr>
* <tr><td>8 bit Unicode</td><td>UTF-8</td><td>IANA</td><td>UTF8</td></tr>
* <tr><td>ISO Latin 1</td><td>ISO-8859-1</td><td>MIME</td><td>ISO-8859-1</td></tr>
* <tr><td>ISO Latin 2</td><td>ISO-8859-2</td><td>MIME</td><td>ISO-8859-2</td></tr>
* <tr><td>ISO Latin 3</td><td>ISO-8859-3</td><td>MIME</td><td>ISO-8859-3</td></tr>
* <tr><td>ISO Latin 4</td><td>ISO-8859-4</td><td>MIME</td><td>ISO-8859-4</td></tr>
* <tr><td>ISO Latin Cyrillic</td><td>ISO-8859-5</td><td>MIME</td><td>ISO-8859-5</td></tr>
* <tr><td>ISO Latin Arabic</td><td>ISO-8859-6</td><td>MIME</td><td>ISO-8859-6</td></tr>
* <tr><td>ISO Latin Greek</td><td>ISO-8859-7</td><td>MIME</td><td>ISO-8859-7</td></tr>
* <tr><td>ISO Latin Hebrew</td><td>ISO-8859-8</td><td>MIME</td><td>ISO-8859-8</td></tr>
* <tr><td>ISO Latin 5</td><td>ISO-8859-9</td><td>MIME</td><td>ISO-8859-9</td></tr>
* <tr><td>EBCDIC: US</td><td>ebcdic-cp-us</td><td>IANA</td><td>cp037</td></tr>
* <tr><td>EBCDIC: Canada</td><td>ebcdic-cp-ca</td><td>IANA</td><td>cp037</td></tr>
* <tr><td>EBCDIC: Netherlands</td><td>ebcdic-cp-nl</td><td>IANA</td><td>cp037</td></tr>
* <tr><td>EBCDIC: Denmark</td><td>ebcdic-cp-dk</td><td>IANA</td><td>cp277</td></tr>
* <tr><td>EBCDIC: Norway</td><td>ebcdic-cp-no</td><td>IANA</td><td>cp277</td></tr>
* <tr><td>EBCDIC: Finland</td><td>ebcdic-cp-fi</td><td>IANA</td><td>cp278</td></tr>
* <tr><td>EBCDIC: Sweden</td><td>ebcdic-cp-se</td><td>IANA</td><td>cp278</td></tr>
* <tr><td>EBCDIC: Italy</td><td>ebcdic-cp-it</td><td>IANA</td><td>cp280</td></tr>
* <tr><td>EBCDIC: Spain, Latin America</td><td>ebcdic-cp-es</td><td>IANA</td><td>cp284</td></tr>
* <tr><td>EBCDIC: Great Britain</td><td>ebcdic-cp-gb</td><td>IANA</td><td>cp285</td></tr>
* <tr><td>EBCDIC: France</td><td>ebcdic-cp-fr</td><td>IANA</td><td>cp297</td></tr>
* <tr><td>EBCDIC: Arabic</td><td>ebcdic-cp-ar1</td><td>IANA</td><td>cp420</td></tr>
* <tr><td>EBCDIC: Hebrew</td><td>ebcdic-cp-he</td><td>IANA</td><td>cp424</td></tr>
* <tr><td>EBCDIC: Switzerland</td><td>ebcdic-cp-ch</td><td>IANA</td><td>cp500</td></tr>
* <tr><td>EBCDIC: Roece</td><td>ebcdic-cp-roece</td><td>IANA</td><td>cp870</td></tr>
* <tr><td>EBCDIC: Yugoslavia</td><td>ebcdic-cp-yu</td><td>IANA</td><td>cp870</td></tr>
* <tr><td>EBCDIC: Iceland</td><td>ebcdic-cp-is</td><td>IANA</td><td>cp871</td></tr>
* <tr><td>EBCDIC: Urdu</td><td>ebcdic-cp-ar2</td><td>IANA</td><td>cp918</td></tr>
* <tr><td>Chinese for PRC, mixed 1/2 byte</td><td>gb2312</td><td>MIME</td><td>GB2312</td></tr>
* <tr><td>Extended Unix Code, packed for Japanese</td><td>euc-jp</td><td>MIME</td><td>eucjis</td></tr>
* <tr><td>Japanese: iso-2022-jp</td><td>iso-2022-jp</td><td>MIME</td><td>JIS</td></tr>
* <tr><td>Japanese: Shift JIS</td><td>Shift_JIS</td><td>MIME</td><td>SJIS</td></tr>
* <tr><td>Chinese: Big5</td><td>Big5</td><td>MIME</td><td>Big5</td></tr>
* <tr><td>Extended Unix Code, packed for Korean</td><td>euc-kr</td><td>MIME</td><td>iso2022kr</td></tr>
* <tr><td>Cyrillic</td><td>koi8-r</td><td>MIME</td><td>koi8-r</td></tr>
* </table>
*
* @author TAMURA Kent, IBM
* @author Andy Clark, IBM
*
* @version $Id: EncodingMap.java 447241 2006-09-18 05:12:57Z mrglavas $
*/
class EncodingMap {
    //
    // Data
    //

    /** Maps an IANA encoding name or alias to a Java encoding name; keys are matched exactly. */
    protected final static Hashtable fIANA2JavaMap = new Hashtable();

    /** Maps a Java encoding name to an IANA encoding name; keys are matched exactly. */
    protected final static Hashtable fJava2IANAMap = new Hashtable();

    //
    // Static initialization
    //

    static {
        // IANA name or alias, followed by the Java encoding name it maps to.
        final String[] ianaToJava = {
            "BIG5", "Big5",
            "CSBIG5", "Big5",
            "CP037", "CP037",
            "IBM037", "CP037",
            "CSIBM037", "CP037",
            "EBCDIC-CP-US", "CP037",
            "EBCDIC-CP-CA", "CP037",
            "EBCDIC-CP-NL", "CP037",
            "EBCDIC-CP-WT", "CP037",
            "IBM273", "CP273",
            "CP273", "CP273",
            "CSIBM273", "CP273",
            "IBM277", "CP277",
            "CP277", "CP277",
            "CSIBM277", "CP277",
            "EBCDIC-CP-DK", "CP277",
            "EBCDIC-CP-NO", "CP277",
            "IBM278", "CP278",
            "CP278", "CP278",
            "CSIBM278", "CP278",
            "EBCDIC-CP-FI", "CP278",
            "EBCDIC-CP-SE", "CP278",
            "IBM280", "CP280",
            "CP280", "CP280",
            "CSIBM280", "CP280",
            "EBCDIC-CP-IT", "CP280",
            "IBM284", "CP284",
            "CP284", "CP284",
            "CSIBM284", "CP284",
            "EBCDIC-CP-ES", "CP284",
            "EBCDIC-CP-GB", "CP285",
            "IBM285", "CP285",
            "CP285", "CP285",
            "CSIBM285", "CP285",
            "EBCDIC-JP-KANA", "CP290",
            "IBM290", "CP290",
            "CP290", "CP290",
            "CSIBM290", "CP290",
            "EBCDIC-CP-FR", "CP297",
            "IBM297", "CP297",
            "CP297", "CP297",
            "CSIBM297", "CP297",
            "EBCDIC-CP-AR1", "CP420",
            "IBM420", "CP420",
            "CP420", "CP420",
            "CSIBM420", "CP420",
            "EBCDIC-CP-HE", "CP424",
            "IBM424", "CP424",
            "CP424", "CP424",
            "CSIBM424", "CP424",
            "IBM437", "CP437",
            "437", "CP437",
            "CP437", "CP437",
            "CSPC8CODEPAGE437", "CP437",
            "EBCDIC-CP-CH", "CP500",
            "IBM500", "CP500",
            "CP500", "CP500",
            "CSIBM500", "CP500",
            "EBCDIC-CP-BE", "CP500",
            "IBM775", "CP775",
            "CP775", "CP775",
            "CSPC775BALTIC", "CP775",
            "IBM850", "CP850",
            "850", "CP850",
            "CP850", "CP850",
            "CSPC850MULTILINGUAL", "CP850",
            "IBM852", "CP852",
            "852", "CP852",
            "CP852", "CP852",
            "CSPCP852", "CP852",
            "IBM855", "CP855",
            "855", "CP855",
            "CP855", "CP855",
            "CSIBM855", "CP855",
            "IBM857", "CP857",
            "857", "CP857",
            "CP857", "CP857",
            "CSIBM857", "CP857",
            "IBM00858", "CP858",
            "CP00858", "CP858",
            "CCSID00858", "CP858",
            "IBM860", "CP860",
            "860", "CP860",
            "CP860", "CP860",
            "CSIBM860", "CP860",
            "IBM861", "CP861",
            "861", "CP861",
            "CP861", "CP861",
            "CP-IS", "CP861",
            "CSIBM861", "CP861",
            "IBM862", "CP862",
            "862", "CP862",
            "CP862", "CP862",
            "CSPC862LATINHEBREW", "CP862",
            "IBM863", "CP863",
            "863", "CP863",
            "CP863", "CP863",
            "CSIBM863", "CP863",
            "IBM864", "CP864",
            "CP864", "CP864",
            "CSIBM864", "CP864",
            "IBM865", "CP865",
            "865", "CP865",
            "CP865", "CP865",
            "CSIBM865", "CP865",
            "IBM866", "CP866",
            "866", "CP866",
            "CP866", "CP866",
            "CSIBM866", "CP866",
            "IBM868", "CP868",
            "CP868", "CP868",
            "CSIBM868", "CP868",
            "CP-AR", "CP868",
            "IBM869", "CP869",
            "CP869", "CP869",
            "CSIBM869", "CP869",
            "CP-GR", "CP869",
            "IBM870", "CP870",
            "CP870", "CP870",
            "CSIBM870", "CP870",
            "EBCDIC-CP-ROECE", "CP870",
            "EBCDIC-CP-YU", "CP870",
            "IBM871", "CP871",
            "CP871", "CP871",
            "CSIBM871", "CP871",
            "EBCDIC-CP-IS", "CP871",
            "IBM918", "CP918",
            "CP918", "CP918",
            "CSIBM918", "CP918",
            "EBCDIC-CP-AR2", "CP918",
            "IBM00924", "CP924",
            "CP00924", "CP924",
            "CCSID00924", "CP924",
            // is this an error???
            "EBCDIC-LATIN9--EURO", "CP924",
            "IBM1026", "CP1026",
            "CP1026", "CP1026",
            "CSIBM1026", "CP1026",
            "IBM01140", "Cp1140",
            "CP01140", "Cp1140",
            "CCSID01140", "Cp1140",
            "IBM01141", "Cp1141",
            "CP01141", "Cp1141",
            "CCSID01141", "Cp1141",
            "IBM01142", "Cp1142",
            "CP01142", "Cp1142",
            "CCSID01142", "Cp1142",
            "IBM01143", "Cp1143",
            "CP01143", "Cp1143",
            "CCSID01143", "Cp1143",
            "IBM01144", "Cp1144",
            "CP01144", "Cp1144",
            "CCSID01144", "Cp1144",
            "IBM01145", "Cp1145",
            "CP01145", "Cp1145",
            "CCSID01145", "Cp1145",
            "IBM01146", "Cp1146",
            "CP01146", "Cp1146",
            "CCSID01146", "Cp1146",
            "IBM01147", "Cp1147",
            "CP01147", "Cp1147",
            "CCSID01147", "Cp1147",
            "IBM01148", "Cp1148",
            "CP01148", "Cp1148",
            "CCSID01148", "Cp1148",
            "IBM01149", "Cp1149",
            "CP01149", "Cp1149",
            "CCSID01149", "Cp1149",
            "EUC-JP", "EUCJIS",
            "CSEUCPKDFMTJAPANESE", "EUCJIS",
            "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE", "EUCJIS",
            "EUC-KR", "KSC5601",
            "CSEUCKR", "KSC5601",
            "KS_C_5601-1987", "KS_C_5601-1987",
            "ISO-IR-149", "KS_C_5601-1987",
            "KS_C_5601-1989", "KS_C_5601-1987",
            "KSC_5601", "KS_C_5601-1987",
            "KOREAN", "KS_C_5601-1987",
            "CSKSC56011987", "KS_C_5601-1987",
            "GB2312", "GB2312",
            "CSGB2312", "GB2312",
            "ISO-2022-JP", "JIS",
            "CSISO2022JP", "JIS",
            "ISO-2022-KR", "ISO2022KR",
            "CSISO2022KR", "ISO2022KR",
            "ISO-2022-CN", "ISO2022CN",
            "X0201", "JIS0201",
            "CSISO13JISC6220JP", "JIS0201",
            "X0208", "JIS0208",
            "ISO-IR-87", "JIS0208",
            "X0208dbiJIS_X0208-1983", "JIS0208",
            "CSISO87JISX0208", "JIS0208",
            "X0212", "JIS0212",
            "ISO-IR-159", "JIS0212",
            "CSISO159JISX02121990", "JIS0212",
            "GB18030", "GB18030",
            "GBK", "GBK",
            "CP936", "GBK",
            "MS936", "GBK",
            "WINDOWS-936", "GBK",
            "SHIFT_JIS", "SJIS",
            "CSSHIFTJIS", "SJIS",
            "MS_KANJI", "SJIS",
            "WINDOWS-31J", "MS932",
            "CSWINDOWS31J", "MS932",
            // Add support for Cp1252 and its friends
            "WINDOWS-1250", "Cp1250",
            "WINDOWS-1251", "Cp1251",
            "WINDOWS-1252", "Cp1252",
            "WINDOWS-1253", "Cp1253",
            "WINDOWS-1254", "Cp1254",
            "WINDOWS-1255", "Cp1255",
            "WINDOWS-1256", "Cp1256",
            "WINDOWS-1257", "Cp1257",
            "WINDOWS-1258", "Cp1258",
            "TIS-620", "TIS620",
            "ISO-8859-1", "ISO8859_1",
            "ISO-IR-100", "ISO8859_1",
            "ISO_8859-1", "ISO8859_1",
            "LATIN1", "ISO8859_1",
            "CSISOLATIN1", "ISO8859_1",
            "L1", "ISO8859_1",
            "IBM819", "ISO8859_1",
            "CP819", "ISO8859_1",
            "ISO-8859-2", "ISO8859_2",
            "ISO-IR-101", "ISO8859_2",
            "ISO_8859-2", "ISO8859_2",
            "LATIN2", "ISO8859_2",
            "CSISOLATIN2", "ISO8859_2",
            "L2", "ISO8859_2",
            "ISO-8859-3", "ISO8859_3",
            "ISO-IR-109", "ISO8859_3",
            "ISO_8859-3", "ISO8859_3",
            "LATIN3", "ISO8859_3",
            "CSISOLATIN3", "ISO8859_3",
            "L3", "ISO8859_3",
            "ISO-8859-4", "ISO8859_4",
            "ISO-IR-110", "ISO8859_4",
            "ISO_8859-4", "ISO8859_4",
            "LATIN4", "ISO8859_4",
            "CSISOLATIN4", "ISO8859_4",
            "L4", "ISO8859_4",
            "ISO-8859-5", "ISO8859_5",
            "ISO-IR-144", "ISO8859_5",
            "ISO_8859-5", "ISO8859_5",
            "CYRILLIC", "ISO8859_5",
            "CSISOLATINCYRILLIC", "ISO8859_5",
            "ISO-8859-6", "ISO8859_6",
            "ISO-IR-127", "ISO8859_6",
            "ISO_8859-6", "ISO8859_6",
            "ECMA-114", "ISO8859_6",
            "ASMO-708", "ISO8859_6",
            "ARABIC", "ISO8859_6",
            "CSISOLATINARABIC", "ISO8859_6",
            "ISO-8859-7", "ISO8859_7",
            "ISO-IR-126", "ISO8859_7",
            "ISO_8859-7", "ISO8859_7",
            "ELOT_928", "ISO8859_7",
            "ECMA-118", "ISO8859_7",
            "GREEK", "ISO8859_7",
            "CSISOLATINGREEK", "ISO8859_7",
            "GREEK8", "ISO8859_7",
            "ISO-8859-8", "ISO8859_8",
            // added since this encoding only differs w.r.t. presentation
            "ISO-8859-8-I", "ISO8859_8",
            "ISO-IR-138", "ISO8859_8",
            "ISO_8859-8", "ISO8859_8",
            "HEBREW", "ISO8859_8",
            "CSISOLATINHEBREW", "ISO8859_8",
            "ISO-8859-9", "ISO8859_9",
            "ISO-IR-148", "ISO8859_9",
            "ISO_8859-9", "ISO8859_9",
            "LATIN5", "ISO8859_9",
            "CSISOLATIN5", "ISO8859_9",
            "L5", "ISO8859_9",
            "ISO-8859-13", "ISO8859_13",
            "ISO-8859-15", "ISO8859_15_FDIS",
            "ISO_8859-15", "ISO8859_15_FDIS",
            "LATIN-9", "ISO8859_15_FDIS",
            "KOI8-R", "KOI8_R",
            "CSKOI8R", "KOI8_R",
            "US-ASCII", "ASCII",
            "ISO-IR-6", "ASCII",
            "ANSI_X3.4-1968", "ASCII",
            "ANSI_X3.4-1986", "ASCII",
            "ISO_646.IRV:1991", "ASCII",
            "ASCII", "ASCII",
            "CSASCII", "ASCII",
            "ISO646-US", "ASCII",
            "US", "ASCII",
            "IBM367", "ASCII",
            "CP367", "ASCII",
            "UTF-8", "UTF8",
            "UTF-16", "UTF-16",
            "UTF-16BE", "UnicodeBig",
            "UTF-16LE", "UnicodeLittle",
            // support for 1047, as proposed to be added to the
            // IANA registry in
            // http://lists.w3.org/Archives/Public/ietf-charset/2002JulSep/0049.html
            "IBM-1047", "Cp1047",
            "IBM1047", "Cp1047",
            "CP1047", "Cp1047",
            // Adding new aliases as proposed in
            // http://lists.w3.org/Archives/Public/ietf-charset/2002JulSep/0058.html
            "IBM-37", "CP037",
            "IBM-273", "CP273",
            "IBM-277", "CP277",
            "IBM-278", "CP278",
            "IBM-280", "CP280",
            "IBM-284", "CP284",
            "IBM-285", "CP285",
            "IBM-290", "CP290",
            "IBM-297", "CP297",
            "IBM-420", "CP420",
            "IBM-424", "CP424",
            "IBM-437", "CP437",
            "IBM-500", "CP500",
            "IBM-775", "CP775",
            "IBM-850", "CP850",
            "IBM-852", "CP852",
            "IBM-855", "CP855",
            "IBM-857", "CP857",
            "IBM-858", "CP858",
            "IBM-860", "CP860",
            "IBM-861", "CP861",
            "IBM-862", "CP862",
            "IBM-863", "CP863",
            "IBM-864", "CP864",
            "IBM-865", "CP865",
            "IBM-866", "CP866",
            "IBM-868", "CP868",
            "IBM-869", "CP869",
            "IBM-870", "CP870",
            "IBM-871", "CP871",
            "IBM-918", "CP918",
            "IBM-924", "CP924",
            "IBM-1026", "CP1026",
            "IBM-1140", "Cp1140",
            "IBM-1141", "Cp1141",
            "IBM-1142", "Cp1142",
            "IBM-1143", "Cp1143",
            "IBM-1144", "Cp1144",
            "IBM-1145", "Cp1145",
            "IBM-1146", "Cp1146",
            "IBM-1147", "Cp1147",
            "IBM-1148", "Cp1148",
            "IBM-1149", "Cp1149",
            "IBM-819", "ISO8859_1",
            "IBM-367", "ASCII",
        };

        // REVISIT:
        // j:CNS11643 -> EUC-TW?
        // ISO-2022-CN? ISO-2022-CN-EXT?

        // Java encoding name, followed by the IANA name it maps to.
        // ("8859_1" -> "US-ASCII" was considered but is not registered.)
        final String[] javaToIana = {
            "ISO8859_1", "ISO-8859-1",
            "ISO8859_2", "ISO-8859-2",
            "ISO8859_3", "ISO-8859-3",
            "ISO8859_4", "ISO-8859-4",
            "ISO8859_5", "ISO-8859-5",
            "ISO8859_6", "ISO-8859-6",
            "ISO8859_7", "ISO-8859-7",
            "ISO8859_8", "ISO-8859-8",
            "ISO8859_9", "ISO-8859-9",
            "ISO8859_13", "ISO-8859-13",
            "ISO8859_15", "ISO-8859-15",
            "ISO8859_15_FDIS", "ISO-8859-15",
            "Big5", "BIG5",
            "CP037", "EBCDIC-CP-US",
            "CP273", "IBM273",
            "CP277", "EBCDIC-CP-DK",
            "CP278", "EBCDIC-CP-FI",
            "CP280", "EBCDIC-CP-IT",
            "CP284", "EBCDIC-CP-ES",
            "CP285", "EBCDIC-CP-GB",
            "CP290", "EBCDIC-JP-KANA",
            "CP297", "EBCDIC-CP-FR",
            "CP420", "EBCDIC-CP-AR1",
            "CP424", "EBCDIC-CP-HE",
            "CP437", "IBM437",
            "CP500", "EBCDIC-CP-CH",
            "CP775", "IBM775",
            "CP850", "IBM850",
            "CP852", "IBM852",
            "CP855", "IBM855",
            "CP857", "IBM857",
            "CP858", "IBM00858",
            "CP860", "IBM860",
            "CP861", "IBM861",
            "CP862", "IBM862",
            "CP863", "IBM863",
            "CP864", "IBM864",
            "CP865", "IBM865",
            "CP866", "IBM866",
            "CP868", "IBM868",
            "CP869", "IBM869",
            "CP870", "EBCDIC-CP-ROECE",
            "CP871", "EBCDIC-CP-IS",
            "CP918", "EBCDIC-CP-AR2",
            "CP924", "IBM00924",
            "CP1026", "IBM1026",
            "CP1140", "IBM01140",
            "CP1141", "IBM01141",
            "CP1142", "IBM01142",
            "CP1143", "IBM01143",
            "CP1144", "IBM01144",
            "CP1145", "IBM01145",
            "CP1146", "IBM01146",
            "CP1147", "IBM01147",
            "CP1148", "IBM01148",
            "CP1149", "IBM01149",
            "EUCJIS", "EUC-JP",
            "KS_C_5601-1987", "KS_C_5601-1987",
            "GB2312", "GB2312",
            "ISO2022KR", "ISO-2022-KR",
            "ISO2022CN", "ISO-2022-CN",
            "JIS", "ISO-2022-JP",
            "KOI8_R", "KOI8-R",
            "KSC5601", "EUC-KR",
            "GB18030", "GB18030",
            "GBK", "GBK",
            "SJIS", "SHIFT_JIS",
            "MS932", "WINDOWS-31J",
            "UTF8", "UTF-8",
            "Unicode", "UTF-16",
            "UnicodeBig", "UTF-16BE",
            "UnicodeLittle", "UTF-16LE",
            "JIS0201", "X0201",
            "JIS0208", "X0208",
            "JIS0212", "ISO-IR-159",
            // proposed addition (see the 1047 note in the IANA table):
            "CP1047", "IBM1047",
        };

        load(fIANA2JavaMap, ianaToJava);
        load(fJava2IANAMap, javaToIana);
    }

    /** Stores each consecutive (key, value) pair of {@code pairs} into {@code target}. */
    private static void load(Hashtable target, String[] pairs) {
        for (int k = 0; k + 1 < pairs.length; k += 2) {
            target.put(pairs[k], pairs[k + 1]);
        }
    }

    //
    // Constructors
    //

    /** Default constructor. */
    public EncodingMap() {
    }

    //
    // Public static methods
    //

    /**
     * Registers (or replaces) the Java encoding name for an IANA encoding name.
     *
     * @param ianaEncoding
     *            The IANA encoding name.
     * @param javaEncoding
     *            The Java encoding name.
     *
     * @deprecated Use of this method is not recommended. Its effect is JVM wide
     *             and may cause unforeseen behaviour for other applications
     *             running in the system.
     */
    public static void putIANA2JavaMapping(String ianaEncoding, String javaEncoding) {
        final Hashtable table = fIANA2JavaMap;
        table.put(ianaEncoding, javaEncoding);
    }

    /**
     * Looks up the Java encoding name registered for an IANA encoding name.
     *
     * @param ianaEncoding
     *            The IANA encoding name.
     * @return the Java encoding name, or null if the IANA name has no entry
     */
    public static String getIANA2JavaMapping(String ianaEncoding) {
        final Object javaName = fIANA2JavaMap.get(ianaEncoding);
        return (String) javaName;
    }

    /**
     * Drops the entry for an IANA encoding name.
     *
     * @param ianaEncoding
     *            The IANA encoding name.
     * @return the Java encoding name that was registered, or null if none was
     *
     * @deprecated Use of this method is not recommended. Its effect is JVM wide
     *             and may cause unforeseen behaviour for other applications
     *             running in the system.
     */
    public static String removeIANA2JavaMapping(String ianaEncoding) {
        final Object removed = fIANA2JavaMap.remove(ianaEncoding);
        return (String) removed;
    }

    /**
     * Registers (or replaces) the IANA encoding name for a Java encoding name.
     *
     * @param javaEncoding
     *            The Java encoding name.
     * @param ianaEncoding
     *            The IANA encoding name.
     *
     * @deprecated Use of this method is not recommended. Its effect is JVM wide
     *             and may cause unforeseen behaviour for other applications
     *             running in the system.
     */
    public static void putJava2IANAMapping(String javaEncoding, String ianaEncoding) {
        final Hashtable table = fJava2IANAMap;
        table.put(javaEncoding, ianaEncoding);
    }

    /**
     * Looks up the IANA encoding name registered for a Java encoding name.
     *
     * @param javaEncoding
     *            The Java encoding name.
     * @return the IANA encoding name, or null if the Java name has no entry
     */
    public static String getJava2IANAMapping(String javaEncoding) {
        final Object ianaName = fJava2IANAMap.get(javaEncoding);
        return (String) ianaName;
    }

    /**
     * Drops the entry for a Java encoding name.
     *
     * @param javaEncoding
     *            The Java encoding name.
     * @return the IANA encoding name that was registered, or null if none was
     *
     * @deprecated Use of this method is not recommended. Its effect is JVM wide
     *             and may cause unforeseen behaviour for other applications
     *             running in the system.
     */
    public static String removeJava2IANAMapping(String javaEncoding) {
        final Object removed = fJava2IANAMap.remove(javaEncoding);
        return (String) removed;
    }
} // class EncodingMap