Coverage Report - net.sf.jmatchparser.util.charset.jchardet.JChardetCharset
 
Classes in this File Line Coverage Branch Coverage Complexity
JChardetCharset
76%
10/13
50%
2/4
2,812
JChardetCharset$Decoder
81%
68/83
50%
20/40
2,812
 
 1  
 /* ***** BEGIN LICENSE BLOCK *****
 2  
  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 3  
  *
 4  
  * The contents of this file are subject to the Mozilla Public License Version
 5  
  * 1.1 (the "License"); you may not use this file except in compliance with
 6  
  * the License. You may obtain a copy of the License at
 7  
  * http://www.mozilla.org/MPL/
 8  
  *
 9  
  * Software distributed under the License is distributed on an "AS IS" basis,
 10  
  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 11  
  * for the specific language governing rights and limitations under the
 12  
  * License.
 13  
  *
 14  
  * The Original Code is mozilla.org code.
 15  
  *
 16  
  * The Initial Developer of the Original Code is
 17  
  * Netscape Communications Corporation.
 18  
  * Portions created by the Initial Developer are Copyright (C) 1998
 19  
  * the Initial Developer. All Rights Reserved.
 20  
  *
 21  
  * Alternatively, the contents of this file may be used under the terms of
 22  
  * either of the GNU General Public License Version 2 or later (the "GPL"),
 23  
  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 24  
  * in which case the provisions of the GPL or the LGPL are applicable instead
 25  
  * of those above. If you wish to allow use of your version of this file only
 26  
  * under the terms of either the GPL or the LGPL, and not to allow others to
 27  
  * use your version of this file under the terms of the MPL, indicate your
 28  
  * decision by deleting the provisions above and replace them with the notice
 29  
  * and other provisions required by the GPL or the LGPL. If you do not delete
 30  
  * the provisions above, a recipient may use your version of this file under
 31  
  * the terms of any one of the MPL, the GPL or the LGPL.
 32  
  *
 33  
  * ***** END LICENSE BLOCK ***** */
 34  
 package net.sf.jmatchparser.util.charset.jchardet;
 35  
 
 36  
 import java.io.ByteArrayOutputStream;
 37  
 import java.nio.ByteBuffer;
 38  
 import java.nio.CharBuffer;
 39  
 import java.nio.charset.Charset;
 40  
 import java.nio.charset.CharsetDecoder;
 41  
 import java.nio.charset.CharsetEncoder;
 42  
 import java.nio.charset.CoderResult;
 43  
 import java.nio.charset.UnsupportedCharsetException;
 44  
 
 45  
 import org.mozilla.intl.chardet.nsDetector;
 46  
 import org.mozilla.intl.chardet.nsICharsetDetectionObserver;
 47  
 
 48  61
 class JChardetCharset extends Charset {
 49  
 
 50  1
         public static final String[] NAMES = new String[] {
 51  
                         "jChardet",
 52  
                         "jChardet-JAPANESE",
 53  
                         "jChardet-CHINESE",
 54  
                         "jChardet-SIMPLIFIED-CHINESE",
 55  
                         "jChardet-TRADITIONAL-CHINESE",
 56  
                         "jChardet-KOREAN",
 57  
         };
 58  
 
 59  1
         private static final JChardetCharset[] instances = new JChardetCharset[NAMES.length];
 60  
 
 61  
         protected static JChardetCharset getInstance(int languageFlag) {
 62  28
                 if (instances[languageFlag] == null) {
 63  1
                         instances[languageFlag] = new JChardetCharset(languageFlag);
 64  
                 }
 65  28
                 return instances[languageFlag];
 66  
         }
 67  
 
 68  
         private final int languageFlag;
 69  
 
 70  
         private JChardetCharset(int languageFlag) {
 71  1
                 super(NAMES[languageFlag], null);
 72  1
                 this.languageFlag = languageFlag;
 73  1
         }
 74  
 
 75  
         @Override
 76  
         public boolean contains(Charset cs) {
 77  0
                 return cs == this;
 78  
         }
 79  
 
 80  
         @Override
 81  
         public CharsetDecoder newDecoder() {
 82  31
                 return new Decoder();
 83  
         }
 84  
 
 85  
         @Override
 86  
         public CharsetEncoder newEncoder() {
 87  0
                 throw new UnsupportedOperationException();
 88  
         }
 89  
 
 90  
         @Override
 91  
         public boolean canEncode() {
 92  0
                 return false;
 93  
         }
 94  
 
 95  
         private class Decoder extends CharsetDecoder implements nsICharsetDetectionObserver {
 96  
                 private nsDetector detector;
 97  31
                 private ByteArrayOutputStream buffer = new ByteArrayOutputStream();
 98  31
                 private boolean isASCII = true;
 99  
 
 100  31
                 private CharsetDecoder usedDecoder = null;
 101  31
                 private ByteBuffer remaining = null;
 102  
 
 103  31
                 protected Decoder() {
 104  31
                         super(JChardetCharset.this, 1.0f, 2.0f);
 105  31
                         detector = new nsDetector(languageFlag);
 106  31
                         detector.Init(this);
 107  31
                 }
 108  
 
 109  
                 @Override
 110  
                 public void Notify(String charset) {
 111  0
                         setCharset(charset);
 112  0
                 }
 113  
 
 114  
                 private void setCharset(String charset) {
 115  
                         try {
 116  30
                                 usedDecoder = Charset.forName(charset).newDecoder();
 117  1
                         } catch (UnsupportedCharsetException ex) {
 118  1
                                 usedDecoder = Charset.forName("ISO-8859-1").newDecoder();
 119  29
                         }
 120  30
                         usedDecoder.onUnmappableCharacter(unmappableCharacterAction());
 121  30
                         usedDecoder.onMalformedInput(malformedInputAction());
 122  30
                         byte[] buf = buffer.toByteArray();
 123  30
                         if (buf.length > 0) {
 124  30
                                 remaining = ByteBuffer.allocate(buf.length);
 125  30
                                 remaining.put(buf);
 126  30
                                 remaining.flip();
 127  
                         }
 128  30
                         buffer = null;
 129  30
                         detector = null;
 130  30
                         isASCII = false;
 131  30
                 }
 132  
 
 133  
                 @Override
 134  
                 protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
 135  30
                         byte[] buf = new byte[1024];
 136  
                         while (true) {
 137  1056
                                 if (usedDecoder != null) {
 138  0
                                         if (remaining != null) {
 139  0
                                                 CoderResult result = writeOut(out);
 140  0
                                                 if (!result.isUnderflow())
 141  0
                                                         return result;
 142  
                                         }
 143  0
                                         return usedDecoder.decode(in, out, false);
 144  
                                 }
 145  1056
                                 if (in.remaining() == 0)
 146  30
                                         return CoderResult.UNDERFLOW;
 147  1026
                                 int len = Math.min(in.remaining(), buf.length);
 148  1026
                                 in.get(buf, 0, len);
 149  1026
                                 buffer.write(buf, 0, len);
 150  1026
                                 if (isASCII)
 151  324
                                         isASCII = detector.isAscii(buf, len);
 152  
 
 153  1026
                                 if (!isASCII) {
 154  727
                                         boolean done = detector.DoIt(buf, len, false);
 155  727
                                         done = false;
 156  727
                                         if (done && usedDecoder == null) {
 157  
                                                 // no charset left, fall back
 158  0
                                                 setCharset("ISO-8859-1");
 159  
                                         }
 160  
                                 }
 161  1026
                         }
 162  
                 }
 163  
 
 164  
                 @Override
 165  
                 protected CoderResult implFlush(CharBuffer out) {
 166  30
                         if (usedDecoder == null) {
 167  30
                                 detector.DataEnd();
 168  
                         }
 169  30
                         if (usedDecoder == null) {
 170  30
                                 if (isASCII) {
 171  5
                                         setCharset("US-ASCII");
 172  
                                 } else {
 173  25
                                         String prob[] = detector.getProbableCharsets();
 174  25
                                         if (prob.length > 0) {
 175  25
                                                 setCharset(prob[0]);
 176  
                                         } else {
 177  
                                                 // fallback
 178  0
                                                 setCharset("ISO-8859-1");
 179  
                                         }
 180  
                                 }
 181  
                         }
 182  30
                         if (remaining != null) {
 183  30
                                 CoderResult result = writeOut(out);
 184  30
                                 if (!result.isUnderflow())
 185  0
                                         return result;
 186  
                         }
 187  30
                         if (!isASCII) {
 188  30
                                 ByteBuffer empty = ByteBuffer.allocate(1);
 189  30
                                 empty.flip();
 190  30
                                 CoderResult result = usedDecoder.decode(empty, out, true);
 191  30
                                 if (!result.isUnderflow())
 192  0
                                         return result;
 193  30
                                 result = usedDecoder.flush(out);
 194  30
                                 if (!result.isUnderflow())
 195  0
                                         return result;
 196  30
                                 isASCII = true;
 197  
                         }
 198  30
                         return super.implFlush(out);
 199  
                 }
 200  
 
 201  
                 private CoderResult writeOut(CharBuffer out) {
 202  30
                         CoderResult result = usedDecoder.decode(remaining, out, false);
 203  30
                         if (remaining.remaining() == 0)
 204  30
                                 remaining = null;
 205  30
                         return result;
 206  
                 }
 207  
 
 208  
                 @Override
 209  
                 protected void implReset() {
 210  30
                         detector = new nsDetector(languageFlag);
 211  30
                         buffer = new ByteArrayOutputStream();
 212  30
                         usedDecoder = null;
 213  30
                         remaining = null;
 214  30
                         isASCII = true;
 215  30
                 }
 216  
 
 217  
                 @Override
 218  
                 public boolean isAutoDetecting() {
 219  0
                         return true;
 220  
                 }
 221  
 
 222  
                 @Override
 223  
                 public boolean isCharsetDetected() {
 224  0
                         return usedDecoder != null;
 225  
                 }
 226  
 
 227  
                 @Override
 228  
                 public Charset detectedCharset() {
 229  0
                         return usedDecoder.charset();
 230  
                 }
 231  
         }
 232  
 }