Coverage Report - net.sf.jmatchparser.util.charset.juniversalchardet.JUniversalChardetCharset
 
Classes in this File | Line Coverage | Branch Coverage | Complexity
JUniversalChardetCharset | 70% (7/10) | 50% (2/4) | 2,6
JUniversalChardetCharset$Decoder | 87% (61/70) | 70% (21/30) | 2,6
 
 1  
 /* ***** BEGIN LICENSE BLOCK *****
 2  
  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 3  
  *
 4  
  * The contents of this file are subject to the Mozilla Public License Version
 5  
  * 1.1 (the "License"); you may not use this file except in compliance with
 6  
  * the License. You may obtain a copy of the License at
 7  
  * http://www.mozilla.org/MPL/
 8  
  *
 9  
  * Software distributed under the License is distributed on an "AS IS" basis,
 10  
  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 11  
  * for the specific language governing rights and limitations under the
 12  
  * License.
 13  
  *
 14  
  * The Original Code is mozilla.org code.
 15  
  *
 16  
  * The Initial Developer of the Original Code is
 17  
  * Netscape Communications Corporation.
 18  
  * Portions created by the Initial Developer are Copyright (C) 1998
 19  
  * the Initial Developer. All Rights Reserved.
 20  
  *
 21  
  * Alternatively, the contents of this file may be used under the terms of
 22  
  * either of the GNU General Public License Version 2 or later (the "GPL"),
 23  
  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 24  
  * in which case the provisions of the GPL or the LGPL are applicable instead
 25  
  * of those above. If you wish to allow use of your version of this file only
 26  
  * under the terms of either the GPL or the LGPL, and not to allow others to
 27  
  * use your version of this file under the terms of the MPL, indicate your
 28  
  * decision by deleting the provisions above and replace them with the notice
 29  
  * and other provisions required by the GPL or the LGPL. If you do not delete
 30  
  * the provisions above, a recipient may use your version of this file under
 31  
  * the terms of any one of the MPL, the GPL or the LGPL.
 32  
  *
 33  
  * ***** END LICENSE BLOCK ***** */
 34  
 package net.sf.jmatchparser.util.charset.juniversalchardet;
 35  
 
 36  
 import java.io.ByteArrayOutputStream;
 37  
 import java.nio.ByteBuffer;
 38  
 import java.nio.CharBuffer;
 39  
 import java.nio.charset.Charset;
 40  
 import java.nio.charset.CharsetDecoder;
 41  
 import java.nio.charset.CharsetEncoder;
 42  
 import java.nio.charset.CoderResult;
 43  
 import java.nio.charset.UnsupportedCharsetException;
 44  
 
 45  
 import org.mozilla.universalchardet.UniversalDetector;
 46  
 
 47  
 class JUniversalChardetCharset extends Charset {
 48  
 
 49  
         public static final String NAME = "jUniversalChardet";
 50  
 
 51  1
         private static JUniversalChardetCharset instance = null;
 52  
 
 53  
         protected static JUniversalChardetCharset getInstance() {
 54  66
                 if (instance == null) {
 55  1
                         instance = new JUniversalChardetCharset();
 56  
                 }
 57  66
                 return instance;
 58  
         }
 59  
 
 60  
         private JUniversalChardetCharset() {
 61  1
                 super(NAME, null);
 62  1
         }
 63  
 
 64  
         @Override
 65  
         public boolean contains(Charset cs) {
 66  0
                 return cs == this;
 67  
         }
 68  
 
 69  
         @Override
 70  
         public CharsetDecoder newDecoder() {
 71  67
                 return new Decoder();
 72  
         }
 73  
 
 74  
         @Override
 75  
         public CharsetEncoder newEncoder() {
 76  0
                 throw new UnsupportedOperationException();
 77  
         }
 78  
 
 79  
         @Override
 80  
         public boolean canEncode() {
 81  0
                 return false;
 82  
         }
 83  
 
 84  
         private class Decoder extends CharsetDecoder {
 85  
                 private UniversalDetector detector;
 86  67
                 private ByteArrayOutputStream buffer = new ByteArrayOutputStream();
 87  
 
 88  67
                 private CharsetDecoder usedDecoder = null;
 89  67
                 private ByteBuffer remaining = null;
 90  67
                 private boolean isFlushed = false;
 91  
 
 92  67
                 protected Decoder() {
 93  67
                         super(JUniversalChardetCharset.this, 1.0f, 2.0f);
 94  67
                         detector = new UniversalDetector(null);
 95  67
                 }
 96  
 
 97  
                 private void setCharset(String charset) {
 98  
                         try {
 99  66
                                 usedDecoder = Charset.forName(charset).newDecoder();
 100  0
                         } catch (UnsupportedCharsetException ex) {
 101  0
                                 usedDecoder = Charset.forName("ISO-8859-1").newDecoder();
 102  66
                         }
 103  66
                         usedDecoder.onUnmappableCharacter(unmappableCharacterAction());
 104  66
                         usedDecoder.onMalformedInput(malformedInputAction());
 105  66
                         byte[] buf = buffer.toByteArray();
 106  66
                         if (buf.length > 0) {
 107  66
                                 remaining = ByteBuffer.allocate(buf.length);
 108  66
                                 remaining.put(buf);
 109  66
                                 remaining.flip();
 110  
                         }
 111  66
                         buffer = null;
 112  66
                         detector = null;
 113  66
                 }
 114  
 
 115  
                 @Override
 116  
                 protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
 117  66
                         byte[] buf = new byte[1024];
 118  
                         while (true) {
 119  2720
                                 if (usedDecoder != null) {
 120  26
                                         if (remaining != null) {
 121  26
                                                 CoderResult result = writeOut(out);
 122  26
                                                 if (!result.isUnderflow())
 123  0
                                                         return result;
 124  
                                         }
 125  26
                                         return usedDecoder.decode(in, out, false);
 126  
                                 }
 127  2694
                                 if (in.remaining() == 0)
 128  40
                                         return CoderResult.UNDERFLOW;
 129  2654
                                 int len = Math.min(in.remaining(), buf.length);
 130  2654
                                 in.get(buf, 0, len);
 131  2654
                                 buffer.write(buf, 0, len);
 132  2654
                                 detector.handleData(buf, 0, len);
 133  2654
                                 if (detector.isDone()) {
 134  26
                                         detector.dataEnd();
 135  26
                                         setCharset(detector.getDetectedCharset());
 136  
                                 }
 137  2654
                         }
 138  
                 }
 139  
 
 140  
                 @Override
 141  
                 protected CoderResult implFlush(CharBuffer out) {
 142  66
                         if (usedDecoder == null) {
 143  40
                                 detector.dataEnd();
 144  40
                                 setCharset(detector.getDetectedCharset() == null ? "ISO-8859-1" : detector.getDetectedCharset());
 145  
                         }
 146  66
                         if (remaining != null) {
 147  46
                                 CoderResult result = writeOut(out);
 148  46
                                 if (!result.isUnderflow())
 149  0
                                         return result;
 150  
                         }
 151  66
                         if (!isFlushed) {
 152  66
                                 ByteBuffer empty = ByteBuffer.allocate(1);
 153  66
                                 empty.flip();
 154  66
                                 CoderResult result = usedDecoder.decode(empty, out, true);
 155  66
                                 if (!result.isUnderflow())
 156  0
                                         return result;
 157  66
                                 result = usedDecoder.flush(out);
 158  66
                                 if (!result.isUnderflow())
 159  0
                                         return result;
 160  66
                                 isFlushed = true;
 161  
                         }
 162  66
                         return super.implFlush(out);
 163  
                 }
 164  
 
 165  
                 private CoderResult writeOut(CharBuffer out) {
 166  72
                         CoderResult result = usedDecoder.decode(remaining, out, false);
 167  72
                         if (remaining.remaining() == 0)
 168  62
                                 remaining = null;
 169  72
                         return result;
 170  
                 }
 171  
 
 172  
                 @Override
 173  
                 protected void implReset() {
 174  66
                         detector = new UniversalDetector(null);
 175  66
                         buffer = new ByteArrayOutputStream();
 176  66
                         usedDecoder = null;
 177  66
                         remaining = null;
 178  66
                         isFlushed = false;
 179  66
                 }
 180  
 
 181  
                 @Override
 182  
                 public boolean isAutoDetecting() {
 183  0
                         return true;
 184  
                 }
 185  
 
 186  
                 @Override
 187  
                 public boolean isCharsetDetected() {
 188  0
                         return usedDecoder != null;
 189  
                 }
 190  
 
 191  
                 @Override
 192  
                 public Charset detectedCharset() {
 193  0
                         return usedDecoder.charset();
 194  
                 }
 195  
         }
 196  
 }