Coverage Report - net.sf.jmatchparser.util.charset.UTFBOMCharset
 
Classes in this File Line Coverage Branch Coverage Complexity
UTFBOMCharset
76%
10/13
33%
2/6
3,714
UTFBOMCharset$Decoder
88%
67/76
76%
43/56
3,714
 
 1  
 /*
 2  
  * Copyright (c) 2010 - 2011 Michael Schierl
 3  
  * 
 4  
  * All rights reserved.
 5  
  * 
 6  
  * Redistribution and use in source and binary forms, with or without
 7  
  * modification, are permitted provided that the following conditions
 8  
  * are met:
 9  
  * 
 10  
  * - Redistributions of source code must retain the above copyright notice,
 11  
  *   this list of conditions and the following disclaimer.
 12  
  *   
 13  
  * - Redistributions in binary form must reproduce the above copyright
 14  
  *   notice, this list of conditions and the following disclaimer in the
 15  
  *   documentation and/or other materials provided with the distribution.
 16  
  *   
 17  
  * - Neither name of the copyright holders nor the names of its
 18  
  *   contributors may be used to endorse or promote products derived from
 19  
  *   this software without specific prior written permission.
 20  
  *   
 21  
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND THE CONTRIBUTORS
 22  
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 23  
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 24  
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 25  
  * HOLDERS OR THE CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 26  
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 27  
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 28  
  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 29  
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 30  
  * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 31  
  * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 32  
  */
 33  
 package net.sf.jmatchparser.util.charset;
 34  
 
 35  
 import java.nio.ByteBuffer;
 36  
 import java.nio.CharBuffer;
 37  
 import java.nio.charset.Charset;
 38  
 import java.nio.charset.CharsetDecoder;
 39  
 import java.nio.charset.CharsetEncoder;
 40  
 import java.nio.charset.CoderResult;
 41  
 import java.nio.charset.CodingErrorAction;
 42  
 import java.util.Set;
 43  
 
 44  
 class UTFBOMCharset extends Charset {
 45  
 
 46  
         public static final String PREFIX = "UTF-BOM.";
 47  
         private final Charset fallback;
 48  
 
 49  
         protected UTFBOMCharset(Charset fallback) {
 50  6
                 super(PREFIX + fallback.name(), buildAliases(fallback.aliases()));
 51  6
                 this.fallback = fallback;
 52  6
         }
 53  
 
 54  
         private static String[] buildAliases(Set<String> aliases) {
 55  6
                 String[] result = new String[aliases.size()];
 56  6
                 int i = 0;
 57  6
                 for (String alias : aliases) {
 58  84
                         result[i] = PREFIX + alias;
 59  84
                         i++;
 60  
                 }
 61  6
                 return result;
 62  
         }
 63  
 
 64  
         @Override
 65  
         public boolean contains(Charset cs) {
 66  0
                 return cs.name() == this.name() || fallback.contains(cs);
 67  
         }
 68  
 
 69  
         @Override
 70  
         public CharsetDecoder newDecoder() {
 71  6
                 return new Decoder(fallback.newDecoder());
 72  
         }
 73  
 
 74  
         @Override
 75  
         public CharsetEncoder newEncoder() {
 76  0
                 throw new UnsupportedOperationException();
 77  
         }
 78  
 
 79  
         @Override
 80  
         public boolean canEncode() {
 81  0
                 return false;
 82  
         }
 83  
 
 84  
         private class Decoder extends CharsetDecoder {
 85  
 
 86  
                 private final CharsetDecoder fallbackDecoder;
 87  6
                 private CharsetDecoder usedDecoder = null;
 88  6
                 private byte state = 0;
 89  
 
 90  6
                 protected Decoder(CharsetDecoder fallbackDecoder) {
 91  6
                         super(UTFBOMCharset.this, fallbackDecoder.averageCharsPerByte(), fallbackDecoder.maxCharsPerByte() + 3);
 92  6
                         this.fallbackDecoder = fallbackDecoder;
 93  6
                 }
 94  
 
 95  
                 @Override
 96  
                 protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
 97  
                         while (true) {
 98  54
                                 if (usedDecoder != null) {
 99  14
                                         if (state != 0) {
 100  4
                                                 CoderResult result = writeOut(out);
 101  4
                                                 if (!result.isUnderflow())
 102  0
                                                         return result;
 103  
                                         }
 104  14
                                         return usedDecoder.decode(in, out, false);
 105  
                                 }
 106  40
                                 if (in.remaining() == 0)
 107  4
                                         return CoderResult.UNDERFLOW;
 108  36
                                 if (out.remaining() == 0)
 109  0
                                         return CoderResult.OVERFLOW;
 110  36
                                 byte b = in.get();
 111  36
                                 if (state == 0 && (b == (byte) 0xFE || b == (byte) 0xFF || b == (byte) 0xEF)) {
 112  17
                                         state = b;
 113  19
                                 } else if (state == (byte) 0xEF && b == (byte) 0xBB) {
 114  5
                                         state = b;
 115  14
                                 } else if (state == (byte) 0xFE && b == (byte) 0xFF) {
 116  3
                                         state = 0;
 117  3
                                         usedDecoder = Charset.forName("UTF-16BE").newDecoder();
 118  3
                                         usedDecoder.onUnmappableCharacter(CodingErrorAction.REPORT);
 119  3
                                         usedDecoder.onMalformedInput(CodingErrorAction.REPORT);
 120  11
                                 } else if (state == (byte) 0xFF && b == (byte) 0xFE) {
 121  3
                                         state = 0;
 122  3
                                         usedDecoder = Charset.forName("UTF-16LE").newDecoder();
 123  3
                                         usedDecoder.onUnmappableCharacter(CodingErrorAction.REPORT);
 124  3
                                         usedDecoder.onMalformedInput(CodingErrorAction.REPORT);
 125  8
                                 } else if (state == (byte) 0xBB && b == (byte) 0xBF) {
 126  3
                                         state = 0;
 127  3
                                         usedDecoder = Charset.forName("UTF-8").newDecoder();
 128  3
                                         usedDecoder.onUnmappableCharacter(CodingErrorAction.REPORT);
 129  3
                                         usedDecoder.onMalformedInput(CodingErrorAction.REPORT);
 130  
                                 } else {
 131  5
                                         in.position(in.position() - 1);
 132  5
                                         usedDecoder = fallbackDecoder;
 133  5
                                         usedDecoder.onUnmappableCharacter(CodingErrorAction.REPORT);
 134  5
                                         usedDecoder.onMalformedInput(CodingErrorAction.REPORT);
 135  
                                 }
 136  36
                         }
 137  
                 }
 138  
 
 139  
                 @Override
 140  
                 protected CoderResult implFlush(CharBuffer out) {
 141  18
                         if (state != 0) {
 142  4
                                 if (usedDecoder == null)
 143  4
                                         usedDecoder = fallbackDecoder;
 144  4
                                 CoderResult result = writeOut(out);
 145  4
                                 if (!result.isUnderflow())
 146  0
                                         return result;
 147  
                         }
 148  18
                         if (usedDecoder != null) {
 149  18
                                 ByteBuffer empty = ByteBuffer.allocate(1);
 150  18
                                 empty.flip();
 151  18
                                 CoderResult result = usedDecoder.decode(empty, out, true);
 152  18
                                 if (!result.isUnderflow())
 153  0
                                         return result;
 154  18
                                 result = usedDecoder.flush(out);
 155  18
                                 if (!result.isUnderflow())
 156  0
                                         return result;
 157  18
                                 usedDecoder = null;
 158  
                         }
 159  18
                         return super.implFlush(out);
 160  
                 }
 161  
 
 162  
                 private CoderResult writeOut(CharBuffer out) {
 163  8
                         ByteBuffer in = ByteBuffer.allocate(2);
 164  8
                         if (state == (byte) 0xBB) {
 165  2
                                 in.put((byte) 0xEF);
 166  
                         }
 167  8
                         in.put(state);
 168  8
                         in.flip();
 169  
 
 170  8
                         CoderResult result = usedDecoder.decode(in, out, false);
 171  8
                         if (in.remaining() == 0)
 172  8
                                 state = 0;
 173  8
                         if (in.remaining() == 1 && state == (byte) 0xBB)
 174  0
                                 state = (byte) 0xEF;
 175  8
                         return result;
 176  
                 }
 177  
 
 178  
                 @Override
 179  
                 protected void implReset() {
 180  18
                         usedDecoder = null;
 181  18
                         state = 0;
 182  18
                         fallbackDecoder.reset();
 183  18
                 }
 184  
 
 185  
                 @Override
 186  
                 public boolean isAutoDetecting() {
 187  0
                         return true;
 188  
                 }
 189  
 
 190  
                 @Override
 191  
                 public boolean isCharsetDetected() {
 192  0
                         return usedDecoder != null;
 193  
                 }
 194  
 
 195  
                 @Override
 196  
                 public Charset detectedCharset() {
 197  0
                         return usedDecoder.charset();
 198  
                 }
 199  
         }
 200  
 }