Coverage Report - net.sf.jmatchparser.util.charset.UTFBOMCharsetsProvider
 
Classes in this File Line Coverage Branch Coverage Complexity
UTFBOMCharsetsProvider
46%
13/28
75%
12/16
8
 
 1  
 /*
 2  
  * Copyright (c) 2010 - 2011 Michael Schierl
 3  
  * 
 4  
  * All rights reserved.
 5  
  * 
 6  
  * Redistribution and use in source and binary forms, with or without
 7  
  * modification, are permitted provided that the following conditions
 8  
  * are met:
 9  
  * 
 10  
  * - Redistributions of source code must retain the above copyright notice,
 11  
  *   this list of conditions and the following disclaimer.
 12  
  *   
 13  
  * - Redistributions in binary form must reproduce the above copyright
 14  
  *   notice, this list of conditions and the following disclaimer in the
 15  
  *   documentation and/or other materials provided with the distribution.
 16  
  *   
 17  
  * - Neither name of the copyright holders nor the names of its
 18  
  *   contributors may be used to endorse or promote products derived from
 19  
  *   this software without specific prior written permission.
 20  
  *   
 21  
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND THE CONTRIBUTORS
 22  
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 23  
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 24  
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 25  
  * HOLDERS OR THE CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 26  
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 27  
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 28  
  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 29  
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 30  
  * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 31  
  * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 32  
  */
 33  
 package net.sf.jmatchparser.util.charset;
 34  
 
 35  
 import java.nio.charset.Charset;
 36  
 import java.nio.charset.UnsupportedCharsetException;
 37  
 import java.nio.charset.spi.CharsetProvider;
 38  
 import java.util.ArrayList;
 39  
 import java.util.Iterator;
 40  
 import java.util.List;
 41  
 
 42  
 /**
 43  
  * Charset provider that provides a <code>UTF-BOM.<i>charset</i></code> charset
 44  
  * for every other supported charset, and a <code><i>charset</i>-BOM</code>
 45  
  * charset for each UTF charset.
 46  
  * 
 47  
  * <p>
 48  
  * The <code>UTF-BOM.</code> charsets will try to detect a byte order mark of
 49  
  * <code>UTF-16LE</code>, <code>UTF-16BE</code> or <code>UTF-8</code>.
 50  
  * 
 51  
  * <p>
 52  
  * If no byte order mark could be detected, it falls back to the charset given
 53  
  * at the end of the charset name.
 54  
  * 
 55  
  * <p>
 56  
  * This provider also provides charsets <tt>UTF-8-BOM</tt>,
 57  
  * <tt>UTF-16LE-BOM</tt> and <tt>UTF-16BE-BOM</tt>, which act like their
 58  
  * counterparts without <tt>-BOM</tt>, but will add a byte order mark when
 59  
  * encoding and strip it when decoding (if present).
 60  
  * 
 61  
  * <p>
 62  
  * Two additional charsets, <tt>UTF-8-Binary</tt> and <tt>UTF-8-Binary-PUA</tt>
 63  
  * are supersets of UTF-8 that will be binary safe on decoding (i.e. every byte
 64  
  * sequence will remain intact if decoded and encoded again). The first
 65  
  * mentioned charset will use unpaired surrogates in the range <tt>U+DC80</tt>
 66  
  * to <tt>U+DCFF</tt>, as suggested in the <a href=
 67  
  * "http://en.wikipedia.org/w/index.php?title=UTF-8&oldid=448714013#Invalid_byte_sequences"
 68  
  * >UTF-8 Wikipedia article</a>; the second one uses codepoints <tt>U+E980</tt>
 69  
  * to <tt>U+E9FF</tt> from the Private Use Area, escaping those code points (and
 70  
  * the escape character) with a <tt>U+E97F</tt> character if needed.
 71  
  * 
 72  
  * <p>
 73  
  * This class is loaded automatically via SPI when it is in the class path.
 74  
  */
 75  6090
 public class UTFBOMCharsetsProvider extends CharsetProvider {
 76  
 
 77  
         @Override
 78  
         public Charset charsetForName(String charsetName) {
 79  6090
                 if (charsetName.equals("UTF-8-BOM") || charsetName.equals("UTF-16LE-BOM") || charsetName.equals("UTF-16BE-BOM")) {
 80  6
                         Charset cs = Charset.forName(charsetName.substring(0, charsetName.length() - 4));
 81  6
                         return new AddBOMCharset(cs);
 82  
                 }
 83  6084
                 if (charsetName.equals(UTF8BinaryCharset.SURROGATE_VARIANT)) {
 84  2991
                         return new UTF8BinaryCharset(false);
 85  
                 }
 86  3093
                 if (charsetName.equals(UTF8BinaryCharset.PUA_VARIANT)) {
 87  2992
                         return new UTF8BinaryCharset(true);
 88  
                 }
 89  101
                 if (charsetName.startsWith(UTFBOMCharset.PREFIX)) {
 90  
                         try {
 91  6
                                 Charset cs = Charset.forName(charsetName.substring(UTFBOMCharset.PREFIX.length()));
 92  6
                                 return new UTFBOMCharset(cs);
 93  0
                         } catch (UnsupportedCharsetException ex) {
 94  
                         }
 95  
                 }
 96  95
                 return null;
 97  
         }
 98  
 
 99  
         // reentrance checker
 100  1
         private static ThreadLocal<Boolean> in = new ThreadLocal<Boolean>();
 101  
 
 102  
         @Override
 103  
         public Iterator<Charset> charsets() {
 104  0
                 List<Charset> l = new ArrayList<Charset>();
 105  0
                 l.add(charsetForName(UTF8BinaryCharset.SURROGATE_VARIANT));
 106  0
                 l.add(charsetForName(UTF8BinaryCharset.PUA_VARIANT));
 107  0
                 l.add(charsetForName("UTF-8-BOM"));
 108  0
                 l.add(charsetForName("UTF-16LE-BOM"));
 109  0
                 l.add(charsetForName("UTF-16BE-BOM"));
 110  0
                 if (in.get() != null)
 111  0
                         return l.iterator();
 112  0
                 in.set(true);
 113  
                 try {
 114  0
                         for (String cs : Charset.availableCharsets().keySet()) {
 115  0
                                 l.add(charsetForName(UTFBOMCharset.PREFIX + cs));
 116  
                         }
 117  
                 } finally {
 118  0
                         in.set(null);
 119  0
                 }
 120  0
                 return l.iterator();
 121  
         }
 122  
 }