Coverage Report - net.sf.jmatchparser.util.csv.CSVReader
 
Classes in this File Line Coverage Branch Coverage Complexity
CSVReader
92%
71/77
86%
43/50
3,636
 
 1  
 /*
 2  
  * Copyright (c) 2006 - 2011 Michael Schierl
 3  
  * 
 4  
  * All rights reserved.
 5  
  * 
 6  
  * Redistribution and use in source and binary forms, with or without
 7  
  * modification, are permitted provided that the following conditions
 8  
  * are met:
 9  
  * 
 10  
  * - Redistributions of source code must retain the above copyright notice,
 11  
  *   this list of conditions and the following disclaimer.
 12  
  *   
 13  
  * - Redistributions in binary form must reproduce the above copyright
 14  
  *   notice, this list of conditions and the following disclaimer in the
 15  
  *   documentation and/or other materials provided with the distribution.
 16  
  *   
 17  
  * - Neither name of the copyright holders nor the names of its
 18  
  *   contributors may be used to endorse or promote products derived from
 19  
  *   this software without specific prior written permission.
 20  
  *   
 21  
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND THE CONTRIBUTORS
 22  
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 23  
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 24  
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 25  
  * HOLDERS OR THE CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 26  
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 27  
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 28  
  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 29  
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 30  
  * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 31  
  * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 32  
  */
 33  
 package net.sf.jmatchparser.util.csv;
 34  
 
 35  
 import java.io.BufferedReader;
 36  
 import java.io.IOException;
 37  
 import java.io.InputStream;
 38  
 import java.io.InputStreamReader;
 39  
 import java.io.Reader;
 40  
 import java.io.UnsupportedEncodingException;
 41  
 import java.util.ArrayList;
 42  
 import java.util.List;
 43  
 
 44  
 /**
 45  
  * Class to read data from a CSV (Comma Separated Value) file, supporting
 46  
  * different separator characters and quoted values.
 47  
  */
 48  
 public class CSVReader extends AbstractCSVReader {
 49  
 
 50  
         private final BufferedReader br;
 51  7
         private char separator = 0;
 52  7
         private boolean stripComments = false;
 53  7
         private boolean supportFullyQuotedLines = false;
 54  7
         private boolean supportMultiLineCells = false;
 55  
 
 56  
         /**
 57  
          * Create a new {@link CSVReader} that reads from the given buffered reader.
 58  
          */
 59  7
         public CSVReader(BufferedReader br) {
 60  7
                 this.br = br;
 61  7
         }
 62  
 
 63  
         /**
 64  
          * Create a new {@link CSVReader} that reads from the given reader.
 65  
          */
 66  
         public CSVReader(Reader r) {
 67  7
                 this(new BufferedReader(r));
 68  7
         }
 69  
 
 70  
         /**
 71  
          * Create a new {@link CSVReader} that reads from the given stream.
 72  
          */
 73  
         public CSVReader(InputStream in, String charsetName) throws UnsupportedEncodingException {
 74  2
                 this(new InputStreamReader(in, charsetName));
 75  2
         }
 76  
 
 77  
         /**
 78  
          * Return the separator character (usually comma, tab or semicolon), or 0 to
 79  
          * auto-detect.
 80  
          */
 81  
         public char getSeparator() {
 82  0
                 return separator;
 83  
         }
 84  
 
 85  
         /**
 86  
          * Set the separator character (usually comma, tab or semicolon), or 0 to
 87  
          * auto-detect.
 88  
          */
 89  
         public void setSeparator(char separator) {
 90  2
                 this.separator = separator;
 91  2
         }
 92  
 
 93  
         /**
 94  
          * Set whether comments (lines starting with # or empty lines) should be
 95  
          * stripped.
 96  
          */
 97  
         public void setStripComments(boolean stripComments) {
 98  3
                 this.stripComments = stripComments;
 99  3
         }
 100  
 
 101  
         /**
 102  
          * Some broken CSV writers quote the whole line if no field of the line
 103  
          * needs to be quoted. In that case, a standard-compliant CSV parser will
 104  
          * read everything into one field. To support these files, this option can
 105  
          * be enabled. In that case, if there is only a quote character at the
 106  
          * beginning and the end of the line, it is stripped.
 107  
          */
 108  
         public void setSupportFullyQuotedLines(boolean supportFullyQuotedLines) {
 109  3
                 this.supportFullyQuotedLines = supportFullyQuotedLines;
 110  3
         }
 111  
 
 112  
         /**
 113  
          * Enable support for multi-line values in cells. In that case, if a
 114  
          * quotation mark is not closed within the same line, more lines are read to
 115  
          * find the closing quotation mark. Lines will be combined with newline
 116  
          * characters <tt>\n</tt> regardless of the original line delimiter
 117  
          * character.
 118  
          * 
 119  
          * If disabled, quotation marks that are not closed within the same line are
 120  
          * treated as a parse error.
 121  
          */
 122  
         public void setSupportMultiLineCells(boolean supportMutiLineCells) {
 123  3
                 this.supportMultiLineCells = supportMutiLineCells;
 124  3
         }
 125  
 
 126  
         @Override
 127  
         public void close() throws IOException {
 128  4
                 br.close();
 129  4
         }
 130  
 
 131  
         @Override
 132  
         public String[] read() throws IOException {
 133  25
                 String line = readNextLine();
 134  25
                 if (line == null)
 135  7
                         return null;
 136  18
                 if (separator == 0) {
 137  6
                         if (line.indexOf('\t') != -1)
 138  0
                                 separator = '\t';
 139  6
                         else if (line.indexOf(';') != -1)
 140  5
                                 separator = ';';
 141  1
                         else if (line.indexOf(',') != -1)
 142  1
                                 separator = ',';
 143  
                         else
 144  0
                                 separator = ';';
 145  
                 }
 146  18
                 final String origLine = line;
 147  18
                 List<String> elems = new ArrayList<String>();
 148  61
                 while (line != null) {
 149  43
                         if (line.startsWith("\"")) {
 150  31
                                 String part = "";
 151  
                                 while (true) {
 152  35
                                         int pos = line.indexOf("\"", 1);
 153  38
                                         while (supportMultiLineCells && pos == -1) {
 154  3
                                                 String nextLine = readNextLine();
 155  3
                                                 if (nextLine == null)
 156  0
                                                         break;
 157  3
                                                 line = line + '\n' + nextLine;
 158  3
                                                 pos = line.indexOf("\"", 1);
 159  3
                                         }
 160  35
                                         if (pos == -1)
 161  0
                                                 throw new IOException("CSV line cannot be parsed: " + origLine);
 162  35
                                         part += line.substring(1, pos);
 163  35
                                         line = line.substring(pos + 1);
 164  35
                                         if (line.length() == 0) {
 165  13
                                                 line = null;
 166  13
                                                 break;
 167  22
                                         } else if (line.charAt(0) == separator) {
 168  18
                                                 line = line.substring(1);
 169  18
                                                 break;
 170  4
                                         } else if (line.charAt(0) != '"') {
 171  0
                                                 throw new IOException("CSV line cannot be parsed: " + origLine + "\nat: " + line);
 172  
                                         }
 173  4
                                         part += "\"";
 174  4
                                 }
 175  31
                                 elems.add(part);
 176  31
                         } else {
 177  12
                                 int pos = line.indexOf(separator);
 178  12
                                 if (pos == -1) {
 179  5
                                         elems.add(line);
 180  5
                                         line = null;
 181  
                                 } else {
 182  7
                                         elems.add(line.substring(0, pos));
 183  7
                                         line = line.substring(pos + 1);
 184  
                                 }
 185  12
                         }
 186  
                 }
 187  18
                 return elems.toArray(new String[elems.size()]);
 188  
         }
 189  
 
 190  
         private String readNextLine() throws IOException {
 191  28
                 String line = br.readLine();
 192  28
                 if (stripComments) {
 193  13
                         while (line != null && (line.startsWith("#") || line.length() == 0))
 194  6
                                 line = br.readLine();
 195  
                 }
 196  28
                 if (line != null && supportFullyQuotedLines && line.length() > 2 && line.startsWith("\"") && line.endsWith("\"") && line.substring(1, line.length() - 1).indexOf('"') == -1) {
 197  2
                         line = line.substring(1, line.length() - 1);
 198  
                 }
 199  28
                 return line;
 200  
         }
 201  
 }