Coverage Report - net.sf.jmatchparser.util.BinaryExpression
 
Classes in this File Line Coverage Branch Coverage Complexity
BinaryExpression
100%
162/162
100%
110/110
9,875
 
 1  
 /*
 2  
  * Copyright (c) 2010-2011 Michael Schierl
 3  
  * 
 4  
  * All rights reserved.
 5  
  * 
 6  
  * Redistribution and use in source and binary forms, with or without
 7  
  * modification, are permitted provided that the following conditions
 8  
  * are met:
 9  
  * 
 10  
  * - Redistributions of source code must retain the above copyright notice,
 11  
  *   this list of conditions and the following disclaimer.
 12  
  *   
 13  
  * - Redistributions in binary form must reproduce the above copyright
 14  
  *   notice, this list of conditions and the following disclaimer in the
 15  
  *   documentation and/or other materials provided with the distribution.
 16  
  *   
 17  
  * - Neither name of the copyright holders nor the names of its
 18  
  *   contributors may be used to endorse or promote products derived from
 19  
  *   this software without specific prior written permission.
 20  
  *   
 21  
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND THE CONTRIBUTORS
 22  
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 23  
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 24  
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 25  
  * HOLDERS OR THE CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 26  
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 27  
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 28  
  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 29  
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 30  
  * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 31  
  * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 32  
  */
 33  
 package net.sf.jmatchparser.util;
 34  
 
 35  
 import java.io.ByteArrayOutputStream;
 36  
 import java.io.IOException;
 37  
 import java.security.MessageDigest;
 38  
 import java.security.NoSuchAlgorithmException;
 39  
 import java.util.Random;
 40  
 import java.util.Stack;
 41  
 import java.util.StringTokenizer;
 42  
 
 43  
 /**
 44  
  * An expression that can be used to build or match binary byte arrays.
 45  
  * 
 46  
  * <p>
 47  
  * This is useful when binary data (like encryption keys) should be configurable
 48  
  * in a text file, where the user can decide which format (hex, base64, etc.) he
 49  
  * wants to use to provide the data.
 50  
  * 
 51  
  * <p>
 52  
  * Therefore, the user can give a list of transformations (separated by colons),
 53  
  * followed by data to be transformed. Each transformation is applied on the
 54  
  * result of the previous transformation. Therefore,
 55  
  * <tt>hex:base64:NzQ2NTczNzQ=</tt> will result in <tt>test</tt>.
 56  
  * 
 57  
  * <p>
 58  
  * An empty transformation name will stop parsing transformations; therefore, to
 59  
  * match the literal (UTF-8) bytes <tt>C:\&gt;</tt>, the expression
 60  
  * <tt>:C:\&gt;</tt> can be used. If no encoding is given, everything is encoded
 61  
  * as UTF-8. An explicit encoding can be given by using the <tt>encode-</tt>
 62  
  * <i>&lt;encoding&gt;</i><tt>:</tt> transformation.
 63  
  * 
 64  
  * <p>
 65  
  * Optionally, a secondary expression can be parsed at the same time, for
 66  
  * example an initialization vector for an encryption key or a mask for matching
 67  
  * data. In case of a mask, {@link MaskedBinaryExpression} may be the
 68  
  * better choice.
 69  
  * 
 70  
  * <p>
 71  
  * The following transformations are supported:
 72  
  * 
 73  
  * <dl>
 74  
  * <dt><b><tt>hex</tt></b></dt>
 75  
  * <dd>decode hexadecimal, like <tt>hex:444a32</tt> -&gt; <tt>DJ2</tt></dd>
 76  
  * <dt><b><tt>base64</tt></b></dt>
 77  
  * <dd>decode base64, like <tt>base64:SGVsbG8=</tt> -&gt; <tt>Hello</tt></dd>
 78  
  * <dt><b><tt>sha1</tt></b></dt>
 79  
  * <dd>Encode SHA1, like <tt>sha1:test</tt> -&gt;
 80  
  * <tt>hex:a94a8fe5ccb19ba61c4c0873d391e987982fbbd3</tt></dd>
 81  
  * <dt><b><tt>md5</tt></b></dt>
 82  
  * <dd>Encode MD5, like <tt>md5:test</tt> -&gt;
 83  
  * <tt>hex:098f6bcd4621d373cade4e832627b4f6</tt></dd>
 84  
  * <dt><b><tt>hash-</tt><i>algorithm</i></b></dt>
 85  
  * <dd>Encode any hash algorithm; <tt>hash-MD5</tt> is equivalent to
 86  
  * <tt>md5</tt>, <tt>hash-SHA-1</tt> is equivalent to <tt>sha1</tt></dd>
 87  
  * <dt><b><tt>split</tt></b></dt>
 88  
  * <dd>Split the expression into multiple expressions, delimited by a custom
 89  
  * character directly following the colon after <tt>split</tt>, like
 90  
  * <tt>split:#Area#hex:35#1</tt> -&gt; <tt>Area51</tt></dd>
 91  
  * <dt><b><tt>unescape</tt></b></dt>
 92  
  * <dd>Parse Java escapes, like <tt>unescape:\r\n</tt> -&gt; <tt>hex:0d0a</tt>,
 93  
  * or <tt>unescape:Some\u20acfor\44</tt> -&gt; <tt>Some€for$</tt></dd>
 94  
  * <dt><b><tt>encode-</tt><i>charset</i></b></dt>
 95  
  * <dd>Encode in a given charset, like <tt>encode-ISO-8859-15:€</tt> -&gt;
 96  
  * <tt>hex:a4</tt>, or <tt>encode-UTF-8:€</tt> -&gt; <tt>hex:E282AC</tt></dd>
 97  
  * <dt><b><tt>map</tt></b></dt>
 98  
  * <dd>Select a given range of characters (or more than one) out of another
 99  
  * expression, like <tt>map1-4+5-9o+9+3+3:987654321</tt> -&gt;
 100  
  * <tt>9876531177</tt>. The result of a <i>map</i> expression may be up to 4KB
 101  
  * long.</dd>
 102  
  * <dt><b><i>secondaryName</i></b></dt>
 103  
  * <dd>Similar to map, but store the result as the secondary value. When in
 104  
  * <i>random</i> mode, also supports expressions like 4r for 4 random
 105  
  * characters; when not in <i>random</i> mode, it can use <tt>3-5?</tt>, meaning
 106  
  * that characters 3 to 5 are copied to the secondary value, but marked as
 107  
  * optional.</dd>
 108  
  * </dl>
 109  
  */
 110  
 public class BinaryExpression {
 111  
 
 112  
         /**
 113  
          * Parse a binary expression with no secondary expression.
 114  
          */
 115  
         public static byte[] parseBinaryExpression(String expression) throws IOException {
 116  51
                 return new BinaryExpression(expression, null, false).getValue();
 117  
         }
 118  
 
 119  
         private final byte[] value, secondaryValue;
 120  
         private final boolean secondaryValid, secondaryRandom;
 121  
         private final boolean[] secondaryBits;
 122  
 
 123  
         /**
 124  
          * Create a new binary expression with optional secondary expressions
 125  
          * 
 126  
          * @param expression
 127  
          *            Expression to parse
 128  
          * @param secondaryName
 129  
          *            Name used in the expression to denote secondary expression
 130  
          *            (like <tt>mask</tt> or <tt>iv</tt>)
 131  
          * @param secondaryRandom
 132  
          *            Whether the secondary expression may contain random bytes
 133  
          *            (like an iv); if not, the marked bytes are optional instead of
 134  
          *            random
 135  
          */
 136  80
         public BinaryExpression(String expression, String secondaryName, boolean secondaryRandom) throws IOException {
 137  80
                 Stack<String> transforms = new Stack<String>();
 138  
                 int pos;
 139  160
                 while ((pos = expression.indexOf(':')) != -1) {
 140  89
                         String transform = expression.substring(0, pos);
 141  89
                         transforms.push(transform);
 142  89
                         expression = expression.substring(pos + 1);
 143  89
                         if (transform.length() == 0) {
 144  5
                                 transforms.pop();
 145  5
                                 break;
 146  84
                         } else if (transform.equals("split")) {
 147  4
                                 break;
 148  
                         }
 149  80
                 }
 150  
 
 151  80
                 byte[] value = expression.getBytes("UTF-8");
 152  80
                 byte[] secondaryValue = null;
 153  80
                 boolean[] secondaryBits = null;
 154  
 
 155  
                 try {
 156  159
                         while (!transforms.isEmpty()) {
 157  84
                                 String transform = transforms.pop();
 158  84
                                 if (transform.equals("hex")) {
 159  26
                                         if (value.length % 2 != 0)
 160  1
                                                 throw new IllegalArgumentException("invalid hex length");
 161  25
                                         byte[] valueNew = new byte[value.length / 2];
 162  181
                                         for (int j = 0; j < valueNew.length; j++) {
 163  157
                                                 valueNew[j] = (byte) Integer.parseInt((char) value[j * 2] + "" + (char) value[j * 2 + 1], 16);
 164  
                                         }
 165  24
                                         value = valueNew;
 166  24
                                 } else if (transform.equals("base64")) {
 167  2
                                         String base64Text = new String(value, "UTF-8");
 168  2
                                         value = decodeBase64(base64Text);
 169  2
                                 } else if (transform.equals("sha1")) {
 170  1
                                         value = MessageDigest.getInstance("SHA-1").digest(value);
 171  55
                                 } else if (transform.equals("md5")) {
 172  1
                                         value = MessageDigest.getInstance("MD5").digest(value);
 173  54
                                 } else if (transform.equals("split")) {
 174  4
                                         String parts = new String(value, "UTF-8");
 175  4
                                         StringTokenizer st = new StringTokenizer(parts.substring(1), "" + parts.charAt(0));
 176  4
                                         ByteArrayOutputStream baos = new ByteArrayOutputStream();
 177  16
                                         while (st.hasMoreTokens()) {
 178  12
                                                 baos.write(parseBinaryExpression(st.nextToken()));
 179  
                                         }
 180  4
                                         value = baos.toByteArray();
 181  4
                                 } else if (transform.equals("unescape")) {
 182  2
                                         value = unescape(new String(value, "UTF-8"), "").getBytes("UTF-8");
 183  48
                                 } else if (transform.startsWith("hash-")) {
 184  3
                                         value = MessageDigest.getInstance(transform.substring(5)).digest(value);
 185  45
                                 } else if (transform.startsWith("encode-")) {
 186  7
                                         value = new String(value, "UTF-8").getBytes(transform.substring(7));
 187  38
                                 } else if (transform.startsWith("map")) {
 188  16
                                         byte[] tmp = new byte[4096];
 189  16
                                         int count = 0;
 190  16
                                         String[] exprs = transform.substring(3).split("\\+");
 191  36
                                         for (int j = 0; j < exprs.length; j++) {
 192  20
                                                 if (exprs[j].length() == 0)
 193  3
                                                         continue;
 194  17
                                                 NumberExpression expr = new NumberExpression(exprs[j]);
 195  73
                                                 for (int k = expr.getMinimum(); k <= expr.getMaximum(); k++) {
 196  56
                                                         if (expr.matches(k))
 197  54
                                                                 tmp[count++] = value[k - 1];
 198  
                                                 }
 199  
                                         }
 200  16
                                         value = new byte[count];
 201  16
                                         System.arraycopy(tmp, 0, value, 0, count);
 202  16
                                 } else if (secondaryName != null && transform.startsWith(secondaryName)) {
 203  20
                                         boolean bitsUsedTemp = false;
 204  20
                                         byte[] tmp = new byte[4096];
 205  20
                                         boolean[] tmpbit = new boolean[4096];
 206  20
                                         int count = 0;
 207  20
                                         String[] exprs = transform.substring(secondaryName.length()).split("\\+");
 208  57
                                         for (int j = 0; j < exprs.length; j++) {
 209  37
                                                 if (secondaryRandom && exprs[j].endsWith("r")) {
 210  8
                                                         for (int k = 0; k < Integer.parseInt(exprs[j].substring(0, exprs[j].length() - 1)); k++) {
 211  6
                                                                 tmpbit[count++] = true;
 212  6
                                                                 bitsUsedTemp = true;
 213  
                                                         }
 214  
                                                 } else {
 215  35
                                                         boolean optional = false;
 216  35
                                                         if (!secondaryRandom && exprs[j].endsWith("?")) {
 217  13
                                                                 optional = true;
 218  13
                                                                 bitsUsedTemp = true;
 219  13
                                                                 exprs[j] = exprs[j].substring(0, exprs[j].length() - 1);
 220  
                                                         }
 221  35
                                                         NumberExpression expr = new NumberExpression(exprs[j]);
 222  101
                                                         for (int k = expr.getMinimum(); k <= expr.getMaximum(); k++) {
 223  66
                                                                 if (expr.matches(k)) {
 224  64
                                                                         tmp[count] = value[k - 1];
 225  64
                                                                         tmpbit[count] = optional;
 226  64
                                                                         count++;
 227  
                                                                 }
 228  
                                                         }
 229  
                                                 }
 230  
                                         }
 231  20
                                         secondaryValue = new byte[count];
 232  20
                                         System.arraycopy(tmp, 0, secondaryValue, 0, count);
 233  20
                                         if (bitsUsedTemp) {
 234  15
                                                 secondaryBits = new boolean[count];
 235  15
                                                 System.arraycopy(tmpbit, 0, secondaryBits, 0, count);
 236  
                                         } else {
 237  5
                                                 secondaryBits = null;
 238  
                                         }
 239  20
                                 } else {
 240  2
                                         throw new IllegalArgumentException("Unsupported transform: " + transform);
 241  
                                 }
 242  79
                         }
 243  1
                 } catch (NoSuchAlgorithmException ex) {
 244  1
                         IOException ioex = new IOException("Cannot create algorithm");
 245  1
                         ioex.initCause(ex);
 246  1
                         throw ioex;
 247  75
                 }
 248  75
                 this.value = value;
 249  75
                 this.secondaryValue = secondaryValue;
 250  75
                 this.secondaryRandom = secondaryRandom;
 251  75
                 this.secondaryValid = secondaryName != null;
 252  75
                 this.secondaryBits = secondaryBits;
 253  75
         }
 254  
 
 255  
         @SuppressWarnings("restriction")
 256  
         private byte[] decodeBase64(String base64Text) throws IOException {
 257  2
                 return new sun.misc.BASE64Decoder().decodeBuffer(base64Text);
 258  
         }
 259  
 
 260  
         /**
 261  
          * Unescape Java escape sequences like \n or octal or unicode escapes.
 262  
          * 
 263  
          * @param string
 264  
          *            The string to unescape
 265  
          * @param literalChars
 266  
          *            List of characters (like quotation marks) that should return
 267  
          *            themselves instead of producing an error. A backslash will
 268  
          *            always return itself regardless of whether it is in this list or
 269  
          *            not.
 270  
          */
 271  
         public static String unescape(String string, String literalChars) throws IOException {
 272  9
                 char[] chars = string.toCharArray();
 273  9
                 StringBuffer sb = new StringBuffer(chars.length);
 274  87
                 for (int i = 0; i < chars.length; i++) {
 275  82
                         if (chars[i] != '\\') {
 276  57
                                 sb.append(chars[i]);
 277  57
                                 continue;
 278  
                         }
 279  25
                         i++;
 280  25
                         if (i == chars.length)
 281  1
                                 throw new IOException("Cannot unescape backslash at end of string");
 282  
 
 283  24
                         switch (chars[i]) {
 284  
                         case 'b':
 285  1
                                 sb.append('\b');
 286  1
                                 break;
 287  
                         case 't':
 288  1
                                 sb.append('\t');
 289  1
                                 break;
 290  
                         case 'n':
 291  2
                                 sb.append('\n');
 292  2
                                 break;
 293  
                         case 'f':
 294  1
                                 sb.append('\f');
 295  1
                                 break;
 296  
                         case 'r':
 297  2
                                 sb.append('\r');
 298  2
                                 break;
 299  
                         case '\\':
 300  1
                                 sb.append('\\');
 301  1
                                 break;
 302  
 
 303  
                         case 'u':
 304  3
                                 if (chars.length < i + 5)
 305  1
                                         throw new IOException("Invalid unicode escape: " + string.substring(i - 1));
 306  
                                 try {
 307  2
                                         String unicodeChars = string.substring(i + 1, i + 5);
 308  2
                                         sb.append((char) Integer.parseInt(unicodeChars, 16));
 309  1
                                         i += 4;
 310  1
                                 } catch (NumberFormatException ex) {
 311  1
                                         throw new IOException("Invalid unicode escape: " + string.substring(i - 1, i + 5));
 312  1
                                 }
 313  
                                 break;
 314  
 
 315  
                         default:
 316  13
                                 if (literalChars.indexOf(chars[i]) != -1) {
 317  3
                                         sb.append(chars[i]);
 318  3
                                         break;
 319  
                                 }
 320  10
                                 String octalValue = "";
 321  32
                                 while (i < chars.length && chars[i] >= '0' && chars[i] <= '7' && (octalValue.length() < 2 || (octalValue.length() == 2 && octalValue.charAt(0) <= '3'))) {
 322  22
                                         octalValue += chars[i];
 323  22
                                         i++;
 324  
                                 }
 325  10
                                 if (octalValue.length() == 0) {
 326  1
                                         throw new IOException("Invalid escape sequence: " + string.substring(i - 1, i + 1));
 327  
                                 }
 328  9
                                 i--;
 329  9
                                 sb.append((char) Integer.parseInt(octalValue, 8));
 330  
                                 break;
 331  
                         }
 332  
                 }
 333  5
                 return sb.toString();
 334  
         }
 335  
 
 336  
         /**
 337  
          * Return the value of the parsed expression.
 338  
          */
 339  
         public byte[] getValue() {
 340  71
                 return value;
 341  
         }
 342  
 
 343  
         /**
 344  
          * Return the secondary value, if any.
 345  
          */
 346  
         public byte[] getSecondaryValue() {
 347  25
                 if (!secondaryValid)
 348  1
                         throw new IllegalStateException("No secondary value parsed");
 349  24
                 return secondaryValue;
 350  
         }
 351  
 
 352  
         /**
 353  
          * Return the bytes marked (as optional or random) in the secondary value,
 354  
          * if any.
 355  
          */
 356  
         public boolean[] getMarkedSecondaryBytes() {
 357  24
                 if (!secondaryValid)
 358  1
                         throw new IllegalStateException("No secondary value parsed");
 359  23
                 return secondaryBits;
 360  
         }
 361  
 
 362  
         /**
 363  
          * Compute a new random secondary value. This works only if the secondary
 364  
          * value was parsed in random mode.
 365  
          * 
 366  
          * @param randomSource
 367  
          *            source for the randomness
 368  
          */
 369  
         public byte[] getRandomSecondaryValue(Random randomSource) {
 370  5
                 if (!secondaryValid)
 371  1
                         throw new IllegalStateException("No secondary value parsed");
 372  4
                 if (!secondaryRandom)
 373  1
                         throw new IllegalStateException("Not parsed in random mode");
 374  3
                 if (secondaryBits == null)
 375  2
                         return secondaryValue;
 376  
 
 377  1
                 byte[] result = new byte[secondaryValue.length];
 378  5
                 for (int i = 0; i < result.length; i++) {
 379  4
                         if (secondaryBits[i])
 380  2
                                 result[i] = (byte) randomSource.nextInt(256);
 381  
                         else
 382  2
                                 result[i] = secondaryValue[i];
 383  
                 }
 384  1
                 return result;
 385  
         }
 386  
 }