1 package cz.cuni.amis.pogamut.shady; 2 3 /** 4 * Store the string value as an argument. We want to allow escaping (e.g. \n, \t 5 * ..) and we are using syntax defined by the ({@linkplain http://java.sun.com/docs/books/jls/second_edition/html/lexical.doc.html#101084 java lexical structure}). 6 * <p/> 7 * <em>The constructor won't automatically unescape the passed value</em>. Use {@link ArgString#parseStringLiteral(java.lang.StringBuilder) 8 * }. 9 * 10 * @author Honza 11 */ 12 public class ArgString extends Arg<String> { 13 14 /** 15 * Take the string and use it as the value of the argument. Be careful, in 16 * most cases, use of {@link ArgString#unescape(java.lang.String)} is 17 * recommended (it parses escaped string into an unescaped form). 18 * 19 * @param string value of the argument 20 */ 21 public ArgString(String string) { 22 super(string); 23 } 24 25 /** 26 * Get unescaped version of passed string. 27 * 28 * @param escapedString string escaped according to java lexical structure, 29 * without double quotes (e.g. Hello\nWorld) 30 * @return unescaped string 31 * @throws ParseException if there is an error in the escaping 32 */ 33 public static String unescape(String escapedString) throws ParseException { 34 if (escapedString.length() == 0) { 35 return escapedString; 36 } 37 return parseStringCharacters(new StringBuilder(escapedString)); 38 } 39 40 /** 41 * Take the escaped string, parse it and return the unescaped value. 42 * 43 * @param string string to parse, incl. quotes (e.g. "Foo\nBar") 44 * <p/> 45 * <cite>StringLiteral: " [StringCharacters] "</cite> 46 * @return parsed string 47 */ 48 protected static String parseStringLiteral(String escaped) throws ParseException { 49 StringBuilder sb = new StringBuilder(escaped); 50 if (sb.charAt(0) != '\"') { 51 throw new ParseException("Expecting \" at the start of " + escaped.toString()); 52 } 53 sb.deleteCharAt(0); 54 55 // StringCharacters nonterminal is optional. If it is missing, ther is 56 // only ending double quote. 57 String parsed = ""; 58 if (sb.length() != 1) { 59 parsed = parseStringCharacters(sb); 60 } 61 62 if (sb.length() != 1) { 63 throw new ParseException("Expecting exactly one character (double quote), but " + sb.length() + " characters remain:" + sb.toString()); 64 } 65 66 if (sb.charAt(0) != '\"') { 67 throw new ParseException("Expecting ending double quote, but got: " + sb.charAt(0)); 68 } 69 70 return parsed.toString(); 71 } 72 73 /** 74 * Parse nonterminal <em>StringCharacters: ( StringCharacter )+</em> 75 * 76 * @param sb escaped string to be parsed, something may be even after last 77 * <em>StringCharacter</em>. This will be modified during progress, the 78 * parsed characters will be "eaten" 79 * @return unescaped string 80 */ 81 private static String parseStringCharacters(StringBuilder sb) throws ParseException { 82 Character ch; 83 StringBuilder stringCharacters = new StringBuilder(); 84 while ((ch = parseStringCharacter(sb)) != null) { 85 stringCharacters.append(ch); 86 } 87 return stringCharacters.toString(); 88 } 89 90 /** 91 * Extract the <em>StringCharacter</em> from the sb and return it. 92 * <p/> 93 * < 94 * pre> 95 * StringCharacter: InputCharacter but not " or \ 96 * StringCharacter: EscapeSequence 97 * InputCharacter: UnicodeInputCharacter but not CR or LF 98 * </pre> Basically when you expand it all, you will get 99 * <pre> 100 * \(u)+ [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] ~ Unicode char 101 * \[btnfr"'\] 102 * \[0-3][0-7][0-7] 103 * \[0-7][0-7] 104 * \[0-7] 105 * </pre> 106 * 107 * @param sb sequence of chars that is used to extract the character. Is 108 * modified (characters are removed) during parsing. 109 * @return found character if there is a <em>StringCharacter</em>, null 110 * otherwise 111 */ 112 protected static Character parseStringCharacter(StringBuilder sb) throws ParseException { 113 if (sb.length() == 0) { 114 return null; 115 } 116 char ch = sb.charAt(0); 117 if (ch == '\"' || ch == '\r' || ch == '\n') { 118 return null; 119 } 120 121 sb.deleteCharAt(0); 122 // Is that an escape sequence 123 if (ch == '\\') { 124 Character res = ArgChar.parseEscapeSequence(sb); 125 if (res == null) { 126 throw new ParseException("Unable to unescape sequence:" + sb.toString()); 127 } 128 return res; 129 } 130 return ch; 131 } 132 }