1 package cz.cuni.amis.pogamut.shady;
2
3 /**
4 * Store the string value as an argument. We want to allow escaping (e.g. \n, \t
5 * ..) and we are using syntax defined by the ({@linkplain http://java.sun.com/docs/books/jls/second_edition/html/lexical.doc.html#101084 java lexical structure}).
6 * <p/>
7 * <em>The constructor won't automatically unescape the passed value</em>. Use {@link ArgString#parseStringLiteral(java.lang.StringBuilder)
8 * }.
9 *
10 * @author Honza
11 */
12 public class ArgString extends Arg<String> {
13
14 /**
15 * Take the string and use it as the value of the argument. Be careful, in
16 * most cases, use of {@link ArgString#unescape(java.lang.String)} is
17 * recommended (it parses escaped string into an unescaped form).
18 *
19 * @param string value of the argument
20 */
21 public ArgString(String string) {
22 super(string);
23 }
24
25 /**
26 * Get unescaped version of passed string.
27 *
28 * @param escapedString string escaped according to java lexical structure,
29 * without double quotes (e.g. Hello\nWorld)
30 * @return unescaped string
31 * @throws ParseException if there is an error in the escaping
32 */
33 public static String unescape(String escapedString) throws ParseException {
34 if (escapedString.length() == 0) {
35 return escapedString;
36 }
37 return parseStringCharacters(new StringBuilder(escapedString));
38 }
39
40 /**
41 * Take the escaped string, parse it and return the unescaped value.
42 *
43 * @param string string to parse, incl. quotes (e.g. "Foo\nBar")
44 * <p/>
45 * <cite>StringLiteral: " [StringCharacters] "</cite>
46 * @return parsed string
47 */
48 protected static String parseStringLiteral(String escaped) throws ParseException {
49 StringBuilder sb = new StringBuilder(escaped);
50 if (sb.charAt(0) != '\"') {
51 throw new ParseException("Expecting \" at the start of " + escaped.toString());
52 }
53 sb.deleteCharAt(0);
54
55 // StringCharacters nonterminal is optional. If it is missing, ther is
56 // only ending double quote.
57 String parsed = "";
58 if (sb.length() != 1) {
59 parsed = parseStringCharacters(sb);
60 }
61
62 if (sb.length() != 1) {
63 throw new ParseException("Expecting exactly one character (double quote), but " + sb.length() + " characters remain:" + sb.toString());
64 }
65
66 if (sb.charAt(0) != '\"') {
67 throw new ParseException("Expecting ending double quote, but got: " + sb.charAt(0));
68 }
69
70 return parsed.toString();
71 }
72
73 /**
74 * Parse nonterminal <em>StringCharacters: ( StringCharacter )+</em>
75 *
76 * @param sb escaped string to be parsed, something may be even after last
77 * <em>StringCharacter</em>. This will be modified during progress, the
78 * parsed characters will be "eaten"
79 * @return unescaped string
80 */
81 private static String parseStringCharacters(StringBuilder sb) throws ParseException {
82 Character ch;
83 StringBuilder stringCharacters = new StringBuilder();
84 while ((ch = parseStringCharacter(sb)) != null) {
85 stringCharacters.append(ch);
86 }
87 return stringCharacters.toString();
88 }
89
90 /**
91 * Extract the <em>StringCharacter</em> from the sb and return it.
92 * <p/>
93 * <
94 * pre>
95 * StringCharacter: InputCharacter but not " or \
96 * StringCharacter: EscapeSequence
97 * InputCharacter: UnicodeInputCharacter but not CR or LF
98 * </pre> Basically when you expand it all, you will get
99 * <pre>
100 * \(u)+ [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] ~ Unicode char
101 * \[btnfr"'\]
102 * \[0-3][0-7][0-7]
103 * \[0-7][0-7]
104 * \[0-7]
105 * </pre>
106 *
107 * @param sb sequence of chars that is used to extract the character. Is
108 * modified (characters are removed) during parsing.
109 * @return found character if there is a <em>StringCharacter</em>, null
110 * otherwise
111 */
112 protected static Character parseStringCharacter(StringBuilder sb) throws ParseException {
113 if (sb.length() == 0) {
114 return null;
115 }
116 char ch = sb.charAt(0);
117 if (ch == '\"' || ch == '\r' || ch == '\n') {
118 return null;
119 }
120
121 sb.deleteCharAt(0);
122 // Is that an escape sequence
123 if (ch == '\\') {
124 Character res = ArgChar.parseEscapeSequence(sb);
125 if (res == null) {
126 throw new ParseException("Unable to unescape sequence:" + sb.toString());
127 }
128 return res;
129 }
130 return ch;
131 }
132 }