View Javadoc

1   package cz.cuni.amis.pogamut.shady;
2   
3   /**
4    * Store the string value as an argument. We want to allow escaping (e.g. \n, \t
5    * ..) and we are using syntax defined by the ({@linkplain http://java.sun.com/docs/books/jls/second_edition/html/lexical.doc.html#101084 java lexical structure}).
6    * <p/>
7    * <em>The constructor won't automatically unescape the passed value</em>. Use {@link ArgString#parseStringLiteral(java.lang.StringBuilder)
8    * }.
9    *
10   * @author Honza
11   */
12  public class ArgString extends Arg<String> {
13  
14      /**
15       * Take the string and use it as the value of the argument. Be careful, in
16       * most cases, use of {@link ArgString#unescape(java.lang.String)} is
17       * recommended (it parses escaped string into an unescaped form).
18       *
19       * @param string value of the argument
20       */
21      public ArgString(String string) {
22          super(string);
23      }
24  
25      /**
26       * Get unescaped version of passed string.
27       *
28       * @param escapedString string escaped according to java lexical structure,
29       * without double quotes (e.g. Hello\nWorld)
30       * @return unescaped string
31       * @throws ParseException if there is an error in the escaping
32       */
33      public static String unescape(String escapedString) throws ParseException {
34          if (escapedString.length() == 0) {
35              return escapedString;
36          }
37          return parseStringCharacters(new StringBuilder(escapedString));
38      }
39  
40      /**
41       * Take the escaped string, parse it and return the unescaped value.
42       *
43       * @param string string to parse, incl. quotes (e.g. "Foo\nBar")
44       * <p/>
45       * <cite>StringLiteral: " [StringCharacters] "</cite>
46       * @return parsed string
47       */
48      protected static String parseStringLiteral(String escaped) throws ParseException {
49          StringBuilder sb = new StringBuilder(escaped);
50          if (sb.charAt(0) != '\"') {
51              throw new ParseException("Expecting \" at the start of " + escaped.toString());
52          }
53          sb.deleteCharAt(0);
54  
55          // StringCharacters nonterminal is optional. If it is missing, ther is 
56          // only ending double quote.
57          String parsed = "";
58          if (sb.length() != 1) {
59              parsed = parseStringCharacters(sb);
60          }
61  
62          if (sb.length() != 1) {
63              throw new ParseException("Expecting exactly one character (double quote), but " + sb.length() + " characters remain:" + sb.toString());
64          }
65  
66          if (sb.charAt(0) != '\"') {
67              throw new ParseException("Expecting ending double quote, but got: " + sb.charAt(0));
68          }
69  
70          return parsed.toString();
71      }
72  
73      /**
74       * Parse nonterminal <em>StringCharacters: ( StringCharacter )+</em>
75       *
76       * @param sb escaped string to be parsed, something may be even after last
77       * <em>StringCharacter</em>. This will be modified during progress, the
78       * parsed characters will be "eaten"
79       * @return unescaped string
80       */
81      private static String parseStringCharacters(StringBuilder sb) throws ParseException {
82          Character ch;
83          StringBuilder stringCharacters = new StringBuilder();
84          while ((ch = parseStringCharacter(sb)) != null) {
85              stringCharacters.append(ch);
86          }
87          return stringCharacters.toString();
88      }
89  
90      /**
91       * Extract the <em>StringCharacter</em> from the sb and return it.
92       * <p/>
93       * <
94       * pre>
95       * StringCharacter: InputCharacter but not " or \
96       * StringCharacter: EscapeSequence
97       * InputCharacter: UnicodeInputCharacter but not CR or LF
98       * </pre> Basically when you expand it all, you will get
99       * <pre>
100      *  \(u)+ [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]  ~ Unicode char
101      *  \[btnfr"'\]
102      *  \[0-3][0-7][0-7]
103      *  \[0-7][0-7]
104      *  \[0-7]
105      * </pre>
106      *
107      * @param sb sequence of chars that is used to extract the character. Is
108      * modified (characters are removed) during parsing.
109      * @return found character if there is a <em>StringCharacter</em>, null
110      * otherwise
111      */
112     protected static Character parseStringCharacter(StringBuilder sb) throws ParseException {
113         if (sb.length() == 0) {
114             return null;
115         }
116         char ch = sb.charAt(0);
117         if (ch == '\"' || ch == '\r' || ch == '\n') {
118             return null;
119         }
120 
121         sb.deleteCharAt(0);
122         // Is that an escape sequence
123         if (ch == '\\') {
124             Character res = ArgChar.parseEscapeSequence(sb);
125             if (res == null) {
126                 throw new ParseException("Unable to unescape sequence:" + sb.toString());
127             }
128             return res;
129         }
130         return ch;
131     }
132 }