comparison src/com/go/trove/io/UnicodeReader.java @ 0:3dc0c5604566

Initial checkin of blitz 2.0 fcs - no installer yet.
author Dan Creswell <dan.creswell@gmail.com>
date Sat, 21 Mar 2009 11:00:06 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:3dc0c5604566
1 /* ====================================================================
2 * Trove - Copyright (c) 1997-2000 Walt Disney Internet Group
3 * ====================================================================
4 * The Tea Software License, Version 1.1
5 *
6 * Copyright (c) 2000 Walt Disney Internet Group. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. The end-user documentation included with the redistribution,
21 * if any, must include the following acknowledgment:
22 * "This product includes software developed by the
23 * Walt Disney Internet Group (http://opensource.go.com/)."
24 * Alternately, this acknowledgment may appear in the software itself,
25 * if and wherever such third-party acknowledgments normally appear.
26 *
27 * 4. The names "Tea", "TeaServlet", "Kettle", "Trove" and "BeanDoc" must
28 * not be used to endorse or promote products derived from this
29 * software without prior written permission. For written
30 * permission, please contact opensource@dig.com.
31 *
32 * 5. Products derived from this software may not be called "Tea",
33 * "TeaServlet", "Kettle" or "Trove", nor may "Tea", "TeaServlet",
34 * "Kettle", "Trove" or "BeanDoc" appear in their name, without prior
35 * written permission of the Walt Disney Internet Group.
36 *
37 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
38 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
39 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
40 * DISCLAIMED. IN NO EVENT SHALL THE WALT DISNEY INTERNET GROUP OR ITS
41 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
42 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
43 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
44 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
45 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 * For more information about Tea, please see http://opensource.go.com/.
51 */
52
53 package com.go.trove.io;
54
55 import java.io.*;
56
57 /******************************************************************************
58 * This reader handles unicode escapes in a character stream as defined by
59 * <i>The Java Language Specification</i>.
60 *
61 * <p>A unicode escape consists of six characters: '\' and 'u' followed by
62 * four hexadecimal digits. If the format of the escape is not correct, then
63 * the escape is unprocessed. To prevent a correctly formatted unicode escape
64 * from being processed, preceed it with another '\'.
65 *
66 * @author Brian S O'Neill
67 * @version
68 * <!--$$Revision: 1.1 $-->, <!--$$JustDate:--> 12/11/00 <!-- $-->
69 */
70 public class UnicodeReader extends EscapeReader {
71 /** Just a temporary buffer for holding the four hexadecimal digits. */
72 private char[] mMinibuf = new char[4];
73
74 private boolean mEscaped;
75
76 /**
77 * A UnicodeReader needs an underlying source Reader.
78 *
79 * @param source the source PositionReader
80 */
81 public UnicodeReader(Reader source) {
82 super(source, 6);
83 }
84
85 public int read() throws IOException {
86 int c = mSource.read();
87
88 if (c != '\\' || !mEscapesEnabled) {
89 mEscaped = false;
90 return c;
91 }
92
93 c = mSource.read();
94
95 // Have scanned "\\"? (two backslashes)
96 if (c == '\\') {
97 mEscaped = !mEscaped;
98 mSource.unread();
99 return '\\';
100 }
101
102 // Have not scanned '\', 'u'?
103 if (c != 'u') {
104 mSource.unread();
105 return '\\';
106 }
107
108 // At this point, have scanned '\', 'u'.
109
110 // If previously escaped, then don't process unicode escape.
111 if (mEscaped) {
112 mEscaped = false;
113 mSource.unread();
114 return '\\';
115 }
116
117 int len = mSource.read(mMinibuf, 0, 4);
118
119 if (len == 4) {
120 try {
121 int val =
122 Integer.valueOf(new String(mMinibuf, 0, 4), 16).intValue();
123
124 return val;
125 }
126 catch (NumberFormatException e) {
127 // If the number is not a parseable as hexadecimal, then
128 // treat this as a bad format and do not process the
129 // unicode escape.
130 }
131 }
132
133 // Unread the four hexadecimal characters and the leading 'u'.
134 if (len >= 0) {
135 mSource.unread(len + 1);
136 }
137
138 return '\\';
139 }
140
141 public static void main(String[] arg) throws Exception {
142 Tester.test(arg);
143 }
144
145 private static class Tester {
146 public static void test(String[] arg) throws Exception {
147 String str =
148 "This is \\" + "u0061 test.\n" +
149 "This is \\" + "u00612 test.\n" +
150 "This is \\" + "u0061" + "\\" + "u0061" + " test.\n" +
151 "This is \\" + "u061 test.\n" +
152 "This is \\\\" + "u0061 test.\n" +
153 "This is \\" + "a test.\n";
154
155 System.out.println("\nOriginal:\n");
156
157 Reader reader = new StringReader(str);
158
159 int c;
160 while ( (c = reader.read()) >= 0 ) {
161 System.out.print((char)c);
162 }
163
164 System.out.println("\nConverted:\n");
165
166 reader = new StringReader(str);
167 reader = new UnicodeReader(reader);
168
169 while ( (c = reader.read()) != -1 ) {
170 System.out.print((char)c);
171 }
172
173 System.out.println("\nUnread test 1:\n");
174
175 reader = new StringReader(str);
176 PushbackPositionReader pr =
177 new PushbackPositionReader(new UnicodeReader(reader), 1);
178
179 while ( (c = pr.read()) != -1 ) {
180 pr.unread();
181 c = pr.read();
182 System.out.print((char)c);
183 }
184
185 System.out.println("\nUnread test 2:\n");
186
187 reader = new StringReader(str);
188 pr = new PushbackPositionReader(new UnicodeReader(reader), 2);
189
190 int i = 0;
191 while ( (c = pr.read()) != -1 ) {
192 if ( (i++ % 5) == 0 ) {
193 c = pr.read();
194 pr.unread();
195 pr.unread();
196 c = pr.read();
197 }
198
199 System.out.print((char)c);
200 }
201
202 System.out.println("\nUnread position test:\n");
203
204 reader = new StringReader(str);
205 pr = new PushbackPositionReader(new UnicodeReader(reader), 2);
206
207 System.out.print(pr.getNextPosition() + "\t");
208 i = 0;
209 while ( (c = pr.read()) != -1 ) {
210 if ( (i++ % 5) == 0 ) {
211 c = pr.read();
212 pr.unread();
213 pr.unread();
214 c = pr.read();
215 }
216
217 System.out.println((char)c);
218 System.out.print(pr.getNextPosition() + "\t");
219 }
220 }
221 }
222 }