Mercurial > hg > blitz_stable
comparison src/com/go/trove/io/UnicodeReader.java @ 0:3dc0c5604566
Initial checkin of blitz 2.0 fcs - no installer yet.
author | Dan Creswell <dan.creswell@gmail.com> |
---|---|
date | Sat, 21 Mar 2009 11:00:06 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:3dc0c5604566 |
---|---|
1 /* ==================================================================== | |
2 * Trove - Copyright (c) 1997-2000 Walt Disney Internet Group | |
3 * ==================================================================== | |
4 * The Tea Software License, Version 1.1 | |
5 * | |
6 * Copyright (c) 2000 Walt Disney Internet Group. All rights reserved. | |
7 * | |
8 * Redistribution and use in source and binary forms, with or without | |
9 * modification, are permitted provided that the following conditions | |
10 * are met: | |
11 * | |
12 * 1. Redistributions of source code must retain the above copyright | |
13 * notice, this list of conditions and the following disclaimer. | |
14 * | |
15 * 2. Redistributions in binary form must reproduce the above copyright | |
16 * notice, this list of conditions and the following disclaimer in | |
17 * the documentation and/or other materials provided with the | |
18 * distribution. | |
19 * | |
20 * 3. The end-user documentation included with the redistribution, | |
21 * if any, must include the following acknowledgment: | |
22 * "This product includes software developed by the | |
23 * Walt Disney Internet Group (http://opensource.go.com/)." | |
24 * Alternately, this acknowledgment may appear in the software itself, | |
25 * if and wherever such third-party acknowledgments normally appear. | |
26 * | |
27 * 4. The names "Tea", "TeaServlet", "Kettle", "Trove" and "BeanDoc" must | |
28 * not be used to endorse or promote products derived from this | |
29 * software without prior written permission. For written | |
30 * permission, please contact opensource@dig.com. | |
31 * | |
32 * 5. Products derived from this software may not be called "Tea", | |
33 * "TeaServlet", "Kettle" or "Trove", nor may "Tea", "TeaServlet", | |
34 * "Kettle", "Trove" or "BeanDoc" appear in their name, without prior | |
35 * written permission of the Walt Disney Internet Group. | |
36 * | |
37 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED | |
38 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |
39 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
40 * DISCLAIMED. IN NO EVENT SHALL THE WALT DISNEY INTERNET GROUP OR ITS | |
41 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
42 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
43 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
44 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
45 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
46 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
47 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
48 * ==================================================================== | |
49 * | |
50 * For more information about Tea, please see http://opensource.go.com/. | |
51 */ | |
52 | |
53 package com.go.trove.io; | |
54 | |
55 import java.io.*; | |
56 | |
57 /****************************************************************************** | |
58 * This reader handles unicode escapes in a character stream as defined by | |
59 * <i>The Java Language Specification</i>. | |
60 * | |
61 * <p>A unicode escape consists of six characters: '\' and 'u' followed by | |
62 * four hexadecimal digits. If the format of the escape is not correct, then | |
63 * the escape is unprocessed. To prevent a correctly formatted unicode escape | |
64 * from being processed, preceed it with another '\'. | |
65 * | |
66 * @author Brian S O'Neill | |
67 * @version | |
68 * <!--$$Revision: 1.1 $-->, <!--$$JustDate:--> 12/11/00 <!-- $--> | |
69 */ | |
70 public class UnicodeReader extends EscapeReader { | |
71 /** Just a temporary buffer for holding the four hexadecimal digits. */ | |
72 private char[] mMinibuf = new char[4]; | |
73 | |
74 private boolean mEscaped; | |
75 | |
76 /** | |
77 * A UnicodeReader needs an underlying source Reader. | |
78 * | |
79 * @param source the source PositionReader | |
80 */ | |
81 public UnicodeReader(Reader source) { | |
82 super(source, 6); | |
83 } | |
84 | |
85 public int read() throws IOException { | |
86 int c = mSource.read(); | |
87 | |
88 if (c != '\\' || !mEscapesEnabled) { | |
89 mEscaped = false; | |
90 return c; | |
91 } | |
92 | |
93 c = mSource.read(); | |
94 | |
95 // Have scanned "\\"? (two backslashes) | |
96 if (c == '\\') { | |
97 mEscaped = !mEscaped; | |
98 mSource.unread(); | |
99 return '\\'; | |
100 } | |
101 | |
102 // Have not scanned '\', 'u'? | |
103 if (c != 'u') { | |
104 mSource.unread(); | |
105 return '\\'; | |
106 } | |
107 | |
108 // At this point, have scanned '\', 'u'. | |
109 | |
110 // If previously escaped, then don't process unicode escape. | |
111 if (mEscaped) { | |
112 mEscaped = false; | |
113 mSource.unread(); | |
114 return '\\'; | |
115 } | |
116 | |
117 int len = mSource.read(mMinibuf, 0, 4); | |
118 | |
119 if (len == 4) { | |
120 try { | |
121 int val = | |
122 Integer.valueOf(new String(mMinibuf, 0, 4), 16).intValue(); | |
123 | |
124 return val; | |
125 } | |
126 catch (NumberFormatException e) { | |
127 // If the number is not a parseable as hexadecimal, then | |
128 // treat this as a bad format and do not process the | |
129 // unicode escape. | |
130 } | |
131 } | |
132 | |
133 // Unread the four hexadecimal characters and the leading 'u'. | |
134 if (len >= 0) { | |
135 mSource.unread(len + 1); | |
136 } | |
137 | |
138 return '\\'; | |
139 } | |
140 | |
141 public static void main(String[] arg) throws Exception { | |
142 Tester.test(arg); | |
143 } | |
144 | |
145 private static class Tester { | |
146 public static void test(String[] arg) throws Exception { | |
147 String str = | |
148 "This is \\" + "u0061 test.\n" + | |
149 "This is \\" + "u00612 test.\n" + | |
150 "This is \\" + "u0061" + "\\" + "u0061" + " test.\n" + | |
151 "This is \\" + "u061 test.\n" + | |
152 "This is \\\\" + "u0061 test.\n" + | |
153 "This is \\" + "a test.\n"; | |
154 | |
155 System.out.println("\nOriginal:\n"); | |
156 | |
157 Reader reader = new StringReader(str); | |
158 | |
159 int c; | |
160 while ( (c = reader.read()) >= 0 ) { | |
161 System.out.print((char)c); | |
162 } | |
163 | |
164 System.out.println("\nConverted:\n"); | |
165 | |
166 reader = new StringReader(str); | |
167 reader = new UnicodeReader(reader); | |
168 | |
169 while ( (c = reader.read()) != -1 ) { | |
170 System.out.print((char)c); | |
171 } | |
172 | |
173 System.out.println("\nUnread test 1:\n"); | |
174 | |
175 reader = new StringReader(str); | |
176 PushbackPositionReader pr = | |
177 new PushbackPositionReader(new UnicodeReader(reader), 1); | |
178 | |
179 while ( (c = pr.read()) != -1 ) { | |
180 pr.unread(); | |
181 c = pr.read(); | |
182 System.out.print((char)c); | |
183 } | |
184 | |
185 System.out.println("\nUnread test 2:\n"); | |
186 | |
187 reader = new StringReader(str); | |
188 pr = new PushbackPositionReader(new UnicodeReader(reader), 2); | |
189 | |
190 int i = 0; | |
191 while ( (c = pr.read()) != -1 ) { | |
192 if ( (i++ % 5) == 0 ) { | |
193 c = pr.read(); | |
194 pr.unread(); | |
195 pr.unread(); | |
196 c = pr.read(); | |
197 } | |
198 | |
199 System.out.print((char)c); | |
200 } | |
201 | |
202 System.out.println("\nUnread position test:\n"); | |
203 | |
204 reader = new StringReader(str); | |
205 pr = new PushbackPositionReader(new UnicodeReader(reader), 2); | |
206 | |
207 System.out.print(pr.getNextPosition() + "\t"); | |
208 i = 0; | |
209 while ( (c = pr.read()) != -1 ) { | |
210 if ( (i++ % 5) == 0 ) { | |
211 c = pr.read(); | |
212 pr.unread(); | |
213 pr.unread(); | |
214 c = pr.read(); | |
215 } | |
216 | |
217 System.out.println((char)c); | |
218 System.out.print(pr.getNextPosition() + "\t"); | |
219 } | |
220 } | |
221 } | |
222 } |