Mercurial > hg > blitz_condensed
diff src/com/go/trove/io/UnicodeReader.java @ 0:3dc0c5604566
Initial checkin of blitz 2.0 fcs - no installer yet.
author | Dan Creswell <dan.creswell@gmail.com> |
---|---|
date | Sat, 21 Mar 2009 11:00:06 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/com/go/trove/io/UnicodeReader.java Sat Mar 21 11:00:06 2009 +0000 @@ -0,0 +1,222 @@ +/* ==================================================================== + * Trove - Copyright (c) 1997-2000 Walt Disney Internet Group + * ==================================================================== + * The Tea Software License, Version 1.1 + * + * Copyright (c) 2000 Walt Disney Internet Group. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Walt Disney Internet Group (http://opensource.go.com/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Tea", "TeaServlet", "Kettle", "Trove" and "BeanDoc" must + * not be used to endorse or promote products derived from this + * software without prior written permission. For written + * permission, please contact opensource@dig.com. + * + * 5. Products derived from this software may not be called "Tea", + * "TeaServlet", "Kettle" or "Trove", nor may "Tea", "TeaServlet", + * "Kettle", "Trove" or "BeanDoc" appear in their name, without prior + * written permission of the Walt Disney Internet Group. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE WALT DISNEY INTERNET GROUP OR ITS + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * For more information about Tea, please see http://opensource.go.com/. + */ + +package com.go.trove.io; + +import java.io.*; + +/****************************************************************************** + * This reader handles unicode escapes in a character stream as defined by + * <i>The Java Language Specification</i>. + * + * <p>A unicode escape consists of six characters: '\' and 'u' followed by + * four hexadecimal digits. If the format of the escape is not correct, then + * the escape is unprocessed. To prevent a correctly formatted unicode escape + * from being processed, preceed it with another '\'. + * + * @author Brian S O'Neill + * @version + * <!--$$Revision: 1.1 $-->, <!--$$JustDate:--> 12/11/00 <!-- $--> + */ +public class UnicodeReader extends EscapeReader { + /** Just a temporary buffer for holding the four hexadecimal digits. */ + private char[] mMinibuf = new char[4]; + + private boolean mEscaped; + + /** + * A UnicodeReader needs an underlying source Reader. + * + * @param source the source PositionReader + */ + public UnicodeReader(Reader source) { + super(source, 6); + } + + public int read() throws IOException { + int c = mSource.read(); + + if (c != '\\' || !mEscapesEnabled) { + mEscaped = false; + return c; + } + + c = mSource.read(); + + // Have scanned "\\"? (two backslashes) + if (c == '\\') { + mEscaped = !mEscaped; + mSource.unread(); + return '\\'; + } + + // Have not scanned '\', 'u'? + if (c != 'u') { + mSource.unread(); + return '\\'; + } + + // At this point, have scanned '\', 'u'. + + // If previously escaped, then don't process unicode escape. + if (mEscaped) { + mEscaped = false; + mSource.unread(); + return '\\'; + } + + int len = mSource.read(mMinibuf, 0, 4); + + if (len == 4) { + try { + int val = + Integer.valueOf(new String(mMinibuf, 0, 4), 16).intValue(); + + return val; + } + catch (NumberFormatException e) { + // If the number is not a parseable as hexadecimal, then + // treat this as a bad format and do not process the + // unicode escape. + } + } + + // Unread the four hexadecimal characters and the leading 'u'. + if (len >= 0) { + mSource.unread(len + 1); + } + + return '\\'; + } + + public static void main(String[] arg) throws Exception { + Tester.test(arg); + } + + private static class Tester { + public static void test(String[] arg) throws Exception { + String str = + "This is \\" + "u0061 test.\n" + + "This is \\" + "u00612 test.\n" + + "This is \\" + "u0061" + "\\" + "u0061" + " test.\n" + + "This is \\" + "u061 test.\n" + + "This is \\\\" + "u0061 test.\n" + + "This is \\" + "a test.\n"; + + System.out.println("\nOriginal:\n"); + + Reader reader = new StringReader(str); + + int c; + while ( (c = reader.read()) >= 0 ) { + System.out.print((char)c); + } + + System.out.println("\nConverted:\n"); + + reader = new StringReader(str); + reader = new UnicodeReader(reader); + + while ( (c = reader.read()) != -1 ) { + System.out.print((char)c); + } + + System.out.println("\nUnread test 1:\n"); + + reader = new StringReader(str); + PushbackPositionReader pr = + new PushbackPositionReader(new UnicodeReader(reader), 1); + + while ( (c = pr.read()) != -1 ) { + pr.unread(); + c = pr.read(); + System.out.print((char)c); + } + + System.out.println("\nUnread test 2:\n"); + + reader = new StringReader(str); + pr = new PushbackPositionReader(new UnicodeReader(reader), 2); + + int i = 0; + while ( (c = pr.read()) != -1 ) { + if ( (i++ % 5) == 0 ) { + c = pr.read(); + pr.unread(); + pr.unread(); + c = pr.read(); + } + + System.out.print((char)c); + } + + System.out.println("\nUnread position test:\n"); + + reader = new StringReader(str); + pr = new PushbackPositionReader(new UnicodeReader(reader), 2); + + System.out.print(pr.getNextPosition() + "\t"); + i = 0; + while ( (c = pr.read()) != -1 ) { + if ( (i++ % 5) == 0 ) { + c = pr.read(); + pr.unread(); + pr.unread(); + c = pr.read(); + } + + System.out.println((char)c); + System.out.print(pr.getNextPosition() + "\t"); + } + } + } +}