// HtmlEditScanner - an HTML scanner with editing
//
// Copyright (C) 1996 by Jef Poskanzer. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
//
// Visit the ACME Labs Java page for up-to-date versions of this and other
// fine Java utilities: http://www.acme.com/java/

package Acme;

import java.io.*;
import java.net.*;
import java.util.*;

/// An HTML scanner with editing.
//

// This is like HtmlScanner but it lets you make changes to the URLs in
// the HTML stream you are scanning. The regular scanner class lets
// you define callbacks that get called with the URLs; in this version,
// you can return substitute URLs from the callbacks, and they get
// inserted into the stream in place of the old URLs.
//

// Fetch the software.
// Fetch the entire Acme package. public class HtmlEditScanner extends FilterInputStream implements Acme.HtmlObserver { // The underlying HtmlScanner. HtmlScanner scanner; // The list of HtmlEditObservers to call, paired with clientDatas. private Vector observers = new Vector(); /// Constructor. // If the client is not interested in getting called back with URLs, // observer can be null (but then there's not much point in // using this class). public HtmlEditScanner( InputStream s, URL thisUrl, Acme.HtmlEditObserver observer ) { this( s, thisUrl, observer, null ); } /// Constructor with clientData. // If the client is not interested in getting called back with URLs, // observer can be null (but then there's not much point in // using this class). public HtmlEditScanner( InputStream s, URL thisUrl, Acme.HtmlEditObserver observer, Object clientData ) { this( new HtmlScanner( s, thisUrl, null ), observer, clientData ); } /// Constructor with a pre-made HtmlScanner. // If the client is not interested in getting called back with URLs, // observer can be null (but then there's not much point in // using this class). public HtmlEditScanner( HtmlScanner scanner, Acme.HtmlEditObserver observer ) { this( scanner, observer, null ); } /// Constructor with a pre-made HtmlScanner, with clientData. // If the client is not interested in getting called back with URLs, // observer can be null (but then there's not much point in // using this class). public HtmlEditScanner( HtmlScanner scanner, Acme.HtmlEditObserver observer, Object clientData ) { super( scanner ); this.scanner = scanner; scanner.addObserver( this ); if ( observer != null ) addObserver( observer, clientData ); } /// Add an extra observer to this editor. Multiple observers get called // in the order they were added. public void addObserver( Acme.HtmlEditObserver observer ) { addObserver( observer, null ); } /// Add an extra observer to this editor. Multiple observers get called // in the order they were added. 
public void addObserver( Acme.HtmlEditObserver observer, Object clientData ) { observers.addElement( new Acme.Pair( observer, clientData ) ); } private boolean gotEOF = false; private byte[] buf = new byte[4096]; private int bufSize = buf.length; private int bufOff = 0; private int bufLen = 0; /// Special version of read() that's careful about URLs split across // buffer-loads. public int read( byte[] b, int off, int len ) throws IOException { int i = len; while ( ( bufLen < len || scanner.gettingUrl ) && ! gotEOF ) { if ( bufLen >= len ) i += 50; // read a little extra to complete the URL checkBuf( i + 500 ); // add a little room for expanded URLs int r = in.read( buf, bufOff + bufLen, i - bufLen ); if ( r == -1 ) gotEOF = true; else bufLen += r; } if ( bufLen == 0 ) return -1; i = Math.min( len, bufLen ); System.arraycopy( buf, bufOff, b, off, i ); bufOff += i; bufLen -= i; return i; } /// Override to make sure this goes through the above // read( byte[], int, int) method. public int read() throws IOException { byte[] b = new byte[1]; int r = read( b, 0, 1 ); if ( r == -1 ) return -1; else return b[0]; } // Check if we need to expand the buffer. private void checkBuf( int need ) { // First, if there's no data in the buffer we can zero the offset. if ( bufLen == 0 ) bufOff = 0; // Is there room? if ( bufOff + need > bufSize ) { // No. Should we just compactify this buffer, or make a new one? if ( need * 2 < bufSize ) { // Compactify. System.arraycopy( buf, bufOff, buf, 0, bufLen ); } else { // New buffer. byte[] newBuf = new byte[need * 2]; System.arraycopy( buf, bufOff, newBuf, 0, bufLen ); buf = newBuf; bufSize = buf.length; } bufOff = 0; } } /// Callback from HtmlScanner. 
public void gotAHREF( String urlStr, URL contextUrl, Object junk ) { Enumeration en = observers.elements(); while ( en.hasMoreElements() ) { Acme.Pair pair = (Acme.Pair) en.nextElement(); Acme.HtmlEditObserver observer = (HtmlEditObserver) pair.left(); Object clientData = pair.right(); String changedUrlStr = observer.editAHREF( urlStr, contextUrl, clientData ); if ( changedUrlStr != null ) scanner.substitute( urlStr.length(), changedUrlStr ); } } /// Callback from HtmlScanner. public void gotIMGSRC( String urlStr, URL contextUrl, Object junk ) { Enumeration en = observers.elements(); while ( en.hasMoreElements() ) { Acme.Pair pair = (Acme.Pair) en.nextElement(); Acme.HtmlEditObserver observer = (HtmlEditObserver) pair.left(); Object clientData = pair.right(); String changedUrlStr = observer.editIMGSRC( urlStr, contextUrl, clientData ); if ( changedUrlStr != null ) scanner.substitute( urlStr.length(), changedUrlStr ); } } /// Callback from HtmlScanner. public void gotFRAMESRC( String urlStr, URL contextUrl, Object junk ) { Enumeration en = observers.elements(); while ( en.hasMoreElements() ) { Acme.Pair pair = (Acme.Pair) en.nextElement(); Acme.HtmlEditObserver observer = (HtmlEditObserver) pair.left(); Object clientData = pair.right(); String changedUrlStr = observer.editFRAMESRC( urlStr, contextUrl, clientData ); if ( changedUrlStr != null ) scanner.substitute( urlStr.length(), changedUrlStr ); } } /// Callback from HtmlScanner. public void gotBASEHREF( String urlStr, URL contextUrl, Object junk ) { Enumeration en = observers.elements(); while ( en.hasMoreElements() ) { Acme.Pair pair = (Acme.Pair) en.nextElement(); Acme.HtmlEditObserver observer = (HtmlEditObserver) pair.left(); Object clientData = pair.right(); String changedUrlStr = observer.editBASEHREF( urlStr, contextUrl, clientData ); if ( changedUrlStr != null ) scanner.substitute( urlStr.length(), changedUrlStr ); } } /// Callback from HtmlScanner. 
public void gotAREAHREF( String urlStr, URL contextUrl, Object junk ) { Enumeration en = observers.elements(); while ( en.hasMoreElements() ) { Acme.Pair pair = (Acme.Pair) en.nextElement(); Acme.HtmlEditObserver observer = (HtmlEditObserver) pair.left(); Object clientData = pair.right(); String changedUrlStr = observer.editAREAHREF( urlStr, contextUrl, clientData ); if ( changedUrlStr != null ) scanner.substitute( urlStr.length(), changedUrlStr ); } } /// Callback from HtmlScanner. public void gotLINKHREF( String urlStr, URL contextUrl, Object junk ) { Enumeration en = observers.elements(); while ( en.hasMoreElements() ) { Acme.Pair pair = (Acme.Pair) en.nextElement(); Acme.HtmlEditObserver observer = (HtmlEditObserver) pair.left(); Object clientData = pair.right(); String changedUrlStr = observer.editLINKHREF( urlStr, contextUrl, clientData ); if ( changedUrlStr != null ) scanner.substitute( urlStr.length(), changedUrlStr ); } } /// Callback from HtmlScanner. public void gotBODYBACKGROUND( String urlStr, URL contextUrl, Object junk ) { Enumeration en = observers.elements(); while ( en.hasMoreElements() ) { Acme.Pair pair = (Acme.Pair) en.nextElement(); Acme.HtmlEditObserver observer = (HtmlEditObserver) pair.left(); Object clientData = pair.right(); String changedUrlStr = observer.editBODYBACKGROUND( urlStr, contextUrl, clientData ); if ( changedUrlStr != null ) scanner.substitute( urlStr.length(), changedUrlStr ); } } }