import java.awt.*;
import java.applet.*;
import java.net.*;
import java.io.*;
import java.util.*;

// v1.5 Home Page Search applet
// 15th February 1998

/*
 * This applet provides search facilities for Web sites with no CGI access
 *
 * Copyright (c) 1997 Richard Everitt G4ZFE
 *      richard@babbage.demon.co.uk
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version.
 * 
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *
 */

/* Applet parameters:
 * This applet takes the following parameters
 * a. hostname, the name of the Demon Home Page (e.g babbage). The name is
 *              converted to lower case and used to create the URL of the
 *              pages to search i.e. http://www.<hostname>.demon.co.uk.
 *              (this parameter is required for Demon Internet users
 *               *only*)
 * b. IPaddress, the corresponding IP address for the Home Page. I plan to
 *               use it as it will allow the search applet to run from
 *               behind a firewall. Demon have stated in the HomePage AUP
 *               that the IP address should not be directly used. I do not
 *               recommend its use (the www.babbage.demon.co.uk IP address
 *               has already changed once)
 *               (this parameter is optional) 
 * c. maxSearch, the maximum number of pages to search. If your site is vast
 *               then the search will take a long time so people will give
 *               up. This parameter limits the number of pages to be searched
 *               to a sensible value reducing the search time (but also
 *               reducing its usefulness)
 *               (this parameter is optional. Defaults to 100)
 * d. debug,     this parameter is for my use. Set to true to display 
 *               parameter and debug information.
 *               (this parameter is optional)
 * e. server,    this parameter allows the search applet to be used on non-
 *               Demon home pages. This parameter should point to the name
 *               of the site, e.g "http://www.myisp.com/me/" (note use of
 *               trailing "/" character).
 *               (this parameter is required for Non-Demon Internet users)
 * f. indexName, this parameter allows the search applet to be used on non-
 *               Demon home pages. This parameter should point to the name
 *               of the index page (e.g home.htm). If not set then 
 *               "index.htm" or "index.html" is assumed.
 *               (this parameter is optional)
 * g. bgColour,  The background colour for the applet in RGB hex format
 *               (rrggbb). The default is light grey.
 * h. fgColour,  The foreground colour for the applet in RGB hex format
 *               (rrggbb). The default is black.
 *
 * Example of applet use on a Demon Home Page - www.babbage.demon.co.uk
 * <APPLET CODE="HomePageSearch.class" WIDTH=650 HEIGHT=400>
 * <PARAM NAME="hostname" VALUE="babbage">
 * <EM>Sorry but the Search applet requires a Java aware
 * Web browser </EM>
 * </APPLET>
 *
 * Example of applet use on a non-Demon Home Page - www.myisp.co.uk/fred/
 * and the "index" page is called home.htm
 * <APPLET CODE="HomePageSearch.class" WIDTH=650 HEIGHT=400>
 * <PARAM NAME="server" VALUE="http://www.myisp.co.uk/fred/">
 * <PARAM NAME="indexName" VALUE="home.htm">
 * <EM>Sorry but the Search applet requires a Java aware
 * Web browser </EM>
 * </APPLET>
 */


/* Modification history
 * xxxx  12th February - alpha version
 * v0.9  19th February - first beta version
 * v0.91 26th February - tidy up
 *                     - added maxSearch functionality
 *                     - added debugMode parameter
 * v0.92 03rd March    - added server and indexname parameters to 
 *                       allow use on non-Demon home pages
 * v0.93 09th March    - fixed bug with lowercase filenames
 *                       added case insensitive/sensitive/match whole
 *                       word functionality
 * v0.94 12th March    - fixed bug which resulted in "cannot connect"
 *                       error on non-demon sites.
 * v0.95 15th March    - Removed some uses of debugMode. Server parameter
 *                       can be set to http://localhost/ to simulate this.
 *                     - Added support for working behind proxy servers/
 *                       firewalls. This uses the IP Address rather than
 *                       the hostname of the server for connections.
 * v0.96 17th March    - Corrected code to parse HREFs. It was not 
 *                       understanding framed format or spaces.
 *                     - Match whole word not working properly
 *                     - HREF="http://server/" was not being followed
 *                       correctly
 * v0.97 20th March    - fixed bug where incorrect page name was being
 *                       displayed for a match. This was due to the use
 *                       of a global variable for the page name. As the
 *                       stack unwound then this variable was lost. Stack
 *                       used to stored page name instead.
 * v0.98 23rd March   -  if match found on index page (using HTTP) then URL to
 *                       jump to was created incorrectly.
 * v0.99 25th March   -  allow to be resized < 600 pixels
 *                       allow handling of links such as
 *                       "/www/page.html"
 * v1.0 8th April     -  Added bgColour, fgColour applet parameters
 *                       Set default of 100 for maximum number pages to search
 *                       Added option menu for number of pages to search
 *                       Allow handling of framed links such as
 *                       <FRAME SRC="framepage.html">
 * v1.1 18th April    -  Display Page title rather than page name in list of
 *                       matches.
 *                       If match found on index page (using FILE://) then
 *                       URL to jump to was created incorrectly.
 *                       Broke the <= 600 pixels code by adding the "Max 
 *                       Pages" option menu. Size of buttons adjusted to
 *                       allow all widgets to be displayed in < 600 pixels
 * v1.2 9th May       -  Removed hard limits by using vectors rather than 
 *                       arrays.
 *                       Search the index page and index page links first.
 *                       Added internationalisation support for titles. A subset
 *                       of the special character entity names (e.g &egrave;)
 *                       are converted into Unicode characters so that they
 *                       are displayed correctly.
 *                       Fixed bug - "match word" did not match the last word
 *                       on a line.   
 * v1.3 8th July      -  Bug fix release     
 *                       Links with single quotes e.g 
 *                       <a href='page.html'>Test Page</a>
 *                       were not being searched
 *                       fgColour and bgColour only worked with UNIX browsers!
 *                       Fixed to allow usage with MS Windows browsers,
 *                       although due to limitations in Win32 AWT the colour
 *                       of buttons and their text cannot be changed.
 * v1.4 4th August     - Bug fix release
 *                       Single quote HREF fix in v1.3 broke some normal
 *                       HREF link code (no </A> on same line as HREF).
 * v1.5 15th February  - Applet now searches .txt files
 *                       Fixed bug for demon internet users who use index
 *                       pages other than index.htm and index.html
 *                       Added further lower case localisation support
 * v1.5x 8-Aug-1999    - Modifications by Lyle Winton.
 *			 NAME tags are now included in search locations.
 *			 You can use & or | to search for word1 AND word2 or
 *			 word1 OR word2.
 *
 */

public class HomePageSearch extends Applet
{
	// ---- Constants ----

	final int MAX_NUMBER_PAGES = 100;       // default limit of number
						// pages to read (used when the
						// maxSearch parameter is absent)
	final int BACKSPACE_CHARACTER = 8;      // ASCII backspace, tested for
						// in keyDown()
	final int NUMBER_SPECIAL_CHARS = 45;    // Number of special character
						// entity names supported
	final int MAX_MATCHLINES = 10;		// If "&" is used this is the max
						// number of lines all words must be within
						// (read by the SearchPages constructor)


	// ---- GUI widgets (created in init()) ----

	Button search, clear, abort;    // GUI buttons

	TextField inputArea;            // TextField used to enter 
					// search text in
	TextField statusArea;           // TextField used to display 
					// search status
	List  resultsArea;              // List to display matches in

	// ---- Values read from the applet parameters ----

	public String hostName;         // Host name parameter read by
					// applet (set to "none" when neither
					// hostname nor server is supplied)
	public String IPAddress;        // IP address parameter read by
					// applet (optional)
	public int maxSearch = MAX_NUMBER_PAGES;        
					// Maximum number of pages to 
					// search (optional)

	public boolean debugMode;       // true when the "debug" applet
					// parameter is present; enables
					// diagnostic output on System.out

	Vector pageNames;               // Pages that have been visited

	public String server;           // Non-Demon home page starting point

	public String indexName;        // Name of index page (defaults to
					// index.html or index.htm)

	SearchPages cp = null;          // Search thread (null when no search
					// has been started or it was aborted)

	Checkbox caseSensitive; 
	Checkbox matchWholeWord;

	public boolean matchCase = false;       // Flag to indicate if we
						// need to match case.


	public boolean matchWord = false;       // Flag to indicate if we need
						// to match the whole word

	String versionNumber = "v1.5x";         // Shown in the bottom panel
	
	boolean packComponents;                 // Set to true if the applet
						// width is <= 600 pixels

	Color   bgColour;                       // Background colour of applet
	Color   fgColour;                       // Foreground colour of applet

	//Choice numPagesChoice;                  // Option menu to select max
						// number of pages to search
						// (currently disabled)
	Vector pageMatch;                       // Pages that contain the
						// search word; entry i belongs to
						// row i of resultsArea

	// Function init
	// Purpose - read the applet parameters and build the GUI: a top
	// panel (search text + search/clear/stop buttons), a centre panel
	// (list of matches) and a bottom panel (version label, status field
	// and the two option checkboxes). Also creates the vectors used to
	// track visited and matching pages.
	public void init ()
	{
		getParameters ();       // Read the applet parameters

		setLayout (new BorderLayout ());

		// A narrow applet (<= 600 pixels wide) gets shorter text
		// fields, shorter labels and smaller button fonts so that
		// everything still fits on one row
		packComponents = (size().width <= 600);

		Font plainFont  = new Font ("Helvetica", Font.PLAIN, 12);
		Font buttonFont = new Font ("Helvetica", Font.BOLD,
					    packComponents ? 12 : 14);

		// ---- North: search text and the three buttons ----
		Panel top = new Panel();
		top.setLayout (new FlowLayout());

		Label prompt = new Label ("Search for: ");
		prompt.setFont (plainFont);
		top.add (prompt);

		inputArea = new TextField ("", packComponents ? 15 : 20);
		top.add (inputArea);

		search = new Button (packComponents ? "search" : " search ");
		search.setFont (buttonFont);
		top.add (search);

		clear = new Button (packComponents ? "clear" : " clear ");
		clear.setFont (buttonFont);
		top.add (clear);

		// The stop button only becomes usable while a search runs
		abort = new Button (packComponents ? "stop" : " stop ");
		abort.setFont (buttonFont);
		abort.disable();
		top.add (abort);

		// (The "Max. Pages" option menu is currently disabled.)

		top.setForeground (fgColour);
		top.setBackground (bgColour);
		add ("North", top);

		// ---- Center: list of matches; a selection made here is
		// handled in action(), which opens the matching page ----
		Panel middle = new Panel();
		middle.setLayout (new GridLayout(0,1));
		resultsArea = new List (10,false);
		middle.add (resultsArea);
		middle.setForeground (fgColour);
		middle.setBackground (bgColour);
		add ("Center", middle);

		// ---- South: version, search status and search options ----
		Panel bottom = new Panel();

		Label labVersion = new Label (versionNumber);
		labVersion.setFont (plainFont);
		bottom.add (labVersion);

		// Read-only field giving the user feedback (page count)
		statusArea = new TextField ("", packComponents ? 14 : 20);
		statusArea.setEditable (false);
		bottom.add (statusArea);

		caseSensitive = new Checkbox (packComponents ? "case sens"
							     : "case sensitive" );
		bottom.add (caseSensitive);

		matchWholeWord = new Checkbox (packComponents ? "whole words"
							      : "match whole words");
		bottom.add (matchWholeWord);

		bottom.setForeground (fgColour);
		bottom.setBackground (bgColour);
		add ("South", bottom);

		// Search and clear stay disabled until some text is entered
		disableButtons ();

		// Pages that have been visited, and pages that contain
		// the search text
		pageNames = new Vector();
		pageMatch = new Vector();

		// Always set text input field to white for readability
		inputArea.setBackground (Color.white);
	}

	// Function enableButtons
	// Purpose - make the search and clear buttons usable
	public void enableButtons ()
	{
		Button[] gated = { search, clear };

		for (int i = 0; i < gated.length; i++)
			gated[i].enable();
	}

	// Function disableButtons
	// Purpose - grey out the search and clear buttons
	final void disableButtons ()
	{
		Button[] gated = { search, clear };

		for (int i = 0; i < gated.length; i++)
			gated[i].disable();
	}

	// Function getParameters
	// Purpose - read the applet parameters into the corresponding
	// fields, substituting defaults for anything missing or malformed:
	//   bgColour  - light grey when absent or not valid hex
	//   fgColour  - black when absent or not valid hex
	//   maxSearch - MAX_NUMBER_PAGES when absent or not a number
	//   debug     - debug mode is on whenever the parameter is present
	// If neither "hostname" nor "server" is supplied an error is
	// reported and hostName is set to "none".
	final void getParameters ()
	{
		hostName = getParameter ("hostname");
		IPAddress = getParameter ("IPAddress");
		String num = getParameter ("maxSearch");
		String arg = getParameter ("debug");
		server = getParameter ("server");
		indexName = getParameter ("indexName");

		// I wish this could be locali[sz]ed so that I could
		// write lightGrey !!
		bgColour = parseColour (getParameter ("bgColour"), Color.lightGray);
		fgColour = parseColour (getParameter ("fgColour"), Color.black);

		// Check for missing parameters
		if (hostName == null && server == null)
		{
			// init() calls this method before the status field
			// has been created, so fall back to the browser's
			// status line rather than dereferencing null
			if (statusArea != null)
				statusArea.setText ("Error - no host/server");
			else
				showStatus ("Error - no host/server");

			System.out.println (" Error: No hostname specified");
			hostName = "none";
		}

		// A malformed maxSearch must not stop the applet from
		// initialising - fall back to the default instead
		maxSearch = MAX_NUMBER_PAGES;
		if (num != null)
		{
			try
			{
				maxSearch = Integer.parseInt (num.trim());
			}
			catch (NumberFormatException e)
			{
				System.out.println (" Error: invalid maxSearch \"" + num +
						    "\", using " + MAX_NUMBER_PAGES);
			}
		}

		debugMode = (arg != null);

		if (debugMode)
		{
			System.out.println ("hostname is " + hostName);
			System.out.println ("IPAddress is " + IPAddress);
			System.out.println ("maxSearch is " + maxSearch);
			System.out.println ("debugMode is " + debugMode);
			System.out.println ("server is " + server);
			System.out.println ("indexName is " + indexName);
			System.out.println ("bgColour is " + bgColour);
			System.out.println ("fgColour is " + fgColour);
		}
	}

	// Function parseColour
	// Purpose - convert an "rrggbb" hex string into a Color. Returns
	// fallback when value is null or is not a valid hex number.
	private Color parseColour (String value, Color fallback)
	{
		if (value == null)
			return fallback;

		try
		{
			return new Color (Integer.parseInt (value, 16));
		}
		catch (NumberFormatException e)
		{
			return fallback;
		}
	}

	// Function getParameterInfo
	// Purpose - describe the applet parameters. Each row holds the
	// parameter name, its type and a short description.
	public String[][] getParameterInfo()
	{
		return new String[][]
		{
			{"hostname",  "String", "hostname of site"},
			{"IPAddress", "String", "IP address of site"},
			{"maxSearch", "String", "maximum number of pages to search"},
			{"debug",     "String", "debug mode"},
			{"server",    "String", "Home Page URL"},
			{"indexName", "String", "Name of index page"},
			{"bgColour",  "Color",  "Background colour of applet"},
			{"fgColour",  "Color",  "Foreground colour of applet"}
		};
	}

	// Function getAppletInfo
	// Purpose - return the applet's name and version as one string
	public String getAppletInfo()
	{
		final String description = "Home Page Search Applet v1.5x";

		return description;
	}

	// Function keyDown
	// Purpose - keep the search and clear buttons in step with the
	// search text. The length the text is expected to have after this
	// key press is estimated from its current length: one less for a
	// backspace, one more for any other key. The buttons are enabled
	// when that estimate is positive and disabled otherwise.
	// Always returns false.
	public boolean keyDown (Event e, int nKey)
	{
		int expectedLength = inputArea.getText().length();

		expectedLength += (nKey == BACKSPACE_CHARACTER) ? -1 : 1;

		if (expectedLength <= 0)
			disableButtons ();
		else
			enableButtons ();

		return false;
	}

	// Function action
	// Purpose - this function handles all the GUI events:
	//   checkbox - copy the checkbox states into matchCase/matchWord
	//   search   - clear the old results and start a new search thread
	//   stop     - stop the search thread and re-enable the buttons
	//   clear    - reset the text fields, options and results
	//   list     - open the page belonging to the selected match
	// Returns false when search is pressed with no search text,
	// true otherwise.
	public boolean action (Event e, Object o)
	{
		// (The "Max. Pages" option menu is currently disabled, so
		// there are no Choice events to handle.)

		// Check to see if a checkbox has been pressed
		if (e.target instanceof Checkbox)
		{
			matchCase = caseSensitive.getState();
			matchWord = matchWholeWord.getState();
		}

		// A button has been pressed - determine which
		if (e.target instanceof Button)
		{
			if (e.target == search)
			{
				// Search button pressed - read in 
				// search text entered
				String text = inputArea.getText();

				// Make sure there's something to search for
				if (text.length() == 0)
					return (false);

				// New search so clear the GUI out
				if (resultsArea.countItems() > 0)
					resultsArea.clear();

				disableButtons ();
				abort.enable();
				statusArea.setText("");

				// Clear out previous search data
				pageNames.removeAllElements();
				pageMatch.removeAllElements();

				// We're off - start the search thread
				cp = new SearchPages (this, hostName, text, maxSearch);
				cp.start();
			}
			else if (e.target == abort)
			{
				// Abort button pressed - stop the thread
				if (cp != null)
					cp.stop();
				cp = null;

				// Enable buttons for another search
				enableButtons();
				abort.disable();
			}
			else 
			{
				// Clear button pressed - clear all the fields
				inputArea.setText("");
				statusArea.setText("");

				// Untick the option checkboxes. setState() does
				// not raise an action event, so the flags that
				// mirror the checkboxes are reset here as well;
				// otherwise the next search would still use the
				// old options.
				caseSensitive.setState(false); 
				matchWholeWord.setState(false);
				matchCase = false;
				matchWord = false;

				if (resultsArea.countItems() > 0)
					resultsArea.clear();

				cp = null;
			}
		}

		// Selection made from the list of matches
		if (e.target instanceof List)
			showSelectedMatch ((List) e.target);

		return true;                                    // We're done
	}

	// Function showSelectedMatch
	// Purpose - open the page that belongs to the entry selected in
	// the list of matches. Does nothing when no entry is selected, when
	// the selection has no stored page, or when the resulting URL is
	// malformed (the offending text is reported on System.out).
	private void showSelectedMatch (List list)
	{
		int index = list.getSelectedIndex();

		// getSelectedIndex() gives -1 when nothing is selected
		if (index < 0 || index >= pageMatch.size())
			return;

		String URLSelected = (String) pageMatch.elementAt(index);
		String target;

		if (URLSelected.startsWith ("http:") ||
		    URLSelected.startsWith ("file:"))
		{
			// A full URL was stored so use it as it is
			target = URLSelected;
		}
		else if (server == null)
		{
			// Demon home page - build the URL from the host name
			target = "http://www." + hostName + ".demon.co.uk/" + URLSelected;
		}
		else
		{
			target = server + URLSelected;
		}

		URL newURL;

		try
		{
			newURL = new URL (target);
		}
		catch (MalformedURLException excep) 
		{
			// Report the text that failed and give up; there is
			// no document to show
			System.out.println ("action(): Bad URL: " + target);
			return;
		}

		if (debugMode)
			System.out.println (" Jumping to ... " + newURL.toString());

		// Display the document
		getAppletContext().showDocument (newURL, "_self");
	}


	// Function checkAlreadyFound
	// Purpose - report whether a page name is already in the list of
	// pages visited by the search thread. Names are compared without
	// regard to case. Returns true when the page is already listed.
	boolean checkAlreadyFound (String page)
	{
		int i = 0;

		while (i < pageNames.size())
		{
			String visited = (String) pageNames.elementAt(i);

			if (visited.equalsIgnoreCase (page))
				return true;

			i++;
		}

		// Nothing matched (or nothing has been visited yet)
		return false;
	}

	// Function incrementPages
	// Purpose - record a page as visited by the search thread, unless
	// it is already recorded, and show the running page count in the
	// status field. Recording each page once prevents the same link
	// from being followed again when it appears on several pages.
	public void incrementPages (String page)
	{
		if (!checkAlreadyFound (page))
		{
			pageNames.addElement (page);

			// Provide feedback to the user
			statusArea.setText ("Searching page: " + pageNames.size());
		}
	}

	// Function getTotalPages
	// Purpose - returns one less than the number of entries in the
	// list of visited pages.
	// NOTE(review): the reason for the "- 1" is not visible here -
	// confirm against the caller before changing it.
	public int getTotalPages ()
	{
		int recorded = pageNames.size();

		return recorded - 1;
	}

	// Function translateSpecialChars
	// Purpose - convert the HTML character entities listed below
	// (e.g. &eacute; &amp;) into the corresponding Unicode characters
	// so that the text is displayed properly.
	// Every occurrence of each entity is converted. The text is scanned
	// once from left to right and converted output is never scanned
	// again, so "&amp;lt;" becomes "&lt;" rather than "<". Entity names
	// are matched case sensitively; anything not in the table is copied
	// unchanged.
	// Returns the converted text (the argument itself when it is null
	// or contains no '&').
	final protected String translateSpecialChars (String title)
	{
		// HTML representation of selected extended chars
		String rawString[] = {"&aacute;","&acirc;","&aelig;",
				      "&agrave;","&auml;","&ccedil;",
				      "&eacute;","&ecirc;","&egrave;",
				      "&euml;","&icirc;","&iuml;",
				      "&ocirc;","&ouml;","&szlig;",
				      "&uuml;","&yuml;","&copy;",
				      "&pound;","&reg;","&lt;",
				      "&gt;","&amp;","&quot;",
				      "&atilde;","&aring;","&igrave;",
				      "&iacute;","&eth;","&ntilde;",
				      "&ograve;","&oacute;","&otilde;",
				      "&divide;","&oslash;","&ugrave;",
				      "&uacute;","&ucirc;","&yacute;",
				      "&thorn;","&times;","&nbsp;",
				      "&sect;","&cent;","&deg;"};
		// Unicode representation of above (same order)
		char translatedChar[] = {'\u00e1','\u00e2','\u00e6',
					 '\u00e0','\u00e4','\u00e7',
					 '\u00e9','\u00ea','\u00e8',
					 '\u00eb','\u00ee','\u00ef',
					 '\u00f4','\u00f6','\u00df',
					 '\u00fc','\u00ff','\u00a9',
					 '\u00a3','\u00ae','\u003c',
					 '\u003e','\u0026','\u0022',
					 '\u00e3','\u00e5','\u00ec',
					 '\u00ed','\u00f0','\u00f1',
					 '\u00f2','\u00f3','\u00f5',
					 '\u00f7','\u00f8','\u00f9',
					 '\u00fa','\u00fb','\u00fd',
					 '\u00fe','\u00d7','\u00a0',
					 '\u00a7','\u00a2','\u00b0'};

		// Nothing to convert
		if (title == null || title.indexOf ('&') == -1)
			return title;

		StringBuffer translated = new StringBuffer (title.length());
		int pos = 0;

		while (pos < title.length())
		{
			char current = title.charAt (pos);
			boolean replaced = false;

			// Only a '&' can start an entity. The table length is
			// used as the loop bound so that the two arrays above
			// are the only things to keep in step.
			if (current == '&')
			{
				for (int loop = 0; loop < rawString.length; loop++)
				{
					if (title.startsWith (rawString[loop], pos))
					{
						// Entity found so emit the Unicode
						// equivalent and skip past it
						translated.append (translatedChar[loop]);
						pos += rawString[loop].length();
						replaced = true;
						break;
					}
				}
			}

			if (!replaced)
			{
				// Ordinary character (or unknown entity)
				translated.append (current);
				pos++;
			}
		}

		return translated.toString();
	}

	// Function addToList
	// Purpose - add a match to the results list. The list entry shows
	// the page title followed by the matching text; the page name is
	// stored at the same position in pageMatch so that the URL can be
	// jumped to. Title and text are only passed through
	// translateSpecialChars() when they contain both "&" and ";",
	// i.e. when they may hold HTML special characters such as &quot;
	// or &ccedil;.
	public void addToList (String Page, String line, String title)
	{
		boolean titleHasEntity = title.indexOf("&") != -1 &&
					 title.indexOf(";") != -1;
		String shownTitle = titleHasEntity ? translateSpecialChars (title)
						   : title;

		boolean lineHasEntity = line.indexOf("&") != -1 &&
					line.indexOf(";") != -1;
		String shownLine = lineHasEntity ? translateSpecialChars (line)
						 : line;

		resultsArea.addItem (shownTitle + "   TEXT: " + shownLine);

		pageMatch.addElement(Page);
	}

}


//=========================================================================
//                        Class SearchPages 
//=========================================================================

// This thread performs the search. The search starts with the index.html or
// index.htm page and then follows all local links
// (e.g. <A HREF="fred.html">link to fred</A> or 
// <A HREF="http://www.<hostname>.demon.co.uk/fred.html">link to fred</A>).
// Note external links are ignored.
class SearchPages extends Thread
{
	// Search state transitions
	// First find top level pages (from the index page)
	// Search the above pages first
	// Search all other pages
	final byte FIND_TOP_LEVEL_PAGES = 0;
	final byte SEARCH_TOP_LEVEL_PAGES = 1;
	final byte SEARCH_OTHER_PAGES = 2;

	String hostName;        // Host name of site e.g babbage
	HomePageSearch app;     // Parent applet
	Vector textToFind;      // Words/String to find
	int foundWhere[];	// LineNumber the last word was found on
	int maxPages;           // Maximum number of pages to visit
	int matchMode;		// 1=AND 2=OR
	int hitsFound = 0;      // No of occurrences of search string
	int MaxMatchLines;	// If AND all words must be within this number of lines
	static final byte URLCOUNT = 2;
	boolean pageOpened = false;     // Flag to indicate if index page
					// opened OK
	boolean proxyDetected = false;  // Flag to indicate if a proxy server
					// or firewall has been detected
	int topLevelSearch;             // Search the index page links first
	Vector topLevelPages;           // Page names in the index page
	Vector nextLevelPages;          // Lower level pages

	// Constructor
	//   applet    - parent applet (supplies MAX_MATCHLINES)
	//   hn        - host name of the site
	//   text      - raw search text; terms may be separated by " & "
	//               (AND) or " | " (OR)
	//   maxSearch - maximum number of pages to visit
	//
	// The search text is split into terms at every " & " or " | ".
	// The first operator encountered selects the match mode for the
	// whole search (1=AND, 2=OR); with no operator the mode stays at 2.
	// Each term is trimmed so that extra spaces typed around an
	// operator do not become part of the term, and terms that are
	// empty after trimming are dropped.
	SearchPages (HomePageSearch applet, String hn, String text, int maxSearch)
	{
		app = applet;
		hostName = hn;
		maxPages = maxSearch;
		MaxMatchLines = app.MAX_MATCHLINES;
		matchMode = 2;			// doesn't matter if no & or |
		textToFind = new Vector();

		text = text.trim();
		boolean modeChosen = false;
		int pos = 0;

		while (true)
		{
			int andAt = text.indexOf(" & ", pos);
			int orAt = text.indexOf(" | ", pos);

			if (andAt < 0 && orAt < 0)
			{
				// No more operators: the remainder is the last
				// term (kept even when the whole text is empty)
				textToFind.addElement(text.substring(pos).trim());
				break;
			}

			// Whichever operator comes first ends the current term
			boolean andFirst = (andAt >= 0) && (orAt < 0 || andAt < orAt);
			int opAt = andFirst ? andAt : orAt;

			String term = text.substring(pos, opAt).trim();
			if (term.length() > 0)
				textToFind.addElement(term);

			if (!modeChosen)
			{
				modeChosen = true;
				matchMode = andFirst ? 1 : 2;
			}

			pos = opAt + 3;		// skip over the operator
			if (pos > text.length())
				break;
		}

		// One "last seen on line" slot per term (used for AND)
		foundWhere = new int[textToFind.size()];
	}

	// Thread body: runs one complete search and reports the outcome
	// in the applet's status area.
	public void run()
	{
		// State 1: search the index page, remembering all links on
		// the index page
		topLevelSearch = FIND_TOP_LEVEL_PAGES;

		topLevelPages = new Vector();
		nextLevelPages = new Vector();

		// Check to see if a proxy is being used. If so then we use
		// IP address rather than hostnames
		proxyDetected = detectProxyServer();

		app.statusArea.setText ("Starting...");
		startSearch();
		app.enableButtons();
		app.abort.disable();

		// When the index page could not be opened startSearch() has
		// already reported "Cannot connect to server"; leave that
		// message in place rather than replacing it with a
		// misleading "0 matches found".
		if (pageOpened)
		{
			if (hitsFound == 0)
				app.statusArea.setText ("No matches found");
			else if (hitsFound == 1)
				app.statusArea.setText (hitsFound + " match found");
			else
				app.statusArea.setText (hitsFound + " matches found");
		}
	}

	// Function: detectProxyServer
	// Purpose: attempt to see if a proxy server or firewall is blocking
	// a connection back to the originating server, by trying to open
	// the index page by host name.
	// Returns: true if the page could not be opened (the caller then
	// uses the IP address, if one was passed as a parameter, for later
	// connections), false if the connection succeeded.
	final boolean detectProxyServer ()
	{
		// Allow for an index page other than "index.html"
		String page = (app.indexName == null) ? "index.html" : app.indexName;

		// Allow for non-Demon Home Page
		String url;
		if (app.server == null)
			url = "http://www." + hostName + ".demon.co.uk/" + page;
		else
			url = app.server + page;

		// Attempt to connect to this URL
		InputStream probe = null;
		try
		{
			probe = new URL (url).openStream();
			return false;
		}
		catch (Exception e)
		{
			// Unable to connect. This may be an incorrect applet
			// parameter. Let's assume though it's a proxy server
			// that's stopping us using the hostname.
			return true;
		}
		finally
		{
			// The stream is only a connectivity probe; release it
			// so the connection is not left open.
			if (probe != null)
			{
				try
				{
					probe.close();
				}
				catch (IOException ignored)
				{
					// Nothing useful can be done if close fails
				}
			}
		}
	}

	// Purpose - drives the three search states:
	//   1. open and search the index page, collecting its links
	//   2. search the pages linked from the index page
	//   3. search all remaining pages
	// The search stops early once the applet reports that maxPages
	// pages have been visited.
	final void startSearch()
	{
		DataInputStream dis = null;
		String [] url = {"",""};
		String currentPageName="";     // HTML page currently being searched

		// Work out the address prefix of the site.
		// If a proxy server/firewall has been detected then use the
		// IP address (if supplied) of the originating server rather
		// than the hostname. Otherwise allow for non-Demon Home Page.
		String base;
		if (proxyDetected && app.IPAddress != null)
			base = "http://" + app.IPAddress + "/";
		else if (app.server == null)
			base = "http://www." + hostName + ".demon.co.uk/";
		else
			base = app.server;

		if (app.indexName == null)
		{
			url[0] = base + "index.html";
			url[1] = base + "index.htm";
		}
		else
		{
			// Allow for an index page other than "index.html";
			// there is then no second candidate.
			url[0] = base + app.indexName;
		}

		// Try each candidate in turn until one opens
		for (int i=0; i < URLCOUNT && dis == null; i++)
		{
			// Skip the unused slot instead of provoking (and
			// logging) a MalformedURLException for ""
			if (url[i].length() == 0)
				continue;

			try
			{
				URL doc = new URL (url[i]);
				dis = new DataInputStream (doc.openStream());
				currentPageName = url[i];
				pageOpened = true;
			}
			catch (Exception e)
			{
				System.out.println ("StartSearch(): Exception: " + e + " Page= " + url[i]);
			}
		}

		if (dis == null)
		{
			app.statusArea.setText ("Cannot connect to server");
			System.out.println ("StartSearch(): No pages to search");
			return;                 // Nothing to do
		}

		// Search the first page. Any links on the index page
		// are saved and searched next.
		try
		{
			searchPage (dis,currentPageName);
		}
		finally
		{
			// Release the connection to the index page
			try
			{
				dis.close();
			}
			catch (IOException e)
			{
				System.out.println ("StartSearch(): Exception: " + e + " Page= " + currentPageName);
			}
		}

		// State 2: search links found on the index page
		topLevelSearch = SEARCH_TOP_LEVEL_PAGES;

		for (int i=0; i < topLevelPages.size(); i++)
		{
			checkLink ((String)topLevelPages.elementAt(i));

			// Check that the maximum number of pages to be
			// searched has not been reached
			if (app.getTotalPages () >= maxPages)
				return;
		}

		// State 3: spider all other pages
		topLevelSearch = SEARCH_OTHER_PAGES;

		for (int i=0; i < nextLevelPages.size(); i++)
		{
			checkLink ((String)nextLevelPages.elementAt(i));

			// Check that the maximum number of pages to be
			// searched has not been reached
			if (app.getTotalPages () >= maxPages)
				return;
		}
	}

	// Purpose - read all lines on a page - extracting local links
	// and checking for the presence of the search string
	//   dis - open stream positioned at the start of the page
	//   url - name of the page, used when reporting matches
	// Links are queued (states 1 and 2) or followed immediately
	// (state 3). Reading stops as soon as a link is found after the
	// page limit has been reached. I/O errors are logged and end the
	// scan of this page.
	final void searchPage (DataInputStream dis, String url)
	{
		try
		{
			String input;           // Raw line read in
			String title = "";      // Page title
			String titletag = "";   // Title shown for a match
			String urltag = url;    // URL, plus #NAME once seen
			int LineNum = 0;	// Line Number

			// Reset the foundWhere linenumbers for each page
			for (int n=0; n<textToFind.size(); n++)
				foundWhere[n] = -MaxMatchLines;

			// Read a line at a time
			while ((input = dis.readLine()) != null)
			{
				LineNum++;

				// Convert to upper case (makes comparisons
				// easier)
				String upperCaseInput = input.toUpperCase();

				// check for document title
				String temp = parseForTitle (input, upperCaseInput, dis);
				// If a title has been found then remember it
				// so that it can be displayed in the list box
				if (temp != null && temp.length() > 0)
				{
					title = temp;
					titletag = title;
				}

				// check for NAME= tag so we can get
				// closer to the match string
				String nametag = parseForName (upperCaseInput, input);
				if (nametag != null)
				{
					urltag = url + "#" + nametag ;
					// NOTE(review): the text before a 'v' in
					// the anchor name is shown as " Ch<n>" -
					// looks specific to one site's anchor
					// naming; confirm before changing.
					int v = nametag.indexOf ("v");
					if (v > 0)
						titletag = title+" Ch"+nametag.substring(0,v);
					if (v == -1)
						titletag = title+" Ch"+nametag;
				}

				// check for match after title has been found
				// (Don't bother searching the title though)
				if (title.length() > 0 && temp == null)
					checkMatch (input, urltag, titletag, LineNum);

				// check to see if this line contains
				// a link
				String link = parseForLink (upperCaseInput, input);
				if (link != null)
				{
					// Check if the maximum number
					// of pages to search has been
					// reached
					if (app.getTotalPages () >= maxPages)
						return;

					if (topLevelSearch == FIND_TOP_LEVEL_PAGES)
						topLevelPages.addElement (link);
					else if (topLevelSearch == SEARCH_TOP_LEVEL_PAGES)
						nextLevelPages.addElement (link);
					else
					{
						// Following the link re-enters
						// searchPage(), which resets the
						// shared foundWhere array. Keep this
						// page's values so that AND matching
						// on the remaining lines of this page
						// is not based on another page's
						// line numbers.
						int[] saved = new int[foundWhere.length];
						System.arraycopy (foundWhere, 0, saved, 0, saved.length);

						checkLink (link);

						System.arraycopy (saved, 0, foundWhere, 0, saved.length);
					}
				}

			}
		}
		catch (IOException e)
		{
			System.out.println ("searchPage(): Exception: " + e + " on Page: " + url);
		}
	}

	// Purpose - scan a line of text looking for the title of the page
	// e.g <TITLE> My Page </TITLE>
	// Titles may be multi-line so this routine keeps reading lines from
	// the document until the next tag (normally </TITLE>) is seen, more
	// than 25 characters have been collected, or the document ends.
	//   rawInput - the line as read from the document
	//   input    - upper case version of rawInput
	//   dis      - the document, for reading continuation lines
	// Returns the trimmed title text (possibly empty), or null when
	// the line holds no complete "<TITLE...>" opening tag or a read
	// error occurs.
	final String parseForTitle (String rawInput, String input, DataInputStream dis)
	{
		// Search for <TITLE> tag
		// Can the TITLE tag have spaces? e.g < TITLE  > (assume not!)
		int tagStart = input.indexOf ("<TITLE");
		if (tagStart == -1)
			return (null);          // No title found

		// Allow for <HTML><HEAD><TITLE>Title</TITLE></HEAD>
		int tagEnd = input.indexOf (">", tagStart);
		if (tagEnd == -1)
			return (null);          // Opening tag not closed on this line

		StringBuffer title = new StringBuffer ("");
		boolean foundTag = false;
		int from = tagEnd + 1;          // Start of title text

		try
		{
			while (title.length() <= 25 && foundTag == false)
			{
				// Copy everything up to the next tag
				int lt = rawInput.indexOf ('<', from);
				if (lt != -1)
				{
					title.append (rawInput.substring (from, lt));
					foundTag = true;
				}
				else if (from < rawInput.length())
					title.append (rawInput.substring (from));

				// Continue reading from doc
				// if </TITLE> not found
				if (foundTag == false)
				{
					rawInput = dis.readLine();

					// End of document inside the title:
					// return what has been collected
					if (rawInput == null)
						break;

					from = 0;
				}
			}
		}
		catch (IOException e)
		{
			System.out.println ("parseForTitle(): Exception: " + e);
			return (null);
		}

		// Remove leading and trailing spaces
		return (title.toString().trim());
	}


	// Purpose - scan a line of text looking for links to other
	// pages. The following types of links are currently supported
	// 1. Normal links, e.g <A HREF="page.html">Text</A>
	// 2. Frames, e.g <FRAME scrolling=yes SRC="contents.html">
	//   upperCaseInput - upper case version of the line (for searching)
	//   input          - the line as read (the link is taken from this)
	// Returns the first quoted link found, trimmed, or null if the
	// line holds no quoted HREF or FRAME SRC value.
	final String parseForLink (String upperCaseInput, String input)
	{
		String link;

		// 1. Normal links, e.g <A HREF="page.html">Text</A>
		int href = upperCaseInput.indexOf ("HREF");
		if (href != -1)
		{
			link = quotedValueAfter (upperCaseInput, input, href);
			if (link != null)
				return (link);
		}

		// 2. Frames, e.g <FRAME scrolling=yes SRC="contents.html">
		int frame = upperCaseInput.indexOf ("FRAME");
		if (frame != -1)
		{
			// Locate position of SRC tag
			int src = upperCaseInput.indexOf ("SRC", frame);
			if (src != -1)
			{
				// Search from SRC (not from FRAME) for both
				// quote styles, so that an earlier quoted
				// attribute such as NAME='left' is not
				// returned as the link.
				link = quotedValueAfter (upperCaseInput, input, src);
				if (link != null)
					return (link);
			}
		}

		return (null);
	}

	// Purpose - return the trimmed text between the first pair of
	// double quotes at or after position 'from', or, when no such
	// pair exists, between the first pair of single quotes. Positions
	// are located in 'upper' and the text is taken from 'raw'.
	// Returns null if neither kind of quote pair is present.
	private String quotedValueAfter (String upper, String raw, int from)
	{
		int open = upper.indexOf ('"', from);
		int close = (open == -1) ? -1 : upper.indexOf ('"', open + 1);

		// If double quotes were not found then try using
		// single quote marks
		if (open == -1 || close == -1)
		{
			open = upper.indexOf ('\'', from);
			close = (open == -1) ? -1 : upper.indexOf ('\'', open + 1);
		}

		if (open == -1 || close == -1)
			return (null);

		// Extract the value and remove leading and trailing spaces
		return (raw.substring (open + 1, close).trim ());
	}

	// Purpose - scan a line of text looking for NAME tags,
	// e.g <A NAME="placetag">
	//   upperCaseInput - upper case version of the line (for searching)
	//   input          - the line as read (the name is taken from this)
	// Returns the trimmed, quoted value of the first NAME= that
	// follows an "<A " on the line, or null if there is none.
	final String parseForName (String upperCaseInput, String input)
	{
		// Only anchors count. Without this check a NAME= belonging
		// to any other tag (e.g. <META NAME="keywords">) would be
		// picked up, because searching from position -1 starts at
		// the beginning of the line.
		int anchor = upperCaseInput.indexOf ("<A ");
		if (anchor == -1)
			return (null);

		int attr = upperCaseInput.indexOf ("NAME=", anchor);
		if (attr == -1)
			return (null);

		// Locate position of quote marks
		int open = upperCaseInput.indexOf ('"', attr);
		int close = (open == -1) ? -1 : upperCaseInput.indexOf ('"', open + 1);

		// If double quotes were not found then try using
		// single quote marks
		if (open == -1 || close == -1)
		{
			open = upperCaseInput.indexOf ('\'', attr);
			close = (open == -1) ? -1 : upperCaseInput.indexOf ('\'', open + 1);
		}

		if (open == -1 || close == -1)
			return (null);

		// Extract the name and remove leading and trailing spaces
		return (input.substring (open + 1, close).trim ());
	}

	// Purpose - scan a line of text to see if the search string is
	// present. If so then add the line to the list of matches.
	//   input   - raw line from the page (HTML tags are removed first)
	//   url     - page name/URL recorded with a match
	//   title   - title displayed with a match
	//   LineNum - number of this line within the page
	// In OR mode (matchMode != 1) any term found on the line is a
	// match. In AND mode (matchMode == 1) the line is a match only
	// when every term was last seen within MaxMatchLines lines of
	// this one. At most one match is reported per line.
	final void checkMatch (String input, String url, String title, int LineNum)
	{
		// remove HTML tags before search
		String searchLine = removeHTMLTags (input);

		// If the line contains no non-HTML text there is
		// nothing to search
		if (searchLine.length() == 0)
			return;

		// For a case-insensitive search compare lower case copies
		String haystack = app.matchCase ? searchLine : searchLine.toLowerCase();

		for (int i=0; i<textToFind.size(); i++)
		{
			String term = (String)textToFind.elementAt(i);
			String needle = app.matchCase ? term : term.toLowerCase();

			if (!lineContains (haystack, needle, app.matchWord))
				continue;

			if (matchMode == 1)
			{
				// AND: remember where this term was seen, then
				// require every term to have been seen recently
				foundWhere[i] = LineNum;

				boolean allNear = true;
				for (int j=0; j<textToFind.size(); j++)
				{
					if (LineNum > (foundWhere[j] + MaxMatchLines))
						allNear = false;
				}
				if (!allNear)
					continue;
			}

			// Found it! Display the match
			app.addToList (url, searchLine, title);
			hitsFound++;
			return;
		}
	}

	// Purpose - report whether 'term' occurs in 'line'. With
	// wholeWord set, an occurrence only counts when it is not
	// directly preceded or followed by a letter or digit. Every
	// occurrence is examined, so "cat" is found as a whole word in
	// "category cat".
	private boolean lineContains (String line, String term, boolean wholeWord)
	{
		int at = line.indexOf (term);

		if (!wholeWord)
			return (at != -1);

		while (at != -1)
		{
			int end = at + term.length();
			boolean startOk = (at == 0) ||
				!Character.isLetterOrDigit (line.charAt (at - 1));
			boolean endOk = (end == line.length()) ||
				!Character.isLetterOrDigit (line.charAt (end));

			if (startOk && endOk)
				return (true);

			// An empty term is only tested at the start of the
			// line; stepping on would never terminate
			if (term.length() == 0)
				return (false);

			at = line.indexOf (term, at + 1);
		}

		return (false);
	}

	// Purpose - strip HTML tags (e.g <BR>) from a line, returning
	// only the text that lies outside '<' ... '>' pairs. The '<' and
	// '>' characters themselves are never copied. The approach is
	// deliberately simple: each line starts "outside a tag", so a
	// tag split over more than one line is not handled.
	final String removeHTMLTags (String inputLine)
	{
		int length = inputLine.length();
		StringBuffer text = new StringBuffer ("");
		boolean insideTag = false;
		int pos = 0;

		while (pos < length)
		{
			char c = inputLine.charAt (pos++);

			switch (c)
			{
				case '<':
					insideTag = true;
					break;
				case '>':
					insideTag = false;
					break;
				default:
					if (!insideTag)
						text.append (c);
					break;
			}
		}

		return (text.toString());
	}

	// Purpose - checks validity of a link. If the link is valid
	// it's added to the list of visited links and then followed.
	// A link is skipped when it is only an offset in the current
	// document, has already been followed, uses a non-HTTP scheme,
	// names the index page, points at another host, or does not look
	// like an HTML/text page. Errors opening or reading the page are
	// logged.
	final void checkLink (String link)
	{
		URL doc;        // URL of link
		DataInputStream dis = null;
		int i;
		boolean qualifiedLink = false;

		// Skip the link if it's just an offset in this document
		if (link.startsWith("#"))
			return;

		// Strip #offset tag off
		if ((i = link.indexOf ("#")) != -1)
			link = link.substring (0,i);

		// Check that this link hasn't already been followed
		if (app.checkAlreadyFound (link))
			return;

		// Ignore non HTML links and start page
		if ((link.startsWith ("mailto:")) ||
		    (link.startsWith ("wais:")) ||
		    (link.startsWith ("gopher:")) ||
		    (link.startsWith ("newsrc:")) ||
		    (link.startsWith ("ftp:")) ||
		    (link.startsWith ("nntp:")) ||
		    (link.startsWith ("telnet:")) ||
		    (link.startsWith ("news:")) ||
		    (link.equalsIgnoreCase (app.indexName)) ||
		    (link.equalsIgnoreCase ("index.html")) ||
		    (link.equalsIgnoreCase ("index.htm")))
			return;

		// Check that it is not an outside link (e.g www.mycom.com)
		if (link.indexOf ("http:") != -1)
		{
			String pageName;
			if (app.server == null)
				pageName = "http://www."+ hostName + ".demon.co.uk";
			else
				pageName = app.server;

			// Allow for local host being displayed as an
			// IP address rather than host name
			if (proxyDetected && app.IPAddress != null)
				pageName = "http://" + app.IPAddress;

			// This is a "fully qualified link"
			// e.g "http://www.babbage.demon.co.uk/link.html"
			qualifiedLink = true;

			// If the link doesn't contain the local host name
			// or IP address then it's an external link - so
			// ignore it
			if (link.indexOf (pageName) == -1)
				return;
		}

		// Check that it's a HTML page
		if (link.indexOf (".htm") == -1 &&
		    link.indexOf (".HTM") == -1 &&
		    link.indexOf (".TXT") == -1 &&
		    link.indexOf (".txt") == -1 &&
		    link.indexOf (".phtml") == -1 &&
		    link.indexOf (".PHTML") == -1)
			return;

		// Valid link - add it to the array of visited links
		app.incrementPages (link);

		// Follow link and read its contents
		try
		{
			if (app.server != null && link.startsWith ("/"))
			{
				// Remove the "/" from the link as the
				// server name has a terminating "/"
				link = link.substring (1, link.length());
			}

			if (qualifiedLink)
			{
				// Link is absolute
				// (e.g "http://www.babbage.demon.co.uk/fred.html")
				// and has already been checked against the
				// local host name / IP address, so it is used
				// as it stands. Prefixing it again would give
				// "http://<ip>/http://<ip>/fred.html".
				doc = new URL (link);
			}
			else if (proxyDetected && app.IPAddress != null)
			{
				// If a proxy server/firewall has been detected
				// then use the IP address (if supplied) of the
				// originating server rather than the hostname.
				doc = new URL ("http://" + app.IPAddress + "/" + link);
			}
			else if (app.server == null)
				doc = new URL ("http://www."+ hostName + ".demon.co.uk/" + link);
			else
				doc = new URL (app.server + link);

			if (app.debugMode)
				System.out.println ("Found link " + link);

			dis = new DataInputStream (doc.openStream());

			// Start searching this new link
			searchPage (dis, link);

		}
		catch (IOException e)
		{
			System.out.println ("checkLink(): Exception: " + e + " Page: " + link);
		}
		finally
		{
			// Release the connection once the page has been read
			if (dis != null)
			{
				try
				{
					dis.close();
				}
				catch (IOException e)
				{
					System.out.println ("checkLink(): Exception: " + e + " Page: " + link);
				}
			}
		}
	}

}




