<?php

/*

 Copyright (C) 2005 - 2006, Domeo / Avisi B.V.

 Website Baker inline wrapper module is free software; you can 
 redistribute it and/or modify it under the terms of the GNU 
 General Public License as published by the Free Software 
 Foundation; either version 2 of the License, or (at your 
 option) any later version.

 Website Baker is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with Website Baker; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

*/

/**
 * @package pages
 *
 * An HTML page whose contents are rewritten for inline (wrapped) display.
 */
class HtmlPage extends WebPage {

	// Include the (rewritten) stylesheets of the original page? Defaults to false.
	var $stylesheet;

	
	/**
	 * Constructor.
	 *
	 * NOTE(review): this is a PHP 4 style constructor (method named after the
	 * class). PHP 8 no longer calls such a method on `new`; it is kept as-is
	 * because it delegates to the parent's WebPage() method, which is not
	 * visible from this file.
	 *
	 * @param $pUrl string  The absolute URL.
	 */
	function HtmlPage($pUrl) {

		$this->WebPage($pUrl);
		$this->stylesheet = false;
	}
	
	
	/**
	 * Rewrite html document for inline usage.
	 *
	 * Reads $this->contents, and replaces it by the (cleaned) contents of the
	 * <head> element followed by the contents of the <body> element:
	 *  - src attributes are made absolute;
	 *  - stylesheets are removed, or (when $this->stylesheet is true) rewritten
	 *    so their rules only apply inside the element with id "wrapper";
	 *  - relative links and form actions in the body are routed through the
	 *    current script ($_SERVER['SCRIPT_NAME']) using the "wrapperurl" parameter;
	 *  - title, base and meta tags are stripped from the header.
	 * Sets $this->processed to true when done.
	 */ 
	function processDocument() {
	
		$lPath = $this->getPath();
		$lRootPath = $this->getRootPath();
		$lScriptName = $_SERVER['SCRIPT_NAME'];

		// Rewrite src of images and javascript. Relative paths have to be absolute:
		// blah.png  -> <path>blah.png, /blah.png -> http://www.google.com/blah.png
		$lPageContents = $this->_htmlAbsolutizeUrls($this->contents, "~src=" . $this->_htmlUrlValue(">") . "~si", $lPath, $lRootPath);
		
		if (!$this->stylesheet) {
			
			// Remove internal stylesheets; 
			$lPageContents = preg_replace("'<style[^>]*>.*?</style>'si", "", $lPageContents); 
			// Remove external stylesheets. The tag may be written as <link ...>,
			// <link ... /> or <link ...></link>; never look beyond the tag itself.
			$lPageContents = preg_replace("'<link\s[^>]*rel=.?stylesheet.?[^>]*>(?:\s*</link>)?'si", "", $lPageContents); 
		} else {
			
			// Rewrite internal stylesheets for use in div with id "wrapper". 
			$lPageContents = $this->_htmlRewriteStyleBlocks($lPageContents, $lPath, $lRootPath);
		}
		
		// Cut the body
		$lMatches = array();
		$lHasBody = preg_match("'<body[^>]*>(.*)</body>'si", $lPageContents, $lMatches);
		$lBody = $lHasBody ? $lMatches[1] : "<p>No bodytags found</p>\n";

		// Cut the header. The <head> tag may carry attributes (e.g. profile="...").
		$lMatches = array();
		if (preg_match("'<head(?:\s[^>]*)?>(.*?)</head>'si", $lPageContents, $lMatches)) {
			$lHeader = $lMatches[1];
		} else if ($lHasBody) {
			// The body is emitted below; repeating the whole document in front
			// of it would duplicate the page.
			$lHeader = "";
		} else {
			// Neither head nor body found: fall back to the unprocessed document.
			$lHeader = $this->contents;
		}
		
		// Rewrite relative links (<a href="">) in HTML body. Load all links through WebsiteBaker.
		$lBody = $this->_htmlRewriteLinks($lBody, $lPath, $lRootPath, $lScriptName);
		
		// Rewrite relative links (<form action="">) in HTML body. Load all links through WebsiteBaker.
		$lBody = $this->_htmlRewriteForms($lBody, $lPath, $lRootPath, $lScriptName);

		// Rewrite links to external stylesheets. All stylesheets have to be processed
		// by this module first. Do not load the stylesheets through WebsiteBaker.
		if ($this->stylesheet) {
			$lHeader = $this->_htmlRewriteHeaderLinks($lHeader, $lPath, $lRootPath);
		}
		
		// This works and yes I know this is not valid HTML. 
		// Still searching how to change WebsiteBakers header tags.
		
		// Remove title tag
		$lHeader = preg_replace("'<title[^>]*>.*?</title>'si", "", $lHeader);
		// Remove base tag
		$lHeader = preg_replace("'<base[^>]*>'si", "", $lHeader);
		// Remove meta tags
		$lHeader = preg_replace("'<meta[^>]*>'si", "", $lHeader);
		
		$this->contents = $lHeader . "\n"  . $lBody;
		
		$this->processed = true;
	}
	
	
	// =============================================================
	// Private helpers
	// =============================================================
	
	/**
	 * Regex fragment matching a URL value that is double quoted, single quoted
	 * or unquoted. It defines three capture groups (one per quoting style);
	 * exactly one of them is non-empty in a match. The value ends at whitespace,
	 * at the matching quote or at $pStop.
	 *
	 * @param $pStop string  Extra character that terminates the value (">" or ")").
	 * @return string  The regex fragment (without delimiters).
	 */
	function _htmlUrlValue($pStop) {
		return "(?:\"([^\"\\s" . $pStop . "]+)|'([^'\\s" . $pStop . "]+)|([^\"'\\s" . $pStop . "]+))";
	}
	
	
	/**
	 * Pick the URL out of one match row produced with PREG_OFFSET_CAPTURE.
	 *
	 * @param $pMatch array  One row of a preg_match_all() result (PREG_SET_ORDER).
	 * @param $pFirstGroup int  Index of the first of the three _htmlUrlValue() groups.
	 * @return mixed  array(url, byte offset), or false when no group matched.
	 */
	function _htmlPickUrl($pMatch, $pFirstGroup) {
		for ($i = $pFirstGroup; $i < $pFirstGroup + 3; $i++) {
			// Groups that did not take part in the match hold an empty string.
			if (isset($pMatch[$i]) && $pMatch[$i][0] !== "") {
				return $pMatch[$i];
			}
		}
		return false;
	}
	
	
	/**
	 * Is the URL relative to the wrapped page?
	 *
	 * @param $pLink string  URL as found in the document.
	 * @return boolean  False for URLs with a scheme (http:, mailto:, javascript:,
	 *                  data:, ...), protocol-relative URLs (//host/...) and
	 *                  in-page fragments (#anchor); true otherwise.
	 */
	function _htmlIsRelativeUrl($pLink) {
		return !preg_match("'^(?:[a-z][a-z0-9+.-]*:|//|#)'i", $pLink);
	}
	
	
	/**
	 * Convert a URL found in the wrapped page to an absolute URL.
	 *
	 * @param $pLink string  URL as found in the document.
	 * @param $pPath string  Base for path-relative URLs; the link is appended as-is.
	 * @param $pRootPath string  Base for root-relative URLs; the leading "/" of
	 *                           the link is dropped before appending.
	 * @return string  The absolute URL; URLs that are not relative are returned unchanged.
	 */
	function _htmlAbsoluteUrl($pLink, $pPath, $pRootPath) {
		
		if (substr($pLink, 0, 2) === "//") {
			// Protocol-relative: borrow the scheme of the wrapped site, if it has one.
			$lScheme = array();
			if (preg_match("'^[a-z][a-z0-9+.-]*:'i", $pRootPath, $lScheme)) {
				return $lScheme[0] . $pLink;
			}
			return $pLink;
		}
		if (!$this->_htmlIsRelativeUrl($pLink)) {
			return $pLink;
		}
		if (substr($pLink, 0, 1) === "/") {
			return $pRootPath . substr($pLink, 1);
		}
		return $pPath . $pLink;
	}
	
	
	/**
	 * Make every URL matched by $pPattern absolute.
	 *
	 * Replacement is done by byte offset, so only the matched occurrence is
	 * touched and the base paths are never interpreted as regex replacement syntax.
	 *
	 * @param $pText string  Text to rewrite.
	 * @param $pPattern string  Regex whose groups 1-3 are the _htmlUrlValue() groups.
	 * @param $pPath string  Base for path-relative URLs.
	 * @param $pRootPath string  Base for root-relative URLs.
	 * @return string  The rewritten text.
	 */
	function _htmlAbsolutizeUrls($pText, $pPattern, $pPath, $pRootPath) {
		
		$lMatches = array();
		if (!preg_match_all($pPattern, $pText, $lMatches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)) {
			return $pText;
		}
		// Work from the last match to the first so earlier offsets stay valid.
		for ($i = sizeof($lMatches) - 1; $i >= 0; $i--) {
			$lUrl = $this->_htmlPickUrl($lMatches[$i], 1);
			if ($lUrl === false) {
				continue;
			}
			$pText = substr_replace($pText, $this->_htmlAbsoluteUrl($lUrl[0], $pPath, $pRootPath), $lUrl[1], strlen($lUrl[0]));
		}
		return $pText;
	}
	
	
	/**
	 * Rewrite the rules of all inline <style> blocks for use in the element
	 * with id "wrapper", and make url() references in them absolute.
	 *
	 * @param $pHtml string  The document.
	 * @param $pPath string  Base for path-relative URLs.
	 * @param $pRootPath string  Base for root-relative URLs.
	 * @return string  The document with rewritten <style> blocks.
	 */
	function _htmlRewriteStyleBlocks($pHtml, $pPath, $pRootPath) {
		
		$lMatches = array();
		if (!preg_match_all("'(<style[^>]*>)(.*?)(</style>)'si", $pHtml, $lMatches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)) {
			return $pHtml;
		}
		$lUrlPattern = "~url\\(\\s*" . $this->_htmlUrlValue(")") . "~si";
		
		// Work from the last block to the first so earlier offsets stay valid.
		for ($i = sizeof($lMatches) - 1; $i >= 0; $i--) {
			
			$lOriginalContents = $lMatches[$i][2][0];
			$lContents = $lOriginalContents;
			
			// Prefix the first selector of every rule with "#wrapper".
			$lContents = preg_replace("'(^|})([^a-z^A-Z^\.^#^/]*)([^{]*){'si", "$1$2\n#wrapper $3{", $lContents);
			// Prefix the last selector of a comma separated selector list.
			$lContents = preg_replace("',([^{^,^\n]+){'si", ",#wrapper $1{", $lContents);
			// Prefix the remaining selectors in the middle of a selector list.
			while (preg_match("',(?!#wrapper)([^,^{]+),'si", $lContents)) {
				$lContents = preg_replace("',(?!#wrapper)([^,^{]+),'si", ",#wrapper $1,", $lContents);
			}
			// Convert relative links in stylesheet to absolute links.
			$lContents = $this->_htmlAbsolutizeUrls($lContents, $lUrlPattern, $pPath, $pRootPath);
			
			$pHtml = substr_replace($pHtml, $lContents, $lMatches[$i][2][1], strlen($lOriginalContents));
		}
		return $pHtml;
	}
	
	
	/**
	 * Route relative href links in the body through $pScriptName, passing the
	 * absolute target in the "wrapperurl" parameter. Absolute URLs, mailto:,
	 * javascript: and other scheme links, and #fragment links are left alone,
	 * as is everything between <script> tags.
	 *
	 * @param $pBody string  The body contents.
	 * @param $pPath string  Base for path-relative URLs.
	 * @param $pRootPath string  Base for root-relative URLs.
	 * @param $pScriptName string  Script the links are routed through.
	 * @return string  The rewritten body.
	 */
	function _htmlRewriteLinks($pBody, $pPath, $pRootPath, $pScriptName) {
		
		// Even pieces are markup, odd pieces are complete <script> blocks.
		$lPieces = preg_split("'(<script[^>]*>.*?</script>)'si", $pBody, -1, PREG_SPLIT_DELIM_CAPTURE);
		if (!is_array($lPieces)) {
			return $pBody;
		}
		$lPattern = "~href=" . $this->_htmlUrlValue(">") . "~si";
		
		for ($i = 0; $i < sizeof($lPieces); $i += 2) {
			
			$lMatches = array();
			if (!preg_match_all($lPattern, $lPieces[$i], $lMatches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)) {
				continue;
			}
			// Work from the last link to the first so earlier offsets stay valid.
			for ($j = sizeof($lMatches) - 1; $j >= 0; $j--) {
				
				$lUrl = $this->_htmlPickUrl($lMatches[$j], 1);
				if ($lUrl === false || !$this->_htmlIsRelativeUrl($lUrl[0])) {
					continue;
				}
				// Convert relative address to absolute address first
				$lLink = $this->_htmlAbsoluteUrl($lUrl[0], $pPath, $pRootPath);
				$lLink = str_replace("&amp;", "&", $lLink);
				$lPieces[$i] = substr_replace($lPieces[$i], $pScriptName . "?wrapperurl=" . urlencode($lLink), $lUrl[1], strlen($lUrl[0]));
			}
		}
		return implode("", $lPieces);
	}
	
	
	/**
	 * Route forms with a relative action through $pScriptName.
	 *
	 * POST forms only get their action rewritten. GET forms (the default) get
	 * the script as action and two hidden fields, "wrapperurl" and
	 * "wrapperaction", added just before </form>. Forms without a closing
	 * </form> tag or with an absolute action are left alone.
	 *
	 * @param $pBody string  The body contents.
	 * @param $pPath string  Base for path-relative URLs.
	 * @param $pRootPath string  Base for root-relative URLs.
	 * @param $pScriptName string  Script the forms are routed through.
	 * @return string  The rewritten body.
	 */
	function _htmlRewriteForms($pBody, $pPath, $pRootPath, $pScriptName) {
		
		// Groups: 1 = tag up to "action=", 2-4 = action URL, 5 = rest of the tag,
		// 6 = form contents, 7 = closing tag. [^>] keeps the search for the
		// action attribute inside the opening tag.
		$lPattern = "~(<form\\s(?:[^>]*\\s)?action=)" . $this->_htmlUrlValue(">") . "([^>]*>)(.*?)(</form>)~si";
		
		$lMatches = array();
		if (!preg_match_all($lPattern, $pBody, $lMatches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)) {
			return $pBody;
		}
		// Work from the last form to the first so earlier offsets stay valid.
		for ($i = sizeof($lMatches) - 1; $i >= 0; $i--) {
			
			$lUrl = $this->_htmlPickUrl($lMatches[$i], 2);
			if ($lUrl === false || !$this->_htmlIsRelativeUrl($lUrl[0])) {
				continue;
			}
			// Convert relative address to absolute address first
			$lLink = $this->_htmlAbsoluteUrl($lUrl[0], $pPath, $pRootPath);
			$lLink = str_replace("&amp;", "&", $lLink);
			
			// Test if POST or GET HTML form: look at the method attribute only.
			$lOpeningTag = $lMatches[$i][1][0] . $lUrl[0] . $lMatches[$i][5][0];
			if (preg_match("~\\smethod\\s*=\\s*[\"']?post\\b~i", $lOpeningTag)) {
				
				// We have a POST form. Only rewrite the action URL.
				$lAction = $pScriptName . "?wrapperurl=" . urlencode($lLink);
				
			} else {
				
				// We have a GET (the default) form. Put the variables in a HTML input-tag.
				// The value is escaped byte-wise, independent of the page's character set.
				$lValue = str_replace(array("&", "\"", "<", ">"), array("&amp;", "&quot;", "&lt;", "&gt;"), $lLink);
				$lInputTag  = "<input type=\"hidden\" name=\"wrapperurl\" value=\"" . $lValue . "\" />";
				$lInputTag .= "<input type=\"hidden\" name=\"wrapperaction\" value=\"getform\" />";
				$lAction = $pScriptName;
				
				// Insert before </form> first: it lies behind the action URL.
				$pBody = substr_replace($pBody, $lInputTag, $lMatches[$i][7][1], 0);
			}
			$pBody = substr_replace($pBody, $lAction, $lUrl[1], strlen($lUrl[0]));
		}
		return $pBody;
	}
	
	
	/**
	 * Route the href of every <link> tag in the header through the script
	 * returned by getThisScriptName() (inherited; not WebsiteBaker itself),
	 * passing the absolute target in the "wrapperurl" parameter.
	 *
	 * @param $pHeader string  The header contents.
	 * @param $pPath string  Base for path-relative URLs.
	 * @param $pRootPath string  Base for root-relative URLs.
	 * @return string  The rewritten header.
	 */
	function _htmlRewriteHeaderLinks($pHeader, $pPath, $pRootPath) {
		
		$lPattern = "~<link\\s(?:[^>]*\\s)?href=" . $this->_htmlUrlValue(">") . "~si";
		
		$lMatches = array();
		if (!preg_match_all($lPattern, $pHeader, $lMatches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)) {
			return $pHeader;
		}
		$lThisScript = $this->getThisScriptName();
		
		// Work from the last link to the first so earlier offsets stay valid.
		for ($i = sizeof($lMatches) - 1; $i >= 0; $i--) {
			
			$lUrl = $this->_htmlPickUrl($lMatches[$i], 1);
			if ($lUrl === false) {
				continue;
			}
			// Convert relative address to absolute address first
			$lLink = $this->_htmlAbsoluteUrl($lUrl[0], $pPath, $pRootPath);
			$lLink = str_replace("&amp;", "&", $lLink);
			// Load through this script
			$pHeader = substr_replace($pHeader, $lThisScript . "?wrapperurl=" . urlencode($lLink), $lUrl[1], strlen($lUrl[0]));
		}
		return $pHeader;
	}
	
	
	// =============================================================
	// Getters and setters
	// =============================================================
	
	/**
	 * Include rewritten stylesheet from original page?
	 *
	 * @param $pValue boolean  If true, then include rewritten stylesheet.
	 */
	function setStylesheet($pValue) {
		$this->stylesheet = $pValue;
	}
}

?>