<?php
/* Logaholic Web Analytics software             Copyright(c) 2005-2016 Logaholic B.V.
 *                                                               All rights Reserved.
 * This code is subject to the Logaholic license. Unauthorized copying is prohibited.
 * support@logaholic.com                         http://www.logaholic.com/License.txt
*/ 
# Stop right away when the including script has not defined the APP_INCLUDE constant.
if(!defined('APP_INCLUDE')){ die('invalid inclusion'); }
 
# Load the bundled ua-parser component and import its Parser class.
require_once logaholic_dir()."components/ua-parser/ua-parser.php";
use UAParser\Parser;
# Create a parser instance at file scope.
# NOTE(review): $newparser is not referenced in the visible part of this file - confirm where it is consumed.
$newparser = Parser::create();

# The Import class runs an update job for the loaded profile: it collects the log files to
# process, loads pre-parsed log data into a temporary "<tablename>log" table and copies that
# data into the profile's normalized tables.
#
# Besides the properties declared below, the methods also assign a number of undeclared
# (dynamic) properties: totaltime, startscript, manage_keys and cap_sessions in the
# constructor, fsize in ImportLog(), durationlog in LogDuration() and
# real_profile_tablename in correctProfileTablename().
class Import {
	var $logdir;			// the path where we find the log files
	var $print;				// determines how LogProcess() prints output: 2 = echo with <br/>, 3 = write to the update log, 'stream' = echo as "data: ..." lines
	var $log_parser;		// the log parser object, ie an instance of the parser class that matches the format of the log file
	var $num_import_lines;	// the number of lines we have imported from a log file
	var $filehistory;		// contains an array of information about log files that have already been analyzed
	var $totmem;			// the memory budget in bytes; the constructor sets it to 75% of php's memory_limit
	var $oldest_timestamp;	// the oldest timestamp found in the current update job
	var $latest_timestamp;	// the latest timestamp found in the current update job
	var $dates_seen;		// an array of dates that we've seen during the current update job

	var $ua_whitelist;		// this is a whitelist for useragents. includes the data from files/ua_whitelist.ini
	var $affected_dates;    // an array containing all the dates that were seen during an import job

	var $authenticatedVisitors = array();	// starts out empty; NOTE(review): not written in the visible part of this class - confirm what it collects
	var $bandwidth = array();				// starts out empty; NOTE(review): not written in the visible part of this class - confirm what it collects

	# Prepare the state for one import job.
	#
	# Picks the log directory from the global $profile (an empty string when no profile is
	# loaded), derives a memory budget of 75% of php's memory_limit, resets the job counters
	# and, when $cltos is exactly true, creates and empties the temporary log table.
	#
	# $cltos: pass anything other than true to skip the call to Createlogtable().
	function __construct($cltos=true) {
		global $profile;
		
		$this->oldest_timestamp = time();
		$this->latest_timestamp = 0;
		$this->dates_seen = array();
		
		# 3 makes LogProcess() write its messages to the update log file handle
		$this->print = 3;
		
		if (isset($profile)) {
			if (LOGAHOLIC_BASE_EDITION == "cPanel Edition") {
				$logdir = $profile->logfilefullpath;
			} else {
				
				if($profile->trackermode == 2 && getGlobalSetting("upload_dir") !== false) {
					# uploaded logs live in a per-profile folder below the upload dir
					$logdir = getGlobalSetting("upload_dir", logaholic_dir()."files/");
					$logdir .= "{$profile->profilename}/";
				} else {
					$logdir = $profile->logfilefullpath;
				}
			}
		} else {
			# we might want to use functions from this class even though we don't have a loaded profile
			$logdir = "";
		}
		
		# memory_limit can be a plain byte count ("134217728"), use a K/M/G shorthand ("128M")
		# or be "-1" for unlimited. Simply chopping off the last character only works for the
		# "M" form, so parse it properly. Anything that does not give us a positive byte count
		# (unset, "0", "-1", garbage) falls back to a conservative 8 MB.
		$limit = trim((string) ini_get('memory_limit'));
		$units = array('' => 1, 'k' => 1024, 'm' => 1024 * 1024, 'g' => 1024 * 1024 * 1024);
		$bytes = 0;
		if (preg_match('/^(\d+)([kmg]?)$/i', $limit, $match)) {
			$bytes = $match[1] * $units[strtolower($match[2])];
		}
		if ($bytes <= 0) {
			$bytes = (8 * 1024 * 1024);
		}
		# our budget is three quarters of that limit
		$this->totmem = intval(($bytes * 0.75));


		$this->num_import_lines = 0;
		
		$this->logdir = $logdir;
		$this->totaltime = 0;
		$this->startscript = time();
		$this->manage_keys = false;
		if ($cltos===true) {
			$this->Createlogtable();
		}
		$this->affected_dates = array();
		$this->cap_sessions = 10000;

	}
	
	# Instantiate the parser class named by $format and keep it in $this->log_parser.
	# Returns false when no class with that name exists; returns nothing on success.
	function setLogParser($format) {
		if (!class_exists($format)) {
			return false;
		}
		$this->log_parser = new $format();
	}
		
	# Returns the timestamp from which the update job should start importing and stores it
	# in the global $skiptime.
	#  - a $skiptime that is already set is returned untouched
	#  - with the global $force set to true we start from 0
	#  - otherwise we take the highest timestamp in the profile's main table (0 when it is empty)
	function getStartTimeForProfile() {
		global $profile, $db, $skiptime,$force;
		
		# somebody already decided on a start time, stick with it
		if (isset($skiptime)) {
			return $skiptime;
		}
		
		if ($force === true) {
			return $skiptime = 0;
		}
		
		$result = $db->Execute("SELECT MAX(`timestamp`) FROM {$profile->tablename}");
		$row = $result->FetchRow();
		
		# MAX() yields NULL on an empty table; treat that as "start from the beginning"
		$skiptime = empty($row[0]) ? 0 : $row[0];
		
		return $skiptime;
	}
	
	# Walks down from $dir and collects every sub directory it finds, depth first.
	# $dirs holds what has been collected so far (used by the recursion); the extended list is returned.
	function RecursivelySearchDir($dir, $dirs = array()){
		# drop one trailing slash so the glob pattern below doesn't end up with two
		if (substr($dir, -1) == "/") {
			$dir = substr_replace($dir, "", -1);
		}
		
		$children = glob($dir . '/*', GLOB_ONLYDIR);
		if (!is_array($children)) {
			# glob() reported an error, nothing to add
			return $dirs;
		}
		
		foreach ($children as $child) {
			$dirs[] = $child;
			$dirs = $this->RecursivelySearchDir($child, $dirs);
		}
		
		return $dirs;
	}
	
	# This function reads a directory for log files and returns their full paths in an array,
	# ordered by modification time (oldest first).
	#
	# A directory entry is left out when:
	#  - its name contains the profile's splitfilternegative
	#  - the profile has a splitfilter and the name does not match it (a splitfilter that
	#    contains a "(" is treated as a case-insensitive regular expression, anything else
	#    as a plain substring)
	#  - its name starts with a dot
	#  - the file history says we've seen it with the same modification time (the history is
	#    only consulted when $force is false, or when $check_file_history is true)
	#  - it was last modified more than a day before the global $skiptime
	#  - it is a directory
	#
	# An unreadable or missing directory gives an empty array.
	function ReadDirectoryForLogs($directory){	
		global $profile, $skiptime, $force, $check_file_history;
		
		$logfiles = array();

		# We open the directory; if that fails there is nothing to report
		$handle = is_dir($directory) ? opendir($directory) : false;
		if ($handle === false) {
			return $logfiles;
		}
		
		# every file name gets pasted behind this
		$prefix = (substr($directory, -1) != "/") ? $directory."/" : $directory;
		
		$use_history = ($force === false) || (isset($check_file_history) && $check_file_history === true);
		
		$ls = array();
		# While we have files in the directory...
		# (compare against false explicitly, a file that is called "0" must not end the loop)
		while (($file = readdir($handle)) !== false) {
			
			# see if we can ignore this file
			if ($profile->splitfilternegative && strpos($file, $profile->splitfilternegative) !== FALSE) {
				continue;
			}
			
			if ($profile->splitfilter) {
				if (strpos($profile->splitfilter, '(') !== FALSE) {
					# we have a regex splitfilter
					if (!preg_match("/".$profile->splitfilter."/i", $file)) {
						# It doesn't match; skip it.
						continue;
					}
				} else if (strpos($file, $profile->splitfilter) === FALSE) {
					continue;
				}
			}
			
			# hidden files and the . and .. entries are of no interest
			if ($file[0] == '.') {
				continue;
			}
			
			# ... and get the filename and modification date for each file.
			clearstatcache();
			$filename = $prefix.$file;
			$mt = filemtime($filename);
			
			# now lets see if we've already analyzed this file
			$lastmodtime = $use_history ? $this->getFileHistory($filename, "lastmodtime") : 0;
			if ($lastmodtime == $mt) {
				# this file has probably been done, skip it
				continue;
			}
			
			$ls[$file] = $mt;
		}
		closedir($handle);
		
		# Sort by modtime
		asort($ls);		

		foreach($ls as $file => $modtime) {
			# If the modification time is more than a day before the start time of data, we are looking at an old log file; skip it.
			if(($skiptime - $modtime) > 86400) {
				continue;
			}
			
			$filename = $prefix.$file;
			if (!is_dir($filename)) {
				$logfiles[] = $filename;
			}
		}
		return $logfiles;
	}

	# Builds the list of log files for a setup where every day has its own folder:
	# <logdir><Ymd>/<profilename>.log, or the same name with .gz behind it.
	# We step through the days from $skiptime up to the end of today and return the files that
	# exist and whose modification time differs from what the file history has on record
	# (the history is only consulted when the global $force is false).
	function HostpointGetLogFilesArray($skiptime) {
		global $profile, $force;
		
		$found = array();
		
		# if skiptime is 0 but last update time isn't we know that we collected no data so far, so just start checking from some recent date
		$earliest = 0;
		if ($profile->last_update_finished > 0) {
			$earliest = $profile->last_update_finished - (3 * 86400);
		}
		if ($skiptime < $earliest) {
			$skiptime = $earliest;
		}
		
		# still nothing to go on? then start yesterday
		if ($skiptime == 0) {
			$skiptime = strtotime('-1 days', time());
		}

		$end_of_today = mktime(23,59,59,date("n"),date("j"),date("Y"));
		
		for ($day = $skiptime; $day < $end_of_today; $day = strtotime('+1 day', $day)) {
			$path = $this->logdir.date("Ymd",$day)."/".$profile->profilename.".log";
			
			# no plain log? then try the gzipped one
			if (!file_exists($path)) {
				$path .= ".gz";
			}

			clearstatcache();
			$mtime = @filemtime($path);
			if ($mtime === false) {
				# neither variant exists for this day
				continue;
			}
			
			# now lets see if we've already analyzed this file
			$known = ($force === false) ? $this->getFileHistory($path, "lastmodtime") : 0;
			if ($known == $mtime) {
				# this file has probably been done, skip it
				$this->LogProcess( date("Y-m-d H:i:s",$mtime). " $path: this file has probably been done, skip it<br>\n" );
				continue;
			}
			
			$found[] = $path;
		}
		
		return $found;
	}

	# This function returns an array containing all full paths to each log file we need to parse.
	#  - with $hostpoint set and running from the command line: whatever HostpointGetLogFilesArray() finds
	#  - profile without split logs: just the profile's logfilefullpath
	#  - split logs: the files in the log directory; with the recursive option on, the files
	#    of every sub directory come first and the files of the log directory itself last
	function GetLogFilesArray() {
		global $profile,$hostpoint,$running_from_command_line, $session;
		
		# this call also stores the start time in the global $skiptime, which ReadDirectoryForLogs() reads
		$skiptime = $this->getStartTimeForProfile();

		if (isset($hostpoint) && $running_from_command_line === true) {
			$this->LogProcess("using HostpointGetLogFilesArray<br>");
			return $this->HostpointGetLogFilesArray($skiptime);
		}

		# We only have one log file, so we return an array with 1 entry.
		if ($profile->splitlogs != 1) {
			return array($profile->logfilefullpath);
		}
		
		# More than one log file, but all of them in a single folder
		if ($profile->recursive != 1) {
			return $this->ReadDirectoryForLogs($this->logdir);
		}
		
		# More than one log file, spread over sub folders: do the sub folders first
		# and finally add files from the root folder
		$folders = $this->RecursivelySearchDir($this->logdir);
		$folders[] = $this->logdir;
		
		$logfiles = array();
		foreach ($folders as $folder) {
			$batch = $this->ReadDirectoryForLogs($folder);
			if (empty($batch)) {
				continue;
			}
			foreach ($batch as $path) {
				$logfiles[] = $path;
			}
		}
		
		return $logfiles;
	}
	
	# Creates the temporary import table "<tablename>log" for the loaded profile and makes sure it is empty.
	# Uses the global $db connection and the global $profile; nothing is returned.
	function Createlogtable() {
		global $db, $profile;

		# Create the log table. Its columns are the 19 fields that ReadFileAndImport() inserts, plus an auto increment id.
		$db->Execute("CREATE TEMPORARY TABLE IF NOT EXISTS `{$profile->tablename}log` (
			  `id` int(11) NOT NULL AUTO_INCREMENT,
			  `host` varchar(255) DEFAULT NULL,
			  `logname` varchar(45) DEFAULT NULL,
			  `user` varchar(45) DEFAULT NULL,
			  `timestamp` int(11) DEFAULT NULL,
			  `timezone` varchar(7) DEFAULT NULL,
			  `request` text DEFAULT NULL,
			  `status` varchar(4) DEFAULT NULL,
			  `bytes` varchar(10) DEFAULT NULL,
			  `referrer` text DEFAULT NULL,
			  `useragent` text DEFAULT NULL,
			  `useragent_hash` varchar(32) DEFAULT NULL,
			  `cookie` text DEFAULT NULL,
			  `urlparams` text DEFAULT NULL,
			  `refparams` text DEFAULT NULL,
			  `keywords` text DEFAULT NULL,
			  `visitorid` varchar(32) DEFAULT NULL,
			  `country` varchar(100) DEFAULT NULL,
			  `crawl` int(1) DEFAULT '0',
			  `sessionid` int(11) DEFAULT NULL,
			  PRIMARY KEY (`id`),
			  KEY `visitortime` (`visitorid`,`timestamp`)
			) ENGINE=MyISAM AUTO_INCREMENT=1 DEFAULT CHARSET=utf8
		");

		# IF NOT EXISTS keeps a log table that is already there, so empty it; we want a clean one.
		$db->Execute("TRUNCATE TABLE {$profile->tablename}log");

	}

	# This is where we load the preparsed log file into the temporary log table.
	# We try three things, in this order, until one of them works:
	#  1. the import query that the log parser gives us
	#  2. the LOCAL variant of that query
	#  3. ReadFileAndImport(), which inserts the file line by line
	# The number of imported lines ends up in $this->num_import_lines.
	function ImportLog($file) {
		global $db, $profile, $log_parser_types, $skiptime;
		$began = time();
		
		$this->LogProcess("Importing: $file ");

		# size of the file in MB, for the log messages
		$this->fsize = round(((filesize($file)/1024)/1024),2);
		$this->LogProcess($this->manage_keys === true ? "starting import with manage_keys ON" : "starting import with manage_keys off");
		
		# Force the Logaholic Format Log Parser
		$parser_class = "LogaholicLogformatParser";
		$this->log_parser = new $parser_class;
		$this->log_parser->Initialize($file);
	
		# Load the parser queries...
		$import_query = $this->log_parser->ImportQuery($file);
		$import_query_local = $this->log_parser->ImportQuery($file,"LOCAL");
		
		if ($import_query == false) {
			$took = $this->took($began);
			$this->LogProcess("Failed importing records from the log file! No import Query found?! ".$this->sec2time($took));
			$this->StopOrContinue();
			return;
		}
		
		# we silence php while the load queries run; this puts the error reporting back
		$restore_reporting = function () {
			if (LOGAHOLIC_VERSION_STATUS=="release") {
				error_reporting(E_ALL & ~E_NOTICE);
			} else {
				error_reporting(E_ALL);
			}
		};
		
		# ... and run it!
		try {
			error_reporting(0);
			if ($db->Execute($import_query) === false) {
				throw new Exception();
			}
			$this->num_import_lines = $db->Affected_Rows();
			$restore_reporting();
			
		} catch(Exception $e) {
			# Try to load it again and now with LOAD DATA LOCAL INFILE
			error_reporting(0);
			$loaded = ($db->Execute($import_query_local) !== false);
			$restore_reporting();
			
			if ($loaded) {
				$this->num_import_lines = $db->Affected_Rows();
			} else {
				# fall back to a slower method that does not use LOAD DATA INFILE
				$this->ReadFileAndImport($file);
			}
		}
		
		# Do some Log parser clean up, if needed
		$this->log_parser->CleanUp();
		
		$took = $this->took($began);
		$this->LogProcess("Imported {$this->num_import_lines} records from the log file ($this->fsize mb) in ".$this->sec2time($took)."");

		# Equalize crawl detection - update crawl status for any human traffic
		if ($profile->patternfilter=="true" && $profile->trackermode!=1) {
			$this->EqualizeHumanTraffic();
		}
		
		$this->StopOrContinue();
	}
	
	# This is a fall-back method that is only used if load data infile does not work to read the lwa log file.
	# It reads $file line by line, splits every line on "|LWA|" and inserts the lines into the
	# temporary log table in batches of 500. Lines that do not have exactly 19 fields are
	# reported with echoWarning() and skipped.
	# $this->num_import_lines is set to the number of lines that were accepted (0 when the file can't be opened).
	function ReadFileAndImport($file) {
		global $db, $profile;
		
		# warn the user
		$this->LogProcess("Mysql LOAD DATA INFILE statement failed! Falling back to importing the file line by line... Make sure your mysql is allowed to use this statement for much faster importing.");
		
		$this->num_import_lines = 0;
		
		# without a readable file there is nothing to import
		$fp = fopen($file, "r");
		if ($fp === false) {
			$this->LogProcess("Could not open $file for reading, nothing was imported");
			return;
		}
		
		# let's turn off indexes so we get better insert speed
		$this->EnableKeys($profile->tablename."log", false);
		
		# prepare a query
		$insertstart = "INSERT into {$profile->tablename}log (`host`, logname, `user`, `timestamp`, timezone, request, `status`, bytes, referrer, useragent, useragent_hash, cookie, urlparams, refparams, keywords, visitorid, country, crawl, sessionid) values ";
		
		$rows = array();	// the value sets that are waiting to be inserted
		$n = 0;				// the number of lines we accepted
		
		while (($line = fgets($fp)) !== false) {
		
			$fields = explode("|LWA|", $line);
			# if we don't have 19 elements, something is wrong
			if (count($fields) != 19) {
				echoWarning("corruptedline");
				continue;
			}
			
			$values = array();
			foreach ($fields as $val) {
				$values[] = $db->Quote(trim($val));
			}
			$rows[] = "(" . implode(", ", $values) . ")";
			$n++;
			
			# if we've saved up enough lines, insert it into the table
			if (count($rows) >= 500) {
				$db->Execute($insertstart . implode(",\n", $rows));
				$rows = array();
			}
		}
		# we are done with the file, give the handle back
		fclose($fp);
		
		# insert any remaining lines
		if (!empty($rows)) {
			$db->Execute($insertstart . implode(",\n", $rows));
		}
		
		$this->num_import_lines = $n;
		
		# turn on indexes
		$this->EnableKeys($profile->tablename."log", true);
	}
	
	# This inserts elements like urls and keywords from the temporary log table into the subtables.
	# Every table from getTables() gets the distinct values (plus their MD5 hash) of its source
	# field; the visitorids table is left alone here, insertVisitorIDs() takes care of that one.
	# Does nothing when no lines were imported.
	function insertIDs() {
		global $db, $profile;
		
		if ($this->num_import_lines < 1) {
			return;
		}
		
		foreach ($this->getTables() as $lookup) {
			$began = time();
			
			# Skip visitorIDs
			if ($lookup['table'] == $profile->tablename_visitorids) {
				continue;
			}
			
			# insert missing items; we just do insert ignore instead of joining for the missing ones, it's faster
			$distinct = "SELECT DISTINCT {$lookup['sfield']}, MD5({$lookup['sfield']}) FROM `{$profile->tablename}log` AS s";
			$db->Execute("INSERT IGNORE INTO {$lookup['table']} ({$lookup['field']}, `hash`) ($distinct)");
			
			# calculate how many we inserted
			$inserted = $db->Affected_Rows();
			$elapsed = $this->took($began);
			$this->LogProcess("Inserting $inserted records in {$lookup['table']} took ".$this->sec2time($elapsed)."");
			$this->StopOrContinue();
		}
	}
	
	# This inserts the visitorids from the temporary log table into the visitors table,
	# together with the host and the first timestamp we have for that visitorid/host pair.
	# Does nothing when no lines were imported.
	function insertVisitorIDs() {
		global $db, $profile;
		
		if ($this->num_import_lines < 1) {
			return;
		}
		
		$began = time();
		
		# INSERT IGNORE, so visitors we already know are left as they are
		$db->Execute("INSERT IGNORE INTO {$profile->tablename_visitorids} (`visitorid`, `ipnumber`, `created`) (SELECT DISTINCT s.visitorid AS vid, s.host, MIN(s.`timestamp`) FROM `{$profile->tablename}log` AS s GROUP BY vid, host)");
		
		# calculate how many we inserted
		$inserted = $db->Affected_Rows();
		$elapsed = $this->took($began);
		$this->LogProcess("Inserting $inserted records in {$profile->tablename_visitorids} took ".$this->sec2time($elapsed)."");
		$this->StopOrContinue();
	}
	
	# Returns the list of subtables of the current profile. Every entry tells us
	#   'table'  - the name of the subtable
	#   'field'  - the column in that subtable that holds the value
	#   'sfield' - the expression that gives us that value from the temporary log table (alias s)
	function getTables() {
		global $profile, $took, $totaltime;
		
		return array(
			array('table' => $profile->tablename_urls, 'field' => 'url', 'sfield' => 's.request'),
			array('table' => $profile->tablename_urlparams, 'field' => 'params', 'sfield' => 's.urlparams'),
			array('table' => $profile->tablename_referrers, 'field' => 'referrer', 'sfield' => 's.referrer'),
			array('table' => $profile->tablename_refparams, 'field' => 'params', 'sfield' => 's.refparams'),
			array('table' => $profile->tablename_keywords, 'field' => 'keywords', 'sfield' => 's.keywords'),
			array('table' => $profile->tablename_useragents, 'field' => 'useragent', 'sfield' => 's.useragent'),
			array('table' => $profile->tablename_visitorids, 'field' => 'visitorid', 'sfield' => "md5(CONCAT(s.host,':',s.useragent))"),
		);
	}

	# Equalizes the crawl detection inside the temporary log table: as soon as a visitorid has
	# one line with crawl=0, all lines of that visitorid are set to crawl=0.
	# A temporary helper table "<tablename>loghuman" holds those visitorids while we work.
	function EqualizeHumanTraffic() {
		global $profile, $db;
		$began = time();
		
		$log = "{$profile->tablename}log";
		$human = "{$profile->tablename}loghuman";
		
		# collect the visitors that look human on at least one line
		$db->Execute("create temporary table if not exists {$human} AS (select distinct visitorid from {$log} where crawl=0)");

		# and make them human on every line
		$db->Execute("update  {$log} as a JOIN (select visitorid from {$human} group by visitorid) as b on a.visitorid = b.visitorid SET a.crawl = 0");
		
		$rows = $db->Affected_Rows();
		$elapsed = $this->took($began);
		$this->LogProcess("Equalized human traffic for $rows rows in ".$this->sec2time($elapsed)."");
		
		# the helper table has done its job
		$db->Execute("drop TABLE {$human}");
	}
	
	# Copies the lines of the temporary log table into the profile's main table, where every
	# text value is replaced by the id it has in its subtable. The subtables are matched on
	# the MD5 hash of the value (the visitorids table on the visitorid itself).
	# Does nothing when no lines were imported.
	function insertNormalized() {
		global $profile, $db, $mysqltmp;
		
		if ($this->num_import_lines < 1) {
			return;
		}
		
		$began = time();

		# the subtables we take the ids from ...
		$subtables = implode(",", array(
			"{$profile->tablename_visitorids} as v",
			"{$profile->tablename_urls} as u",
			"{$profile->tablename_urlparams} as up",
			"{$profile->tablename_referrers} as r",
			"{$profile->tablename_refparams} as rp",
			"{$profile->tablename_keywords} as k",
			"{$profile->tablename_useragents} as ua",
		));
		
		# ... and how each of them is tied to a log line
		$conditions = implode(" AND ", array(
			"log.visitorid = v.visitorid",
			"MD5(log.request) = u.hash",
			"MD5(log.urlparams) = up.hash",
			"MD5(log.referrer) = r.hash",
			"MD5(log.refparams) = rp.hash",
			"MD5(log.keywords) = k.hash",
			"MD5(log.useragent) = ua.hash",
		));
		
		# Insert all ID's into the profile's main table, from all other tables.
		$q  = "INSERT INTO {$profile->tablename} (timestamp, visitorid, url, params, status, bytes, country, crawl, sessionid, referrer, refparams, useragentid, keywords) ";
		$q .= "SELECT `timestamp`, v.id, u.id, up.id, status, bytes, country, GREATEST(ua.is_bot, log.crawl), sessionid, r.id, rp.id, ua.id, k.id FROM `{$profile->tablename}log` AS log, ";
		$q .= $subtables . " WHERE " . $conditions . " ";
		$db->Execute($q);
		
		$rows = $db->Affected_Rows();
		$elapsed = $this->took($began);
		$this->LogProcess("Inserting $rows records in {$profile->tablename} took ".$this->sec2time($elapsed)."");
		
		$this->StopOrContinue();
	}
	
	# This turns the mysql indexes of $table on ($enable is true) or off ($enable is false) with
	# ALTER TABLE ... ENABLE/DISABLE KEYS. Having them off makes inserting a lot of rows faster.
	# Nothing happens while $this->manage_keys is false, or when $enable is null.
	function EnableKeys($table, $enable) {
		global $db;
		
		if ($this->manage_keys===false || !isset($enable)) {
			return;
		}
		
		if ($enable==true) {
			# enable keys when we are done inserting
			$action = "ENABLE";
			$label = "Enabling";
		} else {
			# disable the keys to speed up the insert
			$action = "DISABLE";
			$label = "Disabling";
		}
		
		$began = time();
		$db->Execute("ALTER TABLE {$table} {$action} KEYS");
		$took = time() - $began;
		$this->LogProcess("{$label} index on $table took {$took}");
	}

	# Lets the Blacklist class (when it is available) update the blacklisted IP addresses,
	# starting from the global $skiptime, and logs how many it found.
	function Blacklist() {
		global $profile, $skiptime;
		
		if (!class_exists("Blacklist")) {
			return;
		}
		
		$began = time();
		
		$blacklist = new Blacklist();
		$found = $blacklist->UpdateBlacklisted($skiptime);
		
		$elapsed = $this->took($began);
		$this->LogProcess("Finding $found blacklisted IP addresses took ".$this->sec2time($elapsed));
	}
	

	# Moves the hits on /logaholic.gif that were added in this update job out of the main
	# table and into the gifdata table (which is created like the main table when needed).
	# Does nothing when the urls table has no id for /logaholic.gif.
	function separateGIFData() {
		global $db, $profile;
		$began = time();
		
		# find the id for logaholic.gif, only do this if we need to
		$gif_id = getID('/logaholic.gif', 'urls');
		if ($gif_id == false) {
			return;
		}
		
		$db->Execute("create table if not exists $profile->tablename_gifdata like $profile->tablename");
		
		# the hits we are after
		# NOTE(review): this is a strict ">", so hits that sit exactly on the oldest timestamp of the job stay where they are - confirm that this is intended
		$selection = "timestamp > $this->oldest_timestamp and url = '$gif_id'";
		
		# now move all the logaholic.gif hits to the gifdata table and delete them from the main table
		$db->Execute("insert into $profile->tablename_gifdata select * from $profile->tablename where $selection");
		$db->Execute("DELETE FROM {$profile->tablename} WHERE $selection");
		
		$elapsed = $this->took($began);
		$this->LogProcess("Moving additional data to gifdata table took ".$this->sec2time($elapsed));
	}

	# Moves the crawler hits that were added in this update job out of the main table and
	# into the crawl table (which is created like the main table when needed).
	# Before the move, two things are marked in the main table:
	#  - all hits of visitorids that have crawl=1 in the visitorids table get crawl=1
	#  - all hits on urls that start with the profile's feedurl get crawl=2
	function separateBots() {
		global $db, $profile;
		$start = time();
		$db->Execute("create table if not exists $profile->tablename_crawl like $profile->tablename");
		
		# find visitorids that the user has marked as a bot
		$q = $db->Execute("select id from $profile->tablename_visitorids where crawl=1");
		$ids = array();
		while ($data = $q->FetchRow()) {
			$ids[] = $data['id'];
		}
		if (count($ids) > 0) {
			$ids = implode(",",$ids);
			$db->Execute("update $profile->tablename set crawl=1 where visitorid IN ($ids)");
		}
		# find feed urls and update to crawl=2
		if ($profile->feedurl!="") {
			# the feed url is a profile setting that can contain quotes, so let the db layer
			# quote it instead of pasting it into the query as it is
			$pattern = $db->Quote($profile->feedurl."%");
			$q = $db->Execute("select id from $profile->tablename_urls where url like $pattern");
			$ids = array();
			while ($data = $q->FetchRow()) {
				$ids[] = $data['id'];
			}
			if (count($ids) > 0) {
				$ids = implode(",",$ids);
				$db->Execute("update $profile->tablename set crawl=2 where url IN ($ids)");
			}
		}
		
		# now move all the crawler hits to the crawl table and delete them from the main table
		# NOTE(review): this is a strict ">", so hits that sit exactly on the oldest timestamp of the job stay where they are - confirm that this is intended
		$db->Execute("insert into $profile->tablename_crawl select * from $profile->tablename where timestamp > $this->oldest_timestamp and crawl != 0");
		$db->Execute("DELETE FROM {$profile->tablename} WHERE timestamp > $this->oldest_timestamp and crawl != 0");
				
		$took = $this->took($start);			
		$this->LogProcess("Moving bots to crawl table took ".$this->sec2time($took));
	}
	
	# Refreshes the conversions table, so we have speedy access to the conversion urls later.
	# For the date range that GetMaxDateRange() gives us, the existing conversions are deleted
	# and the hits on the profile's target urls (status 200 or 302) are copied in again.
	# Does nothing when the profile has no target files, no range or no target ids.
	# The script dies with the database error when one of the two queries fails.
	function copyConversions() {
		global $db, $profile;
		$began = time();
		
		if ($profile->targetfiles == "") {
			return;
		}
		
		# first figure out the dates
		$range = GetMaxDateRange($profile, true);
		$from = $range['from'];
		$to = $range['to'];
		
		$targets = $profile->GetTargetIDs();
		
		if (empty($to) || empty($from) || !is_array($targets) || !count($targets)) {
			return;
		}
		
		$target_list = implode(",",$targets);
		
		$db->Execute("delete from $profile->tablename_conversions where timestamp >=$from and timestamp <=$to") or die($db->ErrorMsg());
		$db->Execute("insert into $profile->tablename_conversions select timestamp,visitorid,url from $profile->tablename where timestamp between $from and $to and url in (".$target_list.") and (status=200 or status=302) order by timestamp") or die("Error updating conversion stats. ".$db->ErrorMsg());
		
		$elapsed = $this->took($began);
		$this->LogProcess("Moving conversions to table took ".$this->sec2time($elapsed));
	}
	
	
	# Logs how long something took, counted from $start. With $update_total on (the default)
	# the total running time of this object is recalculated and added to the message.
	# Returns the current time, which can serve as the start time of the next step.
	function thisTook($start, $update_total = true) {
		
		$duration = $this->sec2time($this->took($start));
		
		$tot = "";
		if ($update_total == true) {
			$this->totaltime = $this->took($this->startscript);
			$tot = ". Total processing time is now ".$this->sec2time($this->totaltime);
		}
		
		$this->LogProcess("This took $duration $tot", false);
		
		return time();
	}
	
	# this calculates how many seconds have passed since $start (a unix timestamp)
	function took($start) {
		$now = time();
		return $now - $start;
	}
	
	# This converts a number of seconds to a pretty string: hours, minutes and seconds (each
	# padded to at least two digits), followed by the plain number, like "01:01:01 (3661 secs)"
	function sec2time($secs) {
		# what is left after taking out the whole hours
		$rest = $secs % (60 * 60);
		
		$parts = array(
			floor($secs / (60 * 60)),
			floor($rest / 60),
			ceil($rest % 60),
		);
		
		foreach ($parts as $i => $part) {
			$parts[$i] = str_pad($part,2,'0',STR_PAD_LEFT);
		}
		
		return implode(":", $parts)." ($secs secs)";
	}
	
	# This function prints stuff to the update progress log.
	# The message gets the global $human_readable_file in front of it. Where it goes depends on $this->print:
	#   2        - echo it, followed by <br/>
	#   3        - write it to the global $updatelog file handle
	#   'stream' - echo it as a "data: ..." line followed by an empty line, and flush the output
	# $include_took is accepted but not used.
	function LogProcess($message, $include_took = false) {
		global $profile, $updatelog, $took, $totaltime, $human_readable_file;
		
		# If the message that will be returned starts with [Finished], we want to prepend this to the message, and remove it from the message.
		# This is so we then can detect it in the ajax request that reads the update progress log.
		if(substr($message, 0, 10) == '[Finished]') {
			$message = "[Finished]".$human_readable_file.": ".str_replace("[Finished]", "", $message);
		} else {
			$message = $human_readable_file.": ".$message;
		}
		
		# Write the line to the update progress log.
		if($this->print == 2){
			echo $message."<br/>";
		}
		if($this->print == 3){
			@fwrite($updatelog, $message."\n");
		}
		
		# this has to be a strict comparison: before php 8 a print value of 0 is == to any non-numeric string, so 0 would stream as well
		if($this->print === 'stream'){
			echo "data: ". $message . PHP_EOL;
		 	echo PHP_EOL;
			# ob_flush() complains when there is no output buffer to flush
			if (ob_get_level() > 0) {
				ob_flush();
			}
			flush();
		}
		
		lgflush();			
		
	}
	
	# This function prints stuff to the log we use to analyze the speed:
	# <datamanagerDir><profilename>/<profilename>_update_duration.lwa.log
	# The file is opened (in append mode) the first time we get here and the handle is kept in $this->durationlog.
	# Every line holds the date, the current file, its size in MB, the number of imported lines, $label, the floored $speed and $secs.
	function LogDuration($label, $speed, $secs) {
		global $profile, $human_readable_file;
		
		if (!isset($this->durationlog)) {
			$path = $profile->datamanagerDir.$profile->profilename."/{$profile->profilename}_update_duration.lwa.log";
			$this->durationlog = fopen($path, "a+");
			set_permissions($path);
		}
		
		# these two may not have been set yet, in that case they just stay empty
		$size = @$this->fsize;
		$lines = @$this->num_import_lines;
		
		$stamp = date("Y-m-d H:i:s", time());
		$message = "{$stamp} {$human_readable_file}, MB:,{$size}, lines:,{$lines}, {$label}, Speed:,".floor($speed).", Seconds:,{$secs}";
		
		# Write the line to the update duration log.
		fwrite($this->durationlog, $message."\n");
	}	
	
	# This function will stop the update process if a user has remotely turned on the stop flag (via UI).
	# When the profile's update status is "stop" we log that, set the status back to 'ready',
	# store the global $sessioncounter in the profile and end the script. In all other cases true is returned.
	# The status is not looked at when we are running from the command line.
	function StopOrContinue() {
		global $profile, $running_from_command_line, $sessioncounter;

		# try this to see if it speeds things up on hostpoint
		if ($running_from_command_line === true) {
			return true;
		}

		if ($profile->GetUpdateStatus() != "stop") {
			return true;
		}
		
		$this->LogProcess("Error: user stopped process", false);
		$profile->SetUpdateStatus('ready');
		$profile->SetInDB("sessioncounter",$sessioncounter);
		die();
	}
	
	# This function sets the corrected profile table name for this update job.
	# we need this when we are using archived merge tables (which is currently not in use)
	# don't confuse this with the merge table that is used to group the main table and the crawl table, this has nothing to do with that
	#
	# When the CREATE TABLE statement of the profile's table contains "UNION", the original
	# name is remembered in $this->real_profile_tablename and $profile->tablename gets
	# "_current" behind it. When a name was remembered earlier, it is put back instead.
	# Returns false when SHOW CREATE TABLE gives us no row, nothing otherwise.
	function correctProfileTablename() {
		global $db, $profile;
		
		# we've been here before: go back to the name we started with
		if (isset($this->real_profile_tablename)) {
			$profile->tablename = $this->real_profile_tablename;
			return;
		}
		
		$query = "SHOW CREATE TABLE $profile->tablename";
		$result = $db->Execute($query);
		$data = $result->FetchRow();
		
		if (!$data) {
			echoDebug("error for query: $query");
			return false;
		}
		
		if (strpos($data['Create Table'],"UNION")!==false) {
			$this->real_profile_tablename = $profile->tablename;
			$profile->tablename = $profile->tablename."_current";
		}
	}
	
	function PHPanalyzeLine($logline){ 
		global $profile, $sessions, $sessioncounter, $geo, $gi, $mysqltmp, $bandwidth, $force;
		
		# Global the regexes, since we don't need to create a regex everytime.
		global $skiptime, $regex_skipips, $regex_skipfiles, $regex_urlparamfilter, $regex_equivdomains, $regex_googleparams, $dynamic_pages, $only_include;

		a: {
			# If the line can not be parsed, skip it.
			if (!$this->log_parser->ParseLine($logline)) {
				if (isset($_REQUEST['debuglines'])) {
					$this->LogProcess("return false because can't parse line\n");
				}
				return false;
			}
			
			# If the last line is not data, skip it.
			if ($this->log_parser->lastlineisdata === false) {
				if (isset($_REQUEST['debuglines'])) {
					$this->LogProcess("return false because lastlineisdata = false\n");
				}
				return false;
			}
			
			if(empty($this->log_parser->clientip)) {
				if (isset($_REQUEST['debuglines'])) {
					$this->LogProcess("return false because no clientip\n");
				}
				return false;
			}

			# Name the parse line stuff
			$ipnumber = $this->log_parser->clientip;
			
			$url = strip_tags(urldecode($this->log_parser->reqfile));
			
			$referrer = strip_tags(urldecode($this->log_parser->referrer));
			
			$this->log_parser->agent = strip_tags(urldecode($this->log_parser->agent));
			
			$this->log_parser->cookie = strip_tags(urldecode($this->log_parser->cookie));
			
			$logtimestamp = $this->log_parser->logdate;

			if(!is_integer($logtimestamp)){
				if (isset($_REQUEST['debuglines'])) {
					echo("logtimestamp is not an integer: $logtimestamp \n");
				}
				return false;
			}
			
			if($logtimestamp < $this->oldest_timestamp){
				$this->oldest_timestamp = $logtimestamp;
			}

			if($logtimestamp > $this->latest_timestamp){
				$this->latest_timestamp = $logtimestamp;
			}

			# Skip Time
			if($force==false) {
				if($logtimestamp <= $skiptime) {
					if (isset($_REQUEST['debuglines'])) {
						echo("return false because skiptime\n");
					}
					return false;
				}
			}

			# Determine the visitorid
			if ($profile->visitoridentmethod == VIDM_IPADDRESS) {
				$visitorid = md5($ipnumber);
			} else if ($profile->visitoridentmethod == VIDM_IPPLUSAGENT) {
				$visitorid = md5($ipnumber . ':' . $this->log_parser->agent);
			} else if ($profile->visitoridentmethod == VIDM_COOKIE && !empty($this->log_parser->visitorid)) {
				# If we have a cookie method set and a visitor id set from the parser then use that.
				$visitorid = md5($this->log_parser->visitorid);

			} else if ($profile->visitoridentmethod == VIDM_COOKIE) {
				# if we have a cookie, use it, else use VIDM_IPPLUSAGENT
				$cookies = $this->log_parser->cookie.";";
				$thisLogaholic_VID = strpos($cookies, "Logaholic_VID");
				if ($thisLogaholic_VID !== FALSE) {
					$thisLogaholic_VID = substr($cookies, $thisLogaholic_VID);
					$thisLogaholic_VID = substr($thisLogaholic_VID, 14, strpos($thisLogaholic_VID, ";") - 14);
					
					if (is_numeric($thisLogaholic_VID) == false) {
						$thisLogaholic_VID = md5($ipnumber . ':' . $this->log_parser->agent);
					}
					
					$visitorid = $thisLogaholic_VID;
					
					if (isset($debug) && $debug) {
						echo "we are using visitorid: $thisLogaholic_VID for $ipnumber<br>";
						echo "a new logaholic vid would look like this:".md5($ipnumber . ':' . $this->log_parser->agent);
						echo "<br>that is based on :".$ipnumber . ':' . $this->log_parser->agent;
						echo "<br>this is the cookie string :" . $cookies;
					}
				} else {
					$visitorid = md5($ipnumber . ':' . $this->log_parser->agent);
				}
			} else if ($profile->visitoridentmethod == VIDM_CUSTOM_COOKIE && !empty($profile->vidm_custom_cookie)) {
				# if we have a custom cookie, use it, else use VIDM_IPPLUSAGENT
				$cookies = $this->log_parser->cookie.";";
				$thisLogaholic_VID = strpos($cookies, $profile->vidm_custom_cookie);
				//dump($cookies);
				if ($thisLogaholic_VID !== FALSE) {
					$tvidlen = strlen($profile->vidm_custom_cookie) + 1;
					$thisLogaholic_VID = substr($cookies, $thisLogaholic_VID);
					$thisLogaholic_VID = substr($thisLogaholic_VID, $tvidlen, strpos($thisLogaholic_VID, ";") - $tvidlen);
					
					$visitorid = $thisLogaholic_VID;
					
					if (isset($debug) && $debug) {
						echo "we are using visitorid: $thisLogaholic_VID for $ipnumber<br>";
						echo "a new logaholic vid would look like this:".md5($ipnumber . ':' . $this->log_parser->agent);
						echo "<br>that is based on :".$ipnumber . ':' . $this->log_parser->agent;
						echo "<br>this is the cookie string :" . $cookies;
					}
				} else {
					//echo "hier";
					$visitorid = md5($ipnumber . ':' . $this->log_parser->agent);
				}
			}
		}
		$session_timeout = ($profile->visittimeout * 60);
		
		# Calculate sessions
		if(isset($sessions[$visitorid])) {	
			if($logtimestamp - $sessions[$visitorid][1] > $session_timeout) {
				$sessions[$visitorid][1] = $logtimestamp;
				$sessions[$visitorid][2] = $sessioncounter++;				
				
			} else {
				# Add to visit
				$sessions[$visitorid][1] = $logtimestamp;
			}

			if ($profile->patternfilter == "true" && $profile->trackermode!=1) {
				# reset crawl flag if image found for visitorid, use EqualizeHumanTraffic() later to fill the gaps of previous non matching requests (like the first page )
				if(preg_match("/(\.gif(\?|$)|\.jpg(\?|$)|\.jpeg(\?|$)|\.png(\?|$)|\.ico(\?|$)|\.css(\?|$))/i", $url) > 0) {
					$sessions[$visitorid][4] = 0;

					if (isset($sessions[$visitorid][6])) {
						
						//reset the logline and redo this function using the buffered line
						$logline = gzdecode($sessions[$visitorid][6]);
						unset($sessions[$visitorid][6]);
						goto a;
						
					} else {
						//$this->LogProcess("I would have done it");
					}
				}	
			}
		} else {
			$sessions[$visitorid][1] = $logtimestamp;
			$sessions[$visitorid][2] = $sessioncounter++;			
			
			# Set crawl					
			if ($profile->patternfilter == "false" ||  $profile->trackermode==1) {
				$sessions[$visitorid][4] = 0;
			} else {
				$sessions[$visitorid][4] = 1;  // set to 'bot' by default, swich off if we find any images or any subsquent requests
				$sessions[$visitorid][6] = gzencode($logline); // buffer this line untill we know if it's a human
				return false;
			}
		}		


		# Skip Files
		if(!empty($regex_skipfiles)) {			

			if (strpos($url, '/logaholic.gif')!==false) { // I am special, don't skip me
			
			} else if(substr($only_include, 0,1)=="!") {
				//if we don't match the string, filter it out
				//$this->LogProcess("we have only include");
				if (strpos($url, substr(stripslashes($only_include), 1))!==false) {
					if(preg_match("/({$regex_skipfiles})/i", $url) > 0) {
						return false;
					}
					//ok					
				} else {
					//$this->LogProcess("no match $url");				
					return false;
				}
			} else if(preg_match("/({$regex_skipfiles})/i", $url) > 0) {
				if (isset($_REQUEST['debuglines'])) {
					echo("return false because skipfiles\n");
				}
				
				return false;
			}
		}
		
		# we are going to keep track of which days we see during this import, so we can use that to determine if any existing datafiles need to be expired
		if(!isset($this->affected_dates[date("Ymd",$logtimestamp)])){
			# If this is the first time we see this day then remove the datafiles for this day
			$startof_day = mktime(0,0,0,date("m",$logtimestamp),date("d",$logtimestamp),date("Y",$logtimestamp));
			$endof_day = mktime(23,59,59,date("m",$logtimestamp),date("d",$logtimestamp),date("Y",$logtimestamp));
			# Remove datafiles for a day
			DeleteDataFiles($profile,$startof_day,$endof_day);
		}
		$this->affected_dates[date("Ymd",$logtimestamp)] = 1;
		// $this->affected_dates[date("Ym",$logtimestamp)] = 1;

		# Skip Ipnumbers
		if(!empty($regex_skipips)) {
			if(preg_match("/({$regex_skipips})/i", $ipnumber) > 0) {
				if (isset($_REQUEST['debuglines'])) {
					echo("return false because skipips\n");
				}				
				return false;
			}
		}
		
		# Count the bandwidth
		$this->countBandwidth($this->log_parser->bytes, $logtimestamp);
		
		# URL Stuff
		if(strpos($url, '?') > -1) {
			
			$urlparams = substr($url, strpos($url, '?'));

			$url = substr($url, 0, strpos($url, '?'));
			
			# Here we strip the important parameters from the urlparams, and put it into url
			foreach($dynamic_pages as $key => $value) {
				if($key == $url && strpos($urlparams, $value.'=') > -1) {
					$urlparams = preg_replace("/\?/i", "", $urlparams);
					parse_str($urlparams,$p);
					$urlparam_part = $value."=".$p[$value];

					$urlparams = @preg_replace("/({$urlparam_part})/i", "", $urlparams);
					$url = $url.'?'.$urlparam_part;
				}
			}

			if (strpos($url, '/logaholic.gif')!==false  && isset($urlparams)) { // I am special
				parse_str($urlparams, $event);				
				if (!empty($event['title'])) {
					//the title belongs to the referrer of this request
					$turl = parse_url($referrer, PHP_URL_PATH);
					$this->UpdatePageTitle($turl,$event['title']);
				}				
				
			} else if(!empty($profile->urlparamfilter) && isset($urlparams) ) {		
				# Add & sign for filtering
				$urlparams .= "&";
				$matched = preg_match("/({$regex_urlparamfilter})/i", $urlparams);				
				
				if($matched == 1 && $profile->urlparamfiltermode == 'Include') {					
					# Match found; let's see if there are more matches!
					preg_match_all("/({$regex_urlparamfilter})(.*?)&/i",$urlparams,$matches);				
					
					# rewrite urlparams
					$params = array();
	
					# If there is a match the full string is in the [0] key!
					foreach($matches[0] as $match){
						if(substr($match,0,1) == "&" || substr($match,0,1) == "?"){
							$params[] = substr($match,1,-1);
						} else {
							$params[] = substr($match,0,-1);
						}
					}
					$urlparams = "?". implode("&",$params);
					
				} else if($matched == 0 && $profile->urlparamfiltermode == 'Include') {
					# No matches found so remove the params
					$urlparams = '';					
					
				} else if($matched == 1 && $profile->urlparamfiltermode == 'Exclude') {
					# remove the matches from the params
					$urlparams = preg_replace("/({$regex_urlparamfilter})(.*?)&/i", "&" , $urlparams);
					
					if(substr($urlparams,0,2) == "&&"){
						$urlparams = substr($urlparams,2,-1);
					} else {
						$urlparams = substr($urlparams,1,-1);					
					}
					$urlparams = (empty($urlparams)) ? "" : "?". $urlparams;
					
				} else if($matched == 0 && $profile->urlparamfiltermode == 'Exclude') { 
					# nothing going on.. remove the &
					$urlparams = substr($urlparams,0,-1);					
				} else {
					$urlparams = "";					
				}
			}
		} else {
			$urlparams = "";
		}
		
		
		# Parse Referrer Parameters (refparams) and keywords
		$keywords = "";
		$confdomain = $profile->confdomain;
		
		# If empty referrer set referrer to '-' like in the old update
		if ($referrer == "") { $referrer = "-"; }
		
		if(!empty($regex_equivdomains)) {
			if(substr($profile->equivdomains, 0, 1) == '(') {
				$referrer = preg_replace("/({$regex_equivdomains})/i", $confdomain, $referrer);
			} else {
				$referrer = preg_replace("/({$regex_equivdomains})/i", "://".$confdomain, $referrer);
			}
		}
		
		if(strpos($referrer, '?') > -1) {
			$refparams = substr($referrer, strpos($referrer, '?'));
			$referrer = substr($referrer, 0, strpos($referrer, '?'));
			
			if(!isset($refparams)) {
				$refparams = '';
			}
			
			if(!empty($regex_urlparamfilter)) {
				$refparams = preg_replace("/({$regex_urlparamfilter})/i", "", $refparams);
			}

			# Here we strip the important parameters from the urlparams, and put it into url
			foreach($dynamic_pages as $key => $value) {
				if($confdomain.$key == $referrer && strpos($refparams, $value.'=') > -1) {
					$refparam_part = substr($refparams, strpos($refparams, $value.'='));
					if(strpos($refparam_part, '&') > -1) {
						$refparam_part = substr($refparams, strpos($refparams, '&'));
					}
					
					$refparams = preg_replace("/\?/i", "//", $refparams);
					$refparams = preg_replace("/({$refparam_part})/i", "//", $refparams);
					
					$referrer = $referrer.'?'.$refparam_part;
				}
			}
			
			$keyworddetectors = array('as_q', 'as_epq', 'as_oq', 'as_eq', 'as_sitesearch', 'as_rq', 'as_lq');
			
			# If the referrer is google, we look at our predefined 'keyworddetectors'
			if(strpos($referrer, "www.google") > -1) {
				for($i = 0; $i < (count($keyworddetectors) - 1); $i++) {
					if(strpos($refparams, $keyworddetectors[$i]) > -1) {
						$keywordpart = substr($refparams, (strpos($refparams, $keyworddetectors[$i]) + strlen($keyworddetectors[$i])));
						if(strpos($keywordpart, '&') > -1) {
							$keywords = substr($keywordpart, 0, strpos($keywordpart, "&"));
						} else {
							$keywords = $keywordpart;
						}
					}
				}
				
				
				# Google Params
				# For each parameter in googleparams, remove it from refparams
				if(preg_match("/({$regex_googleparams})/i", $refparams) > 0) {
					$tmp = explode('&', $refparams);
					$stripped_params = array();
					$c = 0;
					
					for($i = 0; $i < (count($tmp) - 1); $i++) {
						$tmp_param = explode('=', $tmp[$i]);
						if(!empty($tmp_param[0])) {
							if(strpos($regex_googleparams, $tmp_param[0]) > -1) {
								$stripped_params[$c] = $tmp_param[0].'='.@$tmp_param[1];
								$c++;
							}
						}
					}
					
					if(!empty($stripped_params)) {
						$refparams = implode("&", $stripped_params);
					}
				}
			}
			
			# If the referrer is yahoo, we look for ?p= or &p=
			if(strpos($referrer, "search.yahoo") > -1) {
				$keywordpart = substr($refparams, (strpos($refparams, "p=") + 2));
				if(strpos($keywordpart, '&') > -1) {
					$keywords = substr($keywordpart, 0, strpos($keywordpart, "&"));
				} else {
					$keywords = $keywordpart;
				}
			}
			
			# If it's not google, nor yahoo, we look for ?q= or &q=
			# but only for external sites
			if (strpos($referrer, $profile->confdomain) === false) {

				if(strpos($refparams, 'q=') > -1 && $keywords == "") {
					$keywordpart = substr($refparams, (strpos($refparams, 'q=') + 2));
					if(strpos($keywordpart, '&') > -1) {
						$keywords = substr($keywordpart, 0, strpos($keywordpart, "&"));
					} else {
						$keywords = $keywordpart;
					}
				}
			}
						
		} else {
			# There are no referrer parameters
			$refparams = '';
		}

		# If the referrer is google, but we haven't found a keyword, we'll set the keyword to 'Not Provided'
		if(strpos($referrer, "www.google") !== false && $keywords == "") {
			$keywords = "(not provided)";
		}
		
		if ($geo) {
			if (!isset($sessions[$visitorid][3])) {
				try {
					$area = $gi->city($ipnumber);
					$sessions[$visitorid][3] = $area->country->isoCode;
				} catch (Exception $e) {
					//$this->LogProcess("geoip failure: $e");
					$sessions[$visitorid][3] = "";
				}
			}
		} else {
			$sessions[$visitorid][3] = "";
		}
		
		# Clean up the sessions array, to prevent we run out of memory
		if (!isset($this->session_count_reset)) {
			$this->session_count_reset = $sessioncounter;
		}
		if (($sessioncounter - $this->session_count_reset) > $this->cap_sessions) {
			$this->LogProcess("We are going to clean the sessions array - ". count($sessions));
			$prebyte = memory_get_usage();
			# just unset the ones that have expired
			$tempsessions = array();
			$cses = 0;
			$sesmem = 0;
			while (list ($key, $val) = each ($sessions)) {
				$time_elapsed = $logtimestamp - @$val[1];
				$sesmem++;
				if ($time_elapsed < $session_timeout) {
					$tempsessions[$key][0] = @$val[0];
					$tempsessions[$key][1] = @$val[1];
					$tempsessions[$key][2] = @$val[2];
					$tempsessions[$key][3] = @$val[3];
					$tempsessions[$key][4] = @$val[4];
					$tempsessions[$key][5] = @$val[5];
					$tempsessions[$key][6] = @$val[6];
					$cses++;
				}
			}

			$sessions = $tempsessions;
			$tempsessions = null;

			$this->session_count_reset = $sessioncounter;
			$freebyte = round((($prebyte - memory_get_usage() )/1024)/1024, 1);
			$sperc = ($cses / $sesmem) * 100;

			$curperc = round((memory_get_usage() / $this->totmem) * 100, 1);

			$this->LogProcess("Currently PHP is using {$curperc}% of the total memory");
			
			if ($cses > $this->cap_sessions && $curperc < 80) {
				$this->cap_sessions = $this->cap_sessions * 1.1;
				$this->LogProcess("Increasing sessions cap with 10% ({$this->cap_sessions})");
			}
			//$this->LogProcess("We cleaned the sessions array and freed $freebyte MB, kept $cses out of $sesmem entries ($sperc%)");
		}
		
		# Count the authenticated visitors
		if(!empty($this->log_parser->authuser) && $this->log_parser->authuser != "-"){
			$this->CountAuthenticatedVisitors(
				$ipnumber,					# Ipnumber
				$logtimestamp,				# Timestamp
				$visitorid,					# Visitorid
				$sessions[$visitorid][3],	# Country
				$this->log_parser->authuser # User
			);
		}


		# Make sure the keywords are strtolower before entering the database.
		$keywords = urldecode($keywords);
		$keywords = strtolower($keywords);

		# also make sure there is no leading or trailing whitespace around the keywords
		$keywords = trim($keywords);

		# if the case-insensitive setting is on, strtolower the URL, and the referrer too if it is on our domain
		if ($profile->caseinsensitiveurls == 1) {
			$url = strtolower($url);
			if (strpos($referrer, $profile->confdomain) !== false) {
				$referrer = strtolower($referrer);
			}

		}
		$url=trim($url);

		if ($profile->ipencoding=='true') {
			//see if it's ipv6
			if (strpos($ipnumber, ":")!==false) {				
				$ipnumber = inet_ntop(inet_pton($ipnumber) & inet_pton("ffff:ffff:ffff:ffff:0000:0000:0000:0000"));
			} else {
				$ipnumber = substr($ipnumber, 0, strrpos($ipnumber,".")) . ".0";
			}			
		}
		
		$line = implode("|LWA|", array(
			$ipnumber,
			'-',
			$this->log_parser->authuser,
			$logtimestamp,
			'-',
			$url,
			$this->log_parser->status,
			$this->log_parser->bytes,
			$referrer,
			$this->log_parser->agent,
			md5($this->log_parser->agent),
			$this->log_parser->cookie,
			$urlparams,
			urldecode($refparams),
			$keywords,
			$visitorid,
			$sessions[$visitorid][3], //country
			$sessions[$visitorid][4], //crawl
			$sessions[$visitorid][2]) //sessionid
		);

		if ($sessions[$visitorid][4]==1) {
			@$this->count_crawl++;
		} else {
			@$this->count_human++;
		}
		
		# any stray newline characters are deadly here, so lets remove those
		$line = str_ireplace(array("\r", "\n", "%0a", "%0d"), '', $line); 			
		
		# make sure only mysql valid utf8 chars make it into the db
		$line = preg_replace('/[\x00-\x08\x10\x0B\x0C\x0E-\x19\x7F]'. 
		'|(?<=^|[\x00-\x7F])[\x80-\xBF]+'.
		'|([\xC0\xC1]|[\xF0-\xFF])[\x80-\xBF]*'.
		'|[\xC2-\xDF]((?![\x80-\xBF])|[\x80-\xBF]{2,})'.
		'|[\xE0-\xEF](([\x80-\xBF](?![\x80-\xBF]))|(?![\x80-\xBF]{2})|[\x80-\xBF]{3,})/',
		'�', $line );

		$line = preg_replace('/\xE0[\x80-\x9F][\x80-\xBF]'.
		'|\xED[\xA0-\xBF][\x80-\xBF]/S','?', $line );

		return $line."\n";
			
	}

	/**
	 * Register one request of an authenticated user in $this->authenticatedVisitors.
	 *
	 * The array is keyed as [Y-m-d day][user]. Each call overwrites the stored record for that
	 * user/day with the values passed in and a request counter that is one higher than before.
	 *
	 * @param string $ipnumber  ip number of the request
	 * @param int    $timestamp unix timestamp of the request
	 * @param string $visitorid visitor id of the request
	 * @param string $country   country stored for the visitor
	 * @param string $user      authenticated user name
	 */
	function CountAuthenticatedVisitors($ipnumber,$timestamp,$visitorid,$country,$user){
		$dayKey = date("Y-m-d",$timestamp);

		# Pick up the number of requests we already counted for this user on this day
		$previousRequests = 0;
		if (!empty($this->authenticatedVisitors[$dayKey][$user])) {
			$previousRequests = $this->authenticatedVisitors[$dayKey][$user]["requests"];
		}

		# [day][user][data]
		$this->authenticatedVisitors[$dayKey][$user] = array(
			"ipnumber" => $ipnumber,
			"timestamp" => $timestamp,
			"user" => $user,
			"requests" => $previousRequests + 1,
			"visitorid" => $visitorid,
			"country" => $country
		);

		# Flush the array to files once php uses more memory than $this->totmem
		if (memory_get_usage() > $this->totmem) {
			$this->SaveAuthenticatedVisitors();
		}
	}
	
	/**
	 * Write the collected authenticated-visitor counters to the per-day report data files.
	 *
	 * For every day in $this->authenticatedVisitors the rows of an already existing data file
	 * are merged in (request counts are summed per user) and the file is rewritten as gzipped
	 * JSON lines of the form [user, requests, ipnumber, country, timestamp, visitorid].
	 * When done, $this->authenticatedVisitors is emptied again.
	 *
	 * @return false|null false when there is nothing to save, otherwise no value
	 */
	function SaveAuthenticatedVisitors(){
		global $profile;

		if(empty($this->authenticatedVisitors)){
			return false;
		}

		# Initiate a clean report
		$report = New Report();		
		$report->dbMinimumDate = $this->oldest_timestamp;
		$report->dbMaximumDate = $this->latest_timestamp;
		$report->label_constant = "_AUTHENTICATED_VISITORS";
		$dir_1 = $profile->datamanagerDir.$profile->profilename;
		$dir_2 = $dir_1 ."/reports";

		$report->MakeDataDir($dir_1);
		$report->MakeDataDir($dir_2);

		# Loop through the array to store the data
		foreach($this->authenticatedVisitors as $date => $data){
			$dateparts = explode("-",$date);

			# NOTE(review): this only ever moves 'from' forward to the first day of the latest month seen - confirm that is intended
			if($report->from < mktime(0,0,0,$dateparts[1],1,$dateparts[0])){
				$report->from = mktime(0,0,0,$dateparts[1],1,$dateparts[0]); 
			}
			# day 0 of the next month is the last day of this month
			$report->to = mktime(0,0,0,($dateparts[1] + 1),0,$dateparts[0]); 

			$dir_3 = $dir_2 ."/". $dateparts[0];
			$dir_4 = $dir_3 ."/_AUTHENTICATED_VISITORS";

			# Create the directories if needed	
			$report->MakeDataDir($dir_3);
			$report->MakeDataDir($dir_4);	

			$path = $dir_4;

			# One file per day: YYYYMMDD
			$ymd = $dateparts[0] . $dateparts[1] . $dateparts[2];
			$filename = "_AUTHENTICATED_VISITORS.{$ymd}.json.gz";
			
			$old_data = array();

			if(DATAFILE_METHOD == "ziparchive"){
				$zip_file = $path.".zip";
				$full_path  = "zip://".$zip_file."#/".$filename;
					
				if ($report->report_file_exists($filename, $zip_file) === true){
					# File already exists get data and add new
					$old_data = $report->getDataFromFile($full_path);
				}
			} else {		
				if(file_exists("{$path}/{$filename}")){
					# File already exists get data and add new
					$old_data = $report->getDataFromFile("{$path}/{$filename}");
				}
			}

			if(!empty($old_data)){
				# Loop through the old rows
				foreach($old_data as $old){
					if(empty($data[ $old[0] ])){
						# User was not seen in this run, carry the stored row over unchanged
						$data[ $old[0] ] = array(
							"requests" => $old[1]
							,"ipnumber" => $old[2]
							,"country" => $old[3]
							,"timestamp" => $old[4]
							,"visitorid" => $old[5]
						);
					} else {
						# User is in current stack update him
						$data[ $old[0] ]['requests'] += $old[1]; # Add the old requests to the current
					}
				}

				# reset array for memory usage
				$old_data = array();
			}

			$fp = gzopen("{$path}/{$filename}","w");
			if ($fp === false) {
				# gzopen() failed; writing to or closing an invalid handle would only raise errors, so skip this day
				$this->LogProcess("Could not open {$path}/{$filename} for writing, skipping this file");
				continue;
			}
			set_permissions("{$path}/{$filename}");

			foreach($data as $user => $user_data){
				# Set the rows
				$row = array();
				$row[0] = $user;
				$row[1] = $user_data['requests'];
				$row[2] = $user_data['ipnumber'];
				$row[3] = $user_data['country'];
				$row[4] = $user_data['timestamp'];
				$row[5] = $user_data['visitorid'];

				gzwrite($fp,json_encode($row)."\n");
			}
			
			gzclose($fp);
		}
		$report->ConvertDataFilesToZip();

		# Reset for mem usage
		$this->authenticatedVisitors = array();
	}

	/**
	 * Add the bytes of one request to the running bandwidth total of its day.
	 *
	 * Totals are kept in $this->bandwidth, keyed by Y-m-d date. After every update
	 * BandwidthMemoryCheck() is called.
	 *
	 * @param mixed $bytes     number of bytes of the request, converted with intval()
	 * @param int   $timestamp unix timestamp of the request
	 */
	function countBandwidth($bytes, $timestamp) {
		$dayKey = date("Y-m-d", $timestamp);

		# Start a missing (or still zero) day at 0, then add to it
		if (empty($this->bandwidth[$dayKey])) {
			$this->bandwidth[$dayKey] = 0;
		}
		$this->bandwidth[$dayKey] += intval($bytes);

		$this->BandwidthMemoryCheck();
	}

	/*
	**	Check if we need to save the bandwidth array now because we do not want php to run out of memory
	*/
	function BandwidthMemoryCheck(){
		# Nothing to do as long as php does not use more memory than $this->totmem
		if (memory_get_usage() <= $this->totmem) {
			return;
		}

		# Over the limit: write the bandwidth totals to disk now
		$this->LogProcess("Flush Bandwidth data");
		$this->StoreBandwidth();
	}
	
	/**
	 * Write the collected bandwidth totals to the monthly _BANDWIDTH report data files.
	 *
	 * Every entry of $this->bandwidth (Y-m-d => bytes) is merged into the data file of its
	 * month: rows already stored are copied, the row of the same day gets the new bytes added,
	 * and a new [date, bytes] row is appended when the day was not stored yet. The file is
	 * written as gzipped JSON lines. When done, $this->bandwidth is emptied again.
	 */
	function StoreBandwidth(){
		global $profile;
		$report = New Report();		
		$report->dbMinimumDate = $this->oldest_timestamp;
		$report->dbMaximumDate = $this->latest_timestamp;

		$report->label_constant = "_BANDWIDTH";
		$dir_1 = $profile->datamanagerDir.$profile->profilename;
		$dir_2 = $dir_1 ."/reports";
		
		$report->MakeDataDir($dir_1);
		$report->MakeDataDir($dir_2);
		foreach($this->bandwidth as $date => $bytes){
			$dateparts = explode("-",$date);
			# NOTE(review): this only ever moves 'from' forward to the first day of the latest month seen - confirm that is intended
			if($report->from < mktime(0,0,0,$dateparts[1],1,$dateparts[0])){
				$report->from = mktime(0,0,0,$dateparts[1],1,$dateparts[0]); 
			}
			# day 0 of the next month is the last day of this month
			$report->to = mktime(23,59,59,($dateparts[1] + 1),0,$dateparts[0]); 

			$dir_3 = $dir_2 ."/". $dateparts[0];
			$dir_4 = $dir_3 ."/_BANDWIDTH";
				
			$report->MakeDataDir($dir_3);
			$report->MakeDataDir($dir_4);	

			$path = $dir_4;
			
			# One file per month (YYYYMM), holding one row per day
			$ymd = $dateparts[0] . $dateparts[1]; # . $dateparts[2];
			$filename = "$path/_BANDWIDTH.{$ymd}.json.gz";
			
			$old_data = array();

			if(file_exists($filename)){
				# File already exists get data and add new
				$old_data = $report->getDataFromFile($filename) ;
			}

			# Nothing found as a plain file, look inside the zip archive if that method is used
			if(empty($old_data)){
				if(DATAFILE_METHOD == "ziparchive"){
					$zip_file = $dir_4.".zip";
					$filename_check = "_BANDWIDTH.{$ymd}.json.gz";
					$full_path  = "zip://".$zip_file."#/".$filename_check;
						
					if ($report->report_file_exists($filename_check, $zip_file) === true){
						# File already exists get data and add new
						$old_data = $report->getDataFromFile($full_path);
					}
				}
			}

			$fp = gzopen($filename,"w");
			if ($fp === false) {
				# gzopen() failed; writing to or closing an invalid handle would only raise errors, so skip this day
				$this->LogProcess("Could not open {$filename} for writing, skipping this file");
				continue;
			}
			
			$row = array();

			# this boolean keeps track if the record was already found in the old data and we added the bytes to that record
			$already_writen = false;

			# Write the old data in the new file first
			if(!empty($old_data)){
				foreach($old_data as $k => $preRow){
					$row[0] = $preRow[0];
					
					if($preRow[0] == $date){ 
						$row[1] = $preRow[1] + $bytes;
						$already_writen = true;
					} else {
						$row[1] = $preRow[1];
					}
					
					gzwrite($fp,json_encode($row)."\n");
				}
			}
			
			# check if the data was not already written from the old data.
			if(!$already_writen){
				$row[0] = $date;
				$row[1] = $bytes;
				gzwrite($fp,json_encode($row)."\n");
			}

			gzclose($fp);
			set_permissions($filename);
		}		
		$report->ConvertDataFilesToZip();

		# Reset the array for memory space
		$this->bandwidth = array();

	}

	/**
	 * Get the highest timestamp stored in the profile's main table.
	 *
	 * @return mixed first column of the "max(timestamp)" result row
	 */
	function Get_seektime() {
		global $db, $profile;
		$row = $db->Execute("select max(timestamp) from $profile->tablename")->FetchRow();
		return $row[0];
	}
	
	/**
	 * Flag every referrer that starts with http(s)://<confdomain> as an internal referrer
	 * and log how long the query took.
	 */
	function UpdateInternalReferrers(){
		global $db, $profile;

		$startedAt = time();

		# Only rows that are not flagged yet are touched
		$sql = "UPDATE {$profile->tablename_referrers} SET internal_referrer = 1 WHERE internal_referrer = 0 AND (referrer LIKE 'http://{$profile->confdomain}%' OR referrer LIKE 'https://{$profile->confdomain}%')";
		$db->Execute($sql);

		$elapsed = $this->sec2time($this->took($startedAt));
		$this->LogProcess("Identifying Internal referrers took ".$elapsed);
	}
	
	/**
	 * Try to name the device from a raw useragent string.
	 *
	 * The checks are case-insensitive substring matches and the first match wins, in this
	 * order: ipad, iphone, blackberry, samsung-, sonyericsson, lg-, playstation, nokia, android.
	 * For the vendor checks (samsung-, sonyericsson, lg-) the part of the useragent before the
	 * first "/" is returned; for samsung- and lg- dashes in it are replaced by spaces.
	 *
	 * @param string $ua the useragent string
	 * @return string|false the device label, or false when nothing matched
	 */
	function DeviceCheck($ua){
		# Fixed labels that are checked first
		$firstLabels = array(
			'ipad' => 'iPad',
			'iphone' => 'iPhone',
			'blackberry' => 'BlackBerry'
		);
		foreach ($firstLabels as $needle => $label) {
			if (stripos($ua, $needle) !== false) {
				return $label;
			}
		}

		# Vendor prefixes: needle => whether dashes in the model name become spaces
		$vendors = array(
			'samsung-' => true,
			'sonyericsson' => false,
			'lg-' => true
		);
		foreach ($vendors as $needle => $dashesToSpaces) {
			if (stripos($ua, $needle) === false) {
				continue;
			}
			$parts = explode("/",$ua);
			$model = $parts[0];
			if ($dashesToSpaces) {
				$model = str_replace("-"," ",$model);
			}
			return $model;
		}

		# Fixed labels that are checked last
		$lastLabels = array(
			'playstation' => 'Playstation',
			'nokia' => 'Nokia',
			'android' => 'Android'
		);
		foreach ($lastLabels as $needle => $label) {
			if (stripos($ua, $needle) !== false) {
				return $label;
			}
		}

		return false;
	}
	
	/**
	 * Parse every useragent row that has no name yet and store the detected details.
	 *
	 * Rows of the useragents table with "name IS NULL" are run through the global ua-parser
	 * ($newparser) and written back with an INSERT ... ON DUPLICATE KEY UPDATE, in batches.
	 * For useragents that are classified as a bot, the rows of the main table that point to
	 * them get crawl = 1, also in batches. The elapsed time is logged at the end.
	 */
	function UpdateUseragents() {
		global $db, $profile, $newparser;
		$start = time();		  
		$q = $db->Execute("SELECT * FROM {$profile->tablename_useragents} WHERE name IS NULL");
		$inserts = array();
		$n = $q->NumRows();
		$i = 0;

		# The column order here has to match the order in which the keys of $agent are set below,
		# because the values are imploded positionally into the VALUES list
		$sqlstart = "INSERT INTO {$profile->tablename_useragents} (name,version,os,os_version,engine,is_bot,is_mobile,device,hash) VALUES ";
		$sqlend = " ON DUPLICATE KEY UPDATE name=VALUES(name),version=VALUES(version),os=VALUES(os),os_version=VALUES(os_version),engine=VALUES(engine),is_bot=VALUES(is_bot),is_mobile=VALUES(is_mobile),device=VALUES(device)";
		
		# Bulk of ids for updating purpose..
		$agent_ids = array();

		while($row = $q->FetchRow()) {
			# Gets information about the current browser's user agent 
			if ($row['useragent']=="") {
				# we might not even have a useragent field in the log file, so let's just skip it the field is really empty
				continue;
			}
			
			$agent = array();
						
			$newparsed = $newparser->parse($row['useragent']);
						
			$agent['agent_name'] = $newparsed->ua->family;

			$agent['agent_version'] = $newparsed->ua->toVersion();

			
			# Collapse every OS family that contains 'Windows' or 'Linux' into that single name
			$agent['agent_os'] = $newparsed->os->family;
			if(stripos($agent['agent_os'],'Windows') !== false){
				$agent['agent_os'] = "Windows";				
			}
			if(stripos($agent['agent_os'],'Linux') !== false){
				$agent['agent_os'] = "Linux";				
			}

			$agent['agent_os_version'] = $newparsed->os->toString();

			# The engine is always stored empty. It is a bot when the parser reports device family
			# 'Spider', or when both the ua family and the device family are 'Other'
			$agent['agent_engine'] = '';			
			$agent['agent_is_bot'] = 0;
			if($newparsed->device->family == 'Spider') {
				$agent['agent_is_bot'] = 1;
			}
			if($newparsed->ua->family == 'Other' && $newparsed->device->family == 'Other') {
				$agent['agent_is_bot'] = 1;
			}

			# Mobile when the ua family contains 'Mobile' or the OS family is Android / Symbian OS
			$agent['agent_is_mobile'] = 0;
			if(stripos($newparsed->ua->family,'Mobile') !== false){
				$agent['agent_is_mobile'] = 1;
			}

			if($newparsed->os->family == "Android" || $newparsed->os->family == "Symbian OS"){
				$agent['agent_is_mobile'] = 1;
			}

			# mobile double check
			# (only for recognised browsers: anything DeviceCheck() can name is also flagged as mobile)
			$agent['agent_device'] = $newparsed->device->family;
			if(strtolower($agent['agent_name']) != 'other') {
				$d = $this->DeviceCheck($row['useragent']);
				if(!empty($d)){
					//dump($d);
					//echoWarning($row['useragent']);
					$agent['agent_is_mobile'] = 1;
					//$agent['agent_is_bot'] = 0;
					//$agent['agent_device'] = $d;
				}
			}

			# Replace "Other" by 'unknown' in every value collected so far
			$agent = str_replace("Other", 'unknown', $agent);
			$agent['hash'] = $row['hash'];

			if($agent['agent_is_bot'] == 1) {
				# Flag the main table rows of bot useragents, flushing every 200 collected ids
				if(count($agent_ids) >= 200){
					$db->Execute("UPDATE {$profile->tablename} SET crawl = 1 WHERE useragentid IN ( ". implode(",", $agent_ids) ." )");
					$agent_ids = array();
				}
				$agent_ids[] = $row['id'];
			} 	

			# Quote every value for use in the INSERT statement
			foreach($agent as $k => $v) {
				$agent[$k] = $db->Quote($v);
			}			
			$inserts[]= $agent;
			$i++;
			# Write the collected records once there are more than 5000 of them, or when php uses
			# more memory than $this->totmem
			if ($i > 5000 || memory_get_usage() > $this->totmem) {
				$sql= array();
				foreach ($inserts as $record) {
					$sql[]="(".implode(",",$record).")";
				}
				//$sql = substr($sql, 0, -1);
				$sql = implode(",", $sql);
				$db->Execute($sqlstart.$sql.$sqlend);
				//echoWarning("inloop:".$sqlstart.$sql.$sqlend);
				$i=0;
				$inserts=array();
			}						
			
		}

		# Flag the main table rows for the bot ids that are still left
		if(!empty($agent_ids)){
			$db->Execute("UPDATE {$profile->tablename} SET crawl = 1 WHERE useragentid IN ( ". implode(",", $agent_ids) ." )");
		}


		# Write the records that are still left
		$sql=array();
		foreach ($inserts as $agent) {
			$sql[] ="(".implode(",",$agent).")";
		}
		$sql = implode(",",$sql);
		//$sql = substr($sql, 0, -1);
		
		if(!empty($inserts)){
			$db->Execute($sqlstart.$sql.$sqlend);
			//echoWarning("outloop:".$sqlstart.$sql.$sqlend);
		}
		
		$took = $this->took($start);			
		$this->LogProcess("Detecting $n Browser/OS from useragents took ".$this->sec2time($took));
	}
	
	# this function will return information about log files we already know about,
	# like the lastmodtime, firstlogline and lastlogpos (bytes)
	/**
	 * Read the file history from the profile and return all of it, one file, or one setting.
	 *
	 * $this->filehistory is reloaded from the JSON in $profile->filehistory on every call.
	 * Entries are keyed by md5() of the file name.
	 *
	 * @param string $file    log file to look up, empty for all files
	 * @param string $setting name of a single setting of that file, empty for all settings
	 * @return mixed the whole history array when both arguments are empty; the entry of $file
	 *               (null when the file is unknown) when only $file is given; the value of the
	 *               setting (0 when it is not set) when both are given; null when only
	 *               $setting is given
	 */
	function getFileHistory($file="", $setting="") {
		global $profile;
		
		$this->filehistory = json_decode($profile->filehistory,true);
		if (empty($this->filehistory)) {
			$this->filehistory = array();
		}
		
		if (empty($file)) {
			if (empty($setting)) {
				# if no specific file or setting is requested, just return the whole array
				return $this->filehistory;
			}
			# a setting without a file can not be looked up
			return null;
		}

		$key = md5($file);

		if (empty($setting)) {
			# if we only have a file, return all the info for that file.
			# A file we have never seen has no entry; return null without touching the missing index
			return isset($this->filehistory[$key]) ? $this->filehistory[$key] : null;
		}
		
		# if we have both, return the value only
		if (isset($this->filehistory[$key][$setting])) {
			return $this->filehistory[$key][$setting];
		}
		return 0;
	}
	
	/**
	 * Store one setting for a log file in the file history and put the JSON encoded
	 * history back on the profile object.
	 *
	 * @param string $file    log file the setting belongs to (stored under md5($file))
	 * @param string $setting name of the setting
	 * @param mixed  $val     value to store
	 */
	function setFileHistory($file, $setting, $val) {
		global $profile;

		# Load the history from the profile when we have nothing in memory yet
		if (empty($this->filehistory)) {
			$this->filehistory = $this->getFileHistory();
		}

		$fileKey = md5($file);
		$this->filehistory[$fileKey][$setting] = $val;

		$profile->filehistory = json_encode($this->filehistory);
	}

	/**
	 * Count the screen resolution and color depth of an event in the daily tables.
	 *
	 * Rows are stored with the timestamp of 12:00:00 on the day of $timestamp; an existing row
	 * gets its visits counter increased by one.
	 *
	 * @param int   $timestamp unix timestamp of the request
	 * @param array $event     event parameters; 'sw' and 'sh' (screen width/height) and 'cd'
	 *                         (color depth) are used when they are not empty
	 */
	function LogScreen($timestamp, $event) {
		global $profile, $db;

		# Everything of one day is stored under noon of that day
		list($month, $dayOfMonth, $year) = explode("-", date("m-d-Y", $timestamp));
		$noon = mktime(12, 0, 0, $month, $dayOfMonth, $year);

		# The resolution is only stored when both the width and the height are known
		$hasResolution = !empty($event['sw']) && !empty($event['sh']);
		if ($hasResolution) {
			$screenres = $event['sw']."x".$event['sh'];
			$q = "INSERT INTO `{$profile->tablename_screenres}` (timestamp,screenres,visits) VALUES ( ? , ? , 1) ON DUPLICATE KEY UPDATE visits=visits+1";
			@$db->Execute($q, array($noon, $screenres));
		}

		if (!empty($event['cd'])) {
			$q = "INSERT INTO `{$profile->tablename_colordepth}` (timestamp,colordepth,visits) VALUES (? , ? , 1) ON DUPLICATE KEY UPDATE visits=visits+1";
			@$db->Execute($q, array($noon, $event['cd']));
		}
	}

	/**
	 * Set the title of a url in the urls table, but only where the title is still empty.
	 *
	 * Every url/title combination is only sent to the database once per Import object; the
	 * combinations already handled are remembered in $this->title_updated.
	 *
	 * @param string $url   url the title belongs to
	 * @param string $title page title, cut off at 250 characters before it is stored
	 */
	function UpdatePageTitle($url, $title) {
		global $profile, $db;

		$seenKey = md5($url.$title);
		if (isset($this->title_updated[$seenKey])) {
			# we already did this combination
			return;
		}

		$db->Execute("update {$profile->tablename_urls} set title=".$db->Quote(substr($title, 0, 250))." where url=".$db->Quote($url)." and title=''");
		$this->title_updated[$seenKey] = true;
	}
}
?>