<?php
// Lexer for SBVR-style wiki pages: loads a vocabulary of terms, a list of
// facts (term - verb - term) and a list of rules, and splits rule text into
// tokens (TERM, FACT, NEC, LOG_OP, QUANT, NUMB, DIGIT, KEYWORD, THAT, SYMB).
class SbvrLexer {

	// Fallback patterns tried last by _match(): a run of whitespace, or any
	// word that no other pattern recognised.
	protected $_other = array(
		"/^(\s+)/" => "WHITESPACE",
		"/^(\w+\-*\w*)/" => "UNMATCHED",
	);

	// Fixed tokens (SBVR keywords), grouped by token name. Entries are regex
	// fragments. Groups are tried in declaration order; inside one group the
	// longest matching entry wins.
	protected $_terminals = array(
		"NEC" => array('it is obligatory that', "it is prohibited that", 'it is necessary that',
						"it is impossible that", "it is possible that", "it is permitted that"),
		"LOG_OP" => array("it is not the case", "and", "or", "but", "not both", "if", "then",
						  "if and only if", "neither", "nor", "whether or not", "only if", "not"),
		"QUANT" => array("each", "some", "at least", "at most", "exactly",
						"and at most", "more than"),
		"NUMB" => array("one", "two", "three", "four", "five", "six", "seven", "eight", "nine"),
		"DIGIT" => array("[0-9]+"),
		"KEYWORD" => array("the", "a", "an", "another", "a given", "who", "what"),
		"THAT" => array("that")
	);

	// Punctuation recognised as SYMB tokens (regex fragments).
	protected $_symb = array(",", "\.", "'", "\"");

	// Filled by prepareTerms(): list of array('name', 'category'[, 'attributes']).
	public $terms = array();
	// Filled by prepareFacts(): list of array('verb', 'term1', 'term2', 'category').
	public $facts = array();
	// Filled by prepareRules(): list of array('rule', 'category', 'tokens').
	public $rules = array();

	// Loads the vocabulary from the "<namespace>:terms" page into $this->terms.
	// Each list item is "name * attr:value * attr:value ..."; "[category]"
	// items set the category of the following terms, "[comment]" and
	// "[silent_comment]" items are ignored.
	// Returns true on success, or an HTML message when the page does not exist.
	public function prepareTerms($namespace){
		$pageId = $namespace.':terms';
		if(page_exists($pageId)===false){
			return $this->missingPageMessage($pageId);
		}

		$terms = array();
		$category = "";
		foreach($this->readListItems($pageId) as $text){
			$text = trim($text);
			if($text==""){
				continue;
			}
			if(stripos($text, "[category]")!==false){
				// The marker is detected case-insensitively, so strip it the same way.
				$category = str_ireplace("[category]", "", $text);
				continue;
			}
			if($this->isCommentLine($text)){
				continue;
			}

			$term = array();
			foreach(explode("*", $text) as $termPart){
				$termPart = trim($termPart);
				if($termPart==""){
					// An empty segment ends the definition.
					break;
				}
				$opt = explode(":", $termPart);
				if(count($opt)==2){
					$term['attributes'][] = array($opt[0], $opt[1]);
				}
				elseif(count($opt)==1){
					$term['name'] = $opt[0];
					$term['category'] = $category;
				}
				// Segments with more than one ':' are ignored.
			}
			// A definition without a name cannot be matched against text, so
			// it is dropped rather than stored as an unusable entry.
			if(isset($term['name'])){
				$terms[] = $term;
			}
		}
		$this->terms = $terms;
		return true;
	}

	// Loads the facts from the "<namespace>:facts" page, tokenizes them with
	// tokenFacts() and stores the result in $this->facts.
	// Returns true on success, or an HTML message when the page does not exist.
	public function prepareFacts($namespace){
		$pageId = $namespace.':facts';
		if(page_exists($pageId)===false){
			return $this->missingPageMessage($pageId);
		}

		$lines = array();
		foreach($this->readListItems($pageId) as $key => $fact){
			$trimmed = trim($fact);
			if($trimmed=="" || $this->isCommentLine($trimmed)){
				continue;
			}
			// The raw (untrimmed) item is passed on, exactly as before, so
			// the category text derived from it stays unchanged.
			$lines[$key] = $fact;
		}
		$this->facts = $this->tokenFacts($lines);
		return true;
	}

	// Builds the fact table: each entry holds the verb phrase ('verb'), the
	// left-hand term ('term1'), the right-hand term ('term2') and the current
	// category. The first TERM token becomes term1; a TERM that is the last
	// non-whitespace token becomes term2; everything else forms the verb.
	// For "has"/"have" facts the inverse facts "is of" and "of" are added too.
	// Lines that cannot be tokenized are skipped.
	public function tokenFacts($temp){
		$preparedFacts = array();
		$category = "";
		foreach($temp as $key => $fact){
			if(stripos($fact, "[category]")!==false){
				$category = str_ireplace("[category]", "", $fact);
				continue;
			}

			$tokens = array();
			$offset = 0;
			$length = strlen($fact);
			$failed = false;
			while($offset < $length){
				$result = $this->_match($fact, $key, $offset);
				if($result===false){
					$failed = true;
					break;
				}
				$tokens[] = $result;
				$offset += strlen($result['match']);
			}
			if($failed){
				// Do not store a fact built from a partially tokenized line.
				continue;
			}

			// Index of the last token that is not whitespace. Using this
			// instead of a fixed "last index - 1" makes the result
			// independent of trailing whitespace in the input line.
			$lastIndex = -1;
			foreach($tokens as $index => $tok){
				if($tok['token']!="WHITESPACE"){
					$lastIndex = $index;
				}
			}

			$term1 = "";
			$term2 = "";
			$verb = "";
			foreach($tokens as $index => $tok){
				$curr_token = $tok['token'];
				$curr_match = $tok['match'];
				if($curr_token=="TERM"){
					if($term1==""){
						$term1 = $curr_match;
					}
					elseif($index==$lastIndex){
						$term2 = $curr_match;
					}
					else {
						// A term in the middle of the fact is part of the verb phrase.
						$verb .= $curr_match.' ';
					}
				}
				elseif($curr_token!="WHITESPACE"){
					$verb .= $curr_match.' ';
				}
			}
			$verb = trim($verb);

			$preparedFacts[] = array('verb' => $verb, 'term1' => $term1, 'term2' => $term2, 'category' => $category);
			if($verb=="has" || $verb=="have"){
				$preparedFacts[] = array('verb' => "is of", 'term1' => $term2, 'term2' => $term1, 'category' => $category);
				$preparedFacts[] = array('verb' => "of", 'term1' => $term2, 'term2' => $term1, 'category' => $category);
			}
		}
		return $preparedFacts;
	}

	// Loads the rules from the "<dir>:rules" page, tokenizes each one with
	// tokenSource() and stores the successfully tokenized rules in
	// $this->rules. Rules that fail tokenization or validation are dropped.
	// Returns true on success (like prepareTerms/prepareFacts), or an HTML
	// message when the page does not exist.
	public function prepareRules($dir){
		$pageId = $dir.':rules';
		if(page_exists($pageId)===false){
			return $this->missingPageMessage($pageId);
		}

		$tokenedRules = array();
		$category = "";
		foreach($this->readListItems($pageId) as $number => $line){
			if(trim($line)==""){
				continue;
			}
			if(stripos($line, "[category]")!==false){
				$category = str_ireplace("[category]", "", $line);
				continue;
			}
			if($this->isCommentLine($line)){
				continue;
			}
			$tokens = $this->tokenSource($number, $line, "");
			if($tokens[0]===false){
				continue;
			}
			$tokenedRules[] = array('rule' => $line, 'category' => $category, 'tokens' => $tokens[1]);
		}
		$this->rules = $tokenedRules;
		return true;
	}

	// Checks the order of tokens in a rule. The first token must be NEC and
	// some token kinds may only follow certain other kinds (see the table
	// below); any remaining UNMATCHED token is reported as undefined.
	// Returns array(true, "", "") when the rule is valid, otherwise
	// array(false, <error message>, <offending text>).
	protected function validateSemantic($tokens) {
		// token kind => kinds allowed directly before it
		$allowedBefore = array(
			"QUANT"   => array("NEC", "LOG_OP", "FACT"),
			"KEYWORD" => array("NEC", "FACT", "THAT", "LOG_OP"),
			"TERM"    => array("QUANT", "KEYWORD", "NUMB", "DIGIT"),
			"FACT"    => array("SYMB", "TERM", "THAT", "FACT"),
			"NUMB"    => array("QUANT"),
			"DIGIT"   => array("QUANT"),
			"THAT"    => array("TERM"),
		);

		foreach($tokens as $key => $tok){
			$curr_token = $tok['token'];
			// The first token has no predecessor; never index before the start.
			$hasPrev = isset($tokens[$key-1]);
			$prevToken = $hasPrev ? $tokens[$key-1]['token'] : null;
			$prevMatch = $hasPrev ? $tokens[$key-1]['match'] : "";

			$badStart = ($key==0 && $curr_token!="NEC");
			$badOrder = isset($allowedBefore[$curr_token])
						&& !in_array($prevToken, $allowedBefore[$curr_token], true);
			if($badStart || $badOrder){
				return array(false, "Incorrect syntax", $prevMatch.' '.$tok['match']);
			}
			if($curr_token=="UNMATCHED"){
				return array(false, "Undefined term or fact", $tok['match']);
			}
		}
		return array(true, "", "");
	}

	// Assigns tokens to every element of the given text; the text is always
	// a single rule. Unrecognised fragments between terms are matched against
	// the known facts and, when they match, replaced by FACT tokens; matched
	// facts are also rendered into a class-diagram snippet.
	// $catGraph is graph text produced earlier, used only to avoid emitting
	// duplicate graph lines.
	// Returns array(true, <tokens>, <graph>) on success,
	// array(false, "Illegal symbols") when the text cannot be tokenized, or
	// the validateSemantic() error array when validation fails.
	public function tokenSource($number, $line, $catGraph){
		$graph = "";
		$tokens = array();
		$offset = 0;
		$length = strlen($line);
		while($offset < $length){
			$result = $this->_match($line, $number, $offset);
			if($result===false){
				return array($result, "Illegal symbols");
			}
			if($result['token']!='WHITESPACE'){
				$tokens[] = $result;
			}
			$offset += strlen($result['match']);
		}

		$verb = "";
		$term1 = "";
		$term2 = "";
		$memory = "";			// term remembered for "that ..." / "of ..." constructs
		$factParts = null;		// index of the last UNMATCHED token of the pending verb
		$factTokens = array();	// token index => verb of the fact recognised there

		// Match facts against the unrecognised fragments.
		foreach($tokens as $key => $tok){
			$curr_token = $tok['token'];
			$curr_match = $tok['match'];
			$prevMatch = isset($tokens[$key-1]) ? $tokens[$key-1]['match'] : "";

			if($curr_token=="THAT"){
				$memory = $prevMatch;
			}
			elseif($curr_token=="TERM"){
				if($term1=="" && $verb==""){
					$term1 = $curr_match;
				}
				else {
					if($term1==""){
						// A verb without a subject: the subject is the remembered term.
						$term1 = $memory;
						$memory = "";
					}
					$term2 = $curr_match;
					$verb = trim($verb);
					if($this->_matchFact($verb, $term1, $term2)===true){
						$graph .= $this->appendClassToGraph($term1, $term2, $verb, $graph, $catGraph);
						if($factParts!==null){
							$factTokens[$factParts] = $verb;
						}
						// Clear the temporary state after a successful match.
						$factParts = null;
						$term1 = "";
						$term2 = "";
						$verb = "";
					}
				}
			}
			elseif($curr_token=="UNMATCHED"){
				$verb .= $curr_match;
				$factParts = $key;

				// Pair of graph nodes for a recognised fact, or null.
				$pair = null;
				$usedMemory = false;
				if($term1!="" && $this->_matchUnaryFact($verb, $term1)===true){
					$pair = array($term1, $term1);
				}
				elseif($memory!="" && $this->_matchUnaryFact($verb, $memory)===true){
					$pair = array($memory, $memory);
					$usedMemory = true;
				}
				elseif($memory!="" && $this->_matchFact($verb, $term1, $memory)===true){
					$pair = array($term1, $memory);
					$usedMemory = true;
				}

				if($pair!==null){
					$graph .= $this->appendClassToGraph($pair[0], $pair[1], $verb, $graph, $catGraph);
					$factTokens[$factParts] = $verb;
					$factParts = null;
					$term1 = "";
					$term2 = "";
					$verb = "";
					if($usedMemory){
						$memory = "";
					}
				}
				elseif($verb=="of"){
					$memory = $prevMatch;
				}
				else {
					$verb .= ' ';
				}
			}
			// NEC, LOG_OP, QUANT, NUMB, DIGIT, KEYWORD and SYMB tokens need no
			// handling in this pass.
		}

		// Replace the selected fragments with FACT tokens.
		foreach($factTokens as $index => $factVerb){
			$tokens[$index] = array('match' => $factVerb, 'token' => 'FACT');
		}

		// Rule syntax check; skipped while the "terms" or "facts" page is shown.
		$page = noNS(getID('id', true));
		if($page!="terms" && $page!="facts"){
			$valid = $this->validateSemantic($tokens);
			if($valid[0]===false){
				return $valid;
			}
		}

		return array(true, $tokens, $graph);
	}

	// Renders one fact as class-diagram text. Spaces and hyphens in term
	// names are replaced by underscores. "has"/"have" facts become a class
	// with a member; other facts become two classes joined by a labelled
	// arrow. A line is left out when it already occurs (case-insensitively)
	// in $graph, in $catGraph or earlier in this call's own output.
	// Returns the text to append to the graph.
	protected function appendClassToGraph($term1, $term2, $verb, $graph, $catGraph){
		$term1 = str_replace(array(' ', '-'), "_", $term1);
		$term2 = str_replace(array(' ', '-'), "_", $term2);

		if($verb=="has" || $verb=="have"){
			$lines = array("class ".$term1."{ \n -".$term2." \n } \n");
		}
		else {
			$lines = array(
				"class ".$term1."{ \n } \n",
				"class ".$term2."{ \n } \n",
				$term1." --> ".$term2." : ".$verb."\n",
			);
		}

		$str = "";
		foreach($lines as $line){
			$known = stripos($graph, $line)!==false
					|| stripos($catGraph, $line)!==false
					|| ($str!="" && stripos($str, $line)!==false);
			if(!$known){
				$str .= $line;
			}
		}
		return $str;
	}

	// Returns true when a unary fact (empty term2) with the given verb and
	// term exists in $this->facts.
	protected function _matchUnaryFact($verb, $term1){
		foreach($this->facts as $dicFact){
			if($dicFact['term2']=="" && $term1==$dicFact['term1'] && $verb==$dicFact['verb']){
				return true;
			}
		}
		return false;
	}

	// Returns true when a binary fact with the given verb and terms exists
	// in $this->facts.
	protected function _matchFact($verb, $term1, $term2){
		foreach($this->facts as $dicFact){
			if($term1==$dicFact['term1'] && $verb==$dicFact['verb'] && $term2==$dicFact['term2']){
				return true;
			}
		}
		return false;
	}

	// Matches the next fragment of $line, starting at byte $offset, and
	// returns array('match' => <text>, 'token' => <name>) or false when
	// nothing matches. Tried in this order: vocabulary terms (TERM), known
	// fact verbs (UNMATCHED, resolved to FACT later by tokenSource), symbols
	// (SYMB), fixed terminals, and finally whitespace / unknown words.
	// $number is not used; it is kept for interface compatibility.
	protected function _match($line, $number, $offset) {
		$string = substr($line, $offset);

		// Vocabulary: the longest matching term wins.
		$names = array();
		foreach($this->terms as $entry){
			if(isset($entry['name'])){
				$names[] = $entry['name'];
			}
		}
		$found = $this->longestLiteralMatch($names, $string);
		if($found!=''){
			return array('match' => $found, 'token' => 'TERM');
		}

		// Fact verbs: the longest matching verb wins.
		$verbs = array();
		foreach($this->facts as $entry){
			if(isset($entry['verb'])){
				$verbs[] = $entry['verb'];
			}
		}
		$found = $this->longestLiteralMatch($verbs, $string);
		if($found!=''){
			return array('match' => $found, 'token' => 'UNMATCHED');
		}

		// Symbols.
		foreach($this->_symb as $pattern){
			if(preg_match("/^(".$pattern.")/i", $string, $matches)){
				return array('match' => $matches[1], 'token' => 'SYMB');
			}
		}

		// Keywords, quantifiers etc. Within one group the longest entry wins,
		// so that e.g. "a given" is not cut short by "a".
		foreach($this->_terminals as $name => $group){
			$found = '';
			foreach($group as $pattern){
				if(preg_match("/^(".$pattern.")\b/i", $string, $matches)
					&& strlen($matches[1])>strlen($found)){
					$found = $matches[1];
				}
			}
			if($found!=''){
				return array('match' => $found, 'token' => $name);
			}
		}

		// Whitespace and unrecognised words.
		foreach($this->_other as $pattern => $name){
			if(preg_match($pattern, $string, $matches)){
				return array('match' => $matches[1], 'token' => $name);
			}
		}

		return false;
	}

	// Returns the longest of $literals that $string starts with (compared
	// case-insensitively and not followed by a word character), as written
	// in $string, or '' when none matches. The literals are escaped, so
	// characters such as '/', '(' or '+' are matched as plain text.
	private function longestLiteralMatch($literals, $string){
		$best = '';
		foreach($literals as $literal){
			if(!is_string($literal) || $literal==''){
				continue;
			}
			$regex = "/^(".preg_quote($literal, '/').")(?!\w)/i";
			if(preg_match($regex, $string, $matches) && strlen($matches[1])>strlen($best)){
				$best = $matches[1];
			}
		}
		return $best;
	}

	// Reads a wiki page, removes the <sbvr> tags and splits the text into
	// list items at "- ". Items are returned untrimmed. An unreadable file
	// yields an empty list.
	private function readListItems($pageId){
		$str = file_get_contents(wikiFN($pageId));
		if($str===false){
			return array();
		}
		$str = str_replace(array("<sbvr>", "</sbvr>"), "", $str);
		return explode("- ", $str);
	}

	// Returns true when the item is marked as a comment.
	private function isCommentLine($text){
		return stripos($text, "[comment]")!==false || stripos($text, "[silent_comment]")!==false;
	}

	// Builds the HTML message returned when a required page does not exist.
	// The attribute is quoted and the page id escaped because the namespace
	// may originate from user input.
	private function missingPageMessage($pageId){
		return 'Page: <a href="'.wl($pageId).'">'.htmlspecialchars($pageId, ENT_QUOTES, 'UTF-8').'</a> not created yet';
	}

}
