mirror of
https://github.com/OpenXE-org/OpenXE.git
synced 2025-01-26 20:51:14 +01:00
452 lines
14 KiB
PHP
452 lines
14 KiB
PHP
<?php
|
|
/*
|
|
**** COPYRIGHT & LICENSE NOTICE *** DO NOT REMOVE ****
|
|
*
|
|
* Xentral (c) Xentral ERP Software GmbH, Fuggerstrasse 11, D-86150 Augsburg, * Germany 2019
|
|
*
|
|
* This file is licensed under the Embedded Projects General Public License *Version 3.1.
|
|
*
|
|
* You should have received a copy of this license from your vendor and/or *along with this file; If not, please visit www.wawision.de/Lizenzhinweis
|
|
* to obtain the text of the corresponding license version.
|
|
*
|
|
**** END OF COPYRIGHT & LICENSE NOTICE *** DO NOT REMOVE ****
|
|
*/
|
|
?>
|
|
<?php
|
|
|
|
/**
 * Sanitises and validates user supplied strings.
 *
 * Values are cleaned according to a named rule ('nohtml', 'nojs', 'id',
 * 'module', ...). When the HTMLPurifier library is loaded it is used for the
 * HTML-preserving rules; otherwise the class falls back to xss_clean().
 */
class StringCleaner
{
    /** @var array<string, string[]> Rule name ('nohtml' / 'nojs') => field names cleaned with that rule */
    private $elements;

    /** @var HTMLPurifier_Config|null Only set when the HTMLPurifier library is loaded */
    private $htmlpuriferconfig;

    /** @var HTMLPurifier|null Only set when the HTMLPurifier library is loaded */
    private $htmlpurifer;

    /**
     * Rule name => anchored regex a valid value has to match (see RuleCheck()).
     *
     * Declared explicitly because assigning an undeclared property is deprecated
     * since PHP 8.2. It stays public because the former dynamic property was public.
     *
     * @var array<string, string>
     */
    public $rulechecks;

    /** @var array<string, string> Rule name => regex of the characters CleanString() removes */
    private $ruleregexps;

    /** @var Application */
    private $app;

    /**
     * StringCleaner constructor.
     *
     * Sets up HTMLPurifier (if the library is available) and the rule tables.
     *
     * @param null|Application $app
     */
    public function __construct($app = null)
    {
        $this->app = $app;

        if (class_exists('HTMLPurifier_Config')) {
            $this->htmlpuriferconfig = HTMLPurifier_Config::createDefault();
            $this->htmlpuriferconfig->set('Core.Encoding', 'UTF-8');
            // Allow hyperlinks with target="_blank"
            $this->htmlpuriferconfig->set('Attr.AllowedFrameTargets', ['_blank']);
            //$this->htmlpuriferconfig->set('HTML.AllowedElements', 'h1,h2,h3,h4,h5,h6,p,a,strong,em,ol,ul,li,img,param,div,br,form,label,fieldset,input,textarea,select,option');
            $this->htmlpurifer = new HTMLPurifier($this->htmlpuriferconfig);
        }

        $this->elements = [
            'nohtml' => [
                'ust_befreit', 'abweichendelieferadresse', 'bestellungsart', 'bearbeiter', 'datum', 'lieferdatum',
                'name', 'anrede', 'partner', 'packstation_inhaber', 'packstation_station', 'packstation_ident',
                'packstation_plz', 'packstation_ort', 'partnerid', 'kennen', 'ihrebestellnummer',
                'abteilung', 'unterabteilung', 'ansprechpartner', 'adresszusatz', 'strasse', 'land', 'bundesstaat',
                'plz', 'ort', 'versandart', 'internet', 'transaktionsnummer', 'vertrieb', 'zahlungsweise',
                'lieferabteilung', 'lieferunterabteilung', 'lieferansprechpartner', 'lieferadresszusatz',
                'lieferstrasse', 'lieferland', 'lieferbundesstaat', 'lieferplz', 'lieferort',
                'bank_inhaber', 'bank_institut', 'bank_blz', 'bank_konto',
                'email', 'telefon', 'telefax', 'ustid', 'partner', 'projekt', 'herstellernummer', 'ean', 'nummer',
                'name_de', 'name_ean',
            ],
            'nojs' => [
                'anabregstext', 'anabregstext_en', 'uebersicht_de', 'uebersicht_en', 'kurztext_de', 'kurztext_en',
                'internebemerkung', 'internebezeichnung', 'freitext',
            ],
        ];

        $this->rulechecks = [
            'digit'      => '/^[0-9]+$/',
            'alpha'      => '/^[a-zA-Z]+$/',
            'alphadigit' => '/^[0-9a-zA-Z]+$/',
            'username'   => '/^[0-9a-zA-Z\.\-]+$/',
            'space'      => '/^[\x20]+$/',
            // The quantifier was missing, so only one-character module names validated.
            'module'     => '/^[0-9a-zA-Z\_]+$/',
            'password'   => '/^[^\s\n]{1}[^\n]{5}.*$/',
            'email'      => '/^[^@\s\x00-\x20]+@[^@\s\x00-\x20\.]+\.[^@\s\x00-\x20\.]+[^@\s\x00-\x20]*$/',
        ];

        $this->ruleregexps = [
            'digit'              => '/[^0-9]/',
            'username'           => '/[^0-9a-zA-Z\.\-]/',
            'alpha'              => '/[^a-zA-Z]/',
            'alphadigits'        => '/[^0-9a-zA-Z]/',
            'module'             => '/[^0-9a-zA-Z\_]/',
            'moduleminus'        => '/[^0-9a-zA-Z\_\-]/',
            'alphadigitsspecial' => '/[^0-9a-zA-Z\_\.\(\)]/',
            'base64'             => '/[^0-9a-zA-Z\=\+\-\_\/]/',
        ];
    }

    /**
     * Looks up the cleaning rule registered for a field name.
     *
     * @param mixed  $key     field name (array key / element name)
     * @param string $default value returned when the field is not registered
     *
     * @return string 'nohtml', 'nojs' or $default
     */
    public function SyntaxByElement($key, $default = '')
    {
        // Compare as strings: with a loose in_array() an integer key such as 0
        // equals every non-numeric string on PHP 7 and was reported as 'nohtml'.
        $name = is_scalar($key) ? (string)$key : null;

        foreach ($this->elements as $type => $names) {
            if (in_array($name, $names, true)) {
                return $type;
            }
        }

        return $default;
    }

    /**
     * Cleans a value that was read from the database before it is displayed.
     *
     * @param mixed  $value      column value
     * @param string $columnname name of the column the value belongs to
     * @param string $default    'xss_clean' selects xss_clean() without HTMLPurifier
     *
     * @return mixed the cleaned value; empty and numeric values are returned unchanged
     */
    public function CleanSQLReturn($value, $columnname, $default = '')
    {
        // Loose comparison kept on purpose: null and false pass through as well.
        if ($value == '' || is_numeric($value)) {
            return $value;
        }

        $plainTextColumns = [
            'nummer', 'name', 'kundennummer', 'bezeichnung', 'bezeichnunglieferant', 'lieferantennummer',
            'mitarbeiternummer', 'name_de', 'name_en',
            'kurzbezeichnung', 'abkuerzung',
            'strasse', 'plz', 'ort', 'land', 'ansprechpartner', 'abteilung', 'unterabteilung',
            'liefername', 'lieferstrasse', 'lieferplz', 'lieferort', 'lieferland', 'lieferansprechpartner',
            'lieferabteilung', 'lieferunterabteilung',
        ];
        if (in_array($columnname, $plainTextColumns, true)) {
            return strip_tags($value);
        }

        if ($default === 'xss_clean') {
            return $this->xss_clean($value, false);
        }

        if ($this->htmlpurifer) {
            return $this->htmlpurifer->purify($value);
        }

        return $value;
    }

    /**
     * Validates a string against a named rule.
     *
     * @param string      $string value to validate
     * @param string|null $rule   key of $rulechecks, or 'datum'
     * @param bool        $found  set to true when the rule is known
     *
     * @return int|bool|null preg_match() result (1, 0 or false) for regex rules,
     *                       bool for 'datum', null when the rule is unknown
     */
    public function RuleCheck($string, $rule = null, &$found = false)
    {
        if ($rule !== null && isset($this->rulechecks[$rule])) {
            $found = true;

            // All patterns are anchored, so preg_match() yields the same 0/1 the
            // former preg_match_all() call produced.
            return preg_match($this->rulechecks[$rule], $string);
        }

        if ($rule == 'datum') {
            $found = true;

            return $this->isValidDate($string);
        }

        return null;
    }

    /**
     * Scans a SELECT statement for HTML inside string literals.
     *
     * The select list is walked character by character up to the first top-level
     * FROM. The text between that FROM and the last WHERE is then checked as a
     * whole, and a hit there is applied to every column.
     *
     * @param string $sql SQL statement
     *
     * @return int[] one entry per top-level select column: 0 = no tag found,
     *               1 = tag found, 2 = on...= event handler attribute found
     */
    public function CheckSQLHtml($sql)
    {
        $len = strlen($sql);
        $lvl = 0;          // nesting depth of parentheses outside of literals
        $col = 0;          // index of the select column currently scanned
        $ret = [0];
        $instring = false; // inside a single-quoted literal?

        for ($i = 0; $i < $len; $i++) {
            $char = $sql[$i];

            if ($char === "'") {
                // A quote preceded by a backslash is escaped. Index 0 has no
                // predecessor ($sql[-1] would read the LAST character).
                if ($i === 0 || $sql[$i - 1] !== '\\') {
                    $instring = !$instring;
                }
                continue;
            }

            if ($instring) {
                if ($char === '<') {
                    // Bounds check: '<' may be the last character of the statement.
                    if ($i + 1 < $len && preg_match('/<[a-zA-Z]/', $char . $sql[$i + 1]) && $ret[$col] != 2) {
                        $ret[$col] = 1;
                    }
                } elseif (($char === 'o' || $char === 'O')
                    && $i < $len - 4
                    && strtolower(substr($sql, $i, 2)) === 'on'
                    // NOTE(review): this pattern is case-sensitive, so only
                    // lower-case handler names (onclick=, ...) are detected.
                    && preg_match('/^on[a-z]+(\s*)=/', substr($sql, $i))
                ) {
                    $ret[$col] = 2;
                }
                continue;
            }

            switch ($char) {
                case '(':
                    $lvl++;
                    break;
                case ')':
                    $lvl--;
                    break;
                case ',':
                    if ($lvl === 0) {
                        $col++;
                        $ret[$col] = 0;
                    }
                    break;
                case 'F':
                case 'f':
                    if ($lvl === 0 && $i < $len - 4 && strtolower(substr($sql, $i, 4)) === 'from') {
                        // End of the select list; $i stays on the FROM keyword.
                        break 2;
                    }
                    break;
            }
        }

        // Remaining statement between FROM and the last WHERE. Without a WHERE
        // (or when it lies before FROM) the former code passed a negative length
        // to substr() and cut an arbitrary tail; scan up to the end instead.
        $where = strripos($sql, 'where');
        if ($where === false || $where < $i) {
            $restsql = (string)substr($sql, $i);
        } else {
            $restsql = (string)substr($sql, $i, $where - $i);
        }

        if (preg_match('/<[a-zA-Z]/', $restsql)) {
            $hasHandler = (bool)preg_match('/on[a-z]+(\s*)=/', $restsql);
            foreach ($ret as $k => $v) {
                $ret[$k] = ($hasHandler || $v == 2) ? 2 : 1;
            }
        }

        return $ret;
    }

    /**
     * Cleans a string according to a named rule.
     *
     * @param mixed       $string value to clean
     * @param string|null $rule   'email', 'nohtml', 'datum', 'xss_clean', 'nojs', 'id',
     *                            'doppelid' or a key of $ruleregexps; null means 'nohtml'
     * @param bool        $found  set to true when the rule is known
     *
     * @return mixed the cleaned value; the unchanged input when the rule is unknown
     */
    public function CleanString($string, $rule = null, &$found = false)
    {
        if ($rule === null) {
            // Was misspelt 'nothml', so the default rule never matched and the
            // input was returned uncleaned.
            $rule = 'nohtml';
        }

        switch ($rule) {
            case 'email':
                $found = true;

                return $this->RuleCheck($string, $rule) ? $string : '';

            case 'nohtml':
                $found = true;
                if ($this->containsNoMarkup($string)) {
                    return $string;
                }

                return strip_tags($string);

            case 'datum':
                $found = true;

                return $this->isValidDate($string) ? $string : '';

            case 'xss_clean':
                $found = true;
                if ($this->containsNoMarkup($string)) {
                    return $string;
                }

                return $this->xss_clean($string, false);

            case 'nojs':
                $found = true;
                if ($this->containsNoMarkup($string)) {
                    return $string;
                }

                // xss_clean() uses HTMLPurifier when it is available.
                return $this->xss_clean($string);

            case 'id':
                $found = true;

                return (string)$string === '' ? $string : (int)$string;

            case 'doppelid':
                $found = true;
                if ((string)$string === '') {
                    return $string;
                }
                $parts = explode('-', $string, 2);
                if (count($parts) === 1) {
                    return (int)$parts[0];
                }

                return ($parts[0] === '' ? '' : (int)$parts[0]) . '-' . (int)$parts[1];

            default:
                // Character whitelists ('module', 'digit', 'base64', ...).
                if (isset($this->ruleregexps[$rule])) {
                    $found = true;

                    return preg_replace($this->ruleregexps[$rule], '', $string);
                }
                break;
        }

        return $string;
    }

    /**
     * Removes active content from an HTML fragment.
     *
     * @param string $data      HTML fragment
     * @param bool   $usepurify use HTMLPurifier when it is available
     *
     * @return string
     */
    public function xss_clean($data, $usepurify = true)
    {
        if ($usepurify && !empty($this->htmlpurifer)) {
            return $this->htmlpurifer->purify($data);
        }

        // Fix &entity\n;
        $data = str_replace(array('&','<','>'), array('&amp;','&lt;','&gt;'), $data);
        $data = preg_replace('/(&#*\w+)[\x00-\x20]+;/u', '$1;', $data);
        $data = preg_replace('/(&#x*[0-9A-F]+);*/iu', '$1;', $data);
        $data = html_entity_decode($data, ENT_COMPAT, 'UTF-8');

        // NOTE(review): this return makes everything below unreachable, i.e. the
        // regex blacklist is switched off and, without HTMLPurifier, the input
        // passes through with normalised entities only. Presumably callers rely
        // on that - confirm before removing the return to re-enable the filter.
        return $data;

        // Remove any attribute starting with "on" or xmlns
        $data = preg_replace('#(<[^>]+?[\x00-\x20"\'])(?:on|xmlns)[^>]*+>#iu', '$1>', $data);

        // Remove javascript: and vbscript: protocols
        $data = preg_replace('#([a-z]*)[\x00-\x20]*=[\x00-\x20]*([`\'"]*)[\x00-\x20]*j[\x00-\x20]*a[\x00-\x20]*v[\x00-\x20]*a[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iu', '$1=$2nojavascript...', $data);
        $data = preg_replace('#([a-z]*)[\x00-\x20]*=([\'"]*)[\x00-\x20]*v[\x00-\x20]*b[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iu', '$1=$2novbscript...', $data);
        $data = preg_replace('#([a-z]*)[\x00-\x20]*=([\'"]*)[\x00-\x20]*-moz-binding[\x00-\x20]*:#u', '$1=$2nomozbinding...', $data);

        // Only works in IE: <span style="width: expression(alert('Ping!'));"></span>
        $data = preg_replace('#(<[^>]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"]*.*?expression[\x00-\x20]*\([^>]*+>#i', '$1>', $data);
        $data = preg_replace('#(<[^>]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"]*.*?behaviour[\x00-\x20]*\([^>]*+>#i', '$1>', $data);
        $data = preg_replace('#(<[^>]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"]*.*?s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:*[^>]*+>#iu', '$1>', $data);

        // Remove namespaced elements (we do not need them)
        $data = preg_replace('#</*\w+:\w[^>]*+>#i', '', $data);

        do {
            // Remove really unwanted tags
            $old_data = $data;
            $data = preg_replace('#</*(?:applet|b(?:ase|gsound|link)|embed|frame(?:set)?|i(?:frame|layer)|l(?:ayer|ink)|meta|object|s(?:cript|tyle)|title|xml)[^>]*+>#i', '', $data);
        } while ($old_data !== $data);

        return $data;
    }

    /**
     * Cleans all string values of a (nested) array in place.
     *
     * Each string is cleaned with the rule registered for its key (see
     * SyntaxByElement()), falling back to 'nojs'. Objects are traversed but
     * their values are not modified.
     *
     * @param mixed $xml array or object to clean (arrays are modified in place)
     * @param int   $lvl current recursion depth; nesting stops below depth 10
     *
     * @return mixed the value passed in
     */
    public function XMLArray_clean(&$xml, $lvl = 0)
    {
        if (is_array($xml)) {
            foreach ($xml as $k => $v) {
                if (is_string($v)) {
                    $xml[$k] = $this->CleanString($v, $this->SyntaxByElement($k, 'nojs'));
                } elseif ($lvl < 10) {
                    // Recurse into the element itself. Recursing into the foreach
                    // copy ($v) threw the cleaned nested array away.
                    $this->XMLArray_clean($xml[$k], $lvl + 1);
                }
            }
        } elseif (is_object($xml) && $this->hasEntries($xml)) {
            foreach ($xml as $k => $v) {
                if ($lvl < 10 && $this->hasEntries($v)) {
                    $this->XMLArray_clean($v, $lvl + 1);
                }
                // Leaf values of objects are left untouched; the assignment
                // was already disabled:
                //$xml->$k = $this->CleanString($v, $this->SyntaxByElement($k,'nojs'));
            }
        }

        return $xml;
    }

    /**
     * Tells whether a string is a valid date.
     *
     * A trailing day/month pair without a year ("24.12.") is completed with the
     * current year before parsing.
     *
     * @param string $string date string
     *
     * @return bool true when DateTime can parse the string and the year is positive
     */
    private function isValidDate($string)
    {
        if (preg_match('/([0-9]+)\.([0-9]+)\.$/', $string, $parts)) {
            $string = $parts[1] . '.' . $parts[2] . '.' . date('Y');
        }

        try {
            $date = new DateTime($string);
        } catch (Exception $e) {
            return false;
        }

        // Zero dates such as 0000-00-00 end up with a non-positive year.
        return (int)$date->format('Y') > 0;
    }

    /**
     * Tells whether a value can be returned as is because it cannot contain a tag.
     *
     * @param mixed $value
     *
     * @return bool true for empty values, numeric values and strings without '<'
     */
    private function containsNoMarkup($value)
    {
        return $value == '' || is_numeric($value) || strpos($value, '<') === false;
    }

    /**
     * count() guard: PHP 8 throws a TypeError for values that are not countable.
     *
     * @param mixed $value
     *
     * @return bool true for non-empty arrays/Countables and for any other object
     */
    private function hasEntries($value)
    {
        if (is_array($value) || $value instanceof Countable) {
            return count($value) > 0;
        }

        return is_object($value);
    }
}
|