OpenXE/classes/Modules/HocrParser/Finder/PatternMatcher.php
2021-05-21 08:49:41 +02:00

124 lines
3.0 KiB
PHP

<?php
namespace Xentral\Modules\HocrParser\Finder;
use Xentral\Modules\HocrParser\Exception\InvalidArgumentException;
/**
 * Decides whether a piece of text looks like a date, a monetary amount,
 * a document number, or any of the three.
 *
 * The kind of check is fixed when the matcher is constructed and is applied
 * by every subsequent call to Match().
 */
class PatternMatcher
{
    public const PATTERN_DOCUMENT_NUMBER = 'documentnumber';
    public const PATTERN_MONEY = 'money';
    public const PATTERN_DATE = 'date';
    public const PATTERN_DEFAULT = 'default';

    /** @var array $validPatterns Every pattern name the constructor accepts */
    private static $validPatterns = [
        self::PATTERN_DOCUMENT_NUMBER,
        self::PATTERN_MONEY,
        self::PATTERN_DATE,
        self::PATTERN_DEFAULT,
    ];

    /** @var string $pattern Pattern name selected at construction time */
    private $pattern;

    /**
     * @param string $pattern One of the PATTERN_* constants
     *
     * @throws InvalidArgumentException If $pattern is not one of the known pattern names
     */
    public function __construct($pattern = self::PATTERN_DEFAULT)
    {
        // Strict lookup: only the exact constant strings are accepted.
        $isKnownPattern = in_array($pattern, self::$validPatterns, true);
        if ($isKnownPattern === false) {
            throw new InvalidArgumentException(sprintf('Pattern "%s" is not allowed.', $pattern));
        }

        $this->pattern = $pattern;
    }

    /**
     * Checks the given value against the pattern chosen in the constructor.
     *
     * The value is cast to string and trimmed first. Anything PHP considers
     * empty after trimming (including the string "0") never matches.
     *
     * @param string $value
     *
     * @return bool
     */
    public function Match($value)
    {
        $text = trim((string)$value);
        if (empty($text)) {
            return false;
        }

        if ($this->pattern === self::PATTERN_DATE) {
            return $this->IsDateLikeValue($text);
        }
        if ($this->pattern === self::PATTERN_MONEY) {
            return $this->IsMoneyLikeValue($text);
        }
        if ($this->pattern === self::PATTERN_DOCUMENT_NUMBER) {
            return $this->IsDocumentNumberLikeValue($text);
        }
        if ($this->pattern === self::PATTERN_DEFAULT) {
            return $this->IsCandidateValue($text);
        }

        return false;
    }

    /**
     * Looks for three dot-separated digit groups (1-2, 1-2 and 2-4 digits).
     *
     * The expression is not anchored, so the groups may appear anywhere
     * inside the value.
     *
     * @param string $value
     *
     * @return bool
     */
    private function IsDateLikeValue($value)
    {
        return preg_match('/\d{1,2}\.\d{1,2}\.\d{2,4}/', $value) === 1;
    }

    /**
     * Recognises amounts with exactly two decimal places; dot and comma are
     * both accepted as separators.
     *
     * @param string $value
     *
     * @return bool
     */
    private function IsMoneyLikeValue($value)
    {
        // With thousands separator, e.g. 11.111,11 or 11,111.11
        // (anchored at the end of the value only).
        $groupedAmount = '/\d+[\.,]\d{3}[\.,]{1}\d{2}$/';

        // Without thousands separator, e.g. 1111111,11 or 1111111.11
        // (the whole value has to match).
        $plainAmount = '/^\d+[\.,]{1}\d{2}$/';

        return preg_match($groupedAmount, $value) === 1
            || preg_match($plainAmount, $value) === 1;
    }

    /**
     * Recognises values built only from permitted characters that contain a
     * run of at least four digits.
     *
     * @param $value
     *
     * @return bool
     */
    private function IsDocumentNumberLikeValue($value)
    {
        // Only uppercase letters, digits, minus and underscore are allowed.
        $hasForbiddenChars = preg_match('/[^A-Z0-9\-_]+/', $value) === 1;

        return !$hasForbiddenChars
            && preg_match('/\d{4,}/', $value) === 1;
    }

    /**
     * Accepts a value as soon as any specific check succeeds; the checks run
     * in the order date, document number, money.
     *
     * @param string $value
     *
     * @return bool
     */
    private function IsCandidateValue($value)
    {
        if ($this->IsDateLikeValue($value)) {
            return true;
        }
        if ($this->IsDocumentNumberLikeValue($value)) {
            return true;
        }

        return $this->IsMoneyLikeValue($value);
    }
}