mirror of
https://github.com/OpenXE-org/OpenXE.git
synced 2025-01-01 09:30:28 +01:00
124 lines
3.0 KiB
PHP
124 lines
3.0 KiB
PHP
<?php
|
|
|
|
namespace Xentral\Modules\HocrParser\Finder;
|
|
|
|
use Xentral\Modules\HocrParser\Exception\InvalidArgumentException;
|
|
|
|
/**
 * Tests whether a string value looks like a date, a money amount or a
 * document number.
 *
 * The kind of check is fixed at construction time via one of the PATTERN_*
 * constants; PATTERN_DEFAULT accepts a value as soon as any of the three
 * specific checks accepts it.
 */
class PatternMatcher
{
    public const PATTERN_DOCUMENT_NUMBER = 'documentnumber';
    public const PATTERN_MONEY = 'money';
    public const PATTERN_DATE = 'date';
    public const PATTERN_DEFAULT = 'default';

    /** @var string[] $validPatterns Pattern names accepted by the constructor */
    private static $validPatterns = [
        self::PATTERN_DOCUMENT_NUMBER,
        self::PATTERN_MONEY,
        self::PATTERN_DATE,
        self::PATTERN_DEFAULT,
    ];

    /** @var string $pattern One of the PATTERN_* constants */
    private $pattern;

    /**
     * @param string $pattern One of the PATTERN_* constants (compared strictly)
     *
     * @throws InvalidArgumentException If $pattern is not one of the PATTERN_* constants
     */
    public function __construct($pattern = self::PATTERN_DEFAULT)
    {
        if (!in_array($pattern, self::$validPatterns, true)) {
            throw new InvalidArgumentException(
                sprintf('Pattern "%s" is not allowed.', self::DescribePattern($pattern))
            );
        }

        $this->pattern = $pattern;
    }

    /**
     * Checks the trimmed string form of $value against the configured pattern.
     *
     * @param string $value
     *
     * @return bool false for an empty or whitespace-only value, otherwise the
     *              result of the configured check
     */
    public function Match($value)
    {
        $value = trim((string)$value);

        // Explicit comparison instead of empty(): empty() would also treat '0' as empty.
        if ($value === '') {
            return false;
        }

        switch ($this->pattern) {
            case self::PATTERN_DATE:
                return $this->IsDateLikeValue($value);

            case self::PATTERN_MONEY:
                return $this->IsMoneyLikeValue($value);

            case self::PATTERN_DOCUMENT_NUMBER:
                return $this->IsDocumentNumberLikeValue($value);

            case self::PATTERN_DEFAULT:
                return $this->IsCandidateValue($value);

            default:
                // Not reachable through the constructor, which validates the pattern.
                return false;
        }
    }

    /**
     * Renders a rejected constructor argument for the exception message.
     *
     * Anything that can be converted to a string is rendered via a string cast
     * (the same text sprintf('%s') would produce); arrays, resources and objects
     * without __toString are rendered as their type name, so that building the
     * message can neither emit a conversion warning nor throw an Error.
     *
     * @param mixed $pattern
     *
     * @return string
     */
    private static function DescribePattern($pattern): string
    {
        $isStringable = $pattern === null
            || is_scalar($pattern)
            || (is_object($pattern) && method_exists($pattern, '__toString'));

        return $isStringable ? (string)$pattern : gettype($pattern);
    }

    /**
     * Accepts values containing "1-2 digits . 1-2 digits . 2-4 digits".
     *
     * The expression is not anchored, so surrounding characters are tolerated,
     * and the numbers are not checked for being a valid calendar date.
     *
     * @param string $value
     *
     * @return bool
     */
    private function IsDateLikeValue(string $value): bool
    {
        return (bool)preg_match('/\d{1,2}\.\d{1,2}\.\d{2,4}/', $value);
    }

    /**
     * Accepts values that end in an amount with two decimal places, using
     * either "." or "," as separator.
     *
     * @param string $value
     *
     * @return bool
     */
    private function IsMoneyLikeValue(string $value): bool
    {
        // With thousands separator, e.g. 11.111,11 or 11,111.11
        // NOTE(review): this expression is anchored at the end only, whereas the
        // one below is anchored at both ends - confirm whether leading characters
        // are meant to be tolerated here.
        if (preg_match('/\d+[\.,]\d{3}[\.,]{1}\d{2}$/', $value)) {
            return true;
        }

        // Without thousands separator, e.g. 1111111,11 or 1111111.11
        return (bool)preg_match('/^\d+[\.,]{1}\d{2}$/', $value);
    }

    /**
     * Accepts values made up solely of upper-case letters A-Z, digits, minus
     * and underscore that contain at least four consecutive digits.
     *
     * @param string $value
     *
     * @return bool
     */
    private function IsDocumentNumberLikeValue(string $value): bool
    {
        // Only upper-case letters, digits, minus and underscore are allowed
        if (preg_match('/[^A-Z0-9\-_]+/', $value)) {
            return false;
        }

        return (bool)preg_match('/\d{4,}/', $value);
    }

    /**
     * Accepts values that pass the date, document number or money check
     * (evaluated in that order, stopping at the first hit).
     *
     * @param string $value
     *
     * @return bool
     */
    private function IsCandidateValue(string $value): bool
    {
        return $this->IsDateLikeValue($value)
            || $this->IsDocumentNumberLikeValue($value)
            || $this->IsMoneyLikeValue($value);
    }
}
|
|
|