mirror of https://github.com/ghostfolio/ghostfolio
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
207 lines
4.7 KiB
207 lines
4.7 KiB
<?php declare(strict_types=1);
|
|
|
|
/**
 * @package   s9e\RegexpBuilder
 * @copyright Copyright (c) The s9e authors
 * @license   https://opensource.org/licenses/mit-license.php The MIT License
 */
|
|
namespace s9e\RegexpBuilder;
|
|
|
|
use ValueError;
|
|
use const false, true;
|
|
use function array_search, count, ord, preg_last_error_msg, preg_match;
|
|
|
|
/**
 * Registry of "meta sequences": input strings that stand for a regular expression rather than
 * for their literal characters.
 *
 * Each registered expression is assigned an integer value. Expressions that are a single literal
 * character are assigned that character's byte value (non-negative). Every other expression is
 * assigned a unique negative integer whose least significant bits record the expression's
 * properties (see IS_CHAR and IS_QUANTIFIABLE).
 */
class Meta
{
    /**
     * @const Bit value that indicates whether a meta sequence represents a single character usable
     *        in a character class
     */
    final protected const IS_CHAR = 1;

    /**
     * @const Bit value that indicates whether a meta sequence represents a quantifiable expression
     */
    final protected const IS_QUANTIFIABLE = 2;

    /**
     * @var array<int|string, int> Map of meta sequences and their numeric values
     */
    protected array $inputMap = [];

    /**
     * @var array<int, string> Map of meta values and the expressions they represent
     */
    protected array $outputMap = [];

    /**
     * @param  iterable $map Map of sequences and the expressions they represent
     *
     * @throws ValueError If any of the expressions is not a valid regular expression
     */
    public function __construct(iterable $map = [])
    {
        foreach ($map as $sequence => $expression)
        {
            // Keys may have been normalized to integers by PHP, set() expects a string
            $this->set((string) $sequence, $expression);
        }
    }

    /**
     * Return the expression that matches given value
     *
     * The lookup is not guarded: $value is expected to be a value found in the input map.
     *
     * @param  int    $value Meta value
     * @return string        Regular expression registered for this value
     */
    public function getExpression(int $value): string
    {
        return $this->outputMap[$value];
    }

    /**
     * Return the map of meta sequences and their numeric values
     *
     * @return array<int|string, int>
     */
    public function getInputMap(): array
    {
        return $this->inputMap;
    }

    /**
     * Return whether a given value represents a single character usable in a character class
     *
     * Non-negative values always qualify. Negative values qualify if their IS_CHAR bit is set.
     *
     * @param  int  $value
     * @return bool
     */
    public static function isChar(int $value): bool
    {
        return ($value >= 0 || ($value & self::IS_CHAR));
    }

    /**
     * Return whether a given value represents a quantifiable expression
     *
     * Non-negative values always qualify. Negative values qualify if their IS_QUANTIFIABLE bit
     * is set.
     *
     * @param  int  $value
     * @return bool
     */
    public static function isQuantifiable(int $value): bool
    {
        return ($value >= 0 || ($value & self::IS_QUANTIFIABLE));
    }

    /**
     * Set a meta sequence
     *
     * @param  string $sequence   String used in the input
     * @param  string $expression Regular expression used in the output
     * @return void
     *
     * @throws ValueError If the expression is not a valid regular expression
     */
    public function set(string $sequence, string $expression): void
    {
        // preg_match() emits a warning and returns false if the pattern cannot be compiled. The
        // warning is silenced because the failure is reported via the exception instead
        if (@preg_match('(' . $expression . ')u', '') === false)
        {
            throw new ValueError("Invalid expression '" . $expression . "' (" . preg_last_error_msg() . ')');
        }

        // Map to the same value if possible, create a new one otherwise
        $value = array_search($expression, $this->outputMap, true);
        if ($value === false)
        {
            $value = $this->computeValue($expression);
        }

        $this->inputMap[$sequence] = $value;
        $this->outputMap[$value]   = $expression;
    }

    /**
     * Compute and return a value for given expression
     *
     * Values are meant to be a unique negative integer. The least significant bits are used to
     * store the expression's properties
     *
     * @param  string $expr Regular expression
     * @return int
     */
    protected function computeValue(string $expr): int
    {
        // If the expression is a single digit/letter or an escaped character, return its codepoint
        if (preg_match('(^(?:[0-9A-Za-z]|\\\\[^0-9A-Za-z])$)D', $expr))
        {
            return ord($expr[-1]);
        }

        $properties = [
            self::IS_CHAR         => $this->exprIsChar(...),
            self::IS_QUANTIFIABLE => $this->exprIsQuantifiable(...)
        ];

        // Start from a negative multiple of 2^n, where n is the number of property bits, so that
        // the n lowest bits are free to hold the properties. The multiplier is derived from the
        // number of values already assigned, which keeps each new value distinct
        $value = (1 + count($this->outputMap)) * -(2 ** count($properties));
        foreach ($properties as $bitValue => $hasProperty)
        {
            if ($hasProperty($expr))
            {
                $value |= $bitValue;
            }
        }

        return $value;
    }

    /**
     * Test whether given expression represents a single character usable in a character class
     *
     * @param  string $expr
     * @return bool
     */
    protected function exprIsChar(string $expr): bool
    {
        $regexps = [
            // Escaped literal or escape sequence such as \w but not \R
            '(^\\\\[adefhnrstvwDHNSVW\\W]$)D',

            // Unicode properties such as \pL or \p{Lu}
            '(^\\\\p(?:.|\\{[^}]+\\})$)Di',

            // An escape sequence such as \x1F or \x{2600}
            '(^\\\\x(?:[0-9a-f]{2}|\\{[^}]+\\})$)Di'
        ];

        return $this->matchesAny($expr, $regexps);
    }

    /**
     * Test whether given expression is quantifiable
     *
     * Anything that qualifies as a single character (see exprIsChar()) is quantifiable too.
     *
     * @param  string $expr
     * @return bool
     */
    protected function exprIsQuantifiable(string $expr): bool
    {
        $regexps = [
            // A dot or \R
            '(^(?:\\.|\\\\R)$)D',

            // A character class
            '(^\\[\\^?(?:([^\\\\\\]]|\\\\.)(?:-(?-1))?)++\\]$)D'
        ];

        return $this->matchesAny($expr, $regexps) || $this->exprIsChar($expr);
    }

    /**
     * Test whether given expression matches any of the given regexps
     *
     * @param  string             $expr
     * @param  array<int, string> $regexps
     * @return bool
     */
    protected function matchesAny(string $expr, array $regexps): bool
    {
        foreach ($regexps as $regexp)
        {
            if (preg_match($regexp, $expr))
            {
                return true;
            }
        }

        return false;
    }
}
|