159 lines
5.1 KiB
PHP
159 lines
5.1 KiB
PHP
<?php
|
||
|
||
declare(strict_types=1);
|
||
|
||
namespace Doctrine\RST\Parser;
|
||
|
||
use function in_array;
|
||
use function preg_match;
|
||
use function preg_replace;
|
||
use function str_repeat;
|
||
use function strlen;
|
||
use function strpos;
|
||
use function trim;
|
||
|
||
class LineChecker
{
    /**
     * Characters accepted by {@see isSpecialLine()} as the single, repeated
     * character of a "special" line.
     */
    private const HEADER_LETTERS = ['=', '-', '~', '*', '+', '^', '"', '.', '`', "'", '_', '#', ':'];

    /**
     * A regex matching all bullet list markers and a subset of the enumerated list markers.
     *
     * Group 1 holds the marker itself; the full match additionally contains the
     * whitespace that follows the marker.
     *
     * @see https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#bullet-lists
     * @see https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#enumerated-lists
     */
    public const LIST_MARKER = '/
        ^(
            [-+*\x{2022}\x{2023}\x{2043}] # match bullet list markers: "*", "+", "-", "•", "‣", or "⁃"
            |(?:[\d#]+\.|[\d#]+\)|\([\d#]+\)) # match arabic (1-9) or auto-enumerated ("#") lists with formats: "1.", "1)", or "(1)"
        )
        (?:\s+|$) # capture the spaces between marker and text to determine the list item text offset (or eol, if text starts on a new line)
    /ux';

    /**
     * Checks if this line consists of one single character from
     * self::HEADER_LETTERS repeated at least twice.
     *
     * @return string|null the repeated character, or null if the line is shorter
     *                     than two bytes, starts with another character or mixes
     *                     several characters
     */
    public function isSpecialLine(string $line): ?string
    {
        // the line is never modified below, so its length is computed only once
        $length = strlen($line);

        if ($length < 2) {
            return null;
        }

        $letter = $line[0];

        if (! in_array($letter, self::HEADER_LETTERS, true)) {
            return null;
        }

        for ($i = 1; $i < $length; $i++) {
            if ($line[$i] !== $letter) {
                return null;
            }
        }

        return $letter;
    }

    /**
     * Checks if this line is the start of a list item.
     *
     * Digits in the matched marker are normalized to "d" (e.g. "12." becomes "d."),
     * so that all items of one enumerated list share the same marker format.
     *
     * @see self::LIST_MARKER
     *
     * @param string|null $listMarker if provided, this function only returns "true" if the
     *                                same list marker format is used (e.g. all dashes).
     *                                On success, it is set to the normalized marker.
     * @param int|null    $listOffset if this line is a list, this will be set to the column
     *                                number of the start of the list item content (used to
     *                                match multiline items). The value is the byte length of
     *                                the marker plus the whitespace following it.
     * @param string|null $nextLine   if set, this line must also be a valid list line or
     *                                indented content for enumerated lists
     */
    public function isListLine(string $line, ?string &$listMarker = null, ?int &$listOffset = 0, ?string $nextLine = null): bool
    {
        $isList = preg_match(self::LIST_MARKER, $line, $m) > 0;
        if (! $isList) {
            return false;
        }

        $offset           = strlen($m[0]);
        $normalizedMarker = preg_replace('/\d+/', 'd', $m[1]);

        if (
            // validate if next line can be considered part of a list for enumerated lists
            // (only markers containing digits differ from their normalized form)
            $normalizedMarker !== $m[1]
            && $nextLine !== null
            && trim($nextLine) !== ''
            && ! $this->isBlockLine($nextLine, $offset)
            && ! $this->isListLine($nextLine, $normalizedMarker)
        ) {
            return false;
        }

        if ($listMarker !== null) {
            $isList = $normalizedMarker === $listMarker;
        }

        // the by-reference arguments are only written when the line is accepted
        if ($isList) {
            $listOffset = $offset;
            $listMarker = $normalizedMarker;
        }

        return $isList;
    }

    /**
     * Is this line "indented"?
     *
     * A blank line also counts as a "block" line, as it
     * may be the empty line between, for example, a
     * ".. note::" directive and the indented content on the
     * next lines.
     *
     * Lines recognized by {@see isComment()} are never block lines.
     *
     * @param int $minIndent can be used to require a specific level of
     *                       indentation for non-blank lines (number of spaces)
     */
    public function isBlockLine(string $line, int $minIndent = 1): bool
    {
        return (trim($line) === '' || $this->isIndented($line, $minIndent)) && ! $this->isComment($line);
    }

    /**
     * Checks if this line matches the comment pattern: either ".." on its own, or
     * ".. " followed by text that does not start with an underscore and does not
     * contain "::" before the end of the line.
     */
    public function isComment(string $line): bool
    {
        return preg_match('/^\.\.(?: [^_]((?:(?!::).)*))?$/mUsi', $line) > 0;
    }

    /**
     * Checks if this line matches the directive pattern: ".. ", an optional
     * "|name| " prefix, a name without whitespace followed by "::" and, optionally,
     * a space and further text.
     */
    public function isDirective(string $line): bool
    {
        return preg_match('/^\.\. (\|(.+)\| |)([^\s]+)::( (.*)|)$/mUsi', $line) > 0;
    }

    /**
     * Check if line is an indented one.
     *
     * This does *not* include blank lines, use {@see isBlockLine()} to check
     * for blank or indented lines.
     *
     * Only leading spaces are counted as indentation.
     *
     * @param int $minIndent can be used to require a specific level of indentation (number of spaces);
     *                       with 0, every line is considered indented
     */
    public function isIndented(string $line, int $minIndent = 1): bool
    {
        // strpos() only accepts an empty needle as of PHP 8.0 (older versions emit a
        // warning and return false), so "no indentation required" is answered here
        // to get the same result on every PHP version.
        if ($minIndent === 0) {
            return true;
        }

        return strpos($line, str_repeat(' ', $minIndent)) === 0;
    }

    /**
     * Checks if the definition list ends at the current line.
     *
     * A blank or indented line never ends the definition list. A non-blank,
     * non-indented line ends it unless the next line is indented (in which
     * case the current line can still be a term followed by its definition).
     *
     * @see https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#definition-lists
     *
     * @return bool true if the definition list is ended by $line
     */
    public function isDefinitionListEnded(string $line, string $nextLine): bool
    {
        if (trim($line) === '') {
            return false;
        }

        if ($this->isIndented($line)) {
            return false;
        }

        return ! $this->isIndented($nextLine);
    }
}
|