535 lines
18 KiB
PHP
535 lines
18 KiB
PHP
|
<?php
|
||
|
|
||
|
declare(strict_types=1);
|
||
|
|
||
|
namespace Doctrine\RST\Nodes;
|
||
|
|
||
|
use Doctrine\RST\Exception\InvalidTableStructure;
|
||
|
use Doctrine\RST\Nodes\Table\TableColumn;
|
||
|
use Doctrine\RST\Nodes\Table\TableRow;
|
||
|
use Doctrine\RST\Parser;
|
||
|
use Doctrine\RST\Parser\LineChecker;
|
||
|
use Doctrine\RST\Parser\TableSeparatorLineConfig;
|
||
|
use Exception;
|
||
|
use LogicException;
|
||
|
|
||
|
use function array_keys;
|
||
|
use function array_reverse;
|
||
|
use function array_values;
|
||
|
use function count;
|
||
|
use function explode;
|
||
|
use function implode;
|
||
|
use function ksort;
|
||
|
use function max;
|
||
|
use function mb_convert_encoding;
|
||
|
use function preg_match;
|
||
|
use function sprintf;
|
||
|
use function str_repeat;
|
||
|
use function strlen;
|
||
|
use function strpos;
|
||
|
use function substr;
|
||
|
use function trim;
|
||
|
|
||
|
class TableNode extends Node
|
||
|
{
|
||
|
public const TYPE_SIMPLE = 'simple';
public const TYPE_PRETTY = 'pretty';

/**
 * Separator lines, keyed by the table line number they were pushed at.
 *
 * @var TableSeparatorLineConfig[]
 */
private $separatorLineConfigs = [];

/**
 * Content lines, keyed by the table line number they were pushed at.
 * The strings are stored converted from UTF-8 to ISO-8859-1.
 *
 * @var string[]
 */
private $rawDataLines = [];

/**
 * Line number that the next pushed separator or content line receives.
 *
 * @var int
 */
private $currentLineNumber = 0;

/**
 * Becomes true as soon as compile() starts; pushing lines is refused afterwards.
 *
 * @var bool
 */
private $isCompiled = false;

/**
 * Compiled rows, keyed by the line number of the row's first content line.
 *
 * @var TableRow[]
 */
protected $data = [];

/**
 * Row indexes that are header rows (index => true).
 *
 * @var bool[]
 */
protected $headers = [];

/**
 * Problems collected while compiling; finalize() reports the first one.
 *
 * @var string[]
 */
private $errors = [];

/**
 * Table flavour. self::TYPE_SIMPLE selects the simple-table compiler,
 * any other value is compiled as a "pretty" table.
 *
 * @var string
 */
protected $type;

/** @var LineChecker */
private $lineChecker;
|
||
|
|
||
|
/**
 * @param TableSeparatorLineConfig $separatorLineConfig Opening separator line, recorded as the table's first line
 * @param string                   $type                self::TYPE_SIMPLE or self::TYPE_PRETTY
 * @param LineChecker              $lineChecker         Used by finalize() to detect cells that start with a list
 */
public function __construct(TableSeparatorLineConfig $separatorLineConfig, string $type, LineChecker $lineChecker)
{
    parent::__construct();

    // the opening separator is always the first recorded line of the table
    $this->pushSeparatorLine($separatorLineConfig);

    $this->type        = $type;
    $this->lineChecker = $lineChecker;
}
|
||
|
|
||
|
/**
 * Returns the column count of the widest compiled row (0 for an empty table).
 *
 * @throws LogicException when the table has not been compiled yet.
 */
public function getCols(): int
{
    if (! $this->isCompiled) {
        throw new LogicException('Call compile() first.');
    }

    // seed with 0 so that a table without rows still yields 0
    $widths = [0];
    foreach ($this->data as $tableRow) {
        $widths[] = count($tableRow->getColumns());
    }

    return max($widths);
}
|
||
|
|
||
|
/**
 * Returns the number of compiled rows.
 *
 * @throws LogicException when the table has not been compiled yet.
 */
public function getRows(): int
{
    if (! $this->isCompiled) {
        throw new LogicException('Call compile() first.');
    }

    $compiledRows = $this->data;

    return count($compiledRows);
}
|
||
|
|
||
|
/**
 * Returns the compiled rows of the table.
 *
 * @return TableRow[]
 *
 * @throws LogicException when the table has not been compiled yet.
 */
public function getData(): array
{
    if (! $this->isCompiled) {
        throw new LogicException('Call compile() first.');
    }

    return $this->data;
}
|
||
|
|
||
|
/**
 * Returns an array describing which rows are header rows: the key
 * is the row index and the value is always true.
 *
 * @return bool[]
 *
 * @throws LogicException when the table has not been compiled yet.
 */
public function getHeaders(): array
{
    if (! $this->isCompiled) {
        throw new LogicException('Call compile() first.');
    }

    return $this->headers;
}
|
||
|
|
||
|
/**
 * Records a separator line at the current table line number and
 * advances the line counter.
 *
 * @throws LogicException when the table has already been compiled.
 */
public function pushSeparatorLine(TableSeparatorLineConfig $separatorLineConfig): void
{
    if ($this->isCompiled) {
        throw new LogicException('Cannot push data after TableNode is compiled');
    }

    $lineNumber = $this->currentLineNumber;

    $this->separatorLineConfigs[$lineNumber] = $separatorLineConfig;
    $this->currentLineNumber                 = $lineNumber + 1;
}
|
||
|
|
||
|
/**
 * Records a content line at the current table line number and
 * advances the line counter.
 *
 * The line is stored converted from UTF-8 to ISO-8859-1.
 * NOTE(review): presumably so that byte offsets used by strlen()/substr()
 * during compilation line up with character columns - confirm.
 *
 * @throws LogicException when the table has already been compiled.
 */
public function pushContentLine(string $line): void
{
    if ($this->isCompiled) {
        throw new LogicException('Cannot push data after TableNode is compiled');
    }

    $singleByteLine = mb_convert_encoding($line, 'ISO-8859-1', 'UTF-8');
    $lineNumber     = $this->currentLineNumber;

    $this->rawDataLines[$lineNumber] = $singleByteLine;
    $this->currentLineNumber         = $lineNumber + 1;
}
|
||
|
|
||
|
/**
 * Compiles the table if that has not happened yet and attaches a node
 * to every cell.
 *
 * If compilation collected errors, the first one is reported to the
 * parser's error manager together with a dump of the raw table, and the
 * table is emptied (no rows, no headers).
 *
 * Otherwise a cell whose first line is a list line is parsed as a
 * fragment and receives the fragment's first node; any other cell
 * receives a span node built from its content.
 */
public function finalize(Parser $parser): void
{
    if ($this->isCompiled === false) {
        $this->compile();
    }

    if (count($this->errors) > 0) {
        // the table dump is only needed for the error report, so it is
        // built here rather than on every call
        $parser->getEnvironment()
            ->getErrorManager()
            ->error(
                sprintf("%s\n\n%s", $this->errors[0], $this->getTableAsString()),
                $parser->getFilename()
            );

        $this->data    = [];
        $this->headers = [];

        return;
    }

    foreach ($this->data as $row) {
        foreach ($row->getColumns() as $col) {
            $content = $col->getContent();
            $lines   = explode("\n", $content);

            if ($this->lineChecker->isListLine($lines[0])) {
                // the cell starts with a list: parse it as a document fragment
                $node = $parser->parseFragment($content)->getNodes()[0];
            } else {
                $node = $parser->createSpanNode($content);
            }

            $col->setNode($node);
        }
    }
}
|
||
|
|
||
|
/**
 * Marks the node as compiled and turns the raw lines collected so far
 * into rows and headers, using the compiler that matches the table type.
 */
private function compile(): void
{
    $this->isCompiled = true;

    if ($this->type !== self::TYPE_SIMPLE) {
        $this->compilePrettyTable();

        return;
    }

    $this->compileSimpleTable();
}
|
||
|
|
||
|
/**
 * Compiles a "simple" table: the column ranges come from the first
 * separator line, every content line is cut into cells along those
 * ranges, and a line whose first cell is completely empty is merged
 * into the previous row.
 *
 * Problems are collected via addError(); nothing is thrown for a
 * malformed table.
 */
private function compileSimpleTable(): void
{
    // Without any content line there are no rows and no headers. Return
    // early: the "last data line" lookup below would otherwise read a
    // non-existent array offset.
    if ($this->rawDataLines === []) {
        return;
    }

    // determine if there is a second "===" separator line (other than
    // the last line): this would mean there are header rows
    $finalHeadersRow = 0;
    foreach ($this->separatorLineConfigs as $i => $separatorLine) {
        // skip the first line: we're looking for the *next* line
        if ($i === 0) {
            continue;
        }

        // we found the next ==== line
        if ($separatorLine->getLineCharacter() === '=') {
            // found the end of the header rows
            $finalHeadersRow = $i;

            break;
        }
    }

    // if the final header row is *after* the last data line, it's not
    // really a header "ending" and so there are no headers
    $lastDataLineNumber = array_keys($this->rawDataLines)[count($this->rawDataLines) - 1];
    if ($finalHeadersRow > $lastDataLineNumber) {
        $finalHeadersRow = 0;
    }

    // todo - support "---" in the future for colspan
    $columnRanges       = $this->separatorLineConfigs[0]->getPartRanges();
    $lastColumnRangeEnd = array_values($columnRanges)[count($columnRanges) - 1][1];
    foreach ($this->rawDataLines as $i => $line) {
        $row = new TableRow();

        // loop over where all the columns should be
        $previousColumnEnd = null;
        foreach ($columnRanges as $columnRange) {
            $isRangeBeyondText = $columnRange[0] >= strlen($line);

            // check for content in the "gap" between two columns
            if ($previousColumnEnd !== null && ! $isRangeBeyondText) {
                $gapText = substr($line, $previousColumnEnd, $columnRange[0] - $previousColumnEnd);
                if (strlen(trim($gapText)) !== 0) {
                    $this->addError(sprintf('Malformed table: content "%s" appears in the "gap" on row "%s"', $gapText, $line));
                }
            }

            if ($isRangeBeyondText) {
                // the text for this line ended earlier. This column should be blank
                $content = '';
            } elseif ($lastColumnRangeEnd === $columnRange[1]) {
                // this is the last column, so get the rest of the line
                // this is because content can go *beyond* the table legally
                $content = substr($line, $columnRange[0]);
            } else {
                $content = substr($line, $columnRange[0], $columnRange[1] - $columnRange[0]);
            }

            $content = trim($content);
            $row->addColumn($content, 1);

            $previousColumnEnd = $columnRange[1];
        }

        // is header row?
        if ($i <= $finalHeadersRow) {
            $this->headers[$i] = true;
        }

        $this->data[$i] = $row;
    }

    $previousRow = null;
    // check for empty first columns, which means this is
    // not a new row, but the continuation of the previous row
    foreach ($this->data as $i => $row) {
        if ($row->getFirstColumn()->isCompletelyEmpty() && $previousRow !== null) {
            try {
                $previousRow->absorbRowContent($row);
            } catch (InvalidTableStructure $e) {
                $this->addError($e->getMessage());
            }

            // the continuation line is not a row of its own
            unset($this->data[$i]);

            continue;
        }

        $previousRow = $row;
    }
}
|
||
|
|
||
|
/**
 * Compiles a "pretty" (grid) table in four passes:
 *
 *  1. collect the smallest column ranges found on any separator line and
 *     locate the "===" header separator;
 *  2. cut every content line into cells, detecting colspans (text that
 *     runs through the gap between two ranges) and remembering lines
 *     that are partly separator, partly content (rowspans);
 *  3. fold rowspan lines and continuation lines into the row they
 *     belong to;
 *  4. flag the header rows.
 *
 * Problems with the table structure are collected via addError().
 *
 * @throws LogicException when a rowspan column cannot be found on the row being processed.
 * @throws Exception      when no previous row has a column at a rowspan position (raised by findColumnInPreviousRows()).
 */
private function compilePrettyTable(): void
{
    // loop over ALL separator lines to find ALL of the column ranges
    $columnRanges    = [];
    $finalHeadersRow = 0;
    foreach ($this->separatorLineConfigs as $rowIndex => $separatorLine) {
        if ($separatorLine->isHeader()) {
            if ($finalHeadersRow !== 0) {
                $this->addError(sprintf('Malformed table: multiple "header rows" using "===" were found. See table lines "%d" and "%d"', $finalHeadersRow + 1, $rowIndex));
            }

            // indicates that "=" was used
            $finalHeadersRow = $rowIndex - 1;
        }

        foreach ($separatorLine->getPartRanges() as $columnRange) {
            $colStart = $columnRange[0];
            $colEnd   = $columnRange[1];

            // we don't have this "start" yet? just add it
            // in theory, should only happen for the first row
            if (! isset($columnRanges[$colStart])) {
                $columnRanges[$colStart] = $colEnd;

                continue;
            }

            // an exact column range we've already seen
            // OR, this new column goes beyond what we currently
            // have recorded, which means it's a colspan, and so
            // we already have correctly recorded the "smallest"
            // current column ranges
            if ($columnRanges[$colStart] <= $colEnd) {
                continue;
            }

            // this is not a new "start", but it is a new "end"
            // this means that we've found a "shorter" column than
            // we've seen before. We need to update the "end" of
            // the existing column, and add a "new" column
            $previousEnd = $columnRanges[$colStart];

            // A) update the end of this column to the new end
            $columnRanges[$colStart] = $colEnd;
            // B) add a new column from this new end, to the previous end
            $columnRanges[$colEnd + 1] = $previousEnd;
            ksort($columnRanges);
        }
    }

    /** @var TableRow[] $rows */
    $rows                 = [];
    $partialSeparatorRows = [];
    foreach ($this->rawDataLines as $rowIndex => $line) {
        $row = new TableRow();

        // if the row is part separator row, part content, this
        // is a rowspan situation - e.g.
        // |           +----------------+----------------------------+
        // look for +-----+ pattern
        if (preg_match('/\+[-]+\+/', $line) === 1) {
            $partialSeparatorRows[$rowIndex] = true;
        }

        $currentColumnStart = null;
        $currentSpan        = 1;
        $previousColumnEnd  = null;
        foreach ($columnRanges as $start => $end) {
            // a content line that ends before it should
            if ($end >= strlen($line)) {
                $this->addError(sprintf("Malformed table: Line\n\n%s\n\ndoes not appear to be a complete table row", $line));

                break;
            }

            if ($currentColumnStart !== null) {
                $gapText = substr($line, $previousColumnEnd, $start - $previousColumnEnd);
                if (strpos($gapText, '|') === false && strpos($gapText, '+') === false) {
                    // text continued through the "gap". This is a colspan
                    // "+" is an odd character - it's usually "|", but "+" can
                    // happen in row-span situations
                    $currentSpan++;
                } else {
                    // we just hit a proper "gap": record the line up until now
                    $row->addColumn(
                        substr($line, $currentColumnStart, $previousColumnEnd - $currentColumnStart),
                        $currentSpan
                    );
                    $currentSpan        = 1;
                    $currentColumnStart = null;
                }
            }

            // if the current column start is null, then set it
            // otherwise, leave it - this is a colspan, and eventually
            // we want to get all the text starting here
            if ($currentColumnStart === null) {
                $currentColumnStart = $start;
            }

            $previousColumnEnd = $end;
        }

        // record the last column
        if ($currentColumnStart !== null) {
            if ($previousColumnEnd === null) {
                throw new LogicException('The previous column end is not set yet');
            }

            $row->addColumn(
                substr($line, $currentColumnStart, $previousColumnEnd - $currentColumnStart),
                $currentSpan
            );
        }

        $rows[$rowIndex] = $row;
    }

    $columnIndexesCurrentlyInRowspan = [];
    foreach ($rows as $rowIndex => $row) {
        if (isset($partialSeparatorRows[$rowIndex])) {
            // this row is part content, part separator due to a rowspan
            // for each column that contains content, we need to
            // push it onto the last real row's content and record
            // that this column in the next row should also be
            // included in that previous row's content
            foreach ($row->getColumns() as $columnIndex => $column) {
                if (! $column->isCompletelyEmpty() && str_repeat('-', strlen($column->getContent())) === $column->getContent()) {
                    // only a line separator in this column - not content!
                    continue;
                }

                $prevTargetColumn = $this->findColumnInPreviousRows((int) $columnIndex, $rows, (int) $rowIndex);
                $prevTargetColumn->addContent("\n" . $column->getContent());
                $prevTargetColumn->incrementRowSpan();
                // mark that this column on the next row should also be added
                // to the previous row
                $columnIndexesCurrentlyInRowspan[] = $columnIndex;
            }

            // remove the row - it's not real
            unset($rows[$rowIndex]);

            continue;
        }

        // check if the previous row was a partial separator row, and
        // we need to take some columns and add them to a previous row's content
        foreach ($columnIndexesCurrentlyInRowspan as $columnIndex) {
            $prevTargetColumn = $this->findColumnInPreviousRows($columnIndex, $rows, (int) $rowIndex);
            $columnInRowspan  = $row->getColumn($columnIndex);
            if ($columnInRowspan === null) {
                throw new LogicException(sprintf('Cannot find column for index "%s"', $columnIndex));
            }

            $prevTargetColumn->addContent("\n" . $columnInRowspan->getContent());

            // now this column actually needs to be removed from this row,
            // as it's not a real column that needs to be printed
            $row->removeColumn($columnIndex);
        }

        $columnIndexesCurrentlyInRowspan = [];

        // if the next row is just $i+1, it means there
        // was no "separator" and this is really just a
        // continuation of this row.
        $nextRowCounter = 1;
        while (isset($rows[(int) $rowIndex + $nextRowCounter])) {
            // but if the next line is actually a partial separator, then
            // it is not a continuation of the content - quit now
            if (isset($partialSeparatorRows[(int) $rowIndex + $nextRowCounter])) {
                break;
            }

            $targetRow = $rows[(int) $rowIndex + $nextRowCounter];
            unset($rows[(int) $rowIndex + $nextRowCounter]);

            try {
                $row->absorbRowContent($targetRow);
            } catch (InvalidTableStructure $e) {
                $this->addError($e->getMessage());
            }

            $nextRowCounter++;
        }
    }

    // one more loop to set headers
    foreach ($rows as $rowIndex => $row) {
        if ($rowIndex > $finalHeadersRow) {
            continue;
        }

        $this->headers[$rowIndex] = true;
    }

    $this->data = $rows;
}
|
||
|
|
||
|
/**
 * Rebuilds the table source, one line per recorded separator or content
 * line, for use in error messages.
 *
 * Lines are read in order starting at line 0 and stop at the first line
 * number that has neither a separator nor a content line.
 */
private function getTableAsString(): string
{
    $lines = [];
    $i     = 0;
    while (isset($this->separatorLineConfigs[$i]) || isset($this->rawDataLines[$i])) {
        if (isset($this->separatorLineConfigs[$i])) {
            $lines[] = $this->separatorLineConfigs[$i]->getRawContent();
        } else {
            // content lines are stored as ISO-8859-1 (see pushContentLine());
            // convert them back so the message is valid UTF-8 again
            $lines[] = mb_convert_encoding($this->rawDataLines[$i], 'UTF-8', 'ISO-8859-1');
        }

        $i++;
    }

    return implode("\n", $lines);
}
|
||
|
|
||
|
/**
 * Appends a message to the list of problems found while compiling.
 */
private function addError(string $message): void
{
    $this->errors[] = $message;
}
|
||
|
|
||
|
/**
 * Walks backwards from the row before $currentRowIndex and returns the
 * first column found at position $columnIndex.
 *
 * @param TableRow[] $rows Rows keyed by their row index
 *
 * @throws Exception when no earlier row has a column at that position.
 */
private function findColumnInPreviousRows(int $columnIndex, array $rows, int $currentRowIndex): TableColumn
{
    // keys are preserved so that they can be compared to the current row index
    foreach (array_reverse($rows, true) as $candidateIndex => $candidateRow) {
        // the current row and anything after it is not "previous"
        if ($candidateIndex >= $currentRowIndex) {
            continue;
        }

        $candidateColumn = $candidateRow->getColumn($columnIndex);
        if ($candidateColumn === null) {
            // this row has no real column at that position - look further back
            continue;
        }

        return $candidateColumn;
    }

    throw new Exception('Could not find column in any previous rows');
}
|
||
|
}
|