Update website
This commit is contained in:
parent
ec39d37f63
commit
b2868c260d
22 changed files with 26 additions and 26 deletions
566
vendor/renanbr/bibtex-parser/src/Parser.php
vendored
Normal file
566
vendor/renanbr/bibtex-parser/src/Parser.php
vendored
Normal file
|
@ -0,0 +1,566 @@
|
|||
<?php
|
||||
|
||||
/*
|
||||
* This file is part of the BibTex Parser.
|
||||
*
|
||||
* (c) Renan de Lima Barbosa <renandelima@gmail.com>
|
||||
*
|
||||
* For the full copyright and license information, please view the LICENSE
|
||||
* file that was distributed with this source code.
|
||||
*/
|
||||
|
||||
namespace RenanBr\BibTexParser;
|
||||
|
||||
use ErrorException;
|
||||
use RenanBr\BibTexParser\Exception\ParserException;
|
||||
|
||||
class Parser
|
||||
{
|
||||
const TYPE = 'type';
|
||||
const CITATION_KEY = 'citation_key';
|
||||
const TAG_NAME = 'tag_name';
|
||||
const RAW_TAG_CONTENT = 'raw_tag_content';
|
||||
const BRACED_TAG_CONTENT = 'braced_tag_content';
|
||||
const QUOTED_TAG_CONTENT = 'quoted_tag_content';
|
||||
const ENTRY = 'entry';
|
||||
|
||||
const NONE = 'none';
|
||||
const COMMENT = 'comment';
|
||||
const FIRST_TAG_NAME = 'first_tag_name';
|
||||
const POST_TYPE = 'post_type';
|
||||
const POST_TAG_NAME = 'post_tag_name';
|
||||
const PRE_TAG_CONTENT = 'pre_tag_content';
|
||||
|
||||
/** @var string */
|
||||
private $state;
|
||||
|
||||
/** @var string */
|
||||
private $buffer;
|
||||
|
||||
/** @var int|null */
|
||||
private $bufferOffset;
|
||||
|
||||
/** @var array|null */
|
||||
private $firstTagSnapshot;
|
||||
|
||||
/** @var string|null */
|
||||
private $originalEntryBuffer;
|
||||
|
||||
/** @var int|null */
|
||||
private $originalEntryOffset;
|
||||
|
||||
/** @var bool */
|
||||
private $skipOriginalEntryReading;
|
||||
|
||||
/** @var int */
|
||||
private $line;
|
||||
|
||||
/** @var int */
|
||||
private $column;
|
||||
|
||||
/** @var int */
|
||||
private $offset;
|
||||
|
||||
/** @var bool */
|
||||
private $isTagContentEscaped;
|
||||
|
||||
/** @var bool */
|
||||
private $mayConcatenateTagContent;
|
||||
|
||||
/** @var string|null */
|
||||
private $tagContentDelimiter;
|
||||
|
||||
/** @var int */
|
||||
private $braceLevel;
|
||||
|
||||
/** @var ListenerInterface[] */
|
||||
private $listeners = [];
|
||||
|
||||
/**
 * Registers a listener that will be notified about the units found while
 * parsing. Listeners are notified in the order they were added.
 */
public function addListener(ListenerInterface $listener)
{
    array_push($this->listeners, $listener);
}
|
||||
|
||||
/**
 * Parses the content of a file, reading it in small chunks so the whole
 * file never has to be held in memory.
 *
 * @param string $file
 *
 * @throws ParserException if $file given is not a valid BibTeX
 * @throws ErrorException  if $file given is not readable
 */
public function parseFile($file)
{
    $handle = @fopen($file, 'r');
    if (!$handle) {
        throw new ErrorException(sprintf('Unable to open %s', $file));
    }

    // parse() splits its input into characters using the mbstring internal
    // encoding. When that encoding is UTF-8 a chunk must not end in the
    // middle of a multibyte sequence, otherwise the character is mangled.
    $isUtf8 = 0 === strcasecmp('UTF-8', (string) mb_internal_encoding());

    try {
        $this->reset();
        $pending = '';
        while (!feof($handle)) {
            $chunk = fread($handle, 128);
            if (false === $chunk) {
                // Without this guard a failing read would loop forever,
                // because feof() never becomes true
                throw new ErrorException(sprintf('Unable to read %s', $file));
            }

            $chunk = $pending.$chunk;
            $pending = '';

            if ($isUtf8) {
                $tailLength = $this->incompleteUtf8TailLength($chunk);
                if ($tailLength > 0) {
                    // Holds the truncated character back until its
                    // remaining bytes arrive with the next chunk
                    $pending = (string) substr($chunk, -$tailLength);
                    $chunk = (string) substr($chunk, 0, -$tailLength);
                }
            }

            $this->parse($chunk);
        }

        if ('' !== $pending) {
            // The file ends with a truncated sequence, parse it as it is
            $this->parse($pending);
        }

        $this->throwExceptionIfReadingEntry("\0");
    } finally {
        fclose($handle);
    }
}

/**
 * Tells how many bytes at the end of $bytes belong to a UTF-8 sequence
 * whose remaining bytes are still missing.
 *
 * @param string $bytes
 *
 * @return int 0 when $bytes ends on a character boundary, 1 to 3 otherwise
 */
private function incompleteUtf8TailLength($bytes)
{
    $size = strlen($bytes);
    for ($back = 1; $back <= 3 && $back <= $size; ++$back) {
        $byte = ord($bytes[$size - $back]);
        if (0x80 === ($byte & 0xC0)) {
            // Continuation byte, keeps looking backwards for its lead byte
            continue;
        }
        if ($byte < 0x80) {
            // ASCII byte, nothing is pending
            return 0;
        }

        // Lead byte announcing a sequence of 2, 3 or 4 bytes
        $expected = $byte >= 0xF0 ? 4 : ($byte >= 0xE0 ? 3 : 2);

        return $expected > $back ? $back : 0;
    }

    return 0;
}
|
||||
|
||||
/**
 * Parses BibTeX content given as a string.
 *
 * The parser state is reset first, then the whole string is consumed and,
 * finally, an exception is thrown if the content ends in the middle of an
 * entry.
 *
 * @param string $string
 *
 * @throws ParserException if $string given is not a valid BibTeX
 */
public function parseString($string)
{
    $this->reset();

    $this->parse($string);

    // "\0" stands for the end of the input in the exception
    $this->throwExceptionIfReadingEntry("\0");
}
|
||||
|
||||
/**
 * Sends every character of $text to the state machine, keeping the line,
 * column and offset counters up to date.
 *
 * @param string $text
 */
private function parse($text)
{
    $text = (string) $text;

    // Extracting each character with mb_substr() makes the loop quadratic,
    // because every call scans the string from its beginning. Valid UTF-8
    // can be walked byte by byte instead: the lead byte tells the width of
    // the character.
    $isValidUtf8 = 0 === strcasecmp('UTF-8', (string) mb_internal_encoding())
        && mb_check_encoding($text, 'UTF-8');

    if ($isValidUtf8) {
        $size = strlen($text);
        $position = 0;
        while ($position < $size) {
            $lead = ord($text[$position]);
            if ($lead < 0x80) {
                $width = 1;
            } elseif ($lead < 0xE0) {
                $width = 2;
            } elseif ($lead < 0xF0) {
                $width = 3;
            } else {
                $width = 4;
            }
            $this->readAndAdvance(substr($text, $position, $width));
            $position += $width;
        }

        return;
    }

    // Other encodings and malformed input keep relying on mbstring
    $length = mb_strlen($text);
    for ($position = 0; $position < $length; ++$position) {
        $this->readAndAdvance(mb_substr($text, $position, 1));
    }
}

/**
 * Reads a single character and then moves the position counters past it.
 *
 * @param string $char
 */
private function readAndAdvance($char)
{
    $this->read($char);
    if ("\n" === $char) {
        ++$this->line;
        $this->column = 1;
    } else {
        ++$this->column;
    }
    ++$this->offset;
}
|
||||
|
||||
/**
 * Puts the parser back in its initial state, so the same instance can be
 * used to parse several inputs.
 */
private function reset()
{
    $this->state = self::NONE;

    // Token buffer; the offset is cleared together with the text so no
    // value from a previous (possibly aborted) parsing is left behind
    $this->buffer = '';
    $this->bufferOffset = null;
    $this->firstTagSnapshot = null;

    // Original entry tracking
    $this->originalEntryBuffer = null;
    $this->originalEntryOffset = null;
    $this->skipOriginalEntryReading = false;

    // Position counters: line and column start at 1, offset starts at 0
    $this->line = 1;
    $this->column = 1;
    $this->offset = 0;

    // Tag content flags
    $this->mayConcatenateTagContent = false;
    $this->isTagContentEscaped = false;
    $this->tagContentDelimiter = null;
    $this->braceLevel = 0;
}
|
||||
|
||||
// ----- Readers -----------------------------------------------------------
|
||||
|
||||
/**
 * Dispatches $char to the reader of the current state and then lets
 * readOriginalEntry() see it along with the state held before reading.
 *
 * @param string $char
 */
private function read($char)
{
    // Reader method by state
    static $readers = [
        self::NONE => 'readNone',
        self::COMMENT => 'readComment',
        self::TYPE => 'readType',
        self::POST_TYPE => 'readPostType',
        self::FIRST_TAG_NAME => 'readTagName',
        self::TAG_NAME => 'readTagName',
        self::POST_TAG_NAME => 'readPostTagName',
        self::PRE_TAG_CONTENT => 'readPreTagContent',
        self::RAW_TAG_CONTENT => 'readRawTagContent',
        self::QUOTED_TAG_CONTENT => 'readDelimitedTagContent',
        self::BRACED_TAG_CONTENT => 'readDelimitedTagContent',
    ];

    $previousState = $this->state;

    if (isset($readers[$previousState])) {
        $reader = $readers[$previousState];
        $this->{$reader}($char);
    }

    $this->readOriginalEntry($char, $previousState);
}
|
||||
|
||||
/**
 * Reads a character found outside of entries and comments: "@" starts an
 * entry type, whitespace is ignored and anything else starts a comment.
 *
 * @param string $char
 */
private function readNone($char)
{
    if ($this->isWhitespace($char)) {
        return;
    }

    $this->state = '@' === $char ? self::TYPE : self::COMMENT;
}
|
||||
|
||||
/**
 * Reads a comment character; the comment lasts until the next whitespace.
 *
 * @param string $char
 */
private function readComment($char)
{
    if (!$this->isWhitespace($char)) {
        return;
    }

    $this->state = self::NONE;
}
|
||||
|
||||
/**
 * Reads the entry type, i.e. the letters that come right after "@".
 *
 * The first character that is not a letter finishes the type: "comment"
 * (case insensitive) is discarded and handled as a comment, any other type
 * is sent to the listeners and $char is read again as POST_TYPE.
 *
 * @param string $char
 */
private function readType($char)
{
    if (1 === preg_match('/^[a-zA-Z]$/', $char)) {
        $this->appendToBuffer($char);

        return;
    }

    $this->throwExceptionIfBufferIsEmpty($char);

    $isCommentType = 'comment' === mb_strtolower($this->buffer);
    if (!$isCommentType) {
        $this->triggerListenersWithCurrentBuffer();

        // $char doesn't belong to the type, so it is up to POST_TYPE
        $this->state = self::POST_TYPE;
        $this->readPostType($char);

        return;
    }

    // @comment isn't an entry: drops what was read so far
    $this->skipOriginalEntryReading = true;
    $this->buffer = '';
    $this->bufferOffset = null;
    $this->state = self::COMMENT;
    $this->readComment($char);
}
|
||||
|
||||
/**
 * Reads what comes between the entry type and its opening brace; only
 * whitespace is accepted there.
 *
 * @param string $char
 *
 * @throws ParserException if $char is neither "{" nor whitespace
 */
private function readPostType($char)
{
    if ('{' === $char) {
        $this->state = self::FIRST_TAG_NAME;

        return;
    }

    if ($this->isWhitespace($char)) {
        return;
    }

    throw ParserException::unexpectedCharacter($char, $this->line, $this->column);
}
|
||||
|
||||
/**
 * Reads a tag name. In FIRST_TAG_NAME state the name may turn out to be the
 * citation key, which is only known when the next significant character is
 * read, see readPostTagName().
 *
 * @param string $char
 *
 * @throws ParserException if the name ends before having any character
 */
private function readTagName($char)
{
    // Strict comparison on purpose: empty() takes the string "0" as empty,
    // which made a name or citation key like the one in "@misc{0}" be
    // dropped and left in the buffer
    $isBufferEmpty = '' === (string) $this->buffer;

    if (preg_match('/^[a-zA-Z0-9_\&\+:\-\.\/\x{00C0}-\x{01FF}]$/u', $char)) {
        $this->appendToBuffer($char);
    } elseif ($isBufferEmpty && $this->isWhitespace($char)) {
        // Skips because we didn't start reading
    } elseif ($isBufferEmpty && '}' === $char) {
        // No tag name found, $char is just closing current entry
        $this->state = self::NONE;
    } else {
        if ($isBufferEmpty) {
            throw ParserException::unexpectedCharacter($char, $this->line, $this->column);
        }

        if (self::FIRST_TAG_NAME === $this->state) {
            // Takes a snapshot of current state to be triggered later as
            // tag name or citation key, see readPostTagName()
            $this->firstTagSnapshot = $this->takeBufferSnapshot();
        } else {
            // Current buffer is a simple tag name
            $this->triggerListenersWithCurrentBuffer();
        }

        // Once $char isn't a valid tag name character, it must be
        // interpreted as post tag name
        $this->state = self::POST_TAG_NAME;
        $this->readPostTagName($char);
    }
}
|
||||
|
||||
/**
 * Reads the character that follows a tag name and decides what the pending
 * first tag snapshot (if any) was: "=" makes it a tag name, while "}" and
 * "," make it a citation key.
 *
 * @param string $char
 *
 * @throws ParserException if $char is not "=", "}", "," or whitespace
 */
private function readPostTagName($char)
{
    // $char => [type of the first tag snapshot, next state]
    $transitions = [
        // There is content coming, so it isn't a citation key
        '=' => [self::TAG_NAME, self::PRE_TAG_CONTENT],
        // Entry is closed and the first tag has no value
        '}' => [self::CITATION_KEY, self::NONE],
        // Moves to the next tag and the first one has no value
        ',' => [self::CITATION_KEY, self::TAG_NAME],
    ];

    if (isset($transitions[$char])) {
        $snapshotType = $transitions[$char][0];
        $nextState = $transitions[$char][1];
        $this->triggerListenersWithFirstTagSnapshotAs($snapshotType);
        $this->state = $nextState;

        return;
    }

    if ($this->isWhitespace($char)) {
        return;
    }

    throw ParserException::unexpectedCharacter($char, $this->line, $this->column);
}
|
||||
|
||||
/**
 * Reads what comes before, between and after the pieces of a tag content.
 *
 * A piece (raw, quoted or braced) can only start when there is no previous
 * piece waiting for a concatenator. The concatenator "#", the tag separator
 * "," and the entry closing "}" can only come after a piece.
 *
 * @param string $char
 *
 * @throws ParserException if $char is not expected at this point
 */
private function readPreTagContent($char)
{
    $startsRawContent = 1 === preg_match('/^[a-zA-Z0-9]$/', $char);

    if ($startsRawContent || '"' === $char || '{' === $char) {
        // When concatenation is available there is already a value, so the
        // parser expects a concatenator, a tag separator or an entry
        // closing char instead of a new value
        $this->throwExceptionAccordingToConcatenationAvailability($char, true);

        if ($startsRawContent) {
            $this->state = self::RAW_TAG_CONTENT;
            $this->readRawTagContent($char);
        } elseif ('"' === $char) {
            $this->tagContentDelimiter = '"';
            $this->state = self::QUOTED_TAG_CONTENT;
        } else {
            $this->tagContentDelimiter = '}';
            $this->state = self::BRACED_TAG_CONTENT;
        }

        return;
    }

    if ('#' === $char || ',' === $char || '}' === $char) {
        // These characters require a value right before them
        $this->throwExceptionAccordingToConcatenationAvailability($char, false);
        $this->mayConcatenateTagContent = false;

        if (',' === $char) {
            $this->state = self::TAG_NAME;
        } elseif ('}' === $char) {
            $this->state = self::NONE;
        }

        return;
    }

    if (!$this->isWhitespace($char)) {
        throw ParserException::unexpectedCharacter($char, $this->line, $this->column);
    }
}
|
||||
|
||||
/**
 * Reads a tag content that is not surrounded by quotes or braces.
 *
 * @param string $char
 */
private function readRawTagContent($char)
{
    $belongsToContent = 1 === preg_match('/^[a-zA-Z0-9_\+:\-\.\/]$/', $char);
    if ($belongsToContent) {
        $this->appendToBuffer($char);

        return;
    }

    // $char finishes the raw content
    $this->throwExceptionIfBufferIsEmpty($char);
    $this->triggerListenersWithCurrentBuffer();

    // There is a value now, so it may be concatenated, and $char has to be
    // interpreted as PRE_TAG_CONTENT
    $this->mayConcatenateTagContent = true;
    $this->state = self::PRE_TAG_CONTENT;
    $this->readPreTagContent($char);
}
|
||||
|
||||
/**
 * Reads a tag content surrounded by quotes or braces.
 *
 * A backslash escapes the next character; the backslash itself is kept in
 * the content unless it comes before the delimiter, another backslash or
 * "%". Nested braces are counted only when the delimiter is "}".
 *
 * @param string $char
 */
private function readDelimitedTagContent($char)
{
    $delimiter = $this->tagContentDelimiter;

    if ($this->isTagContentEscaped) {
        $this->isTagContentEscaped = false;
        $dropsBackslash = $delimiter === $char || '\\' === $char || '%' === $char;
        if (!$dropsBackslash) {
            $this->appendToBuffer('\\');
        }
        $this->appendToBuffer($char);

        return;
    }

    if ('}' === $delimiter && '{' === $char) {
        // Opens a nested brace
        ++$this->braceLevel;
        $this->appendToBuffer($char);

        return;
    }

    if ($delimiter === $char) {
        if (0 !== $this->braceLevel) {
            // Closes a nested brace
            --$this->braceLevel;
            $this->appendToBuffer($char);

            return;
        }

        // Closes the content
        $this->triggerListenersWithCurrentBuffer();
        $this->mayConcatenateTagContent = true;
        $this->state = self::PRE_TAG_CONTENT;

        return;
    }

    if ('\\' === $char) {
        // Whether it is kept is decided when the next character comes
        $this->isTagContentEscaped = true;

        return;
    }

    $this->appendToBuffer($char);
}
|
||||
|
||||
/**
 * Collects the characters of the entry being read and sends the entry as
 * a whole to the listeners, as an ENTRY unit, when it gets closed.
 *
 * @param string $char
 * @param string $previousState state held before $char was read
 */
private function readOriginalEntry($char, $previousState)
{
    if ($this->skipOriginalEntryReading) {
        // Discards what was collected and consumes the skip request
        $this->skipOriginalEntryReading = false;
        $this->originalEntryOffset = null;
        $this->originalEntryBuffer = '';

        return;
    }

    // $char is part of an entry when it was read under an entry state or
    // when it has just led to one
    $wasInsideEntry = $this->isEntryState($previousState);
    $isInsideEntry = $this->isEntryState($this->state);
    if (!$wasInsideEntry && !$isInsideEntry) {
        return;
    }

    if (!$this->originalEntryBuffer) {
        // First character of the entry
        $this->originalEntryOffset = $this->offset;
    }
    $this->originalEntryBuffer .= $char;

    // $char closes the entry only when it leaves an entry state
    if (!$wasInsideEntry || $isInsideEntry) {
        return;
    }

    $context = [
        'offset' => $this->originalEntryOffset,
        'length' => $this->offset - $this->originalEntryOffset + 1,
    ];
    $this->triggerListeners($this->originalEntryBuffer, self::ENTRY, $context);

    $this->originalEntryBuffer = '';
    $this->originalEntryOffset = null;
}
|
||||
|
||||
// ----- Listener triggers -------------------------------------------------
|
||||
|
||||
/**
 * Notifies every registered listener, in registration order, about a unit
 * that has been found.
 *
 * @param string $text    unit text
 * @param string $type    unit type, one of the constants of this class
 * @param array  $context "offset" and "length" of the unit
 */
private function triggerListeners($text, $type, array $context)
{
    foreach ($this->listeners as $registered) {
        $registered->bibTexUnitFound($text, $type, $context);
    }
}
|
||||
|
||||
/**
 * Sends the current buffer to the listeners using the current state as
 * the unit type. The buffer is emptied by the snapshot.
 */
private function triggerListenersWithCurrentBuffer()
{
    $unit = $this->takeBufferSnapshot();

    $this->triggerListeners($unit['text'], $this->state, $unit['context']);
}
|
||||
|
||||
/**
 * Sends the pending first tag snapshot, if there is one, to the listeners
 * as a unit of the given type. The snapshot is released before notifying.
 *
 * @param string $type
 */
private function triggerListenersWithFirstTagSnapshotAs($type)
{
    $pending = $this->firstTagSnapshot;
    if (!$pending) {
        return;
    }

    $this->firstTagSnapshot = null;
    $this->triggerListeners($pending['text'], $type, $pending['context']);
}
|
||||
|
||||
// ----- Buffer tools ------------------------------------------------------
|
||||
|
||||
/**
 * Appends $char to the buffer, remembering the offset where the buffered
 * text starts.
 *
 * @param string $char
 */
private function appendToBuffer($char)
{
    // Strict comparison on purpose: empty() takes the string "0" as empty,
    // which moved the start offset of any text beginning with "0" to its
    // second character
    if ('' === (string) $this->buffer) {
        $this->bufferOffset = $this->offset;
    }
    $this->buffer .= $char;
}
|
||||
|
||||
/**
 * Empties the buffer and returns what it was holding.
 *
 * @return array "text" holds the buffered text and "context" holds its
 *               "offset" and "length", the latter measured from that offset
 *               up to the current one
 */
private function takeBufferSnapshot()
{
    $text = $this->buffer;
    $start = $this->bufferOffset;
    $length = $this->offset - $start;

    $this->buffer = '';
    $this->bufferOffset = null;

    return [
        'text' => $text,
        'context' => [
            'offset' => $start,
            'length' => $length,
        ],
    ];
}
|
||||
|
||||
// ----- Exception throwers ------------------------------------------------
|
||||
|
||||
/**
 * Throws when the concatenation availability is the one given, which is
 * the availability under which $char is not acceptable.
 *
 * @param string $char
 * @param bool   $availability
 *
 * @throws ParserException if $availability matches the current one
 */
private function throwExceptionAccordingToConcatenationAvailability($char, $availability)
{
    if ($availability !== $this->mayConcatenateTagContent) {
        return;
    }

    throw ParserException::unexpectedCharacter($char, $this->line, $this->column);
}
|
||||
|
||||
/**
 * Throws when nothing has been buffered, which means $char came where at
 * least one character of a type, name or raw content was expected.
 *
 * @param string $char
 *
 * @throws ParserException if the buffer is empty
 */
private function throwExceptionIfBufferIsEmpty($char)
{
    // Strict comparison on purpose: empty() takes the string "0" as empty,
    // which made valid inputs like "volume = 0" or the citation key "0"
    // be rejected
    if ('' === (string) $this->buffer) {
        throw ParserException::unexpectedCharacter($char, $this->line, $this->column);
    }
}
|
||||
|
||||
/**
 * Throws when the parser is still in the middle of an entry.
 *
 * @param string $char character to be reported as unexpected
 *
 * @throws ParserException if the current state is an entry state
 */
private function throwExceptionIfReadingEntry($char)
{
    $isReadingEntry = $this->isEntryState($this->state);
    if (!$isReadingEntry) {
        return;
    }

    throw ParserException::unexpectedCharacter($char, $this->line, $this->column);
}
|
||||
|
||||
// ----- Auxiliaries -------------------------------------------------------
|
||||
|
||||
/**
 * Tells whether $state belongs to an entry, which is the case of every
 * state except NONE and COMMENT.
 *
 * @param string $state
 *
 * @return bool
 */
private function isEntryState($state)
{
    return !in_array($state, [self::NONE, self::COMMENT], true);
}
|
||||
|
||||
/**
 * Tells whether $char is a space, a tab, a line feed or a carriage return.
 *
 * @param string $char
 *
 * @return bool
 */
private function isWhitespace($char)
{
    return in_array($char, [' ', "\t", "\n", "\r"], true);
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue