Update website

This commit is contained in:
Guilhem Lavaux 2024-11-23 22:46:58 +01:00
parent 8d22900937
commit 56c333245a
24 changed files with 2292 additions and 0 deletions

View File

@ -25,6 +25,7 @@ return array(
'Symfony\\Component\\Filesystem\\' => array($vendorDir . '/symfony/filesystem'), 'Symfony\\Component\\Filesystem\\' => array($vendorDir . '/symfony/filesystem'),
'Symfony\\Component\\ErrorHandler\\' => array($vendorDir . '/symfony/error-handler'), 'Symfony\\Component\\ErrorHandler\\' => array($vendorDir . '/symfony/error-handler'),
'Stevenmaguire\\OAuth2\\Client\\' => array($vendorDir . '/stevenmaguire/oauth2-bitbucket/src'), 'Stevenmaguire\\OAuth2\\Client\\' => array($vendorDir . '/stevenmaguire/oauth2-bitbucket/src'),
'RenanBr\\BibTexParser\\' => array($vendorDir . '/renanbr/bibtex-parser/src'),
'Psr\\Log\\' => array($vendorDir . '/psr/log/Psr/Log'), 'Psr\\Log\\' => array($vendorDir . '/psr/log/Psr/Log'),
'Psr\\Http\\Message\\' => array($vendorDir . '/psr/http-factory/src', $vendorDir . '/psr/http-message/src'), 'Psr\\Http\\Message\\' => array($vendorDir . '/psr/http-factory/src', $vendorDir . '/psr/http-message/src'),
'Psr\\Http\\Client\\' => array($vendorDir . '/psr/http-client/src'), 'Psr\\Http\\Client\\' => array($vendorDir . '/psr/http-client/src'),

View File

@ -51,6 +51,10 @@ class ComposerStaticInitc94a8368bcea9853dd31683be0b15c06
'Symfony\\Component\\ErrorHandler\\' => 31, 'Symfony\\Component\\ErrorHandler\\' => 31,
'Stevenmaguire\\OAuth2\\Client\\' => 28, 'Stevenmaguire\\OAuth2\\Client\\' => 28,
), ),
'R' =>
array (
'RenanBr\\BibTexParser\\' => 21,
),
'P' => 'P' =>
array ( array (
'Psr\\Log\\' => 8, 'Psr\\Log\\' => 8,
@ -159,6 +163,10 @@ class ComposerStaticInitc94a8368bcea9853dd31683be0b15c06
array ( array (
0 => __DIR__ . '/..' . '/stevenmaguire/oauth2-bitbucket/src', 0 => __DIR__ . '/..' . '/stevenmaguire/oauth2-bitbucket/src',
), ),
'RenanBr\\BibTexParser\\' =>
array (
0 => __DIR__ . '/..' . '/renanbr/bibtex-parser/src',
),
'Psr\\Log\\' => 'Psr\\Log\\' =>
array ( array (
0 => __DIR__ . '/..' . '/psr/log/Psr/Log', 0 => __DIR__ . '/..' . '/psr/log/Psr/Log',

View File

@ -1402,6 +1402,71 @@
}, },
"install-path": "../ralouphie/getallheaders" "install-path": "../ralouphie/getallheaders"
}, },
{
"name": "renanbr/bibtex-parser",
"version": "2.2.0",
"version_normalized": "2.2.0.0",
"source": {
"type": "git",
"url": "https://github.com/renanbr/bibtex-parser.git",
"reference": "d02d2426822235f5179ecdf635ba710c9d6d2ddd"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/renanbr/bibtex-parser/zipball/d02d2426822235f5179ecdf635ba710c9d6d2ddd",
"reference": "d02d2426822235f5179ecdf635ba710c9d6d2ddd",
"shasum": ""
},
"require": {
"php": ">=5.6.0"
},
"require-dev": {
"phpunit/phpunit": ">=5.7",
"ryakad/pandoc-php": "^1.0"
},
"suggest": {
"ryakad/pandoc-php": "Needed to support LaTeX decoder in class RenanBr\\BibTexParser\\Processor\\LatexToUnicodeProcessor",
"ueberdosis/pandoc": "Alternate Pandoc PHP package which (if available) will be preferred over ryakad/pandoc-php"
},
"time": "2023-08-25T11:21:46+00:00",
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "2.x-dev"
}
},
"installation-source": "dist",
"autoload": {
"psr-4": {
"RenanBr\\BibTexParser\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Renan de Lima Barbosa",
"email": "renandelima@gmail.com"
}
],
"description": "BibTex Parser provides an API to read .bib files programmatically",
"keywords": [
"Bibliography",
"bib",
"bibtex",
"citation",
"cite",
"latex",
"parser"
],
"support": {
"issues": "https://github.com/renanbr/bibtex-parser/issues",
"source": "https://github.com/renanbr/bibtex-parser/tree/2.2.0"
},
"install-path": "../renanbr/bibtex-parser"
},
{ {
"name": "stevenmaguire/oauth2-bitbucket", "name": "stevenmaguire/oauth2-bitbucket",
"version": "3.0.0", "version": "3.0.0",

View File

@ -220,6 +220,15 @@
'aliases' => array(), 'aliases' => array(),
'dev_requirement' => false, 'dev_requirement' => false,
), ),
'renanbr/bibtex-parser' => array(
'pretty_version' => '2.2.0',
'version' => '2.2.0.0',
'reference' => 'd02d2426822235f5179ecdf635ba710c9d6d2ddd',
'type' => 'library',
'install_path' => __DIR__ . '/../renanbr/bibtex-parser',
'aliases' => array(),
'dev_requirement' => false,
),
'stevenmaguire/oauth2-bitbucket' => array( 'stevenmaguire/oauth2-bitbucket' => array(
'pretty_version' => '3.0.0', 'pretty_version' => '3.0.0',
'version' => '3.0.0.0', 'version' => '3.0.0.0',

18
vendor/renanbr/bibtex-parser/LICENSE vendored Normal file
View File

@ -0,0 +1,18 @@
Copyright (c) 2017 Renan de Lima Barbosa
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

74
vendor/renanbr/bibtex-parser/Makefile vendored Normal file
View File

@ -0,0 +1,74 @@
# Pick the container engine: podman when it is on the PATH, otherwise docker.
# `?=` only assigns when ENGINE is not already set, so the caller can override it.
# When neither tool is found ENGINE stays empty (see the check-engine target).
ifeq ($(shell type podman > /dev/null 2>&1; echo $$?), 0)
ENGINE ?= podman
else ifeq ($(shell type docker > /dev/null 2>&1; echo $$?), 0)
ENGINE ?= docker
endif
# PHP version used to select the image tags below; overridable by the caller.
PHP_VERSION ?= 7.4
# Base image, used directly by the coding-standard and clean targets.
IMAGE_BASE = jakzal/phpqa:php$(PHP_VERSION)
# Project image, built from IMAGE_BASE by the build-image target.
IMAGE = renanbr/bibtex-parser:php$(PHP_VERSION)
# Label attached to built images so the clean target can find and remove them.
LABEL = maintainer=renanbr-bibtex-parser
# Common prefix for running a command in a throwaway container with the
# current directory mounted at /project.
RUN = $(ENGINE) run --init -it --rm -v "$(CURDIR):/project" -w /project
# Running `make` without a target prints the help.
.DEFAULT_GOAL := help
help: ## Display this message help
@make -v | head -n 1
@awk '\
BEGIN {\
FS = ":.*##";\
printf "\n\033[33mUsage:\033[0m\n [PHP_VERSION=major.minor] make [target]\n\n\033[33mAvailable targets:\033[0m\n" \
} /^[a-zA-Z0-9_-]+:.*?##/ { \
printf " \033[32m%-18s\033[0m %s\n", $$1, $$2 \
} /^##/ { \
printf "\033[33m %s\033[0m\n", substr($$0, 4) \
}' $(MAKEFILE_LIST)
.PHONY: help
## Checks
check: static-analysis cs-check test ## Run all checks
static-analysis: vendor ## Run static analysis
$(RUN) $(IMAGE) phpstan analyse --verbose
.PHONY: static-analysis
cs-check: check-engine ## Check for coding standards violations
mkdir -p var
$(RUN) $(IMAGE_BASE) php-cs-fixer fix --dry-run --verbose
.PHONY: cs-check
test: vendor ## Run tests
$(RUN) $(IMAGE) php -d pcov.enabled=1 ./vendor/bin/phpunit --testdox --coverage-text --verbose
.PHONY: test
## Fixers
cs-fix: check-engine ## Fix coding standards
mkdir -p var
$(RUN) $(IMAGE_BASE) php-cs-fixer fix
.PHONY: cs-fix
## Misc
# Removes generated files through a container, then deletes every image carrying
# $(LABEL). The trailing `| true` makes the image removal always report success,
# so the target does not fail when there is nothing to remove.
clean: check-engine ## Clean up workspace
$(RUN) $(IMAGE_BASE) rm -rf composer.lock var/ vendor/
$(ENGINE) image rm --force $$($(ENGINE) images --filter "label=$(LABEL)" --quiet) 2>&1 | true
.PHONY: clean
vendor: build-image
$(RUN) $(IMAGE) composer install -vvv
## Container engine
check-engine:
ifeq ($(ENGINE),)
$(error "Container engine not found. Did you install podman or docker?")
endif
.PHONY: check-engine
build-image: check-engine
$(ENGINE) build --tag $(IMAGE) --build-arg FROM=$(IMAGE_BASE) --label $(LABEL) .docker/
.PHONY: build-image

569
vendor/renanbr/bibtex-parser/README.md vendored Normal file
View File

@ -0,0 +1,569 @@
<h1 align="center">PHP BibTeX Parser 2.x</h1>
<p align="center">
This is a
<a href="https://tug.org/bibtex/">BibTeX</a>
parser written in
<a href="https://php.net">PHP</a>.
</p>
<p align="center">
<a href="https://tug.org/bibtex/">
<img src="https://upload.wikimedia.org/wikipedia/commons/3/30/BibTeX_logo.svg" height="83" alt="BibTeX logo">
</a>
<a href="https://php.net">
<img src="https://upload.wikimedia.org/wikipedia/commons/2/27/PHP-logo.svg" height="83" alt="PHP logo">
</a>
</p>
![Tests](https://github.com/renanbr/bibtex-parser/workflows/Tests/badge.svg)
[![codecov](https://codecov.io/gh/renanbr/bibtex-parser/branch/master/graph/badge.svg)](https://codecov.io/gh/renanbr/bibtex-parser)
![Static Analysis](https://github.com/renanbr/bibtex-parser/workflows/Static%20Analysis/badge.svg)
![Coding Standards](https://github.com/renanbr/bibtex-parser/workflows/Coding%20Standards/badge.svg)
You are browsing the documentation of **BibTeX Parser 2.x**, the latest version.
## Table of contents
* [Installing](#installing)
* [Usage](#usage)
* [Vocabulary](#vocabulary)
* [Processors](#processors)
* [Tag name case](#tag-name-case)
* [Authors and editors](#authors-and-editors)
* [Keywords](#keywords)
* [Date](#date)
* [Fill missing tag](#fill-missing-tag)
* [Trim tags](#trim-tags)
* [Determine URL from the DOI](#determine-url-from-the-doi)
* [LaTeX to unicode](#latex-to-unicode)
* [Custom](#custom)
* [Handling errors](#handling-errors)
* [Advanced usage](#advanced-usage)
## Installing
```bash
composer require renanbr/bibtex-parser
```
## Usage
```php
use RenanBr\BibTexParser\Listener;
use RenanBr\BibTexParser\Parser;
use RenanBr\BibTexParser\Processor;
require 'vendor/autoload.php';
$bibtex = <<<BIBTEX
@article{einstein1916relativity,
title={Relativity: The Special and General Theory},
author={Einstein, Albert},
year={1916}
}
BIBTEX;
// Create and configure a Listener
$listener = new Listener();
$listener->addProcessor(new Processor\TagNameCaseProcessor(CASE_LOWER));
// $listener->addProcessor(new Processor\NamesProcessor());
// $listener->addProcessor(new Processor\KeywordsProcessor());
// $listener->addProcessor(new Processor\DateProcessor());
// $listener->addProcessor(new Processor\FillMissingProcessor([/* ... */]));
// $listener->addProcessor(new Processor\TrimProcessor());
// $listener->addProcessor(new Processor\UrlFromDoiProcessor());
// $listener->addProcessor(new Processor\LatexToUnicodeProcessor());
// ... you can append as many Processors as you want
// Create a Parser and attach the listener
$parser = new Parser();
$parser->addListener($listener);
// Parse the content, then read processed data from the Listener
$parser->parseString($bibtex); // or parseFile('/path/to/file.bib')
$entries = $listener->export();
print_r($entries);
```
This will output:
```
Array
(
[0] => Array
(
[_type] => article
[citation-key] => einstein1916relativity
[title] => Relativity: The Special and General Theory
[author] => Einstein, Albert
[year] => 1916
)
)
```
## Vocabulary
[BibTeX] is all about "entry", "tag's name" and "tag's content".
> A [BibTeX] **entry** consists of the type (the word after @), a citation-key and a number of tags which define various characteristics of the specific [BibTeX] entry.
> (...) A [BibTeX] **tag** is specified by its **name** followed by an equals sign, and the **content**.
Source: http://www.bibtex.org/Format/
Note:
This library considers "type" and "citation-key" as tags.
This behavior can be changed [implementing your own Listener](#advanced-usage).
## Processors
`Processor` is a [callable] that receives an entry as argument and returns a modified entry.
This library contains three main parts:
- `Parser` class, responsible for detecting units inside a [BibTeX] input;
- `Listener` class, responsible for gathering units and transforming them into a list of entries;
- `Processor` classes, responsible for manipulating entries.
Although you can't configure the `Parser`, you can append as many `Processor`s as you want to the `Listener` through `Listener::addProcessor()` before exporting the contents.
Be aware that `Listener` provides, by default, these features:
- Found entries are reachable through `Listener::export()` method;
- [Tag content concatenation](http://www.bibtex.org/Format/);
- e.g. `hello # " world"` tag's content will generate `hello world` [string]
- [Tag content abbreviation handling](http://www.bibtex.org/Format/);
- e.g. `@string{foo="bar"} @misc{bar=foo}` will make `$entries[1]['bar']` assume `bar` as value
- Publication's type exposed as `_type` tag;
- Citation key exposed as `citation-key` tag;
- Original entry text exposed as `_original` tag.
This project ships some useful processors.
### Tag name case
In [BibTeX] the tag's names aren't case-sensitive.
This library exposes entries as [array], in which keys are case-sensitive.
To avoid this misunderstanding, you can force the tags' name character case using `TagNameCaseProcessor`.
<details><summary>Usage</summary>
```php
use RenanBr\BibTexParser\Processor\TagNameCaseProcessor;
$listener->addProcessor(new TagNameCaseProcessor(CASE_UPPER)); // or CASE_LOWER
```
```bib
@article{
title={BibTeX rocks}
}
```
```
Array
(
[0] => Array
(
[TYPE] => article
[TITLE] => BibTeX rocks
)
)
```
</details>
### Authors and editors
[BibTeX] recognizes four parts of an author's name: First Von Last Jr.
If you would like to parse the `author` and `editor` tags included in your entries, you can use the `NamesProcessor` class.
<details><summary>Usage</summary>
```php
use RenanBr\BibTexParser\Processor\NamesProcessor;
$listener->addProcessor(new NamesProcessor());
```
```bib
@article{
title={Relativity: The Special and General Theory},
author={Einstein, Albert}
}
```
```
Array
(
[0] => Array
(
[type] => article
[title] => Relativity: The Special and General Theory
[author] => Array
(
[0] => Array
(
[first] => Albert
[von] =>
[last] => Einstein
[jr] =>
)
)
)
)
```
</details>
### Keywords
The `keywords` tag contains a list of expressions represented as [string], you might want to read them as an [array] instead.
<details><summary>Usage</summary>
```php
use RenanBr\BibTexParser\Processor\KeywordsProcessor;
$listener->addProcessor(new KeywordsProcessor());
```
```bib
@misc{
title={The End of Theory: The Data Deluge Makes the Scientific Method Obsolete},
keywords={big data, data deluge, scientific method}
}
```
```
Array
(
[0] => Array
(
[type] => misc
[title] => The End of Theory: The Data Deluge Makes the Scientific Method Obsolete
[keywords] => Array
(
[0] => big data
[1] => data deluge
[2] => scientific method
)
)
)
```
</details>
### Date
It adds a new tag `_date` as [DateTimeImmutable].
This processor adds the new tag **if and only if** the tags `month` and `year` are both filled in.
<details><summary>Usage</summary>
```php
use RenanBr\BibTexParser\Processor\DateProcessor;
$listener->addProcessor(new DateProcessor());
```
```bib
@misc{
month="1~oct",
year=2000
}
```
```
Array
(
[0] => Array
(
[type] => misc
[month] => 1~oct
[year] => 2000
[_date] => DateTimeImmutable Object
(
[date] => 2000-10-01 00:00:00.000000
[timezone_type] => 3
[timezone] => UTC
)
)
)
```
</details>
### Fill missing tag
It assigns a default value to tags that are missing from an entry.
<details><summary>Usage</summary>
```php
use RenanBr\BibTexParser\Processor\FillMissingProcessor;
$listener->addProcessor(new FillMissingProcessor([
'title' => 'This entry has no title',
'year' => 1970,
]));
```
```bib
@misc{
}
@misc{
title="I do exist"
}
```
```
Array
(
[0] => Array
(
[type] => misc
[title] => This entry has no title
[year] => 1970
)
[1] => Array
(
[type] => misc
[title] => I do exist
[year] => 1970
)
)
```
</details>
### Trim tags
Apply [trim()] to all tags.
<details><summary>Usage</summary>
```php
use RenanBr\BibTexParser\Processor\TrimProcessor;
$listener->addProcessor(new TrimProcessor());
```
```bib
@misc{
title=" too much space "
}
```
```
Array
(
[0] => Array
(
[type] => misc
[title] => too much space
)
)
```
</details>
### Determine URL from the DOI
Sets `url` tag with [DOI] if `doi` tag is present and `url` tag is missing.
<details><summary>Usage</summary>
```php
use RenanBr\BibTexParser\Processor\UrlFromDoiProcessor;
$listener->addProcessor(new UrlFromDoiProcessor());
```
```bib
@misc{
doi="qwerty"
}
@misc{
doi="azerty",
url="http://example.org"
}
```
```
Array
(
[0] => Array
(
[type] => misc
[doi] => qwerty
[url] => https://doi.org/qwerty
)
[1] => Array
(
[type] => misc
[doi] => azerty
[url] => http://example.org
)
)
```
</details>
### LaTeX to unicode
[BibTeX] files store [LaTeX] contents.
You might want to read them as unicode instead.
The `LatexToUnicodeProcessor` class solves this problem, but before adding the processor to the listener you must:
- [install Pandoc](http://pandoc.org/installing.html) in your system; and
- add [ryakad/pandoc-php](https://github.com/ryakad/pandoc-php) or [ueberdosis/pandoc](https://github.com/ueberdosis/pandoc) as a dependency of your project.
<details><summary>Usage</summary>
```php
use RenanBr\BibTexParser\Processor\LatexToUnicodeProcessor;
$listener->addProcessor(new LatexToUnicodeProcessor());
```
```bib
@article{
title={Caf\\'{e}s and bars}
}
```
```
Array
(
[0] => Array
(
[type] => article
[title] => Cafés and bars
)
)
```
</details>
Note: Order matters; add this processor last.
### Custom
The `Listener::addProcessor()` method expects a [callable] as argument.
In the example shown below, we append the text `with laser` to the `title` tags for all entries.
<details><summary>Usage</summary>
```php
$listener->addProcessor(static function (array $entry) {
$entry['title'] .= ' with laser';
return $entry;
});
```
```
@article{
title={BibTeX rocks}
}
```
```
Array
(
[0] => Array
(
[type] => article
[title] => BibTeX rocks with laser
)
)
```
</details>
## Handling errors
This library throws two types of exception: `ParserException` and `ProcessorException`.
The first one may happen during the data extraction.
When it occurs it probably means the parsed BibTeX isn't valid.
The second exception may happen during the data processing.
When it occurs it means the listener's processors can't properly handle the data found.
Both implement `ExceptionInterface`.
```php
use RenanBr\BibTexParser\Exception\ExceptionInterface;
use RenanBr\BibTexParser\Exception\ParserException;
use RenanBr\BibTexParser\Exception\ProcessorException;
try {
// ... parser and listener configuration
$parser->parseFile('/path/to/file.bib');
$entries = $listener->export();
} catch (ParserException $exception) {
// The BibTeX isn't valid
} catch (ProcessorException $exception) {
// Listener's processors aren't able to handle data found
} catch (ExceptionInterface $exception) {
// Alternatively, you can use this exception to catch all of them at once
}
```
## Advanced usage
The core of this library contains these main classes:
- `RenanBr\BibTexParser\Parser` responsible for detecting units inside a [BibTeX] input;
- `RenanBr\BibTexParser\ListenerInterface` responsible for treating units found.
You can attach listeners to the parser through `Parser::addListener()`.
The parser is able to detect [BibTeX] units, such as "type", "tag's name", "tag's content".
As the parser finds a unit, it triggers the listeners attached to it.
You can code your own listener! All you have to do is handle units.
```php
namespace RenanBr\BibTexParser;
interface ListenerInterface
{
/**
* Called when an unit is found.
*
* @param string $text The original content of the unit found.
* Escape character will not be sent.
* @param string $type The type of unit found.
* It can assume one of Parser's constant value.
* @param array $context Contains details of the unit found.
*/
public function bibTexUnitFound($text, $type, array $context);
}
```
`$type` may assume one of these values:
- `Parser::TYPE`
- `Parser::CITATION_KEY`
- `Parser::TAG_NAME`
- `Parser::RAW_TAG_CONTENT`
- `Parser::BRACED_TAG_CONTENT`
- `Parser::QUOTED_TAG_CONTENT`
- `Parser::ENTRY`
`$context` is an [array] with these keys:
- `offset` contains the `$text`'s beginning position.
It may be useful, for example, to [seek on a file pointer](https://php.net/fseek);
- `length` contains the original `$text`'s length.
It may differ from the length of the [string] sent to the listener because there may be escaped characters.
[BibTeX]: https://tug.org/bibtex/
[DOI]: https://www.doi.org/
[DateTimeImmutable]: https://www.php.net/manual/class.datetimeimmutable.php
[LaTeX]: https://www.latex-project.org/
[array]: https://php.net/manual/language.types.array.php
[callable]: https://php.net/manual/en/language.types.callable.php
[string]: https://php.net/manual/language.types.string.php
[trim()]: https://www.php.net/trim

View File

@ -0,0 +1,50 @@
{
"name": "renanbr/bibtex-parser",
"type": "library",
"description": "BibTex Parser provides an API to read .bib files programmatically",
"keywords": [
"bib",
"bibtex",
"latex",
"parser",
"bibliography",
"citation",
"cite"
],
"license": "MIT",
"authors": [
{
"name": "Renan de Lima Barbosa",
"email": "renandelima@gmail.com"
}
],
"require": {
"php": ">=5.6.0"
},
"require-dev": {
"phpunit/phpunit": ">=5.7",
"ryakad/pandoc-php": "^1.0"
},
"suggest": {
"ryakad/pandoc-php": "Needed to support LaTeX decoder in class RenanBr\\BibTexParser\\Processor\\LatexToUnicodeProcessor",
"ueberdosis/pandoc": "Alternate Pandoc PHP package which (if available) will be preferred over ryakad/pandoc-php"
},
"config": {
"sort-packages": true
},
"extra": {
"branch-alias": {
"dev-master": "2.x-dev"
}
},
"autoload": {
"psr-4": {
"RenanBr\\BibTexParser\\": "src/"
}
},
"autoload-dev": {
"psr-4": {
"RenanBr\\BibTexParser\\Test\\": "tests/"
}
}
}

View File

@ -0,0 +1,19 @@
<?php
/*
* This file is part of the BibTex Parser.
*
* (c) Renan de Lima Barbosa <renandelima@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace RenanBr\BibTexParser\Exception;
/**
 * Marker interface for package exceptions.
 *
 * It declares no methods. ParserException and ProcessorException both
 * implement it, so a single `catch (ExceptionInterface $e)` clause catches
 * either of them.
 */
interface ExceptionInterface
{
}

View File

@ -0,0 +1,35 @@
<?php
/*
* This file is part of the BibTex Parser.
*
* (c) Renan de Lima Barbosa <renandelima@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace RenanBr\BibTexParser\Exception;
use Exception;
class ParserException extends Exception implements ExceptionInterface
{
    /**
     * Builds the exception raised when the parser meets a character it cannot accept.
     *
     * @param string $character the offending character
     * @param int    $line      line reported in the message
     * @param int    $column    column reported in the message
     *
     * @return self
     */
    public static function unexpectedCharacter($character, $line, $column)
    {
        if ("\0" === $character) {
            // var_export() renders the NUL byte awkwardly, so spell it out by hand
            $printable = "'\\0'";
        } else {
            $printable = var_export($character, true);
        }

        $message = sprintf('Unexpected character %s at line %d column %d', $printable, $line, $column);

        return new self($message);
    }
}

View File

@ -0,0 +1,18 @@
<?php
/*
* This file is part of the BibTex Parser.
*
* (c) Renan de Lima Barbosa <renandelima@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace RenanBr\BibTexParser\Exception;
use Exception;
/**
 * Exception type reserved for failures while processing entries.
 *
 * The package README describes it as the error raised when the listener's
 * processors cannot properly handle the data found. The class adds nothing to
 * the base Exception besides the ExceptionInterface marker.
 */
class ProcessorException extends Exception implements ExceptionInterface
{
}

View File

@ -0,0 +1,109 @@
<?php
/*
* This file is part of the BibTex Parser.
*
* (c) Renan de Lima Barbosa <renandelima@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace RenanBr\BibTexParser;
/**
 * Default listener: gathers the units reported by the Parser into a list of
 * entries and runs the registered processors over them on export.
 */
class Listener implements ListenerInterface
{
    /** @var array raw entries, in the order the parser reported them */
    private $entries = [];

    /**
     * Current tag name.
     *
     * Indicates where to save contents when triggered by the parser.
     *
     * @var string
     */
    private $currentTagName;

    /** @var array callables applied to every entry by export(), in insertion order */
    private $processors = [];

    /** @var array entries that already went through the processors */
    private $processed = [];

    /**
     * Number of raw entries already handed to the processors.
     *
     * Kept separately from count($this->processed): processors may drop
     * entries (export() filters out empty results), so the size of the
     * processed list cannot be used as an offset into the raw entries.
     *
     * @var int
     */
    private $processedCount = 0;

    /**
     * Runs the processors over the entries not exported yet and returns the
     * accumulated result.
     *
     * Entries for which a processor returns an empty value are discarded.
     * Entries processed by a previous call are not processed again.
     *
     * @return array all entries found during parsing process
     */
    public function export()
    {
        $total = \count($this->entries);
        $missing = \array_slice($this->entries, $this->processedCount);
        foreach ($this->processors as $processor) {
            $missing = array_filter(array_map($processor, $missing));
        }
        // Advance the cursor only once every processor succeeded, so that an
        // exception thrown by a processor leaves the pending entries untouched
        $this->processedCount = $total;
        $this->processed = array_merge($this->processed, $missing);

        return $this->processed;
    }

    /**
     * @param callable $processor Function to be applied to every BibTeX entry.
     *                            The processor given must return the modified entry.
     *                            Processors will be applied in the same order in which they were added.
     *                            The suggested signature is:
     *                            function (array $entry): array
     */
    public function addProcessor(callable $processor)
    {
        $this->processors[] = $processor;
    }

    /**
     * Stores a unit reported by the parser into the entry being built.
     *
     * @param string $text    content of the unit
     * @param string $type    one of the Parser unit type constants
     * @param array  $context details of the unit; not used by this listener
     */
    public function bibTexUnitFound($text, $type, array $context)
    {
        switch ($type) {
            case Parser::TYPE:
                // Starts a new entry
                $this->entries[] = [
                    '_type' => $text,
                    'type' => $text, // compatibility
                ];
                break;

            case Parser::CITATION_KEY:
                $index = \count($this->entries) - 1;
                $this->entries[$index]['citation-key'] = $text;
                break;

            case Parser::TAG_NAME:
                // Saves tag into the current entry
                $index = \count($this->entries) - 1;
                $this->currentTagName = $text;
                $this->entries[$index][$this->currentTagName] = null;
                break;

            case Parser::RAW_TAG_CONTENT:
                // Searches for an abbreviation: the first @string entry
                // defining a tag named like the raw content wins
                foreach ($this->entries as $entry) {
                    if ('string' === $entry['type'] && \array_key_exists($text, $entry)) {
                        $text = $entry[$text];
                        break;
                    }
                }
                // no break
            case Parser::BRACED_TAG_CONTENT:
            case Parser::QUOTED_TAG_CONTENT:
                // Appends content into the current tag; successive units for
                // the same tag are concatenated
                if (null !== $text) {
                    $index = \count($this->entries) - 1;
                    $this->entries[$index][$this->currentTagName] .= $text;
                }
                break;

            case Parser::ENTRY:
                $index = \count($this->entries) - 1;
                $this->entries[$index]['_original'] = $text;
                break;
        }
    }
}

View File

@ -0,0 +1,26 @@
<?php
/*
* This file is part of the BibTex Parser.
*
* (c) Renan de Lima Barbosa <renandelima@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace RenanBr\BibTexParser;
/**
 * Contract for objects that want to be notified of the units the Parser finds.
 */
interface ListenerInterface
{
/**
 * Called when a unit is found.
 *
 * @param string $text The original content of the unit found.
 * Escape character will not be sent.
 * @param string $type The type of unit found.
 * It can assume one of Parser's constant value.
 * @param array $context Contains details of the unit found. The package
 * README documents two keys: `offset` (position where
 * the unit's text begins) and `length` (length of the
 * original text, which may differ from the length of
 * $text when escaped characters are present).
 */
public function bibTexUnitFound($text, $type, array $context);
}

View File

@ -0,0 +1,566 @@
<?php
/*
* This file is part of the BibTex Parser.
*
* (c) Renan de Lima Barbosa <renandelima@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace RenanBr\BibTexParser;
use ErrorException;
use RenanBr\BibTexParser\Exception\ParserException;
class Parser
{
// Unit types passed as $type to ListenerInterface::bibTexUnitFound().
// Several of them (TYPE, TAG_NAME and the three *_TAG_CONTENT values) are also
// used as parser states, see read().
const TYPE = 'type';
const CITATION_KEY = 'citation_key';
const TAG_NAME = 'tag_name';
const RAW_TAG_CONTENT = 'raw_tag_content';
const BRACED_TAG_CONTENT = 'braced_tag_content';
const QUOTED_TAG_CONTENT = 'quoted_tag_content';
const ENTRY = 'entry';
// Remaining parser states dispatched by read(). They are not among the unit
// types the package README lists for listeners.
const NONE = 'none';
const COMMENT = 'comment';
const FIRST_TAG_NAME = 'first_tag_name';
const POST_TYPE = 'post_type';
const POST_TAG_NAME = 'post_tag_name';
const PRE_TAG_CONTENT = 'pre_tag_content';
/** @var string current state; holds one of the class constants, starts as NONE */
private $state;
/** @var string characters accumulated for the unit currently being read */
private $buffer;
/** @var int|null presumably the offset at which the buffered unit starts - TODO confirm */
private $bufferOffset;
/** @var array|null snapshot of the first tag name, kept until it is known whether it is a tag name or a citation key */
private $firstTagSnapshot;
/** @var string|null */
private $originalEntryBuffer;
/** @var int|null */
private $originalEntryOffset;
/** @var bool set when a "comment" type is met, see readType() */
private $skipOriginalEntryReading;
/** @var int line of the character being read, counted from 1 */
private $line;
/** @var int column of the character being read, counted from 1 and reset after each "\n" */
private $column;
/** @var int number of characters consumed since the last reset */
private $offset;
/** @var bool */
private $isTagContentEscaped;
/** @var bool */
private $mayConcatenateTagContent;
/** @var string|null character expected to close the delimited content being read ('"' or '}') */
private $tagContentDelimiter;
/** @var int */
private $braceLevel;
/** @var ListenerInterface[] listeners registered through addListener() */
private $listeners = [];
/**
 * Registers a listener to be notified of the units found while parsing.
 *
 * Listeners are stored in the order in which they are added.
 */
public function addListener(ListenerInterface $listener)
{
    \array_push($this->listeners, $listener);
}
/**
 * Parses the BibTeX file at the given path.
 *
 * The file is read in small fixed-size chunks. Chunk boundaries are byte
 * positions, whereas parse() walks its input character by character with the
 * mb_* functions; a multi-byte UTF-8 character cut in half by a boundary is
 * therefore held back and prepended to the next chunk, so parse() never
 * receives a fragment of a character.
 *
 * @param string $file
 *
 * @throws ParserException if $file given is not a valid BibTeX
 * @throws ErrorException if $file given is not readable
 */
public function parseFile($file)
{
    $handle = @fopen($file, 'r');
    if (!$handle) {
        throw new ErrorException(sprintf('Unable to open %s', $file));
    }

    try {
        $this->reset();
        $pending = '';
        while (!feof($handle)) {
            $chunk = fread($handle, 128);
            if (false === $chunk) {
                // Read error: stop instead of looping on a handle that may never report EOF
                break;
            }

            $buffer = $pending.$chunk;
            $pending = '';
            $length = \strlen($buffer);

            // Walk back over at most three bytes to find the lead byte of the
            // last character and check whether all of its bytes are present
            for ($back = 1; $back <= 3 && $back <= $length; ++$back) {
                $byte = \ord($buffer[$length - $back]);
                if (0x80 === ($byte & 0xC0)) {
                    // Continuation byte, the lead byte is further back
                    continue;
                }

                if ($byte >= 0xF0) {
                    $expected = 4;
                } elseif ($byte >= 0xE0) {
                    $expected = 3;
                } elseif ($byte >= 0xC0) {
                    $expected = 2;
                } else {
                    $expected = 1;
                }

                if ($expected > $back) {
                    // Incomplete character: keep its bytes for the next chunk
                    $pending = substr($buffer, $length - $back);
                    $buffer = substr($buffer, 0, $length - $back);
                }
                break;
            }

            $this->parse($buffer);
        }

        // Bytes still pending at end of file cannot be completed; hand them
        // over as they are so that no input is silently dropped
        if ('' !== $pending) {
            $this->parse($pending);
        }

        $this->throwExceptionIfReadingEntry("\0");
    } finally {
        fclose($handle);
    }
}
/**
 * Parses BibTeX content held in memory.
 *
 * The parser state is reset first, so every call starts from scratch.
 *
 * @param string $string
 *
 * @throws ParserException if $string given is not a valid BibTeX
 */
public function parseString($string)
{
$this->reset();
$this->parse($string);
// "\0" is the character handed to the end-of-input check
$this->throwExceptionIfReadingEntry("\0");
}
/**
 * Feeds the given text to the state machine one character at a time, keeping
 * the line, column and offset counters up to date.
 *
 * @param string $text
 */
private function parse($text)
{
    $total = mb_strlen($text);
    $index = 0;
    while ($index < $total) {
        $current = mb_substr($text, $index, 1);
        $this->read($current);

        // Counters are moved only after the character has been read, so they
        // still point at it while read() runs
        if ("\n" !== $current) {
            ++$this->column;
        } else {
            ++$this->line;
            $this->column = 1;
        }
        ++$this->offset;

        ++$index;
    }
}
/**
 * Puts the parser back into its initial state before a new input is parsed.
 */
private function reset()
{
    // Position tracking: line and column start at 1, offset at 0
    $this->line = 1;
    $this->column = 1;
    $this->offset = 0;

    // State machine and pending buffers
    $this->state = self::NONE;
    $this->buffer = '';
    $this->firstTagSnapshot = null;
    $this->originalEntryBuffer = null;
    $this->originalEntryOffset = null;
    $this->skipOriginalEntryReading = false;

    // Tag content bookkeeping
    $this->tagContentDelimiter = null;
    $this->isTagContentEscaped = false;
    $this->mayConcatenateTagContent = false;
    $this->braceLevel = 0;
}
// ----- Readers -----------------------------------------------------------
/**
 * Hands the character to the reader matching the current state, then lets
 * readOriginalEntry() see it together with the state the parser was in before
 * the reader ran.
 *
 * @param string $char
 */
private function read($char)
{
    // State => reader method; states without an entry have no reader
    static $readers = [
        self::NONE => 'readNone',
        self::COMMENT => 'readComment',
        self::TYPE => 'readType',
        self::POST_TYPE => 'readPostType',
        self::FIRST_TAG_NAME => 'readTagName',
        self::TAG_NAME => 'readTagName',
        self::POST_TAG_NAME => 'readPostTagName',
        self::PRE_TAG_CONTENT => 'readPreTagContent',
        self::RAW_TAG_CONTENT => 'readRawTagContent',
        self::QUOTED_TAG_CONTENT => 'readDelimitedTagContent',
        self::BRACED_TAG_CONTENT => 'readDelimitedTagContent',
    ];

    $stateBeforeRead = $this->state;
    if (isset($readers[$stateBeforeRead])) {
        $reader = $readers[$stateBeforeRead];
        $this->$reader($char);
    }

    $this->readOriginalEntry($char, $stateBeforeRead);
}
/**
 * Reader for the NONE state: "@" starts a type, whitespace is ignored and
 * anything else starts a comment.
 *
 * @param string $char
 */
private function readNone($char)
{
    if ('@' === $char) {
        $this->state = self::TYPE;

        return;
    }

    if ($this->isWhitespace($char)) {
        return;
    }

    $this->state = self::COMMENT;
}
/**
 * Reader for the COMMENT state: the comment lasts until the first whitespace
 * character, which sends the parser back to NONE.
 *
 * @param string $char
 */
private function readComment($char)
{
    if (!$this->isWhitespace($char)) {
        return;
    }

    $this->state = self::NONE;
}
/**
 * Reader for the TYPE state: buffers ASCII letters, and on the first other
 * character either discards a "comment" type or reports the type to the
 * listeners and moves on to POST_TYPE.
 *
 * @param string $char
 */
private function readType($char)
{
    if (preg_match('/^[a-zA-Z]$/', $char)) {
        $this->appendToBuffer($char);

        return;
    }

    $this->throwExceptionIfBufferIsEmpty($char);

    $isCommentType = 'comment' === mb_strtolower($this->buffer);
    if ($isCommentType) {
        // Skips @comment type: drop what was buffered and treat the rest as
        // a regular comment
        $this->skipOriginalEntryReading = true;
        $this->buffer = '';
        $this->bufferOffset = null;
        $this->state = self::COMMENT;
        $this->readComment($char);

        return;
    }

    $this->triggerListenersWithCurrentBuffer();

    // $char doesn't belong to the type, so it has to be read again as the
    // first character following it
    $this->state = self::POST_TYPE;
    $this->readPostType($char);
}
/**
 * Reader for the POST_TYPE state: waits for the "{" opening the entry,
 * tolerating whitespace and rejecting anything else.
 *
 * @param string $char
 *
 * @throws ParserException if $char is neither "{" nor whitespace
 */
private function readPostType($char)
{
    if ('{' === $char) {
        $this->state = self::FIRST_TAG_NAME;

        return;
    }

    if ($this->isWhitespace($char)) {
        return;
    }

    throw ParserException::unexpectedCharacter($char, $this->line, $this->column);
}
/**
 * Reader shared by the FIRST_TAG_NAME and TAG_NAME states: buffers the
 * characters allowed in a tag name and, once the name ends, hands it over and
 * switches to POST_TAG_NAME.
 *
 * @param string $char
 */
private function readTagName($char)
{
    if (preg_match('/^[a-zA-Z0-9_\+:\-\.\/\x{00C0}-\x{01FF}]$/u', $char)) {
        $this->appendToBuffer($char);

        return;
    }

    if ($this->isWhitespace($char) && empty($this->buffer)) {
        // Leading whitespace, the name hasn't started yet
        return;
    }

    if ('}' === $char && empty($this->buffer)) {
        // No tag name found, $char is just closing current entry
        $this->state = self::NONE;

        return;
    }

    $this->throwExceptionIfBufferIsEmpty($char);

    if (self::FIRST_TAG_NAME !== $this->state) {
        // Current buffer is a simple tag name
        $this->triggerListenersWithCurrentBuffer();
    } else {
        // Too early to tell a tag name from a citation key: keep a snapshot
        // to be triggered later, see readPostTagName()
        $this->firstTagSnapshot = $this->takeBufferSnapshot();
    }

    // $char ended the name, so it is read again as the first character
    // following it
    $this->state = self::POST_TAG_NAME;
    $this->readPostTagName($char);
}
/**
 * Handles a character read while the parser is in the POST_TAG_NAME state.
 *
 * The character decides how the pending first-tag snapshot (if any) is
 * reported: "=" reports it as a tag name, "}" and "," report it as a
 * citation key. Whitespace is ignored.
 *
 * @param string $char
 *
 * @throws ParserException when $char is not "=", "}", "," or whitespace
 */
private function readPostTagName($char)
{
    if ('=' === $char) {
        // First tag name isn't a citation key, because it has content
        $this->triggerListenersWithFirstTagSnapshotAs(self::TAG_NAME);
        $this->state = self::PRE_TAG_CONTENT;

        return;
    }

    if ('}' === $char || ',' === $char) {
        // First tag name is a citation key, because it was left without
        // value: "}" closes the entry, "," moves on to the next tag
        $this->triggerListenersWithFirstTagSnapshotAs(self::CITATION_KEY);
        $this->state = '}' === $char ? self::NONE : self::TAG_NAME;

        return;
    }

    if (!$this->isWhitespace($char)) {
        throw ParserException::unexpectedCharacter($char, $this->line, $this->column);
    }
}
/**
 * Handles a character read while the parser is in the PRE_TAG_CONTENT state.
 *
 * Characters that start a value (alphanumeric, '"', "{") are only accepted
 * while concatenation is NOT available; characters that follow a value
 * ("#", ",", "}") are only accepted while it IS available. Whitespace is
 * ignored.
 *
 * @param string $char
 *
 * @throws ParserException when $char is not allowed at this position
 */
private function readPreTagContent($char)
{
    // Raw value, e.g. a number or an abbreviation
    if (preg_match('/^[a-zA-Z0-9]$/', $char)) {
        // When concatenation is available it means there is already a
        // defined value, and parser expect a concatenator, a tag separator
        // or an entry closing char as next $char
        $this->throwExceptionAccordingToConcatenationAvailability($char, true);
        $this->state = self::RAW_TAG_CONTENT;
        $this->readRawTagContent($char);

        return;
    }

    // Delimited value, quoted or braced
    if ('"' === $char || '{' === $char) {
        $isQuoted = '"' === $char;
        // Same reasoning as for raw values above
        $this->throwExceptionAccordingToConcatenationAvailability($char, true);
        $this->tagContentDelimiter = $isQuoted ? '"' : '}';
        $this->state = $isQuoted ? self::QUOTED_TAG_CONTENT : self::BRACED_TAG_CONTENT;

        return;
    }

    // Concatenator, tag separator or entry closing char
    if ('#' === $char || ',' === $char || '}' === $char) {
        $this->throwExceptionAccordingToConcatenationAvailability($char, false);
        $this->mayConcatenateTagContent = false;
        if (',' === $char) {
            $this->state = self::TAG_NAME;
        } elseif ('}' === $char) {
            $this->state = self::NONE;
        }

        return;
    }

    if (!$this->isWhitespace($char)) {
        throw ParserException::unexpectedCharacter($char, $this->line, $this->column);
    }
}
/**
 * Handles a character read while the parser is in the RAW_TAG_CONTENT state.
 *
 * Accepted characters are accumulated in the buffer; the first other
 * character flushes the buffer to the listeners, enables concatenation
 * and is re-read as PRE_TAG_CONTENT.
 *
 * @param string $char
 */
private function readRawTagContent($char)
{
    $belongsToRawContent = 1 === preg_match('/^[a-zA-Z0-9_\+:\-\.\/]$/', $char);
    if ($belongsToRawContent) {
        $this->appendToBuffer($char);

        return;
    }

    $this->throwExceptionIfBufferIsEmpty($char);
    $this->triggerListenersWithCurrentBuffer();

    // $char doesn't belong to the raw content, so it is handed over to
    // the PRE_TAG_CONTENT reader
    $this->mayConcatenateTagContent = true;
    $this->state = self::PRE_TAG_CONTENT;
    $this->readPreTagContent($char);
}
/**
 * Consumes one character of a delimited tag value.
 *
 * The closing delimiter is taken from $this->tagContentDelimiter. Nested
 * braces are tracked through $this->braceLevel, and a backslash marks the
 * next character as escaped.
 *
 * @param string $char
 */
private function readDelimitedTagContent($char)
{
    $delimiter = $this->tagContentDelimiter;

    // Character right after a backslash
    if ($this->isTagContentEscaped) {
        $this->isTagContentEscaped = false;
        // The backslash is dropped only in front of the delimiter,
        // another backslash or a percent sign
        $dropsBackslash = $delimiter === $char || '\\' === $char || '%' === $char;
        if (!$dropsBackslash) {
            $this->appendToBuffer('\\');
        }
        $this->appendToBuffer($char);

        return;
    }

    // Opening brace nested inside braced content
    if ('}' === $delimiter && '{' === $char) {
        ++$this->braceLevel;
        $this->appendToBuffer($char);

        return;
    }

    if ($delimiter === $char) {
        if (0 !== $this->braceLevel) {
            // Closes a nested level, still part of the content
            --$this->braceLevel;
            $this->appendToBuffer($char);

            return;
        }

        // Closes the tag content itself
        $this->triggerListenersWithCurrentBuffer();
        $this->mayConcatenateTagContent = true;
        $this->state = self::PRE_TAG_CONTENT;

        return;
    }

    if ('\\' === $char) {
        $this->isTagContentEscaped = true;

        return;
    }

    $this->appendToBuffer($char);
}
/**
 * Accumulates the raw text of the entry being read and reports it to the
 * listeners as an ENTRY unit once the entry is closed.
 *
 * @param string $char
 * @param string $previousState State the parser was in before $char was processed
 */
private function readOriginalEntry($char, $previousState)
{
    // A skip request discards whatever was collected so far
    if ($this->skipOriginalEntryReading) {
        $this->originalEntryBuffer = '';
        $this->originalEntryOffset = null;
        $this->skipOriginalEntryReading = false;

        return;
    }

    $wasInsideEntry = $this->isEntryState($previousState);
    $isInsideEntry = $this->isEntryState($this->state);

    // $char doesn't belong to any entry
    if (!$wasInsideEntry && !$isInsideEntry) {
        return;
    }

    // The first character collected defines where the entry starts
    if (empty($this->originalEntryBuffer)) {
        $this->originalEntryOffset = $this->offset;
    }
    $this->originalEntryBuffer .= $char;

    // Nothing else to do unless $char has just closed the entry
    if (!$wasInsideEntry || $isInsideEntry) {
        return;
    }

    $context = [
        'offset' => $this->originalEntryOffset,
        'length' => $this->offset - $this->originalEntryOffset + 1,
    ];
    $this->triggerListeners($this->originalEntryBuffer, self::ENTRY, $context);
    $this->originalEntryBuffer = '';
    $this->originalEntryOffset = null;
}
// ----- Listener triggers -------------------------------------------------
/**
 * Notifies every registered listener about a unit found in the input.
 *
 * @param string $text    Text of the unit
 * @param string $type    Type of the unit
 * @param array  $context Extra data forwarded untouched to the listeners
 */
private function triggerListeners($text, $type, array $context)
{
    foreach ($this->listeners as $registered) {
        $registered->bibTexUnitFound($text, $type, $context);
    }
}
/**
 * Flushes the current buffer to the listeners, using the current parser
 * state as the unit type.
 */
private function triggerListenersWithCurrentBuffer()
{
    $flushed = $this->takeBufferSnapshot();

    $this->triggerListeners($flushed['text'], $this->state, $flushed['context']);
}
/**
 * Sends the pending first-tag snapshot, if any, to the listeners as a unit
 * of the given type, then forgets it.
 *
 * @param string $type
 */
private function triggerListenersWithFirstTagSnapshotAs($type)
{
    $pending = $this->firstTagSnapshot;
    if (empty($pending)) {
        return;
    }

    // Cleared before notifying, so the snapshot is reported only once
    $this->firstTagSnapshot = null;
    $this->triggerListeners($pending['text'], $type, $pending['context']);
}
// ----- Buffer tools ------------------------------------------------------
/**
 * Appends a character to the buffer, recording the current offset as the
 * buffer offset when the buffer is still empty.
 *
 * @param string $char
 */
private function appendToBuffer($char)
{
    // Strict comparison on purpose: empty() considers the string "0" empty,
    // which moved the recorded offset forward for values such as "01"
    if ('' === (string) $this->buffer) {
        $this->bufferOffset = $this->offset;
    }
    $this->buffer .= $char;
}
/**
 * Returns the buffered text together with its position, then resets the
 * buffer.
 *
 * @return array Keys "text" and "context"; the context holds "offset" and "length"
 */
private function takeBufferSnapshot()
{
    $text = $this->buffer;
    $context = [
        'offset' => $this->bufferOffset,
        'length' => $this->offset - $this->bufferOffset,
    ];

    $this->bufferOffset = null;
    $this->buffer = '';

    return [
        'text' => $text,
        'context' => $context,
    ];
}
// ----- Exception throwers ------------------------------------------------
/**
 * Rejects $char when the concatenation flag equals the given value.
 *
 * @param string $char
 * @param bool   $availability Value of the concatenation flag for which $char is NOT allowed
 *
 * @throws ParserException when $availability matches $this->mayConcatenateTagContent
 */
private function throwExceptionAccordingToConcatenationAvailability($char, $availability)
{
    if ($availability !== $this->mayConcatenateTagContent) {
        return;
    }

    throw ParserException::unexpectedCharacter($char, $this->line, $this->column);
}
/**
 * Rejects $char when nothing has been buffered yet.
 *
 * @param string $char
 *
 * @throws ParserException when the buffer holds no character at all
 */
private function throwExceptionIfBufferIsEmpty($char)
{
    // Strict comparison on purpose: empty() considers the string "0" empty,
    // which rejected valid input such as "volume = 0,"
    if ('' === (string) $this->buffer) {
        throw ParserException::unexpectedCharacter($char, $this->line, $this->column);
    }
}
/**
 * Rejects $char when the parser is currently in an entry state.
 *
 * @param string $char
 *
 * @throws ParserException when the current state is an entry state
 */
private function throwExceptionIfReadingEntry($char)
{
    if (!$this->isEntryState($this->state)) {
        return;
    }

    throw ParserException::unexpectedCharacter($char, $this->line, $this->column);
}
// ----- Auxiliaries -------------------------------------------------------
/**
 * Tells whether a state belongs to entry reading, i.e. it is neither NONE
 * nor COMMENT.
 *
 * @param string $state
 *
 * @return bool
 */
private function isEntryState($state)
{
    $isOutsideEntry = self::NONE === $state || self::COMMENT === $state;

    return !$isOutsideEntry;
}
/**
 * Tells whether $char is a space, a tab, a line feed or a carriage return.
 *
 * @param string $char
 *
 * @return bool
 */
private function isWhitespace($char)
{
    return \in_array($char, [' ', "\t", "\n", "\r"], true);
}
}

View File

@ -0,0 +1,58 @@
<?php
/*
* This file is part of the BibTex Parser.
*
* (c) Renan de Lima Barbosa <renandelima@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace RenanBr\BibTexParser\Processor;
use DateTimeImmutable;
/**
 * Builds a date object out of the "year" and "month" tags of an entry.
 *
 * The month tag is expected in the form "<day>~<month>"; entries that do
 * not match, or that do not describe a valid calendar date, are returned
 * untouched.
 */
class DateProcessor
{
    use TagSearchTrait;

    const TAG_NAME = '_date';

    /**
     * @var string Name of the tag that receives the date
     */
    private $tagName;

    /**
     * @param string|null $tagName Name of the tag that receives the date;
     *                             self::TAG_NAME is used when empty
     */
    public function __construct($tagName = null)
    {
        $this->tagName = $tagName ?: self::TAG_NAME;
    }

    /**
     * @return array The entry, with a DateTimeImmutable stored under the
     *               configured tag when a valid date could be built
     */
    public function __invoke(array $entry)
    {
        $tags = array_keys($entry);
        $yearTag = $this->tagSearch('year', $tags);
        $monthTag = $this->tagSearch('month', $tags);
        if (null === $yearTag || null === $monthTag) {
            return $entry;
        }

        // Values that cannot be read as text (arrays, objects, null) cannot
        // describe a date, and must not reach explode() or the int cast
        if (!is_scalar($entry[$yearTag]) || !is_scalar($entry[$monthTag])) {
            return $entry;
        }

        $monthParts = explode('~', (string) $entry[$monthTag]);
        if (2 !== \count($monthParts)) {
            return $entry;
        }

        list($day, $monthText) = $monthParts;

        // date_parse() sets "month" to false when it recognises none, and
        // checkdate() only takes integers
        $parsed = date_parse($monthText);
        if (!\is_int($parsed['month'])) {
            return $entry;
        }

        $year = (int) $entry[$yearTag];
        $month = $parsed['month'];
        $day = (int) $day;
        if (!checkdate($month, $day, $year)) {
            return $entry;
        }

        $timestamp = mktime(0, 0, 0, $month, $day, $year);
        $entry[$this->tagName] = new DateTimeImmutable(date('Y-m-d', $timestamp), new \DateTimeZone('UTC'));

        return $entry;
    }
}

View File

@ -0,0 +1,40 @@
<?php
/*
* This file is part of the BibTex Parser.
*
* (c) Renan de Lima Barbosa <renandelima@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace RenanBr\BibTexParser\Processor;
/**
 * Adds configured default values for the tags an entry does not have.
 */
class FillMissingProcessor
{
    use TagSearchTrait;

    /**
     * @var array Default values, indexed by tag name
     */
    protected $missingFields;

    /**
     * @param array $missingFields Default values, indexed by tag name
     */
    public function __construct(array $missingFields)
    {
        $this->missingFields = $missingFields;
    }

    /**
     * @return array The entry, completed with the defaults of the tags it lacked
     */
    public function __invoke(array $entry)
    {
        $tags = array_keys($entry);
        foreach ($this->missingFields as $tag => $value) {
            // tagSearch() returns null when nothing matches. A truthiness
            // test would also report an existing tag named "0" as missing
            // and overwrite it
            if (null === $this->tagSearch($tag, $tags)) {
                $entry[$tag] = $value;
            }
        }

        return $entry;
    }
}

View File

@ -0,0 +1,38 @@
<?php
/*
* This file is part of the BibTex Parser.
*
* (c) Renan de Lima Barbosa <renandelima@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace RenanBr\BibTexParser\Processor;
/**
 * Splits the contents of the covered tags ("keywords" by default) into
 * arrays.
 */
class KeywordsProcessor
{
    use TagCoverageTrait;

    public function __construct()
    {
        $this->setTagCoverage(['keywords']);
    }

    /**
     * Replaces each covered tag value by the list obtained when splitting
     * it on ", " and "; ".
     *
     * @return array
     */
    public function __invoke(array $entry)
    {
        foreach ($this->getCoveredTags(array_keys($entry)) as $keywordsTag) {
            $rawKeywords = $entry[$keywordsTag];
            $entry[$keywordsTag] = preg_split('/, |; /', $rawKeywords);
        }

        return $entry;
    }
}

View File

@ -0,0 +1,104 @@
<?php
/*
* This file is part of the BibTex Parser.
*
* (c) Renan de Lima Barbosa <renandelima@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace RenanBr\BibTexParser\Processor;
use Composer\InstalledVersions;
use Exception;
use Pandoc\Pandoc;
use RenanBr\BibTexParser\Exception\ProcessorException;
use RuntimeException;
/**
 * Translates LaTeX texts to unicode.
 */
class LatexToUnicodeProcessor
{
    use TagCoverageTrait;

    /** @var (callable(string): string)|null Converter, created on first use */
    private $converter;

    /**
     * Converts the string values of the covered tags, including strings
     * nested in array values. Other value types are left untouched.
     *
     * @return array
     */
    public function __invoke(array $entry)
    {
        foreach ($this->getCoveredTags(array_keys($entry)) as $tag) {
            // Array: every string leaf is translated in place
            if (\is_array($entry[$tag])) {
                array_walk_recursive($entry[$tag], function (&$leaf) {
                    if (\is_string($leaf)) {
                        $leaf = $this->decode($leaf);
                    }
                });
                continue;
            }

            // Plain string
            if (\is_string($entry[$tag])) {
                $entry[$tag] = $this->decode($entry[$tag]);
            }
        }

        return $entry;
    }

    /**
     * Runs the converter on a text.
     *
     * @param mixed $text
     *
     * @throws ProcessorException wrapping any exception raised while
     *                            obtaining or running the converter
     *
     * @return string
     */
    private function decode($text)
    {
        try {
            $convert = $this->getConverter();

            return $convert($text);
        } catch (Exception $failure) {
            throw new ProcessorException(sprintf('Error while processing LaTeX to Unicode: %s', $failure->getMessage()), 0, $failure);
        }
    }

    /**
     * Returns the converter, building it on first call from whichever
     * supported Pandoc wrapper package is installed.
     *
     * @throws RuntimeException when no supported wrapper is installed
     *
     * @return (callable(string): string)
     */
    private function getConverter()
    {
        if ($this->converter) {
            return $this->converter;
        }

        $hasUeberdosisWrapper = InstalledVersions::isInstalled('ueberdosis/pandoc');
        if (!$hasUeberdosisWrapper && !InstalledVersions::isInstalled('ryakad/pandoc-php')) {
            throw new RuntimeException('Pandoc wrapper not installed. Try running "composer require ueberdosis/pandoc"');
        }

        $pandoc = new Pandoc();

        if ($hasUeberdosisWrapper) {
            $this->converter = static function ($text) use ($pandoc) {
                // @phpstan-ignore-next-line
                $plain = $pandoc->input($text)->execute([
                    '--from', 'latex',
                    '--to', 'plain',
                    '--wrap', 'none',
                ]);

                // The last character of the output is dropped
                return mb_substr($plain, 0, -1);
            };
        } else {
            $this->converter = static function ($text) use ($pandoc) {
                return $pandoc->runWith($text, [
                    'from' => 'latex',
                    'to' => 'plain',
                    'wrap' => 'none',
                ]);
            };
        }

        return $this->converter;
    }
}

View File

@ -0,0 +1,243 @@
<?php
/*
* This file is part of the BibTex Parser.
*
* (c) Renan de Lima Barbosa <renandelima@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace RenanBr\BibTexParser\Processor;
use RenanBr\BibTexParser\Exception\ProcessorException;
/**
 * Splits names in four parts: First Von Last Jr.
 *
 * Each covered tag value ("author" and "editor" by default) is replaced by
 * a list with one array per name, holding the keys "first", "von", "last"
 * and "jr".
 *
 * This class includes source code adapted from the Structures_BibTex package,
 * (c) Elmar Pitschke <elmar.pitschke@gmx.de>, included here under PHP license:
 * http://www.php.net/license/3_0.txt
 *
 * @author Andre Chalom <andrechalom@gmail.com>
 *
 * @see https://github.com/pear/Structures_BibTex
 */
class NamesProcessor
{
use TagCoverageTrait;
/**
 * Restricts the processing to the "author" and "editor" tags.
 */
public function __construct()
{
$this->setTagCoverage(['author', 'editor']);
}
/**
 * Replaces the value of every covered tag by its list of split names.
 *
 * @return array
 */
public function __invoke(array $entry)
{
$covered = $this->getCoveredTags(array_keys($entry));
foreach ($covered as $tag) {
$entry[$tag] = $this->extractAuthors($entry[$tag]);
}
return $entry;
}
/**
 * Extracting the authors.
 *
 * Names are separated by " and ". A name without comma is read as
 * "First von Last"; a name with commas is read as "von Last, First" or
 * "von Last, Jr, First".
 *
 * @param string $entry The entry with the authors
 *
 * @return array the extracted authors, one array per name with the trimmed
 *               keys "first", "von", "last" and "jr"
 *
 * @author Elmar Pitschke <elmar.pitschke@gmx.de>
 */
private function extractAuthors($entry)
{
// Sanitizes the entry to remove unwanted whitespace: every run of
// whitespace becomes a single space
$entry = trim(preg_replace('/\s+/', ' ', $entry));
$authorarray = [];
$authorarray = explode(' and ', $entry);
// Each raw name in $authorarray is replaced in place by its split form
for ($i = 0; $i < \count($authorarray); ++$i) {
$author = trim($authorarray[$i]);
/* The first version of how an author could be written (First von Last)
   has no commas in it */
$first = '';
$von = '';
$last = '';
$jr = '';
if (false === mb_strpos($author, ',')) {
$tmparray = [];
// Words are separated by whitespace or "~"
$tmparray = preg_split('/[\s\~]/', $author);
$size = \count($tmparray);
if (1 === $size) { //There is only a last
$last = $tmparray[0];
} elseif (2 === $size) { //There is a first and a last
$first = $tmparray[0];
$last = $tmparray[1];
} else {
// Walks through every word but the final one, moving from the
// "first" part to the "von" part and then to the "last" part
$invon = false;
$inlast = false;
for ($j = 0; $j < ($size - 1); ++$j) {
if ($inlast) {
$last .= ' '.$tmparray[$j];
} elseif ($invon) {
try {
$case = $this->determineCase($tmparray[$j]);
if ((0 === $case) || (-1 === $case)) { //Change from von to last
//You only change when there is no more lower case there
$islast = true;
for ($k = ($j + 1); $k < ($size - 1); ++$k) {
try {
$futurecase = $this->determineCase($tmparray[$k]);
if (0 === $futurecase) {
$islast = false;
}
} catch (ProcessorException $sbe) {
// Ignore
}
}
if ($islast) {
$inlast = true;
if (-1 === $case) { //Caseless belongs to the last
$last .= ' '.$tmparray[$j];
} else {
$von .= ' '.$tmparray[$j];
}
} else {
$von .= ' '.$tmparray[$j];
}
} else {
$von .= ' '.$tmparray[$j];
}
} catch (ProcessorException $sbe) {
// Ignore: a word whose case cannot be determined is dropped
}
} else {
try {
$case = $this->determineCase($tmparray[$j]);
if (0 === $case) { //Change from first to von
$invon = true;
$von .= ' '.$tmparray[$j];
} else {
$first .= ' '.$tmparray[$j];
}
} catch (ProcessorException $sbe) {
// Ignore: a word whose case cannot be determined is dropped
}
}
}
//The last entry is always the last!
$last .= ' '.$tmparray[$size - 1];
}
} else { //Version 2 and 3
$tmparray = [];
$tmparray = explode(',', $author);
//The first entry must contain von and last
$vonlastarray = [];
$vonlastarray = explode(' ', $tmparray[0]);
$size = \count($vonlastarray);
if (1 === $size) { //Only one entry->got to be the last
$last = $vonlastarray[0];
} else {
$inlast = false;
for ($j = 0; $j < ($size - 1); ++$j) {
if ($inlast) {
$last .= ' '.$vonlastarray[$j];
} else {
// NOTE(review): unlike the other determineCase() calls of this method,
// this one is not wrapped in try/catch, so a ProcessorException raised
// here propagates to the caller - confirm this is intended
if (0 !== ($this->determineCase($vonlastarray[$j]))) { //Change from von to last
$islast = true;
for ($k = ($j + 1); $k < ($size - 1); ++$k) {
try {
$case = $this->determineCase($vonlastarray[$k]);
if (0 === $case) {
$islast = false;
}
} catch (ProcessorException $sbe) {
// Ignore
}
}
if ($islast) {
$inlast = true;
$last .= ' '.$vonlastarray[$j];
} else {
$von .= ' '.$vonlastarray[$j];
}
} else {
$von .= ' '.$vonlastarray[$j];
}
}
}
$last .= ' '.$vonlastarray[$size - 1];
}
//Now we check if it is version three (three entries in the array (two commas)
if (3 === \count($tmparray)) {
$jr = $tmparray[1];
}
//Everything in the last entry is first
$first = $tmparray[\count($tmparray) - 1];
}
$authorarray[$i] = ['first' => trim($first), 'von' => trim($von), 'last' => trim($last), 'jr' => trim($jr)];
}
return $authorarray;
}
/**
 * Case Determination according to the needs of BibTex.
 *
 * To parse the Author(s) correctly a determination is needed
 * to get the Case of a word. There are three possible values:
 * - Upper Case (return value 1)
 * - Lower Case (return value 0)
 * - Caseless (return value -1)
 *
 * The case is the one of the first ASCII letter found outside of braces.
 *
 * @param string $word
 *
 * @throws ProcessorException when $word is not a string or is empty once trimmed
 *
 * @return int The Case
 *
 * @author Elmar Pitschke <elmar.pitschke@gmx.de>
 */
private function determineCase($word)
{
$ret = -1;
$trimmedword = trim($word);
/* trim() always returns a string, so the untouched $word is kept for the
   is_string() check below */
if (\is_string($word) && (mb_strlen($trimmedword) > 0)) {
$i = 0;
$found = false;
$openbrace = 0;
// NOTE(review): the bound is the length of $word (not $trimmedword) and
// uses "<=", so $i can run past the end of $trimmedword; presumably
// harmless because nothing matches there - confirm
while (!$found && ($i <= mb_strlen($word))) {
$letter = mb_substr($trimmedword, $i, 1);
$ord = \ord($letter);
if (123 === $ord) { //Open brace
++$openbrace;
}
if (125 === $ord) { //Closing brace
--$openbrace;
}
if (($ord >= 65) && ($ord <= 90) && (0 === $openbrace)) { //The first character is uppercase
$ret = 1;
$found = true;
} elseif (($ord >= 97) && ($ord <= 122) && (0 === $openbrace)) { //The first character is lowercase
$ret = 0;
$found = true;
} else { //Not yet found
++$i;
}
}
} else {
throw new ProcessorException('Could not determine case on word: '.$word);
}
return $ret;
}
}

View File

@ -0,0 +1,63 @@
<?php
/*
* This file is part of the BibTex Parser.
*
* (c) Renan de Lima Barbosa <renandelima@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace RenanBr\BibTexParser\Processor;
/**
 * Lets a processor define which tags of an entry it applies to.
 *
 * Unless setTagCoverage() is called, every tag except "_original" and
 * "_type" is covered.
 */
trait TagCoverageTrait
{
    use TagSearchTrait;

    /** @var array Tags the strategy applies to */
    private $tagCoverageList = [
        '_original',
        '_type',
    ];

    /** @var string Either "whitelist" or "blacklist" */
    private $tagCoverageStrategy = 'blacklist';

    /**
     * @param array  $tags     List of tags to be covered
     * @param string $strategy Can assume "whitelist" (default) or "blacklist"
     */
    public function setTagCoverage($tags, $strategy = null)
    {
        $this->tagCoverageStrategy = $strategy ?: 'whitelist';
        $this->tagCoverageList = $tags;
    }

    /**
     * Calculates which tags are covered.
     *
     * The search performed internally is case-insensitive.
     *
     * @param array $tags Tag names actually present in the entry
     *
     * @return array Tag names, spelled as they appear in $tags
     */
    protected function getCoveredTags(array $tags)
    {
        // Resolves the configured names to the actual tag names
        $listed = [];
        foreach ($this->tagCoverageList as $configured) {
            $actual = $this->tagSearch($configured, $tags);
            if (null === $actual) {
                continue;
            }
            $listed[] = $actual;
        }

        // Whitelist keeps the listed tags, blacklist keeps all the others
        return 'whitelist' === $this->tagCoverageStrategy
            ? $listed
            : array_values(array_diff($tags, $listed));
    }
}

View File

@ -0,0 +1,37 @@
<?php
/*
* This file is part of the BibTex Parser.
*
* (c) Renan de Lima Barbosa <renandelima@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace RenanBr\BibTexParser\Processor;
/**
 * Change the case of all tag names.
 */
class TagNameCaseProcessor
{
    /** @var int Case passed to array_change_key_case() */
    private $case;

    /**
     * @param int $case Case to apply, as accepted by array_change_key_case()
     */
    public function __construct($case)
    {
        $this->case = $case;
    }

    /**
     * Returns the entry with all its tag names converted to the configured
     * case; values are left untouched.
     *
     * @return array
     */
    public function __invoke(array $entry)
    {
        $targetCase = $this->case;

        return array_change_key_case($entry, $targetCase);
    }
}

View File

@ -0,0 +1,35 @@
<?php
/*
* This file is part of the BibTex Parser.
*
* (c) Renan de Lima Barbosa <renandelima@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace RenanBr\BibTexParser\Processor;
/**
 * Provides a case-insensitive lookup of tag names.
 */
trait TagSearchTrait
{
    /**
     * Searches for the actual name of a tag.
     *
     * The search performed is case-insensitive.
     *
     * @param string $needle   Tag name to look for
     * @param array  $haystack Tag names to search in
     *
     * @return string|null The first matching name, spelled as in $haystack,
     *                     or null when there is none
     */
    protected function tagSearch($needle, array $haystack)
    {
        foreach ($haystack as $candidate) {
            if (0 !== strcasecmp($needle, $candidate)) {
                continue;
            }

            return $candidate;
        }

        return null;
    }
}

View File

@ -0,0 +1,58 @@
<?php
/*
* This file is part of the BibTex Parser.
*
* (c) Renan de Lima Barbosa <renandelima@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace RenanBr\BibTexParser\Processor;
/**
 * Trims the string values of the covered tags.
 *
 * @author Florent DESPIERRES <florent@despierres.pro>
 */
class TrimProcessor
{
    use TagCoverageTrait;

    /**
     * @param array|null $fields Tags to restrict the trimming to; the
     *                           trait's default coverage applies when empty
     */
    public function __construct(array $fields = null)
    {
        if (!$fields) {
            return;
        }

        $this->setTagCoverage($fields);
    }

    /**
     * @return array
     */
    public function __invoke(array $entry)
    {
        foreach ($this->getCoveredTags(array_keys($entry)) as $coveredTag) {
            $entry[$coveredTag] = $this->trim($entry[$coveredTag]);
        }

        return $entry;
    }

    /**
     * Trims a string, or every string found in an array (recursively,
     * keys preserved). Values of any other type are returned as is.
     *
     * @param mixed $value
     *
     * @return mixed
     */
    private function trim($value)
    {
        if (\is_string($value)) {
            return trim($value);
        }

        if (!\is_array($value)) {
            return $value;
        }

        $result = [];
        foreach ($value as $index => $item) {
            $result[$index] = $this->trim($item);
        }

        return $result;
    }
}

View File

@ -0,0 +1,49 @@
<?php
/*
* This file is part of the BibTex Parser.
*
* (c) Renan de Lima Barbosa <renandelima@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace RenanBr\BibTexParser\Processor;
/**
 * Adds a "url" tag built from the "doi" tag to entries that have no URL.
 */
class UrlFromDoiProcessor
{
    use TagSearchTrait;

    const FORMAT = 'https://doi.org/%s';

    /**
     * @var string sprintf() format receiving the DOI
     */
    private $urlFormat;

    /**
     * @param string|null $urlFormat sprintf() format receiving the DOI;
     *                               self::FORMAT is used when empty
     */
    public function __construct($urlFormat = null)
    {
        $this->urlFormat = $urlFormat ?: self::FORMAT;
    }

    /**
     * Sets "url" when the entry has no URL tag and its DOI tag holds a
     * non-empty string; the entry is returned unchanged otherwise.
     *
     * @return array
     */
    public function __invoke(array $entry)
    {
        $tags = array_keys($entry);
        $doiTag = $this->tagSearch('doi', $tags);
        $urlTag = $this->tagSearch('url', $tags);

        // Nothing to do when a URL already exists or no DOI is available
        if (null !== $urlTag || null === $doiTag) {
            return $entry;
        }

        $doi = $entry[$doiTag];
        if (!\is_string($doi) || '' === $doi) {
            return $entry;
        }

        $entry['url'] = sprintf($this->urlFormat, $doi);

        return $entry;
    }
}