MarkDoc/lib/ParsedownExtra.php

539 lines
13 KiB
PHP
Raw Permalink Normal View History

2021-01-01 12:08:59 +01:00
<?php
#
#
# Parsedown Extra
# https://github.com/erusev/parsedown-extra
#
# (c) Emanuil Rusev
# http://erusev.com
#
# For the full license information, view the LICENSE file that was distributed
# with this source code.
#
#
class ParsedownExtra extends Parsedown
{
# ~
const version = '0.8.1';
# ~
function __construct()
{
if (version_compare(parent::version, '1.7.4') < 0)
{
throw new Exception('ParsedownExtra requires a later version of Parsedown');
}
$this->BlockTypes[':'] []= 'DefinitionList';
$this->BlockTypes['*'] []= 'Abbreviation';
# identify footnote definitions before reference definitions
array_unshift($this->BlockTypes['['], 'Footnote');
# identify footnote markers before before links
array_unshift($this->InlineTypes['['], 'FootnoteMarker');
}
#
# ~
function text($text)
{
$markup = parent::text($text);
# merge consecutive dl elements
$markup = preg_replace('/<\/dl>\s+<dl>\s+/', '', $markup);
# add footnotes
if (isset($this->DefinitionData['Footnote']))
{
$Element = $this->buildFootnoteElement();
$markup .= "\n" . $this->element($Element);
}
return $markup;
}
#
# Blocks
#
#
# Abbreviation
protected function blockAbbreviation($Line)
{
if (preg_match('/^\*\[(.+?)\]:[ ]*(.+?)[ ]*$/', $Line['text'], $matches))
{
$this->DefinitionData['Abbreviation'][$matches[1]] = $matches[2];
$Block = array(
'hidden' => true,
);
return $Block;
}
}
#
# Footnote
protected function blockFootnote($Line)
{
if (preg_match('/^\[\^(.+?)\]:[ ]?(.*)$/', $Line['text'], $matches))
{
$Block = array(
'label' => $matches[1],
'text' => $matches[2],
'hidden' => true,
);
return $Block;
}
}
protected function blockFootnoteContinue($Line, $Block)
{
if ($Line['text'][0] === '[' and preg_match('/^\[\^(.+?)\]:/', $Line['text']))
{
return;
}
if (isset($Block['interrupted']))
{
if ($Line['indent'] >= 4)
{
$Block['text'] .= "\n\n" . $Line['text'];
return $Block;
}
}
else
{
$Block['text'] .= "\n" . $Line['text'];
return $Block;
}
}
protected function blockFootnoteComplete($Block)
{
$this->DefinitionData['Footnote'][$Block['label']] = array(
'text' => $Block['text'],
'count' => null,
'number' => null,
);
return $Block;
}
#
# Definition List
protected function blockDefinitionList($Line, $Block)
{
if ( ! isset($Block) or isset($Block['type']))
{
return;
}
$Element = array(
'name' => 'dl',
'handler' => 'elements',
'text' => array(),
);
$terms = explode("\n", $Block['element']['text']);
foreach ($terms as $term)
{
$Element['text'] []= array(
'name' => 'dt',
'handler' => 'line',
'text' => $term,
);
}
$Block['element'] = $Element;
$Block = $this->addDdElement($Line, $Block);
return $Block;
}
protected function blockDefinitionListContinue($Line, array $Block)
{
if ($Line['text'][0] === ':')
{
$Block = $this->addDdElement($Line, $Block);
return $Block;
}
else
{
if (isset($Block['interrupted']) and $Line['indent'] === 0)
{
return;
}
if (isset($Block['interrupted']))
{
$Block['dd']['handler'] = 'text';
$Block['dd']['text'] .= "\n\n";
unset($Block['interrupted']);
}
$text = substr($Line['body'], min($Line['indent'], 4));
$Block['dd']['text'] .= "\n" . $text;
return $Block;
}
}
#
# Header
protected function blockHeader($Line)
{
$Block = parent::blockHeader($Line);
if (! isset($Block)) {
return null;
}
if (preg_match('/[ #]*{('.$this->regexAttribute.'+)}[ ]*$/', $Block['element']['text'], $matches, PREG_OFFSET_CAPTURE))
{
$attributeString = $matches[1][0];
$Block['element']['attributes'] = $this->parseAttributeData($attributeString);
$Block['element']['text'] = substr($Block['element']['text'], 0, $matches[0][1]);
}
return $Block;
}
#
# Markup
protected function blockMarkupComplete($Block)
{
if ( ! isset($Block['void']))
{
$Block['markup'] = $this->processTag($Block['markup']);
}
return $Block;
}
#
# Setext
protected function blockSetextHeader($Line, array $Block = null)
{
$Block = parent::blockSetextHeader($Line, $Block);
if (! isset($Block)) {
return null;
}
if (preg_match('/[ ]*{('.$this->regexAttribute.'+)}[ ]*$/', $Block['element']['text'], $matches, PREG_OFFSET_CAPTURE))
{
$attributeString = $matches[1][0];
$Block['element']['attributes'] = $this->parseAttributeData($attributeString);
$Block['element']['text'] = substr($Block['element']['text'], 0, $matches[0][1]);
}
return $Block;
}
#
# Inline Elements
#
#
# Footnote Marker
protected function inlineFootnoteMarker($Excerpt)
{
if (preg_match('/^\[\^(.+?)\]/', $Excerpt['text'], $matches))
{
$name = $matches[1];
if ( ! isset($this->DefinitionData['Footnote'][$name]))
{
return;
}
$this->DefinitionData['Footnote'][$name]['count'] ++;
if ( ! isset($this->DefinitionData['Footnote'][$name]['number']))
{
$this->DefinitionData['Footnote'][$name]['number'] = ++ $this->footnoteCount; # » &
}
$Element = array(
'name' => 'sup',
'attributes' => array('id' => 'fnref'.$this->DefinitionData['Footnote'][$name]['count'].':'.$name),
'handler' => 'element',
'text' => array(
'name' => 'a',
'attributes' => array('href' => '#fn:'.$name, 'class' => 'footnote-ref'),
'text' => $this->DefinitionData['Footnote'][$name]['number'],
),
);
return array(
'extent' => strlen($matches[0]),
'element' => $Element,
);
}
}
private $footnoteCount = 0;
#
# Link
protected function inlineLink($Excerpt)
{
$Link = parent::inlineLink($Excerpt);
if (! isset($Link)) {
return null;
}
$remainder = substr($Excerpt['text'], $Link['extent']);
if (preg_match('/^[ ]*{('.$this->regexAttribute.'+)}/', $remainder, $matches))
{
$Link['element']['attributes'] += $this->parseAttributeData($matches[1]);
$Link['extent'] += strlen($matches[0]);
}
return $Link;
}
#
# ~
#
protected function unmarkedText($text)
{
$text = parent::unmarkedText($text);
if (isset($this->DefinitionData['Abbreviation']))
{
foreach ($this->DefinitionData['Abbreviation'] as $abbreviation => $meaning)
{
$pattern = '/\b'.preg_quote($abbreviation, '/').'\b/';
$text = preg_replace($pattern, '<abbr title="'.$meaning.'">'.$abbreviation.'</abbr>', $text);
}
}
return $text;
}
#
# Util Methods
#
protected function addDdElement(array $Line, array $Block)
{
$text = substr($Line['text'], 1);
$text = trim($text);
unset($Block['dd']);
$Block['dd'] = array(
'name' => 'dd',
'handler' => 'line',
'text' => $text,
);
if (isset($Block['interrupted']))
{
$Block['dd']['handler'] = 'text';
unset($Block['interrupted']);
}
$Block['element']['text'] []= & $Block['dd'];
return $Block;
}
protected function buildFootnoteElement()
{
$Element = array(
'name' => 'div',
'attributes' => array('class' => 'footnotes'),
'handler' => 'elements',
'text' => array(
array(
'name' => 'hr',
),
array(
'name' => 'ol',
'handler' => 'elements',
'text' => array(),
),
),
);
uasort($this->DefinitionData['Footnote'], 'self::sortFootnotes');
foreach ($this->DefinitionData['Footnote'] as $definitionId => $DefinitionData)
{
if ( ! isset($DefinitionData['number']))
{
continue;
}
$text = $DefinitionData['text'];
$text = parent::text($text);
$numbers = range(1, $DefinitionData['count']);
$backLinksMarkup = '';
foreach ($numbers as $number)
{
$backLinksMarkup .= ' <a href="#fnref'.$number.':'.$definitionId.'" rev="footnote" class="footnote-backref">&#8617;</a>';
}
$backLinksMarkup = substr($backLinksMarkup, 1);
if (substr($text, - 4) === '</p>')
{
$backLinksMarkup = '&#160;'.$backLinksMarkup;
$text = substr_replace($text, $backLinksMarkup.'</p>', - 4);
}
else
{
$text .= "\n".'<p>'.$backLinksMarkup.'</p>';
}
$Element['text'][1]['text'] []= array(
'name' => 'li',
'attributes' => array('id' => 'fn:'.$definitionId),
'rawHtml' => "\n".$text."\n",
);
}
return $Element;
}
# ~
protected function parseAttributeData($attributeString)
{
$Data = array();
$attributes = preg_split('/[ ]+/', $attributeString, - 1, PREG_SPLIT_NO_EMPTY);
foreach ($attributes as $attribute)
{
if ($attribute[0] === '#')
{
$Data['id'] = substr($attribute, 1);
}
else # "."
{
$classes []= substr($attribute, 1);
}
}
if (isset($classes))
{
$Data['class'] = implode(' ', $classes);
}
return $Data;
}
# ~
protected function processTag($elementMarkup) # recursive
{
# http://stackoverflow.com/q/1148928/200145
libxml_use_internal_errors(true);
$DOMDocument = new DOMDocument;
# http://stackoverflow.com/q/11309194/200145
$elementMarkup = mb_convert_encoding($elementMarkup, 'HTML-ENTITIES', 'UTF-8');
# http://stackoverflow.com/q/4879946/200145
$DOMDocument->loadHTML($elementMarkup);
$DOMDocument->removeChild($DOMDocument->doctype);
$DOMDocument->replaceChild($DOMDocument->firstChild->firstChild->firstChild, $DOMDocument->firstChild);
$elementText = '';
if ($DOMDocument->documentElement->getAttribute('markdown') === '1')
{
foreach ($DOMDocument->documentElement->childNodes as $Node)
{
$elementText .= $DOMDocument->saveHTML($Node);
}
$DOMDocument->documentElement->removeAttribute('markdown');
$elementText = "\n".$this->text($elementText)."\n";
}
else
{
foreach ($DOMDocument->documentElement->childNodes as $Node)
{
$nodeMarkup = $DOMDocument->saveHTML($Node);
if ($Node instanceof DOMElement and ! in_array($Node->nodeName, $this->textLevelElements))
{
$elementText .= $this->processTag($nodeMarkup);
}
else
{
$elementText .= $nodeMarkup;
}
}
}
# because we don't want for markup to get encoded
$DOMDocument->documentElement->nodeValue = 'placeholder\x1A';
$markup = $DOMDocument->saveHTML($DOMDocument->documentElement);
$markup = str_replace('placeholder\x1A', $elementText, $markup);
return $markup;
}
# ~
protected function sortFootnotes($A, $B) # callback
{
return $A['number'] - $B['number'];
}
#
# Fields
#
protected $regexAttribute = '(?:[#.][-\w]+[ ]*)';
}