OpenXE/www/plugins/class.wikiparser.php

557 lines
13 KiB
PHP
Raw Permalink Normal View History

2021-05-21 08:49:41 +02:00
<?php
/* WikiParser
* Version 1.0
* Copyright 2005, Steve Blinch
* http://code.blitzaffe.com
*
* This class parses and returns the HTML representation of a document containing
* basic MediaWiki-style wiki markup.
*
*
* USAGE
*
* Refer to class_WikiRetriever.php (which uses this script to parse fetched
* wiki documents) for an example.
*
*
* LICENSE
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*
*
*/
class IndexPoint
{
var $children;
var $text;
function __construct($text='') {
$this->text = $text;
$this->children = array();
}
function AddChild($point) {
$this->children[] = $point;
}
function LastChild() {
if(is_array($this->children) && count($this->children)>0)
return $this->children[count($this->children)-1];
return null;
}
function HasChildren() {
if(count($this->children)<1)
return true;
return false;
}
}
class WikiParser {
function __construct() {
$this->reference_wiki = 'index.php?module=wiki&action=list&name=';
$this->image_uri = '';
$this->ignore_images = true;
$this->Index = new IndexPoint();
}
function BuildIndex()
{
if(count($this->Index->children)==0) return false;
$out = "<ul id=\"index\">";
foreach($this->Index->children AS $key1=>$level1) {
$key1++;
$out.= "<li class=\"level1\"><a href=\"#".$this->Normalize($level1->text)."\">$key1 {$level1->text}</a></li>";
foreach($level1->children AS $key2=>$level2) {
$key2++;
$out .= "<li class=\"level2\"><a href=\"#".$this->Normalize($level2->text)."\">$key1.$key2 {$level2->text}</a></li>";
foreach($level2->children AS $key3=>$level3) {
$key3++;
$out .= "<li class=\"level3\"><a href=\"#".$this->Normalize($level3->text)."\">$key1.$key2.$key3 {$level3->text}</a></li>";
}
}
}
return $out."</ul>";
}
function AddToIndex($text, $level) {
if($level==1)
$this->Index->AddChild(new IndexPoint($text));
$secondlevel = $this->Index->LastChild();
if($level==2 && $secondlevel!=null) {
$secondlevel->AddChild(new IndexPoint($text));
}
$thirdlevel = $secondlevel->LastChild();
if($level==3 && $thirdlevel!=null) {
$thirdlevel->AddChild(new IndexPoint($text));
}
}
function handle_sections($matches) {
$level = strlen($matches[1]);
$content = $matches[2];
$this->AddToIndex($content, $level);
$this->stop = true;
$id = $this->Normalize($content);
// avoid accidental run-on emphasis
return $this->emphasize_off() . "\n\n<h{$level} id=\"$id\">{$content}</h{$level}>\n\n";
}
function Normalize($text)
{
$text = str_replace(' ', '_', $text);
$text = str_replace('Ä', 'A', $text);
$text = str_replace('ä', 'a', $text);
$text = str_replace('Ö', 'O', $text);
$text = str_replace('ö', 'o', $text);
$text = str_replace('Ü', 'U', $text);
$text = str_replace('ü', 'u', $text);
$text = str_replace('ß', 'ss', $text);
return $text;
}
function handle_newline($matches) {
if ($this->suppress_linebreaks) return $this->emphasize_off();
$this->stop = true;
// avoid accidental run-on emphasis
return $this->emphasize_off() . "<br /><br />";
}
function handle_list($matches,$close=false) {
$listtypes = array(
'*'=>'ul',
'#'=>'ol',
);
$output = "";
$newlevel = ($close) ? 0 : strlen($matches[1]);
while ($this->list_level!=$newlevel) {
$listchar = substr($matches[1],-1);
$listtype = $listtypes[$listchar];
//$output .= "[".$this->list_level."->".$newlevel."]";
if ($this->list_level>$newlevel) {
$listtype = '/'.array_pop($this->list_level_types);
$this->list_level--;
} else {
$this->list_level++;
array_push($this->list_level_types,$listtype);
}
$output .= "\n<{$listtype}>\n";
}
if ($close) return $output;
$output .= "<li>".$matches[2]."</li>\n";
return $output;
}
function handle_definitionlist($matches,$close=false) {
if ($close) {
$this->deflist = false;
return "</dl>\n";
}
$output = "";
if (!$this->deflist) $output .= "<dl>\n";
$this->deflist = true;
switch($matches[1]) {
case ';':
$term = $matches[2];
$p = strpos($term,' :');
if ($p!==false) {
list($term,$definition) = explode(':',$term);
$output .= "<dt>{$term}</dt><dd>{$definition}</dd>";
} else {
$output .= "<dt>{$term}</dt>";
}
break;
case ':':
$definition = $matches[2];
$output .= "<dd>{$definition}</dd>\n";
break;
}
return $output;
}
function handle_preformat($matches,$close=false) {
if ($close) {
$this->preformat = false;
return "</pre>\n";
}
$this->stop_all = true;
$output = "";
if (!$this->preformat) $output .= "<pre>";
$this->preformat = true;
$output .= $matches[1];
return $output."\n";
}
function handle_horizontalrule($matches) {
return "<hr />";
}
function wiki_link($topic) {
return ucfirst(str_replace(' ','_',$topic));
}
function handle_image($href,$title,$options) {
if ($this->ignore_images) return "";
if (!$this->image_uri) return $title;
$href = $this->image_uri . $href;
$imagetag = sprintf(
'<img src="%s" alt="%s" />',
$href,
$title
);
foreach ($options as $k=>$option) {
switch($option) {
case 'frame':
$imagetag = sprintf(
'<div style="float: right; background-color: #F5F5F5; border: 1px solid #D0D0D0; padding: 2px">'.
'%s'.
'<div>%s</div>'.
'</div>',
$imagetag,
$title
);
break;
case 'right':
$imagetag = sprintf(
'<div style="float: right">%s</div>',
$imagetag
);
break;
}
}
return $imagetag;
}
function handle_internallink($matches) {
//var_dump($matches);
$nolink = false;
$href = $matches[4];
$title = $matches[6] ? $matches[6] : $href.$matches[7];
$namespace = $matches[3];
if ($namespace=='Image') {
$options = explode('|',$title);
$title = array_pop($options);
return $this->handle_image($href,$title,$options);
}
$title = preg_replace('/\(.*?\)/','',$title);
$title = preg_replace('/^.*?\:/','',$title);
if ($this->reference_wiki) {
$href = $this->reference_wiki.($namespace?$namespace.':':'').$this->wiki_link($href);
} else {
$nolink = true;
}
if ($nolink) return $title;
return sprintf(
'<a href="%s"%s>%s</a>',
$href,
($newwindow?' target="_blank"':''),
$title
);
}
function handle_externallink($matches) {
$href = $matches[2];
$title = $matches[3];
if (!$title) {
$this->linknumber++;
$title = "[{$this->linknumber}]";
}
$newwindow = true;
return sprintf(
'<a href="%s"%s>%s</a>',
$href,
($newwindow?' target="_blank"':''),
$title
);
}
function emphasize($amount) {
$amounts = array(
2=>array('<em>','</em>'),
3=>array('<strong>','</strong>'),
4=>array('<strong>','</strong>'),
5=>array('<em><strong>','</strong></em>'),
);
$output = "";
// handle cases where emphasized phrases end in an apostrophe, eg: ''somethin'''
// should read <em>somethin'</em> rather than <em>somethin<strong>
if ( (!$this->emphasis[$amount]) && ($this->emphasis[$amount-1]) ) {
$amount--;
$output = "'";
}
$output .= $amounts[$amount][(int) $this->emphasis[$amount]];
$this->emphasis[$amount] = !$this->emphasis[$amount];
return $output;
}
function handle_emphasize($matches) {
$amount = strlen($matches[1]);
return $this->emphasize($amount);
}
function emphasize_off() {
$output = "";
foreach ($this->emphasis as $amount=>$state) {
if ($state) $output .= $this->emphasize($amount);
}
return $output;
}
function handle_eliminate($matches) {
return "";
}
function handle_variable($matches) {
switch($matches[2]) {
case 'CURRENTMONTH': return date('m');
case 'CURRENTMONTHNAMEGEN':
case 'CURRENTMONTHNAME': return date('F');
case 'CURRENTDAY': return date('d');
case 'CURRENTDAYNAME': return date('l');
case 'CURRENTYEAR': return date('Y');
case 'CURRENTTIME': return date('H:i');
case 'NUMBEROFARTICLES': return 0;
case 'PAGENAME': return $this->page_title;
case 'NAMESPACE': return 'None';
case 'SITENAME': return $_SERVER['HTTP_HOST'];
default: return '';
}
}
function parse_line($line) {
$line_regexes = array(
'preformat'=>'^\s(.*?)$',
'definitionlist'=>'^([\;\:])\s*(.*?)$',
'newline'=>'^$',
'list'=>'^([\*\#]+)(.*?)$',
'sections'=>'^(={1,6})(.*?)(={1,6})$',
'horizontalrule'=>'^----$',
);
$char_regexes = array(
// 'link'=>'(\[\[((.*?)\:)?(.*?)(\|(.*?))?\]\]([a-z]+)?)',
'internallink'=>'('.
'\[\['. // opening brackets
'(([^\]]*?)\:)?'. // namespace (if any)
'([^\]]*?)'. // target
'(\|([^\]]*?))?'. // title (if any)
'\]\]'. // closing brackets
'([a-z]+)?'. // any suffixes
')',
'externallink'=>'('.
'\['.
'([^\]]*?)'.
'(\s+[^\]]*?)?'.
'\]'.
')',
'emphasize'=>'(\'{2,5})',
'eliminate'=>'(__TOC__|__NOTOC__|__NOEDITSECTION__)',
'variable'=>'('. '\{\{' . '([^\}]*?)' . '\}\}' . ')',
);
$this->stop = false;
$this->stop_all = false;
$called = array();
$line = rtrim($line);
foreach ($line_regexes as $func=>$regex) {
if (preg_match("/$regex/i",$line,$matches)) {
$called[$func] = true;
$func = "handle_".$func;
$line = $this->$func($matches);
if ($this->stop || $this->stop_all) break;
}
}
if (!$this->stop_all) {
$this->stop = false;
foreach ($char_regexes as $func=>$regex) {
$line = preg_replace_callback("/$regex/i",array(&$this,"handle_".$func),$line);
if ($this->stop) break;
}
}
$isline = strlen(trim($line))>0;
// if this wasn't a list item, and we are in a list, close the list tag(s)
if (($this->list_level>0) && !$called['list']) $line = $this->handle_list(false,true) . $line;
if ($this->deflist && !$called['definitionlist']) $line = $this->handle_definitionlist(false,true) . $line;
if (!empty($this->preformat) && empty($called['preformat'])) $line = $this->handle_preformat(false,true) . $line;
// suppress linebreaks for the next line if we just displayed one; otherwise re-enable them
if ($isline) $this->suppress_linebreaks = (!empty($called['newline']) || !empty($called['sections']));
return $line;
}
function test() {
$text = "WikiParser stress tester. <br /> Testing...
__TOC__
== Nowiki test ==
<nowiki>[[wooticles|narf]] and '''test''' and stuff.</nowiki>
== Character formatting ==
This is ''emphasized'', this is '''really emphasized''', this is ''''grossly emphasized'''',
and this is just '''''freeking insane'''''.
Done.
== Variables ==
{{CURRENTDAY}}/{{CURRENTMONTH}}/{{CURRENTYEAR}}
Done.
== Image test ==
[[:Image:bao1.jpg]]
[[Image:bao1.jpg|frame|alternate text]]
[[Image:bao1.jpg|right|alternate text]]
Done.
== Horizontal Rule ==
Above the rule.
----
Done.
== Hyperlink test ==
This is a [[namespace:link target|bitchin hypalink]] to another document for [[click]]ing, with [[(some) hidden text]] and a [[namespace:hidden namespace]].
A link to an external site [http://www.google.ca] as well another [http://www.esitemedia.com], and a [http://www.blitzaffe.com titled link] -- woo!
Done.
== Preformat ==
Not preformatted.
Totally preformatted 01234 o o
Again, this is preformatted b <-- It's a face
Again, this is preformatted ---'
Done.
== Bullet test ==
* One bullet
* Another '''bullet'''
*# a list item
*# another list item
*#* unordered, ordered, unordered
*#* again
*# back down one
Done.
== Definition list ==
; yes : opposite of no
; no : opposite of yes
; maybe
: somewhere in between yes and no
Done.
== Indent ==
Normal
: indented woo
: more indentation
Done.
";
return $this->parse($text);
}
function parse($text,$title="") {
$this->redirect = false;
$this->nowikis = array();
$this->list_level_types = array();
$this->list_level = 0;
$this->deflist = false;
$this->linknumber = 0;
$this->suppress_linebreaks = false;
$this->page_title = $title;
$output = "";
$text = preg_replace_callback('/<nowiki>([\s\S]*)<\/nowiki>/i',array(&$this,"handle_save_nowiki"),$text);
$lines = explode("\n",$text);
if (preg_match('/^\#REDIRECT\s+\[\[(.*?)\]\]$/',trim($lines[0]),$matches)) {
$this->redirect = $matches[1];
}
foreach ($lines as $k=>$line) {
$line = $this->parse_line($line);
$output .= $line;
}
$output = preg_replace_callback('/<nowiki><\/nowiki>/i',array(&$this,"handle_restore_nowiki"),$output);
return $output;
}
function handle_save_nowiki($matches) {
array_push($this->nowikis,$matches[1]);
return "<nowiki></nowiki>";
}
function handle_restore_nowiki($matches) {
return array_pop($this->nowikis);
}
}
?>