mirror of
https://github.com/OpenXE-org/OpenXE.git
synced 2025-01-04 02:50:29 +01:00
557 lines
13 KiB
PHP
557 lines
13 KiB
PHP
|
<?php
|
||
|
/* WikiParser
 * Version 1.0
 * Copyright 2005, Steve Blinch
 * http://code.blitzaffe.com
 *
 * This class parses and returns the HTML representation of a document containing
 * basic MediaWiki-style wiki markup.
 *
 *
 * USAGE
 *
 * Refer to class_WikiRetriever.php (which uses this script to parse fetched
 * wiki documents) for an example.
 *
 *
 * LICENSE
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 *
 *
 */
|
||
|
|
||
|
/**
 * One node of the heading index (table of contents).
 *
 * A node stores the text it was constructed with and an ordered list of
 * child nodes; WikiParser nests these to represent heading levels.
 */
class IndexPoint
{
    /** @var array child nodes, in the order they were added */
    public $children;

    /** @var mixed text passed to the constructor (the heading text when used by WikiParser) */
    public $text;

    /**
     * @param string $text text stored on this node
     */
    public function __construct($text='') {
        $this->text = $text;
        $this->children = array();
    }

    /**
     * Append a child node.
     *
     * @param IndexPoint $point node to append
     */
    public function AddChild($point) {
        $this->children[] = $point;
    }

    /**
     * Return the most recently added child.
     *
     * @return IndexPoint|null the last child, or null when there are none
     */
    public function LastChild() {
        if (!is_array($this->children) || count($this->children) === 0) {
            return null;
        }

        return $this->children[count($this->children) - 1];
    }

    /**
     * Tell whether this node has at least one child.
     *
     * The previous implementation returned true for an EMPTY node and false
     * otherwise, i.e. the opposite of what the name promises.
     *
     * @return bool true when at least one child exists
     */
    public function HasChildren() {
        return is_array($this->children) && count($this->children) > 0;
    }
}
|
||
|
|
||
|
|
||
|
|
||
|
class WikiParser {
|
||
|
|
||
|
/**
 * Set the link/image configuration, create the empty heading index and
 * seed every piece of parser state with a neutral value.
 *
 * The state fields are read by parse_line() and the handle_*() callbacks.
 * "emphasis" and "preformat" in particular were never assigned before
 * being read, which produced warnings on the first parsed line.
 */
function __construct() {
    // Prefix put in front of internal link targets (see handle_internallink()).
    $this->reference_wiki = 'index.php?module=wiki&action=list&name=';
    // Prefix put in front of image targets; empty means "render the title only".
    $this->image_uri = '';
    // When true handle_image() renders nothing at all.
    $this->ignore_images = true;
    // Root of the heading index filled by AddToIndex().
    $this->Index = new IndexPoint();

    // Per-document state; parse() resets most of it again for each document.
    $this->page_title = '';
    $this->redirect = false;
    $this->nowikis = array();
    $this->list_level = 0;
    $this->list_level_types = array();
    $this->deflist = false;
    $this->preformat = false;
    $this->emphasis = array();
    $this->linknumber = 0;
    $this->suppress_linebreaks = false;
    $this->stop = false;
    $this->stop_all = false;
}
|
||
|
|
||
|
/**
 * Render the collected heading index as one flat HTML list.
 *
 * Up to three levels of $this->Index are walked. Every entry becomes an
 * <li> with class "level1".."level3", a dotted running number
 * ("1", "1.2", "1.2.3") and a link to "#" + Normalize(text).
 *
 * @return string|false the <ul id="index"> markup, or false when the index is empty
 */
function BuildIndex()
{
    $chapters = $this->Index->children;
    if (count($chapters) == 0) {
        return false;
    }

    $items = array();
    foreach ($chapters as $i => $chapter) {
        $n1 = $i + 1;
        $items[] = "<li class=\"level1\"><a href=\"#" . $this->Normalize($chapter->text) . "\">"
            . $n1 . " " . $chapter->text . "</a></li>";

        foreach ($chapter->children as $j => $section) {
            $n2 = $j + 1;
            $items[] = "<li class=\"level2\"><a href=\"#" . $this->Normalize($section->text) . "\">"
                . $n1 . "." . $n2 . " " . $section->text . "</a></li>";

            foreach ($section->children as $k => $subsection) {
                $n3 = $k + 1;
                $items[] = "<li class=\"level3\"><a href=\"#" . $this->Normalize($subsection->text) . "\">"
                    . $n1 . "." . $n2 . "." . $n3 . " " . $subsection->text . "</a></li>";
            }
        }
    }

    return "<ul id=\"index\">" . implode('', $items) . "</ul>";
}
|
||
|
|
||
|
/**
 * Record a heading in the index tree.
 *
 * Level 1 headings become children of the index root, level 2 headings
 * children of the most recent level 1 entry, level 3 headings children of
 * the most recent level 2 entry. Other levels are not indexed.
 *
 * A heading whose parent level has not been seen yet is skipped. The
 * previous code dereferenced the missing parent and died with
 * "Call to a member function LastChild() on null" (e.g. for a document
 * that starts with a "==" heading).
 *
 * @param string $text  heading text
 * @param int    $level heading level (number of "=" characters)
 */
function AddToIndex($text, $level) {
    if ($level == 1) {
        $this->Index->AddChild(new IndexPoint($text));
        return;
    }

    // Most recent level 1 entry; parent of level 2, grandparent of level 3.
    $chapter = $this->Index->LastChild();
    if ($chapter === null) {
        return;
    }

    if ($level == 2) {
        $chapter->AddChild(new IndexPoint($text));
        return;
    }

    if ($level == 3) {
        $section = $chapter->LastChild();
        if ($section !== null) {
            $section->AddChild(new IndexPoint($text));
        }
    }
}
|
||
|
|
||
|
/**
 * Line handler for "== Heading ==" lines.
 *
 * Registers the heading in the index, stops further line-level handlers
 * for this line and returns an <hN id="..."> element, where N is the
 * number of leading "=" characters and the id is Normalize(heading text).
 *
 * @param array $matches regex groups: [1] leading "=" run, [2] heading text
 * @return string heading markup, preceded by closing tags for any open emphasis
 */
function handle_sections($matches) {
    $heading = $matches[2];
    $depth = strlen($matches[1]);

    $this->AddToIndex($heading, $depth);
    $this->stop = true;

    $anchor = $this->Normalize($heading);

    // Close emphasis that is still open so it cannot run on into the heading.
    $closing = $this->emphasize_off();

    return $closing . "\n\n<h{$depth} id=\"$anchor\">{$heading}</h{$depth}>\n\n";
}
|
||
|
|
||
|
/**
 * Turn heading text into the string used as its anchor id.
 *
 * Spaces become underscores and the German umlauts / sharp s are replaced
 * by plain ASCII letters. All other characters are left untouched.
 *
 * @param string $text heading text
 * @return string text with the replacements applied
 */
function Normalize($text)
{
    // Pairs are applied one after the other, in this order.
    $search  = array(' ', 'Ä', 'ä', 'Ö', 'ö', 'Ü', 'ü', 'ß');
    $replace = array('_', 'A', 'a', 'O', 'o', 'U', 'u', 'ss');

    return str_replace($search, $replace, $text);
}
|
||
|
|
||
|
/**
 * Line handler for empty lines.
 *
 * Unless line breaks are currently suppressed, emits a double <br /> and
 * stops further line-level handlers. In both cases any open emphasis is
 * closed first.
 *
 * @param array $matches regex groups (unused)
 * @return string closing emphasis tags, optionally followed by "<br /><br />"
 */
function handle_newline($matches) {
    if (!$this->suppress_linebreaks) {
        $this->stop = true;
        // Close open emphasis so it cannot run on past the paragraph break.
        return $this->emphasize_off() . "<br /><br />";
    }

    return $this->emphasize_off();
}
|
||
|
|
||
|
/**
 * Line handler for list items ("*" = unordered, "#" = ordered).
 *
 * Opens or closes list elements until the nesting depth equals the number
 * of marker characters, then emits the item itself. The tag opened for
 * each level is remembered in $this->list_level_types so the matching
 * closing tag can be produced later.
 *
 * Changes compared to the previous implementation:
 *  - when closing, $matches is false; it is no longer indexed or used as a
 *    lookup key (this raised warnings on every list end);
 *  - when several levels open at once (e.g. a first item "*# x"), each
 *    level takes the type of ITS marker character instead of all levels
 *    taking the type of the last one.
 *
 * @param array|false $matches regex groups: [1] marker run, [2] item text; ignored when $close is true
 * @param bool        $close   true to close every open list and emit no item
 * @return string list open/close tags, followed by the <li> unless closing
 */
function handle_list($matches,$close=false) {
    $listtypes = array(
        '*'=>'ul',
        '#'=>'ol',
    );

    $output = "";

    $markers = $close ? '' : $matches[1];
    $newlevel = strlen($markers);

    while ($this->list_level != $newlevel) {
        if ($this->list_level > $newlevel) {
            // Too deep: close the innermost open list.
            $tag = '/' . array_pop($this->list_level_types);
            $this->list_level--;
        } else {
            // Not deep enough: open the list named by the marker at this depth.
            $tag = $listtypes[$markers[$this->list_level]];
            $this->list_level++;
            array_push($this->list_level_types, $tag);
        }

        $output .= "\n<{$tag}>\n";
    }

    if ($close) {
        return $output;
    }

    return $output . "<li>" . $matches[2] . "</li>\n";
}
|
||
|
|
||
|
/**
 * Line handler for definition lists ("; term", "; term : definition", ": definition").
 *
 * Opens a <dl> on the first definition-list line and emits <dt>/<dd>
 * elements; with $close = true it ends the list instead.
 *
 * A "; term : definition" line is now split at the first colon only.
 * Previously the text was split at every colon and everything after the
 * second one was dropped (e.g. a definition containing a URL).
 *
 * @param array|false $matches regex groups: [1] ";" or ":", [2] rest of the line; ignored when $close is true
 * @param bool        $close   true to close the currently open list
 * @return string generated markup
 */
function handle_definitionlist($matches,$close=false) {
    if ($close) {
        $this->deflist = false;
        return "</dl>\n";
    }

    $output = "";
    if (!$this->deflist) {
        $output .= "<dl>\n";
    }
    $this->deflist = true;

    switch ($matches[1]) {
        case ';':
            $term = $matches[2];
            if (strpos($term, ' :') !== false) {
                // Limit 2 keeps further colons inside the definition.
                list($term, $definition) = explode(':', $term, 2);
                $output .= "<dt>{$term}</dt><dd>{$definition}</dd>";
            } else {
                $output .= "<dt>{$term}</dt>";
            }
            break;

        case ':':
            $definition = $matches[2];
            $output .= "<dd>{$definition}</dd>\n";
            break;
    }

    return $output;
}
|
||
|
|
||
|
/**
 * Line handler for lines that start with whitespace (preformatted text).
 *
 * Opens a <pre> on the first such line, passes the line text through
 * unchanged and sets stop_all so no other handler touches the line. With
 * $close = true it ends the block instead.
 *
 * @param array|false $matches regex groups: [1] line text after the leading whitespace; ignored when $close is true
 * @param bool        $close   true to close the currently open block
 * @return string generated markup
 */
function handle_preformat($matches,$close=false) {
    if ($close) {
        $this->preformat = false;
        return "</pre>\n";
    }

    $this->stop_all = true;

    // Only the first line of a run opens the element.
    $opening = $this->preformat ? "" : "<pre>";
    $this->preformat = true;

    return $opening . $matches[1] . "\n";
}
|
||
|
|
||
|
/**
 * Line handler for "----": replaces the line with a horizontal rule.
 *
 * @param array $matches regex groups (unused)
 * @return string the rule element
 */
function handle_horizontalrule($matches) {
    $rule = "<hr />";

    return $rule;
}
|
||
|
|
||
|
/**
 * Convert a link target into the form used in wiki URLs:
 * spaces become underscores and the first character is upper-cased.
 *
 * @param string $topic link target as written in the markup
 * @return string converted target
 */
function wiki_link($topic) {
    $underscored = str_replace(' ', '_', $topic);

    return ucfirst($underscored);
}
|
||
|
|
||
|
/**
 * Render an image link.
 *
 * Returns nothing while images are ignored, and only the title while no
 * image base URI is configured. Otherwise an <img> is built from
 * image_uri + $href and then wrapped once per recognised option, in the
 * order the options are given:
 *  - "frame": right-floated bordered box with the title as caption
 *  - "right": right-floated box
 *
 * @param string $href    image name from the markup
 * @param string $title   alternate text / caption
 * @param array  $options option strings
 * @return string generated markup (may be empty)
 */
function handle_image($href,$title,$options) {
    if ($this->ignore_images) {
        return "";
    }
    if (!$this->image_uri) {
        return $title;
    }

    $source = $this->image_uri . $href;
    $markup = sprintf('<img src="%s" alt="%s" />', $source, $title);

    foreach ($options as $option) {
        if ($option == 'frame') {
            $markup = sprintf(
                '<div style="float: right; background-color: #F5F5F5; border: 1px solid #D0D0D0; padding: 2px">'
                . '%s<div>%s</div></div>',
                $markup,
                $title
            );
        } elseif ($option == 'right') {
            $markup = sprintf('<div style="float: right">%s</div>', $markup);
        }
    }

    return $markup;
}
|
||
|
|
||
|
/**
 * Character-level handler for [[namespace:target|title]]suffix links.
 *
 * Image links ("Image" namespace) are delegated to handle_image(), with
 * the "|"-separated parts of the title passed as options and the last
 * part as the title. For all other links, parenthesised text and any
 * leading "prefix:" are stripped from the title. When no reference_wiki
 * prefix is configured only the title text is returned, otherwise an
 * anchor pointing at reference_wiki + [namespace:] + wiki_link(target).
 *
 * Fixes: the optional title and suffix groups are missing from $matches
 * when they do not take part in the match, and the anchor was built with
 * an undefined $newwindow variable. Both raised warnings; the generated
 * markup is unchanged (internal links never got a target attribute).
 *
 * @param array $matches regex groups: [3] namespace, [4] target, [6] title, [7] suffix
 * @return string generated markup or plain title
 */
function handle_internallink($matches) {
    $namespace = isset($matches[3]) ? $matches[3] : '';
    $href      = isset($matches[4]) ? $matches[4] : '';
    $explicit  = isset($matches[6]) ? $matches[6] : '';
    $suffix    = isset($matches[7]) ? $matches[7] : '';

    // Without an explicit title the target plus any trailing letters is shown.
    $title = $explicit ? $explicit : $href . $suffix;

    if ($namespace == 'Image') {
        $options = explode('|', $title);
        $title = array_pop($options);

        return $this->handle_image($href, $title, $options);
    }

    $title = preg_replace('/\(.*?\)/', '', $title);
    $title = preg_replace('/^.*?\:/', '', $title);

    if (!$this->reference_wiki) {
        return $title;
    }

    $href = $this->reference_wiki . ($namespace ? $namespace . ':' : '') . $this->wiki_link($href);

    return sprintf('<a href="%s">%s</a>', $href, $title);
}
|
||
|
|
||
|
/**
 * Character-level handler for [url] and [url title] links.
 *
 * Links without a title are numbered consecutively ("[1]", "[2]", ...)
 * using $this->linknumber. External links always open in a new window.
 *
 * Fix: the title group is missing from $matches for untitled links, so
 * reading it unconditionally raised an "undefined array key" warning.
 *
 * @param array $matches regex groups: [2] URL, [3] title including its leading whitespace (optional)
 * @return string anchor markup
 */
function handle_externallink($matches) {
    $href = $matches[2];
    $title = isset($matches[3]) ? $matches[3] : '';

    if (!$title) {
        $this->linknumber++;
        $title = "[{$this->linknumber}]";
    }

    return sprintf('<a href="%s" target="_blank">%s</a>', $href, $title);
}
|
||
|
|
||
|
/**
 * Toggle one emphasis level and return the tag to emit for it.
 *
 * $amount is the number of apostrophes in the markup: 2 = <em>,
 * 3 and 4 = <strong>, 5 = <em><strong>. The open/closed state per level
 * is kept in $this->emphasis; an open level is closed, a closed one is
 * opened.
 *
 * Fix: levels that were never toggled have no entry in $this->emphasis;
 * they are now treated as "closed" instead of being read as undefined
 * array keys (which raised a warning on every first use).
 *
 * @param int $amount number of consecutive apostrophes (2..5)
 * @return string opening or closing tag(s), possibly preceded by a literal apostrophe
 */
function emphasize($amount) {
    $amounts = array(
        2=>array('<em>','</em>'),
        3=>array('<strong>','</strong>'),
        4=>array('<strong>','</strong>'),
        5=>array('<em><strong>','</strong></em>'),
    );

    $output = "";
    $open = !empty($this->emphasis[$amount]);

    // handle cases where emphasized phrases end in an apostrophe, eg: ''somethin'''
    // should read <em>somethin'</em> rather than <em>somethin<strong>
    if (!$open && !empty($this->emphasis[$amount - 1])) {
        $amount--;
        $output = "'";
        $open = true;
    }

    // Index 0 is the opening tag, index 1 the closing tag.
    $output .= $amounts[$amount][$open ? 1 : 0];
    $this->emphasis[$amount] = !$open;

    return $output;
}
|
||
|
|
||
|
/**
 * Character-level handler for runs of apostrophes: toggles the emphasis
 * level that corresponds to the length of the run.
 *
 * @param array $matches regex groups: [1] the apostrophe run
 * @return string tag(s) returned by emphasize()
 */
function handle_emphasize($matches) {
    $quotes = $matches[1];

    return $this->emphasize(strlen($quotes));
}
|
||
|
|
||
|
/**
 * Close every emphasis level that is currently open.
 *
 * Fix: $this->emphasis may not have been set yet (nothing was emphasised
 * so far); iterating it then raised a "foreach() argument must be of type
 * array|object" warning. In that case there is nothing to close.
 *
 * @return string the closing tags, or an empty string
 */
function emphasize_off() {
    if (empty($this->emphasis) || !is_array($this->emphasis)) {
        return "";
    }

    $output = "";
    foreach ($this->emphasis as $amount => $state) {
        if ($state) {
            $output .= $this->emphasize($amount);
        }
    }

    return $output;
}
|
||
|
|
||
|
/**
 * Character-level handler for markers that are simply removed from the
 * output (__TOC__, __NOTOC__, __NOEDITSECTION__).
 *
 * @param array $matches regex groups (unused)
 * @return string always the empty string
 */
function handle_eliminate($matches) {
    $nothing = "";

    return $nothing;
}
|
||
|
|
||
|
/**
 * Character-level handler for {{VARIABLE}} placeholders.
 *
 * Date/time variables are rendered with date(); unknown variables are
 * replaced by an empty string.
 *
 * Fix for SITENAME: the Host header is supplied by the client and may be
 * absent (e.g. on the command line). It is now read defensively and
 * HTML-escaped before being placed into the page.
 *
 * @param array $matches regex groups: [2] variable name without braces
 * @return string|int replacement text
 */
function handle_variable($matches) {
    switch ($matches[2]) {
        case 'CURRENTMONTH':
            return date('m');
        case 'CURRENTMONTHNAMEGEN':
        case 'CURRENTMONTHNAME':
            return date('F');
        case 'CURRENTDAY':
            return date('d');
        case 'CURRENTDAYNAME':
            return date('l');
        case 'CURRENTYEAR':
            return date('Y');
        case 'CURRENTTIME':
            return date('H:i');
        case 'NUMBEROFARTICLES':
            return 0;
        case 'PAGENAME':
            return $this->page_title;
        case 'NAMESPACE':
            return 'None';
        case 'SITENAME':
            $host = isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : '';
            return htmlspecialchars($host, ENT_QUOTES, 'UTF-8');
        default:
            return '';
    }
}
|
||
|
|
||
|
/**
 * Convert one line of wiki markup to HTML.
 *
 * Processing happens in two passes:
 *  1. line-level rules, tried in the order listed; a matching rule
 *     replaces the whole line with the result of its handle_*() method,
 *     and later rules are tested against that result. A handler may set
 *     $this->stop (skip the remaining line rules) or $this->stop_all
 *     (additionally skip pass 2);
 *  2. character-level rules, applied with preg_replace_callback().
 *
 * Afterwards any list, definition list or preformatted block that this
 * line did not continue is closed in front of the line.
 *
 * Fix: "was rule X applied" is now checked with empty() everywhere.
 * Reading $called['list'] / $called['definitionlist'] directly raised an
 * undefined-key warning on every line that did not match the rule.
 *
 * @param string $line one line of markup without its line terminator
 * @return string generated HTML for the line
 */
function parse_line($line) {
    $line_regexes = array(
        'preformat'=>'^\s(.*?)$',
        'definitionlist'=>'^([\;\:])\s*(.*?)$',
        'newline'=>'^$',
        'list'=>'^([\*\#]+)(.*?)$',
        'sections'=>'^(={1,6})(.*?)(={1,6})$',
        'horizontalrule'=>'^----$',
    );
    $char_regexes = array(
        'internallink'=>'('.
            '\[\['. // opening brackets
                '(([^\]]*?)\:)?'. // namespace (if any)
                '([^\]]*?)'. // target
                '(\|([^\]]*?))?'. // title (if any)
            '\]\]'. // closing brackets
            '([a-z]+)?'. // any suffixes
            ')',
        'externallink'=>'('.
            '\['.
                '([^\]]*?)'.
                '(\s+[^\]]*?)?'.
            '\]'.
            ')',
        'emphasize'=>'(\'{2,5})',
        'eliminate'=>'(__TOC__|__NOTOC__|__NOEDITSECTION__)',
        'variable'=>'('. '\{\{' . '([^\}]*?)' . '\}\}' . ')',
    );

    $this->stop = false;
    $this->stop_all = false;

    // Names of the line rules that matched this line.
    $called = array();

    $line = rtrim($line);

    foreach ($line_regexes as $rule => $regex) {
        if (preg_match("/$regex/i", $line, $matches)) {
            $called[$rule] = true;
            $handler = "handle_" . $rule;
            $line = $this->$handler($matches);
            if ($this->stop || $this->stop_all) {
                break;
            }
        }
    }

    if (!$this->stop_all) {
        $this->stop = false;
        foreach ($char_regexes as $rule => $regex) {
            $line = preg_replace_callback("/$regex/i", array($this, "handle_" . $rule), $line);
            if ($this->stop) {
                break;
            }
        }
    }

    $isline = strlen(trim($line)) > 0;

    // if this wasn't a list item, and we are in a list, close the list tag(s)
    if (!empty($this->list_level) && $this->list_level > 0 && empty($called['list'])) {
        $line = $this->handle_list(false, true) . $line;
    }
    if (!empty($this->deflist) && empty($called['definitionlist'])) {
        $line = $this->handle_definitionlist(false, true) . $line;
    }
    if (!empty($this->preformat) && empty($called['preformat'])) {
        $line = $this->handle_preformat(false, true) . $line;
    }

    // suppress linebreaks for the next line if we just displayed one; otherwise re-enable them
    if ($isline) {
        $this->suppress_linebreaks = (!empty($called['newline']) || !empty($called['sections']));
    }

    return $line;
}
|
||
|
|
||
|
/**
 * Parse a built-in sample document that exercises every supported markup
 * feature and return the resulting HTML.
 *
 * The sample lines under "== Preformat ==" start with a space: the
 * preformat rule in parse_line() only matches lines that begin with
 * whitespace, so without it that section never exercised the rule.
 * The sample text starts in column 0 on purpose for the same reason.
 *
 * @return string HTML produced by parse() for the sample document
 */
function test() {
    $text = "WikiParser stress tester. <br /> Testing...
__TOC__

== Nowiki test ==
<nowiki>[[wooticles|narf]] and '''test''' and stuff.</nowiki>

== Character formatting ==
This is ''emphasized'', this is '''really emphasized''', this is ''''grossly emphasized'''',
and this is just '''''freeking insane'''''.
Done.

== Variables ==
{{CURRENTDAY}}/{{CURRENTMONTH}}/{{CURRENTYEAR}}
Done.

== Image test ==
[[:Image:bao1.jpg]]
[[Image:bao1.jpg|frame|alternate text]]
[[Image:bao1.jpg|right|alternate text]]
Done.

== Horizontal Rule ==
Above the rule.
----
Done.

== Hyperlink test ==
This is a [[namespace:link target|bitchin hypalink]] to another document for [[click]]ing, with [[(some) hidden text]] and a [[namespace:hidden namespace]].

A link to an external site [http://www.google.ca] as well another [http://www.esitemedia.com], and a [http://www.blitzaffe.com titled link] -- woo!
Done.

== Preformat ==
Not preformatted.
 Totally preformatted 01234 o o
 Again, this is preformatted b <-- It's a face
 Again, this is preformatted ---'
Done.

== Bullet test ==
* One bullet
* Another '''bullet'''
*# a list item
*# another list item
*#* unordered, ordered, unordered
*#* again
*# back down one
Done.

== Definition list ==
; yes : opposite of no
; no : opposite of yes
; maybe
: somewhere in between yes and no
Done.

== Indent ==
Normal
: indented woo
: more indentation
Done.

";

    return $this->parse($text);
}
|
||
|
|
||
|
/**
 * Convert a complete wiki document to HTML.
 *
 * Steps: reset the per-document state, cut out <nowiki> sections (they
 * are replaced by an empty <nowiki></nowiki> placeholder and stored),
 * detect a "#REDIRECT [[target]]" on the first line (the target is stored
 * in $this->redirect), run every line through parse_line(), close block
 * elements that are still open, and finally put the stored nowiki text
 * back in place of the placeholders.
 *
 * Fixes:
 *  - the nowiki pattern was greedy, so with more than one <nowiki> block
 *    everything from the first opening tag to the last closing tag was
 *    treated as a single literal section;
 *  - emphasis and preformat state are reset, so they neither leak from a
 *    previously parsed document nor are read while unset;
 *  - a list, definition list or preformatted block that is still open
 *    after the last line is closed instead of leaving unbalanced markup.
 *
 * @param string $text  wiki markup, lines separated by "\n"
 * @param string $title page title, returned for the {{PAGENAME}} variable
 * @return string generated HTML
 */
function parse($text,$title="") {
    $this->redirect = false;

    $this->nowikis = array();
    $this->list_level_types = array();
    $this->list_level = 0;

    $this->deflist = false;
    $this->preformat = false;
    $this->emphasis = array();
    $this->linknumber = 0;
    $this->suppress_linebreaks = false;

    $this->page_title = $title;

    $output = "";

    $text = preg_replace_callback('/<nowiki>([\s\S]*?)<\/nowiki>/i', array($this, "handle_save_nowiki"), $text);

    // Sections were stored in document order, but handle_restore_nowiki()
    // takes them from the END of the array while the placeholders are
    // visited front to back; reverse once so each placeholder gets its own text.
    $this->nowikis = array_reverse($this->nowikis);

    $lines = explode("\n", $text);

    if (preg_match('/^\#REDIRECT\s+\[\[(.*?)\]\]$/', trim($lines[0]), $matches)) {
        $this->redirect = $matches[1];
    }

    foreach ($lines as $line) {
        $output .= $this->parse_line($line);
    }

    // parse_line() only closes a block when a following line does not
    // continue it, so close whatever the last line left open.
    if ($this->list_level > 0) {
        $output .= $this->handle_list(false, true);
    }
    if ($this->deflist) {
        $output .= $this->handle_definitionlist(false, true);
    }
    if (!empty($this->preformat)) {
        $output .= $this->handle_preformat(false, true);
    }

    $output = preg_replace_callback('/<nowiki><\/nowiki>/i', array($this, "handle_restore_nowiki"), $output);

    return $output;
}
|
||
|
|
||
|
/**
 * Callback used by parse() for each <nowiki>...</nowiki> section:
 * stores the enclosed text and leaves an empty placeholder behind.
 *
 * @param array $matches regex groups: [1] text between the tags
 * @return string the placeholder "<nowiki></nowiki>"
 */
function handle_save_nowiki($matches) {
    $literal = $matches[1];
    array_push($this->nowikis, $literal);

    return "<nowiki></nowiki>";
}
|
||
|
|
||
|
/**
 * Callback used by parse() for each nowiki placeholder: removes the last
 * stored nowiki text from $this->nowikis and returns it.
 *
 * @param array $matches regex groups (unused)
 * @return string|null the stored text, or null when nothing is stored
 */
function handle_restore_nowiki($matches) {
    $literal = array_pop($this->nowikis);

    return $literal;
}
|
||
|
}
|
||
|
?>
|