+ *
+ * this is essentially only a workaround for Firefox, see sf#1651996 or take a look at
+ * https://bugzilla.mozilla.org/show_bug.cgi?id=365805
+ * @note when linenumbers are disabled this is essentially the same as GESHI_HEADER_PRE
+ */
+define('GESHI_HEADER_PRE_TABLE', 4);
// Capatalisation constants
/** Lowercase keywords found */
@@ -102,7 +126,7 @@
/** Strict mode never applies (this is the most common) */
define('GESHI_NEVER', 0);
/** Strict mode *might* apply, and can be enabled or
- disabled by {@link GeSHi::enable_strict_mode()} */
+ disabled by {@link GeSHi->enable_strict_mode()} */
define('GESHI_MAYBE', 1);
/** Strict mode always applies */
define('GESHI_ALWAYS', 2);
@@ -128,6 +152,86 @@
/** Used in language files to mark comments */
define('GESHI_COMMENTS', 0);
+/** Used to work around missing PHP features **/
+define('GESHI_PHP_PRE_433', !(version_compare(PHP_VERSION, '4.3.3') === 1));
+
+/** make sure we can call stripos **/
+if (!function_exists('stripos')) {
+    // the offset param of preg_match is not supported below PHP 4.3.3
+    if (GESHI_PHP_PRE_433) {
+        /**
+         * Case-insensitive strpos() fallback. The offset is emulated by
+         * cutting the haystack, as preg_match() cannot take one here.
+         *
+         * @param string The string to search in
+         * @param string The literal text to look for
+         * @param int    Position to start searching at (negative values are treated as 0)
+         * @return mixed Position of the first match, counted from the start of
+         *               the full haystack, or false if there is no match
+         * @ignore
+         */
+        function stripos($haystack, $needle, $offset = null) {
+            $offset = max(0, (int) $offset);
+            if ($offset > 0) {
+                $haystack = substr($haystack, $offset);
+            }
+            // the "i" modifier is what makes this a stripos() and not a strpos()
+            if (preg_match('/'. preg_quote($needle, '/') . '/i', $haystack, $match, PREG_OFFSET_CAPTURE)) {
+                // the match position is relative to the shortened haystack
+                return $match[0][1] + $offset;
+            }
+            return false;
+        }
+    }
+    else {
+        /**
+         * Case-insensitive strpos() fallback based on the offset parameter
+         * of preg_match().
+         *
+         * @param string The string to search in
+         * @param string The literal text to look for
+         * @param int    Position to start searching at
+         * @return mixed Position of the first match or false if there is no match
+         * @ignore
+         */
+        function stripos($haystack, $needle, $offset = null) {
+            // the "i" modifier is what makes this a stripos() and not a strpos()
+            if (preg_match('/'. preg_quote($needle, '/') . '/i', $haystack, $match, PREG_OFFSET_CAPTURE, (int) $offset)) {
+                return $match[0][1];
+            }
+            return false;
+        }
+    }
+}
+
+/** some old PHP / PCRE versions only support up to xxx subpatterns in
+ regular expressions. Set this to false if your PCRE lib is up to date
+ @see GeSHi->optimize_regexp_list()
+ **/
+define('GESHI_MAX_PCRE_SUBPATTERNS', 500);
+/** it's also important not to generate too long regular expressions
+ be generous here... but keep in mind, that when reaching this limit we
+ still have to close open patterns. 12k should do just fine on a 16k limit.
+ @see GeSHi->optimize_regexp_list()
+ **/
+define('GESHI_MAX_PCRE_LENGTH', 12288);
+
+//Number format specification
+/** Basic number format for integers */
+define('GESHI_NUMBER_INT_BASIC', 1); //Default integers \d+
+/** Enhanced number format for integers like seen in C */
+define('GESHI_NUMBER_INT_CSTYLE', 2); //Default C-Style \d+[lL]?
+/** Number format to highlight binary numbers with a suffix "b" */
+define('GESHI_NUMBER_BIN_SUFFIX', 16); //[01]+[bB]
+/** Number format to highlight binary numbers with a prefix % */
+define('GESHI_NUMBER_BIN_PREFIX_PERCENT', 32); //%[01]+
+/** Number format to highlight binary numbers with a prefix 0b (C) */
+define('GESHI_NUMBER_BIN_PREFIX_0B', 64); //0b[01]+
+/** Number format to highlight octal numbers with a leading zero */
+define('GESHI_NUMBER_OCT_PREFIX', 256); //0[0-7]+
+/** Number format to highlight octal numbers with a prefix 0o (logtalk) */
+define('GESHI_NUMBER_OCT_PREFIX_0O', 512); //0o[0-7]+
+/** Number format to highlight octal numbers with a leading @ (Used in HiSofts Devpac series). */
+define('GESHI_NUMBER_OCT_PREFIX_AT', 1024); //@[0-7]+
+/** Number format to highlight octal numbers with a suffix of o */
+define('GESHI_NUMBER_OCT_SUFFIX', 2048); //[0-7]+[oO]
+/** Number format to highlight hex numbers with a prefix 0x */
+define('GESHI_NUMBER_HEX_PREFIX', 4096); //0x[0-9a-fA-F]+
+/** Number format to highlight hex numbers with a prefix $ */
+define('GESHI_NUMBER_HEX_PREFIX_DOLLAR', 8192); //$[0-9a-fA-F]+
+/** Number format to highlight hex numbers with a suffix of h */
+define('GESHI_NUMBER_HEX_SUFFIX', 16384); //[0-9][0-9a-fA-F]*h
+/** Number format to highlight floating-point numbers without support for scientific notation */
+define('GESHI_NUMBER_FLT_NONSCI', 65536); //\d+\.\d+
+/** Number format to highlight floating-point numbers with a suffix f and without support for scientific notation */
+define('GESHI_NUMBER_FLT_NONSCI_F', 131072); //\d+(\.\d+)?f
+/** Number format to highlight floating-point numbers with support for scientific notation (E) and optional leading zero */
+define('GESHI_NUMBER_FLT_SCI_SHORT', 262144); //\.\d+e\d+
+/** Number format to highlight floating-point numbers with support for scientific notation (E) and required leading digit */
+define('GESHI_NUMBER_FLT_SCI_ZERO', 524288); //\d+(\.\d+)?e\d+
+//Custom formats are passed by RX array
+
// Error detection - use these to analyse faults
/** No sourcecode to highlight was specified
* @deprecated
@@ -137,9 +241,9 @@
define('GESHI_ERROR_NO_SUCH_LANG', 2);
/** GeSHi could not open a file for reading (generally a language file) */
define('GESHI_ERROR_FILE_NOT_READABLE', 3);
-/** The header type passed to {@link GeSHi::set_header_type()} was invalid */
+/** The header type passed to {@link GeSHi->set_header_type()} was invalid */
define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
-/** The line number type passed to {@link GeSHi::enable_line_numbers()} was invalid */
+/** The line number type passed to {@link GeSHi->enable_line_numbers()} was invalid */
define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
/**#@-*/
@@ -152,8 +256,8 @@
* about how to use this class.
*
* @package geshi
- * @author Nigel McNie
- * @copyright (C) 2004 - 2007 Nigel McNie
+ * @author Nigel McNie , Benny Baumann
+ * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
*/
class GeSHi {
/**#@+
@@ -235,7 +339,7 @@
'REGEXPS' => array(),
'ESCAPE_CHAR' => true,
'BRACKETS' => true,
- 'SYMBOLS' => true,
+ 'SYMBOLS' => false,
'STRINGS' => true,
'NUMBERS' => true,
'METHODS' => true,
@@ -273,6 +377,13 @@
var $footer_content_style = '';
/**
+ * Tells if a block around the highlighted source should be forced
+ * if not using line numbering
+ * @var boolean
+ */
+ var $force_code_block = false;
+
+ /**
* The styles for hyperlinks in the code
* @var array
*/
@@ -308,10 +419,24 @@
var $highlight_extra_lines = array();
/**
+ * Styles of lines that should be highlighted extra
+ * @var array
+ */
+ var $highlight_extra_lines_styles = array();
+
+ /**
* Styles of extra-highlighted lines
* @var string
*/
- var $highlight_extra_lines_style = 'color: #cc0; background-color: #ffc;';
+ var $highlight_extra_lines_style = 'background-color: #ffc;';
+
+ /**
+ * The line ending
+ * If null, nl2br() will be used on the result string.
+ * Otherwise, all instances of \n will be replaced with $line_ending
+ * @var string
+ */
+ var $line_ending = null;
/**
* Number at which line numbers should start at
@@ -323,13 +448,13 @@
* The overall style for this code block
* @var string
*/
- var $overall_style = '';
+ var $overall_style = 'font-family:monospace;';
/**
* The style for the actual code
* @var string
*/
- var $code_style = 'font-family: \'Courier New\', Courier, monospace; font-weight: normal;';
+ var $code_style = 'font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;';
/**
* The overall class for this code block
@@ -347,21 +472,34 @@
* Line number styles
* @var string
*/
- var $line_style1 = 'font-family: \'Courier New\', Courier, monospace; color: black; font-weight: normal; font-style: normal;';
+ var $line_style1 = 'font-weight: normal; vertical-align:top;';
/**
* Line number styles for fancy lines
* @var string
*/
- var $line_style2 = 'font-weight: bold;';
+ var $line_style2 = 'font-weight: bold; vertical-align:top;';
+
+ /**
+ * Style for line numbers when GESHI_HEADER_PRE_TABLE is chosen
+ * @var string
+ */
+ var $table_linenumber_style = 'width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;';
/**
- * Flag for how line nubmers are displayed
+ * Flag for how line numbers are displayed
* @var boolean
*/
var $line_numbers = GESHI_NO_LINE_NUMBERS;
/**
+ * Flag to decide if multi line spans are allowed. Set it to false to make sure
+ * each tag is closed before and reopened after each linefeed.
+ * @var boolean
+ */
+ var $allow_multiline_span = true;
+
+ /**
* The "nth" value for fancy line highlighting
* @var int
*/
@@ -374,6 +512,12 @@
var $tab_width = 8;
/**
+ * Should we use language-defined tab stop widths?
+ * @var boolean
+ */
+ var $use_language_tab_width = false;
+
+ /**
* Default target for keyword links
* @var string
*/
@@ -381,7 +525,7 @@
/**
* The encoding to use for entity encoding
- * NOTE: no longer used
+ * NOTE: Used with Escape Char Sequences to fix UTF-8 handling (cf. SF#2037598)
* @var string
*/
var $encoding = 'utf-8';
@@ -392,6 +536,51 @@
*/
var $keyword_links = true;
+ /**
+ * Currently loaded language file
+ * @var string
+ * @since 1.0.7.22
+ */
+ var $loaded_language = '';
+
+ /**
+ * Whether the caches needed for parsing are built or not
+ *
+ * @var bool
+ * @since 1.0.8
+ */
+ var $parse_cache_built = false;
+
+ /**
+ * Work around for Suhosin Patch with disabled /e modifier
+ *
+ * Note from suhosins author in config file:
+ *
+ * The /e modifier inside preg_replace() allows code execution.
+ * Often it is the cause for remote code execution exploits. It is wise to
+ * deactivate this feature and test where in the application it is used.
+ * The developer using the /e modifier should be made aware that he should
+ * use preg_replace_callback() instead
+ *
+ *
+ * @var int
+ * @since 1.0.8
+ */
+ var $_kw_replace_group = 0;
+ var $_rx_key = 0;
+
+ /**
+ * some "callback parameters" for handle_multiline_regexps
+ *
+ * @since 1.0.8
+ * @access private
+ * @var string
+ */
+ var $_hmr_before = '';
+ var $_hmr_replace = '';
+ var $_hmr_after = '';
+ var $_hmr_key = 0;
+
/**#@-*/
/**
@@ -405,13 +594,17 @@
* should be automatically set correctly. If you have
* renamed the language directory however, you will
* still need to set the path using this parameter or
- * {@link GeSHi::set_language_path()}
+ * {@link GeSHi->set_language_path()}
* @since 1.0.0
*/
- function GeSHi($source, $language, $path = '') {
- $this->set_source($source);
+ function GeSHi($source = '', $language = '', $path = '') {
+ if (!empty($source)) {
+ $this->set_source($source);
+ }
+ if (!empty($language)) {
+ $this->set_language($language);
+ }
$this->set_language_path($path);
- $this->set_language($language);
}
/**
@@ -423,15 +616,17 @@
*/
function error() {
if ($this->error) {
- $msg = $this->error_messages[$this->error];
+ //Put some template variables for debugging here ...
$debug_tpl_vars = array(
'{LANGUAGE}' => $this->language,
'{PATH}' => $this->language_path
);
- foreach ($debug_tpl_vars as $tpl => $var) {
- $msg = str_replace($tpl, $var, $msg);
- }
- return " GeSHi Error: $msg (code $this->error) ";
+ $msg = str_replace(
+ array_keys($debug_tpl_vars),
+ array_values($debug_tpl_vars),
+ $this->error_messages[$this->error]);
+
+ return " GeSHi Error: $msg (code {$this->error}) ";
}
return false;
}
@@ -464,21 +659,40 @@
/**
* Sets the language for this object
*
+ * @note since 1.0.8 this function won't reset language-settings by default anymore!
+ * if you need this set $force_reset = true
+ *
* @param string The name of the language to use
* @since 1.0.0
*/
- function set_language($language) {
- $this->error = false;
- $this->strict_mode = GESHI_NEVER;
+ function set_language($language, $force_reset = false) {
+ if ($force_reset) {
+ $this->loaded_language = false;
+ }
+ //Clean up the language name to prevent malicious code injection
$language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);
- $this->language = strtolower($language);
- $file_name = $this->language_path . $this->language . '.php';
+ $language = strtolower($language);
+
+ //Retrieve the full filename
+ $file_name = $this->language_path . $language . '.php';
+ if ($file_name == $this->loaded_language) {
+ // this language is already loaded!
+ return;
+ }
+
+ $this->language = $language;
+
+ $this->error = false;
+ $this->strict_mode = GESHI_NEVER;
+
+ //Check if we can read the desired file
if (!is_readable($file_name)) {
$this->error = GESHI_ERROR_NO_SUCH_LANG;
return;
}
+
// Load the language for parsing
$this->load_language($file_name);
}
@@ -496,10 +710,114 @@
* so this method will disappear in 1.2.0.
*/
function set_language_path($path) {
+ if(strpos($path,':')) {
+ //Security Fix to prevent external directories using fopen wrappers.
+ if(DIRECTORY_SEPARATOR == "\\") {
+ if(!preg_match('#^[a-zA-Z]:#', $path) || false !== strpos($path, ':', 2)) {
+ return;
+ }
+ } else {
+ return;
+ }
+ }
+ if(preg_match('#[^/a-zA-Z0-9_\.\-\\\s:]#', $path)) {
+ //Security Fix to prevent external directories using fopen wrappers.
+ return;
+ }
+ if(GESHI_SECURITY_PARANOID && false !== strpos($path, '/.')) {
+ //Security Fix to prevent external directories using fopen wrappers.
+ return;
+ }
+ if(GESHI_SECURITY_PARANOID && false !== strpos($path, '..')) {
+ //Security Fix to prevent external directories using fopen wrappers.
+ return;
+ }
if ($path) {
- $this->language_path = ('/' == substr($path, strlen($path) - 1, 1)) ? $path : $path . '/';
- $this->set_language($this->language); // otherwise set_language_path has no effect
+ $this->language_path = ('/' == $path[strlen($path) - 1]) ? $path : $path . '/';
+ $this->set_language($this->language); // otherwise set_language_path has no effect
+ }
+ }
+
+ /**
+ * Get supported langs or an associative array lang=>full_name.
+ * @param boolean $full_names
+ * @return array
+ */
+ function get_supported_languages($full_names=false)
+ {
+     // return array
+     $back = array();
+
+     // we walk the lang root; dir() yields false for a path that cannot be
+     // opened, in which case there simply are no supported languages
+     $dir = is_dir($this->language_path) ? dir($this->language_path) : false;
+     if (!$dir) {
+         return $back;
+     }
+
+     // foreach entry
+     while (false !== ($entry = $dir->read()))
+     {
+         $full_path = $this->language_path.$entry;
+
+         // Skip all dirs
+         if (is_dir($full_path)) {
+             continue;
+         }
+
+         // we only want lang.php files
+         if (!preg_match('/^([^.]+)\.php$/', $entry, $matches)) {
+             continue;
+         }
+
+         // Raw lang name is here
+         $langname = $matches[1];
+
+         // We want the fullname too?
+         if ($full_names === true)
+         {
+             // languages whose full name cannot be determined are left out
+             if (false !== ($fullname = $this->get_language_fullname($langname)))
+             {
+                 $back[$langname] = $fullname; // we go associative
+             }
+         }
+         else
+         {
+             // just store raw langname
+             $back[] = $langname;
+         }
+     }
+
+     $dir->close();
+
+     return $back;
+ }
+
+ /**
+ * Get full_name for a lang or false.
+ * @param string $language short langname (html4strict for example)
+ * @return mixed
+ */
+ function get_language_fullname($language)
+ {
+     //Clean up the language name to prevent malicious code injection
+     $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);
+
+     $language = strtolower($language);
+
+     // get fullpath-filename for a langname
+     $fullpath = $this->language_path.$language.'.php';
+
+     // we need to get contents :S
+     // (is_readable() first, so that a missing file does not raise a warning)
+     if (!is_readable($fullpath) || false === ($data = file_get_contents($fullpath))) {
+         $this->error = sprintf('Geshi::get_lang_fullname() Unknown Language: %s', $language);
+         return false;
+     }
+
+     // match the langname: the value runs up to the first quote that is not
+     // escaped by a backslash. The character class must exclude both the
+     // quote and the backslash, otherwise the match runs on to the last
+     // quote found in the file.
+     if (!preg_match('/\'LANG_NAME\'\s*=>\s*\'((?:[^\'\\\\]|\\\\.)+)\'/', $data, $matches)) {
+         $this->error = sprintf('Geshi::get_lang_fullname(%s): Regex can not detect language', $language);
+         return false;
}
+
+     // return fullname for langname
+     return stripcslashes($matches[1]);
}
/**
@@ -517,15 +835,15 @@
* @since 1.0.0
*/
function set_header_type($type) {
- if (GESHI_HEADER_DIV != $type && GESHI_HEADER_PRE != $type && GESHI_HEADER_NONE != $type) {
+ //Check if we got a valid header type
+ if (!in_array($type, array(GESHI_HEADER_NONE, GESHI_HEADER_DIV,
+ GESHI_HEADER_PRE, GESHI_HEADER_PRE_VALID, GESHI_HEADER_PRE_TABLE))) {
$this->error = GESHI_ERROR_INVALID_HEADER_TYPE;
return;
}
+
+ //Set that new header type
$this->header_type = $type;
- // Set a default overall style if the header is a
- if (GESHI_HEADER_DIV == $type && !$this->overall_style) {
- $this->overall_style = 'font-family: monospace;';
- }
}
/**
@@ -540,8 +858,7 @@
function set_overall_style($style, $preserve_defaults = false) {
if (!$preserve_defaults) {
$this->overall_style = $style;
- }
- else {
+ } else {
$this->overall_style .= $style;
}
}
@@ -593,12 +910,12 @@
*
* @param string The style to use for actual code
* @param boolean Whether to merge the current styles with the new styles
+ * @since 1.0.2
*/
function set_code_style($style, $preserve_defaults = false) {
if (!$preserve_defaults) {
$this->code_style = $style;
- }
- else {
+ } else {
$this->code_style .= $style;
}
}
@@ -616,15 +933,17 @@
* @since 1.0.2
*/
function set_line_style($style1, $style2 = '', $preserve_defaults = false) {
+ //Check if we got 2 or three parameters
if (is_bool($style2)) {
$preserve_defaults = $style2;
$style2 = '';
}
+
+ //Actually set the new styles
if (!$preserve_defaults) {
$this->line_style1 = $style1;
$this->line_style2 = $style2;
- }
- else {
+ } else {
$this->line_style1 .= $style1;
$this->line_style2 .= $style2;
}
@@ -657,6 +976,29 @@
}
/**
+ * Sets whether spans and other HTML markup generated by GeSHi can
+ * span over multiple lines or not. Defaults to true to reduce overhead.
+ * Set it to false if you want to manipulate the output or manually display
+ * the code in an ordered list.
+ *
+ * @param boolean Whether multiline spans are allowed or not
+ * @since 1.0.7.22
+ */
+ function enable_multiline_span($flag) {
+     // store the flag as a strict boolean, whatever was passed in
+     $this->allow_multiline_span = $flag ? true : false;
+ }
+
+ /**
+ * Get current setting for multiline spans, see GeSHi->enable_multiline_span().
+ *
+ * @see enable_multiline_span
+ * @return bool True if generated markup may span several lines, false if
+ *              each tag is closed before and reopened after each linefeed
+ */
+ function get_multiline_span() {
+ return $this->allow_multiline_span;
+ }
+
+ /**
* Sets the style for a keyword group. If $preserve_defaults is
* true, then styles are merged with the default styles, with the
* user defined styles having priority
@@ -668,12 +1010,17 @@
* @since 1.0.0
*/
function set_keyword_group_style($key, $style, $preserve_defaults = false) {
+ //Set the style for this keyword group
if (!$preserve_defaults) {
$this->language_data['STYLES']['KEYWORDS'][$key] = $style;
- }
- else {
+ } else {
$this->language_data['STYLES']['KEYWORDS'][$key] .= $style;
}
+
+ //Update the lexic permissions
+ if (!isset($this->lexic_permissions['KEYWORDS'][$key])) {
+ $this->lexic_permissions['KEYWORDS'][$key] = true;
+ }
}
/**
@@ -701,8 +1048,7 @@
function set_comments_style($key, $style, $preserve_defaults = false) {
if (!$preserve_defaults) {
$this->language_data['STYLES']['COMMENTS'][$key] = $style;
- }
- else {
+ } else {
$this->language_data['STYLES']['COMMENTS'][$key] .= $style;
}
}
@@ -728,12 +1074,11 @@
* to overwrite them
* @since 1.0.0
*/
- function set_escape_characters_style($style, $preserve_defaults = false) {
+ function set_escape_characters_style($style, $preserve_defaults = false, $group = 0) {
if (!$preserve_defaults) {
- $this->language_data['STYLES']['ESCAPE_CHAR'][0] = $style;
- }
- else {
- $this->language_data['STYLES']['ESCAPE_CHAR'][0] .= $style;
+ $this->language_data['STYLES']['ESCAPE_CHAR'][$group] = $style;
+ } else {
+ $this->language_data['STYLES']['ESCAPE_CHAR'][$group] .= $style;
}
}
@@ -764,8 +1109,7 @@
function set_brackets_style($style, $preserve_defaults = false) {
if (!$preserve_defaults) {
$this->language_data['STYLES']['BRACKETS'][0] = $style;
- }
- else {
+ } else {
$this->language_data['STYLES']['BRACKETS'][0] .= $style;
}
}
@@ -792,17 +1136,21 @@
* @param string The style to make the symbols
* @param boolean Whether to merge the new styles with the old or just
* to overwrite them
+ * @param int Tells the group of symbols for which style should be set.
* @since 1.0.1
*/
- function set_symbols_style($style, $preserve_defaults = false) {
+ function set_symbols_style($style, $preserve_defaults = false, $group = 0) {
+ // Update the style of symbols
if (!$preserve_defaults) {
- $this->language_data['STYLES']['SYMBOLS'][0] = $style;
- }
- else {
- $this->language_data['STYLES']['SYMBOLS'][0] .= $style;
+ $this->language_data['STYLES']['SYMBOLS'][$group] = $style;
+ } else {
+ $this->language_data['STYLES']['SYMBOLS'][$group] .= $style;
}
+
// For backward compatibility
- $this->set_brackets_style ($style, $preserve_defaults);
+ if (0 == $group) {
+ $this->set_brackets_style ($style, $preserve_defaults);
+ }
}
/**
@@ -812,7 +1160,9 @@
* @since 1.0.0
*/
function set_symbols_highlighting($flag) {
+ // Update lexic permissions for this symbol group
$this->lexic_permissions['SYMBOLS'] = ($flag) ? true : false;
+
// For backward compatibility
$this->set_brackets_highlighting ($flag);
}
@@ -825,14 +1175,14 @@
* @param string The style to make the escape characters
* @param boolean Whether to merge the new styles with the old or just
* to overwrite them
+ * @param int Tells the group of strings for which style should be set.
* @since 1.0.0
*/
- function set_strings_style($style, $preserve_defaults = false) {
+ function set_strings_style($style, $preserve_defaults = false, $group = 0) {
if (!$preserve_defaults) {
- $this->language_data['STYLES']['STRINGS'][0] = $style;
- }
- else {
- $this->language_data['STYLES']['STRINGS'][0] .= $style;
+ $this->language_data['STYLES']['STRINGS'][$group] = $style;
+ } else {
+ $this->language_data['STYLES']['STRINGS'][$group] .= $style;
}
}
@@ -847,6 +1197,26 @@
}
/**
+ * Sets the styles for strict code blocks. If $preserve_defaults is
+ * true, then styles are merged with the default styles, with the
+ * user defined styles having priority
+ *
+ * @param string The style to make the script blocks
+ * @param boolean Whether to merge the new styles with the old or just
+ * to overwrite them
+ * @param int Tells the group of script blocks for which style should be set.
+ * @since 1.0.8.4
+ */
+ function set_script_style($style, $preserve_defaults = false, $group = 0) {
+     if ($preserve_defaults) {
+         // append to whatever style the script block group already has
+         $this->language_data['STYLES']['SCRIPT'][$group] .= $style;
+         return;
+     }
+
+     // replace the style of the script block group entirely
+     $this->language_data['STYLES']['SCRIPT'][$group] = $style;
+ }
+
+ /**
* Sets the styles for numbers. If $preserve_defaults is
* true, then styles are merged with the default styles, with the
* user defined styles having priority
@@ -854,14 +1224,14 @@
* @param string The style to make the numbers
* @param boolean Whether to merge the new styles with the old or just
* to overwrite them
+ * @param int Tells the group of numbers for which style should be set.
* @since 1.0.0
*/
- function set_numbers_style($style, $preserve_defaults = false) {
+ function set_numbers_style($style, $preserve_defaults = false, $group = 0) {
if (!$preserve_defaults) {
- $this->language_data['STYLES']['NUMBERS'][0] = $style;
- }
- else {
- $this->language_data['STYLES']['NUMBERS'][0] .= $style;
+ $this->language_data['STYLES']['NUMBERS'][$group] = $style;
+ } else {
+ $this->language_data['STYLES']['NUMBERS'][$group] .= $style;
}
}
@@ -891,8 +1261,7 @@
function set_methods_style($key, $style, $preserve_defaults = false) {
if (!$preserve_defaults) {
$this->language_data['STYLES']['METHODS'][$key] = $style;
- }
- else {
+ } else {
$this->language_data['STYLES']['METHODS'][$key] .= $style;
}
}
@@ -920,8 +1289,7 @@
function set_regexps_style($key, $style, $preserve_defaults = false) {
if (!$preserve_defaults) {
$this->language_data['STYLES']['REGEXPS'][$key] = $style;
- }
- else {
+ } else {
$this->language_data['STYLES']['REGEXPS'][$key] .= $style;
}
}
@@ -957,10 +1325,12 @@
*
* @param int A constant specifying what to do with matched keywords
* @since 1.0.1
- * @todo Error check the passed value
*/
function set_case_keywords($case) {
- $this->language_data['CASE_KEYWORDS'] = $case;
+ if (in_array($case, array(
+ GESHI_CAPS_NO_CHANGE, GESHI_CAPS_UPPER, GESHI_CAPS_LOWER))) {
+ $this->language_data['CASE_KEYWORDS'] = $case;
+ }
}
/**
@@ -973,6 +1343,38 @@
*/
function set_tab_width($width) {
$this->tab_width = intval($width);
+
+ //Check if it fits the constraints:
+ if ($this->tab_width < 1) {
+ //Return it to the default
+ $this->tab_width = 8;
+ }
+ }
+
+ /**
+ * Sets whether or not to use tab-stop width specified by language
+ *
+ * @param boolean Whether to use language-specific tab-stop widths
+ * @since 1.0.7.20
+ */
+ function set_use_language_tab_width($use) {
+     // store the preference as a strict boolean
+     $this->use_language_tab_width = $use ? true : false;
+ }
+
+ /**
+ * Returns the tab width to use, based on the current language and user
+ * preference
+ *
+ * @return int Tab width
+ * @since 1.0.7.20
+ */
+ function get_real_tab_width() {
+     // the language's own width only counts if the user asked for it
+     // and the language file actually defines one
+     $use_language_width = $this->use_language_tab_width
+         && isset($this->language_data['TAB_WIDTH']);
+
+     return $use_language_width
+         ? $this->language_data['TAB_WIDTH']
+         : $this->tab_width;
}
/**
@@ -985,7 +1387,7 @@
*/
function enable_strict_mode($mode = true) {
if (GESHI_MAYBE == $this->language_data['STRICT_MODE_APPLIES']) {
- $this->strict_mode = ($mode) ? true : false;
+ $this->strict_mode = ($mode) ? GESHI_ALWAYS : GESHI_NEVER;
}
}
@@ -993,42 +1395,37 @@
* Disables all highlighting
*
* @since 1.0.0
- * @todo Rewrite with an array traversal
+ * @todo Rewrite with array traversal
+ * @deprecated In favour of enable_highlighting
*/
function disable_highlighting() {
- foreach ($this->lexic_permissions as $key => $value) {
- if (is_array($value)) {
- foreach ($value as $k => $v) {
- $this->lexic_permissions[$key][$k] = false;
- }
- }
- else {
- $this->lexic_permissions[$key] = false;
- }
- }
- // Context blocks
- $this->enable_important_blocks = false;
+ $this->enable_highlighting(false);
}
/**
* Enables all highlighting
*
+ * The optional flag parameter was added in version 1.0.7.21 and can be used
+ * to enable (true) or disable (false) all highlighting.
+ *
* @since 1.0.0
+ * @param boolean A flag specifying whether to enable or disable all highlighting
* @todo Rewrite with array traversal
*/
- function enable_highlighting() {
+ function enable_highlighting($flag = true) {
+ $flag = $flag ? true : false;
foreach ($this->lexic_permissions as $key => $value) {
if (is_array($value)) {
foreach ($value as $k => $v) {
- $this->lexic_permissions[$key][$k] = true;
+ $this->lexic_permissions[$key][$k] = $flag;
}
- }
- else {
- $this->lexic_permissions[$key] = true;
+ } else {
+ $this->lexic_permissions[$key] = $flag;
}
}
+
// Context blocks
- $this->enable_important_blocks = true;
+ $this->enable_important_blocks = $flag;
}
/**
@@ -1036,59 +1433,92 @@
* name, or the empty string if it couldn't be found
*
* @param string The extension to get a language name for
- * @param array A lookup array to use instead of the default
+ * @param array A lookup array to use instead of the default one
* @since 1.0.5
* @todo Re-think about how this method works (maybe make it private and/or make it
* a extension->lang lookup?)
* @todo static?
*/
function get_language_name_from_extension( $extension, $lookup = array() ) {
- if ( !$lookup ) {
+ if ( !is_array($lookup) || empty($lookup)) {
$lookup = array(
+ '6502acme' => array( 'a', 's', 'asm', 'inc' ),
+ '6502tasm' => array( 'a', 's', 'asm', 'inc' ),
+ '6502kickass' => array( 'a', 's', 'asm', 'inc' ),
+ '68000devpac' => array( 'a', 's', 'asm', 'inc' ),
+ 'abap' => array('abap'),
'actionscript' => array('as'),
'ada' => array('a', 'ada', 'adb', 'ads'),
'apache' => array('conf'),
- 'asm' => array('ash', 'asm'),
+ 'asm' => array('ash', 'asm', 'inc'),
'asp' => array('asp'),
'bash' => array('sh'),
+ 'bf' => array('bf'),
'c' => array('c', 'h'),
'c_mac' => array('c', 'h'),
'caddcl' => array(),
'cadlisp' => array(),
'cdfg' => array('cdfg'),
- 'cpp' => array('cpp', 'h', 'hpp'),
- 'csharp' => array(),
+ 'cobol' => array('cbl'),
+ 'cpp' => array('cpp', 'hpp', 'C', 'H', 'CPP', 'HPP'),
+ 'csharp' => array('cs'),
'css' => array('css'),
- 'delphi' => array('dpk', 'dpr'),
+ 'd' => array('d'),
+ 'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
+ 'diff' => array('diff', 'patch'),
+ 'dos' => array('bat', 'cmd'),
+ 'gdb' => array('kcrash', 'crash', 'bt'),
+ 'gettext' => array('po', 'pot'),
+ 'gml' => array('gml'),
+ 'gnuplot' => array('plt'),
+ 'groovy' => array('groovy'),
+ 'haskell' => array('hs'),
'html4strict' => array('html', 'htm'),
+ 'ini' => array('ini', 'desktop'),
'java' => array('java'),
'javascript' => array('js'),
+ 'klonec' => array('kl1'),
+ 'klonecpp' => array('klx'),
+ 'latex' => array('tex'),
'lisp' => array('lisp'),
'lua' => array('lua'),
+ 'matlab' => array('m'),
'mpasm' => array(),
+ 'mysql' => array('sql'),
'nsis' => array(),
'objc' => array(),
'oobas' => array(),
'oracle8' => array(),
+ 'oracle10' => array(),
'pascal' => array('pas'),
'perl' => array('pl', 'pm'),
'php' => array('php', 'php5', 'phtml', 'phps'),
+ 'povray' => array('pov'),
+ 'providex' => array('pvc', 'pvx'),
+ 'prolog' => array('pl'),
'python' => array('py'),
'qbasic' => array('bi'),
+ 'reg' => array('reg'),
+ 'ruby' => array('rb'),
'sas' => array('sas'),
+ 'scala' => array('scala'),
+ 'scheme' => array('scm'),
+ 'scilab' => array('sci'),
+ 'smalltalk' => array('st'),
'smarty' => array(),
+ 'tcl' => array('tcl'),
'vb' => array('bas'),
'vbnet' => array(),
'visualfoxpro' => array(),
- 'xml' => array('xml')
+ 'whitespace' => array('ws'),
+ 'xml' => array('xml', 'svg', 'xrc'),
+ 'z80' => array('z80', 'asm', 'inc')
);
}
foreach ($lookup as $lang => $extensions) {
- foreach ($extensions as $ext) {
- if ($ext == $extension) {
- return $lang;
- }
+ if (in_array($extension, $extensions)) {
+ return $lang;
}
}
return '';
@@ -1106,15 +1536,16 @@
* 'lang_name' ...
* );
*
+ * @param string The filename to load the source from
+ * @param array A lookup array to use instead of the default one
* @todo Complete rethink of this and above method
* @since 1.0.5
*/
function load_from_file($file_name, $lookup = array()) {
if (is_readable($file_name)) {
- $this->set_source(implode('', file($file_name)));
+ $this->set_source(file_get_contents($file_name));
$this->set_language($this->get_language_name_from_extension(substr(strrchr($file_name, '.'), 1), $lookup));
- }
- else {
+ } else {
$this->error = GESHI_ERROR_FILE_NOT_READABLE;
}
}
@@ -1127,7 +1558,15 @@
* @since 1.0.0
*/
function add_keyword($key, $word) {
- $this->language_data['KEYWORDS'][$key][] = $word;
+     // create the group on demand so that in_array() always gets an array
+     if (!isset($this->language_data['KEYWORDS'][$key]) ||
+         !is_array($this->language_data['KEYWORDS'][$key])) {
+         $this->language_data['KEYWORDS'][$key] = array();
+     }
+
+     if (in_array($word, $this->language_data['KEYWORDS'][$key])) {
+         // the keyword is already part of this group
+         return;
+     }
+     $this->language_data['KEYWORDS'][$key][] = $word;
+
+     //NEW in 1.0.8 don't recompile the whole optimized regexp, simply append it
+     if ($this->parse_cache_built) {
+         if (empty($this->language_data['CACHED_KEYWORD_LISTS'][$key])) {
+             // nothing cached for this group yet, so there is no entry to
+             // append to: build the list from scratch instead
+             $this->optimize_keyword_group($key);
+         } else {
+             $subkey = count($this->language_data['CACHED_KEYWORD_LISTS'][$key]) - 1;
+             $this->language_data['CACHED_KEYWORD_LISTS'][$key][$subkey] .= '|' . preg_quote($word, '/');
+         }
+     }
}
/**
@@ -1135,11 +1574,24 @@
*
* @param int The key of the keyword group to remove the keyword from
* @param string The word to remove from the keyword group
+ * @param bool Whether to automatically recompile the optimized regexp list or not.
+ * Note: if you set this to false and @see GeSHi->parse_code() was already called once,
+ * for the current language, you have to manually call @see GeSHi->optimize_keyword_group()
+ * or the removed keyword will stay in cache and still be highlighted! On the other hand
+ * it might be too expensive to recompile the regexp list for every removal if you want to
+ * remove a lot of keywords.
* @since 1.0.0
*/
- function remove_keyword($key, $word) {
- $this->language_data['KEYWORDS'][$key] =
- array_diff($this->language_data['KEYWORDS'][$key], array($word));
+ function remove_keyword($key, $word, $recompile = true) {
+ $key_to_remove = array_search($word, $this->language_data['KEYWORDS'][$key]);
+ if ($key_to_remove !== false) {
+ unset($this->language_data['KEYWORDS'][$key][$key_to_remove]);
+
+ //NEW in 1.0.8, optionally recompile keyword group
+ if ($recompile && $this->parse_cache_built) {
+ $this->optimize_keyword_group($key);
+ }
+ }
}
/**
@@ -1153,10 +1605,21 @@
*/
function add_keyword_group($key, $styles, $case_sensitive = true, $words = array()) {
$words = (array) $words;
+ if (empty($words)) {
+ // empty word lists mess up highlighting
+ return false;
+ }
+
+ //Add the new keyword group internally
$this->language_data['KEYWORDS'][$key] = $words;
$this->lexic_permissions['KEYWORDS'][$key] = true;
$this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
$this->language_data['STYLES']['KEYWORDS'][$key] = $styles;
+
+ //NEW in 1.0.8, cache keyword regexp
+ if ($this->parse_cache_built) {
+ $this->optimize_keyword_group($key);
+ }
}
/**
@@ -1166,10 +1629,44 @@
* @since 1.0.0
*/
function remove_keyword_group ($key) {
+ //Remove the group's words, permission, case-sensitivity flag and styles
unset($this->language_data['KEYWORDS'][$key]);
unset($this->lexic_permissions['KEYWORDS'][$key]);
unset($this->language_data['CASE_SENSITIVE'][$key]);
unset($this->language_data['STYLES']['KEYWORDS'][$key]);
+
+ //NEW in 1.0.8: also drop the cached regexp list of this group
+ unset($this->language_data['CACHED_KEYWORD_LISTS'][$key]);
+ }
+
+ /**
+ * Compiles the optimized regexp list for a keyword group and caches it in CACHED_KEYWORD_LISTS
+ *
+ * @param int The key of the keyword group to compile & optimize
+ * @since 1.0.8
+ */
+ function optimize_keyword_group($key) {
+ $this->language_data['CACHED_KEYWORD_LISTS'][$key] =
+ $this->optimize_regexp_list($this->language_data['KEYWORDS'][$key]);
+ $space_as_whitespace = false;
+ if(isset($this->language_data['PARSER_CONTROL'])) {
+ if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
+ if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'])) {
+ $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'];
+ }
+ // A setting for this particular group overrides the general one
+ if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
+ $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'];
+ }
+ }
+ }
+ if($space_as_whitespace) {
+ // Make every literal space in the cached patterns match any run of whitespace
+ foreach($this->language_data['CACHED_KEYWORD_LISTS'][$key] as $rxk => $rxv) {
+ $this->language_data['CACHED_KEYWORD_LISTS'][$key][$rxk] =
+ str_replace(" ", "\\s+", $rxv);
+ }
+ }
}
/**
@@ -1213,6 +1710,17 @@
}
/**
+ * Sets whether a surrounding block is forced around
+ * the highlighted code (stored as a bool in $this->force_code_block)
+ *
+ * @param boolean True to force the surrounding block, false to not force it
+ * @since 1.0.7.20
+ */
+ function enable_inner_code_block($flag) {
+ $this->force_code_block = (bool)$flag;
+ }
+
+ /**
* Sets the base URL to be used for keywords
*
* @param int The key of the keyword group to set the URL for
@@ -1246,9 +1754,8 @@
function set_link_target($target) {
if (!$target) {
$this->link_target = '';
- }
- else {
- $this->link_target = ' target="' . $target . '" ';
+ } else {
+ $this->link_target = ' target="' . $target . '"'; // $target is inserted as given, without escaping
}
}
@@ -1265,8 +1772,10 @@
/**
* Sets whether context-important blocks are highlighted
*
+ * @param boolean Tells whether to enable or disable highlighting of important blocks
* @todo REMOVE THIS SHIZ FROM GESHI!
* @deprecated
+ * @since 1.0.2
*/
function enable_important_blocks($flag) {
$this->enable_important_blocks = ( $flag ) ? true : false;
@@ -1285,19 +1794,37 @@
/**
* Specifies which lines to highlight extra
*
+ * The optional $style parameter was added in 1.0.7.21.
+ *
* @param mixed An array of line numbers to highlight, or just a line
* number on its own.
+ * @param mixed The style to use for the given line(s); an array of lines shares one style.
+ * If null is specified, the default style is used.
+ * If false is specified, the line will be removed from
+ * special highlighting.
* @since 1.0.2
* @todo Some data replication here that could be cut down on
*/
- function highlight_lines_extra($lines) {
+ function highlight_lines_extra($lines, $style = null) {
if (is_array($lines)) {
+ //Arrays are handled by recursing once per line, passing the same style
foreach ($lines as $line) {
- $this->highlight_extra_lines[intval($line)] = intval($line);
+ $this->highlight_lines_extra($line, $style);
+ }
+ } else {
+ //Mark the line as being highlighted specially (the line number is coerced with intval)
+ $lines = intval($lines);
+ $this->highlight_extra_lines[$lines] = $lines;
+
+ //Decide which style to use
+ if ($style === null) { //null: drop any custom style so the default style applies
+ unset($this->highlight_extra_lines_styles[$lines]);
+ } else if ($style === false) { //false: remove this line from extra highlighting altogether
+ unset($this->highlight_extra_lines[$lines]);
+ unset($this->highlight_extra_lines_styles[$lines]);
+ } else {
+ $this->highlight_extra_lines_styles[$lines] = $style;
}
- }
- else {
- $this->highlight_extra_lines[intval($lines)] = intval($lines);
}
}
@@ -1312,6 +1839,16 @@
}
/**
+ * Sets the line-ending (stored as a string in $this->line_ending)
+ *
+ * @param string The new line-ending; any other value is cast to string
+ * @since 1.0.2
+ */
+ function set_line_ending($line_ending) {
+ $this->line_ending = (string)$line_ending;
+ }
+
+ /**
* Sets what number line numbers should start at. Should
* be a positive integer, and will be converted to one.
*
@@ -1344,7 +1881,7 @@
*/
function set_encoding($encoding) {
if ($encoding) {
- $this->encoding = $encoding;
+ $this->encoding = strtolower($encoding); // stored in lowercase; a falsy value leaves the current encoding untouched
}
}
@@ -1352,18 +1889,251 @@
* Turns linking of keywords on or off.
*
* @param boolean If true, links will be added to keywords
+ * @since 1.0.2
*/
function enable_keyword_links($enable = true) {
- $this->keyword_links = ($enable) ? true : false;
+ $this->keyword_links = (bool) $enable; // any truthy value turns keyword linking on
}
/**
- * Returns the code in $this->source, highlighted and surrounded by the
- * nessecary HTML.
- *
- * This should only be called ONCE, cos it's SLOW! If you want to highlight
- * the same source multiple times, you're better off doing a whole lot of
- * str_replaces to replace the <span>s
+ * Sets up the caches needed for styling. This is automatically called in
+ * parse_code() and get_stylesheet() when appropriate. This function helps
+ * stylesheet generators as they rely on some style information being
+ * preprocessed.
+ *
+ * @since 1.0.8
+ * @access private
+ */
+ function build_style_cache() {
+ //Build the style cache needed to highlight numbers appropriately
+ if($this->lexic_permissions['NUMBERS']) {
+ //First check in which way the highlighting information for numbers is given
+ if(!isset($this->language_data['NUMBERS'])) {
+ $this->language_data['NUMBERS'] = 0;
+ }
+
+ if(is_array($this->language_data['NUMBERS'])) {
+ $this->language_data['NUMBERS_CACHE'] = $this->language_data['NUMBERS'];
+ } else {
+ $this->language_data['NUMBERS_CACHE'] = array();
+ if(!$this->language_data['NUMBERS']) {
+ $this->language_data['NUMBERS'] =
+ GESHI_NUMBER_INT_BASIC |
+ GESHI_NUMBER_FLT_NONSCI;
+ }
+
+ for($i = 0, $j = $this->language_data['NUMBERS']; $j > 0; ++$i, $j>>=1) {
+ //Rearrange style indices if required: a style keyed by the flag value (1<<$i) is moved to the bit index $i
+ if(isset($this->language_data['STYLES']['NUMBERS'][1<<$i])) {
+ $this->language_data['STYLES']['NUMBERS'][$i] =
+ $this->language_data['STYLES']['NUMBERS'][1<<$i];
+ unset($this->language_data['STYLES']['NUMBERS'][1<<$i]);
+ }
+
+ //Check if this bit is set for highlighting
+ if($j&1) {
+ //So this bit is set ...
+ //A bit with its own style gets its own cache entry; otherwise it is merged into group 0
+ if(isset($this->language_data['STYLES']['NUMBERS'][$i])) {
+ $this->language_data['NUMBERS_CACHE'][$i] = 1 << $i;
+ } else {
+ if(!isset($this->language_data['NUMBERS_CACHE'][0])) {
+ $this->language_data['NUMBERS_CACHE'][0] = 0;
+ }
+ $this->language_data['NUMBERS_CACHE'][0] |= 1 << $i;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ * Setup caches needed for parsing. This is automatically called in parse_code() when appropriate.
+ * This function makes stylesheet generators much faster as they do not need these caches.
+ *
+ * @since 1.0.8
+ * @access private
+ */
+ function build_parse_cache() {
+ // cache symbol regexp
+ //As this is a costly operation, we avoid doing it for multiple groups ...
+ //Instead we perform it for all symbols at once.
+ //
+ //For this to work, we need to reorganize the data arrays.
+ if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
+ $this->language_data['MULTIPLE_SYMBOL_GROUPS'] = count($this->language_data['STYLES']['SYMBOLS']) > 1;
+
+ $this->language_data['SYMBOL_DATA'] = array();
+ $symbol_preg_multi = array(); // multi char symbols
+ $symbol_preg_single = array(); // single char symbols
+ foreach ($this->language_data['SYMBOLS'] as $key => $symbols) {
+ if (is_array($symbols)) {
+ foreach ($symbols as $sym) {
+ $sym = $this->hsc($sym);
+ if (!isset($this->language_data['SYMBOL_DATA'][$sym])) {
+ $this->language_data['SYMBOL_DATA'][$sym] = $key;
+ if (isset($sym[1])) { // multiple chars
+ $symbol_preg_multi[] = preg_quote($sym, '/');
+ } else { // single char
+ if ($sym == '-') {
+ // don't trigger range out of order error
+ $symbol_preg_single[] = '\-';
+ } else {
+ $symbol_preg_single[] = preg_quote($sym, '/');
+ }
+ }
+ }
+ }
+ } else {
+ $symbols = $this->hsc($symbols);
+ if (!isset($this->language_data['SYMBOL_DATA'][$symbols])) {
+ $this->language_data['SYMBOL_DATA'][$symbols] = 0;
+ if (isset($symbols[1])) { // multiple chars
+ $symbol_preg_multi[] = preg_quote($symbols, '/');
+ } else if ($symbols == '-') {
+ // don't trigger range out of order error
+ $symbol_preg_single[] = '\-';
+ } else { // single char
+ $symbol_preg_single[] = preg_quote($symbols, '/');
+ }
+ }
+ }
+ }
+
+ //Now we have an array with each possible symbol as the key and the style as the actual data.
+ //This way we can set the correct style just the moment we highlight ...
+ //
+ //Now we need to rewrite our array to get a search string that matches any of the symbols
+ $symbol_preg = array();
+ if (!empty($symbol_preg_multi)) {
+ rsort($symbol_preg_multi);
+ $symbol_preg[] = implode('|', $symbol_preg_multi);
+ }
+ if (!empty($symbol_preg_single)) {
+ rsort($symbol_preg_single);
+ $symbol_preg[] = '[' . implode('', $symbol_preg_single) . ']';
+ }
+ $this->language_data['SYMBOL_SEARCH'] = implode("|", $symbol_preg);
+ }
+
+ // cache optimized regexp for keyword matching
+ // remove old cache
+ $this->language_data['CACHED_KEYWORD_LISTS'] = array();
+ foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
+ if (!isset($this->lexic_permissions['KEYWORDS'][$key]) ||
+ $this->lexic_permissions['KEYWORDS'][$key]) {
+ $this->optimize_keyword_group($key);
+ }
+ }
+
+ // brackets
+ if ($this->lexic_permissions['BRACKETS']) {
+ $this->language_data['CACHE_BRACKET_MATCH'] = array('[', ']', '(', ')', '{', '}');
+ if (!$this->use_classes && isset($this->language_data['STYLES']['BRACKETS'][0])) {
+ $this->language_data['CACHE_BRACKET_REPLACE'] = array(
+ '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">[|>',
+ '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">]|>',
+ '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">(|>',
+ '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">)|>',
+ '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">{|>',
+ '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">}|>',
+ );
+ }
+ else {
+ $this->language_data['CACHE_BRACKET_REPLACE'] = array(
+ '<| class="br0">[|>',
+ '<| class="br0">]|>',
+ '<| class="br0">(|>',
+ '<| class="br0">)|>',
+ '<| class="br0">{|>',
+ '<| class="br0">}|>',
+ );
+ }
+ }
+
+ //Build the parse cache needed to highlight numbers appropriate
+ if($this->lexic_permissions['NUMBERS']) {
+ //Check if the style rearrangements have been processed ...
+ //This also does some preprocessing to check which style groups are useable ...
+ if(!isset($this->language_data['NUMBERS_CACHE'])) {
+ $this->build_style_cache();
+ }
+
+ //Number format specification
+ //All these formats are matched case-insensitively!
+ static $numbers_format = array(
+ GESHI_NUMBER_INT_BASIC =>
+ '(?:(?
+ '(?
+ '(?
+ '(?
+ '(?
+ '(?
+ '(?
+ '(?
+ '(?
+ '(?
+ '(?
+ '(?
+ '(?
+ '(?
+ '(?
+ '(?language_data['NUMBERS_RXCACHE'] = array();
+ foreach($this->language_data['NUMBERS_CACHE'] as $key => $rxdata) {
+ if(is_string($rxdata)) {
+ $regexp = $rxdata;
+ } else {
+ //This is a bitfield of number flags to highlight:
+ //Build an array, implode them together and make this the actual RX
+ $rxuse = array();
+ for($i = 1; $i <= $rxdata; $i<<=1) {
+ if($rxdata & $i) {
+ $rxuse[] = $numbers_format[$i];
+ }
+ }
+ $regexp = implode("|", $rxuse);
+ }
+
+ $this->language_data['NUMBERS_RXCACHE'][$key] =
+ "/(?)($regexp)(?!(?:|(?>[^\<]))+>)(?![^<]*>)(?!\|>)(?!\/>)/i"; //
+ }
+
+ if(!isset($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'])) {
+ $this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'] = '#\d#';
+ }
+ }
+
+ $this->parse_cache_built = true;
+ }
+
+ /**
+ * Returns the code in $this->source, highlighted and surrounded by the
+ * nessecary HTML.
+ *
+ * This should only be called ONCE, cos it's SLOW! If you want to highlight
+ * the same source multiple times, you're better off doing a whole lot of
+ * str_replaces to replace the <span>s
*
* @since 1.0.0
*/
@@ -1371,34 +2141,35 @@
// Start the timer
$start_time = microtime();
+ // Replace all newlines to a common form.
+ $code = str_replace("\r\n", "\n", $this->source);
+ $code = str_replace("\r", "\n", $code);
+
// Firstly, if there is an error, we won't highlight
if ($this->error) {
- $result = GeSHi::hsc($this->source);
+ //Escape the source for output
+ $result = $this->hsc($this->source);
+
+ //This fix is related to SF#1923020, but has to be applied regardless of
+ //actually highlighting symbols.
+ $result = str_replace(array('', ''), array(';', '|'), $result);
+
// Timing is irrelevant
$this->set_time($start_time, $start_time);
- return $this->finalise($result);
+ $this->finalise($result);
+ return $result;
}
- // Replace all newlines to a common form.
- $code = str_replace("\r\n", "\n", $this->source);
- $code = str_replace("\r", "\n", $code);
- // Add spaces for regular expression matching and line numbers
- $code = "\n" . $code . "\n";
+ // make sure the parse cache is up2date
+ if (!$this->parse_cache_built) {
+ $this->build_parse_cache();
+ }
// Initialise various stuff
$length = strlen($code);
- $STRING_OPEN = '';
- $CLOSE_STRING = false;
- $ESCAPE_CHAR_OPEN = false;
$COMMENT_MATCHED = false;
- // Turn highlighting on if strict mode doesn't apply to this language
- $HIGHLIGHTING_ON = ( !$this->strict_mode ) ? true : '';
- // Whether to highlight inside a block of code
- $HIGHLIGHT_INSIDE_STRICT = false;
- $HARDQUOTE_OPEN = false;
- $STRICTATTRS = '';
$stuff_to_parse = '';
- $result = '';
+ $endresult = '';
// "Important" selections are handled like multiline comments
// @todo GET RID OF THIS SHIZ
@@ -1409,446 +2180,957 @@
if ($this->strict_mode) {
// Break the source into bits. Each bit will be a portion of the code
// within script delimiters - for example, HTML between < and >
- $parts = array(0 => array(0 => ''));
$k = 0;
- for ($i = 0; $i < $length; $i++) {
- $char = substr($code, $i, 1);
- if (!$HIGHLIGHTING_ON) {
- foreach ($this->language_data['SCRIPT_DELIMITERS'] as $key => $delimiters) {
+ $parts = array();
+ $matches = array();
+ $next_match_pointer = null;
+ // we use a copy to unset delimiters on demand (when they are not found)
+ $delim_copy = $this->language_data['SCRIPT_DELIMITERS'];
+ $i = 0;
+ while ($i < $length) {
+ $next_match_pos = $length + 1; // never true
+ foreach ($delim_copy as $dk => $delimiters) {
+ if(is_array($delimiters)) {
foreach ($delimiters as $open => $close) {
+ // make sure the cache is setup properly
+ if (!isset($matches[$dk][$open])) {
+ $matches[$dk][$open] = array(
+ 'next_match' => -1,
+ 'dk' => $dk,
+
+ 'open' => $open, // needed for grouping of adjacent code blocks (see below)
+ 'open_strlen' => strlen($open),
+
+ 'close' => $close,
+ 'close_strlen' => strlen($close),
+ );
+ }
// Get the next little bit for this opening string
- $check = substr($code, $i, strlen($open));
- // If it matches...
- if ($check == $open) {
- // We start a new block with the highlightable
- // code in it
- $HIGHLIGHTING_ON = $open;
- $i += strlen($open) - 1;
- $char = $open;
- $parts[++$k][0] = $char;
+ if ($matches[$dk][$open]['next_match'] < $i) {
+ // only find the next pos if it was not already cached
+ $open_pos = strpos($code, $open, $i);
+ if ($open_pos === false) {
+ // no match for this delimiter ever
+ unset($delim_copy[$dk][$open]);
+ continue;
+ }
+ $matches[$dk][$open]['next_match'] = $open_pos;
+ }
+ if ($matches[$dk][$open]['next_match'] < $next_match_pos) {
+ //So we got a new match, update the close_pos
+ $matches[$dk][$open]['close_pos'] =
+ strpos($code, $close, $matches[$dk][$open]['next_match']+1);
- // No point going around again...
- break(2);
+ $next_match_pointer =& $matches[$dk][$open];
+ $next_match_pos = $matches[$dk][$open]['next_match'];
}
}
+ } else {
+ //So we should match an RegExp as Strict Block ...
+ /**
+ * The value in $delimiters is expected to be an RegExp
+ * containing exactly 2 matching groups:
+ * - Group 1 is the opener
+ * - Group 2 is the closer
+ */
+ if(!GESHI_PHP_PRE_433 && //Needs proper rewrite to work with PHP >=4.3.0; 4.3.3 is guaranteed to work.
+ preg_match($delimiters, $code, $matches_rx, PREG_OFFSET_CAPTURE, $i)) {
+ //We got a match ...
+ if(isset($matches_rx['start']) && isset($matches_rx['end']))
+ {
+ $matches[$dk] = array(
+ 'next_match' => $matches_rx['start'][1],
+ 'dk' => $dk,
+
+ 'close_strlen' => strlen($matches_rx['end'][0]),
+ 'close_pos' => $matches_rx['end'][1],
+ );
+ } else {
+ $matches[$dk] = array(
+ 'next_match' => $matches_rx[1][1],
+ 'dk' => $dk,
+
+ 'close_strlen' => strlen($matches_rx[2][0]),
+ 'close_pos' => $matches_rx[2][1],
+ );
+ }
+ } else {
+ // no match for this delimiter ever
+ unset($delim_copy[$dk]);
+ continue;
+ }
+
+ if ($matches[$dk]['next_match'] <= $next_match_pos) {
+ $next_match_pointer =& $matches[$dk];
+ $next_match_pos = $matches[$dk]['next_match'];
+ }
}
}
- else {
- foreach ($this->language_data['SCRIPT_DELIMITERS'] as $key => $delimiters) {
- foreach ($delimiters as $open => $close) {
- if ($open == $HIGHLIGHTING_ON) {
- // Found the closing tag
- break(2);
+
+ // non-highlightable text
+ $parts[$k] = array(
+ 1 => substr($code, $i, $next_match_pos - $i)
+ );
+ ++$k;
+
+ if ($next_match_pos > $length) {
+ // out of bounds means no next match was found
+ break;
+ }
+
+ // highlightable code
+ $parts[$k][0] = $next_match_pointer['dk'];
+
+ //Only combine for non-rx script blocks
+ if(is_array($delim_copy[$next_match_pointer['dk']])) {
+ // group adjacent script blocks, e.g. should be one block, not three!
+ $i = $next_match_pos + $next_match_pointer['open_strlen'];
+ while (true) {
+ $close_pos = strpos($code, $next_match_pointer['close'], $i);
+ if ($close_pos == false) {
+ break;
+ }
+ $i = $close_pos + $next_match_pointer['close_strlen'];
+ if ($i == $length) {
+ break;
+ }
+ if ($code[$i] == $next_match_pointer['open'][0] && ($next_match_pointer['open_strlen'] == 1 ||
+ substr($code, $i, $next_match_pointer['open_strlen']) == $next_match_pointer['open'])) {
+ // merge adjacent but make sure we don't merge things like
+ foreach ($matches as $submatches) {
+ foreach ($submatches as $match) {
+ if ($match['next_match'] == $i) {
+ // a different block already matches here!
+ break 3;
+ }
+ }
}
+ } else {
+ break;
}
}
- // We check code from our current position BACKWARDS. This is so
- // the ending string for highlighting can be included in the block
- $check = substr($code, $i - strlen($close) + 1, strlen($close));
- if ($check == $close) {
- $HIGHLIGHTING_ON = '';
- // Add the string to the rest of the string for this part
- $parts[$k][1] = ( isset($parts[$k][1]) ) ? $parts[$k][1] . $char : $char;
- $parts[++$k][0] = '';
- $char = '';
- }
+ } else {
+ $close_pos = $next_match_pointer['close_pos'] + $next_match_pointer['close_strlen'];
+ $i = $close_pos;
+ }
+
+ if ($close_pos === false) {
+ // no closing delimiter found!
+ $parts[$k][1] = substr($code, $next_match_pos);
+ ++$k;
+ break;
+ } else {
+ $parts[$k][1] = substr($code, $next_match_pos, $i - $next_match_pos);
+ ++$k;
}
- $parts[$k][1] = ( isset($parts[$k][1]) ) ? $parts[$k][1] . $char : $char;
}
- $HIGHLIGHTING_ON = '';
- }
- else {
+ unset($delim_copy, $next_match_pointer, $next_match_pos, $matches);
+ $num_parts = $k;
+
+ if ($num_parts == 1 && $this->strict_mode == GESHI_MAYBE) {
+ // when we have only one part, we don't have anything to highlight at all.
+ // if we have a "maybe" strict language, this should be handled as highlightable code
+ $parts = array(
+ 0 => array(
+ 0 => '',
+ 1 => ''
+ ),
+ 1 => array(
+ 0 => null,
+ 1 => $parts[0][1]
+ )
+ );
+ $num_parts = 2;
+ }
+
+ } else {
// Not strict mode - simply dump the source into
// the array at index 1 (the first highlightable block)
$parts = array(
- 1 => array(
+ 0 => array(
0 => '',
+ 1 => ''
+ ),
+ 1 => array(
+ 0 => null,
1 => $code
)
);
+ $num_parts = 2;
+ }
+
+ //Unset variables we won't need any longer
+ unset($code);
+
+ //Preload some repeatedly used values regarding hardquotes ...
+ $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false;
+ $hq_strlen = strlen($hq);
+
+ //Preload if line numbers are to be generated afterwards
+ //Added a check if line breaks should be forced even without line numbers, fixes SF#1727398
+ $check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS ||
+ !empty($this->highlight_extra_lines) || !$this->allow_multiline_span;
+
+ //preload the escape char for faster checking ...
+ $escaped_escape_char = $this->hsc($this->language_data['ESCAPE_CHAR']);
+
+ // this is used for single-line comments
+ $sc_disallowed_before = "";
+ $sc_disallowed_after = "";
+
+ if (isset($this->language_data['PARSER_CONTROL'])) {
+ if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) {
+ if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) {
+ $sc_disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'];
+ }
+ if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) {
+ $sc_disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'];
+ }
+ }
+ }
+
+ //Fix for SF#1932083: Multichar Quotemarks unsupported
+ $is_string_starter = array();
+ if ($this->lexic_permissions['STRINGS']) {
+ foreach ($this->language_data['QUOTEMARKS'] as $quotemark) {
+ if (!isset($is_string_starter[$quotemark[0]])) {
+ $is_string_starter[$quotemark[0]] = (string)$quotemark;
+ } else if (is_string($is_string_starter[$quotemark[0]])) {
+ $is_string_starter[$quotemark[0]] = array(
+ $is_string_starter[$quotemark[0]],
+ $quotemark);
+ } else {
+ $is_string_starter[$quotemark[0]][] = $quotemark;
+ }
+ }
}
// Now we go through each part. We know that even-indexed parts are
// code that shouldn't be highlighted, and odd-indexed parts should
// be highlighted
- foreach ($parts as $key => $data) {
- $part = $data[1];
+ for ($key = 0; $key < $num_parts; ++$key) {
+ $STRICTATTRS = '';
+
// If this block should be highlighted...
- if ($key % 2) {
- if ($this->strict_mode) {
- // Find the class key for this block of code
- foreach ($this->language_data['SCRIPT_DELIMITERS'] as $script_key => $script_data) {
- foreach ($script_data as $open => $close) {
- if ($data[0] == $open) {
- break(2);
- }
- }
- }
+ if (!($key & 1)) {
+ // Else not a block to highlight
+ $endresult .= $this->hsc($parts[$key][1]);
+ unset($parts[$key]);
+ continue;
+ }
- if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' &&
- $this->lexic_permissions['SCRIPT']) {
- // Add a span element around the source to
- // highlight the overall source block
- if (!$this->use_classes &&
- $this->language_data['STYLES']['SCRIPT'][$script_key] != '') {
- $attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"';
- }
- else {
- $attributes = ' class="sc' . $script_key . '"';
- }
- $result .= "";
- $STRICTATTRS = $attributes;
+ $result = '';
+ $part = $parts[$key][1];
+
+ $highlight_part = true;
+ if ($this->strict_mode && !is_null($parts[$key][0])) {
+ // get the class key for this block of code
+ $script_key = $parts[$key][0];
+ $highlight_part = $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key];
+ if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' &&
+ $this->lexic_permissions['SCRIPT']) {
+ // Add a span element around the source to
+ // highlight the overall source block
+ if (!$this->use_classes &&
+ $this->language_data['STYLES']['SCRIPT'][$script_key] != '') {
+ $attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"';
+ } else {
+ $attributes = ' class="sc' . $script_key . '"';
}
+ $result .= "";
+ $STRICTATTRS = $attributes;
}
+ }
- if (!$this->strict_mode || $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key]) {
- // Now, highlight the code in this block. This code
- // is really the engine of GeSHi (along with the method
- // parse_non_string_part).
- $length = strlen($part);
- for ($i = 0; $i < $length; $i++) {
- // Get the next char
- $char = substr($part, $i, 1);
- $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false;
- // Is this char the newline and line numbers being used?
- if (($this->line_numbers != GESHI_NO_LINE_NUMBERS
- || count($this->highlight_extra_lines) > 0)
- && $char == "\n") {
- // If so, is there a string open? If there is, we should end it before
- // the newline and begin it again (so when
s are put in the source
- // remains XHTML compliant)
- // note to self: This opens up possibility of config files specifying
- // that languages can/cannot have multiline strings???
- if ($STRING_OPEN) {
- if (!$this->use_classes) {
- $attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][0] . '"';
+ if ($highlight_part) {
+ // Now, highlight the code in this block. This code
+ // is really the engine of GeSHi (along with the method
+ // parse_non_string_part).
+
+ // cache comment regexps incrementally
+ $next_comment_regexp_key = '';
+ $next_comment_regexp_pos = -1;
+ $next_comment_multi_pos = -1;
+ $next_comment_single_pos = -1;
+ $comment_regexp_cache_per_key = array();
+ $comment_multi_cache_per_key = array();
+ $comment_single_cache_per_key = array();
+ $next_open_comment_multi = '';
+ $next_comment_single_key = '';
+ $escape_regexp_cache_per_key = array();
+ $next_escape_regexp_key = '';
+ $next_escape_regexp_pos = -1;
+
+ $length = strlen($part);
+ for ($i = 0; $i < $length; ++$i) {
+ // Get the next char
+ $char = $part[$i];
+ $char_len = 1;
+
+ // update regexp comment cache if needed
+ if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) {
+ $next_comment_regexp_pos = $length;
+ foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) {
+ $match_i = false;
+ if (isset($comment_regexp_cache_per_key[$comment_key]) &&
+ ($comment_regexp_cache_per_key[$comment_key]['pos'] >= $i ||
+ $comment_regexp_cache_per_key[$comment_key]['pos'] === false)) {
+ // we have already matched something
+ if ($comment_regexp_cache_per_key[$comment_key]['pos'] === false) {
+ // this comment is never matched
+ continue;
}
- else {
- $attributes = ' class="st0"';
+ $match_i = $comment_regexp_cache_per_key[$comment_key]['pos'];
+ } else if (
+ //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
+ (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $i), $match, PREG_OFFSET_CAPTURE)) ||
+ (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i))
+ ) {
+ $match_i = $match[0][1];
+ if (GESHI_PHP_PRE_433) {
+ $match_i += $i;
}
- $char = '
' . $char . "";
+
+ $comment_regexp_cache_per_key[$comment_key] = array(
+ 'key' => $comment_key,
+ 'length' => strlen($match[0][0]),
+ 'pos' => $match_i
+ );
+ } else {
+ $comment_regexp_cache_per_key[$comment_key]['pos'] = false;
+ continue;
}
- }
- else if ($char == $STRING_OPEN) {
- // A match of a string delimiter
- if (($this->lexic_permissions['ESCAPE_CHAR'] && $ESCAPE_CHAR_OPEN) ||
- ($this->lexic_permissions['STRINGS'] && !$ESCAPE_CHAR_OPEN)) {
- $char = GeSHi::hsc($char) . '';
+
+ if ($match_i !== false && $match_i < $next_comment_regexp_pos) {
+ $next_comment_regexp_pos = $match_i;
+ $next_comment_regexp_key = $comment_key;
+ if ($match_i === $i) {
+ break;
+ }
}
- $escape_me = false;
- if ($HARDQUOTE_OPEN) {
- if ($ESCAPE_CHAR_OPEN) {
- $escape_me = true;
+ }
+ }
+
+ $string_started = false;
+
+ if (isset($is_string_starter[$char])) {
+ // Possibly the start of a new string ...
+
+ //Check which starter it was ...
+ //Fix for SF#1932083: Multichar Quotemarks unsupported
+ if (is_array($is_string_starter[$char])) {
+ $char_new = '';
+ foreach ($is_string_starter[$char] as $testchar) {
+ if ($testchar === substr($part, $i, strlen($testchar)) &&
+ strlen($testchar) > strlen($char_new)) {
+ $char_new = $testchar;
+ $string_started = true;
}
- else {
- foreach ($this->language_data['HARDESCAPE'] as $hardesc) {
- if (substr($part, $i, strlen($hardesc)) == $hardesc) {
- $escape_me = true;
- break;
+ }
+ if ($string_started) {
+ $char = $char_new;
+ }
+ } else {
+ $testchar = $is_string_starter[$char];
+ if ($testchar === substr($part, $i, strlen($testchar))) {
+ $char = $testchar;
+ $string_started = true;
+ }
+ }
+ $char_len = strlen($char);
+ }
+
+ if ($string_started && ($i != $next_comment_regexp_pos)) {
+ // Hand out the correct style information for this string
+ $string_key = array_search($char, $this->language_data['QUOTEMARKS']);
+ if (!isset($this->language_data['STYLES']['STRINGS'][$string_key]) ||
+ !isset($this->language_data['STYLES']['ESCAPE_CHAR'][$string_key])) {
+ $string_key = 0;
+ }
+
+ // parse the stuff before this
+ $result .= $this->parse_non_string_part($stuff_to_parse);
+ $stuff_to_parse = '';
+
+ if (!$this->use_classes) {
+ $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][$string_key] . '"';
+ } else {
+ $string_attributes = ' class="st'.$string_key.'"';
+ }
+
+ // now handle the string
+ $string = "" . GeSHi::hsc($char);
+ $start = $i + $char_len;
+ $string_open = true;
+
+ if(empty($this->language_data['ESCAPE_REGEXP'])) {
+ $next_escape_regexp_pos = $length;
+ }
+
+ do {
+ //Get the regular ending pos ...
+ $close_pos = strpos($part, $char, $start);
+ if(false === $close_pos) {
+ $close_pos = $length;
+ }
+
+ if($this->lexic_permissions['ESCAPE_CHAR']) {
+ // update escape regexp cache if needed
+ if (isset($this->language_data['ESCAPE_REGEXP']) && $next_escape_regexp_pos < $start) {
+ $next_escape_regexp_pos = $length;
+ foreach ($this->language_data['ESCAPE_REGEXP'] as $escape_key => $regexp) {
+ $match_i = false;
+ if (isset($escape_regexp_cache_per_key[$escape_key]) &&
+ ($escape_regexp_cache_per_key[$escape_key]['pos'] >= $start ||
+ $escape_regexp_cache_per_key[$escape_key]['pos'] === false)) {
+ // we have already matched something
+ if ($escape_regexp_cache_per_key[$escape_key]['pos'] === false) {
+ // this comment is never matched
+ continue;
+ }
+ $match_i = $escape_regexp_cache_per_key[$escape_key]['pos'];
+ } else if (
+ //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
+ (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $start), $match, PREG_OFFSET_CAPTURE)) ||
+ (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $start))
+ ) {
+ $match_i = $match[0][1];
+ if (GESHI_PHP_PRE_433) {
+ $match_i += $start;
+ }
+
+ $escape_regexp_cache_per_key[$escape_key] = array(
+ 'key' => $escape_key,
+ 'length' => strlen($match[0][0]),
+ 'pos' => $match_i
+ );
+ } else {
+ $escape_regexp_cache_per_key[$escape_key]['pos'] = false;
+ continue;
+ }
+
+ if ($match_i !== false && $match_i < $next_escape_regexp_pos) {
+ $next_escape_regexp_pos = $match_i;
+ $next_escape_regexp_key = $escape_key;
+ if ($match_i === $start) {
+ break;
+ }
}
}
}
+
+ //Find the next simple escape position
+ if('' != $this->language_data['ESCAPE_CHAR']) {
+ $simple_escape = strpos($part, $this->language_data['ESCAPE_CHAR'], $start);
+ if(false === $simple_escape) {
+ $simple_escape = $length;
+ }
+ } else {
+ $simple_escape = $length;
+ }
+ } else {
+ $next_escape_regexp_pos = $length;
+ $simple_escape = $length;
}
- if (!$ESCAPE_CHAR_OPEN) {
- $STRING_OPEN = '';
- $CLOSE_STRING = true;
- }
- if (!$escape_me) {
- $HARDQUOTE_OPEN = false;
- }
- $ESCAPE_CHAR_OPEN = false;
- }
- else if (in_array($char, $this->language_data['QUOTEMARKS']) &&
- ($STRING_OPEN == '') && $this->lexic_permissions['STRINGS']) {
- // The start of a new string
- $STRING_OPEN = $char;
- if (!$this->use_classes) {
- $attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][0] . '"';
+ if($simple_escape < $next_escape_regexp_pos &&
+ $simple_escape < $length &&
+ $simple_escape < $close_pos) {
+ //The next escape sequence is a simple one ...
+ $es_pos = $simple_escape;
+
+ //Add the stuff not in the string yet ...
+ $string .= $this->hsc(substr($part, $start, $es_pos - $start));
+
+ //Get the style for this escaped char ...
+ if (!$this->use_classes) {
+ $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"';
+ } else {
+ $escape_char_attributes = ' class="es0"';
+ }
+
+ //Add the style for the escape char ...
+ $string .= "" .
+ GeSHi::hsc($this->language_data['ESCAPE_CHAR']);
+
+ //Get the byte AFTER the ESCAPE_CHAR we just found
+ $es_char = $part[$es_pos + 1];
+ if ($es_char == "\n") {
+ // don't put a newline around newlines
+ $string .= "\n";
+ $start = $es_pos + 2;
+ } else if (ord($es_char) >= 128) {
+ //This is a non-ASCII char (UTF8 or single byte)
+ //This code tries to work around SF#2037598 ...
+ if(function_exists('mb_substr')) {
+ $es_char_m = mb_substr(substr($part, $es_pos+1, 16), 0, 1, $this->encoding);
+ $string .= $es_char_m . '';
+ } else if (!GESHI_PHP_PRE_433 && 'utf-8' == $this->encoding) {
+ if(preg_match("/[\xC2-\xDF][\x80-\xBF]".
+ "|\xE0[\xA0-\xBF][\x80-\xBF]".
+ "|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}".
+ "|\xED[\x80-\x9F][\x80-\xBF]".
+ "|\xF0[\x90-\xBF][\x80-\xBF]{2}".
+ "|[\xF1-\xF3][\x80-\xBF]{3}".
+ "|\xF4[\x80-\x8F][\x80-\xBF]{2}/s",
+ $part, $es_char_m, null, $es_pos + 1)) {
+ $es_char_m = $es_char_m[0];
+ } else {
+ $es_char_m = $es_char;
+ }
+ $string .= $this->hsc($es_char_m) . '';
+ } else {
+ $es_char_m = $this->hsc($es_char);
+ }
+ $start = $es_pos + strlen($es_char_m) + 1;
+ } else {
+ $string .= $this->hsc($es_char) . '';
+ $start = $es_pos + 2;
+ }
+ } else if ($next_escape_regexp_pos < $length &&
+ $next_escape_regexp_pos < $close_pos) {
+ $es_pos = $next_escape_regexp_pos;
+ //Add the stuff not in the string yet ...
+ $string .= $this->hsc(substr($part, $start, $es_pos - $start));
+
+ //Get the key and length of this match ...
+ $escape = $escape_regexp_cache_per_key[$next_escape_regexp_key];
+ $escape_str = substr($part, $es_pos, $escape['length']);
+ $escape_key = $escape['key'];
+
+ //Get the style for this escaped char ...
+ if (!$this->use_classes) {
+ $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][$escape_key] . '"';
+ } else {
+ $escape_char_attributes = ' class="es' . $escape_key . '"';
+ }
+
+ //Add the style for the escape char ...
+ $string .= "" .
+ $this->hsc($escape_str) . '';
+
+ $start = $es_pos + $escape['length'];
+ } else {
+ //Copy the remainder of the string ...
+ $string .= $this->hsc(substr($part, $start, $close_pos - $start + $char_len)) . '';
+ $start = $close_pos + $char_len;
+ $string_open = false;
}
- else {
- $attributes = ' class="st0"';
+ } while($string_open);
+
+ if ($check_linenumbers) {
+ // Are line numbers used? If so, we should end the string before
+ // the newline and begin it again (so when <li>s are put in the source
+ // it remains XHTML compliant)
+ // note to self: This opens up possibility of config files specifying
+ // that languages can/cannot have multiline strings???
+ $string = str_replace("\n", "\n", $string);
+ }
+
+ $result .= $string;
+ $string = '';
+ $i = $start - 1;
+ continue;
+ } else if ($this->lexic_permissions['STRINGS'] && $hq && $hq[0] == $char &&
+ substr($part, $i, $hq_strlen) == $hq && ($i != $next_comment_regexp_pos)) {
+ // The start of a hard quoted string
+ if (!$this->use_classes) {
+ $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS']['HARD'] . '"';
+ $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR']['HARD'] . '"';
+ } else {
+ $string_attributes = ' class="st_h"';
+ $escape_char_attributes = ' class="es_h"';
+ }
+ // parse the stuff before this
+ $result .= $this->parse_non_string_part($stuff_to_parse);
+ $stuff_to_parse = '';
+
+ // now handle the string
+ $string = '';
+
+ // look for closing quote
+ $start = $i + $hq_strlen;
+ while ($close_pos = strpos($part, $this->language_data['HARDQUOTE'][1], $start)) {
+ $start = $close_pos + 1;
+ if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['HARDCHAR'] &&
+ (($i + $hq_strlen) != ($close_pos))) { //Support empty string for HQ escapes if Starter = Escape
+ // make sure this quote is not escaped
+ foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
+ if (substr($part, $close_pos - 1, strlen($hardescape)) == $hardescape) {
+ // check whether this quote is escaped or if it is something like '\\'
+ $escape_char_pos = $close_pos - 1;
+ while ($escape_char_pos > 0
+ && $part[$escape_char_pos - 1] == $this->language_data['HARDCHAR']) {
+ --$escape_char_pos;
+ }
+ if (($close_pos - $escape_char_pos) & 1) {
+ // uneven number of escape chars => this quote is escaped
+ continue 2;
+ }
+ }
+ }
}
- $char = "" . GeSHi::hsc($char);
- $result .= $this->parse_non_string_part( $stuff_to_parse );
- $stuff_to_parse = '';
+ // found closing quote
+ break;
}
- else if ($hq && substr($part, $i, strlen($hq)) == $hq &&
- ($STRING_OPEN == '') && $this->lexic_permissions['STRINGS']) {
- // The start of a hard quoted string
- $STRING_OPEN = $this->language_data['HARDQUOTE'][1];
- if (!$this->use_classes) {
- $attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][0] . '"';
+
+ //Found the closing delimiter?
+ if (!$close_pos) {
+ // span till the end of this $part when no closing delimiter is found
+ $close_pos = $length;
+ }
+
+ //Get the actual string
+ $string = substr($part, $i, $close_pos - $i + 1);
+ $i = $close_pos;
+
+ // handle escape chars and encode html chars
+ // (special because when we have escape chars within our string they may not be escaped)
+ if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) {
+ $start = 0;
+ $new_string = '';
+ while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) {
+ // html escape stuff before
+ $new_string .= $this->hsc(substr($string, $start, $es_pos - $start));
+ // check if this is a hard escape
+ foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
+ if (substr($string, $es_pos, strlen($hardescape)) == $hardescape) {
+ // indeed, this is a hardescape
+ $new_string .= "" .
+ $this->hsc($hardescape) . '';
+ $start = $es_pos + strlen($hardescape);
+ continue 2;
+ }
+ }
+ // not a hard escape, but a normal escape
+ // they come in pairs of two
+ $c = 0;
+ while (isset($string[$es_pos + $c]) && isset($string[$es_pos + $c + 1])
+ && $string[$es_pos + $c] == $this->language_data['ESCAPE_CHAR']
+ && $string[$es_pos + $c + 1] == $this->language_data['ESCAPE_CHAR']) {
+ $c += 2;
+ }
+ if ($c) {
+ $new_string .= "" .
+ str_repeat($escaped_escape_char, $c) .
+ '';
+ $start = $es_pos + $c;
+ } else {
+ // this is just a single lonely escape char...
+ $new_string .= $escaped_escape_char;
+ $start = $es_pos + 1;
+ }
}
- else {
- $attributes = ' class="st0"';
+ $string = $new_string . $this->hsc(substr($string, $start));
+ } else {
+ $string = $this->hsc($string);
+ }
+
+ if ($check_linenumbers) {
+ // Are line numbers used? If so, we should end the string before
+ // the newline and begin it again (so when <li>s are put in the source
+ // it remains XHTML compliant)
+ // note to self: This opens up possibility of config files specifying
+ // that languages can/cannot have multiline strings???
+ $string = str_replace("\n", "
\n", $string);
+ }
+
+ $result .= "" . $string . '';
+ $string = '';
+ continue;
+ } else {
+ //Have a look for regexp comments
+ if ($i == $next_comment_regexp_pos) {
+ $COMMENT_MATCHED = true;
+ $comment = $comment_regexp_cache_per_key[$next_comment_regexp_key];
+ $test_str = $this->hsc(substr($part, $i, $comment['length']));
+
+ //@todo If remove important do remove here
+ if ($this->lexic_permissions['COMMENTS']['MULTI']) {
+ if (!$this->use_classes) {
+ $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment['key']] . '"';
+ } else {
+ $attributes = ' class="co' . $comment['key'] . '"';
+ }
+
+ $test_str = "" . $test_str . "";
+
+ // Short-cut through all the multiline code
+ if ($check_linenumbers) {
+ // strreplace to put close span and open span around multiline newlines
+ $test_str = str_replace(
+ "\n", "\n",
+ str_replace("\n ", "\n ", $test_str)
+ );
+ }
}
- $char = "" . $hq;
- $i += strlen($hq) - 1;
- $HARDQUOTE_OPEN = true;
+
+ $i += $comment['length'] - 1;
+
+ // parse the rest
$result .= $this->parse_non_string_part($stuff_to_parse);
$stuff_to_parse = '';
}
- else if ($char == $this->language_data['ESCAPE_CHAR'] && $STRING_OPEN != '') {
- // An escape character
- if (!$ESCAPE_CHAR_OPEN) {
- $ESCAPE_CHAR_OPEN = !$HARDQUOTE_OPEN; // true unless $HARDQUOTE_OPEN
- if ($HARDQUOTE_OPEN) {
- foreach ($this->language_data['HARDESCAPE'] as $hard) {
- if (substr($part, $i, strlen($hard)) == $hard) {
- $ESCAPE_CHAR_OPEN = true;
+
+ // If we haven't matched a regexp comment, try multi-line comments
+ if (!$COMMENT_MATCHED) {
+ // Is this a multiline comment?
+ if (!empty($this->language_data['COMMENT_MULTI']) && $next_comment_multi_pos < $i) {
+ $next_comment_multi_pos = $length;
+ foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) {
+ $match_i = false;
+ if (isset($comment_multi_cache_per_key[$open]) &&
+ ($comment_multi_cache_per_key[$open] >= $i ||
+ $comment_multi_cache_per_key[$open] === false)) {
+ // we have already matched something
+ if ($comment_multi_cache_per_key[$open] === false) {
+ // this comment is never matched
+ continue;
+ }
+ $match_i = $comment_multi_cache_per_key[$open];
+ } else if (($match_i = stripos($part, $open, $i)) !== false) {
+ $comment_multi_cache_per_key[$open] = $match_i;
+ } else {
+ $comment_multi_cache_per_key[$open] = false;
+ continue;
+ }
+ if ($match_i !== false && $match_i < $next_comment_multi_pos) {
+ $next_comment_multi_pos = $match_i;
+ $next_open_comment_multi = $open;
+ if ($match_i === $i) {
break;
}
}
}
- if ($ESCAPE_CHAR_OPEN && $this->lexic_permissions['ESCAPE_CHAR']) {
- if (!$this->use_classes) {
- $attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"';
- }
- else {
- $attributes = ' class="es0"';
- }
- $char = "" . $char;
- if (substr($code, $i + 1, 1) == "\n") {
- // escaping a newline, what's the point in putting the span around
- // the newline? It only causes hassles when inserting line numbers
- $char .= '';
- $ESCAPE_CHAR_OPEN = false;
+ }
+ if ($i == $next_comment_multi_pos) {
+ $open = $next_open_comment_multi;
+ $close = $this->language_data['COMMENT_MULTI'][$open];
+ $open_strlen = strlen($open);
+ $close_strlen = strlen($close);
+ $COMMENT_MATCHED = true;
+ $test_str_match = $open;
+ //@todo If remove important do remove here
+ if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
+ $open == GESHI_START_IMPORTANT) {
+ if ($open != GESHI_START_IMPORTANT) {
+ if (!$this->use_classes) {
+ $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"';
+ } else {
+ $attributes = ' class="coMULTI"';
+ }
+ $test_str = "" . $this->hsc($open);
+ } else {
+ if (!$this->use_classes) {
+ $attributes = ' style="' . $this->important_styles . '"';
+ } else {
+ $attributes = ' class="imp"';
+ }
+
+ // We don't include the start of the comment if it's an
+ // "important" part
+ $test_str = "";
}
+ } else {
+ $test_str = $this->hsc($open);
}
- }
- else {
- $ESCAPE_CHAR_OPEN = false;
- if ($this->lexic_permissions['ESCAPE_CHAR']) {
- $char .= '';
+
+ $close_pos = strpos( $part, $close, $i + $open_strlen );
+
+ if ($close_pos === false) {
+ $close_pos = $length;
}
+
+ // Short-cut through all the multiline code
+ $rest_of_comment = $this->hsc(substr($part, $i + $open_strlen, $close_pos - $i - $open_strlen + $close_strlen));
+ if (($this->lexic_permissions['COMMENTS']['MULTI'] ||
+ $test_str_match == GESHI_START_IMPORTANT) &&
+ $check_linenumbers) {
+
+ // strreplace to put close span and open span around multiline newlines
+ $test_str .= str_replace(
+ "\n", "\n",
+ str_replace("\n ", "\n ", $rest_of_comment)
+ );
+ } else {
+ $test_str .= $rest_of_comment;
+ }
+
+ if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
+ $test_str_match == GESHI_START_IMPORTANT) {
+ $test_str .= '';
+ }
+
+ $i = $close_pos + $close_strlen - 1;
+
+ // parse the rest
+ $result .= $this->parse_non_string_part($stuff_to_parse);
+ $stuff_to_parse = '';
}
}
- else if ($ESCAPE_CHAR_OPEN) {
- if ($this->lexic_permissions['ESCAPE_CHAR']) {
- $char .= '';
- }
- $ESCAPE_CHAR_OPEN = false;
- $test_str = $char;
- }
- else if ($STRING_OPEN == '') {
- // Is this a multiline comment?
- foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) {
- $com_len = strlen($open);
- $test_str = substr( $part, $i, $com_len );
- $test_str_match = $test_str;
- if ($open == $test_str) {
- $COMMENT_MATCHED = true;
- //@todo If remove important do remove here
- if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
- $test_str == GESHI_START_IMPORTANT) {
- if ($test_str != GESHI_START_IMPORTANT) {
- if (!$this->use_classes) {
- $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"';
- }
- else {
- $attributes = ' class="coMULTI"';
- }
- $test_str = "" . GeSHi::hsc($test_str);
+
+ // If we haven't matched a multiline comment, try single-line comments
+ if (!$COMMENT_MATCHED) {
+ // cache potential single line comment occurrences
+ if (!empty($this->language_data['COMMENT_SINGLE']) && $next_comment_single_pos < $i) {
+ $next_comment_single_pos = $length;
+ foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key => $comment_mark) {
+ $match_i = false;
+ if (isset($comment_single_cache_per_key[$comment_key]) &&
+ ($comment_single_cache_per_key[$comment_key] >= $i ||
+ $comment_single_cache_per_key[$comment_key] === false)) {
+ // we have already matched something
+ if ($comment_single_cache_per_key[$comment_key] === false) {
+ // this comment is never matched
+ continue;
}
- else {
- if (!$this->use_classes) {
- $attributes = ' style="' . $this->important_styles . '"';
- }
- else {
- $attributes = ' class="imp"';
- }
- // We don't include the start of the comment if it's an
- // "important" part
- $test_str = "";
+ $match_i = $comment_single_cache_per_key[$comment_key];
+ } else if (
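+ // case sensitive comments -- NOTE(review): stripos() is case-INsensitive, while the "non case sensitive" branch below uses strpos(); the two calls look swapped, confirm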
+ ($this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
+ ($match_i = stripos($part, $comment_mark, $i)) !== false) ||
+ // non case sensitive
+ (!$this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
+ (($match_i = strpos($part, $comment_mark, $i)) !== false))) {
+ $comment_single_cache_per_key[$comment_key] = $match_i;
+ } else {
+ $comment_single_cache_per_key[$comment_key] = false;
+ continue;
+ }
+ if ($match_i !== false && $match_i < $next_comment_single_pos) {
+ $next_comment_single_pos = $match_i;
+ $next_comment_single_key = $comment_key;
+ if ($match_i === $i) {
+ break;
}
}
- else {
- $test_str = GeSHi::hsc($test_str);
+ }
+ }
+ if ($next_comment_single_pos == $i) {
+ $comment_key = $next_comment_single_key;
+ $comment_mark = $this->language_data['COMMENT_SINGLE'][$comment_key];
+ $com_len = strlen($comment_mark);
+
+ // This check will find special variables like $# in bash
+ // or compiler directives of Delphi beginning {$
+ if ((empty($sc_disallowed_before) || ($i == 0) ||
+ (false === strpos($sc_disallowed_before, $part[$i-1]))) &&
+ (empty($sc_disallowed_after) || ($length <= $i + $com_len) ||
+ (false === strpos($sc_disallowed_after, $part[$i + $com_len]))))
+ {
+ // this is a valid comment
+ $COMMENT_MATCHED = true;
+ if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
+ if (!$this->use_classes) {
+ $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment_key] . '"';
+ } else {
+ $attributes = ' class="co' . $comment_key . '"';
+ }
+ $test_str = "" . $this->hsc($this->change_case($comment_mark));
+ } else {
+ $test_str = $this->hsc($comment_mark);
}
- $close_pos = strpos( $part, $close, $i + strlen($close) );
-
+ //Check if this comment is the last in the source
+ $close_pos = strpos($part, "\n", $i);
$oops = false;
if ($close_pos === false) {
- $close_pos = strlen($part);
+ $close_pos = $length;
$oops = true;
}
- else {
- $close_pos -= ($com_len - strlen($close));
+ $test_str .= $this->hsc(substr($part, $i + $com_len, $close_pos - $i - $com_len));
+ if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
+ $test_str .= "";
}
- // Short-cut through all the multiline code
- $rest_of_comment = GeSHi::hsc(substr($part, $i + $com_len, $close_pos - $i));
- if (($this->lexic_permissions['COMMENTS']['MULTI'] ||
- $test_str_match == GESHI_START_IMPORTANT) &&
- ($this->line_numbers != GESHI_NO_LINE_NUMBERS ||
- count($this->highlight_extra_lines) > 0)) {
- // strreplace to put close span and open span around multiline newlines
- $test_str .= str_replace("\n", "\n", $rest_of_comment);
- }
- else {
- $test_str .= $rest_of_comment;
+ // Take into account that the comment might be the last in the source
+ if (!$oops) {
+ $test_str .= "\n";
}
- if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
- $test_str_match == GESHI_START_IMPORTANT) {
- $test_str .= '';
- if ($oops) {
- $test_str .= "\n";
- }
- }
- $i = $close_pos + $com_len - 1;
+ $i = $close_pos;
+
// parse the rest
$result .= $this->parse_non_string_part($stuff_to_parse);
$stuff_to_parse = '';
- break;
- }
- }
- // If we haven't matched a multiline comment, try single-line comments
- if (!$COMMENT_MATCHED) {
- foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key => $comment_mark) {
- $com_len = strlen($comment_mark);
- $test_str = substr($part, $i, $com_len);
- if ($this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS]) {
- $match = ($comment_mark == $test_str);
- }
- else {
- $match = (strtolower($comment_mark) == strtolower($test_str));
- }
- if ($match) {
- $COMMENT_MATCHED = true;
- if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
- if (!$this->use_classes) {
- $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment_key] . '"';
- }
- else {
- $attributes = ' class="co' . $comment_key . '"';
- }
- $test_str = "" . GeSHi::hsc($this->change_case($test_str));
- }
- else {
- $test_str = GeSHi::hsc($test_str);
- }
- $close_pos = strpos($part, "\n", $i);
- $oops = false;
- if ($close_pos === false) {
- $close_pos = strlen($part);
- $oops = true;
- }
- $test_str .= GeSHi::hsc(substr($part, $i + $com_len, $close_pos - $i - $com_len));
- if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
- $test_str .= "";
- }
- // Take into account that the comment might be the last in the source
- if (!$oops) {
- $test_str .= "\n";
- }
- $i = $close_pos;
- // parse the rest
- $result .= $this->parse_non_string_part($stuff_to_parse);
- $stuff_to_parse = '';
- break;
- }
}
}
}
- else if ($STRING_OPEN != '') {
- // Otherwise, convert it to HTML form
- if (strtolower($this->encoding) == 'utf-8') {
- //only escape <128 (we don't want to break multibyte chars)
- if (ord($char) < 128) {
- $char = GeSHi::hsc($char);
- }
- }
- else {
- //encode everthing
- $char = GeSHi::hsc($char);
- }
- }
- // Where are we adding this char?
- if (!$COMMENT_MATCHED) {
- if (($STRING_OPEN == '') && !$CLOSE_STRING) {
- $stuff_to_parse .= $char;
- }
- else {
- $result .= $char;
- $CLOSE_STRING = false;
- }
- }
- else {
- $result .= $test_str;
- $COMMENT_MATCHED = false;
- }
}
- // Parse the last bit
- $result .= $this->parse_non_string_part($stuff_to_parse);
- $stuff_to_parse = '';
- }
- else {
- if ($STRICTATTRS != '') {
- $part = str_replace("\n", "\n", GeSHi::hsc($part));
- $STRICTATTRS = '';
+
+ // Where are we adding this char?
+ if (!$COMMENT_MATCHED) {
+ $stuff_to_parse .= $char;
+ } else {
+ $result .= $test_str;
+ unset($test_str);
+ $COMMENT_MATCHED = false;
}
- $result .= $part;
- }
- // Close the that surrounds the block
- if ($this->strict_mode && $this->language_data['STYLES']['SCRIPT'][$script_key] != '' &&
- $this->lexic_permissions['SCRIPT']) {
- $result .= '';
}
+ // Parse the last bit
+ $result .= $this->parse_non_string_part($stuff_to_parse);
+ $stuff_to_parse = '';
+ } else {
+ $result .= $this->hsc($part);
}
- else {
- // Else not a block to highlight
- $result .= GeSHi::hsc($part);
+ // Close the <span> that surrounds the block
+ if ($STRICTATTRS != '') {
+ $result = str_replace("\n", "\n", $result);
+ $result .= '';
}
+
+ $endresult .= $result;
+ unset($part, $parts[$key], $result);
}
- // Parse the last stuff (redundant?)
- $result .= $this->parse_non_string_part($stuff_to_parse);
+ //This fix is related to SF#1923020, but has to be applied regardless of
+ //actually highlighting symbols.
+ /** NOTE: memorypeak #3 */
+ $endresult = str_replace(array('', ''), array(';', '|'), $endresult);
- // Lop off the very first and last spaces
- $result = substr($result, 1, -1);
+// // Parse the last stuff (redundant?)
+// $result .= $this->parse_non_string_part($stuff_to_parse);
- // Are we still in a string?
- if ($STRING_OPEN) {
- $result .= '';
- }
+ // Lop off the very first and last spaces
+// $result = substr($result, 1, -1);
// We're finished: stop timing
$this->set_time($start_time, microtime());
- return $this->finalise($result);
+ $this->finalise($endresult);
+ return $endresult;
}
/**
* Swaps out spaces and tabs for HTML indentation. Not needed if
* the code is in a pre block...
*
- * @param string The source to indent
- * @return string The source with HTML indenting applied
+ * @param string The source to indent (reference!)
* @since 1.0.0
* @access private
*/
- function indent($result) {
+ function indent(&$result) {
/// Replace tabs with the correct number of spaces
if (false !== strpos($result, "\t")) {
$lines = explode("\n", $result);
- foreach ($lines as $key => $line) {
+ $result = null;//Save memory while we process the lines individually
+ $tab_width = $this->get_real_tab_width();
+ $tab_string = ' ' . str_repeat(' ', $tab_width);
+
+ for ($key = 0, $n = count($lines); $key < $n; $key++) {
+ $line = $lines[$key];
if (false === strpos($line, "\t")) {
- $lines[$key] = $line;
continue;
}
$pos = 0;
- $tab_width = $this->tab_width;
$length = strlen($line);
- $result_line = '';
+ $lines[$key] = ''; // reduce memory
$IN_TAG = false;
- for ($i = 0; $i < $length; $i++) {
- $char = substr($line, $i, 1);
+ for ($i = 0; $i < $length; ++$i) {
+ $char = $line[$i];
// Simple engine to work out whether we're in a tag.
// If we are we modify $pos. This is so we ignore HTML
// in the line and only workout the tab replacement
@@ -1856,26 +3138,24 @@
// This test could be improved to include strings in the
// html so that < or > would be allowed in user's styles
// (e.g. quotes: '<' '>'; or similar)
- if ($IN_TAG && '>' == $char) {
- $IN_TAG = false;
- $result_line .= '>';
- ++$pos;
- }
- else if (!$IN_TAG && '<' == $char) {
+ if ($IN_TAG) {
+ if ('>' == $char) {
+ $IN_TAG = false;
+ }
+ $lines[$key] .= $char;
+ } else if ('<' == $char) {
$IN_TAG = true;
- $result_line .= '<';
- ++$pos;
- }
- else if (!$IN_TAG && '&' == $char) {
- $substr = substr($line, $i + 3, 4);
- //$substr_5 = substr($line, 5, 1);
+ $lines[$key] .= '<';
+ } else if ('&' == $char) {
+ $substr = substr($line, $i + 3, 5);
$posi = strpos($substr, ';');
- if (false !== $posi) {
- $pos += $posi + 3;
+ if (false === $posi) {
+ ++$pos;
+ } else {
+ $pos -= $posi+2;
}
- $result_line .= '&';
- }
- else if (!$IN_TAG && "\t" == $char) {
+ $lines[$key] .= $char;
+ } else if ("\t" == $char) {
$str = '';
// OPTIMISE - move $strs out. Make an array:
// $tabs = array(
@@ -1883,38 +3163,43 @@
// 2 => ' ',
// 3 => ' ' etc etc
// to use instead of building a string every time
- $strs = array(0 => ' ', 1 => ' ');
- for ($k = 0; $k < ($tab_width - (($i - $pos) % $tab_width)); $k++) $str .= $strs[$k % 2];
- $result_line .= $str;
- $pos++;
+ $tab_end_width = $tab_width - ($pos % $tab_width); //Moved out of the loop as it doesn't change within the loop
+ if (($pos & 1) || 1 == $tab_end_width) {
+ $str .= substr($tab_string, 6, $tab_end_width);
+ } else {
+ $str .= substr($tab_string, 0, $tab_end_width+5);
+ }
+ $lines[$key] .= $str;
+ $pos += $tab_end_width;
if (false === strpos($line, "\t", $i + 1)) {
- $result_line .= substr($line, $i + 1);
+ $lines[$key] .= substr($line, $i + 1);
break;
}
- }
- else if ($IN_TAG) {
+ } else if (0 == $pos && ' ' == $char) {
+ $lines[$key] .= ' ';
+ ++$pos;
+ } else {
+ $lines[$key] .= $char;
++$pos;
- $result_line .= $char;
- }
- else {
- $result_line .= $char;
- //++$pos;
}
}
- $lines[$key] = $result_line;
}
$result = implode("\n", $lines);
+ unset($lines);//We don't need the lines separated beyond this --- free them!
}
// Other whitespace
- $result = str_replace(' ', ' ', $result);
+ // BenBE: Fix to reduce the number of replacements to be done
+ $result = preg_replace('/^ /m', ' ', $result);
$result = str_replace(' ', ' ', $result);
- $result = str_replace("\n ", "\n ", $result);
- if ($this->line_numbers == GESHI_NO_LINE_NUMBERS) {
- $result = nl2br($result);
+ if ($this->line_numbers == GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
+ if ($this->line_ending === null) {
+ $result = nl2br($result);
+ } else {
+ $result = str_replace("\n", $this->line_ending, $result);
+ }
}
- return $result;
}
/**
@@ -1926,65 +3211,122 @@
* @access private
*/
function change_case($instr) {
- if ($this->language_data['CASE_KEYWORDS'] == GESHI_CAPS_UPPER) {
- return strtoupper($instr);
- }
- else if ($this->language_data['CASE_KEYWORDS'] == GESHI_CAPS_LOWER) {
- return strtolower($instr);
+ switch ($this->language_data['CASE_KEYWORDS']) {
+ case GESHI_CAPS_UPPER:
+ return strtoupper($instr);
+ case GESHI_CAPS_LOWER:
+ return strtolower($instr);
+ default:
+ return $instr;
}
- return $instr;
}
/**
- * Adds a url to a keyword where needed.
+ * Handles replacements of keywords to include markup and links if requested
*
- * @param string The keyword to add the URL HTML to
- * @param int What group the keyword is from
- * @param boolean Whether to get the HTML for the start or end
- * @return The HTML for either the start or end of the HTML <a> tag
- * @since 1.0.2
+ * @param string The keyword to add the Markup to
+ * @return The HTML for the match found
+ * @since 1.0.8
* @access private
- * @todo Get rid of ender
+ *
+ * @todo Get rid of ender in keyword links
*/
- function add_url_to_keyword($keyword, $group, $start_or_end) {
- if (!$this->keyword_links) {
- // Keyword links have been disabled
- return;
- }
-
- if (isset($this->language_data['URLS'][$group]) &&
- $this->language_data['URLS'][$group] != '' &&
- substr($keyword, 0, 5) != '</') {
- // There is a base group for this keyword
- if ($start_or_end == 'BEGIN') {
- // HTML workaround... not good form (tm) but should work for 1.0.X
- if ($keyword != '') {
- // Old system: strtolower
- //$keyword = ( $this->language_data['CASE_SENSITIVE'][$group] ) ? $keyword : strtolower($keyword);
- // New system: get keyword from language file to get correct case
- foreach ($this->language_data['KEYWORDS'][$group] as $word) {
- if (strtolower($word) == strtolower($keyword)) {
+ function handle_keyword_replace($match) {
+ $k = $this->_kw_replace_group;
+ $keyword = $match[0];
+
+ $before = '';
+ $after = '';
+
+ if ($this->keyword_links) {
+ // Keyword links have been enabled
+
+ if (isset($this->language_data['URLS'][$k]) &&
+ $this->language_data['URLS'][$k] != '') {
+ // There is a base group for this keyword
+
+ // Old system: strtolower
+ //$keyword = ( $this->language_data['CASE_SENSITIVE'][$group] ) ? $keyword : strtolower($keyword);
+ // New system: get keyword from language file to get correct case
+ if (!$this->language_data['CASE_SENSITIVE'][$k] &&
+ strpos($this->language_data['URLS'][$k], '{FNAME}') !== false) {
+ foreach ($this->language_data['KEYWORDS'][$k] as $word) {
+ if (strcasecmp($word, $keyword) == 0) {
break;
}
}
- $word = ( substr($word, 0, 4) == '<' ) ? substr($word, 4) : $word;
- $word = ( substr($word, -4) == '>' ) ? substr($word, 0, strlen($word) - 4) : $word;
- if (!$word) return '';
-
- return '<|UR1|"' .
- str_replace(
- array('{FNAME}', '.'),
- array(GeSHi::hsc($word), ''),
- $this->language_data['URLS'][$group]
- ) . '">';
+ } else {
+ $word = $keyword;
}
- return '';
- // HTML fix. Again, dirty hackage...
- }
- else if (!($this->language == 'html4strict' && ('>' == $keyword || '<' == $keyword))) {
- return '';
+
+ $before = '<|UR1|"' .
+ str_replace(
+ array(
+ '{FNAME}',
+ '{FNAMEL}',
+ '{FNAMEU}',
+ '.'),
+ array(
+ str_replace('+', '%20', urlencode($this->hsc($word))),
+ str_replace('+', '%20', urlencode($this->hsc(strtolower($word)))),
+ str_replace('+', '%20', urlencode($this->hsc(strtoupper($word)))),
+ ''),
+ $this->language_data['URLS'][$k]
+ ) . '">';
+ $after = '';
}
}
+
+ return $before . '<|/'. $k .'/>' . $this->change_case($keyword) . '|>' . $after;
+ }
+
+ /**
+ * handles regular expressions highlighting-definitions with callback functions
+ *
+ * Looks up the callable stored in $this->language_data['STYLES']['REGEXPS']
+ * under the key $this->_rx_key, invokes it with the first captured group and
+ * places the value it returns inside a style="..." attribute. The attribute
+ * is followed by the captured text itself and the '|>' end marker.
+ *
+ * @note this is a callback, don't use it directly
+ * @note $this->_rx_key is read here, so it has to be set before this
+ *       callback is triggered
+ *
+ * @param array the matches array; index 1 (first subpattern) is the text
+ *              that gets passed to the style callback and emitted
+ * @return string The highlighted string
+ * @since 1.0.8
+ * @access private
+ */
+ function handle_regexps_callback($matches) {
+ // before: "' style=\"' . call_user_func(\"$func\", '\\1') . '\"\\1|>'",
+ return ' style="' . call_user_func($this->language_data['STYLES']['REGEXPS'][$this->_rx_key], $matches[1]) . '"'. $matches[1] . '|>';
+ }
+
+ /**
+ * handles newlines in REGEXPS matches. Set the _hmr_* vars before calling this
+ *
+ * Reads $this->_hmr_before, $this->_hmr_after, $this->_hmr_replace and
+ * $this->_hmr_key. When _hmr_replace is set, every backreference token of
+ * the form \N (N being a key of the matches array) inside the before, after
+ * and replace templates is substituted with the corresponding match;
+ * otherwise the whole match ($matches[0]) is used as the replacement text.
+ * The replacement is then wrapped in '<|!REG3XP' . key . '!>' ... '|>'
+ * markers, and the marker pair is closed and reopened around every newline
+ * so that no marker spans more than one line.
+ *
+ * NOTE(review): the tokens are substituted with str_replace() in key order,
+ * so with ten or more subpatterns \1 would presumably be replaced inside
+ * \10 first -- confirm whether any language file relies on that many groups.
+ *
+ * @note this is a callback, don't use it directly
+ *
+ * @param array the matches array
+ * @return string
+ * @since 1.0.8
+ * @access private
+ */
+ function handle_multiline_regexps($matches) {
+ $before = $this->_hmr_before;
+ $after = $this->_hmr_after;
+ if ($this->_hmr_replace) {
+ $replace = $this->_hmr_replace;
+ $search = array();
+
+ foreach (array_keys($matches) as $k) {
+ $search[] = '\\' . $k; // backreference token for this match key: a backslash followed by the key
+ }
+
+ $before = str_replace($search, $matches, $before);
+ $after = str_replace($search, $matches, $after);
+ $replace = str_replace($search, $matches, $replace);
+ } else {
+ $replace = $matches[0]; // no replacement template: highlight the complete match
+ }
+ return $before
+ . '<|!REG3XP' . $this->_hmr_key .'!>'
+ . str_replace("\n", "|>\n<|!REG3XP" . $this->_hmr_key . '!>', $replace) // close the marker before each newline and reopen it after
+ . '|>'
+ . $after;
+ }
/**
@@ -1996,123 +3338,186 @@
* @access private
* @todo BUGGY! Why? Why not build string and return?
*/
- function parse_non_string_part(&$stuff_to_parse) {
- $stuff_to_parse = ' ' . GeSHi::hsc($stuff_to_parse);
- $stuff_to_parse_pregquote = preg_quote($stuff_to_parse, '/');
- $func = '$this->change_case';
- $func2 = '$this->add_url_to_keyword';
+ function parse_non_string_part($stuff_to_parse) {
+ $stuff_to_parse = ' ' . $this->hsc($stuff_to_parse);
- //
- // Regular expressions
- //
- foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
- if ($this->lexic_permissions['REGEXPS'][$key]) {
- if (is_array($regexp)) {
- $stuff_to_parse = preg_replace(
- "/" .
- str_replace('/', '\/', $regexp[GESHI_SEARCH]) .
- "/{$regexp[GESHI_MODIFIERS]}",
- "{$regexp[GESHI_BEFORE]}<|!REG3XP$key!>{$regexp[GESHI_REPLACE]}|>{$regexp[GESHI_AFTER]}",
- $stuff_to_parse
- );
+ // Highlight keywords
+ $disallowed_before = "(?lexic_permissions['STRINGS']) {
+ $quotemarks = preg_quote(implode($this->language_data['QUOTEMARKS']), '/');
+ $disallowed_before .= $quotemarks;
+ $disallowed_after .= $quotemarks;
+ }
+ $disallowed_before .= "])";
+ $disallowed_after .= "])";
+
+ $parser_control_pergroup = false;
+ if (isset($this->language_data['PARSER_CONTROL'])) {
+ if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
+ $x = 0; // check wether per-keyword-group parser_control is enabled
+ if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'])) {
+ $disallowed_before = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'];
+ ++$x;
}
- else {
- $stuff_to_parse = preg_replace( "/(" . str_replace('/', '\/', $regexp) . ")/", "<|!REG3XP$key!>\\1|>", $stuff_to_parse);
+ if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'])) {
+ $disallowed_after = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'];
+ ++$x;
}
+ $parser_control_pergroup = (count($this->language_data['PARSER_CONTROL']['KEYWORDS']) - $x) > 0;
}
}
- //
- // Highlight numbers. This regexp sucks... anyone with a regexp that WORKS
- // here wins a cookie if they send it to me. At the moment there's two doing
- // almost exactly the same thing, except the second one prevents a number
- // being highlighted twice (eg 5)
- // Put /NUM!/ in for the styles, which gets replaced at the end.
- //
- // NEW ONE: Brice Bernard
- //
- if ($this->lexic_permissions['NUMBERS'] && preg_match('#[0-9]#', $stuff_to_parse )) {
- $stuff_to_parse = preg_replace('/([-+]?\\b(?:[0-9]*\\.)?[0-9]+\\b)/', '<|/NUM!/>\\1|>', $stuff_to_parse);
+ foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
+ if (!isset($this->lexic_permissions['KEYWORDS'][$k]) ||
+ $this->lexic_permissions['KEYWORDS'][$k]) {
+
+ $case_sensitive = $this->language_data['CASE_SENSITIVE'][$k];
+ $modifiers = $case_sensitive ? '' : 'i';
+
+ // NEW in 1.0.8 - per-keyword-group parser control
+ $disallowed_before_local = $disallowed_before;
+ $disallowed_after_local = $disallowed_after;
+ if ($parser_control_pergroup && isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k])) {
+ if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'])) {
+ $disallowed_before_local =
+ $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'];
+ }
+
+ if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'])) {
+ $disallowed_after_local =
+ $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'];
+ }
+ }
+
+ $this->_kw_replace_group = $k;
+
+ //NEW in 1.0.8, the cached regexp list
+ // since we don't want PHP / PCRE to crash due to too large patterns we split them into smaller chunks
+ for ($set = 0, $set_length = count($this->language_data['CACHED_KEYWORD_LISTS'][$k]); $set < $set_length; ++$set) {
+ $keywordset =& $this->language_data['CACHED_KEYWORD_LISTS'][$k][$set];
+ // Might make a more unique string for putting the number in soon
+ // Basically, we don't put the styles in yet because then the styles themselves will
+ // get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
+ $stuff_to_parse = preg_replace_callback(
+ "/$disallowed_before_local({$keywordset})(?!\(?:htm|php|aspx?))$disallowed_after_local/$modifiers",
+ array($this, 'handle_keyword_replace'),
+ $stuff_to_parse
+ );
+ }
+ }
}
- // Highlight keywords
- // if there is a couple of alpha symbols there *might* be a keyword
- if (preg_match('#[a-zA-Z]{2,}#', $stuff_to_parse)) {
- foreach ($this->language_data['KEYWORDS'] as $k => $keywordset) {
- if ($this->lexic_permissions['KEYWORDS'][$k]) {
- foreach ($keywordset as $keyword) {
- $keyword = preg_quote($keyword, '/');
- //
- // This replacement checks the word is on it's own (except if brackets etc
- // are next to it), then highlights it. We don't put the color=" for the span
- // in just yet - otherwise languages with the keywords "color" or "or" have
- // a fit.
- //
- if (false !== stristr($stuff_to_parse_pregquote, $keyword )) {
- $stuff_to_parse .= ' ';
- // Might make a more unique string for putting the number in soon
- // Basically, we don't put the styles in yet because then the styles themselves will
- // get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
- $styles = "/$k/";
- if ($this->language_data['CASE_SENSITIVE'][$k]) {
- $stuff_to_parse = preg_replace(
- "/([^a-zA-Z0-9\$_\|\#;>|^])($keyword)(?=[^a-zA-Z0-9_<\|%\-&])/e",
- "'\\1' . $func2('\\2', '$k', 'BEGIN') . '<|$styles>' . $func('\\2') . '|>' . $func2('\\2', '$k', 'END')",
- $stuff_to_parse
- );
- }
- else {
- // Change the case of the word.
- // hackage again... must... release... 1.2...
- if ('smarty' == $this->language) { $hackage = '\/'; } else { $hackage = ''; }
- $stuff_to_parse = preg_replace(
- "/([^a-zA-Z0-9\$_\|\#;>$hackage|^])($keyword)(?=[^a-zA-Z0-9_<\|%\-&])/ie",
- "'\\1' . $func2('\\2', '$k', 'BEGIN') . '<|$styles>' . $func('\\2') . '|>' . $func2('\\2', '$k', 'END')",
- $stuff_to_parse
- );
- }
- $stuff_to_parse = substr($stuff_to_parse, 0, strlen($stuff_to_parse) - 1);
- }
+ // Regular expressions
+ foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
+ if ($this->lexic_permissions['REGEXPS'][$key]) {
+ if (is_array($regexp)) {
+ if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
+ // produce valid HTML when we match multiple lines
+ $this->_hmr_replace = $regexp[GESHI_REPLACE];
+ $this->_hmr_before = $regexp[GESHI_BEFORE];
+ $this->_hmr_key = $key;
+ $this->_hmr_after = $regexp[GESHI_AFTER];
+ $stuff_to_parse = preg_replace_callback(
+ "/" . $regexp[GESHI_SEARCH] . "/{$regexp[GESHI_MODIFIERS]}",
+ array($this, 'handle_multiline_regexps'),
+ $stuff_to_parse);
+ $this->_hmr_replace = false;
+ $this->_hmr_before = '';
+ $this->_hmr_after = '';
+ } else {
+ $stuff_to_parse = preg_replace(
+ '/' . $regexp[GESHI_SEARCH] . '/' . $regexp[GESHI_MODIFIERS],
+ $regexp[GESHI_BEFORE] . '<|!REG3XP'. $key .'!>' . $regexp[GESHI_REPLACE] . '|>' . $regexp[GESHI_AFTER],
+ $stuff_to_parse);
+ }
+ } else {
+ if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
+ // produce valid HTML when we match multiple lines
+ $this->_hmr_key = $key;
+ $stuff_to_parse = preg_replace_callback( "/(" . $regexp . ")/",
+ array($this, 'handle_multiline_regexps'), $stuff_to_parse);
+ $this->_hmr_key = '';
+ } else {
+ $stuff_to_parse = preg_replace( "/(" . $regexp . ")/", "<|!REG3XP$key!>\\1|>", $stuff_to_parse);
}
}
}
}
+ // Highlight numbers. As of 1.0.8 we support different types of numbers
+ $numbers_found = false;
+
+ if ($this->lexic_permissions['NUMBERS'] && preg_match($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'], $stuff_to_parse )) {
+ $numbers_found = true;
+
+ //For each of the formats ...
+ foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
+ //Check if it should be highlighted ...
+ $stuff_to_parse = preg_replace($regexp, "<|/NUM!$id/>\\1|>", $stuff_to_parse);
+ }
+ }
+
//
// Now that's all done, replace /[number]/ with the correct styles
//
- foreach ($this->language_data['KEYWORDS'] as $k => $kws) {
+ foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
if (!$this->use_classes) {
- $attributes = ' style="' . $this->language_data['STYLES']['KEYWORDS'][$k] . '"';
- }
- else {
+ $attributes = ' style="' .
+ (isset($this->language_data['STYLES']['KEYWORDS'][$k]) ?
+ $this->language_data['STYLES']['KEYWORDS'][$k] : "") . '"';
+ } else {
$attributes = ' class="kw' . $k . '"';
}
- $stuff_to_parse = str_replace("/$k/", $attributes, $stuff_to_parse);
+ $stuff_to_parse = str_replace("<|/$k/>", "<|$attributes>", $stuff_to_parse);
}
- // Put number styles in
- if (!$this->use_classes && $this->lexic_permissions['NUMBERS']) {
- $attributes = ' style="' . $this->language_data['STYLES']['NUMBERS'][0] . '"';
- }
- else {
- $attributes = ' class="nu0"';
+ if ($numbers_found) {
+ // Put number styles in
+ foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
+ //Commented out for now, as this needs some review ...
+ // if ($numbers_permissions & $id) {
+ //Get the appropriate style ...
+ //Checking for unset styles is done by the style cache builder ...
+ if (!$this->use_classes) {
+ $attributes = ' style="' . $this->language_data['STYLES']['NUMBERS'][$id] . '"';
+ } else {
+ $attributes = ' class="nu'.$id.'"';
+ }
+
+ //Set in the correct styles ...
+ $stuff_to_parse = str_replace("/NUM!$id/", $attributes, $stuff_to_parse);
+ // }
+ }
}
- $stuff_to_parse = str_replace('/NUM!/', $attributes, $stuff_to_parse);
- //
// Highlight methods and fields in objects
- //
if ($this->lexic_permissions['METHODS'] && $this->language_data['OOLANG']) {
+ $oolang_spaces = "[\s]*";
+ $oolang_before = "";
+ $oolang_after = "[a-zA-Z][a-zA-Z0-9_]*";
+ if (isset($this->language_data['PARSER_CONTROL'])) {
+ if (isset($this->language_data['PARSER_CONTROL']['OOLANG'])) {
+ if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'])) {
+ $oolang_before = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'];
+ }
+ if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'])) {
+ $oolang_after = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'];
+ }
+ if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'])) {
+ $oolang_spaces = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'];
+ }
+ }
+ }
+
foreach ($this->language_data['OBJECT_SPLITTERS'] as $key => $splitter) {
- if (false !== stristr($stuff_to_parse, $splitter)) {
+ if (false !== strpos($stuff_to_parse, $splitter)) {
if (!$this->use_classes) {
$attributes = ' style="' . $this->language_data['STYLES']['METHODS'][$key] . '"';
- }
- else {
+ } else {
$attributes = ' class="me' . $key . '"';
}
- $stuff_to_parse = preg_replace("/(" . preg_quote($this->language_data['OBJECT_SPLITTERS'][$key], 1) . "[\s]*)([a-zA-Z\*\(][a-zA-Z0-9_\*]*)/", "\\1<|$attributes>\\2|>", $stuff_to_parse);
+ $stuff_to_parse = preg_replace("/($oolang_before)(" . preg_quote($this->language_data['OBJECT_SPLITTERS'][$key], '/') . ")($oolang_spaces)($oolang_after)/", "\\1\\2\\3<|$attributes>\\4|>", $stuff_to_parse);
}
}
}
@@ -2124,49 +3529,104 @@
// be highlighting regardless
//
if ($this->lexic_permissions['BRACKETS']) {
- $code_entities_match = array('[', ']', '(', ')', '{', '}');
- if (!$this->use_classes) {
- $code_entities_replace = array(
- '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">[|>',
- '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">]|>',
- '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">(|>',
- '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">)|>',
- '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">{|>',
- '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">}|>',
- );
- }
- else {
- $code_entities_replace = array(
- '<| class="br0">[|>',
- '<| class="br0">]|>',
- '<| class="br0">(|>',
- '<| class="br0">)|>',
- '<| class="br0">{|>',
- '<| class="br0">}|>',
- );
+ $stuff_to_parse = str_replace( $this->language_data['CACHE_BRACKET_MATCH'],
+ $this->language_data['CACHE_BRACKET_REPLACE'], $stuff_to_parse );
+ }
+
+
+ //FIX for symbol highlighting ...
+ if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
+ //Get all matches and throw away those within a block that is already highlighted... (i.e. matched by a regexp)
+ $n_symbols = preg_match_all("/<\|(?:|[^>])+>(?:(?!\|>).*?)\|>|<\/a>|(?:" . $this->language_data['SYMBOL_SEARCH'] . ")+(?![^<]+?>)/", $stuff_to_parse, $pot_symbols, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
+ $global_offset = 0;
+ for ($s_id = 0; $s_id < $n_symbols; ++$s_id) {
+ $symbol_match = $pot_symbols[$s_id][0][0];
+ if (strpos($symbol_match, '<') !== false || strpos($symbol_match, '>') !== false) {
+ // already highlighted blocks _must_ include either < or >
+ // so if this conditional applies, we have to skip this match
+ // BenBE: UNLESS the block contains <SEMI> or <PIPE>
+ if(strpos($symbol_match, '') === false &&
+ strpos($symbol_match, '') === false) {
+ continue;
+ }
+ }
+
+ // if we reach this point, we have a valid match which needs to be highlighted
+
+ $symbol_length = strlen($symbol_match);
+ $symbol_offset = $pot_symbols[$s_id][0][1];
+ unset($pot_symbols[$s_id]);
+ $symbol_end = $symbol_length + $symbol_offset;
+ $symbol_hl = "";
+
+ // if we have multiple styles, we have to handle them properly
+ if ($this->language_data['MULTIPLE_SYMBOL_GROUPS']) {
+ $old_sym = -1;
+ // Split the current stuff to replace into its atomic symbols ...
+ preg_match_all("/" . $this->language_data['SYMBOL_SEARCH'] . "/", $symbol_match, $sym_match_syms, PREG_PATTERN_ORDER);
+ foreach ($sym_match_syms[0] as $sym_ms) {
+ //Check if consecutive symbols belong to the same group to save output ...
+ if (isset($this->language_data['SYMBOL_DATA'][$sym_ms])
+ && ($this->language_data['SYMBOL_DATA'][$sym_ms] != $old_sym)) {
+ if (-1 != $old_sym) {
+ $symbol_hl .= "|>";
+ }
+ $old_sym = $this->language_data['SYMBOL_DATA'][$sym_ms];
+ if (!$this->use_classes) {
+ $symbol_hl .= '<| style="' . $this->language_data['STYLES']['SYMBOLS'][$old_sym] . '">';
+ } else {
+ $symbol_hl .= '<| class="sy' . $old_sym . '">';
+ }
+ }
+ $symbol_hl .= $sym_ms;
+ }
+ unset($sym_match_syms);
+
+ //Close remaining tags and insert the replacement at the right position ...
+ //Take caution if symbol_hl is empty to avoid doubled closing spans.
+ if (-1 != $old_sym) {
+ $symbol_hl .= "|>";
+ }
+ } else {
+ if (!$this->use_classes) {
+ $symbol_hl = '<| style="' . $this->language_data['STYLES']['SYMBOLS'][0] . '">';
+ } else {
+ $symbol_hl = '<| class="sy0">';
+ }
+ $symbol_hl .= $symbol_match . '|>';
+ }
+
+ $stuff_to_parse = substr_replace($stuff_to_parse, $symbol_hl, $symbol_offset + $global_offset, $symbol_length);
+
+ // since we replace old text with something of different size,
+ // we'll have to keep track of the differences
+ $global_offset += strlen($symbol_hl) - $symbol_length;
}
- $stuff_to_parse = str_replace( $code_entities_match, $code_entities_replace, $stuff_to_parse );
}
+ //FIX for symbol highlighting ...
- //
// Add class/style for regexps
- //
- foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
+ foreach (array_keys($this->language_data['REGEXPS']) as $key) {
if ($this->lexic_permissions['REGEXPS'][$key]) {
- if (!$this->use_classes) {
- $attributes = ' style="' . $this->language_data['STYLES']['REGEXPS'][$key] . '"';
- }
- else {
- if(is_array($this->language_data['REGEXPS'][$key]) &&
+ if (is_callable($this->language_data['STYLES']['REGEXPS'][$key])) {
+ $this->_rx_key = $key;
+ $stuff_to_parse = preg_replace_callback("/!REG3XP$key!(.*)\|>/U",
+ array($this, 'handle_regexps_callback'),
+ $stuff_to_parse);
+ } else {
+ if (!$this->use_classes) {
+ $attributes = ' style="' . $this->language_data['STYLES']['REGEXPS'][$key] . '"';
+ } else {
+ if (is_array($this->language_data['REGEXPS'][$key]) &&
array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$key])) {
- $attributes = ' class="'
- . $this->language_data['REGEXPS'][$key][GESHI_CLASS] . '"';
- }
- else {
- $attributes = ' class="re' . $key . '"';
+ $attributes = ' class="' .
+ $this->language_data['REGEXPS'][$key][GESHI_CLASS] . '"';
+ } else {
+ $attributes = ' class="re' . $key . '"';
+ }
}
+ $stuff_to_parse = str_replace("!REG3XP$key!", "$attributes", $stuff_to_parse);
}
- $stuff_to_parse = str_replace("!REG3XP$key!", "$attributes", $stuff_to_parse);
}
}
@@ -2176,12 +3636,10 @@
if (isset($this->link_styles[GESHI_LINK])) {
if ($this->use_classes) {
$stuff_to_parse = str_replace('<|UR1|', 'link_target . ' href=', $stuff_to_parse);
- }
- else {
+ } else {
$stuff_to_parse = str_replace('<|UR1|', 'link_target . ' style="' . $this->link_styles[GESHI_LINK] . '" href=', $stuff_to_parse);
}
- }
- else {
+ } else {
$stuff_to_parse = str_replace('<|UR1|', 'link_target . ' href=', $stuff_to_parse);
}
@@ -2191,7 +3649,6 @@
$stuff_to_parse = str_replace('<|', '', '', $stuff_to_parse );
-
return substr($stuff_to_parse, 1);
}
@@ -2220,63 +3677,168 @@
}
/**
+     * Merges arrays recursively, overwriting values of the first array with values of later arrays
+     *
+     * Accepts any number of arguments. Starting from the first array, each
+     * later array is laid over the result key by key: when the incoming value
+     * is an array and the key already exists, both are merged by a recursive
+     * call; in every other case the incoming value replaces the existing one.
+     *
+     * @param array two or more arrays to merge (variable argument list)
+     * @return array|false the merged array, or false (after an E_USER_WARNING)
+     *                     when one of the arguments is not an array
+     * @since 1.0.8
+     * @access private
+     */
+    function merge_arrays() {
+        $arrays = func_get_args();
+
+        // check arguments
+        // comment out if more performance is necessary (in this case the merge loop will trigger a warning if the argument is not an array)
+        foreach ($arrays as $position => $candidate) {
+            if (is_array($candidate)) {
+                continue;
+            }
+            // also array_merge_recursive returns nothing in this case
+            trigger_error('Argument #' . ($position + 1) . ' is not an array - trying to merge array with scalar! Returning false!', E_USER_WARNING);
+            return false;
+        }
+
+        // the first array is the base of the result in every case
+        $merged = $arrays[0];
+
+        // lay every remaining array over the result, in argument order
+        foreach (array_slice($arrays, 1) as $overlay) {
+            foreach ($overlay as $key => $value) {
+                if (!is_array($value) || !isset($merged[$key])) {
+                    $merged[$key] = $value;
+                    continue;
+                }
+                // if $merged[$key] is not an array, a scalar would be merged with an array - the result is not defined (incompatible arrays)
+                // in this case the recursive call triggers an E_USER_WARNING and $merged[$key] becomes false.
+                $merged[$key] = $this->merge_arrays($merged[$key], $value);
+            }
+        }
+
+        return $merged;
+    }
+
+ /**
* Gets language information and stores it for later use
*
+ * Returns immediately when $file_name is already the loaded language.
+ * Otherwise it marks the parse cache as not built, requires the language
+ * file (which is expected to fill $language_data), copies the result to
+ * $this->language_data, takes strict mode from STRICT_MODE_APPLIES, sets
+ * the lexic permissions for keywords, comments and regexps, applies the
+ * optional PARSER_CONTROL/ENABLE_FLAGS, defaults HARDCHAR to ESCAPE_CHAR
+ * and finally merges the styles of an optional ".style.php" file.
+ *
+ * @param string The filename of the language file you want to load
+ * @since 1.0.0
* @access private
* @todo Needs to load keys for lexic permissions for keywords, regexps etc
*/
function load_language($file_name) {
+ if ($file_name == $this->loaded_language) {
+ // this file is already loaded, nothing to do
+ return;
+ }
+
+ //Prepare some stuff before actually loading the language file (remember the file, invalidate the parse cache)
+ $this->loaded_language = $file_name;
+ $this->parse_cache_built = false;
$this->enable_highlighting();
$language_data = array();
+
+ //Load the language file (it is expected to fill the local $language_data array)
require $file_name;
+
// Perhaps some checking might be added here later to check that
// $language data is a valid thing but maybe not
$this->language_data = $language_data;
+
// Set strict mode if should be set
- if ($this->language_data['STRICT_MODE_APPLIES'] == GESHI_ALWAYS) {
- $this->strict_mode = true;
- }
+ $this->strict_mode = $this->language_data['STRICT_MODE_APPLIES'];
+
// Set permissions for all lexics to true
// so they'll be highlighted by default
- foreach ($this->language_data['KEYWORDS'] as $key => $words) {
- $this->lexic_permissions['KEYWORDS'][$key] = true;
+ foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
+ if (!empty($this->language_data['KEYWORDS'][$key])) {
+ $this->lexic_permissions['KEYWORDS'][$key] = true;
+ } else {
+ $this->lexic_permissions['KEYWORDS'][$key] = false;
+ }
}
- foreach ($this->language_data['COMMENT_SINGLE'] as $key => $comment) {
+
+ foreach (array_keys($this->language_data['COMMENT_SINGLE']) as $key) {
$this->lexic_permissions['COMMENTS'][$key] = true;
}
- foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
+ foreach (array_keys($this->language_data['REGEXPS']) as $key) {
$this->lexic_permissions['REGEXPS'][$key] = true;
}
- // Set default class for CSS
- $this->overall_class = $this->language;
+
+ // for BenBE and future code reviews:
+ // we can use empty here since we only check for existence and emptiness of an array
+ // if it is not an array at all but rather false or null this will work as intended as well
+ // even if $this->language_data['PARSER_CONTROL'] is undefined this won't trigger a notice
+ if (!empty($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'])) {
+ foreach ($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'] as $flag => $value) {
+ // only the exact value GESHI_NEVER turns a flag off; every other value (GESHI_MAYBE included) turns it on
+ $perm = $value !== GESHI_NEVER;
+ if ($flag == 'ALL') {
+ $this->enable_highlighting($perm);
+ continue;
+ }
+ if (!isset($this->lexic_permissions[$flag])) {
+ // unknown lexic permission, silently ignored
+ continue;
+ }
+ if (is_array($this->lexic_permissions[$flag])) {
+ foreach ($this->lexic_permissions[$flag] as $key => $val) {
+ $this->lexic_permissions[$flag][$key] = $perm;
+ }
+ } else {
+ $this->lexic_permissions[$flag] = $perm;
+ }
+ }
+ unset($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS']);
+ }
+
+ //Fix: Problem where hardescapes weren't handled if no ESCAPE_CHAR was given
+ //You need to set one for HARDESCAPES only in this case.
+ if(!isset($this->language_data['HARDCHAR'])) {
+ $this->language_data['HARDCHAR'] = $this->language_data['ESCAPE_CHAR'];
+ }
+
+ //NEW in 1.0.8: Allow styles to be loaded from a separate file to override defaults (the last four characters of $file_name - presumably ".php" - are replaced by ".style.php")
+ $style_filename = substr($file_name, 0, -4) . '.style.php';
+ if (is_readable($style_filename)) {
+ //Clear any style_data that could have been set before ...
+ if (isset($style_data)) {
+ unset($style_data);
+ }
+
+ //Read the Style Information from the style file
+ include $style_filename;
+
+ //Apply the new styles to our current language styles
+ if (isset($style_data) && is_array($style_data)) {
+ $this->language_data['STYLES'] =
+ $this->merge_arrays($this->language_data['STYLES'], $style_data);
+ }
+ }
}
/**
* Takes the parsed code and various options, and creates the HTML
* surrounding it to make it look nice.
*
- * @param string The code already parsed
- * @return string The code nicely finalised
+ * @param string The code already parsed (reference!)
* @since 1.0.0
* @access private
*/
- function finalise($parsed_code) {
+ function finalise(&$parsed_code) {
// Remove end parts of important declarations
// This is BUGGY!! My fault for bad code: fix coming in 1.2
// @todo Remove this crap
if ($this->enable_important_blocks &&
- (strstr($parsed_code, GeSHi::hsc(GESHI_START_IMPORTANT)) === false)) {
- $parsed_code = str_replace(GeSHi::hsc(GESHI_END_IMPORTANT), '', $parsed_code);
+ (strpos($parsed_code, $this->hsc(GESHI_START_IMPORTANT)) === false)) {
+ $parsed_code = str_replace($this->hsc(GESHI_END_IMPORTANT), '', $parsed_code);
}
// Add HTML whitespace stuff if we're using the