// Copyright (C) 2006 Google Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. /** * @fileoverview * some functions for browser-side pretty printing of code contained in html. * * The lexer should work on a number of languages including C and friends, * Java, Python, Bash, SQL, HTML, XML, CSS, Javascript, and Makefiles. * It works passably on Ruby, PHP and Awk and a decent subset of Perl, but, * because of commenting conventions, doesn't work on Smalltalk, Lisp-like, or * CAML-like languages. * * If there's a language not mentioned here, then I don't know it, and don't * know whether it works. If it has a C-like, Bash-like, or XML-like syntax * then it should work passably. * * Usage: * 1) include this source file in an html page via * * 2) define style rules. See the example page for examples. * 3) mark the
 and  tags in your source with class=prettyprint.
 *    You can also use the (html deprecated)  tag, but the pretty printer
 *    needs to do more substantial DOM manipulations to support that, so some
 *    css styles may not be preserved.
 * That's it.  I wanted to keep the API as simple as possible, so there's no
 * need to specify which language the code is in.
 *
 * Change log:
 * cbeust, 2006/08/22
 *   Java annotations (start with "@") are now captured as literals ("lit")
 */
var PR_keywords = new Object();
/** initialize the keyword list for our target languages. */
(function () {
  var CPP_KEYWORDS = (
    "bool break case catch char class const const_cast continue default " +
    "delete deprecated dllexport dllimport do double dynamic_cast else enum " +
    "explicit extern false float for friend goto if inline int long mutable " +
    "naked namespace new noinline noreturn nothrow novtable operator private " +
    "property protected public register reinterpret_cast return selectany " +
    "short signed sizeof static static_cast struct switch template this " +
    "thread throw true try typedef typeid typename union unsigned using " +
    "declaration, using directive uuid virtual void volatile while typeof");
  var JAVA_KEYWORDS = (
    "abstract default goto package synchronized boolean do if private this " +
    "break double implements protected throw byte else import public throws " +
    "case enum instanceof return transient catch extends int short try char " +
    "final interface static void class finally long strictfp volatile const " +
    "float native super while continue for new switch");
  var PYTHON_KEYWORDS = (
    "and assert break class continue def del elif else except exec finally " +
    "for from global if import in is lambda not or pass print raise return " +
    "try while yield False True None");
  var JSCRIPT_KEYWORDS = (
    "abstract boolean break byte case catch char class const continue " +
    "debugger default delete do double else enum export extends false final " +
    "finally float for function goto if implements import in instanceof int " +
    "interface long native new null package private protected public return " +
    "short static super switch synchronized this throw throws transient " +
    "true try typeof var void volatile while with NaN Infinity");
  var PERL_KEYWORDS = (
    "foreach require sub unless until use elsif BEGIN END");
  var SH_KEYWORDS = (
    "if then do done else fi end");
  var RUBY_KEYWORDS = (
      "if then elsif else end begin do rescue ensure while for class module " +
      "def yield raise until unless and or not when case super undef break " +
      "next redo retry in return alias defined");
  var KEYWORDS = [CPP_KEYWORDS, JAVA_KEYWORDS, JSCRIPT_KEYWORDS, PERL_KEYWORDS,
                  PYTHON_KEYWORDS, RUBY_KEYWORDS, SH_KEYWORDS];
  for (var k = 0; k < KEYWORDS.length; k++) {
    var kw = KEYWORDS[k].split(' ');
    for (var i = 0; i < kw.length; i++) {
      if (kw[i]) { PR_keywords[kw[i]] = true; }
    }
  }
}).call(this);
// token style names.  correspond to css classes
/** token style for a string literal */
var PR_STRING = 'str';
/** token style for a keyword */
var PR_KEYWORD = 'kwd';
/** token style for a comment */
var PR_COMMENT = 'com';
/** token style for a type */
var PR_TYPE = 'typ';
/** token style for a literal value.  e.g. 1, null, true. */
var PR_LITERAL = 'lit';
/** token style for a punctuation string. */
var PR_PUNCTUATION = 'pun';
/** token style for a punctuation string. */
var PR_PLAIN = 'pln';
/** token style for an sgml tag. */
var PR_TAG = 'tag';
/** token style for a markup declaration such as a DOCTYPE. */
var PR_DECLARATION = 'dec';
/** token style for embedded source. */
var PR_SOURCE = 'src';
/** token style for an sgml attribute name. */
var PR_ATTRIB_NAME = 'atn';
/** token style for an sgml attribute value. */
var PR_ATTRIB_VALUE = 'atv';
/** the position of the end of a token during.  A division of a string into
  * n tokens can be represented as a series n - 1 token ends, as long as
  * runs of whitespace warrant their own token.
  * @private
  */
function PR_TokenEnd(end, style) {
  if (undefined === style) { throw new Error('BAD'); }
  if ('number' != typeof(end)) { throw new Error('BAD'); }
  this.end = end;
  this.style = style;
}
PR_TokenEnd.prototype.toString = function () {
  return '[PR_TokenEnd ' + this.end +
    (this.style ? ':' + this.style : '') + ']';
};
/** a chunk of text with a style.  These are used to represent both the output
  * from the lexing functions as well as intermediate results.
  * @constructor
  * @param token the token text
  * @param style one of the token styles defined in designdoc-template, or null
  *   for a styleless token, such as an embedded html tag.
  * @private
  */
function PR_Token(token, style) {
  if (undefined === style) { throw new Error('BAD'); }
  this.token = token;
  this.style = style;
}
PR_Token.prototype.toString = function () {
  return '[PR_Token ' + this.token + (this.style ? ':' + this.style : '') + ']';
};
/** a helper class that decodes common html entities used to escape source and
  * markup punctuation characters in html.
  * @constructor
  * @private
  */
function PR_DecodeHelper() {
  this.next = 0;
  this.ch = '\0';
}
PR_DecodeHelper.prototype.decode = function (s, i) {
  var next = i + 1;
  var ch = s.charAt(i);
  if ('&' == ch) {
    var semi = s.indexOf(';', next);
    if (semi >= 0 && semi < next + 4) {
      var entityName = s.substring(next, semi).toLowerCase();
      next = semi + 1;
      if ('lt' == entityName) {
        ch = '<';
      } else if ('gt' == entityName) {
        ch = '>';
      } else if ('quot' == entityName) {
        ch = '"';
      } else if ('apos' == entityName) {
        ch = '\'';
      } else if ('amp' == entityName) {
        ch = '&';
      } else {
        next = i + 1;
      }
    }
  }
  this.next = next;
  this.ch = ch;
  return this.ch;
}
// some string utilities
function PR_isWordChar(ch) {
  return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
}
function PR_isIdentifierStart(ch) {
  return PR_isWordChar(ch) || ch == '_' || ch == '$' || ch == '@';
}
function PR_isIdentifierPart(ch) {
  return PR_isIdentifierStart(ch) || PR_isDigitChar(ch);
}
function PR_isSpaceChar(ch) {
  return "\t \r\n".indexOf(ch) >= 0;
}
function PR_isDigitChar(ch) {
  return ch >= '0' && ch <= '9';
}
function PR_trim(s) {
  var i = 0, j = s.length - 1;
  while (i <= j && PR_isSpaceChar(s.charAt(i))) { ++i; }
  while (j > i && PR_isSpaceChar(s.charAt(j))) { --j; }
  return s.substring(i, j + 1);
}
function PR_startsWith(s, prefix) {
  return s.length >= prefix.length && prefix == s.substring(0, prefix.length);
}
function PR_endsWith(s, suffix) {
  return s.length >= suffix.length &&
         suffix == s.substring(s.length - suffix.length, s.length);
}
/** true iff prefix matches the first prefix characters in chars[0:len].
  * @private
  */
function PR_prefixMatch(chars, len, prefix) {
  if (len < prefix.length) { return false; }
  for (var i = 0, n = prefix.length; i < n; ++i) {
    if (prefix.charAt(i) != chars[i]) { return false; }
  }
  return true;
}
/** like textToHtml but escapes double quotes to be attribute safe. */
function PR_attribToHtml(str) {
  return str.replace(/&/g, '&')
    .replace(//g, '>')
    .replace(/\"/g, '"')
    .replace(/\xa0/, ' ');
}
/** escapest html special characters to html. */
function PR_textToHtml(str) {
  return str.replace(/&/g, '&')
    .replace(//g, '>')
    .replace(/\xa0/g, ' ');
}
/** is the given node's innerHTML normally unescaped? */
function PR_isRawContent(node) {
  return 'XMP' == node.tagName;
}
var PR_innerHtmlWorks = null;
function PR_getInnerHtml(node) {
  // inner html is hopelessly broken in Safari 2.0.4 when the content is
  // an html description of well formed XML and the containing tag is a PRE
   // tag, so we detect that case and emulate innerHTML.
  if (null == PR_innerHtmlWorks) {
    var testNode = document.createElement('PRE');
    testNode.appendChild(
        document.createTextNode('\n '));
    PR_innerHtmlWorks = !/');
      for (var child = node.firstChild; child; child = child.nextSibling) {
        PR_normalizedHtml(child, out);
      }
      if (node.firstChild || !/^(?:br|link|img)$/.test(name)) {
        out.push('<\/', name, '>');
      }
      break;
    case 2: // an attribute
      out.push(node.name.toLowerCase(), '="', PR_attribToHtml(node.value), '"');
      break;
    case 3: case 4: // text
      out.push(PR_textToHtml(node.nodeValue));
      break;
  }
}
/** split markup into chunks of html tags (style null) and
  * plain text (style {@link #PR_PLAIN}).
  *
  * @param s a String of html.
  * @return an Array of PR_Tokens of style PR_PLAIN and null.
  * @private
  */
function PR_chunkify(s) {
  var chunks = new Array();
  var state = 0;
  var start = 0;
  var pos = -1;
  for (var i = 0, n = s.length; i < n; ++i) {
    var ch = s.charAt(i);
    switch (state) {
      case 0:
        if ('<' == ch) { state = 1; }
        break;
      case 1:
        pos = i - 1;
        if ('/' == ch) { state = 2; }
        else if (PR_isWordChar(ch)) { state = 3; }
        else if ('<' == ch) { state = 1; }
        else { state = 0; }
        break;
      case 2:
        if (PR_isWordChar(ch)) { state = 3; }
        else if ('<' == ch) { state = 1; }
        else { state = 0; }
        break;
      case 3:
        if ('>' == ch) {
          if (pos > start) {
            chunks.push(new PR_Token(s.substring(start, pos), PR_PLAIN));
          }
          chunks.push(new PR_Token(s.substring(pos, i + 1), null));
          start = i + 1;
          pos = -1;
          state = 0;
        }
        break;
    }
  }
  if (s.length > start) {
    chunks.push(new PR_Token(s.substring(start, s.length), PR_PLAIN));
  }
  return chunks;
}
/** splits chunks around entities.
  * @private
  */
function PR_splitEntities(chunks) {
  var chunksOut = new Array();
  var state = 0;
  for (var ci = 0, nc = chunks.length; ci < nc; ++ci) {
    var chunk = chunks[ci];
    if (PR_PLAIN != chunk.style) {
      chunksOut.push(chunk);
      continue;
    }
    var s = chunk.token;
    var pos = 0;
    var start;
    for (var i = 0; i < s.length; ++i) {
      var ch = s.charAt(i);
      switch (state) {
        case 0:
          if ('&' == ch) { state = 1; }
          break;
        case 1:
          if ('#' == ch || PR_isWordChar(ch)) {
            start = i - 1;
            state = 2;
          } else {
            state = 0;
          }
          break;
        case 2:
          if (';' == ch) {
            if (start > pos) {
              chunksOut.push(
                  new PR_Token(s.substring(pos, start), chunk.style));
            }
            chunksOut.push(new PR_Token(s.substring(start, i + 1), null));
            pos = i + 1;
            state = 0;
          }
          break;
      }
    }
    if (s.length > pos) {
      chunksOut.push(pos ?
                     new PR_Token(s.substring(pos, s.length), chunk.style) :
                     chunk);
    }
  }
  return chunksOut;
}
/** walk the tokenEnds list and the chunk list in parallel to generate a list
  * of split tokens.
  * @private
  */
function PR_splitChunks(chunks, tokenEnds) {
  var tokens = new Array();  // the output
  var ci = 0;  // index into chunks
  // position of beginning of amount written so far in absolute space.
  var posAbs = 0;
  // position of amount written so far in chunk space
  var posChunk = 0;
  // current chunk
  var chunk = new PR_Token('', null);
  for (var ei = 0, ne = tokenEnds.length; ei < ne; ++ei) {
    var tokenEnd = tokenEnds[ei];
    var end = tokenEnd.end;
    var tokLen = end - posAbs;
    var remainingInChunk = chunk.token.length - posChunk;
    while (remainingInChunk <= tokLen) {
      if (remainingInChunk > 0) {
        tokens.push(
            new PR_Token(chunk.token.substring(posChunk, chunk.token.length),
                         null == chunk.style ? null : tokenEnd.style));
      }
      posAbs += remainingInChunk;
      posChunk = 0;
      if (ci < chunks.length) { chunk = chunks[ci++]; }
      tokLen = end - posAbs;
      remainingInChunk = chunk.token.length - posChunk;
    }
    if (tokLen) {
      tokens.push(
          new PR_Token(chunk.token.substring(posChunk, posChunk + tokLen),
                       tokenEnd.style));
      posAbs += tokLen;
      posChunk += tokLen;
    }
  }
  return tokens;
}
/** splits markup tokens into declarations, tags, and source chunks.
  * @private
  */
function PR_splitMarkup(chunks) {
  // A state machine to split out declarations, tags, etc.
  // This state machine deals with absolute space in the text, indexed by k,
  // and position in the current chunk, indexed by pos and tokenStart to
  // generate a list of the ends of tokens.
  // Absolute space is calculated by considering the chunks as appended into
  // one big string, as they were before being split.
  // Known failure cases
  // Server side scripting sections such as ...?> in attributes.
  // i.e. 
  // Handling this would require a stack, and we don't use PHP.
  // The output: a list of pairs of PR_TokenEnd instances
  var tokenEnds = new Array();
  var state = 0;  // FSM state variable
  var k = 0;  // position in absolute space of the start of the current chunk
  var tokenStart = -1;  // the start of the current token
  // Try to find a closing tag for any open