scripts/CodeMirror/mode/xquery/xquery.js (view raw)
1// CodeMirror, copyright (c) by Marijn Haverbeke and others
2// Distributed under an MIT license: https://codemirror.net/LICENSE
3
4(function(mod) {
5 if (typeof exports == "object" && typeof module == "object") // CommonJS
6 mod(require("../../lib/codemirror"));
7 else if (typeof define == "function" && define.amd) // AMD
8 define(["../../lib/codemirror"], mod);
9 else // Plain browser env
10 mod(CodeMirror);
11})(function(CodeMirror) {
12"use strict";
13
14CodeMirror.defineMode("xquery", function() {
15
// Lookup table from token text to a descriptor of the form
// {type: atype, style: astyle}. It is built once by the immediately
// invoked function below and then only read by the tokenizers.
var keywords = (function() {
  // descriptors shared by every entry of the same category
  var operatorInfo = {type: "operator", style: "keyword"};
  var atomInfo = {type: "atom", style: "atom"};
  var punctuationInfo = {type: "punctuation", style: null};
  var axisInfo = {type: "axis_specifier", style: "qualifier"};

  // the table that is returned at the end of this function
  var table = {};
  table[','] = punctuationInfo;

  // registers every name in `names`, using describe(name) as its descriptor
  function register(names, describe) {
    names.forEach(function(name) {
      table[name] = describe(name);
    });
  }

  // 'basic' keywords: each one gets its own descriptor whose type is the
  // keyword itself, e.g. 'after' --> {type: "after", style: "keyword"}
  var basicWords = ['after', 'all', 'allowing', 'ancestor', 'ancestor-or-self', 'any', 'array', 'as',
    'ascending', 'at', 'attribute', 'base-uri', 'before', 'boundary-space', 'by', 'case', 'cast',
    'castable', 'catch', 'child', 'collation', 'comment', 'construction', 'contains', 'content',
    'context', 'copy', 'copy-namespaces', 'count', 'decimal-format', 'declare', 'default', 'delete',
    'descendant', 'descendant-or-self', 'descending', 'diacritics', 'different', 'distance',
    'document', 'document-node', 'element', 'else', 'empty', 'empty-sequence', 'encoding', 'end',
    'entire', 'every', 'exactly', 'except', 'external', 'first', 'following', 'following-sibling',
    'for', 'from', 'ftand', 'ftnot', 'ft-option', 'ftor', 'function', 'fuzzy', 'greatest', 'group',
    'if', 'import', 'in', 'inherit', 'insensitive', 'insert', 'instance', 'intersect', 'into',
    'invoke', 'is', 'item', 'language', 'last', 'lax', 'least', 'let', 'levels', 'lowercase', 'map',
    'modify', 'module', 'most', 'namespace', 'next', 'no', 'node', 'nodes', 'no-inherit',
    'no-preserve', 'not', 'occurs', 'of', 'only', 'option', 'order', 'ordered', 'ordering',
    'paragraph', 'paragraphs', 'parent', 'phrase', 'preceding', 'preceding-sibling', 'preserve',
    'previous', 'processing-instruction', 'relationship', 'rename', 'replace', 'return',
    'revalidation', 'same', 'satisfies', 'schema', 'schema-attribute', 'schema-element', 'score',
    'self', 'sensitive', 'sentence', 'sentences', 'sequence', 'skip', 'sliding', 'some', 'stable',
    'start', 'stemming', 'stop', 'strict', 'strip', 'switch', 'text', 'then', 'thesaurus', 'times',
    'to', 'transform', 'treat', 'try', 'tumbling', 'type', 'typeswitch', 'union', 'unordered',
    'update', 'updating', 'uppercase', 'using', 'validate', 'value', 'variable', 'version',
    'weight', 'when', 'where', 'wildcards', 'window', 'with', 'without', 'word', 'words', 'xquery'];
  register(basicWords, function(word) {
    return {type: word, style: "keyword"};
  });

  // type names: all of them share the single {type: "atom", style: "atom"} descriptor
  var typeNames = ['xs:anyAtomicType', 'xs:anySimpleType', 'xs:anyType', 'xs:anyURI',
    'xs:base64Binary', 'xs:boolean', 'xs:byte', 'xs:date', 'xs:dateTime', 'xs:dateTimeStamp',
    'xs:dayTimeDuration', 'xs:decimal', 'xs:double', 'xs:duration', 'xs:ENTITIES', 'xs:ENTITY',
    'xs:float', 'xs:gDay', 'xs:gMonth', 'xs:gMonthDay', 'xs:gYear', 'xs:gYearMonth', 'xs:hexBinary',
    'xs:ID', 'xs:IDREF', 'xs:IDREFS', 'xs:int', 'xs:integer', 'xs:item', 'xs:java', 'xs:language',
    'xs:long', 'xs:Name', 'xs:NCName', 'xs:negativeInteger', 'xs:NMTOKEN', 'xs:NMTOKENS',
    'xs:nonNegativeInteger', 'xs:nonPositiveInteger', 'xs:normalizedString', 'xs:NOTATION',
    'xs:numeric', 'xs:positiveInteger', 'xs:precisionDecimal', 'xs:QName', 'xs:short', 'xs:string',
    'xs:time', 'xs:token', 'xs:unsignedByte', 'xs:unsignedInt', 'xs:unsignedLong',
    'xs:unsignedShort', 'xs:untyped', 'xs:untypedAtomic', 'xs:yearMonthDuration'];
  register(typeNames, function() { return atomInfo; });

  // operators: all of them share the single {type: "operator", style: "keyword"} descriptor
  var operatorTokens = ['eq', 'ne', 'lt', 'le', 'gt', 'ge', ':=', '=', '>', '>=', '<', '<=', '.', '|', '?', 'and', 'or', 'div', 'idiv', 'mod', '*', '/', '+', '-'];
  register(operatorTokens, function() { return operatorInfo; });

  // axis specifiers: all of them share the single
  // {type: "axis_specifier", style: "qualifier"} descriptor
  var axisTokens = ["self::", "attribute::", "child::", "descendant::", "descendant-or-self::", "parent::",
    "ancestor::", "ancestor-or-self::", "following::", "preceding::", "following-sibling::", "preceding-sibling::"];
  register(axisTokens, function() { return axisInfo; });

  return table;
})();
81
// Installs `f` as the active tokenizer on `state` and immediately runs it
// on the current stream, returning whatever style `f` returns.
function chain(stream, state, f) {
  var nextTokenizer = f;
  state.tokenize = nextTokenizer;
  var style = nextTokenizer(stream, state);
  return style;
}
86
// The primary mode tokenizer.
//
// Consumes one character from `stream` and, depending on that character and
// on the context on top of `state.stack`, either returns a style directly or
// hands off to a more specific tokenizer via chain(). Returns a style string
// or null.
function tokenBase(stream, state) {
  var ch = stream.next(),
      mightBeFunction = false,
      isEQName = isEQNameAhead(stream);

  // an XML tag (if not in some sub, chained tokenizer)
  if (ch == "<") {
    if (stream.match("!--", true))
      return chain(stream, state, tokenXMLComment);

    // the "![CDATA" text is left unconsumed here; tokenCDATA consumes it next
    if (stream.match("![CDATA", false)) {
      state.tokenize = tokenCDATA;
      return "tag";
    }

    if (stream.match("?", false)) {
      return chain(stream, state, tokenPreProcessing);
    }

    var isclose = stream.eat("/");
    stream.eatSpace();
    var tagName = "", c;
    while ((c = stream.eat(/[^\s\u00a0=<>\"\'\/?]/))) tagName += c;

    return chain(stream, state, tokenTag(tagName, isclose));
  }
  // start code block
  else if (ch == "{") {
    pushStateStack(state, {type: "codeblock"});
    return null;
  }
  // end code block
  else if (ch == "}") {
    popStateStack(state);
    return null;
  }
  // if we're in an XML block
  else if (isInXmlBlock(state)) {
    if (ch == ">")
      return "tag";
    else if (ch == "/" && stream.eat(">")) {
      popStateStack(state);
      return "tag";
    }
    else
      return "variable";
  }
  // if a number; the exponent marker may be written as "e" or "E"
  else if (/\d/.test(ch)) {
    stream.match(/^\d*(?:\.\d*)?(?:[eE][+\-]?\d+)?/);
    return "atom";
  }
  // comment start
  else if (ch === "(" && stream.eat(":")) {
    pushStateStack(state, {type: "comment"});
    return chain(stream, state, tokenComment);
  }
  // quoted string
  else if (!isEQName && (ch === '"' || ch === "'"))
    return chain(stream, state, tokenString(ch));
  // variable
  else if (ch === "$") {
    return chain(stream, state, tokenVariable);
  }
  // assignment
  else if (ch === ":" && stream.eat("=")) {
    return "keyword";
  }
  // open paren
  else if (ch === "(") {
    pushStateStack(state, {type: "paren"});
    return null;
  }
  // close paren
  else if (ch === ")") {
    popStateStack(state);
    return null;
  }
  // open bracket
  else if (ch === "[") {
    pushStateStack(state, {type: "bracket"});
    return null;
  }
  // close bracket
  else if (ch === "]") {
    popStateStack(state);
    return null;
  }
  else {
    var known = keywords.propertyIsEnumerable(ch) && keywords[ch];

    // if there's a EQName ahead, consume the rest of the string portion, it's likely a function.
    // The loop also stops at end of line (next() returns undefined there) so it can never spin.
    if (isEQName && (ch === '"' || ch === "'")) {
      var q;
      while ((q = stream.next()) != null && q !== ch) {}
    }

    // gobble up a word if the character is not known
    if (!known) stream.eatWhile(/[\w\$_-]/);

    // gobble a colon in the case that is a lib func type call fn:doc
    var foundColon = stream.eat(":");

    // if there's not a second colon, gobble another word. Otherwise, it's probably an axis specifier
    // which should get matched as a keyword
    if (!stream.eat(":") && foundColon) {
      stream.eatWhile(/[\w\$_-]/);
    }
    // if the next non whitespace character is an open paren, this is probably a function (if not a keyword of other sort)
    if (stream.match(/^[ \t]*\(/, false)) {
      mightBeFunction = true;
    }
    // is the word a keyword?
    var word = stream.current();
    known = keywords.propertyIsEnumerable(word) && keywords[word];

    // if we think it's a function call but not yet known,
    // set style to variable for now for lack of something better
    if (mightBeFunction && !known) known = {type: "function_call", style: "variable def"};

    // if the previous word was element, attribute, axis specifier, this word should be the name of that
    if (isInXmlConstructor(state)) {
      popStateStack(state);
      return "variable";
    }
    // as previously checked, if the word is element, attribute, axis specifier, call it an "xmlconstructor" and
    // push the stack so we know to look for it on the next word
    if (word == "element" || word == "attribute" || known.type == "axis_specifier") pushStateStack(state, {type: "xmlconstructor"});

    // if the word is known, return the details of that else just call this a generic 'word'
    return known ? known.style : "variable";
  }
}
219
// Tokenizer for XQuery "(: ... :)" comments, including nested ones.
//
// Consumes characters up to and including the ":)" that closes the outermost
// comment, or to the end of the line if the comment continues. The nesting
// depth is kept in state.commentDepth so that a nested comment spanning
// several lines is still balanced when tokenizing resumes on the next line.
// Always returns "comment".
function tokenComment(stream, state) {
  var maybeEnd = false, maybeNested = false, ch;
  // depth left over from a previous line of the same comment (absent => 0)
  var nestedCount = state.commentDepth || 0;

  while (ch = stream.next()) {
    if (ch == ")" && maybeEnd) {
      if (nestedCount > 0)
        nestedCount--;
      else {
        // outermost comment closed: reset the depth and leave the comment context
        state.commentDepth = 0;
        popStateStack(state);
        return "comment";
      }
    }
    else if (ch == ":" && maybeNested) {
      nestedCount++;
    }
    maybeEnd = (ch == ":");
    maybeNested = (ch == "(");
  }

  // end of line reached inside the comment: remember how deep we are
  state.commentDepth = nestedCount;
  return "comment";
}
241
// Builds a tokenizer for a string literal delimited by `quote`.
//
// quote: the delimiter character that opened the string.
// f:     optional tokenizer to set state.tokenize back to once the string
//        has been closed.
//
// The returned tokenizer always returns "string". A "string" entry is pushed
// on state.stack when the string starts and popped when the closing quote is
// consumed. When the tokenizer is re-entered while that entry is still on top
// of the stack (for example on the next line of a multi-line string) no
// second entry is pushed, so one pop on the closing quote fully leaves the
// string context.
function tokenString(quote, f) {
  return function(stream, state) {
    var ch;

    // already inside a string and the token just consumed is the quote: this closes it
    if (isInString(state) && stream.current() == quote) {
      popStateStack(state);
      if (f) state.tokenize = f;
      return "string";
    }

    // open the string context only once; a continuation call must not stack another entry
    if (!isInString(state))
      pushStateStack(state, {type: "string", name: quote, tokenize: tokenString(quote, f)});

    // if we're in a string and in an XML block, allow an embedded code block
    if (stream.match("{", false) && isInXmlAttributeBlock(state)) {
      state.tokenize = tokenBase;
      return "string";
    }

    while (ch = stream.next()) {
      if (ch == quote) {
        popStateStack(state);
        if (f) state.tokenize = f;
        break;
      }
      else {
        // if we're in a string and in an XML block, allow an embedded code block in an attribute
        if (stream.match("{", false) && isInXmlAttributeBlock(state)) {
          state.tokenize = tokenBase;
          return "string";
        }
      }
    }

    return "string";
  };
}
282
// Tokenizer for variables; entered after the leading "$" has been consumed.
//
// Consumes the variable name, including an optional prefix separated by a
// single ":" (but never the ":" of a following ":=" assignment), then sets
// state.tokenize back to tokenBase. Always returns "variable".
function tokenVariable(stream, state) {
  var isVariableChar = /[\w\$_-]/;

  // a variable may start with a quoted EQName so if the next character is quote, consume to the next quote
  if (stream.eat("\"")) {
    // stream.next() returns undefined at end of line; stop there as well so
    // an unterminated quote cannot loop forever
    var ch;
    while ((ch = stream.next()) != null && ch !== "\"") {}
    stream.eat(":");
  } else {
    stream.eatWhile(isVariableChar);
    if (!stream.match(":=", false)) stream.eat(":");
  }
  stream.eatWhile(isVariableChar);
  state.tokenize = tokenBase;
  return "variable";
}
299
// Builds the tokenizer that runs right after an XML tag name has been read.
//
// name:    the tag name collected by the caller.
// isclose: truthy when the tag started with "</".
//
// The returned tokenizer always returns "tag" and decides which tokenizer
// handles what follows.
function tokenTag(name, isclose) {
  return function(stream, state) {
    stream.eatSpace();

    // "</name>": leave the tag context and go back to the base tokenizer
    if (isclose && stream.eat(">")) {
      popStateStack(state);
      state.tokenize = tokenBase;
      return "tag";
    }

    // a "/" directly after the name means a self closing tag without
    // attributes; in every other case a "tag" context is opened
    var selfClosing = stream.eat("/");
    if (!selfClosing) {
      pushStateStack(state, {type: "tag", name: name, tokenize: tokenBase});
    }

    // a ">" here ends the tag; otherwise attributes are tokenized next
    var tagEnded = stream.eat(">");
    state.tokenize = tagEnded ? tokenBase : tokenAttribute;
    return "tag";
  };
}
322
// Tokenizer for the attribute part of an XML tag.
//
// Reads one character and returns "tag" for the end of the tag, null for
// "=", hands quoted values to a string tokenizer, and otherwise consumes an
// attribute name and returns "attribute".
function tokenAttribute(stream, state) {
  var first = stream.next();
  var selfCloses = first == "/" && stream.eat(">");
  var closes = first == ">";

  // "/>" or ">": leave the attribute context; "/>" also leaves the tag context
  if (selfCloses || closes) {
    if (isInXmlAttributeBlock(state)) popStateStack(state);
    if (selfCloses && isInXmlBlock(state)) popStateStack(state);
    return "tag";
  }

  if (first == "=") {
    return null;
  }

  // quoted string; tokenAttribute is resumed once the value has been read
  if (first == '"' || first == "'") {
    return chain(stream, state, tokenString(first, tokenAttribute));
  }

  // anything else starts (or continues) an attribute name
  if (!isInXmlAttributeBlock(state)) {
    pushStateStack(state, {type: "attribute", tokenize: tokenAttribute});
  }

  stream.eat(/[a-zA-Z_:]/);
  stream.eatWhile(/[-a-zA-Z0-9_:.]/);
  stream.eatSpace();

  // the case where the attribute has no value and the tag was closed
  var tagEndsNext = stream.match(">", false) || stream.match("/", false);
  if (tagEndsNext) {
    popStateStack(state);
    state.tokenize = tokenBase;
  }

  return "attribute";
}
357
// Tokenizer for XML comments; entered after "<!--" has been consumed.
//
// Consumes up to and including the closing "-->" and then switches back to
// tokenBase. When the line ends first, the consumed text is still styled as
// a comment and this tokenizer stays installed for the next line.
// Always returns "comment".
function tokenXMLComment(stream, state) {
  var ch;
  while (ch = stream.next()) {
    if (ch == "-" && stream.match("->", true)) {
      state.tokenize = tokenBase;
      break;
    }
  }
  return "comment";
}
368
369
// Tokenizer for CDATA sections; installed by tokenBase when "<![CDATA"
// follows a "<".
//
// Consumes up to and including the first "]]" and then switches back to
// tokenBase. When the line ends first, the consumed text is still styled and
// this tokenizer stays installed for the next line.
// Always returns "comment".
function tokenCDATA(stream, state) {
  var ch;
  while (ch = stream.next()) {
    if (ch == "]" && stream.match("]", true)) {
      state.tokenize = tokenBase;
      break;
    }
  }
  return "comment";
}
380
// Tokenizer for XML processing instructions ("<? ... ?>"); entered after the
// "<" has been consumed.
//
// Consumes up to and including the closing "?>" and then switches back to
// tokenBase. When the line ends first, the consumed text is still styled and
// this tokenizer stays installed for the next line.
// Always returns "comment meta".
function tokenPreProcessing(stream, state) {
  var ch;
  while (ch = stream.next()) {
    if (ch == "?" && stream.match(">", true)) {
      state.tokenize = tokenBase;
      break;
    }
  }
  return "comment meta";
}
391
392
// Context predicates: each one asks isIn() whether the entry on top of
// state.stack has the given type, and returns isIn()'s result unchanged.

// top of the stack is a "tag" entry
function isInXmlBlock(state) {
  return isIn(state, "tag");
}

// top of the stack is an "attribute" entry
function isInXmlAttributeBlock(state) {
  return isIn(state, "attribute");
}

// top of the stack is an "xmlconstructor" entry
function isInXmlConstructor(state) {
  return isIn(state, "xmlconstructor");
}

// top of the stack is a "string" entry
function isInString(state) {
  return isIn(state, "string");
}
398
// Checks whether the quote that was just consumed opens a quoted name that
// is directly followed by ":" (for example "uri":local).
//
// Assumes the only text consumed for the current token is the opening quote.
// The stream is only inspected, never advanced. Returns the truthy result of
// stream.match() when such a name is ahead and a falsy value otherwise.
function isEQNameAhead(stream) {
  var opener = stream.current();

  // The quoted part must not contain its own delimiter, so each branch
  // excludes the quote character that opened it.
  if (opener === '"')
    return stream.match(/^[^\"]+\"\:/, false);
  else if (opener === '\'')
    return stream.match(/^[^\']+\'\:/, false);
  else
    return false;
}
408
// Tells whether the entry on top of state.stack has the given type.
// For an empty stack the (falsy) stack length itself is returned.
function isIn(state, type) {
  var depth = state.stack.length;
  return (depth && state.stack[depth - 1].type == type);
}
412
// Puts `newState` on top of the context stack kept in state.stack.
function pushStateStack(state, newState) {
  var stack = state.stack;
  stack.push(newState);
}
416
// Removes the top entry of state.stack and re-installs the tokenizer stored
// on the entry that is now on top; falls back to tokenBase when the stack is
// empty or that entry carries no tokenizer.
function popStateStack(state) {
  var stack = state.stack;
  stack.pop();

  var top = stack.length ? stack[stack.length - 1] : null;
  var restored = top ? top.tokenize : null;
  state.tokenize = restored || tokenBase;
}
422
423 // the interface for the mode API
424 return {
425 startState: function() {
426 return {
427 tokenize: tokenBase,
428 cc: [],
429 stack: []
430 };
431 },
432
433 token: function(stream, state) {
434 if (stream.eatSpace()) return null;
435 var style = state.tokenize(stream, state);
436 return style;
437 },
438
439 blockCommentStart: "(:",
440 blockCommentEnd: ":)"
441
442 };
443
444});
445
446CodeMirror.defineMIME("application/xquery", "xquery");
447
448});