1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52 package org.jaxen.saxpath.base;
53
54 class XPathLexer
55 {
56 private String xpath;
57 private int currentPosition;
58 private int endPosition;
59
60 private Token previousToken;
61
62 XPathLexer(String xpath)
63 {
64 setXPath( xpath );
65 }
66
67 private void setXPath(String xpath)
68 {
69 this.xpath = xpath;
70 this.currentPosition = 0;
71 this.endPosition = xpath.length();
72 }
73
74 String getXPath()
75 {
76 return this.xpath;
77 }
78
79 Token nextToken()
80 {
81 Token token = null;
82
83 do
84 {
85 token = null;
86
87 switch ( LA(1) )
88 {
89 case '$':
90 {
91 token = dollar();
92 break;
93 }
94
95 case '"':
96 case '\'':
97 {
98 token = literal();
99 break;
100 }
101
102 case '/':
103 {
104 token = slashes();
105 break;
106 }
107
108 case ',':
109 {
110 token = comma();
111 break;
112 }
113
114 case '(':
115 {
116 token = leftParen();
117 break;
118 }
119
120 case ')':
121 {
122 token = rightParen();
123 break;
124 }
125
126 case '[':
127 {
128 token = leftBracket();
129 break;
130 }
131
132 case ']':
133 {
134 token = rightBracket();
135 break;
136 }
137
138 case '+':
139 {
140 token = plus();
141 break;
142 }
143
144 case '-':
145 {
146 token = minus();
147 break;
148 }
149
150 case '<':
151 case '>':
152 {
153 token = relationalOperator();
154 break;
155 }
156
157 case '=':
158 {
159 token = equals();
160 break;
161 }
162
163 case '!':
164 {
165 if ( LA(2) == '=' )
166 {
167 token = notEquals();
168 }
169 break;
170 }
171
172 case '|':
173 {
174 token = pipe();
175 break;
176 }
177
178 case '@':
179 {
180 token = at();
181 break;
182 }
183
184 case ':':
185 {
186 if ( LA(2) == ':' )
187 {
188 token = doubleColon();
189 }
190 else
191 {
192 token = colon();
193 }
194 break;
195 }
196
197 case '*':
198 {
199 token = star();
200 break;
201 }
202
203 case '.':
204 {
205 switch ( LA(2) )
206 {
207 case '0':
208 case '1':
209 case '2':
210 case '3':
211 case '4':
212 case '5':
213 case '6':
214 case '7':
215 case '8':
216 case '9':
217 {
218 token = number();
219 break;
220 }
221 default:
222 {
223 token = dots();
224 break;
225 }
226 }
227 break;
228 }
229
230 case '0':
231 case '1':
232 case '2':
233 case '3':
234 case '4':
235 case '5':
236 case '6':
237 case '7':
238 case '8':
239 case '9':
240 {
241 token = number();
242 break;
243 }
244
245 case ' ':
246 case '\t':
247 case '\n':
248 case '\r':
249 {
250 token = whitespace();
251 break;
252 }
253
254 default:
255 {
256 if ( isIdentifierStartChar( LA(1) ) )
257 {
258 token = identifierOrOperatorName();
259 }
260 }
261 }
262
263 if ( token == null )
264 {
265 if (!hasMoreChars())
266 {
267 token = new Token( TokenTypes.EOF,
268 getXPath(),
269 currentPosition(),
270 endPosition() );
271 }
272 else
273 {
274 token = new Token( TokenTypes.ERROR,
275 getXPath(),
276 currentPosition(),
277 endPosition() );
278 }
279 }
280
281 }
282 while ( token.getTokenType() == TokenTypes.SKIP );
283
284 setPreviousToken( token );
285
286 return token;
287 }
288
289 private Token identifierOrOperatorName()
290 {
291 Token token = null;
292
293 if ( previousToken != null )
294 {
295
296
297
298
299
300
301
302
303
304
305
306 switch ( previousToken.getTokenType() )
307 {
308 case TokenTypes.AT:
309 case TokenTypes.DOUBLE_COLON:
310 case TokenTypes.LEFT_PAREN:
311 case TokenTypes.LEFT_BRACKET:
312 case TokenTypes.AND:
313 case TokenTypes.OR:
314 case TokenTypes.MOD:
315 case TokenTypes.DIV:
316 case TokenTypes.COLON:
317 case TokenTypes.SLASH:
318 case TokenTypes.DOUBLE_SLASH:
319 case TokenTypes.PIPE:
320 case TokenTypes.DOLLAR:
321 case TokenTypes.PLUS:
322 case TokenTypes.MINUS:
323 case TokenTypes.STAR:
324 case TokenTypes.COMMA:
325 case TokenTypes.LESS_THAN_SIGN:
326 case TokenTypes.GREATER_THAN_SIGN:
327 case TokenTypes.LESS_THAN_OR_EQUALS_SIGN:
328 case TokenTypes.GREATER_THAN_OR_EQUALS_SIGN:
329 case TokenTypes.EQUALS:
330 case TokenTypes.NOT_EQUALS:
331 {
332 token = identifier();
333 break;
334 }
335 default:
336 {
337 token = operatorName();
338 break;
339 }
340 }
341 }
342 else
343 {
344 token = identifier();
345 }
346
347 return token;
348 }
349
350 private Token identifier()
351 {
352 Token token = null;
353
354 int start = currentPosition();
355
356 while ( hasMoreChars() )
357 {
358 if ( isIdentifierChar( LA(1) ) )
359 {
360 consume();
361 }
362 else
363 {
364 break;
365 }
366 }
367
368 token = new Token( TokenTypes.IDENTIFIER,
369 getXPath(),
370 start,
371 currentPosition() );
372
373 return token;
374 }
375
376 private Token operatorName()
377 {
378 Token token = null;
379
380 switch ( LA(1) )
381 {
382 case 'a':
383 {
384 token = and();
385 break;
386 }
387
388 case 'o':
389 {
390 token = or();
391 break;
392 }
393
394 case 'm':
395 {
396 token = mod();
397 break;
398 }
399
400 case 'd':
401 {
402 token = div();
403 break;
404 }
405 }
406
407 return token;
408 }
409
410 private Token mod()
411 {
412 Token token = null;
413
414 if ( ( LA(1) == 'm' )
415 &&
416 ( LA(2) == 'o' )
417 &&
418 ( LA(3) == 'd' )
419 )
420 {
421 token = new Token( TokenTypes.MOD,
422 getXPath(),
423 currentPosition(),
424 currentPosition()+3 );
425
426 consume();
427 consume();
428 consume();
429 }
430
431 return token;
432 }
433
434 private Token div()
435 {
436 Token token = null;
437
438 if ( ( LA(1) == 'd' )
439 &&
440 ( LA(2) == 'i' )
441 &&
442 ( LA(3) == 'v' )
443 )
444 {
445 token = new Token( TokenTypes.DIV,
446 getXPath(),
447 currentPosition(),
448 currentPosition()+3 );
449
450 consume();
451 consume();
452 consume();
453 }
454
455 return token;
456 }
457
458 private Token and()
459 {
460 Token token = null;
461
462 if ( ( LA(1) == 'a' )
463 &&
464 ( LA(2) == 'n' )
465 &&
466 ( LA(3) == 'd' )
467 )
468 {
469 token = new Token( TokenTypes.AND,
470 getXPath(),
471 currentPosition(),
472 currentPosition()+3 );
473
474 consume();
475 consume();
476 consume();
477 }
478
479 return token;
480 }
481
482 private Token or()
483 {
484 Token token = null;
485
486 if ( ( LA(1) == 'o' )
487 &&
488 ( LA(2) == 'r' )
489 )
490 {
491 token = new Token( TokenTypes.OR,
492 getXPath(),
493 currentPosition(),
494 currentPosition()+2 );
495
496 consume();
497 consume();
498 }
499
500 return token;
501 }
502
503 private Token number()
504 {
505 int start = currentPosition();
506 boolean periodAllowed = true;
507
508 loop:
509 while( true )
510 {
511 switch ( LA(1) )
512 {
513 case '.':
514 if ( periodAllowed )
515 {
516 periodAllowed = false;
517 consume();
518 }
519 else
520 {
521 break loop;
522 }
523 break;
524 case '0':
525 case '1':
526 case '2':
527 case '3':
528 case '4':
529 case '5':
530 case '6':
531 case '7':
532 case '8':
533 case '9':
534 consume();
535 break;
536 default:
537 break loop;
538 }
539 }
540
541 return new Token( TokenTypes.DOUBLE,
542 getXPath(),
543 start,
544 currentPosition() );
545 }
546
547 private Token whitespace()
548 {
549 consume();
550
551 loop:
552 while( hasMoreChars() )
553 {
554 switch ( LA(1) )
555 {
556 case ' ':
557 case '\t':
558 case '\n':
559 case '\r':
560 {
561 consume();
562 break;
563 }
564
565 default:
566 {
567 break loop;
568 }
569 }
570 }
571
572 return new Token( TokenTypes.SKIP,
573 getXPath(),
574 0,
575 0 );
576 }
577
578 private Token comma()
579 {
580 Token token = new Token( TokenTypes.COMMA,
581 getXPath(),
582 currentPosition(),
583 currentPosition()+1 );
584
585 consume();
586
587 return token;
588 }
589
590 private Token equals()
591 {
592 Token token = new Token( TokenTypes.EQUALS,
593 getXPath(),
594 currentPosition(),
595 currentPosition()+1 );
596
597 consume();
598
599 return token;
600 }
601
602 private Token minus()
603 {
604 Token token = new Token( TokenTypes.MINUS,
605 getXPath(),
606 currentPosition(),
607 currentPosition()+1 );
608 consume();
609
610 return token;
611 }
612
613 private Token plus()
614 {
615 Token token = new Token( TokenTypes.PLUS,
616 getXPath(),
617 currentPosition(),
618 currentPosition()+1 );
619 consume();
620
621 return token;
622 }
623
624 private Token dollar()
625 {
626 Token token = new Token( TokenTypes.DOLLAR,
627 getXPath(),
628 currentPosition(),
629 currentPosition()+1 );
630 consume();
631
632 return token;
633 }
634
635 private Token pipe()
636 {
637 Token token = new Token( TokenTypes.PIPE,
638 getXPath(),
639 currentPosition(),
640 currentPosition()+1 );
641
642 consume();
643
644 return token;
645 }
646
647 private Token at()
648 {
649 Token token = new Token( TokenTypes.AT,
650 getXPath(),
651 currentPosition(),
652 currentPosition()+1 );
653
654 consume();
655
656 return token;
657 }
658
659 private Token colon()
660 {
661 Token token = new Token( TokenTypes.COLON,
662 getXPath(),
663 currentPosition(),
664 currentPosition()+1 );
665 consume();
666
667 return token;
668 }
669
670 private Token doubleColon()
671 {
672 Token token = new Token( TokenTypes.DOUBLE_COLON,
673 getXPath(),
674 currentPosition(),
675 currentPosition()+2 );
676
677 consume();
678 consume();
679
680 return token;
681 }
682
683 private Token notEquals()
684 {
685 Token token = new Token( TokenTypes.NOT_EQUALS,
686 getXPath(),
687 currentPosition(),
688 currentPosition() + 2 );
689
690 consume();
691 consume();
692
693 return token;
694 }
695
696 private Token relationalOperator()
697 {
698 Token token = null;
699
700 switch ( LA(1) )
701 {
702 case '<':
703 {
704 if ( LA(2) == '=' )
705 {
706 token = new Token( TokenTypes.LESS_THAN_OR_EQUALS_SIGN,
707 getXPath(),
708 currentPosition(),
709 currentPosition() + 2 );
710 consume();
711 }
712 else
713 {
714 token = new Token( TokenTypes.LESS_THAN_SIGN,
715 getXPath(),
716 currentPosition(),
717 currentPosition() + 1);
718 }
719
720 consume();
721 break;
722 }
723 case '>':
724 {
725 if ( LA(2) == '=' )
726 {
727 token = new Token( TokenTypes.GREATER_THAN_OR_EQUALS_SIGN,
728 getXPath(),
729 currentPosition(),
730 currentPosition() + 2 );
731 consume();
732 }
733 else
734 {
735 token = new Token( TokenTypes.GREATER_THAN_SIGN,
736 getXPath(),
737 currentPosition(),
738 currentPosition() + 1 );
739 }
740
741 consume();
742 break;
743 }
744 }
745
746 return token;
747
748 }
749
750 private Token star()
751 {
752 Token token = new Token( TokenTypes.STAR,
753 getXPath(),
754 currentPosition(),
755 currentPosition()+1 );
756
757 consume();
758
759 return token;
760 }
761
762 private Token literal()
763 {
764 Token token = null;
765
766 char match = LA(1);
767
768 consume();
769
770 int start = currentPosition();
771
772 while ( ( token == null )
773 &&
774 hasMoreChars() )
775 {
776 if ( LA(1) == match )
777 {
778 token = new Token( TokenTypes.LITERAL,
779 getXPath(),
780 start,
781 currentPosition() );
782 }
783 consume();
784 }
785
786 return token;
787 }
788
789 private Token dots()
790 {
791 Token token = null;
792
793 switch ( LA(2) )
794 {
795 case '.':
796 {
797 token = new Token( TokenTypes.DOT_DOT,
798 getXPath(),
799 currentPosition(),
800 currentPosition()+2 ) ;
801 consume();
802 consume();
803 break;
804 }
805 default:
806 {
807 token = new Token( TokenTypes.DOT,
808 getXPath(),
809 currentPosition(),
810 currentPosition()+1 );
811 consume();
812 break;
813 }
814 }
815
816 return token;
817 }
818
819 private Token leftBracket()
820 {
821 Token token = new Token( TokenTypes.LEFT_BRACKET,
822 getXPath(),
823 currentPosition(),
824 currentPosition()+1 );
825
826 consume();
827
828 return token;
829 }
830
831 private Token rightBracket()
832 {
833 Token token = new Token( TokenTypes.RIGHT_BRACKET,
834 getXPath(),
835 currentPosition(),
836 currentPosition()+1 );
837
838 consume();
839
840 return token;
841 }
842
843 private Token leftParen()
844 {
845 Token token = new Token( TokenTypes.LEFT_PAREN,
846 getXPath(),
847 currentPosition(),
848 currentPosition()+1 );
849
850 consume();
851
852 return token;
853 }
854
855 private Token rightParen()
856 {
857 Token token = new Token( TokenTypes.RIGHT_PAREN,
858 getXPath(),
859 currentPosition(),
860 currentPosition()+1 );
861
862 consume();
863
864 return token;
865 }
866
867 private Token slashes()
868 {
869 Token token = null;
870
871 switch ( LA(2) )
872 {
873 case '/':
874 {
875 token = new Token( TokenTypes.DOUBLE_SLASH,
876 getXPath(),
877 currentPosition(),
878 currentPosition()+2 );
879 consume();
880 consume();
881 break;
882 }
883 default:
884 {
885 token = new Token( TokenTypes.SLASH,
886 getXPath(),
887 currentPosition(),
888 currentPosition()+1 );
889 consume();
890 }
891 }
892
893 return token;
894 }
895
896 private char LA(int i)
897 {
898 if ( currentPosition + ( i - 1 ) >= endPosition() )
899 {
900 return (char) -1;
901 }
902
903 return getXPath().charAt( currentPosition() + (i - 1) );
904 }
905
906 private void consume()
907 {
908 ++this.currentPosition;
909 }
910
911 private int currentPosition()
912 {
913 return this.currentPosition;
914 }
915
916 private int endPosition()
917 {
918 return this.endPosition;
919 }
920
921 private void setPreviousToken(Token previousToken)
922 {
923 this.previousToken = previousToken;
924 }
925
926 private boolean hasMoreChars()
927 {
928 return currentPosition() < endPosition();
929 }
930
931 private boolean isIdentifierChar(char c)
932 {
933 return Verifier.isXMLNCNameCharacter( c );
934 }
935
936 private boolean isIdentifierStartChar(char c)
937 {
938 return Verifier.isXMLNCNameStartCharacter( c );
939 }
940
941 }