/*
 *  Type-ARQuE - the experimental SPARQL to SQL translator.
 *  Copyright (C) 2010  Sami Kiminki / Aalto University
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "Messages.h"
#include "SPARQLTokenizer.h"

namespace TypeRQInternal {
   using namespace TypeRQ;

   /**
    * Builds a TOKEN_END token located at sref.
    *
    * The numeric payload members are value-initialised; both the embedded
    * expression and the token's own source reference are constructed from
    * sref.
    */
   Token::Token(const SourceReference &sref)
      : tokenCode(TOKEN_END),
        lastTokenInt(),
        lastTokenDouble(),
        lastTokenNumberType(),
        exp(sref),
        sourceReference(sref)
   {
   }

   /**
    * Builds a TOKEN_EXPR token that carries a copy of _expr.
    *
    * The numeric payload members are value-initialised; the token's source
    * reference is constructed from _expr as well.
    */
   Token::Token(const Expression &_expr)
      : tokenCode(TOKEN_EXPR),
        lastTokenInt(),
        lastTokenDouble(),
        lastTokenNumberType(),
        exp(_expr),
        sourceReference(_expr)
   {
   }

   /**
    * Copy constructor: every member is copied from the corresponding member
    * of _tok.
    */
   Token::Token(const Token &_tok)
      : tokenCode(_tok.tokenCode),
        lastTokenRaw(_tok.lastTokenRaw),
        lastTokenString(_tok.lastTokenString),
        lastTokenInt(_tok.lastTokenInt),
        lastTokenDouble(_tok.lastTokenDouble),
        lastTokenNumberType(_tok.lastTokenNumberType),
        lastTokenNS(_tok.lastTokenNS),
        lastTokenLN(_tok.lastTokenLN),
        exp(_tok.exp),
        sourceReference(_tok.sourceReference)
   {
   }


   /**
    * Creates a tokenizer reading from _is.
    *
    * Position tracking starts at line 1, column 1; the last token is an
    * end token with a default source reference; EOF_CHAR is -1.
    */
   SPARQLTokenizer::SPARQLTokenizer(std::istream &_is) :
      is(_is), line(1), col(1), lastchar(0), lastToken(SourceReference()), EOF_CHAR(-1)
   {
      // get() reads this flag on every character. It used to be set only by
      // eatIfNext(), so consuming input before the first eatIfNext() call
      // read an indeterminate value. Assigned here rather than in the
      // initializer list so the member declaration order does not matter.
      pushBackToRaw=false;
   }

   int SPARQLTokenizer::getCurrentCol() {
      return col;
   }

   int SPARQLTokenizer::getCurrentLine() {
      return line;
   }


   /**
    * Consumes the token tok, which must be next in the input.
    *
    * @param tok  the required token kind
    * @throws SPARQLTokenizerException (via throwParseException) when the
    *         input does not start with tok
    */
   void SPARQLTokenizer::nextToken(TokenCode tok)
   {
      const bool consumed=eatIfNext(tok);
      if (consumed) return;

      throwParseException("Expected token '%s'", getTokenName(tok));
   }

   /**
    * Tries to consume a token of kind tok at the current position.
    *
    * The payload of lastToken is cleared and its source reference is set
    * to the current line/column before the attempt. While the token is
    * being matched, consumed characters are also recorded in lastTokenRaw.
    * On success the token code is stored, a debug message is printed and
    * trailing white space is skipped (without being recorded as raw text).
    *
    * @return true when the token was consumed, false otherwise
    */
   bool SPARQLTokenizer::eatIfNext(TokenCode tok) {
      Token &t=lastToken;

      // reset everything a previous token may have left behind
      t.lastTokenNumberType=Token::NONE;
      t.lastTokenInt=0;
      t.lastTokenDouble=0;
      t.lastTokenRaw.erase();
      t.lastTokenString.erase();
      t.lastTokenNS.erase();
      t.lastTokenLN.erase();
      t.sourceReference.setSourceReference(SourceReference(line, col));

      pushBackToRaw=true;

      if (!eatIfNextInternal(tok)) {
         return false;
      }

      t.tokenCode=tok;
      print(OL_DEBUG, "Ate token %s, raw=\"%s\"\n", getTokenName(tok), t.lastTokenRaw.c_str());

      // white space following the token is not part of its raw text
      pushBackToRaw=false;
      eatWhiteSpaces();

      return true;
   }

   /** Gives read-only access to the token state filled in by eatIfNext(). */
   const Token &SPARQLTokenizer::getLastToken() const
   {
      return this->lastToken;
   }

   /**
    * Maps a token code to a printable name for diagnostics.
    *
    * @param tok  token code to describe
    * @return a string literal naming the token; "UNKNOWN" for any code not
    *         listed below
    */
   const char *SPARQLTokenizer::getTokenName(TokenCode tok)
   {
      const char *name="UNKNOWN";

      switch (tok) {
         // terminals with a payload
         case TOKEN_END:               name="End of file"; break;
         case TOKEN_IRI_REF:           name="IRI_REF"; break;
         case TOKEN_BLANK_NODE_LABEL:  name="BLANK_NODE_LABEL"; break;
         case TOKEN_VAR:               name="VAR"; break;
         case TOKEN_LANGTAG:           name="LANGTAG"; break;
         case TOKEN_NUMBER:            name="NUMBER"; break;
         case TOKEN_NUMBER_POSITIVE:   name="NUMBER_POSITIVE"; break;
         case TOKEN_NUMBER_NEGATIVE:   name="NUMBER_NEGATIVE"; break;
         case TOKEN_STRING_LITERAL:    name="STRING_LITERAL"; break;
         case TOKEN_NIL:               name="NIL"; break;
         case TOKEN_ANON:              name="ANON"; break;
         case TOKEN_PNAME_NS:          name="PNAME_NS"; break;
         case TOKEN_PNAME_LN:          name="PNAME_LN"; break;

         // punctuation and operators
         case TOKEN_ASTERISK:          name="*"; break;
         case TOKEN_LPAREN:            name="("; break;
         case TOKEN_RPAREN:            name=")"; break;
         case TOKEN_PERIOD:            name="."; break;
         case TOKEN_COMMA:             name=","; break;
         case TOKEN_SEMICOLON:         name=";"; break;
         case TOKEN_LBRACKET:          name="["; break;
         case TOKEN_RBRACKET:          name="]"; break;
         case TOKEN_LBRACE:            name="{"; break;
         case TOKEN_RBRACE:            name="}"; break;
         case TOKEN_AND:               name="&&"; break;
         case TOKEN_OR:                name="||"; break;
         case TOKEN_REL_OP:            name="RELATIONAL OP"; break;
         case TOKEN_PLUS:              name="+"; break;
         case TOKEN_MINUS:             name="-"; break;
         case TOKEN_SLASH:             name="/"; break;
         case TOKEN_EXCLAMATION:       name="!"; break;
         case TOKEN_DOUBLE_CARET:      name="^^"; break;

         // query form keywords
         case TOKEN_KEYWORD_BASE:      name="BASE"; break;
         case TOKEN_KEYWORD_PREFIX:    name="PREFIX"; break;
         case TOKEN_KEYWORD_SELECT:    name="SELECT"; break;
         case TOKEN_KEYWORD_CONSTRUCT: name="CONSTRUCT"; break;
         case TOKEN_KEYWORD_DESCRIBE:  name="DESCRIBE"; break;
         case TOKEN_KEYWORD_ASK:       name="ASK"; break;

         // solution modifier keywords
         case TOKEN_KEYWORD_ORDER:     name="ORDER"; break;
         case TOKEN_KEYWORD_BY:        name="BY"; break;
         case TOKEN_KEYWORD_LIMIT:     name="LIMIT"; break;
         case TOKEN_KEYWORD_OFFSET:    name="OFFSET"; break;
         case TOKEN_KEYWORD_DISTINCT:  name="DISTINCT"; break;
         case TOKEN_KEYWORD_REDUCED:   name="REDUCED"; break;

         // dataset keywords
         case TOKEN_KEYWORD_FROM:      name="FROM"; break;
         case TOKEN_KEYWORD_NAMED:     name="NAMED"; break;
         case TOKEN_KEYWORD_WHERE:     name="WHERE"; break;

         // graph pattern keywords
         case TOKEN_KEYWORD_GRAPH:     name="GRAPH"; break;
         case TOKEN_KEYWORD_OPTIONAL:  name="OPTIONAL"; break;
         case TOKEN_KEYWORD_UNION:     name="UNION"; break;
         case TOKEN_KEYWORD_FILTER:    name="FILTER"; break;
         case TOKEN_KEYWORD_a:         name="a"; break;

         // built-in call keywords
         case TOKEN_KEYWORD_STR:          name="STR"; break;
         case TOKEN_KEYWORD_LANG:         name="LANG"; break;
         case TOKEN_KEYWORD_LANGMATCHES:  name="LANGMATCHES"; break;
         case TOKEN_KEYWORD_DATATYPE:     name="DATATYPE"; break;
         case TOKEN_KEYWORD_BOUND:        name="BOUND"; break;
         case TOKEN_KEYWORD_sameTERM:     name="sameTERM"; break;

         case TOKEN_KEYWORD_isURI:        name="isURI"; break;
         case TOKEN_KEYWORD_isIRI:        name="isIRI"; break;
         case TOKEN_KEYWORD_isBLANK:      name="isBLANK"; break;
         case TOKEN_KEYWORD_isLITERAL:    name="isLITERAL"; break;
         case TOKEN_KEYWORD_REGEX:        name="REGEX"; break;
         case TOKEN_KEYWORD_true:         name="true"; break;
         case TOKEN_KEYWORD_false:        name="false"; break;

         // ordering direction
         case TOKEN_ASC:               name="ASC"; break;
         case TOKEN_DESC:              name="DESC"; break;

         case TOKEN_EXPR:              name="EXPR"; break;

         default:                      break;
      }

      return name;
   }

   /**
    * Formats a printf-style message and throws it as a
    * SPARQLTokenizerException tagged with the current line and column.
    *
    * If vasprintf() reports failure, a fixed fallback text is thrown
    * instead. This function never returns normally.
    *
    * @param fmt  printf-style format string, followed by its arguments
    */
   void SPARQLTokenizer::throwParseException(const char *fmt, ...)
   {
      char *formatted=NULL;

      va_list args;
      va_start(args, fmt);
      const int rc=vasprintf(&formatted, fmt, args);
      va_end(args);

      std::string text;

      if (rc<0) {
         text="SPARQLTokenizer::throwParseException: vasprintf format failure";
      }
      else {
         // copy into the std::string before releasing the malloc'ed buffer
         text=formatted;
         free(formatted);
      }

      throw SPARQLTokenizerException(getCurrentLine(), getCurrentCol(), text.c_str());
   }

   /**
    * Reports a construct that the tokenizer does not support.
    *
    * Always throws a SPARQLTokenizerException at the current position with
    * the text "Unsupported operation: " followed by operation.
    */
   void SPARQLTokenizer::unsupported(const char *operation)
   {
      const std::string message=std::string("Unsupported operation: ")+operation;
      throw SPARQLTokenizerException(getCurrentLine(), getCurrentCol(), message.c_str());
   }

   /**
    * Grows the lookahead buffer until it holds at least i entries.
    *
    * Entries are read from the input stream while it is in a good state;
    * once it is not, EOF_CHAR is appended instead.
    */
   void SPARQLTokenizer::ensureLookaheadBufferSize(int i) {
      for (int have=static_cast<int>(lookaheadBuffer.size()); have<i; ++have) {
         int next;
         if (is) {
            next=is.get();
         }
         else {
            next=EOF_CHAR;
         }
         lookaheadBuffer.push_back(next);
      }
   }

   /**
    * Returns the character lookahead positions ahead of the read position
    * without consuming anything (0 is the next character to be read).
    * Positions past the end of input yield EOF_CHAR.
    */
   int SPARQLTokenizer::peek(int lookahead) {
      ensureLookaheadBufferSize(lookahead+1);

      std::list<int>::iterator pos=lookaheadBuffer.begin();
      for (int steps=lookahead; steps>0; --steps) {
         ++pos;
      }

      return *pos;
   }

   /**
    * Consumes and returns the next input character.
    *
    * Maintains the line/column counters: a line break ('\r', '\n', or a
    * two-character "\r\n" / "\n\r" pair) advances the line and resets the
    * column to 1; any other character advances the column. While
    * pushBackToRaw is set, the character is also appended to the raw text
    * of lastToken.
    *
    * @return the consumed character
    * @throws SPARQLTokenizerException (via throwParseException) with
    *         "Unexpected end of file" when no input is left
    */
   int SPARQLTokenizer::get() {
      ensureLookaheadBufferSize(1);
      const int c=lookaheadBuffer.front();
      lookaheadBuffer.pop_front();

      if (c==EOF_CHAR) {
         throwParseException("Unexpected end of file");
      }

      if (c=='\r' || c=='\n') {
         const int partner=(c=='\r') ? '\n' : '\r';
         if (lastchar==partner) {
            // Second half of a two-character line break: already counted.
            // Forget it so that it cannot pair up again with the next
            // character -- otherwise "\r\n\r\n" counts as a single line.
            lastchar=0;
         }
         else {
            col=1;
            ++line;
            lastchar=c;
         }
      }
      else {
         ++col;
         lastchar=c;
      }

      if (pushBackToRaw) lastToken.lastTokenRaw+=c;
      return c;
   }

   /** True for space (0x20), tab (0x09), CR (0x0d) and LF (0x0a). */
   bool SPARQLTokenizer::isWhiteSpaceChar(int c) {
      return c==0x20 || c==0x09 || c==0x0d || c==0x0a;
   }

   /**
    * Consumes white space characters (#x20 | #x9 | #xD | #xA) until the
    * next character is something else.
    */
   void SPARQLTokenizer::eatWhiteSpaces()
   {
      while (isWhiteSpaceChar(peek(0))) {
         get();
      }
   }

   // [77]  	INTEGER	  ::=  	[0-9]+
   // [78]  	DECIMAL	  ::=  	[0-9]+ '.' [0-9]* | '.' [0-9]+
   // [79]  	DOUBLE	  ::=  	[0-9]+ '.' [0-9]* EXPONENT | '.' ([0-9])+ EXPONENT | ([0-9])+ EXPONENT
   // [86]  	EXPONENT  ::=  	[eE] [+-]? [0-9]+
   //
   // Decodes an unsigned number at the current position.
   //
   // The normalised text is appended to lastToken.lastTokenString (a
   // decimal point without following digits and a '+' exponent sign are
   // dropped, the exponent marker is always written as 'e'). The number type
   // and the converted value are stored in lastToken.
   //
   // Returns false, consuming nothing, when the input does not start a
   // number. That includes a '.' that is not followed by a digit, which is
   // not a number according to the grammar; it used to be consumed and then
   // rejected with an exception, so a plain '.' token could not be probed
   // for after a failed number probe.
   //
   // Throws (via throwParseException) when an exponent marker is not
   // followed by at least one digit.
   bool SPARQLTokenizer::decodeNumber() {
      char c=peek(0);

      if (c=='.') {
         // '.' [0-9]+ : at least one digit must follow the point
         if (!isInRange(peek(1), '0', '9')) return false;
      }
      else if (!isInRange(c, '0', '9')) {
         return false;
      }

      // From here on the input is known to contain at least one mantissa
      // digit, so no "lone decimal point" check is required.
      bool gotDecimalPoint=false;
      bool gotDecimalChars=false;
      bool gotExponent=false;

      // read integer part
      while (isInRange(c, '0', '9')) {
         lastToken.lastTokenString+=get();
         c=peek(0);
      }

      // read optional fraction
      if (c=='.')
      {
         get();
         c=peek(0);
         gotDecimalPoint=true;
         while (isInRange(c, '0', '9')) {
            // the point is only written once a fraction digit shows up
            if (!gotDecimalChars) lastToken.lastTokenString+='.';
            gotDecimalChars=true;
            lastToken.lastTokenString+=get();
            c=peek(0);
         }
      }

      // read optional exponent
      if (c=='e' || c=='E')
      {
         bool gotExponentChars=false;
         gotExponent=true;
         lastToken.lastTokenString+='e';
         get();
         c=peek(0);

         if (c=='+')
         {
            // a positive sign is implied, no need to store it
            get();
            c=peek(0);
         }
         else if (c=='-') {
            lastToken.lastTokenString+=get();
            c=peek(0);
         }

         while (isInRange(c, '0', '9'))
         {
            gotExponentChars=true;
            lastToken.lastTokenString+=get();
            c=peek(0);
         }
         if (!gotExponentChars) {
            throwParseException("Malformed number: one or more digits required after exponent");
         }
      }

      if ((!gotDecimalPoint) && (!gotExponent))
      {
         // integer
         lastToken.lastTokenNumberType=Token::INTEGER;
         lastToken.lastTokenInt=atoll(lastToken.lastTokenString.c_str());
      }
      else if (!gotExponent) {
         // decimal
         lastToken.lastTokenNumberType=Token::DECIMAL;
         lastToken.lastTokenDouble=atof(lastToken.lastTokenString.c_str());
      }
      else {
         // double
         lastToken.lastTokenNumberType=Token::DOUBLE;
         lastToken.lastTokenDouble=atof(lastToken.lastTokenString.c_str());
      }

      return true;
   }

   // Decodes the character following the backslash of an ECHAR escape
   // (the backslash itself has already been consumed by the caller).
   // Supported escape letters: t b n r f \ " '
   // Returns the character the escape stands for and consumes the escape
   // letter. Throws on end of file or on any other escape letter.
   int SPARQLTokenizer::decodeECHAR()
   {
      const int c=peek(0);
      if (c==EOF_CHAR) throwParseException("Unexpected end of file");

      int codepoint=0;

      if (c=='t')        codepoint='\t';
      else if (c=='b')   codepoint='\b';
      else if (c=='n')   codepoint='\n';
      else if (c=='r')   codepoint='\r';
      else if (c=='f')   codepoint='\f';
      else if (c=='\\')  codepoint='\\';
      else if (c=='"')   codepoint='"';
      else if (c=='\'')  codepoint='\'';
      else               throwParseException("Bad escape");

      get();
      return codepoint;
   }

   /**
    * Converts a code point to its UTF-8 encoding.
    *
    * Only the single-byte range 0x01..0x7F is implemented; anything else is
    * rejected with a parse exception.
    */
   std::string SPARQLTokenizer::codepointToUTF8(int cp)
   {
      if (cp<=0) {
         throwParseException("Invalid code point");
      }
      if (cp>=0x80) {
         throwParseException("Unimplemented: code point >= 0x80");
      }

      std::string encoded;
      encoded.push_back(cp);
      return encoded;
   }

   // Decodes the body of a short string literal; the opening quote has
   // already been consumed. Reads up to and including the closing
   // terminator and stores the decoded text in lastToken.lastTokenString.
   // Throws on end of file, on a raw CR/LF, or on a bad escape.
   // [87]  	STRING_LITERAL1	  ::=  	"'" ( ([^#x27#x5C#xA#xD]) | ECHAR )* "'"
   // [88]  	STRING_LITERAL2	  ::=  	'"' ( ([^#x22#x5C#xA#xD]) | ECHAR )* '"'
   void SPARQLTokenizer::decodeStringLiteralSmall(char terminator)
   {
      std::string decoded;

      for (int c=peek(0); c!=terminator; c=peek(0))
      {
         if (c==EOF_CHAR) throwParseException("Unexpected end of file");
         if (c==0x0A || c==0x0D) throwParseException("Illegal string character");

         if (c==0x5C) {
            // backslash: drop it and decode the escape that follows
            get();
            decoded+=codepointToUTF8(decodeECHAR());
         }
         else {
            decoded+=c;
            get();
         }
      }

      get();   // closing quote
      lastToken.lastTokenString=decoded;
   }

   // Decodes the body of a long string literal; the opening ''' or """ has
   // already been consumed. Reads up to and including the closing triple
   // terminator and stores the decoded text in lastToken.lastTokenString.
   // Throws on end of file or on a bad escape.
   // [89]  	STRING_LITERAL_LONG1	  ::=  	"'''" ( ( "'" | "''" )? ( [^'\] | ECHAR ) )* "'''"
   // [90]  	STRING_LITERAL_LONG2	  ::=  	'"""' ( ( '"' | '""' )? ( [^"\] | ECHAR ) )* '"""'
   void SPARQLTokenizer::decodeStringLiteralLong(char terminator)
   {
      std::string decoded;

      for (;;)
      {
         const int c=peek(0);

         // three terminators in a row end the literal
         if (c==terminator && peek(1)==terminator && peek(2)==terminator) break;

         if (c==EOF_CHAR) throwParseException("Unexpected end of file");

         if (c==0x5C) {
            // backslash: drop it and decode the escape that follows
            get();
            decoded+=codepointToUTF8(decodeECHAR());
         }
         else {
            decoded+=c;
            get();
         }
      }

      // closing triple quote
      for (int n=0; n<3; ++n) get();

      lastToken.lastTokenString=decoded;
   }

   /**
    * Decodes a string literal in any of its four forms.
    *
    *  [87]  STRING_LITERAL1       ::=  "'" ( ([^#x27#x5C#xA#xD]) | ECHAR )* "'"
    *  [88]  STRING_LITERAL2       ::=  '"' ( ([^#x22#x5C#xA#xD]) | ECHAR )* '"'
    *  [89]  STRING_LITERAL_LONG1  ::=  "'''" ( ( "'" | "''" )? ( [^'\] | ECHAR ) )* "'''"
    *  [90]  STRING_LITERAL_LONG2  ::=  '"""' ( ( '"' | '""' )? ( [^"\] | ECHAR ) )* '"""'
    *
    * @return false (nothing consumed) when the next character is not a
    *         quote; true once a literal has been decoded into lastToken
    */
   bool SPARQLTokenizer::decodeStringLiteral()
   {
      const int first=peek(0);
      if (first!='"' && first!='\'') return false; // not a string

      const char terminator=first;
      get();

      // two more quotes of the same kind open the long form
      const bool longForm=(peek(0)==terminator && peek(1)==terminator);

      if (longForm) {
         get();
         get();
         decodeStringLiteralLong(terminator);
      }
      else {
         decodeStringLiteralSmall(terminator);
      }

      return true;
   }


   // Consumes a keyword if it is next in the input. lkw is the lower-case
   // and ukw the upper-case spelling; each input character may match
   // either one. The keyword must not be directly followed by an ASCII
   // letter or digit. Returns true when the keyword was consumed; nothing
   // is consumed otherwise.
   bool SPARQLTokenizer::eatIfKeyword(const char *lkw, const char *ukw)
   {
      const int kwlen=strlen(lkw);
      ensureLookaheadBufferSize(kwlen+1);

      std::list<int>::iterator cursor=lookaheadBuffer.begin();

      for (int i=0; i<kwlen; ++i)
      {
         const int have=*cursor;
         ++cursor;
         if (have!=lkw[i] && have!=ukw[i]) return false;
      }

      // the character right after the keyword must end the word
      const int follower=*cursor;
      const bool wordContinues=
         (follower>='A' && follower<='Z')
         || (follower>='a' && follower<='z')
         || (follower>='0' && follower<='9');
      if (wordContinues) return false;

      // ok, keyword match
      for (int remaining=kwlen; remaining>0; --remaining) get();

      return true;
   }

   /**
    * Consumes the exact character sequence k if it is next in the input.
    *
    * @return true when k was consumed; false (nothing consumed) otherwise
    */
   bool SPARQLTokenizer::eatIfMatch(const char *k)
   {
      const int kwlen=strlen(k);
      ensureLookaheadBufferSize(kwlen+1);

      std::list<int>::iterator cursor=lookaheadBuffer.begin();

      for (int i=0; i<kwlen; ++i)
      {
         const int have=*cursor;
         ++cursor;
         if (have!=k[i]) return false;
      }

      for (int remaining=kwlen; remaining>0; --remaining) get();

      return true;
   }

   /** True when lo <= c <= hi (both bounds inclusive). */
   bool SPARQLTokenizer::isInRange(int c, int lo, int hi)
   {
      return !(c<lo || c>hi);
   }

   /**
    * True when c lies in one of the PN_CHARS_BASE ranges listed in the
    * table below (ASCII letters plus a set of non-ASCII ranges).
    */
   bool SPARQLTokenizer::isPN_CHARS_BASE(int c)
   {
      static const int ranges[][2]={
         { 'A',     'Z'     },
         { 'a',     'z'     },
         { 0x00C0,  0x00D6  },
         { 0x00D8,  0x00F6  },
         { 0x00F8,  0x02FF  },
         { 0x0370,  0x037D  },
         { 0x037F,  0x1FFF  },
         { 0x200C,  0x200D  },
         { 0x2070,  0x218F  },
         { 0x2C00,  0x2FEF  },
         { 0x3001,  0xD7FF  },
         { 0xF900,  0xFDCF  },
         { 0xFDF0,  0xFFFD  },
         { 0x10000, 0xEFFFF }
      };
      const int rangeCount=sizeof(ranges)/sizeof(ranges[0]);

      for (int r=0; r<rangeCount; ++r)
      {
         if (isInRange(c, ranges[r][0], ranges[r][1])) return true;
      }
      return false;
   }

   /** PN_CHARS_U: any PN_CHARS_BASE character or an underscore. */
   bool SPARQLTokenizer::isPN_CHARS_U(int c)
   {
      if (isPN_CHARS_BASE(c)) return true;
      return c=='_';
   }

   /**
    * True when c is a PN_CHARS character:
    *
    *   PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
    */
   bool SPARQLTokenizer::isPN_CHARS(int c)
   {
      return isPN_CHARS_U(c)
         || c=='-'
         || isInRange(c, '0', '9')
         // MIDDLE DOT. This used to test 0x00BF, which is not part of the
         // production; the variable-name scanner in this file already
         // uses 0x00B7.
         || c==0x00B7
         || isInRange(c, 0x300, 0x36F)
         || isInRange(c, 0x203F, 0x2040);
   }

   // Looks ahead, without consuming anything, for
   //    PN_CHARS_BASE ((PN_CHARS|'.')* PN_CHARS)? ':'
   // Returns the number of characters covered, including the ':', or 0
   // when the input does not match.
   int SPARQLTokenizer::peekPNAME_NS()
   {
      if (!isPN_CHARS_BASE(peek(0))) return 0;

      bool previousWasDot=false;

      for (int pos=1; ; ++pos)
      {
         const int c=peek(pos);

         if (isPN_CHARS(c)) {
            previousWasDot=false;
         }
         else if (c=='.') {
            previousWasDot=true;
         }
         else if (c==':' && !previousWasDot) {
            // pos is the index of ':', so pos+1 characters are covered
            return pos+1;
         }
         else {
            return 0;
         }
      }
   }

   // Looks ahead, without consuming anything, for
   //    ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)?
   // starting at peek offset ofs. Returns the number of characters
   // covered, or 0 when the input at ofs does not start a local name.
   //
   // Dots are allowed inside the name but never at its end, so the result
   // is the length up to the last non-dot character seen. (Subtracting a
   // fixed amount at the end, as done previously, stripped only one dot
   // and reported "a." for the input "a..".)
   int SPARQLTokenizer::peekPNAME_LOCAL(int ofs)
   {
      const int first=peek(ofs);
      if (!(isPN_CHARS_U(first) || isInRange(first, '0', '9'))) return 0;

      int matched=1;   // length up to and including the last non-dot character

      for (int pos=1; ; ++pos)
      {
         const int c=peek(ofs+pos);

         if (isPN_CHARS(c)) {
            matched=pos+1;
         }
         else if (c!='.') {
            // neither a name character nor a dot: the name ends here
            return matched;
         }
         // a dot: tentatively part of the name, confirmed only if another
         // name character follows
      }
   }

   // Tries to recognise and consume one token of kind tok at the current
   // input position. Token payload (text, namespace, local name, relational
   // opcode, number) is stored in lastToken.
   //
   // Returns true when the token was consumed and false when the input
   // does not start such a token. Several token kinds commit after their
   // first character(s) and raise a parse exception, rather than returning
   // false, when the remainder is malformed. An unknown token code raises
   // an "Internal error" parse exception.
   //
   // throwParseException() and unsupported() always throw, so the
   // statements following them are reached only on the success path.
   bool SPARQLTokenizer::eatIfNextInternal(TokenCode tok)
   {
      switch (tok) {
         case TOKEN_END:
            return peek(0)==EOF_CHAR;

         case TOKEN_IRI_REF:
         {
            // '<' ([^<>"{}|^`\]-[#x00-#x20])* '>'
            // The interpretation is as follows: the following
            // characters may not reside within '<' .. '>'
            // > < " { } ^ \ | `   \u0000..\u0020
            if (peek(0)!='<') return false;
            get();

            for (;;)
            {
               const int ch=peek(0);
               const bool stopsIri=
                  isInRange(ch, 0, 0x20)
                  || ch=='>' || ch=='<' || ch=='"'
                  || ch=='{' || ch=='}' || ch=='^'
                  || ch=='\\' || ch=='|' || ch=='`';
               if (stopsIri) break;

               lastToken.lastTokenString.push_back(get());
            }

            // the only character allowed to stop the IRI is the closing '>'
            if (peek(0)!='>') throwParseException("Unexpected character in IRI");
            get();

            return true;
         }

         case TOKEN_BLANK_NODE_LABEL:
         {
            // [73]  	BLANK_NODE_LABEL	  ::=  	'_:' PN_LOCAL
            if (!(peek(0)=='_' && peek(1)==':')) return false;

            get();
            get();

            const int localLen=peekPNAME_LOCAL(0);
            if (localLen==0) throwParseException("Expected PN_LOCAL");

            for (int n=0; n<localLen; ++n)
            {
               lastToken.lastTokenString.push_back(get());
            }
            return true;
         }

         case TOKEN_VAR:
         {
            const int sigil=peek(0);
            if (sigil!='?' && sigil!='$') return false;
            get();

            const int firstChar=get();
            if (!(isPN_CHARS_U(firstChar) || isInRange(firstChar, '0', '9')))
               throwParseException("Bad varname character, code=%d", firstChar);

            lastToken.lastTokenString.push_back(firstChar);

            for (;;)
            {
               const int ch=peek(0);
               const bool nameChar=
                  isPN_CHARS_U(ch) || isInRange(ch, '0', '9')
                  || ch==0x00B7 || isInRange(ch, 0x0300, 0x036F) || isInRange(ch, 0x203F, 0x2040);
               if (!nameChar) break;

               lastToken.lastTokenString.push_back(get());
            }
            return true;
         }

         case TOKEN_LANGTAG:
            if (peek(0)!='@') return false;
            unsupported("LANG tags");
            return false;   // not reached, unsupported() always throws

         case TOKEN_NUMBER:
            return decodeNumber();

         case TOKEN_NUMBER_POSITIVE:
            if (peek(0)!='+') return false;
            get();   // the '+' is dropped from the token text
            if (!decodeNumber()) throwParseException("Expected number");
            return true;

         case TOKEN_NUMBER_NEGATIVE:
            if (peek(0)!='-') return false;
            lastToken.lastTokenString+=get();   // keep the '-' in the text
            if (!decodeNumber()) throwParseException("Expected number");
            return true;

         case TOKEN_STRING_LITERAL:
            return decodeStringLiteral();

         case TOKEN_NIL:
         {
            // [92]  	NIL	  ::=  	'(' WS* ')'
            if (peek(0)!='(') return false;

            // find the first non-space character after '('
            int closing=1;
            while (isWhiteSpaceChar(peek(closing))) ++closing;

            if (peek(closing)!=')') return false;

            // consume everything from '(' up to and including ')'
            for (int n=0; n<=closing; ++n) get();
            return true;
         }

         case TOKEN_ANON:
            // [94]  	ANON	  ::=  	'[' WS* ']'
            if (peek(0)!='[') return false;
            get();
            eatWhiteSpaces();
            if (peek(0)!=']') throwParseException("Expected ']'");
            get();
            return true;

         case TOKEN_PNAME_NS:
         {
            const int nsLen=peekPNAME_NS();
            if (nsLen==0) return false;

            // everything but the trailing ':' is stored as the namespace
            for (int n=0; n<nsLen; ++n)
            {
               const char ch=get();
               if (n<nsLen-1) lastToken.lastTokenNS.push_back(ch);
            }
            return true;
         }

         case TOKEN_PNAME_LN:
         {
            // PNAME_NS PN_LOCAL
            const int nsLen=peekPNAME_NS();
            if (nsLen==0) return false;

            const int localLen=peekPNAME_LOCAL(nsLen);
            if (localLen==0) return false;

            // namespace part, without its trailing ':'
            for (int n=0; n<nsLen; ++n)
            {
               const char ch=get();
               if (n<nsLen-1) lastToken.lastTokenNS.push_back(ch);
            }
            // local part
            for (int n=0; n<localLen; ++n)
            {
               lastToken.lastTokenLN.push_back(get());
            }
            return true;
         }

         // --- punctuation and operators ---
         case TOKEN_ASTERISK:     return eatIfMatch("*");
         case TOKEN_LPAREN:       return eatIfMatch("(");
         case TOKEN_RPAREN:       return eatIfMatch(")");
         case TOKEN_PERIOD:       return eatIfMatch(".");
         case TOKEN_COMMA:        return eatIfMatch(",");
         case TOKEN_SEMICOLON:    return eatIfMatch(";");
         case TOKEN_LBRACKET:     return eatIfMatch("[");
         case TOKEN_RBRACKET:     return eatIfMatch("]");
         case TOKEN_LBRACE:       return eatIfMatch("{");
         case TOKEN_RBRACE:       return eatIfMatch("}");
         case TOKEN_AND:          return eatIfMatch("&&");
         case TOKEN_OR:           return eatIfMatch("||");

         case TOKEN_REL_OP:
            // two-character operators are tried before their one-character
            // prefixes; the opcode is reported through lastTokenInt
            if (eatIfMatch("<="))      lastToken.lastTokenInt=TOKEN_REL_OPCODE_LE;
            else if (eatIfMatch("<"))  lastToken.lastTokenInt=TOKEN_REL_OPCODE_LT;
            else if (eatIfMatch("="))  lastToken.lastTokenInt=TOKEN_REL_OPCODE_EQ;
            else if (eatIfMatch("!=")) lastToken.lastTokenInt=TOKEN_REL_OPCODE_NE;
            else if (eatIfMatch(">=")) lastToken.lastTokenInt=TOKEN_REL_OPCODE_GE;
            else if (eatIfMatch(">"))  lastToken.lastTokenInt=TOKEN_REL_OPCODE_GT;
            else return false;
            return true;

         case TOKEN_PLUS:         return eatIfMatch("+");
         case TOKEN_MINUS:        return eatIfMatch("-");
         case TOKEN_SLASH:        return eatIfMatch("/");
         case TOKEN_EXCLAMATION:  return eatIfMatch("!");
         case TOKEN_DOUBLE_CARET: return eatIfMatch("^^");

         // --- keywords ---
         case TOKEN_KEYWORD_BASE:         return eatIfKeyword("base", "BASE");
         case TOKEN_KEYWORD_PREFIX:       return eatIfKeyword("prefix", "PREFIX");
         case TOKEN_KEYWORD_SELECT:       return eatIfKeyword("select", "SELECT");
         case TOKEN_KEYWORD_CONSTRUCT:    return eatIfKeyword("construct", "CONSTRUCT");
         case TOKEN_KEYWORD_DESCRIBE:     return eatIfKeyword("describe", "DESCRIBE");
         case TOKEN_KEYWORD_ASK:          return eatIfKeyword("ask", "ASK");

         case TOKEN_KEYWORD_ORDER:        return eatIfKeyword("order", "ORDER");
         case TOKEN_KEYWORD_BY:           return eatIfKeyword("by", "BY");
         case TOKEN_KEYWORD_LIMIT:        return eatIfKeyword("limit", "LIMIT");
         case TOKEN_KEYWORD_OFFSET:       return eatIfKeyword("offset", "OFFSET");
         case TOKEN_KEYWORD_DISTINCT:     return eatIfKeyword("distinct", "DISTINCT");
         case TOKEN_KEYWORD_REDUCED:      return eatIfKeyword("reduced", "REDUCED");

         case TOKEN_KEYWORD_FROM:         return eatIfKeyword("from", "FROM");
         case TOKEN_KEYWORD_NAMED:        return eatIfKeyword("named", "NAMED");
         case TOKEN_KEYWORD_WHERE:        return eatIfKeyword("where", "WHERE");

         case TOKEN_KEYWORD_GRAPH:        return eatIfKeyword("graph", "GRAPH");
         case TOKEN_KEYWORD_OPTIONAL:     return eatIfKeyword("optional", "OPTIONAL");
         case TOKEN_KEYWORD_UNION:        return eatIfKeyword("union", "UNION");
         case TOKEN_KEYWORD_FILTER:       return eatIfKeyword("filter", "FILTER");
         case TOKEN_KEYWORD_a:            return eatIfKeyword("a", "a");

         case TOKEN_KEYWORD_STR:          return eatIfKeyword("str", "STR");
         case TOKEN_KEYWORD_LANG:         return eatIfKeyword("lang", "LANG");
         case TOKEN_KEYWORD_LANGMATCHES:  return eatIfKeyword("langmatches", "LANGMATCHES");
         case TOKEN_KEYWORD_DATATYPE:     return eatIfKeyword("datatype", "DATATYPE");
         case TOKEN_KEYWORD_BOUND:        return eatIfKeyword("bound", "BOUND");
         case TOKEN_KEYWORD_sameTERM:     return eatIfKeyword("sameterm", "SAMETERM");

         case TOKEN_KEYWORD_isURI:        return eatIfKeyword("isuri", "ISURI");
         case TOKEN_KEYWORD_isIRI:        return eatIfKeyword("isiri", "ISIRI");
         case TOKEN_KEYWORD_isBLANK:      return eatIfKeyword("isblank", "ISBLANK");
         case TOKEN_KEYWORD_isLITERAL:    return eatIfKeyword("isliteral", "ISLITERAL");
         case TOKEN_KEYWORD_REGEX:        return eatIfKeyword("regex", "REGEX");
         case TOKEN_KEYWORD_true:         return eatIfKeyword("true", "TRUE");
         case TOKEN_KEYWORD_false:        return eatIfKeyword("false", "FALSE");

         case TOKEN_ASC:                  return eatIfKeyword("asc", "ASC");
         case TOKEN_DESC:                 return eatIfKeyword("desc", "DESC");

         default:
            throwParseException("Internal error: unimplemented eatIfNext(%s)", getTokenName(tok));
            return false;   // not reached, throwParseException() always throws
      }
   }

}
