/*
 *  Type-ARQuE - the experimental SPARQL to SQL translator.
 *  Copyright (C) 2010  Sami Kiminki / Aalto University
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#pragma once

#include <list>
#include <istream>
#include <stdint.h>

#include "AQLException.h"
#include "SPARQLToAQL.h"

namespace TypeRQInternal {


   /**
    * Token kinds.
    *
    * Enumerators are numbered consecutively starting from TOKEN_END == 0,
    * so the declaration order below determines the numeric values. Append
    * or reorder with care if any code depends on those values.
    */
   enum TokenCode {
      TOKEN_END = 0,

      // --- terminals that carry a value ---
      TOKEN_IRI_REF,
      TOKEN_BLANK_NODE_LABEL,
      TOKEN_VAR,
      TOKEN_LANGTAG,
      TOKEN_NUMBER,
      TOKEN_NUMBER_POSITIVE,
      TOKEN_NUMBER_NEGATIVE,
      TOKEN_STRING_LITERAL,
      TOKEN_NIL,
      TOKEN_ANON,
      TOKEN_PNAME_NS,
      TOKEN_PNAME_LN,

      // --- punctuation and operators ---
      TOKEN_ASTERISK,
      TOKEN_LPAREN,        // (
      TOKEN_RPAREN,        // )
      TOKEN_PERIOD,
      TOKEN_COMMA,
      TOKEN_SEMICOLON,
      TOKEN_LBRACKET,      // [
      TOKEN_RBRACKET,      // ]
      TOKEN_LBRACE,        // {
      TOKEN_RBRACE,        // }
      TOKEN_AND,           // &&
      TOKEN_OR,            // ||
      // One of <, <=, ==, !=, >=, >; which one is given by lastTokenInt
      // as a TOKEN_REL_OPCODE_ value.
      TOKEN_REL_OP,
      TOKEN_PLUS,
      TOKEN_MINUS,
      TOKEN_SLASH,
      TOKEN_EXCLAMATION,
      TOKEN_DOUBLE_CARET,  // ^^

      // --- keywords: prologue and query forms ---
      TOKEN_KEYWORD_BASE,
      TOKEN_KEYWORD_PREFIX,
      TOKEN_KEYWORD_SELECT,
      TOKEN_KEYWORD_CONSTRUCT,
      TOKEN_KEYWORD_DESCRIBE,
      TOKEN_KEYWORD_ASK,

      // --- keywords: solution modifiers ---
      TOKEN_KEYWORD_ORDER,
      TOKEN_KEYWORD_BY,
      TOKEN_KEYWORD_LIMIT,
      TOKEN_KEYWORD_OFFSET,
      TOKEN_KEYWORD_DISTINCT,
      TOKEN_KEYWORD_REDUCED,

      // --- keywords: dataset and WHERE clauses ---
      TOKEN_KEYWORD_FROM,
      TOKEN_KEYWORD_NAMED,
      TOKEN_KEYWORD_WHERE,

      // --- keywords: graph patterns ---
      TOKEN_KEYWORD_GRAPH,
      TOKEN_KEYWORD_OPTIONAL,
      TOKEN_KEYWORD_UNION,
      TOKEN_KEYWORD_FILTER,
      TOKEN_KEYWORD_a,

      // --- keywords: built-in calls ---
      TOKEN_KEYWORD_STR,
      TOKEN_KEYWORD_LANG,
      TOKEN_KEYWORD_LANGMATCHES,
      TOKEN_KEYWORD_DATATYPE,
      TOKEN_KEYWORD_BOUND,
      TOKEN_KEYWORD_sameTERM,

      TOKEN_KEYWORD_isURI,
      TOKEN_KEYWORD_isIRI,
      TOKEN_KEYWORD_isBLANK,
      TOKEN_KEYWORD_isLITERAL,
      TOKEN_KEYWORD_REGEX,
      TOKEN_KEYWORD_true,
      TOKEN_KEYWORD_false,

      // --- ordering direction ---
      TOKEN_ASC,
      TOKEN_DESC,

      // NOTE(review): presumably marks a Token that wraps a parser-created
      // Expression (see Token::exp) - confirm against the parser.
      TOKEN_EXPR,
   };

   /**
    * Sub-codes distinguishing the relational operators that share the
    * single token kind TOKEN_REL_OP. Numbering starts at 1, so 0 is not
    * a valid operator code.
    */
   enum TokenRelOpCode {
      TOKEN_REL_OPCODE_LT = 1,  // <
      TOKEN_REL_OPCODE_LE = 2,  // <=
      TOKEN_REL_OPCODE_EQ = 3,  // equality
      TOKEN_REL_OPCODE_NE = 4,  // !=
      TOKEN_REL_OPCODE_GE = 5,  // >=
      TOKEN_REL_OPCODE_GT = 6,  // >
   };

   /**
    * One token together with the values decoded for it.
    *
    * tokenCode says which kind of token this is; most of the remaining
    * fields are only meaningful for particular token kinds, as noted on
    * each field. The constructors are defined outside this header.
    */
   struct Token
   {
      // kind of this token
      TokenCode tokenCode;

      // NOTE(review): presumably the token text as it appeared in the
      // input - confirm in the tokenizer implementation
      std::string lastTokenRaw;

      // the following are token values - only applicable to specific tokens

      // textual value: ID, string literal value, IRI, variable name, ...
      std::string lastTokenString;

      // integer value if the token is a number; for TOKEN_REL_OP this
      // holds the TOKEN_REL_OPCODE_ value of the operator
      int64_t lastTokenInt;
      // floating-point value if the token is a number
      double lastTokenDouble;
      // which numeric form the token has; NOTE(review): NONE presumably
      // means "not a number" - confirm
      enum {
         NONE, INTEGER, DECIMAL, DOUBLE,
      } lastTokenNumberType;

      // namespace and local-name parts - only if the token is a PNAME
      std::string lastTokenNS;
      std::string lastTokenLN;

      // parser created expression object
      Expression exp;

      // source position of the token
      SourceReference sourceReference;

      // Construct a token for the given source position.
      Token(const SourceReference &sref);
      // Construct a token from a parser-created expression.
      // NOTE(review): presumably yields a TOKEN_EXPR token - confirm.
      Token(const Expression &_exp);
      // Copy constructor (user-declared; defined elsewhere).
      Token(const Token &);

   private:
      // Assignment is declared private, so it cannot be used from outside
      // the struct ("we don't need it now"). NOTE(review): presumably left
      // without a definition - confirm in the implementation file.
      Token &operator =(Token &); // we don't need it now
   };

   /**
    * Exception type that prefixes an error message with a source position.
    *
    * The constructor hands the format string "Line %d column %d: %s" plus
    * the line, column and message text to the TypeRQ::AQLException
    * constructor.
    * NOTE(review): this relies on that constructor taking printf-style
    * arguments - confirm in AQLException.h.
    */
   class SPARQLTokenizerException : public TypeRQ::AQLException
   {
   public:
      /**
       * @param line     line number to report
       * @param col      column number to report
       * @param message  description of the problem
       */
      SPARQLTokenizerException(int line, int col, const std::string &message)
         : TypeRQ::AQLException("Line %d column %d: %s",
                                line, col, message.c_str())
      {
      }
   };

   /**
    * Tokenizer that reads its input from a std::istream and exposes the
    * most recently produced Token.
    *
    * All member functions are defined outside this header; the notes
    * below that are marked NOTE(review) are inferred from names only and
    * should be confirmed against the implementation.
    */
   class SPARQLTokenizer
   {
   public:
      // The tokenizer keeps a reference to _is (see member `is`), so the
      // stream must outlive the tokenizer.
      SPARQLTokenizer(std::istream &_is);

      // --- current input position ---
      int getCurrentLine();
      int getCurrentCol();

      // --- token stream ---
      // NOTE(review): the role of `tok` (expected token kind?) is not
      // visible from this header - confirm.
      void nextToken(TokenCode tok);
      bool eatIfNext(TokenCode tok) ;
      const Token &getLastToken() const;

      // Name of a token kind as a C string.
      static const char *getTokenName(TokenCode tok);

   private:
      // --- state ---
      // Data members are kept in their original relative order, since
      // that order determines initialization order and object layout.
      std::istream &is;                 // input stream (not owned)
      int line;
      int col;
      char lastchar;                    // used to detect "\r\n" and "\n\r" patterns when updating line
      std::list<int> lookaheadBuffer;
      bool pushBackToRaw;
      Token lastToken;
      const int EOF_CHAR;

      // --- error reporting (neither function returns) ---
      void throwParseException(const char *fmt, ...) __attribute__ ((noreturn));
      void unsupported(const char *operation) __attribute__ ((noreturn));

      // --- character-level input ---
      void ensureLookaheadBufferSize(int i);
      int peek(int lookahead);
      int get();
      bool isWhiteSpaceChar(int c);
      void eatWhiteSpaces();

      // --- literal decoding ---
      bool decodeNumber();
      int decodeECHAR();
      std::string codepointToUTF8(int cp);
      void decodeStringLiteralSmall(char terminator);
      void decodeStringLiteralLong(char terminator);
      bool decodeStringLiteral();

      // --- keyword / fixed-text matching ---
      // NOTE(review): lkw/ukw are presumably the lower- and upper-case
      // spellings of the keyword - confirm.
      bool eatIfKeyword(const char *lkw, const char *ukw);
      bool eatIfMatch(const char *k);

      // --- character classes ---
      // NOTE(review): the PN_* names presumably mirror the SPARQL grammar
      // productions of the same names - confirm.
      bool isInRange(int c, int lo, int hi) ;
      bool isPN_CHARS_BASE(int c);
      bool isPN_CHARS_U(int c);
      bool isPN_CHARS(int c);

      // --- prefixed-name lookahead ---
      int peekPNAME_NS();
      int peekPNAME_LOCAL(int ofs);

      bool eatIfNextInternal(TokenCode tok);
   };


}
