/*
 *  Type-ARQuE - the experimental SPARQL to SQL translator.
 *  Copyright (C) 2010  Sami Kiminki / Aalto University
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <cassert>
#include <cerrno>
#include <cstdlib>
#include <list>
#include <limits>
#include <iostream>
#include <stdarg.h>
#include <stdio.h>

#include "AQLException.h"
#include "AQLModel.h"
#include "AQLListParser.h"
#include "FormatUtils.h"
#include "AQLSupport.h"


#define DEBUG_AQL_PARSER (1)

#ifdef DEBUG_AQL_PARSER
#define DEBUG(a) a
#else
#define DEBUG(a)
#endif

namespace {

   using namespace TypeRQ;

   /**
    * Renders a single character for use in diagnostics.
    *
    * Characters in the range [0x20, 0x7E] are returned unchanged as a
    * one-character string. Everything else is rendered as "\x" followed by
    * the output of intToHexString(code, 2).
    */
   std::string possiblyEscape(char c)
   {
      // Go through unsigned char first: where plain char is signed, converting
      // a byte >= 0x80 directly to unsigned sign-extends it (e.g. 0xFFFFFF80),
      // which would hand a huge value to the hex formatter.
      const unsigned code=static_cast<unsigned char>(c);

      if (code<0x20 || code>=0x7F)
      {
         std::string s="\\x";
         s+=intToHexString(code, 2);
         return s;
      }
      else {
         return std::string(1, c);
      }
   }

   /**
    * Tells whether the given character code is one of the four whitespace
    * characters the parser skips: line feed, carriage return, space and tab.
    */
   bool isWhiteSpace(int i)
   {
      return i=='\n' || i=='\r' || i==' ' || i=='\t';
   }

   /**
    * Tells whether the given character code may be part of a keyword.
    *
    * Whitespace and the two parentheses terminate a keyword; every other
    * value is accepted. Note that this includes negative values such as the
    * stream's end-of-file marker.
    */
   bool isKeywordCharacter(int c)
   {
      const bool isDelimiter=(c=='(' || c==')');
      return !isWhiteSpace(c) && !isDelimiter;
   }

   /**
    * Tells whether the given character code may appear inside a quoted
    * string: anything at or above 0x20 except the double quote. Control
    * characters and negative values (such as end-of-file) are rejected.
    */
   bool isStringCharacter(int c)
   {
      if (c=='"') return false;
      return !(c<0x20);
   }

   /**
    * Parse error that carries the input position.
    *
    * Both constructors forward to AQLException with the format
    * "Line %d Column %d: %s", so the position becomes part of the message.
    */
   class AQLParserException : public AQLException {
   public:
      /** Builds the exception from a C string message. */
      AQLParserException(int lineNo, int column, const char *text)
         : AQLException("Line %d Column %d: %s", lineNo, column, text)
      {
      }

      /** Builds the exception from a std::string message. */
      AQLParserException(int lineNo, int column, const std::string &text)
         : AQLException("Line %d Column %d: %s", lineNo, column, text.c_str())
      {
      }
   };


   class AQLListParserImpl
   {
   protected:
      std::istream &is;
      int line;
      int col;
      char lastchar; // used to detect "\r\n" and "\n\r" patterns when updating line

      const int EOF_CHAR;  // = std::istream::traits_type::eof()



   public:
      /**
       * Creates a parser reading from the given stream.
       *
       * The stream is held by reference. The position starts at line 1,
       * column 1, and lastchar is value-initialised.
       */
      AQLListParserImpl(std::istream &_is) : is(_is), line(1), col(1), lastchar(), EOF_CHAR(std::istream::traits_type::eof())
      {
      }

   protected:
      /**
       * Returns the result of is.peek(): the next character of the input
       * without consuming it. The line/column position is not touched.
       */
      int peek()
      {
         return is.peek();
      }

      /**
       * Consumes one character from the input and updates the position.
       *
       * Throws AQLParserException ("Unexpected end of file") if the read
       * fails. A lone '\r' or '\n' counts as one line break; a "\r\n" or
       * "\n\r" pair also counts as exactly one. The column is reset to 1 on
       * any line-break character and incremented otherwise.
       *
       * @return the character that was read
       */
      char get()
      {
         int c = is.get();
         expectNotEof();

         if (c == '\r' || c == '\n')
         {
            const bool completesPair =
               (c == '\r' && lastchar == '\n') || (c == '\n' && lastchar == '\r');

            if (completesPair)
            {
               // Second half of a two-character line break: already counted.
               // Forget it so that the first character of the NEXT line break
               // is not mistaken for the tail of this one (e.g. "\r\n\r\n").
               lastchar = 0;
            }
            else {
               ++line;
               lastchar = c;
            }

            col = 1;
         }
         else {
            ++col;
            lastchar = c;
         }

         return c;
      }

      /**
       * Formats a printf-style message and throws it as an
       * AQLParserException tagged with the current line and column.
       *
       * If vasprintf reports failure, the message "Format failure!" is
       * thrown instead. This function never returns.
       */
      void throwParseException(const char *fmt, ...) __attribute__ ((noreturn))
      {
         char *formatted=NULL;

         va_list args;
         va_start(args, fmt);
         const int written=vasprintf(&formatted, fmt, args);
         va_end(args);

         if (written<0)
         {
            throw AQLParserException(line, col, "Format failure!");
         }

         // Copy into a std::string so the malloc'ed buffer can be released
         // before the exception leaves this function.
         std::string text(formatted);
         free(formatted);

         throw AQLParserException(line, col, text.c_str());
      }

      /**
       * Throws AQLParserException ("Unexpected end of file") when the input
       * stream is in a failed state, as it is after a read past the end.
       */
      void expectNotEof()
      {
         if (is.fail())
         {
            throw AQLParserException(line, col, "Unexpected end of file");
         }
      }

      /**
       * Verifies that no input is left.
       *
       * Peeks at the next character; if it is anything but end-of-file, an
       * AQLParserException naming the (possibly escaped) character is thrown.
       */
      void expectEof()
      {
         const int next = peek();
         if (next == std::istream::traits_type::eof()) return;

         std::string message("Expected EOF but got '");
         message += possiblyEscape(next);
         message += '\'';
         throw AQLParserException(line, col, message.c_str());
      }


      /**
       * Consumes whitespace characters until the next character is either
       * end-of-file or not whitespace. Consumption goes through get(), so the
       * line/column position stays up to date.
       */
      void skipWhiteSpaces()
      {
         for (;;)
         {
            const int next = is.peek();
            if (next == std::istream::traits_type::eof() || !isWhiteSpace(next))
            {
               break;
            }
            get();
         }
      }

      /**
       * Reads a double-quoted string and returns its unescaped contents.
       *
       * Supported escapes are \n, \r, \\ and \". The escapes \x, \u and \U
       * are recognised but rejected as not implemented; any other escape is
       * reported as a bad escape. The string body ends at the first character
       * that is not a string character (a double quote, a control character
       * or end-of-file); that character must then be the closing quote.
       *
       * Throws AQLParserException on a missing quote, an unsupported or bad
       * escape, or unexpected end of input.
       */
      std::string readString()
      {
         std::string ret;
         readExpectedCharacter('"');
         while (isStringCharacter(peek()))
         {
            char c=get();

            if (c=='\\')
            {
               // the character after the backslash selects the escape
               c=get();
               switch (c)
               {
                  case 'n':   c='\n'; break;
                  case 'r':   c='\r'; break;
                  case 'x':   throw AQLParserException(line, col, "\\x escape not implemented");
                  case 'u':   throw AQLParserException(line, col, "\\u escape not implemented");
                  case 'U':   throw AQLParserException(line, col, "\\U escape not implemented");
                  case '\\':  break;
                  case '"':   break;
                  default:    throwParseException("Bad escape \\%c", c);
               }
            }
            ret.push_back(c);
         }
         readExpectedCharacter('"');
         return ret;
      }

      std::string readKeyword()
      {
         std::string ret;
         while (isKeywordCharacter(peek()))
         {
            char c=get();
            ret.push_back(c);
         }
         return ret;
      }

      bool readBoolean()
      {
         const std::string lit=readKeyword();
         if (lit=="false") return false;
         if (lit=="true") return true;
         throw AQLException("Expected `true' or `false', but got %s", lit.c_str());
      }

      /**
       * Reads a keyword and converts it to an int using base-10 strtol.
       *
       * Throws AQLParserException if the keyword is empty, is not entirely
       * consumed by the conversion, or lies outside the range of int.
       */
      int readInt()
      {
         const std::string s=readKeyword();
         const char *start=s.c_str();

         // Clear errno right before the conversion so that only strtol itself
         // can be the source of a non-zero value checked below.
         char *end = 0;
         errno=0;
         const long l=strtol(start, &end, 10);

         // An empty keyword would otherwise pass the "fully consumed" check
         // (end == start == start+0) and be silently accepted as 0.
         if (s.empty() || start+s.size()!=end) throwParseException("Bad integer value %s", start);

         if (errno!=0 || l>std::numeric_limits<int>::max() || l<std::numeric_limits<int>::min())
         {
            throwParseException("Number out of range: %s. Must be within [%d, %d]",
                                start, std::numeric_limits<int>::min(), std::numeric_limits<int>::max());
         }

         return static_cast<int>(l);
      }

      /**
       * Reads a keyword and converts it to a double using strtod.
       *
       * Throws AQLParserException if the keyword is empty, is not entirely
       * consumed by the conversion, or strtod sets errno.
       */
      double readDouble()
      {
         const std::string s=readKeyword();
         const char *start=s.c_str();

         // Clear errno right before the conversion so that only strtod itself
         // can be the source of a non-zero value checked below.
         char *end = 0;
         errno=0;
         const double ret=strtod(start, &end);

         // An empty keyword would otherwise pass the "fully consumed" check
         // (end == start == start+0) and be silently accepted as 0.0.
         if (s.empty() || start+s.size()!=end) throwParseException("Bad double value %s", start);

         if (errno!=0)
         {
            throwParseException("Bad number format or number out of range: %s",
                                start);
         }

         return ret;
      }

      /**
       * Consumes one character and checks that it is the expected one.
       *
       * On a mismatch an AQLParserException is thrown that names both the
       * character found and the character expected, escaped where needed.
       * End of input is reported by get().
       */
      void readExpectedCharacter(char c)
      {
         const int actual=get();
         if (actual==c) return;

         std::string message("Unexpected character '");
         message.append(possiblyEscape(actual));
         message.append("'. Expected '");
         message.append(possiblyEscape(c));
         message.append("'");

         throw AQLParserException(line, col, message.c_str());
      }

      /**
       * Reads a keyword and checks that it equals the expected one.
       *
       * @param exp the keyword that must appear next in the input
       *
       * Throws AQLParserException naming the expected keyword on a mismatch.
       */
      void readExpectedKeyword(const char *exp)
      {
         const std::string k=readKeyword();
         if (k!=exp)
         {
            // Plain appends: the former unary '+' promoted the quote character
            // to int and only worked through an implicit int-to-char narrowing.
            std::string message="Expected keyword '";
            message+=exp;
            message+='\'';
            throw AQLParserException(line, col, message);
         }
      }

      /**
       * Reads a keyword and maps it to a single expression type.
       *
       * Accepted keywords (case-sensitive): IRI, string, integer, double,
       * boolean, datetime, reference, any. Anything else raises an
       * AQLParserException.
       */
      AQLTypeSet::ExprType readExprTypeKeyword() {
         struct TypeName {
            const char *name;
            AQLTypeSet::ExprType type;
         };

         static const TypeName knownTypes[]={
            { "IRI",       AQLTypeSet::IRI },
            { "string",    AQLTypeSet::STRING },
            { "integer",   AQLTypeSet::INTEGER },
            { "double",    AQLTypeSet::DOUBLE },
            { "boolean",   AQLTypeSet::BOOLEAN },
            { "datetime",  AQLTypeSet::DATETIME },
            { "reference", AQLTypeSet::REFERENCE },
            { "any",       AQLTypeSet::ANY }
         };
         const unsigned knownTypeCount=sizeof(knownTypes)/sizeof(knownTypes[0]);

         const std::string type=readKeyword();
         for (unsigned i=0; i<knownTypeCount; ++i)
         {
            if (type==knownTypes[i].name) return knownTypes[i].type;
         }

         throwParseException("Expected expression type keyword but got \"%s\"", type.c_str());
      }

      /**
       * Reads either a single type keyword or a parenthesised list of type
       * keywords and returns the resulting type set.
       *
       * A bare keyword yields AQLTypeSet(type). A list "(t1 t2 ...)" starts
       * from AQLTypeSet::UNSET and calls setType() for each listed type; the
       * list may be empty.
       */
      AQLTypeSet readExprTypesetKeyword()
      {
         if (peek()!='(')
         {
            // single type, no parentheses
            return AQLTypeSet(readExprTypeKeyword());
         }

         get(); // consume '('
         AQLTypeSet types(AQLTypeSet::UNSET);

         for (skipWhiteSpaces(); peek()!=')'; skipWhiteSpaces())
         {
            const AQLTypeSet::ExprType member=readExprTypeKeyword();
            types.setType(member);
         }

         get(); // consume ')'
         return types;
      }

      /**
       * Parses one parenthesised expression of the form "(keyword ...)".
       *
       * Recognised forms:
       *   (literal <type> <value>)     type: IRI, string, integer, double, boolean
       *   (property <type|typeset> "joinname" subject|predicate|object)
       *   (function "name" <type|typeset> <expr>*)
       *   (comp-eq|comp-ne|comp-ge|comp-gt|comp-le|comp-lt <expr> <expr>)
       *   (and|or <criterion>*)
       *   (not <expr>)
       *   (typecast <type> <expr>)
       *
       * Returns a heap-allocated expression node; ownership passes to the
       * caller. If anything throws while parsing, the node allocated so far
       * is deleted and the exception is rethrown.
       */
      AQLExpr *parseExpr()
      {
         AQLExpr *expr=0;

         try {
            skipWhiteSpaces();
            readExpectedCharacter('(');
            skipWhiteSpaces();
            const std::string keyword=readKeyword();

            // all six comparison keywords share one parsing path below
            const bool isComparison=
               keyword=="comp-eq" || keyword=="comp-ne" ||
               keyword=="comp-ge" || keyword=="comp-gt" ||
               keyword=="comp-le" || keyword=="comp-lt";

            if (keyword=="literal")
            {
               // literal type-keyword value
               skipWhiteSpaces();
               const AQLTypeSet::ExprType literalType=readExprTypeKeyword();
               skipWhiteSpaces();

               if (literalType==AQLTypeSet::IRI || literalType==AQLTypeSet::STRING)
               {
                  std::string text=readString();
                  expr=new AQLLiteralExpr(text, literalType);
               }
               else if (literalType==AQLTypeSet::INTEGER)
               {
                  int64_t number=readInt(); // TODO: readInt64
                  expr=new AQLLiteralExpr(number);
               }
               else if (literalType==AQLTypeSet::DOUBLE)
               {
                  double number=readDouble();
                  expr=new AQLLiteralExpr(number);
               }
               else if (literalType==AQLTypeSet::BOOLEAN)
               {
                  bool flag=readBoolean();
                  expr=new AQLLiteralExpr(flag);
               }
               else {
                  throwParseException("AQLListParser::parseExpr(): unhandled literal type %d", (int)literalType);
               }
            }
            else if (keyword=="property")
            {
               // property type-keyword "joinname" propertykeyword
               skipWhiteSpaces();
               const AQLTypeSet typeSet=readExprTypesetKeyword();
               skipWhiteSpaces();
               const std::string join=readString();
               skipWhiteSpaces();
               const std::string part=readKeyword();

               AQLPropertyExpr *property=new AQLPropertyExpr();
               expr=property;
               property->joinName=join;
               property->propertyType=typeSet;

               if (part=="subject")
                  property->property=AQLPropertyExpr::SUBJECT;
               else if (part=="predicate")
                  property->property=AQLPropertyExpr::PREDICATE;
               else if (part=="object")
                  property->property=AQLPropertyExpr::OBJECT;
               else
                  throwParseException("Expected node part keyword (subject, predicate or object) but got \"%s\"", part.c_str());
            }
            else if (keyword=="function")
            {
               // function "functionname" type-keyword [argument]*
               skipWhiteSpaces();
               AQLFunctionExpr *function=new AQLFunctionExpr();
               expr=function;
               function->functionName=readString();
               skipWhiteSpaces();
               function->functionType=readExprTypesetKeyword();

               // every further '(' opens one argument expression
               for (skipWhiteSpaces(); peek()=='('; skipWhiteSpaces())
               {
                  function->arguments.push_back(parseExpr());
               }
            }
            else if (isComparison)
            {
               AQLComparisonCriterion *comparison=new AQLComparisonCriterion;
               expr=comparison;

               if (keyword=="comp-eq")
                  comparison->comparisonType=AQLComparisonCriterion::EQUAL;
               else if (keyword=="comp-ne")
                  comparison->comparisonType=AQLComparisonCriterion::NOT_EQUAL;
               else if (keyword=="comp-ge")
                  comparison->comparisonType=AQLComparisonCriterion::GREATER_OR_EQUAL;
               else if (keyword=="comp-gt")
                  comparison->comparisonType=AQLComparisonCriterion::GREATER;
               else if (keyword=="comp-le")
                  comparison->comparisonType=AQLComparisonCriterion::LESS_OR_EQUAL;
               else // comp-lt
                  comparison->comparisonType=AQLComparisonCriterion::LESS;

               comparison->left=parseExpr();
               comparison->right=parseExpr();
            }
            else if (keyword=="and" || keyword=="or")
            {
               AQLJunctionCriterion *junction=new AQLJunctionCriterion;
               expr=junction;

               junction->junctionType=(keyword=="and")
                  ? AQLJunctionCriterion::CONJUNCTION
                  : AQLJunctionCriterion::DISJUNCTION;

               // every '(' opens one term of the junction
               for (skipWhiteSpaces(); peek()=='('; skipWhiteSpaces())
               {
                  junction->terms.push_back(parseCriterion());
               }
            }
            else if (keyword=="not")
            {
               AQLNotExpression *negation=new AQLNotExpression;
               expr=negation;
               negation->expr=parseExpr();
            }
            else if (keyword=="typecast")
            {
               AQLTypecastExpression *typecast=new AQLTypecastExpression;
               expr=typecast;
               skipWhiteSpaces();
               typecast->toType=readExprTypeKeyword();
               skipWhiteSpaces();
               typecast->expr=parseExpr();
            }
            else {
               throwParseException("Expected: expression keyword but got \"%s\"", keyword.c_str());
            }

            skipWhiteSpaces();
            readExpectedCharacter(')');
            return expr;
         }
         catch (...) {
            // release whatever was built before the failure
            delete expr;
            throw;
         }
      }

      /**
       * Parses a criterion. Currently this simply delegates to parseExpr(),
       * so a criterion accepts exactly the same syntax as any expression.
       */
      AQLExpr *parseCriterion()
      {
         return parseExpr();
      }

      /**
       * Parses the inside of a join group.
       *
       * When the join is "(join left|inner ...)", this parses the part
       * "left|inner ...", leaving the opening "(join" and the closing ")" to
       * the caller. The grammar is:
       *
       *   left|inner  "name" | ("name"*)  [criterion  (join ...)*]
       *
       * On return, trailing whitespace has been skipped, so the next
       * character is the one the caller must check for the closing ')'.
       *
       * Returns a heap-allocated join group; ownership passes to the caller.
       * If parsing fails, the group built so far is deleted and the exception
       * is rethrown.
       */
      AQLJoinGroup *parseJoinInnards()
      {
         skipWhiteSpaces();
         std::string joinTypeKeyword=readKeyword();

         AQLJoinGroup::JoinType joinType=AQLJoinGroup::JoinType();
         if (joinTypeKeyword=="left") {
            joinType=AQLJoinGroup::LEFT_OUTER;
         } else if (joinTypeKeyword=="inner") {
            joinType=AQLJoinGroup::INNER;
         } else {
            throwParseException("Bad join type '%s'. Expected 'left' or 'inner'", joinTypeKeyword.c_str());
         }

         skipWhiteSpaces();
         std::list<std::string> joinNames;
         if (peek()=='(')
         {
            get(); // reads peeked '('
            skipWhiteSpaces();

            while (peek()!=')')
            {
               joinNames.push_back(readString());
               skipWhiteSpaces();
            }

            readExpectedCharacter(')'); // matches join name list
         }
         else {
            joinNames.push_back(readString());
         }
         skipWhiteSpaces();

         // Allocate the group before parsing anything it will own, so that a
         // single cleanup path covers the criterion and the nested joins.
         AQLJoinGroup *join=new AQLJoinGroup;
         try {
            join->joinType=joinType;
            join->names=joinNames;

            if (peek()!=')')
            {
               // join didn't end there, e.g., there's at least criterion and maybe nested joins, too
               join->criterion=parseCriterion();
               skipWhiteSpaces();

               while (peek()=='(')
               {
                  // we have a nested join
                  readExpectedCharacter('(');

                  skipWhiteSpaces();
                  readExpectedKeyword("join");

                  AQLJoinGroup *nestedJoin=parseJoinInnards();
                  try {
                     readExpectedCharacter(')');
                     join->nestedJoins.push_back(nestedJoin);
                  }
                  catch (...) {
                     // not yet handed over to 'join', so release it here
                     delete nestedJoin;
                     throw;
                  }

                  // allow whitespace between consecutive nested joins and
                  // before the closing ')' that the caller checks
                  skipWhiteSpaces();
               }
            }
         }
         catch (...) {
            delete join;
            throw;
         }

         return join;
      }

      /**
       * Parses a complete "(aql-query ...)" form.
       *
       * Each parenthesised element inside the query is one of:
       *   ("name" ...)                  root join names (allowed only once)
       *   (select "label" <expr>)
       *   (join <join innards>)
       *   (criterion <criterion>)       several criteria are AND-ed together
       *   (sort ascending|descending <expr>)
       *   (result-max-rows <int>)       must be non-negative
       *   (result-row-offset <int>)     must be non-negative
       *
       * If no root join names are given, the single name "root" is used.
       *
       * Returns a heap-allocated query; ownership passes to the caller. On a
       * parse error the query is deleted and the exception is rethrown.
       */
      AQLQuery *parseAQLQuery()
      {
         AQLQuery *query=new AQLQuery;
         bool rootNamesSeen=false;

         try {
            skipWhiteSpaces();
            readExpectedCharacter('(');
            skipWhiteSpaces();
            readExpectedKeyword("aql-query");

            for (skipWhiteSpaces(); peek()=='('; skipWhiteSpaces())
            {
               get(); // consume the peeked '('
               skipWhiteSpaces();

               if (peek()=='"')
               {
                  // a list of root join names
                  if (rootNamesSeen) throwParseException("Root join names already added");
                  rootNamesSeen=true;

                  while (peek()=='"')
                  {
                     const std::string name=readString();
                     query->names.push_back(name);
                     skipWhiteSpaces();
                  }
               }
               else {
                  // anything else must start with a keyword
                  const std::string keyword=readKeyword();

                  if (keyword=="select")
                  {
                     skipWhiteSpaces();
                     const std::string label=readString();
                     AQLExpr *selected=parseExpr();

                     AQLSelect *select=new AQLSelect;
                     select->label=label;
                     select->expr=selected;
                     query->selects.push_back(select);
                  }
                  else if (keyword=="join")
                  {
                     query->nestedJoins.push_back(parseJoinInnards());
                  }
                  else if (keyword=="criterion")
                  {
                     AQLExpr *criterion=parseCriterion();

                     if (!query->criterion)
                     {
                        // first criterion becomes the root as it is
                        query->criterion=criterion;
                     }
                     else {
                        AQLJunctionCriterion *rootJunction=
                           dynamic_cast<AQLJunctionCriterion *>(query->criterion);

                        if (rootJunction && rootJunction->junctionType==AQLJunctionCriterion::CONJUNCTION)
                        {
                           // root already is a conjunction: just add a term
                           rootJunction->terms.push_back(criterion);
                        }
                        else {
                           // wrap the old root and the new criterion in a
                           // fresh conjunction
                           AQLJunctionCriterion *conjunction=new AQLJunctionCriterion;
                           conjunction->junctionType=AQLJunctionCriterion::CONJUNCTION;
                           conjunction->terms.push_back(query->criterion);
                           conjunction->terms.push_back(criterion);
                           query->criterion=conjunction;
                        }
                     }
                  }
                  else if (keyword=="sort")
                  {
                     // register the sort first so the query owns it if the
                     // remaining parsing throws
                     AQLSort *sort=new AQLSort;
                     query->sorts.push_back(sort);

                     skipWhiteSpaces();
                     const std::string direction=readKeyword();
                     const bool ascending=(direction=="ascending");

                     if (!ascending && direction!="descending")
                     {
                        throwParseException("Bad sort direction '%s'. Expected 'ascending' or 'descending'", direction.c_str());
                     }
                     sort->ascending=ascending;

                     skipWhiteSpaces();
                     sort->expr=parseExpr();
                  }
                  else if (keyword=="result-max-rows")
                  {
                     skipWhiteSpaces();
                     const int limit=readInt();
                     if (limit<0) throwParseException("Expected non-negative numeric value");
                     query->maxRows=limit;
                  }
                  else if (keyword=="result-row-offset")
                  {
                     skipWhiteSpaces();
                     const int skip=readInt();
                     if (skip<0) throwParseException("Expected non-negative numeric value");
                     query->rowOffset=skip;
                  }
                  else {
                     throwParseException("Expected: select, join, criterion, sort, result-max-rows, result-row-offset or ')', but got %s", keyword.c_str());
                  }
               }

               skipWhiteSpaces();
               readExpectedCharacter(')'); // closes the element opened above
            }

            readExpectedCharacter(')'); // closes aql-query
            skipWhiteSpaces();

            if (!rootNamesSeen)
            {
               // no explicit join names: fall back to the single join "root"
               query->names.push_back("root");
            }

            return query;
         }
         catch (...) {
            delete query;
            throw;
         }
      }


   public:
      /**
       * Parses the whole input as a single AQL query.
       *
       * After the query, only whitespace may remain before end-of-file;
       * otherwise the parsed query is deleted and an AQLParserException is
       * thrown. Ownership of the returned query passes to the caller.
       */
      AQLQuery *parse()
      {
         // parseAQLQuery() cleans up after itself on failure, so only the
         // trailing checks need guarding here
         AQLQuery *query=parseAQLQuery();

         try {
            skipWhiteSpaces();
            expectEof();
         }
         catch (...) {
            delete query;
            throw;
         }

         return query;
      }

   };

}


namespace TypeRQ
{


   /** Constructs the parser. The body is empty; nothing is set up here. */
   AQLListParser::AQLListParser()
   {
   }

   /** Destroys the parser. The body is empty; nothing is released here. */
   AQLListParser::~AQLListParser()
   {
   }

   /**
    * Parses one AQL query in list syntax from the given stream.
    *
    * A fresh AQLListParserImpl is created for each call. Returns the
    * heap-allocated query produced by its parse(); ownership passes to the
    * caller. Exceptions thrown by the implementation propagate unchanged.
    */
   AQLQuery *AQLListParser::parseQuery(std::istream &is)
   {
      return AQLListParserImpl(is).parse();
   }

}
