
// matches.cpp
// Copyright 2015 Matthew Rickard
// This file is part of dep

// dep is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <https://www.gnu.org/licenses/>.

#include "precomp.h"
#include "matches.h"
#include <map>
#include <string>
#include <iostream>
#include "cross_platform.h"
#include "logMsg.h"
#include "quote.h"
#include <string.h>

// There are three differences between regex_search and regex_match:
// 1: regex_search searches for the RE. regex_match only tests the entire string
// 2: match[0] from regex_match is rather dull, it is the entire input string
//    match[0] from regex_search is more interesting, it is the location of the RE.
// 3: regex_match is faster

// They have some things in common:
// 1: match[1] and onwards are submatches that were specified with parentheses in the RE

// matches() and within() return the position and length of a found string
// Pattern::matches() returns the position and length of a submatch
// Pattern::within() and StartPattern::within() are identical - they
//   return the position and length of the found RE

// Tests whether the whole of s matches the pattern (std::regex_match).
//
// When it does, and the pattern contains at least one parenthesised
// submatch, the first submatch is reported:
//   posPtr - if non-null, receives the offset of the submatch within s
//   lenPtr - if non-null, receives the length of the submatch
// The two pointers are handled independently, so a caller may ask for the
// length alone. Nothing is written when the match fails or when the pattern
// has no submatch.
//
// Returns non-zero if s matches, zero otherwise.
int Pattern::matches(const std::string &s, int *posPtr, int *lenPtr) const {
  std::smatch match;
  const bool result = std::regex_match(s, match, re);
  if (result && match.size() > 1) {
    // match[0] is the whole input for regex_match; match[1] is the first group
    const auto &sub = match[1];
    if (posPtr)
      *posPtr = static_cast<int>(sub.first - s.begin());
    if (lenPtr)
      *lenPtr = static_cast<int>(sub.second - sub.first);
  }
  return result;
}

// Searches s for the pattern (std::regex_search).
//
// On success the reported span is the first parenthesised submatch when the
// pattern has one, otherwise the whole matched text:
//   posPtr - if non-null, receives the offset of that span within s
//   lenPtr - if non-null, receives the length of that span
// Neither pointer is written when nothing is found.
//
// Returns 1 if the pattern was found, 0 otherwise.
int Pattern::within(const std::string &s, int *posPtr, int *lenPtr) const {
  std::smatch found;
  if (!std::regex_search(s, found, re))
    return 0;

  // Prefer the first capture group over the whole match when one exists
  const auto &span = found[found.size() > 1 ? 1 : 0];
  if (posPtr)
    *posPtr = span.first - s.begin();
  if (lenPtr)
    *lenPtr = span.second - span.first;
  return 1;
}

// This is a mishmash of startsWith() and within(), and is only used by CmdLine

// Tests whether s begins with this pattern.
//
//   s        - the text to test
//   start    - the pattern text; compared literally against s
//   startlen - number of characters of start to compare in the literal case
//   lenPtr   - if non-null, receives the length of the matched prefix of s;
//              written only when the function returns non-zero
//
// Literal case (useRegex is zero): a plain strncmp over startlen characters.
//
// Regex case: the first howFar characters of s must equal the first howFar
// characters of start, then the regex is searched for in the remainder of s
// with within(). The reported length is the length from within() plus howFar.
//
// Returns non-zero on a match, zero otherwise.
int StartPattern::patternStartsWith(const char *s, const char *start, int startlen, int *lenPtr) const {
  if (!useRegex) {
    const int result = !strncmp(s, start, startlen);
    if (result && lenPtr)
      *lenPtr = startlen;
    return result;
  }

  // Cheap literal-prefix rejection before running the regex
  if (howFar && strncmp(s, start, howFar))
    return 0;

  const int result = within(s + howFar, 0, lenPtr);
  // within() leaves *lenPtr untouched on failure, so only adjust it on success
  if (result && lenPtr)
    *lenPtr += howFar;
  return result;
}

int StartPattern::fastMatches(const std::string &s, int *posPtr, int *lenPtr) const {
  if (useRegex) {
    if (howFar) {
      //logMsg << logValue(s) << std::endl;
      //logMsg << logValue(this->s) << std::endl;
      //logMsg << logValue(howFar) << std::endl;
      if (strncmp(s.c_str(), this->s.c_str(), howFar)) {
        //logMsg << "Returning 0" << std::endl;
        return 0;
      }
      //logMsg << "Checkpoint!!!" << std::endl;
      int result = matches(s.c_str() + howFar, posPtr, lenPtr);
      if (result && posPtr)
        *posPtr += howFar;
      //logMsg << logValue(s.c_str() + howFar) << std::endl;
      //logMsg << logValue(Pattern::s) << std::endl;
      //logMsg << logValue(result) << std::endl;
      return result;
    }
    lineabort();
  }
  lineabort();
}

// Constructs a Pattern. A null pattern leaves the object uncompiled so that
// setup() can be called later; otherwise the pattern is compiled immediately.
Pattern::Pattern(const char *pattern, std::regex_constants::syntax_option_type patternType) {
  if (!pattern)
    return;
  setup(pattern, patternType);
}

// Stores the pattern text in s and compiles it into re using patternType
// plus std::regex_constants::optimize.
// A std::regex_error is not propagated: it is logged and re is left as it was.
void Pattern::setup(const char *pattern, std::regex_constants::syntax_option_type patternType) {
  s = pattern;
  // std::regex_constants::optimize is immeasurable in script/gml
  const std::regex_constants::syntax_option_type options =
    patternType | std::regex_constants::optimize;
  try {
    re = std::regex(pattern, options);
  }
  catch (const std::regex_error &e) {
    logMsg << "Couldn't assign " << logValue(pattern) << " to std::regex\n"
      << logValue(e.what()) << ' ' << logValue(e.code()) << std::endl;
  }
}

// Builds a StartPattern from pattern.
//
//   pattern     - the pattern text (must not be null)
//   patternType - regex syntax flags handed to std::regex
//   flags       - bit set of STARTSWITH_* options
//
// Unless STARTSWITH_FIXED is set, the pattern is scanned from the left to
// split it into a literal prefix and a regex tail:
//   - letters, digits and - / = > < : ! ( ) | space % , ^ # " are accepted as
//     part of the literal prefix;
//   - [ . \ $ start the regex tail at that character;
//   - + * ? start the regex tail one character earlier, so the character
//     being quantified belongs to the regex;
//   - any other character is logged and ends in lineabort().
// howFar is the length of the literal prefix and useRegex records whether a
// regex tail was found. With STARTSWITH_FIXED the scan is skipped, leaving
// howFar == 0 and useRegex == 0.
//
// With STARTSWITH_MATCHES the tail is compiled through Pattern::setup().
// Otherwise the tail is prefixed with '^', stored in s and compiled directly;
// a std::regex_error is logged rather than propagated.
StartPattern::StartPattern(const char *pattern,
    std::regex_constants::syntax_option_type patternType, int flags):
    Pattern(0, patternType),
    s(pattern),
    flags(flags)
{
  useRegex = 0;
  const char *p = pattern;
  if (!(flags & STARTSWITH_FIXED))
    for (; *p; p++) {
      const char c = *p;
      if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')) {
        // plain literal character
      }
      else switch (c) {
        case '-': case '/': case '=': case '>': case '<': case ':':
        case '!': case '(': case ')': case '|': case ' ': case '%':
        case ',': case '^': case '#': case '"':
          // accepted as part of the literal prefix
          break;
        case '[': case '.': case '\\': case '$':
          useRegex = 1;
          break;
        case '+': case '*': case '?':
          // The quantifier applies to the previous character, so that
          // character belongs to the regex. Never step before the buffer.
          if (p > pattern)
            p--;
          useRegex = 1;
          break;
        // ']' is usually sensed earlier, via its opening '['
        default:
          logMsg << logValue(quote(pattern)) << ' ' << logValue(quote(p)) << std::endl;
          lineabort();
      }
      if (useRegex) // don't remove this test. It is needed to calculate p and howFar
        break;
    }
  howFar = p - pattern;

  // Bitwise test of the flag, matching the STARTSWITH_FIXED test above
  if (flags & STARTSWITH_MATCHES) {
    Pattern::setup(p, patternType);
  }
  else {
    s = std::string("^") + p;
    try {
      re = std::regex(s.c_str(), patternType);
    }
    catch (const std::regex_error &e) {
      logMsg << "Couldn't assign " << logValue(pattern) << " to std::regex\n"
        << logValue(e.what()) << ' ' << logValue(e.code()) << std::endl;
    }
  }
}

// Returns 1 if the first startlen characters of s equal those of start
// (strncmp semantics), 0 otherwise.
// On success, and if lenPtr is non-null, *lenPtr is set to startlen.
int startsWith(/*int flags,*/ const char *s, const char *start, int startlen, int *lenPtr) {
  if (strncmp(s, start, startlen) != 0)
    return 0;
  if (lenPtr)
    *lenPtr = startlen;
  return 1;
}

// Returns 1 if s ends with the first endlen characters of end, 0 otherwise.
//
// Only endlen characters of end are compared, mirroring how startsWith()
// treats startlen, so end need not be NUL-terminated at exactly endlen.
// A negative endlen, or one longer than s, never matches.
int endsWith(/*int flags,*/ const char *s, const char *end, int endlen) {
  if (endlen < 0)
    return 0;
  const size_t slen = strlen(s);
  const size_t want = static_cast<size_t>(endlen);
  if (slen < want)
    return 0;
  return !strncmp(s + (slen - want), end, want);
}

// Convenience overload: tests whether s begins with the whole of the
// NUL-terminated string start. Returns 1 if so, 0 otherwise.
int startsWith(const char *s, const char *start) {
  const int startlen = strlen(start);
  return startsWith(s, start, startlen, 0);
}

// Convenience overload: tests whether s ends with the whole of the
// NUL-terminated string end. Returns 1 if so, 0 otherwise.
int endsWith(const char *s, const char *end) {
  const int endlen = strlen(end);
  return endsWith(s, end, endlen);
}

//----------------------------------------------------------------------
// Lexer stuff
//----------------------------------------------------------------------

// Describes one lexer token.
//   regexp - the token's regular expression, compiled with ECMAScript syntax
//   code   - the token's name; when null, the regexp text is used instead
//   flags  - lexer flags stored with the token
Token::Token(const char *regexp, const char *code, int flags)
  : pattern(regexp, std::regex_constants::ECMAScript),
    code(code != nullptr ? code : regexp),
    flags(flags)
{
}

// Appends a token definition to the lexer and returns its index in
// exprVector. A warning is logged when regexp is null or does not begin
// with '^'; the token is added regardless.
int Lexer::addToken(const char *regexp, const char *code, int flags) {
  const bool isStarter = regexp && *regexp == '^';
  if (!isStarter) {
    logMsg << "warning: all regexps should be starter regexps. " << logValue(regexp) << std::endl;
  }
  const int index = exprVector.size();
  exprVector.push_back(Token(regexp, code, flags));
  return index;
}

// Scans one token from the front of s.
//
// Every token pattern is tried and the longest match wins; on a tie the
// token added first is kept. Tokens flagged LEXER_DISCARD are skipped and
// scanning continues after them.
//
// On return *result holds:
//   str   - where the reported token starts
//   len   - its length (1 for an error, 0 at end-of-string)
//   token - index into exprVector, -1 at end-of-string, or -2 when no
//           pattern matched a non-empty input
//
// Returns a pointer just past the reported token.
const char *Lexer::scan(const char *s, ScanResult *result) {
  while (true) {
    // Find the longest match among all token patterns
    result->len = 0;
    for (unsigned candidate = 0; candidate != exprVector.size(); ++candidate) {
      int candidateLen;
      if (!exprVector[candidate].pattern.within(s, 0, &candidateLen))
        continue;
      if (candidateLen > result->len) {
        result->len = candidateLen;
        result->token = candidate;
      }
    }

    if (result->len == 0) {
      if (*s) {
        // error: consume a single character
        result->len = 1;
        result->token = -2;
      }
      else {
        // end-of-string
        result->token = -1;
      }
      break;
    }

    const bool discard = exprVector[result->token].flags & LEXER_DISCARD;
    if (!discard)
      break;

    // Skip the discarded token and scan again
    s += result->len;
  }
  result->str = s;
  return s + result->len;
}

// Tokenises the whole of input.
//
//   input     - NUL-terminated text to scan
//   result    - every ScanResult produced by scan() is appended here,
//               including a final negative-token entry if scanning stops early
//   keyString - optional; cleared first, then filled with the codes of the
//               tokens found, separated by single spaces
//
// Scanning stops at the first negative token. For a scan error (-2),
// " ERROR " followed by the offending character is appended to keyString.
// Reaching end-of-string after discarded tokens (-1) is not an error and
// appends nothing.
void Lexer::scanAll(const char *input,
    std::vector<ScanResult> &result, std::string *keyString) {
  if (keyString)
    keyString->clear();
  ScanResult scanResult;
  while (*input) {
    input = scan(input, &scanResult);
    result.push_back(scanResult);
    if (scanResult.token < 0) {
      if (keyString && scanResult.token != -1) {
        keyString->append(" ERROR ");
        // input has already moved past the bad character; scanResult.str
        // still points at it
        keyString->push_back(*scanResult.str);
      }
      break;
    }
    if (keyString) {
      if (keyString->size())
        keyString->push_back(' ');
      keyString->append(exprVector[scanResult.token].code);
    }
  }
}

