package ca.ucalgary.seahawk.services;

import java.util.regex.*;

/**
 * This class contains methods for extracting parts of regular expressions,
 * mainly for use in MobyComplexBuilder in dealing with nested rules, etc.
 *
 * All positions and ranges are character indices into the regex text itself
 * (not into any input matched by the regex), and ranges are inclusive at both ends.
 *
 * Known limitations: \Q...\E quoting, '#' comments in Pattern.COMMENTS mode and
 * parentheses inside character classes ([..(..]) are not recognised.
 */
public class RegexParser{

    /**
     * Tells whether the given name is one of the fixed set of character class
     * names accepted by this parser (names such as "Lower" or "XDigit", plus a few
     * java.lang.Character, Unicode block and Unicode category names).
     * The comparison is case-sensitive.
     *
     * @param className the name to test, without any surrounding \p{...}; may be null
     * @return true if the name is in the accepted list, false otherwise (including for null)
     */
    public static boolean isPosixCharacterClass(String className){
        if(className == null){
            return false;
        }

        return className.equals("Lower") ||
            className.equals("Upper") ||
            className.equals("ASCII") ||
            className.equals("Alpha") ||
            className.equals("Digit") ||
            className.equals("Alnum") ||
            className.equals("Punct") ||
            className.equals("Graph") ||
            className.equals("Print") ||
            className.equals("Blank") ||
            className.equals("Cntrl") ||
            className.equals("XDigit") ||
            className.equals("Space") ||
            className.equals("javaLowerCase") ||
            className.equals("javaUpperCase") ||
            className.equals("javaWhitespace") ||
            className.equals("javaMirrored") ||
            className.equals("InGreek") ||
            className.equals("Lu") ||
            className.equals("Sc");
    }

    /**
     * Finds the capture group whose text (including any quantifier attached to it)
     * covers the given character position of the pattern.
     *
     * @param seahawkPattern the regex text to inspect; null is treated as having no groups
     * @param targetPosition zero-based character index into seahawkPattern
     * @return the number of the highest-numbered (i.e. most deeply nested) capture group
     *         containing the position, or 0 if the position is not part of any capture group
     * @throws Exception if the boundaries of a capture group cannot be determined
     */
    public static int locationToCaptureGroupNumber(String seahawkPattern, int targetPosition) throws Exception{
        // Work backwards so we get the deepest nested capture group containing the position
        for(int groupNumber = groupCount(seahawkPattern); groupNumber > 0; groupNumber--){
            int[] groupRange = getCaptureGroupRange(seahawkPattern, groupNumber, true);
            if(groupRange[0] <= targetPosition && targetPosition <= groupRange[1]){
                return groupNumber;
            }
        }

        return 0; // indicates that the position is not part of any capture group
    }

    /**
     * Parses out the regex text of the capture group with the given number in the
     * given pattern. Does not include the group's own parentheses, nor any quantity
     * modifier on the capture group, e.g. in (\d)+, the result is \d.
     * Group 0 yields the whole pattern text.
     *
     * @param pat the compiled pattern to inspect
     * @param groupNumber the capture group number, 0 for the whole pattern
     * @return the group's regex text, or null if pat is null
     * @throws IllegalArgumentException if groupNumber is negative or larger than the group count
     * @throws Exception if the boundaries of the capture group cannot be determined
     */
    public static String getCaptureGroupRegex(Pattern pat, int groupNumber) throws Exception{
        if(pat == null){
            return null;  // consistent with getCaptureGroupRange(Pattern, ...)
        }
        int range[] = getCaptureGroupRange(pat, groupNumber, false);
        return pat.pattern().substring(range[0], range[1]+1);
    }

    /**
     * Same as {@link #getCaptureGroupRange(Pattern, int, boolean)} with includeQuantifier set to false.
     */
    public static int[] getCaptureGroupRange(Pattern pat, int groupNumber) throws Exception{
        return getCaptureGroupRange(pat, groupNumber, false);
    }

    /**
     * @param pat the compiled pattern to inspect, may be null
     * @return the number of capture groups found in the pattern text, 0 if pat is null
     */
    public static int groupCount(Pattern pat){
        if(pat == null){
            return 0;
        }
        return groupCount(pat.pattern());
    }

    /**
     * Counts the capture groups in the regex text: every unescaped '(' that is not
     * the start of a non-capturing construct such as (?:...), (?=...) or (?&lt;=...).
     * Named groups, (?&lt;name&gt;...), are counted because they are numbered like any other group.
     *
     * @param pattern the regex text, may be null
     * @return the number of capture groups, 0 if pattern is null
     */
    protected static int groupCount(String pattern){
        if(pattern == null){
            return 0;
        }

        int groupCount = 0;
        // Don't yet deal with \Q...\E syntax, # in Pattern.COMMENTS mode, or [..(..]
        for(int i = 0; i < pattern.length(); i++){
            if(isCapturingGroupStart(pattern, i)){
                groupCount++;
            }
        }
        return groupCount;
    }

    /**
     * Locates the capture group with the given number in the text of the given pattern.
     *
     * @param pat the compiled pattern to inspect
     * @param groupNumber the capture group number, 0 for the whole pattern
     * @param includeQuantifier if true the range spans the group's parentheses plus any
     *        quantifier attached to it; if false the range covers only the text between the parentheses
     * @return a two element array {first index, last index} (both inclusive), or null if pat is null
     * @throws IllegalArgumentException if groupNumber is negative or larger than the group count
     * @throws Exception if the boundaries of the capture group cannot be determined
     */
    public static int[] getCaptureGroupRange(Pattern pat, int groupNumber, boolean includeQuantifier) throws Exception{
        if(pat == null){
            return null;
        }
        return getCaptureGroupRange(pat.pattern(), groupNumber, includeQuantifier);
    }

    /**
     * Locates the capture group with the given number in the given regex text.
     *
     * @param pattern the regex text to inspect
     * @param groupNumber the capture group number, 0 for the whole pattern
     * @param includeQuantifier if true the range spans the group's parentheses plus any
     *        quantifier (*, +, ?, {x,y}, optionally followed by a reluctant '?' or possessive '+');
     *        if false the range covers only the text between the parentheses
     * @return a two element array {first index, last index}, both inclusive. For group 0 this is
     *         always {0, pattern.length()-1}. An empty group "()" with includeQuantifier false
     *         yields a range whose last index is one less than its first.
     * @throws IllegalArgumentException if groupNumber is negative or larger than the group count
     * @throws Exception if the opening or closing parenthesis of the group cannot be found
     */
    static int[] getCaptureGroupRange(String pattern, int groupNumber, boolean includeQuantifier) throws Exception{
        if(groupNumber < 0){
            throw new IllegalArgumentException("The capture group to parse out (" + groupNumber + 
                                               ") was not zero or a positive integer as required");
        }
        int totalGroups = groupCount(pattern);
        if(groupNumber > totalGroups){
            throw new IllegalArgumentException("Was asked for capture group "+groupNumber+
                                               ", but there are only " + totalGroups + 
                                               " in the match for regex " + pattern);
        }

        // Trivial case, $0 is the whole regex
        if(groupNumber == 0){
            return new int[]{0, pattern.length()-1};
        }

        // Don't yet deal with \Q...\E syntax, # in Pattern.COMMENTS mode, or [..(..]
        int length = pattern.length();
        int groupsSeen = 0;
        int openParenthesisIndex = 0;
        for(; openParenthesisIndex < length; openParenthesisIndex++){
            if(isCapturingGroupStart(pattern, openParenthesisIndex) && ++groupsSeen == groupNumber){
                break;
            }
        }
        if(openParenthesisIndex >= length){
            throw new Exception("Parsing error: Could not find the start of the regex capture group #" + groupNumber);
        }

        // Move along until we find the matching closing parenthesis, counting every nested
        // unescaped '(' (capturing or not) because each one must be balanced by its own ')'.
        int nestedParentheses = 0;
        int closeParenthesisIndex = openParenthesisIndex + 1;
        for(; closeParenthesisIndex < length; closeParenthesisIndex++){
            char c = pattern.charAt(closeParenthesisIndex);
            if((c != '(' && c != ')') || isEscaped(pattern, closeParenthesisIndex)){
                continue;
            }
            if(c == '('){
                nestedParentheses++;
            }
            // Is match to opener, we're done!
            else if(nestedParentheses-- == 0){
                break;
            }
        }
        // Must be verified before looking for a quantifier, otherwise we would read past the end
        if(closeParenthesisIndex >= length){
            throw new Exception("Parsing error: Could not find the end of the regex capture group #" + groupNumber);
        }

        if(includeQuantifier){
            return new int[]{openParenthesisIndex, findQuantifierEnd(pattern, closeParenthesisIndex)};
        }
        else{
            return new int[]{openParenthesisIndex+1, closeParenthesisIndex-1};
        }
    }

    /**
     * Tells whether the character at the given index is escaped, i.e. preceded by an
     * odd number of consecutive backslashes (an even run is just escaped backslashes).
     */
    private static boolean isEscaped(String pattern, int index){
        int backslashes = 0;
        for(int i = index-1; i >= 0 && pattern.charAt(i) == '\\'; i--){
            backslashes++;
        }
        return backslashes % 2 == 1;
    }

    /**
     * Tells whether the character at the given index is an unescaped '(' that opens a
     * capturing (numbered) group, as opposed to (?:...), lookarounds, inline flags, etc.
     */
    private static boolean isCapturingGroupStart(String pattern, int index){
        if(pattern.charAt(index) != '(' || isEscaped(pattern, index)){
            return false;
        }
        if(index+1 >= pattern.length() || pattern.charAt(index+1) != '?'){
            return true;
        }
        // "(?<name>" opens a named capturing group, whereas "(?<=" and "(?<!" are lookbehinds
        if(index+3 < pattern.length() && pattern.charAt(index+2) == '<'){
            char afterAngle = pattern.charAt(index+3);
            return afterAngle != '=' && afterAngle != '!';
        }
        return false;
    }

    /**
     * Returns the index of the last character of the quantifier attached to the group
     * closing at closeParenthesisIndex, or closeParenthesisIndex itself if there is none.
     */
    private static int findQuantifierEnd(String pattern, int closeParenthesisIndex){
        int next = closeParenthesisIndex + 1;
        if(next >= pattern.length()){
            return closeParenthesisIndex;  // the group ends the pattern, nothing can follow
        }

        int quantifierEnd;
        char nextChar = pattern.charAt(next);
        if(nextChar == '{'){
            // Don't check for digit and comma because we assume the pattern is valid
            quantifierEnd = pattern.indexOf('}', next+1);
            if(quantifierEnd == -1){
                return closeParenthesisIndex;
            }
        }
        else if(nextChar == '*' || nextChar == '+' || nextChar == '?'){
            quantifierEnd = next;
        }
        else{
            return closeParenthesisIndex;
        }

        // See if the reluctant '?' or possessive '+' notation follows the greedy quantifier
        if(quantifierEnd+1 < pattern.length()){
            char modifier = pattern.charAt(quantifierEnd+1);
            if(modifier == '?' || modifier == '+'){
                quantifierEnd++;
            }
        }
        return quantifierEnd;
    }
}
