package logic.formulas;

import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Set;

/**
 * Parser for formulas in propositional logic.
 */
public class Parser {
    /**
     * Custom exception class to report problems that are found during parsing.
     */
    public static class ParseError extends Exception {
        private static final long serialVersionUID = 1L;

        /**
         * Creates a parse error with the given description.
         * @param message human-readable description of the problem.
         */
        public ParseError(String message) {
            super(message);
        }
    }

    /**
     * Set of connectives that are allowed to occur between two subformulas.
     */
    private static final Set<String> BINARY_CONNECTIVES =
        new HashSet<String>(Arrays.asList(new String[] {"/\\", "\\/", "->", "<->"}));

    /**
     * A "lexer" converts a sequence of characters into a sequence of "tokens".
     * In our case, tokens are atom names, parentheses and logical connectives.
     * Since some of these are composed of more than one character, we need to
     * split the input string into a list of those tokens.
     * For example, the string "(~phi /\ psi)" is the sequence of characters
     *   ['(', '~', 'p', 'h', 'i', ' ', '/', '\', ' ', 'p', 's', 'i', ')'] (13 characters)
     * This function turns this into the following sequence of tokens
     *   ["(", "~", "phi", "/\", "psi", ")"] (6 tokens)
     * Whitespace separates tokens but never becomes a token itself.
     * @param input a raw input string containing a formula
     * @return a list of recognized tokens
     * @throws ParseError if unknown symbols (e.g., "{A /| B]") or
     * unexpected symbols (e.g., "(A /\\\\\ B)") are found, or if the input
     * ends in the middle of a multi-character connective (e.g., "(A /").
     */
    private static LinkedList<String> lex(String input) throws ParseError {
        LinkedList<String> result = new LinkedList<String>();
        int i = 0;
        while (i < input.length()) {
            char c = input.charAt(i);
            switch (c) {
            case '(':
            case ')':
            case '~':
                // Single character tokens can just be added to the token list.
                result.add(String.valueOf(c));
                i++;
                break;
            case '/':
                // Start of the token "/\", check for the rest of the token.
                i = lexSymbol(input, i, "/\\", result);
                break;
            case '\\':
                // Start of the token "\/", check for the rest of the token.
                i = lexSymbol(input, i, "\\/", result);
                break;
            case '-':
                // Start of the token "->", check for the rest of the token.
                i = lexSymbol(input, i, "->", result);
                break;
            case '<':
                // Start of the token "<->", check for the rest of the token.
                i = lexSymbol(input, i, "<->", result);
                break;
            default:
                if (Character.isWhitespace(c)) {
                    // Skip any whitespace without adding it to the token list.
                    i++;
                } else if (Character.isLetterOrDigit(c)) {
                    // No logical connective, parentheses, or whitespace, so it is an atom name.
                    // The atom name continues until the next non-alpha-numeric character.
                    int end = i + 1;
                    while (end < input.length() && Character.isLetterOrDigit(input.charAt(end))) {
                        end++;
                    }
                    result.add(input.substring(i, end));
                    i = end;
                } else {
                    throw new ParseError("Unknown symbol at position " + i);
                }
                break;
            }
        }
        return result;
    }

    /**
     * Lexes one multi-character connective whose first character has already
     * been matched by the caller at position {@code start}.
     * The remaining characters are compared one by one, and every index is
     * checked against the input length before it is read, so a connective
     * that is cut off by the end of the input is reported as a ParseError
     * instead of causing an index-out-of-bounds failure.
     * @param input the raw input string.
     * @param start index in {@code input} of the connective's first character.
     * @param symbol the full connective that is expected at {@code start}.
     * @param tokens token list to which the connective is appended on success.
     * @return the index of the first character after the connective.
     * @throws ParseError if the input ends before the connective is complete
     *         or if one of the remaining characters does not match.
     */
    private static int lexSymbol(String input, int start, String symbol,
                                 LinkedList<String> tokens) throws ParseError {
        // Index 0 was matched by the caller's switch, so start comparing at 1.
        for (int k = 1; k < symbol.length(); k++) {
            if (start + k >= input.length()) {
                throw new ParseError("Unexpected end of string");
            }
            if (input.charAt(start + k) != symbol.charAt(k)) {
                // Report the position where the malformed connective starts.
                throw new ParseError("Unknown symbol at position " + start);
            }
        }
        tokens.add(symbol);
        return start + symbol.length();
    }

    /**
     * Parse a formula from a list of tokens.
     * Tokens that belong to the parsed formula are removed from the front of
     * the list; any tokens after the formula are left in the list.
     * @param tokens list of tokens (see documentation of function lex()).
     * @return the parsed formula.
     * @throws ParseError if the tokens do not form a valid formula.
     *         Some examples for ill-formed formulas:
     *            ["(", "A"]
     *            ["A", "B"]
     *            ["(", ")"]
     *            ["(", "A", "/\", "B", "/\", "C", ")"] (missing parentheses)
     */
    private static Formula parseFormula(LinkedList<String> tokens) throws ParseError {
        if (tokens.isEmpty()) {
            throw new ParseError("Expected formula but found end of string");
        }
        String token = tokens.pop();
        if (token.equals("(")) {
            // Formulas starting with "(" are formulas with two subformulas
            // connected by a connective. We parse them in 5 steps:
            // 1. Parse the left-hand side.
            Formula lhs = parseFormula(tokens);
            // 2. Parse the connective.
            if (tokens.isEmpty()) {
                throw new ParseError("Expected logical connective but found end of string");
            }
            String connective = tokens.pop();
            if (!BINARY_CONNECTIVES.contains(connective)) {
                throw new ParseError("Expected binary logical connective but found '" + connective + "'");
            }
            // 3. Parse the right-hand side.
            Formula rhs = parseFormula(tokens);
            // 4. Check for the closing parenthesis.
            if (tokens.isEmpty()) {
                throw new ParseError("Expected closing bracket but found end of string");
            }
            token = tokens.pop();
            if (!token.equals(")")) {
                throw new ParseError("Expected closing bracket but found '" + token + "'");
            }
            // 5. Create the correct type of formula depending on the connective.
            if (connective.equals("/\\")) {
                return new Conjunction(lhs, rhs);
            } else if (connective.equals("\\/")) {
                return new Disjunction(lhs, rhs);
            } else if (connective.equals("->")) {
                return new Implication(lhs, rhs);
            } else if (connective.equals("<->")) {
                return new Biimplication(lhs, rhs);
            } else {
                // Only reachable if BINARY_CONNECTIVES lists a connective
                // that has no matching branch above.
                throw new ParseError("Unexpected logical connective found '" + connective + "'");
            }
        } else if (token.equals("~")) {
            // Formulas starting with ~ are negations.
            // We parse the subformula and create its negation.
            Formula negatedFormula = parseFormula(tokens);
            return new Negation(negatedFormula);
        } else {
            // All other formulas are atoms.
            // We check that we got a valid atom name and create the atom.
            // This rejects stray tokens such as ")" or a binary connective.
            if (!Character.isLetterOrDigit(token.charAt(0))) {
                throw new ParseError("Expected atom name but found '" + token + "'");
            }
            return new Atom(token);
        }
    }

    /**
     * Parse a formula from a string.
     * @param input a raw input string containing a formula.
     * @return the parsed formula.
     * @throws ParseError if the string contains errors (see documentation of lex() and parseFormula())
     *         or if there are any tokens left after parsing the formula.
     *         Some examples where this is the case:
     *            ["(", "A", "/\", "B", ")", "C"] (additional C at the end)
     *            ["A", "/\", "B"] (no parentheses, "A" is parsed and ["/\", "B"] are left)
     */
    public static Formula parse(String input) throws ParseError {
        LinkedList<String> tokens = lex(input);
        Formula result = parseFormula(tokens);
        if (!tokens.isEmpty()) {
            throw new ParseError("Leftover tokens after parsing formula " +
                                 "(missing parentheses?): '" + tokens + "'");
        }
        return result;
    }
}
