/*
 * Decompiled with CFR 0.152.
 */
package jsat.io;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import jsat.DataSet;
import jsat.DataStore;
import jsat.SimpleDataSet;
import jsat.classifiers.CategoricalData;
import jsat.classifiers.ClassificationDataSet;
import jsat.classifiers.DataPoint;
import jsat.io.DataWriter;
import jsat.linear.DenseVector;
import jsat.linear.Vec;
import jsat.regression.RegressionDataSet;
import jsat.utils.DoubleList;
import jsat.utils.IntList;
import jsat.utils.StringUtils;

public class CSV {
    /** Default field delimiter, a comma; the overloads without a delimiter argument pass this same character. */
    public static final char DEFAULT_DELIMITER = ',';
    /** Default comment-start character, {@code #}; the overloads without a comment argument pass this same character. */
    public static final char DEFAULT_COMMENT = '#';

    /** Private so the class cannot be instantiated; all of its methods are static. */
    private CSV() {
    }

    /**
     * Reads a regression data set from a CSV file, using a comma as the delimiter and
     * {@code #} as the comment-start character.
     *
     * @param numeric_target_column zero-based index of the column holding the regression target
     * @param path the file to read
     * @param lines_to_skip number of leading lines to discard before parsing data
     * @param cat_cols zero-based indices of the columns to treat as categorical features
     * @return the parsed regression data set
     * @throws IOException if the file cannot be opened or read
     */
    public static RegressionDataSet readR(int numeric_target_column, Path path, int lines_to_skip, Set<Integer> cat_cols) throws IOException {
        final char delimiter = DEFAULT_DELIMITER;
        final char comment = DEFAULT_COMMENT;
        return readR(numeric_target_column, path, delimiter, lines_to_skip, comment, cat_cols);
    }

    /**
     * Reads a regression data set from CSV text supplied by a reader, using a comma as the
     * delimiter and {@code #} as the comment-start character.
     *
     * @param numeric_target_column zero-based index of the column holding the regression target
     * @param reader the source of CSV text; it is not closed by this method
     * @param lines_to_skip number of leading lines to discard before parsing data
     * @param cat_cols zero-based indices of the columns to treat as categorical features
     * @return the parsed regression data set
     * @throws IOException if reading from {@code reader} fails
     */
    public static RegressionDataSet readR(int numeric_target_column, Reader reader, int lines_to_skip, Set<Integer> cat_cols) throws IOException {
        final char delimiter = DEFAULT_DELIMITER;
        final char comment = DEFAULT_COMMENT;
        return readR(numeric_target_column, reader, delimiter, lines_to_skip, comment, cat_cols);
    }

    /**
     * Reads a regression data set from a CSV file decoded with the platform default charset.
     * The file is closed before this method returns, whether parsing succeeds or fails.
     *
     * @param numeric_target_column zero-based index of the column holding the regression target
     * @param path the file to read
     * @param delimiter the character separating fields
     * @param lines_to_skip number of leading lines to discard before parsing data
     * @param comment the character that starts a comment running to the end of the line
     * @param cat_cols zero-based indices of the columns to treat as categorical features
     * @return the parsed regression data set
     * @throws IOException if the file cannot be opened or read
     */
    public static RegressionDataSet readR(int numeric_target_column, Path path, char delimiter, int lines_to_skip, char comment, Set<Integer> cat_cols) throws IOException {
        // try-with-resources: the reader must also be released when parsing throws.
        try (BufferedReader br = Files.newBufferedReader(path, Charset.defaultCharset())) {
            return CSV.readR(numeric_target_column, br, delimiter, lines_to_skip, comment, cat_cols);
        }
    }

    /**
     * Reads a regression data set from CSV text supplied by a reader.
     *
     * @param numeric_target_column zero-based index of the column holding the regression target
     * @param reader the source of CSV text; it is not closed by this method
     * @param delimiter the character separating fields
     * @param lines_to_skip number of leading lines to discard before parsing data
     * @param comment the character that starts a comment running to the end of the line
     * @param cat_cols zero-based indices of the columns to treat as categorical features
     * @return the parsed regression data set
     * @throws IOException if reading from {@code reader} fails
     */
    public static RegressionDataSet readR(int numeric_target_column, Reader reader, char delimiter, int lines_to_skip, char comment, Set<Integer> cat_cols) throws IOException {
        // A negative index tells the parser there is no categorical target column.
        final int noCategoricalTarget = -1;
        DataSet<?> parsed = readCSV(reader, lines_to_skip, delimiter, comment, cat_cols, numeric_target_column, noCategoricalTarget);
        return (RegressionDataSet) parsed;
    }

    /**
     * Reads a classification data set from a CSV file, using a comma as the delimiter and
     * {@code #} as the comment-start character.
     *
     * @param classification_target zero-based index of the column holding the class label
     * @param path the file to read
     * @param lines_to_skip number of leading lines to discard before parsing data
     * @param cat_cols zero-based indices of the columns to treat as categorical features
     * @return the parsed classification data set
     * @throws IOException if the file cannot be opened or read
     */
    public static ClassificationDataSet readC(int classification_target, Path path, int lines_to_skip, Set<Integer> cat_cols) throws IOException {
        final char delimiter = DEFAULT_DELIMITER;
        final char comment = DEFAULT_COMMENT;
        return readC(classification_target, path, delimiter, lines_to_skip, comment, cat_cols);
    }

    /**
     * Reads a classification data set from CSV text supplied by a reader, using a comma as
     * the delimiter and {@code #} as the comment-start character.
     *
     * @param classification_target zero-based index of the column holding the class label
     * @param reader the source of CSV text; it is not closed by this method
     * @param lines_to_skip number of leading lines to discard before parsing data
     * @param cat_cols zero-based indices of the columns to treat as categorical features
     * @return the parsed classification data set
     * @throws IOException if reading from {@code reader} fails
     */
    public static ClassificationDataSet readC(int classification_target, Reader reader, int lines_to_skip, Set<Integer> cat_cols) throws IOException {
        final char delimiter = DEFAULT_DELIMITER;
        final char comment = DEFAULT_COMMENT;
        return readC(classification_target, reader, delimiter, lines_to_skip, comment, cat_cols);
    }

    /**
     * Reads a classification data set from CSV text supplied by a reader.
     *
     * @param classification_target zero-based index of the column holding the class label
     * @param reader the source of CSV text; it is not closed by this method
     * @param delimiter the character separating fields
     * @param lines_to_skip number of leading lines to discard before parsing data
     * @param comment the character that starts a comment running to the end of the line
     * @param cat_cols zero-based indices of the columns to treat as categorical features
     * @return the parsed classification data set
     * @throws IOException if reading from {@code reader} fails
     */
    public static ClassificationDataSet readC(int classification_target, Reader reader, char delimiter, int lines_to_skip, char comment, Set<Integer> cat_cols) throws IOException {
        // A negative index tells the parser there is no numeric (regression) target column.
        final int noNumericTarget = -1;
        DataSet<?> parsed = readCSV(reader, lines_to_skip, delimiter, comment, cat_cols, noNumericTarget, classification_target);
        return (ClassificationDataSet) parsed;
    }

    /**
     * Reads a classification data set from a CSV file decoded with the platform default
     * charset. The file is closed before this method returns, whether parsing succeeds or fails.
     *
     * @param classification_target zero-based index of the column holding the class label
     * @param path the file to read
     * @param delimiter the character separating fields
     * @param lines_to_skip number of leading lines to discard before parsing data
     * @param comment the character that starts a comment running to the end of the line
     * @param cat_cols zero-based indices of the columns to treat as categorical features
     * @return the parsed classification data set
     * @throws IOException if the file cannot be opened or read
     */
    public static ClassificationDataSet readC(int classification_target, Path path, char delimiter, int lines_to_skip, char comment, Set<Integer> cat_cols) throws IOException {
        // try-with-resources: the reader must also be released when parsing throws.
        try (BufferedReader br = Files.newBufferedReader(path, Charset.defaultCharset())) {
            return CSV.readC(classification_target, br, delimiter, lines_to_skip, comment, cat_cols);
        }
    }

    /**
     * Reads a data set without a target column from a CSV file, using a comma as the
     * delimiter and {@code #} as the comment-start character.
     *
     * @param path the file to read
     * @param lines_to_skip number of leading lines to discard before parsing data
     * @param cat_cols zero-based indices of the columns to treat as categorical features
     * @return the parsed data set
     * @throws IOException if the file cannot be opened or read
     */
    public static SimpleDataSet read(Path path, int lines_to_skip, Set<Integer> cat_cols) throws IOException {
        final char delimiter = DEFAULT_DELIMITER;
        final char comment = DEFAULT_COMMENT;
        return read(path, delimiter, lines_to_skip, comment, cat_cols);
    }

    /**
     * Reads a data set without a target column from CSV text supplied by a reader, using a
     * comma as the delimiter and {@code #} as the comment-start character.
     *
     * @param reader the source of CSV text; it is not closed by this method
     * @param lines_to_skip number of leading lines to discard before parsing data
     * @param cat_cols zero-based indices of the columns to treat as categorical features
     * @return the parsed data set
     * @throws IOException if reading from {@code reader} fails
     */
    public static SimpleDataSet read(Reader reader, int lines_to_skip, Set<Integer> cat_cols) throws IOException {
        final char delimiter = DEFAULT_DELIMITER;
        final char comment = DEFAULT_COMMENT;
        return read(reader, delimiter, lines_to_skip, comment, cat_cols);
    }

    /**
     * Reads a data set without a target column from a CSV file decoded with the platform
     * default charset. The file is closed before this method returns, whether parsing
     * succeeds or fails.
     *
     * @param path the file to read
     * @param delimiter the character separating fields
     * @param lines_to_skip number of leading lines to discard before parsing data
     * @param comment the character that starts a comment running to the end of the line
     * @param cat_cols zero-based indices of the columns to treat as categorical features
     * @return the parsed data set
     * @throws IOException if the file cannot be opened or read
     */
    public static SimpleDataSet read(Path path, char delimiter, int lines_to_skip, char comment, Set<Integer> cat_cols) throws IOException {
        // try-with-resources: the reader must also be released when parsing throws.
        try (BufferedReader br = Files.newBufferedReader(path, Charset.defaultCharset())) {
            return CSV.read(br, delimiter, lines_to_skip, comment, cat_cols);
        }
    }

    /**
     * Reads a data set without a target column from CSV text supplied by a reader.
     *
     * @param reader the source of CSV text; it is not closed by this method
     * @param delimiter the character separating fields
     * @param lines_to_skip number of leading lines to discard before parsing data
     * @param comment the character that starts a comment running to the end of the line
     * @param cat_cols zero-based indices of the columns to treat as categorical features
     * @return the parsed data set
     * @throws IOException if reading from {@code reader} fails
     */
    public static SimpleDataSet read(Reader reader, char delimiter, int lines_to_skip, char comment, Set<Integer> cat_cols) throws IOException {
        // Negative indices tell the parser that no column is a target of either kind.
        final int noTarget = -1;
        DataSet<?> parsed = readCSV(reader, lines_to_skip, delimiter, comment, cat_cols, noTarget, noTarget);
        return (SimpleDataSet) parsed;
    }

    /**
     * Parses delimited text from {@code reader} with a character-level state machine and
     * builds the matching kind of data set.
     * <p>
     * Fields have leading and trailing whitespace removed. An empty numeric field becomes
     * {@code NaN}; an empty categorical feature field becomes {@code -1}. Category names are
     * first numbered in the order they are encountered and, once the whole input has been
     * read, renumbered so that indices follow the sorted order of the names. A line that
     * contains nothing but a comment produces no row.
     *
     * @param reader the source of CSV text; it is not closed by this method
     * @param lines_to_skip number of leading lines to discard before parsing data
     * @param delimiter the character separating fields
     * @param comment the character that starts a comment running to the end of the line
     * @param cat_col zero-based indices of the columns to treat as categorical features;
     *        {@code null} is treated as an empty set
     * @param numeric_target zero-based index of the regression target column, or a negative
     *        value if there is none
     * @param cat_target zero-based index of the classification target column, or a negative
     *        value if there is none
     * @return a {@code ClassificationDataSet} when {@code cat_target >= 0}, otherwise a
     *         {@code RegressionDataSet} when {@code numeric_target >= 0}, otherwise a
     *         {@code SimpleDataSet}
     * @throws IOException if reading from {@code reader} fails
     * @throws RuntimeException if rows differ in their number of columns, if the
     *         classification target is empty in some row, or if the input ends before any
     *         data could be parsed or while a skipped line is still unterminated
     */
    private static DataSet<?> readCSV(Reader reader, int lines_to_skip, char delimiter, char comment, Set<Integer> cat_col, int numeric_target, int cat_target) throws IOException {
        // A null set simply means "no categorical feature columns".
        Set<Integer> catColumns = cat_col == null ? Collections.<Integer>emptySet() : cat_col;
        // Characters of the field currently being assembled.
        StringBuilder processBuffer = new StringBuilder(20);
        // Characters already pulled from the reader but not yet consumed by the state machine.
        StringBuilder charBuffer = new StringBuilder(1024);
        char[] read_buffer = new char[1024];
        DoubleList regressionTargets = new DoubleList();
        IntList catTargets = new IntList();
        // CSV column -> (category name -> provisional index in first-seen order).
        HashMap<Integer, Map<String, Integer>> seenCats = new HashMap<Integer, Map<String, Integer>>();
        for (int col : catColumns) {
            if (col == cat_target) {
                continue;
            }
            seenCats.put(col, new HashMap<String, Integer>());
        }
        HashMap<String, Integer> seenCats_target = new HashMap<String, Integer>();
        // Position inside a row's categorical array -> CSV column that value came from.
        HashMap<Integer, Integer> cat_indx_to_csv_column = new HashMap<Integer, Integer>();
        STATE state = STATE.INITIAL;
        int position = 0;
        // Column count of the first completed row; -1 until a row has been completed.
        int totalCols = -1;
        DoubleList numericFeats = new DoubleList();
        IntList catFeats = new IntList();
        int cur_column = 0;
        ArrayList<DenseVector> all_vecs = new ArrayList<DenseVector>();
        ArrayList<int[]> all_cats = new ArrayList<int[]>();
        while (true) {
            // Refill once at most one unread character is left in the buffer.
            if (charBuffer.length() - position <= 1) {
                charBuffer.delete(0, position);
                position = 0;
                int read = reader.read(read_buffer);
                if (read >= 0) {
                    charBuffer.append(read_buffer, 0, read);
                    continue;
                }
            }
            if (charBuffer.length() - position == 0) {
                // The reader is exhausted and every buffered character has been consumed.
                if (state == STATE.NEWLINE || state == STATE.COMMENT) {
                    break;
                }
                if (state == STATE.VALUE) {
                    // Input ended in the middle of a row: supply the missing line terminator
                    // so that the row is completed by the normal VALUE handling.
                    charBuffer.append("\n");
                } else {
                    throw new RuntimeException("Unexpected end of CSV input while in parser state " + state);
                }
            }
            char ch = charBuffer.charAt(position);
            switch (state) {
                case INITIAL: {
                    // Decides the first real state; the current character is not consumed.
                    if (lines_to_skip > 0) {
                        state = STATE.SKIPPING_ROWS;
                        break;
                    }
                    state = STATE.VALUE;
                    break;
                }
                case COMMENT:
                case SKIPPING_ROWS: {
                    // Discard everything up to the line terminator, which NEWLINE consumes.
                    if (CSV.isNewLine(ch)) {
                        if (state == STATE.SKIPPING_ROWS) {
                            --lines_to_skip;
                        }
                        state = STATE.NEWLINE;
                        break;
                    }
                    ++position;
                    break;
                }
                case VALUE: {
                    if (ch == delimiter || CSV.isNewLine(ch) || ch == comment) {
                        // The field is complete: strip trailing whitespace.
                        while (processBuffer.length() > 0 && Character.isWhitespace(processBuffer.charAt(processBuffer.length() - 1))) {
                            processBuffer.setLength(processBuffer.length() - 1);
                        }
                        if (ch == comment && cur_column == 0 && processBuffer.length() == 0) {
                            // Nothing precedes the comment on this line, so it is not a data
                            // row; treating it as one would record a bogus one-column row.
                            state = STATE.COMMENT;
                            break;
                        }
                        if (catColumns.contains(cur_column) || cur_column == cat_target) {
                            int val;
                            Map<String, Integer> map = cur_column == cat_target ? seenCats_target : seenCats.get(cur_column);
                            String cat_op = processBuffer.toString();
                            processBuffer.setLength(0);
                            if (cat_op.length() == 0) {
                                // Empty field: missing categorical value.
                                val = -1;
                            } else {
                                if (!map.containsKey(cat_op)) {
                                    map.put(cat_op, map.size());
                                }
                                val = map.get(cat_op);
                            }
                            if (cur_column == cat_target) {
                                if (val == -1) {
                                    throw new RuntimeException("Categorical column can't have missing values!");
                                }
                                catTargets.add(val);
                            } else {
                                catFeats.add(val);
                                cat_indx_to_csv_column.put(catFeats.size() - 1, cur_column);
                            }
                        } else {
                            // Empty field: missing numeric value.
                            double val = processBuffer.length() == 0 ? Double.NaN : StringUtils.parseDouble(processBuffer, 0, processBuffer.length());
                            processBuffer.setLength(0);
                            if (cur_column == numeric_target) {
                                regressionTargets.add(val);
                            } else {
                                numericFeats.add(val);
                            }
                        }
                        if (ch == delimiter) {
                            // More fields follow; DELIMITER consumes the delimiter itself.
                            state = STATE.DELIMITER;
                            break;
                        }
                        // The row ended, either at a line terminator or at a trailing comment.
                        state = ch == comment ? STATE.COMMENT : STATE.NEWLINE;
                        if (totalCols < 0) {
                            totalCols = cur_column + 1;
                        } else if (totalCols != cur_column + 1) {
                            throw new RuntimeException("Inconsistent number of columns in CSV");
                        }
                        all_vecs.add(new DenseVector(numericFeats));
                        int[] cat_vals = new int[catFeats.size()];
                        for (int i = 0; i < cat_vals.length; ++i) {
                            cat_vals[i] = catFeats.getI(i);
                        }
                        all_cats.add(cat_vals);
                        numericFeats.clear();
                        catFeats.clear();
                        break;
                    }
                    if (processBuffer.length() == 0 && Character.isWhitespace(ch)) {
                        // Leading whitespace is not part of the field.
                        ++position;
                        break;
                    }
                    processBuffer.append(ch);
                    ++position;
                    break;
                }
                case DELIMITER: {
                    if (ch == delimiter) {
                        ++position;
                        ++cur_column;
                        state = STATE.VALUE;
                        break;
                    }
                    throw new RuntimeException("BAD CSV");
                }
                case NEWLINE: {
                    cur_column = 0;
                    if (CSV.isNewLine(ch)) {
                        // Consumes "\r\n" pairs and runs of empty lines alike.
                        ++position;
                        break;
                    }
                    state = lines_to_skip > 0 ? STATE.SKIPPING_ROWS : STATE.VALUE;
                    break;
                }
            }
        }
        // CSV column -> (provisional index -> final index in sorted-name order).
        HashMap<Integer, Map<Integer, Integer>> cat_true_index = new HashMap<Integer, Map<Integer, Integer>>();
        HashMap<Integer, CategoricalData> catDataMap = new HashMap<Integer, CategoricalData>();
        if (cat_target >= 0) {
            seenCats.put(cat_target, seenCats_target);
        }
        CategoricalData target_data = null;
        for (Map.Entry<Integer, Map<String, Integer>> main_entry : seenCats.entrySet()) {
            HashMap<Integer, Integer> translator = new HashMap<Integer, Integer>();
            int col = main_entry.getKey();
            Map<String, Integer> catsSeen = main_entry.getValue();
            ArrayList<String> sortedOrder = new ArrayList<String>(catsSeen.keySet());
            Collections.sort(sortedOrder);
            CategoricalData cd = new CategoricalData(sortedOrder.size());
            if (col != cat_target) {
                catDataMap.put(col, cd);
            } else {
                target_data = cd;
            }
            for (int i = 0; i < sortedOrder.size(); ++i) {
                translator.put(catsSeen.get(sortedOrder.get(i)), i);
                cd.setOptionName(sortedOrder.get(i), i);
            }
            cat_true_index.put(col, translator);
        }
        // Rewrite the provisional indices stored in every row; missing values (-1) stay as is.
        for (int[] cat_vals : all_cats) {
            for (int i = 0; i < cat_vals.length; ++i) {
                if (cat_vals[i] < 0) {
                    continue;
                }
                cat_vals[i] = cat_true_index.get(cat_indx_to_csv_column.get(i)).get(cat_vals[i]);
            }
        }
        if (cat_target >= 0) {
            Map<Integer, Integer> translator = cat_true_index.get(cat_target);
            for (int i = 0; i < catTargets.size(); ++i) {
                catTargets.set(i, translator.get(catTargets.get(i)));
            }
        }
        // Categorical metadata ordered like the values inside each row's categorical array.
        CategoricalData[] cat_array = new CategoricalData[catDataMap.size()];
        for (int i = 0; i < cat_array.length; ++i) {
            cat_array[i] = catDataMap.get(cat_indx_to_csv_column.get(i));
        }
        if (cat_target >= 0) {
            // One column is the target, the categorical ones are not numeric features.
            ClassificationDataSet d = new ClassificationDataSet(totalCols - cat_array.length - 1, cat_array, target_data);
            d.setDataStore(DataStore.DEFAULT_STORE.emptyClone());
            for (int i = 0; i < all_vecs.size(); ++i) {
                d.addDataPoint(all_vecs.get(i), all_cats.get(i), catTargets.getI(i));
            }
            return d;
        }
        if (numeric_target >= 0) {
            RegressionDataSet d = new RegressionDataSet(totalCols - cat_array.length - 1, cat_array);
            d.setDataStore(DataStore.DEFAULT_STORE.emptyClone());
            for (int i = 0; i < all_vecs.size(); ++i) {
                d.addDataPoint(all_vecs.get(i), all_cats.get(i), regressionTargets.getD(i));
            }
            return d;
        }
        SimpleDataSet d = new SimpleDataSet(totalCols - cat_array.length, cat_array);
        d.setDataStore(DataStore.DEFAULT_STORE.emptyClone());
        for (int i = 0; i < all_vecs.size(); ++i) {
            d.add(new DataPoint(all_vecs.get(i), all_cats.get(i), cat_array));
        }
        return d;
    }

    /**
     * Writes a data set to a file as CSV, using a comma as the delimiter.
     *
     * @param data the data set to write
     * @param path the file to write to
     * @throws IOException if the file cannot be opened or written
     */
    public static void write(DataSet<?> data, Path path) throws IOException {
        final char delimiter = DEFAULT_DELIMITER;
        write(data, path, delimiter);
    }

    /**
     * Writes a data set to a writer as CSV, using a comma as the delimiter.
     *
     * @param data the data set to write
     * @param writer the destination; it is flushed but not closed
     * @throws IOException if writing fails
     */
    public static void write(DataSet<?> data, Writer writer) throws IOException {
        final char delimiter = DEFAULT_DELIMITER;
        write(data, writer, delimiter);
    }

    /**
     * Writes a data set to a file as CSV, encoded with the platform default charset. The
     * file is closed before this method returns, whether writing succeeds or fails.
     *
     * @param data the data set to write
     * @param path the file to write to
     * @param delimiter the character placed between fields
     * @throws IOException if the file cannot be opened or written
     */
    public static void write(DataSet<?> data, Path path, char delimiter) throws IOException {
        // try-with-resources: the file handle must also be released when writing throws.
        try (BufferedWriter bw = Files.newBufferedWriter(path, Charset.defaultCharset(), new OpenOption[0])) {
            CSV.write(data, bw, delimiter);
        }
    }

    /**
     * Writes a data set to a writer as CSV, one line per data point and no header line.
     * <p>
     * Each line holds, in this order: the target (the class name for a
     * {@code ClassificationDataSet}, the numeric target for a {@code RegressionDataSet},
     * nothing otherwise), the numeric features, then the categorical features by name.
     * Category names have the delimiter replaced so they cannot break a field. A missing
     * categorical value (negative index) is written as an empty field. Lines are separated
     * by {@code '\n'}; no terminator follows the last line.
     *
     * @param data the data set to write
     * @param writer the destination; it is flushed but not closed
     * @param delimiter the character placed between fields
     * @throws IOException if writing fails
     */
    public static void write(DataSet<?> data, Writer writer, char delimiter) throws IOException {
        String[][] catNamesToUse = CSV.getSafeNames(data.getCategories(), delimiter);
        String[] classNames = null;
        if (data instanceof ClassificationDataSet) {
            classNames = CSV.getSafeNames(new CategoricalData[]{((ClassificationDataSet)data).getPredicting()}, delimiter)[0];
        }
        for (int i = 0; i < data.size(); ++i) {
            int j;
            if (i > 0) {
                writer.write('\n');
            }
            // Tracks whether a delimiter is needed before the next field on this line.
            boolean nothingWrittenYet = true;
            if (data instanceof ClassificationDataSet) {
                int targetClass = ((ClassificationDataSet)data).getDataPointCategory(i);
                writer.write(classNames[targetClass]);
                nothingWrittenYet = false;
            } else if (data instanceof RegressionDataSet) {
                double targetVal = ((RegressionDataSet)data).getTargetValue(i);
                writer.write(Double.toString(targetVal));
                nothingWrittenYet = false;
            }
            DataPoint dp = data.getDataPoint(i);
            Vec v = dp.getNumericalValues();
            int[] c = dp.getCategoricalValues();
            for (j = 0; j < v.length(); ++j) {
                if (!nothingWrittenYet) {
                    writer.write(delimiter);
                }
                double val = v.get(j);
                // Whole numbers are written without a fractional part. The magnitude check
                // is required because rint(val) == val also holds for infinities and for
                // |val| >= 2^63, where the cast to long would saturate and corrupt the value.
                if (Math.rint(val) == val && Math.abs(val) < (double) Long.MAX_VALUE) {
                    writer.write(Long.toString((long)val));
                } else {
                    writer.write(Double.toString(val));
                }
                nothingWrittenYet = false;
            }
            for (j = 0; j < c.length; ++j) {
                if (!nothingWrittenYet) {
                    writer.write(delimiter);
                }
                if (c[j] >= 0) {
                    writer.write(catNamesToUse[j][c[j]]);
                }
                nothingWrittenYet = false;
            }
        }
        writer.flush();
    }

    /**
     * Creates a {@code DataWriter} that formats data points as CSV lines, using a comma as
     * the delimiter.
     *
     * @param out the stream handed to the created {@code DataWriter}
     * @param catInfo the categorical feature descriptions of the points to be written
     * @param dim the number of numeric features, as passed on to the {@code DataWriter}
     * @param predicting the class label description; must not be {@code null} when
     *        {@code type} is {@code CLASSIFICATION}
     * @param type the kind of data set being written
     * @return the CSV-formatting writer
     * @throws IOException declared by the overload this method delegates to
     */
    public static DataWriter getWriter(OutputStream out, CategoricalData[] catInfo, int dim, CategoricalData predicting, DataWriter.DataSetType type) throws IOException {
        final char delimiter = DEFAULT_DELIMITER;
        return getWriter(out, catInfo, dim, predicting, type, delimiter);
    }

    /**
     * Creates a {@code DataWriter} that formats each data point as one CSV line and writes
     * no header.
     * <p>
     * Each line holds, in this order: the target (the class name for {@code CLASSIFICATION},
     * the numeric label for {@code REGRESSION}, nothing otherwise), the numeric features,
     * then the categorical features by name, and ends with {@code "\n"}. Category names have
     * the delimiter replaced so they cannot break a field. A missing categorical value
     * (negative index) is written as an empty field. The point weight is not written.
     *
     * @param out the stream handed to the created {@code DataWriter}
     * @param catInfo the categorical feature descriptions of the points to be written
     * @param dim the number of numeric features, as passed on to the {@code DataWriter}
     * @param predicting the class label description; must not be {@code null} when
     *        {@code type} is {@code CLASSIFICATION}
     * @param type the kind of data set being written
     * @param delimiter the character placed between fields
     * @return the CSV-formatting writer
     * @throws IOException declared for callers; NOTE(review): presumably raised by the
     *         {@code DataWriter} constructor, confirm against that class
     * @throws RuntimeException if {@code type} is {@code CLASSIFICATION} and
     *         {@code predicting} is {@code null}
     */
    public static DataWriter getWriter(OutputStream out, CategoricalData[] catInfo, int dim, CategoricalData predicting, DataWriter.DataSetType type, final char delimiter) throws IOException {
        final String[] classNames;
        final String[][] catNamesToUse = CSV.getSafeNames(catInfo, delimiter);
        if (DataWriter.DataSetType.CLASSIFICATION == type) {
            if (predicting == null) {
                throw new RuntimeException("Can't create CSV writer without prediction target information (was null) ");
            }
            classNames = CSV.getSafeNames(new CategoricalData[]{predicting}, delimiter)[0];
        } else {
            classNames = null;
        }
        return new DataWriter(out, catInfo, dim, type){

            @Override
            protected void writeHeader(CategoricalData[] catInfo, int dim, DataWriter.DataSetType type, OutputStream out) {
                // Intentionally empty: this writer emits no header line.
            }

            @Override
            protected void pointToBytes(double weight, DataPoint dp, double label, ByteArrayOutputStream byteOut) {
                int j;
                PrintWriter writer = new PrintWriter(byteOut);
                // Tracks whether a delimiter is needed before the next field on this line.
                boolean nothingWrittenYet = true;
                if (this.type == DataWriter.DataSetType.CLASSIFICATION) {
                    int targetClass = (int)label;
                    writer.write(classNames[targetClass]);
                    nothingWrittenYet = false;
                } else if (this.type == DataWriter.DataSetType.REGRESSION) {
                    double targetVal = label;
                    writer.write(Double.toString(targetVal));
                    nothingWrittenYet = false;
                }
                Vec v = dp.getNumericalValues();
                int[] c = dp.getCategoricalValues();
                for (j = 0; j < v.length(); ++j) {
                    if (!nothingWrittenYet) {
                        writer.write(delimiter);
                    }
                    double val = v.get(j);
                    // Whole numbers are written without a fractional part. The magnitude
                    // check is required because rint(val) == val also holds for infinities
                    // and for |val| >= 2^63, where the cast to long would saturate.
                    if (Math.rint(val) == val && Math.abs(val) < (double) Long.MAX_VALUE) {
                        writer.write(Long.toString((long)val));
                    } else {
                        writer.write(Double.toString(val));
                    }
                    nothingWrittenYet = false;
                }
                for (j = 0; j < c.length; ++j) {
                    if (!nothingWrittenYet) {
                        writer.write(delimiter);
                    }
                    if (c[j] >= 0) {
                        writer.write(catNamesToUse[j][c[j]]);
                    }
                    nothingWrittenYet = false;
                }
                writer.write("\n");
                // Push the characters through to byteOut before returning.
                writer.flush();
            }
        };
    }

    /**
     * Builds, for every categorical variable, the option names to use in CSV output: each
     * name is trimmed and every occurrence of the delimiter in it is replaced, so a name can
     * never split a field.
     *
     * @param cats the categorical variables whose option names are needed
     * @param delimiter the field delimiter that must not appear in a name
     * @return an array indexed first by variable and then by option index
     */
    private static String[][] getSafeNames(CategoricalData[] cats, char delimiter) {
        // Underscore stands in for the delimiter, unless the delimiter is itself an underscore.
        final char substitute = delimiter == '_' ? '-' : '_';
        final String[][] safe = new String[cats.length][];
        for (int c = 0; c < cats.length; ++c) {
            CategoricalData cat = cats[c];
            String[] names = new String[cat.getNumOfCategories()];
            for (int k = 0; k < names.length; ++k) {
                // replace() leaves a name without the delimiter unchanged.
                names[k] = cat.getOptionName(k).trim().replace(delimiter, substitute);
            }
            safe[c] = names;
        }
        return safe;
    }

    /**
     * Tells whether a character is a line terminator.
     *
     * @param ch the character to test
     * @return {@code true} for line feed or carriage return, {@code false} otherwise
     */
    private static boolean isNewLine(char ch) {
        switch (ch) {
            case '\n':
            case '\r':
                return true;
            default:
                return false;
        }
    }

    /**
     * States of the character-level state machine that {@code readCSV} uses to parse its input.
     */
    private static enum STATE {
        /** Start state; replaced by SKIPPING_ROWS or VALUE without consuming a character. */
        INITIAL,
        /** Discarding the characters of a line that counts against {@code lines_to_skip}. */
        SKIPPING_ROWS,
        /** Accumulating the characters of the current field. */
        VALUE,
        /** A field just ended at a delimiter that still has to be consumed. */
        DELIMITER,
        /** Consuming line-terminator characters between rows. */
        NEWLINE,
        /** Discarding the characters of a comment up to the end of the line. */
        COMMENT;

    }
}

