/*
 * Decompiled with CFR 0.152.
 */
package org.forester.io.parsers;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.forester.msa.BasicMsa;
import org.forester.msa.Msa;
import org.forester.msa.MsaFormatException;
import org.forester.sequence.BasicSequence;
import org.forester.sequence.MolecularSequence;

/**
 * Static helpers for reading FASTA formatted sequence data.
 *
 * <p>Input is always decoded as UTF-8. Every method that takes an
 * {@link InputStream} (directly or by opening a {@link File}) closes that
 * stream before returning, whether it returns normally or throws.
 */
public class FastaParser {
    /** Character encoding used both for decoding input and for encoding strings. */
    private static final String ENCODING = "UTF-8";
    /** Description line: optional whitespace, '>', optional whitespace, then the name (group 1). */
    private static final Pattern NAME_REGEX = Pattern.compile("^\\s*>\\s*(.+)");
    /** Sequence line: optional leading whitespace followed by at least one character (group 1). */
    private static final Pattern SEQ_REGEX = Pattern.compile("^\\s*(.+)");
    /** A line made up exclusively of digits and/or whitespace; such lines are skipped. */
    private static final Pattern IGNORABLE_LINE_REGEX = Pattern.compile("[\\d\\s]+");
    /** Runs of whitespace, stripped from sequence lines before they are appended. */
    private static final Pattern WHITESPACE_REGEX = Pattern.compile("\\s+");
    /**
     * Pattern for description lines of the form
     * {@code >a|b... text [text]}: group 1 and group 2 are the first two
     * '|'-separated fields, group 3 is the text following the first
     * whitespace, and group 4 is the content of the trailing square brackets.
     */
    public static final Pattern FASTA_DESC_LINE = Pattern.compile(">?\\s*([^|]+)\\|([^|]+)\\S*\\s+(.+)\\s+\\[(.+)\\]");

    /**
     * Small demonstration: matches a sample description line against
     * {@link #FASTA_DESC_LINE} and prints the four captured groups, or
     * "Does not match." if the pattern does not apply.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String desc = ">gi|71834668|ref|NP_001025424.1| Bcl2 [Danio rerio]";
        Matcher name_m = FASTA_DESC_LINE.matcher(desc);
        if (name_m.lookingAt()) {
            System.out.println();
            System.out.println(name_m.group(1));
            System.out.println(name_m.group(2));
            System.out.println(name_m.group(3));
            System.out.println(name_m.group(4));
        } else {
            System.out.println("Does not match.");
        }
    }

    /**
     * Checks whether a file looks like FASTA; see {@link #isLikelyFasta(InputStream)}.
     *
     * @param f the file to inspect
     * @return {@code true} if the first significant line is a description line
     * @throws IOException if the file cannot be opened or read
     */
    public static boolean isLikelyFasta(File f) throws IOException {
        // The InputStream overload takes ownership of the stream and always closes it.
        return FastaParser.isLikelyFasta(new FileInputStream(f));
    }

    /**
     * Checks whether a stream looks like FASTA by inspecting its first
     * significant line. Empty lines and lines consisting only of digits
     * and/or whitespace are skipped.
     *
     * @param is the stream to inspect; it is closed by this method
     * @return {@code true} if the first significant line is a description
     *         line (starts with '>' after optional whitespace); {@code false}
     *         if other content comes first or the stream has no significant line
     * @throws IOException if reading fails
     */
    public static boolean isLikelyFasta(InputStream is) throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(is, ENCODING));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                if (FastaParser.canIgnore(line, true, false)) {
                    continue;
                }
                if (NAME_REGEX.matcher(line).lookingAt()) {
                    return true;
                }
                if (SEQ_REGEX.matcher(line).lookingAt()) {
                    // Some other content precedes the first description line.
                    return false;
                }
            }
            return false;
        } finally {
            // Also runs when readLine() throws, so the stream never leaks.
            reader.close();
        }
    }

    /**
     * Parses a FASTA file into an {@link Msa}.
     *
     * @param f the file to parse
     * @return the result of {@code BasicMsa.createInstance} on the parsed sequences
     * @throws IOException if the file cannot be read or is illegally formatted
     */
    public static Msa parseMsa(File f) throws IOException {
        return FastaParser.parseMsa(new FileInputStream(f));
    }

    /**
     * Parses FASTA data from a stream into an {@link Msa}.
     *
     * @param is the stream to parse; it is closed by this method
     * @return the result of {@code BasicMsa.createInstance} on the parsed sequences
     * @throws IOException if reading fails or the data is illegally formatted
     */
    public static Msa parseMsa(InputStream is) throws IOException {
        return BasicMsa.createInstance(FastaParser.parse(is));
    }

    /**
     * Parses FASTA data held in a string into an {@link Msa}.
     *
     * @param s the FASTA text
     * @return the result of {@code BasicMsa.createInstance} on the parsed sequences
     * @throws IOException if the data is illegally formatted
     */
    public static Msa parseMsa(String s) throws IOException {
        // Encode with the same charset the parser decodes with; the platform
        // default used previously could corrupt non-ASCII description lines.
        return FastaParser.parseMsa(s.getBytes(ENCODING));
    }

    /**
     * Parses FASTA data held in a byte array into an {@link Msa}.
     *
     * @param bytes the FASTA data; decoded as UTF-8
     * @return the result of {@code BasicMsa.createInstance} on the parsed sequences
     * @throws IOException if the data is illegally formatted
     */
    public static Msa parseMsa(byte[] bytes) throws IOException {
        return FastaParser.parseMsa(new ByteArrayInputStream(bytes));
    }

    /**
     * Parses a FASTA file into a list of sequences; see {@link #parse(InputStream)}.
     *
     * @param f the file to parse
     * @return the parsed sequences, in file order
     * @throws IOException if the file cannot be read or is illegally formatted
     */
    public static List<MolecularSequence> parse(File f) throws IOException {
        return FastaParser.parse(new FileInputStream(f));
    }

    /**
     * Parses FASTA data from a stream into a list of sequences.
     *
     * <p>Everything before the first description line is skipped, as are empty
     * lines and lines consisting only of digits and/or whitespace. Whitespace
     * inside sequence lines is removed. Entries whose name or sequence is
     * empty are dropped. Sequences are created with
     * {@code BasicSequence.createAaSequence} only after the whole input has
     * been read and the stream closed.
     *
     * @param is the stream to parse; it is closed by this method
     * @return the parsed sequences, in input order
     * @throws IOException if reading fails, or (as {@link MsaFormatException})
     *         if sequence data follows a description line with an empty name
     */
    public static List<MolecularSequence> parse(InputStream is) throws IOException {
        List<StringBuilder[]> temp_msa = new ArrayList<StringBuilder[]>();
        BufferedReader reader = new BufferedReader(new InputStreamReader(is, ENCODING));
        try {
            String line;
            int line_counter = 0;
            boolean saw_first_seq = false;
            StringBuilder current_seq = null;
            StringBuilder name = null;
            while ((line = reader.readLine()) != null) {
                ++line_counter;
                Matcher name_m = NAME_REGEX.matcher(line);
                boolean is_name_line = name_m.lookingAt();
                if (FastaParser.canIgnore(line, saw_first_seq, is_name_line)) {
                    continue;
                }
                if (is_name_line) {
                    // A new entry starts: flush the previous one (if any).
                    saw_first_seq = true;
                    FastaParser.addSeq(name, current_seq, temp_msa);
                    name = new StringBuilder(name_m.group(1).trim());
                    current_seq = new StringBuilder();
                    continue;
                }
                Matcher seq_m = SEQ_REGEX.matcher(line);
                // canIgnore() guarantees a description line was seen before we
                // get here, so name/current_seq are set; the null check is
                // purely defensive.
                if (!seq_m.lookingAt() || name == null || name.length() < 1) {
                    throw FastaParser.formatError(line_counter, line);
                }
                current_seq.append(WHITESPACE_REGEX.matcher(seq_m.group(1)).replaceAll(""));
            }
            FastaParser.addSeq(name, current_seq, temp_msa);
        } finally {
            // Close on every path, including I/O and format errors.
            reader.close();
        }
        List<MolecularSequence> seqs = new ArrayList<MolecularSequence>(temp_msa.size());
        for (StringBuilder[] entry : temp_msa) {
            seqs.add(BasicSequence.createAaSequence(entry[0].toString(), entry[1].toString()));
        }
        return seqs;
    }

    /**
     * Decides whether a line carries no information for the parser.
     *
     * @param line          the line to test
     * @param saw_first_seq whether a description line has already been seen
     * @param is_name_line  whether {@code line} is a description line
     * @return {@code true} for empty lines, lines of only digits/whitespace,
     *         and any non-description line before the first description line
     */
    private static boolean canIgnore(String line, boolean saw_first_seq, boolean is_name_line) {
        if (line.length() < 1 || IGNORABLE_LINE_REGEX.matcher(line).matches()) {
            return true;
        }
        return !saw_first_seq && !is_name_line;
    }

    /**
     * Appends a {name, sequence} pair to {@code temp_msa} unless either part
     * is {@code null} or empty.
     *
     * @param name     name of the entry, may be {@code null}
     * @param seq      residues of the entry, may be {@code null}
     * @param temp_msa list receiving the pair
     */
    private static void addSeq(StringBuilder name, StringBuilder seq, List<StringBuilder[]> temp_msa) {
        if (name != null && seq != null && name.length() > 0 && seq.length() > 0) {
            temp_msa.add(new StringBuilder[]{name, seq});
        }
    }

    /**
     * Builds the exception reported for an illegally formatted line.
     *
     * @param line_counter 1-based number of the offending line
     * @param line         the offending line (shortened in the message if long)
     * @return the exception to throw
     */
    private static MsaFormatException formatError(int line_counter, String line) {
        return new MsaFormatException("illegally formatted fasta msa (line: " + line_counter + "):\n\"" + FastaParser.trim(line) + "\"");
    }

    /**
     * Shortens a line for use in error messages.
     *
     * @param line the line to shorten
     * @return {@code line} unchanged if it has at most 100 characters,
     *         otherwise its first 100 characters followed by " ..."
     */
    private static String trim(String line) {
        if (line.length() > 100) {
            return line.substring(0, 100) + " ...";
        }
        return line;
    }
}

