/*
 * Decompiled with CFR 0.152.
 */
package org.forester.io.parsers;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import org.forester.protein.BasicDomain;
import org.forester.protein.BasicProtein;
import org.forester.protein.Domain;
import org.forester.protein.Protein;
import org.forester.util.ForesterUtil;

public final class HmmscanPerDomainTableParser {
    private static final String RETRO = "RETRO";
    private static final String PHAGE = "PHAGE";
    private static final String VIR = "VIR";
    private static final String TRANSPOS = "TRANSPOS";
    private static final String RV = "RV";
    private static final String GAG = "GAG_";
    private static final String HCV = "HCV_";
    private static final String HERPES = "HERPES_";
    private static final String BACULO = "BACULO_";
    private static final int E_VALUE_MAXIMUM_DEFAULT = -1;
    private static final ReturnType RETURN_TYPE_DEFAULT = ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN;
    private static final boolean IGNORE_DUFS_DEFAULT = false;
    private static final int MAX_ALLOWED_OVERLAP_DEFAULT = -1;
    private static final boolean IGNORE_REPLACED_RRMS = false;
    private static final boolean IGNORE_hGDE_amylase = true;
    private final Set<String> _filter;
    private final FilterType _filter_type;
    private final File _input_file;
    private final String _species;
    private double _fs_e_value_maximum;
    private double _i_e_value_maximum;
    private Map<String, Double> _individual_score_cutoffs;
    private boolean _ignore_dufs;
    private boolean _ignore_virus_like_ids;
    private int _max_allowed_overlap;
    private boolean _ignore_engulfed_domains;
    private ReturnType _return_type;
    private int _proteins_encountered;
    private int _proteins_ignored_due_to_filter;
    private int _proteins_stored;
    private int _domains_encountered;
    private int _domains_ignored_due_to_duf;
    private int _domains_ignored_due_to_overlap;
    private int _domains_ignored_due_to_fs_e_value;
    private int _domains_ignored_due_to_i_e_value;
    private int _domains_ignored_due_to_individual_score_cutoff;
    private int _domains_stored;
    private SortedSet<String> _domains_stored_set;
    private long _time;
    private int _domains_ignored_due_to_negative_domain_filter;
    private Map<String, Integer> _domains_ignored_due_to_negative_domain_filter_counts_map;
    private int _domains_ignored_due_to_virus_like_id;
    private Map<String, Integer> _domains_ignored_due_to_virus_like_id_counts_map;
    private final INDIVIDUAL_SCORE_CUTOFF _ind_cutoff;
    private final boolean _allow_proteins_with_same_name;

    public HmmscanPerDomainTableParser(File input_file, String species, INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to) {
        this._input_file = input_file;
        this._species = species;
        this._filter = null;
        this._filter_type = FilterType.NONE;
        this._ind_cutoff = individual_cutoff_applies_to;
        this._allow_proteins_with_same_name = false;
        this.init();
    }

    public HmmscanPerDomainTableParser(File input_file, String species, INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to, boolean allow_proteins_with_same_name) {
        this._input_file = input_file;
        this._species = species;
        this._filter = null;
        this._filter_type = FilterType.NONE;
        this._ind_cutoff = individual_cutoff_applies_to;
        this._allow_proteins_with_same_name = allow_proteins_with_same_name;
        this.init();
    }

    public HmmscanPerDomainTableParser(File input_file, String species, Set<String> filter, FilterType filter_type, INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to) {
        this._input_file = input_file;
        this._species = species;
        this._filter = filter;
        this._filter_type = filter_type;
        this._ind_cutoff = individual_cutoff_applies_to;
        this._allow_proteins_with_same_name = false;
        this.init();
    }

    public HmmscanPerDomainTableParser(File input_file, String species, Set<String> filter, FilterType filter_type, INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to, boolean allow_proteins_with_same_name) {
        this._input_file = input_file;
        this._species = species;
        this._filter = filter;
        this._filter_type = filter_type;
        this._ind_cutoff = individual_cutoff_applies_to;
        this._allow_proteins_with_same_name = allow_proteins_with_same_name;
        this.init();
    }

    public boolean isAllowProteinsWithSameName() {
        return this._allow_proteins_with_same_name;
    }

    private void actuallyAddProtein(List<Protein> proteins, Protein current_protein) {
        List<Domain> l = current_protein.getProteinDomains();
        for (Domain d : l) {
            this.getDomainsStoredSet().add(d.getDomainId());
        }
        proteins.add(current_protein);
        ++this._proteins_stored;
    }

    private void addProtein(List<Protein> proteins, Protein current_protein) {
        if (this.getMaxAllowedOverlap() != -1 || this.isIgnoreEngulfedDomains()) {
            int domains_count = current_protein.getNumberOfProteinDomains();
            current_protein = ForesterUtil.removeOverlappingDomains(this.getMaxAllowedOverlap(), this.isIgnoreEngulfedDomains(), current_protein);
            int domains_removed = domains_count - current_protein.getNumberOfProteinDomains();
            this._domains_stored -= domains_removed;
            this._domains_ignored_due_to_overlap += domains_removed;
        }
        if (this.getFilterType() == FilterType.POSITIVE_PROTEIN || this.getFilterType() == FilterType.NEGATIVE_PROTEIN) {
            HashSet<String> domain_ids_in_protein = new HashSet<String>();
            for (Domain d : current_protein.getProteinDomains()) {
                domain_ids_in_protein.add(d.getDomainId());
            }
            domain_ids_in_protein.retainAll(this.getFilter());
            if (this.getFilterType() == FilterType.POSITIVE_PROTEIN) {
                if (domain_ids_in_protein.size() > 0) {
                    this.actuallyAddProtein(proteins, current_protein);
                } else {
                    ++this._proteins_ignored_due_to_filter;
                }
            } else if (domain_ids_in_protein.size() < 1) {
                this.actuallyAddProtein(proteins, current_protein);
            } else {
                ++this._proteins_ignored_due_to_filter;
            }
        } else {
            this.actuallyAddProtein(proteins, current_protein);
        }
    }

    public int getDomainsEncountered() {
        return this._domains_encountered;
    }

    public int getDomainsIgnoredDueToDuf() {
        return this._domains_ignored_due_to_duf;
    }

    public int getDomainsIgnoredDueToIEval() {
        return this._domains_ignored_due_to_i_e_value;
    }

    public int getDomainsIgnoredDueToFsEval() {
        return this._domains_ignored_due_to_fs_e_value;
    }

    public int getDomainsIgnoredDueToIndividualScoreCutoff() {
        return this._domains_ignored_due_to_individual_score_cutoff;
    }

    public int getDomainsIgnoredDueToNegativeDomainFilter() {
        return this._domains_ignored_due_to_negative_domain_filter;
    }

    public Map<String, Integer> getDomainsIgnoredDueToNegativeDomainFilterCountsMap() {
        return this._domains_ignored_due_to_negative_domain_filter_counts_map;
    }

    public int getDomainsIgnoredDueToOverlap() {
        return this._domains_ignored_due_to_overlap;
    }

    public Map<String, Integer> getDomainsIgnoredDueToVirusLikeIdCountsMap() {
        return this._domains_ignored_due_to_virus_like_id_counts_map;
    }

    public int getDomainsIgnoredDueToVirusLikeIds() {
        return this._domains_ignored_due_to_virus_like_id;
    }

    public int getDomainsStored() {
        return this._domains_stored;
    }

    public SortedSet<String> getDomainsStoredSet() {
        return this._domains_stored_set;
    }

    private double getFsEValueMaximum() {
        return this._fs_e_value_maximum;
    }

    private double getIEValueMaximum() {
        return this._i_e_value_maximum;
    }

    private Set<String> getFilter() {
        return this._filter;
    }

    private FilterType getFilterType() {
        return this._filter_type;
    }

    public INDIVIDUAL_SCORE_CUTOFF getIndividualCutoffAppliesTo() {
        return this._ind_cutoff;
    }

    private Map<String, Double> getIndividualScoreCutoffs() {
        return this._individual_score_cutoffs;
    }

    private File getInputFile() {
        return this._input_file;
    }

    private int getMaxAllowedOverlap() {
        return this._max_allowed_overlap;
    }

    public int getProteinsEncountered() {
        return this._proteins_encountered;
    }

    public int getProteinsIgnoredDueToFilter() {
        return this._proteins_ignored_due_to_filter;
    }

    public int getProteinsStored() {
        return this._proteins_stored;
    }

    private ReturnType getReturnType() {
        return this._return_type;
    }

    private String getSpecies() {
        return this._species;
    }

    public long getTime() {
        return this._time;
    }

    private void init() {
        this._fs_e_value_maximum = -1.0;
        this._i_e_value_maximum = -1.0;
        this.setIgnoreDufs(false);
        this.setReturnType(RETURN_TYPE_DEFAULT);
        this._max_allowed_overlap = -1;
        this.setIndividualScoreCutoffs(null);
        this.setIgnoreEngulfedDomains(false);
        this.setIgnoreVirusLikeIds(false);
        this.intitCounts();
    }

    private void intitCounts() {
        this.setDomainsStoredSet(new TreeSet<String>());
        this.setDomainsEncountered(0);
        this.setProteinsEncountered(0);
        this.setProteinsIgnoredDueToFilter(0);
        this.setDomainsIgnoredDueToNegativeFilter(0);
        this.setDomainsIgnoredDueToDuf(0);
        this.setDomainsIgnoredDueToFsEval(0);
        this.setDomainsIgnoredDueToIEval(0);
        this.setDomainsIgnoredDueToIndividualScoreCutoff(0);
        this.setDomainsIgnoredDueToVirusLikeId(0);
        this.setDomainsIgnoredDueToOverlap(0);
        this.setDomainsStored(0);
        this.setProteinsStored(0);
        this.setTime(0L);
        this.setDomainsIgnoredDueToVirusLikeIdCountsMap(new TreeMap<String, Integer>());
        this.setDomainsIgnoredDueToNegativeDomainFilterCountsMap(new TreeMap<String, Integer>());
    }

    private boolean isIgnoreDufs() {
        return this._ignore_dufs;
    }

    private boolean isIgnoreEngulfedDomains() {
        return this._ignore_engulfed_domains;
    }

    private boolean isIgnoreVirusLikeIds() {
        return this._ignore_virus_like_ids;
    }

    public List<Protein> parse() throws IOException {
        String line;
        if (this.getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.NONE && (this.getIndividualScoreCutoffs() == null || this.getIndividualScoreCutoffs().size() < 1)) {
            throw new RuntimeException("attempt to use individual cuttoffs with having set them");
        }
        this.intitCounts();
        HashSet<String> prev_queries = new HashSet<String>();
        String error = ForesterUtil.isReadableFile(this.getInputFile());
        if (!ForesterUtil.isEmpty(error)) {
            throw new IOException(error);
        }
        BufferedReader br = new BufferedReader(new FileReader(this.getInputFile()));
        ArrayList<Protein> proteins = new ArrayList<Protein>();
        Protein current_protein = null;
        int line_number = 0;
        long start_time = new Date().getTime();
        String prev_query = "";
        int prev_qlen = -1;
        while ((line = br.readLine()) != null) {
            ++line_number;
            if (ForesterUtil.isEmpty(line) || line.startsWith("#")) continue;
            String[] tokens = line.split("\\s+");
            String target_id = tokens[0];
            String target_acc = tokens[1];
            int tlen = this.parseInt(tokens[2], line_number, "tlen");
            String query = tokens[3];
            String query_acc = tokens[4];
            int qlen = this.parseInt(tokens[5], line_number, "qlen");
            double fs_e_value = this.parseDouble(tokens[6], line_number, "E-value");
            double fs_score = this.parseDouble(tokens[7], line_number, "score");
            int domain_number = this.parseInt(tokens[9], line_number, "count");
            int total_domains = this.parseInt(tokens[10], line_number, "total");
            double c_e_value = this.parseDouble(tokens[11], line_number, "c-Evalue");
            double i_e_value = this.parseDouble(tokens[12], line_number, "i-Evalue");
            double domain_score = this.parseDouble(tokens[13], line_number, "score");
            int hmm_from = this.parseInt(tokens[15], line_number, "hmm from");
            int hmm_to = this.parseInt(tokens[16], line_number, "hmm to");
            int ali_from = this.parseInt(tokens[17], line_number, "ali from");
            int ali_to = this.parseInt(tokens[18], line_number, "ali to");
            int env_from = this.parseInt(tokens[19], line_number, "env from");
            int env_to = this.parseInt(tokens[20], line_number, "env to");
            ++this._domains_encountered;
            if (!query.equals(prev_query) || qlen != prev_qlen) {
                if (!this.isAllowProteinsWithSameName()) {
                    if (query.equals(prev_query)) {
                        throw new IOException("more than one protein named [" + query + "]" + " lengths: " + qlen + ", " + prev_qlen);
                    }
                    if (prev_queries.contains(query)) {
                        throw new IOException("more than one protein named [" + query + "]");
                    }
                }
                prev_query = query;
                prev_qlen = qlen;
                prev_queries.add(query);
                if (current_protein != null && current_protein.getProteinDomains().size() > 0) {
                    this.addProtein(proteins, current_protein);
                }
                if (this.getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN) {
                    current_protein = new BasicProtein(query, this.getSpecies(), qlen);
                } else {
                    throw new IllegalArgumentException("unknown return type");
                }
            }
            boolean failed_cutoff = false;
            if (this.getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.NONE) {
                if (this.getIndividualScoreCutoffs().containsKey(target_id)) {
                    double cutoff = this.getIndividualScoreCutoffs().get(target_id);
                    if (this.getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.FULL_SEQUENCE) {
                        if (fs_score < cutoff) {
                            failed_cutoff = true;
                        }
                    } else if (this.getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.DOMAIN && domain_score < cutoff) {
                        failed_cutoff = true;
                    }
                } else {
                    throw new IOException("could not find a score cutoff value for domain id \"" + target_id + "\" [line " + line_number + "] in [" + this.getInputFile().getCanonicalPath() + "]");
                }
            }
            String uc_id = target_id.toUpperCase();
            if (failed_cutoff) {
                ++this._domains_ignored_due_to_individual_score_cutoff;
                continue;
            }
            if (ali_from == ali_to) continue;
            if (this.getFsEValueMaximum() != -1.0 && fs_e_value > this.getFsEValueMaximum()) {
                ++this._domains_ignored_due_to_fs_e_value;
                continue;
            }
            if (this.getIEValueMaximum() != -1.0 && i_e_value > this.getIEValueMaximum()) {
                ++this._domains_ignored_due_to_i_e_value;
                continue;
            }
            if (this.isIgnoreDufs() && uc_id.startsWith("DUF")) {
                ++this._domains_ignored_due_to_duf;
                continue;
            }
            if (uc_id.equals("hGDE_amylase")) continue;
            if (this.isIgnoreVirusLikeIds() && (uc_id.contains(VIR) || uc_id.contains(PHAGE) || uc_id.contains(RETRO) || uc_id.contains(TRANSPOS) || uc_id.startsWith(RV) || uc_id.startsWith(GAG) || uc_id.startsWith(HCV) || uc_id.startsWith(HERPES) || uc_id.startsWith(BACULO))) {
                ForesterUtil.increaseCountingMap(this.getDomainsIgnoredDueToVirusLikeIdCountsMap(), target_id);
                ++this._domains_ignored_due_to_virus_like_id;
                continue;
            }
            if (this.getFilterType() == FilterType.NEGATIVE_DOMAIN && this.getFilter().contains(target_id)) {
                ++this._domains_ignored_due_to_negative_domain_filter;
                ForesterUtil.increaseCountingMap(this.getDomainsIgnoredDueToNegativeDomainFilterCountsMap(), target_id);
                continue;
            }
            try {
                BasicDomain pd = new BasicDomain(target_id, ali_from, ali_to, (short)domain_number, (short)total_domains, i_e_value, domain_score);
                current_protein.addProteinDomain(pd);
            }
            catch (IllegalArgumentException e) {
                throw new IOException("problem with domain parsing at line " + line_number + "[" + line + "]: " + e.getMessage());
            }
            ++this._domains_stored;
        }
        if (current_protein != null && current_protein.getProteinDomains().size() > 0) {
            this.addProtein(proteins, current_protein);
        }
        this.setProteinsEncountered(prev_queries.size());
        this.setTime(new Date().getTime() - start_time);
        return proteins;
    }

    private double parseDouble(String double_str, int line_number, String label) throws IOException {
        double d = -1.0;
        try {
            d = Double.valueOf(double_str);
        }
        catch (NumberFormatException e) {
            throw new IOException("could not parse \" +label + \" from \"" + double_str + "\" [line " + line_number + "] in [" + this.getInputFile().getCanonicalPath() + "]");
        }
        return d;
    }

    private int parseInt(String double_str, int line_number, String label) throws IOException {
        int i = -1;
        try {
            i = Integer.valueOf(double_str);
        }
        catch (NumberFormatException e) {
            throw new IOException("could not parse \"" + label + "\" from \"" + double_str + "\" [line " + line_number + "] in [" + this.getInputFile().getCanonicalPath() + "]");
        }
        return i;
    }

    private void setDomainsEncountered(int domains_encountered) {
        this._domains_encountered = domains_encountered;
    }

    private void setDomainsIgnoredDueToDuf(int domains_ignored_due_to_duf) {
        this._domains_ignored_due_to_duf = domains_ignored_due_to_duf;
    }

    private void setDomainsIgnoredDueToFsEval(int domains_ignored_due_to_fs_e_value) {
        this._domains_ignored_due_to_fs_e_value = domains_ignored_due_to_fs_e_value;
    }

    private void setDomainsIgnoredDueToIEval(int domains_ignored_due_to_i_e_value) {
        this._domains_ignored_due_to_i_e_value = domains_ignored_due_to_i_e_value;
    }

    private void setDomainsIgnoredDueToIndividualScoreCutoff(int domains_ignored_due_to_individual_score_cutoff) {
        this._domains_ignored_due_to_individual_score_cutoff = domains_ignored_due_to_individual_score_cutoff;
    }

    private void setDomainsIgnoredDueToNegativeDomainFilterCountsMap(Map<String, Integer> domains_ignored_due_to_negative_domain_filter_counts_map) {
        this._domains_ignored_due_to_negative_domain_filter_counts_map = domains_ignored_due_to_negative_domain_filter_counts_map;
    }

    private void setDomainsIgnoredDueToNegativeFilter(int domains_ignored_due_to_negative_domain_filter) {
        this._domains_ignored_due_to_negative_domain_filter = domains_ignored_due_to_negative_domain_filter;
    }

    private void setDomainsIgnoredDueToOverlap(int domains_ignored_due_to_overlap) {
        this._domains_ignored_due_to_overlap = domains_ignored_due_to_overlap;
    }

    private void setDomainsIgnoredDueToVirusLikeId(int i) {
        this._domains_ignored_due_to_virus_like_id = i;
    }

    private void setDomainsIgnoredDueToVirusLikeIdCountsMap(Map<String, Integer> domains_ignored_due_to_virus_like_id_counts_map) {
        this._domains_ignored_due_to_virus_like_id_counts_map = domains_ignored_due_to_virus_like_id_counts_map;
    }

    private void setDomainsStored(int domains_stored) {
        this._domains_stored = domains_stored;
    }

    private void setDomainsStoredSet(SortedSet<String> _storeddomains_stored) {
        this._domains_stored_set = _storeddomains_stored;
    }

    public void setFsEValueMaximum(double fs_e_value_maximum) {
        if (fs_e_value_maximum < 0.0) {
            throw new IllegalArgumentException("attempt to set the maximum E-value to a negative value");
        }
        this._fs_e_value_maximum = fs_e_value_maximum;
    }

    public void setIEValueMaximum(double i_e_value_maximum) {
        if (i_e_value_maximum < 0.0) {
            throw new IllegalArgumentException("attempt to set the maximum E-value to a negative value");
        }
        this._i_e_value_maximum = i_e_value_maximum;
    }

    public void setIgnoreDufs(boolean ignore_dufs) {
        this._ignore_dufs = ignore_dufs;
    }

    public void setIgnoreEngulfedDomains(boolean ignore_engulfed_domains) {
        this._ignore_engulfed_domains = ignore_engulfed_domains;
    }

    public void setIgnoreVirusLikeIds(boolean ignore_virus_like_ids) {
        this._ignore_virus_like_ids = ignore_virus_like_ids;
    }

    public void setIndividualScoreCutoffs(Map<String, Double> individual_score_cutoffs) {
        this._individual_score_cutoffs = individual_score_cutoffs;
    }

    public void setMaxAllowedOverlap(int max_allowed_overlap) {
        if (max_allowed_overlap < 0) {
            throw new IllegalArgumentException("Attempt to set max allowed overlap to less than zero.");
        }
        this._max_allowed_overlap = max_allowed_overlap;
    }

    private void setProteinsEncountered(int proteins_encountered) {
        this._proteins_encountered = proteins_encountered;
    }

    private void setProteinsIgnoredDueToFilter(int proteins_ignored_due_to_filter) {
        this._proteins_ignored_due_to_filter = proteins_ignored_due_to_filter;
    }

    private void setProteinsStored(int proteins_stored) {
        this._proteins_stored = proteins_stored;
    }

    public void setReturnType(ReturnType return_type) {
        this._return_type = return_type;
    }

    private void setTime(long time) {
        this._time = time;
    }

    public static enum ReturnType {
        UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN;

    }

    public static enum INDIVIDUAL_SCORE_CUTOFF {
        FULL_SEQUENCE,
        DOMAIN,
        NONE;

    }

    public static enum FilterType {
        NONE,
        POSITIVE_PROTEIN,
        NEGATIVE_PROTEIN,
        NEGATIVE_DOMAIN;

    }
}

