/*
 * Decompiled with CFR 0.152.
 */
package org.forester.ws.seqdb;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;
import org.forester.go.GoTerm;
import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.data.Accession;
import org.forester.phylogeny.data.Annotation;
import org.forester.phylogeny.data.Identifier;
import org.forester.phylogeny.data.Sequence;
import org.forester.phylogeny.data.Taxonomy;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.sequence.MolecularSequence;
import org.forester.util.ForesterUtil;
import org.forester.util.SequenceAccessionTools;
import org.forester.ws.seqdb.EbiDbEntry;
import org.forester.ws.seqdb.SequenceDatabaseEntry;
import org.forester.ws.seqdb.UniProtEntry;
import org.forester.ws.seqdb.UniProtTaxonomy;

public final class SequenceDbWsTools {
    /** Base URL that {@link #queryUniprot(String, int)} prepends to every query. */
    public static final String BASE_UNIPROT_URL = "http://www.uniprot.org/";
    /**
     * Default line limit. The convenience overloads in this class pass the same
     * value as the literal {@code 4000} rather than referencing this constant
     * (presumably inlined by the compiler; this file is decompiled output).
     */
    public static final int DEFAULT_LINES_TO_RETURN = 4000;
    /** Database name {@code "refseqn"}; not referenced by any method of this class. */
    public static final String EMBL_DBS_REFSEQ_N = "refseqn";
    /** Database name {@code "refseqp"}; not referenced by any method of this class. */
    public static final String EMBL_DBS_REFSEQ_P = "refseqp";
    /** dbfetch URL prefix (db=GENBANK, raw style); {@link #queryEmblDb} uses it for NCBI-sourced accessions. */
    public static final String EMBL_GENBANK = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=GENBANK&style=raw&id=";
    /** dbfetch URL prefix (db=REFSEQ, raw style); used for RefSeq-sourced accessions. */
    public static final String EMBL_REFSEQ = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=REFSEQ&style=raw&id=";
    /** dbfetch URL prefix (db=EMBL, raw style); {@link #queryEmblDb} uses it for EMBL-sourced accessions. */
    public static final String EMBL_EMBL = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=EMBL&style=raw&id=";
    /** Debug switch; not referenced by name anywhere in this class. */
    private static final boolean DEBUG = false;
    /** Character encoding name handed to {@code URLEncoder.encode} by {@code encode}. */
    private static final String URL_ENC = "UTF-8";
    /**
     * Pause length; not referenced by name, but the {@code Thread.sleep(200L)} calls
     * in this class use the same value (presumably milliseconds inlined from here).
     */
    private static final int SLEEP = 200;
    /** Not referenced by name anywhere in this class. */
    private static final boolean ALLOW_TO_OVERWRITE_MOL_SEQ = false;

    /**
     * Looks up taxonomies by common name and keeps only those whose common name
     * equals the query, ignoring case.
     *
     * @param cn the common name to look up
     * @param max_taxonomies_return limit handed on to the underlying lookup
     * @return the exactly matching taxonomies (possibly an empty list), or
     *         {@code null} when the lookup itself produced nothing
     * @throws IOException if the underlying lookup fails
     */
    public static List<UniProtTaxonomy> getTaxonomiesFromCommonNameStrict(String cn, int max_taxonomies_return) throws IOException {
        final List<UniProtTaxonomy> candidates = SequenceDbWsTools.getTaxonomiesFromCommonName(cn, max_taxonomies_return);
        if (candidates == null || candidates.isEmpty()) {
            return null;
        }
        final List<UniProtTaxonomy> exact_matches = new ArrayList<UniProtTaxonomy>();
        for (final UniProtTaxonomy candidate : candidates) {
            if (candidate.getCommonName().equalsIgnoreCase(cn)) {
                exact_matches.add(candidate);
            }
        }
        return exact_matches;
    }

    /**
     * Queries the taxonomy service by identifier and parses the response.
     *
     * @param id the taxonomy identifier to look up
     * @param max_taxonomies_return line limit handed on to the query
     * @return the parsed taxonomies, or {@code null} when the query returned no lines
     * @throws IOException if the query or the parsing fails
     */
    public static List<UniProtTaxonomy> getTaxonomiesFromId(String id, int max_taxonomies_return) throws IOException {
        final List<String> lines = SequenceDbWsTools.getTaxonomyStringFromId(id, max_taxonomies_return);
        if (lines.isEmpty()) {
            return null;
        }
        return SequenceDbWsTools.parseUniProtTaxonomy(lines);
    }

    /**
     * Looks up taxonomies by scientific name and keeps only those whose
     * scientific name equals the query, ignoring case.
     *
     * @param sn the scientific name to look up
     * @param max_taxonomies_return limit handed on to the underlying lookup
     * @return the exactly matching taxonomies (possibly an empty list), or
     *         {@code null} when the lookup itself produced nothing
     * @throws IOException if the underlying lookup fails
     */
    public static List<UniProtTaxonomy> getTaxonomiesFromScientificNameStrict(String sn, int max_taxonomies_return) throws IOException {
        final List<UniProtTaxonomy> candidates = SequenceDbWsTools.getTaxonomiesFromScientificName(sn, max_taxonomies_return);
        if (candidates == null || candidates.isEmpty()) {
            return null;
        }
        final List<UniProtTaxonomy> exact_matches = new ArrayList<UniProtTaxonomy>();
        for (final UniProtTaxonomy candidate : candidates) {
            if (candidate.getScientificName().equalsIgnoreCase(sn)) {
                exact_matches.add(candidate);
            }
        }
        return exact_matches;
    }

    /**
     * Queries the taxonomy service by taxonomy code (mnemonic) and parses the response.
     *
     * @param code the taxonomy code to look up; must not be {@code null}
     * @param max_taxonomies_return line limit handed on to the query
     * @return the parsed taxonomies, or {@code null} when the query returned no lines
     * @throws IOException if the query or the parsing fails
     */
    public static List<UniProtTaxonomy> getTaxonomiesFromTaxonomyCode(String code, int max_taxonomies_return) throws IOException {
        // Strings are immutable, so the code is passed on directly instead of being copied first.
        final List<String> lines = SequenceDbWsTools.getTaxonomyStringFromTaxonomyCode(code, max_taxonomies_return);
        if (lines.isEmpty()) {
            return null;
        }
        return SequenceDbWsTools.parseUniProtTaxonomy(lines);
    }

    /**
     * Convenience overload of {@link #obtainEmblEntry(Accession, int)} that uses
     * a limit of 4000 lines.
     *
     * @param acc the accession to fetch
     * @return the entry built from the fetched lines
     * @throws IOException if the remote query fails
     */
    public static SequenceDatabaseEntry obtainEmblEntry(Accession acc) throws IOException {
        final SequenceDatabaseEntry entry = SequenceDbWsTools.obtainEmblEntry(acc, 4000);
        return entry;
    }

    /**
     * Fetches the raw lines for an accession through {@link #queryEmblDb} and
     * turns them into an entry with
     * {@code EbiDbEntry.createInstanceFromPlainTextForRefSeq}.
     *
     * @param acc the accession to fetch
     * @param max_lines_to_return line limit handed on to the query
     * @return the entry built from the fetched lines
     * @throws IOException if the remote query fails
     */
    public static SequenceDatabaseEntry obtainEmblEntry(Accession acc, int max_lines_to_return) throws IOException {
        return EbiDbEntry.createInstanceFromPlainTextForRefSeq(SequenceDbWsTools.queryEmblDb(acc, max_lines_to_return));
    }

    /**
     * Parses an accession out of a free-form string and fetches the matching
     * database entry, using a limit of 4000 lines.
     * <p>
     * RefSeq, EMBL and NCBI sourced accessions go through
     * {@link #obtainEmblEntry(Accession, int)}; UniProt sourced accessions go
     * through {@link #obtainUniProtEntry(String, int)}.
     *
     * @param acc_str text containing the accession
     * @return the fetched entry
     * @throws IllegalArgumentException if {@code acc_str} is rejected by
     *         {@code ForesterUtil.isEmpty}, if no accession can be parsed from
     *         it, or if the accession's source is not one of those listed above
     * @throws IOException if the remote query fails
     */
    public static SequenceDatabaseEntry obtainEntry(String acc_str) throws IOException {
        if (ForesterUtil.isEmpty(acc_str)) {
            // Message corrected: the previous wording contained a double negative ("cannot not extract").
            throw new IllegalArgumentException("cannot extract sequence db accessor from null or empty string");
        }
        final Accession acc = SequenceAccessionTools.parseAccessorFromString(acc_str);
        if (acc == null) {
            throw new IllegalArgumentException("could not extract acceptable sequence db accessor from \"" + acc_str + "\"");
        }
        final String source = acc.getSource();
        if (source.equals(Accession.Source.REFSEQ.toString())
                || source.equals(Accession.Source.EMBL.toString())
                || source.equals(Accession.Source.NCBI.toString())) {
            return SequenceDbWsTools.obtainEmblEntry(acc, 4000);
        }
        if (source.equals(Accession.Source.UNIPROT.toString())) {
            return SequenceDbWsTools.obtainUniProtEntry(acc.getValue(), 4000);
        }
        throw new IllegalArgumentException("don't know how to handle request for source \"" + source + "\"");
    }

    /**
     * Convenience overload of {@link #obtainRefSeqEntryFromEmbl(Accession, int)}
     * that uses a limit of 4000 lines.
     *
     * @param acc the accession to fetch
     * @return the entry built from the fetched lines
     * @throws IOException if the remote query fails
     */
    public static SequenceDatabaseEntry obtainRefSeqEntryFromEmbl(Accession acc) throws IOException {
        final SequenceDatabaseEntry entry = SequenceDbWsTools.obtainRefSeqEntryFromEmbl(acc, 4000);
        return entry;
    }

    /**
     * Fetches the raw lines for an accession through
     * {@link #queryEmblDbForRefSeqEntry} and turns them into an entry with
     * {@code EbiDbEntry.createInstanceFromPlainTextForRefSeq}.
     *
     * @param acc the accession to fetch
     * @param max_lines_to_return line limit handed on to the query
     * @return the entry built from the fetched lines
     * @throws IOException if the remote query fails
     */
    public static SequenceDatabaseEntry obtainRefSeqEntryFromEmbl(Accession acc, int max_lines_to_return) throws IOException {
        return EbiDbEntry.createInstanceFromPlainTextForRefSeq(SequenceDbWsTools.queryEmblDbForRefSeqEntry(acc, max_lines_to_return));
    }

    /**
     * Determines the sequence accession for a node.
     * <p>
     * The accession obtained via {@code SequenceAccessionTools.obtainFromSeqAccession}
     * is used when {@code isAccessionAcceptable} accepts it; otherwise the result of
     * {@code SequenceAccessionTools.obtainAccessorFromDataFields} is returned as is.
     *
     * @param node the node to inspect
     * @return the accession found; the fallback value is returned without a further check
     */
    public static final Accession obtainSeqAccession(PhylogenyNode node) {
        final Accession from_sequence = SequenceAccessionTools.obtainFromSeqAccession(node);
        if (SequenceDbWsTools.isAccessionAcceptable(from_sequence)) {
            return from_sequence;
        }
        return SequenceAccessionTools.obtainAccessorFromDataFields(node);
    }

    /**
     * Resolves the accession of a node and, when it is acceptable, fills the node
     * with data fetched for that accession.
     * <p>
     * When no acceptable accession is found, the node's string form is added to
     * {@code not_found}, but only for nodes that are external or not empty.
     *
     * @param allow_to_set_taxonomic_data handed on to the data-transfer step
     * @param lines_to_return line limit handed on to the remote query
     * @param not_found collector for nodes without usable data
     * @param node the node to annotate
     * @throws IOException if the remote query fails
     */
    public static final void obtainSeqInformation(boolean allow_to_set_taxonomic_data, int lines_to_return, SortedSet<String> not_found, PhylogenyNode node) throws IOException {
        final Accession acc = SequenceDbWsTools.obtainSeqAccession(node);
        if (SequenceDbWsTools.isAccessionAcceptable(acc)) {
            SequenceDbWsTools.addDataFromDbToNode(allow_to_set_taxonomic_data, lines_to_return, not_found, node, acc);
            return;
        }
        if (node.isExternal() || !node.isEmpty()) {
            not_found.add(node.toString());
        }
    }

    /**
     * Convenience overload of
     * {@link #obtainSeqInformation(boolean, int, SortedSet, PhylogenyNode)} that
     * uses a limit of 4000 lines.
     *
     * @param allow_to_set_taxonomic_data handed on unchanged
     * @param not_found collector for nodes without usable data
     * @param node the node to annotate
     * @throws IOException if the remote query fails
     */
    public static final void obtainSeqInformation(boolean allow_to_set_taxonomic_data, SortedSet<String> not_found, PhylogenyNode node) throws IOException {
        final int lines_to_return = 4000;
        SequenceDbWsTools.obtainSeqInformation(allow_to_set_taxonomic_data, lines_to_return, not_found, node);
    }

    /**
     * Walks a phylogeny in post-order and tries to annotate each visited node
     * with sequence information.
     *
     * @param phy the phylogeny whose nodes are visited
     * @param ext_nodes_only when {@code true}, nodes that are not external are skipped
     * @param allow_to_set_taxonomic_data handed on to the per-node step
     * @param lines_to_return line limit handed on to the remote queries
     * @return the sorted string forms of the nodes for which nothing was found
     * @throws IOException if a remote query fails
     */
    public static final SortedSet<String> obtainSeqInformation(Phylogeny phy, boolean ext_nodes_only, boolean allow_to_set_taxonomic_data, int lines_to_return) throws IOException {
        final SortedSet<String> unresolved = new TreeSet<String>();
        for (final PhylogenyNodeIterator it = phy.iteratorPostorder(); it.hasNext();) {
            final PhylogenyNode current = it.next();
            if (current.isExternal() || !ext_nodes_only) {
                SequenceDbWsTools.obtainSeqInformation(allow_to_set_taxonomic_data, lines_to_return, unresolved, current);
            }
        }
        return unresolved;
    }

    /**
     * Annotates a single node, allowing taxonomic data to be set and using a
     * limit of 4000 lines. The not-found collector is a throwaway set, so the
     * caller gets no report of a failed lookup.
     *
     * @param node the node to annotate
     * @throws IOException if the remote query fails
     */
    public static final void obtainSeqInformation(PhylogenyNode node) throws IOException {
        final TreeSet<String> discarded = new TreeSet<String>();
        SequenceDbWsTools.obtainSeqInformation(true, 4000, discarded, node);
    }

    /**
     * Convenience overload of {@link #obtainUniProtEntry(String, int)} that uses
     * a limit of 4000 lines.
     *
     * @param query the UniProt query (placed between {@code "uniprot/"} and {@code ".txt"})
     * @return the entry built from the fetched lines
     * @throws IOException if the remote query fails
     */
    public static SequenceDatabaseEntry obtainUniProtEntry(String query) throws IOException {
        final SequenceDatabaseEntry entry = SequenceDbWsTools.obtainUniProtEntry(query, 4000);
        return entry;
    }

    /**
     * Fetches {@code uniprot/<query>.txt} through {@link #queryUniprot} and turns
     * the lines into an entry with {@code UniProtEntry.createInstanceFromPlainText}.
     *
     * @param query the UniProt query (placed between {@code "uniprot/"} and {@code ".txt"})
     * @param max_lines_to_return line limit handed on to the query
     * @return the entry built from the fetched lines
     * @throws IOException if the remote query fails
     */
    public static SequenceDatabaseEntry obtainUniProtEntry(String query, int max_lines_to_return) throws IOException {
        final String path = "uniprot/" + query + ".txt";
        return UniProtEntry.createInstanceFromPlainText(SequenceDbWsTools.queryUniprot(path, max_lines_to_return));
    }

    /**
     * Opens {@code base_url + query} and reads the response line by line.
     * <p>
     * Reading stops at end of stream or as soon as more than
     * {@code max_lines_to_return} lines have been collected, so the result can
     * hold up to {@code max_lines_to_return + 1} lines. After reading, the
     * calling thread pauses for 200 ms.
     *
     * @param query the part of the URL appended to {@code base_url}
     * @param max_lines_to_return line limit; values below 1 are treated as 1
     * @param base_url the URL prefix
     * @return the lines read, in order
     * @throws IllegalArgumentException if {@code query} is rejected by {@code ForesterUtil.isEmpty}
     * @throws IOException if the connection cannot be opened or read
     */
    public static List<String> queryDb(String query, int max_lines_to_return, String base_url) throws IOException {
        if (ForesterUtil.isEmpty(query)) {
            throw new IllegalArgumentException("illegal attempt to use empty query ");
        }
        if (max_lines_to_return < 1) {
            max_lines_to_return = 1;
        }
        final URL url = new URL(base_url + query);
        final URLConnection urlc = url.openConnection();
        final List<String> result = new ArrayList<String>();
        final BufferedReader in = new BufferedReader(new InputStreamReader(urlc.getInputStream()));
        try {
            String line;
            while ((line = in.readLine()) != null) {
                result.add(line);
                // Enforce the limit: previously this test ended in a bare "continue",
                // so the whole response was always read regardless of the limit.
                if (result.size() > max_lines_to_return) {
                    break;
                }
            }
        } finally {
            // Close the stream even when reading fails part-way through.
            in.close();
        }
        try {
            Thread.sleep(200L);
        }
        catch (InterruptedException e) {
            // Keep the interrupt visible to callers instead of printing and dropping it.
            Thread.currentThread().interrupt();
        }
        return result;
    }

    /**
     * Picks the dbfetch URL prefix matching the accession's source (NCBI uses
     * {@code EMBL_GENBANK}, RefSeq uses {@code EMBL_REFSEQ}, EMBL uses
     * {@code EMBL_EMBL}) and queries it with the accession's value.
     *
     * @param acc the accession to fetch
     * @param max_lines_to_return line limit handed on to {@link #queryDb}
     * @return the lines returned by the query
     * @throws IllegalArgumentException if the source is none of the three above
     * @throws IOException if the remote query fails
     */
    public static List<String> queryEmblDb(Accession acc, int max_lines_to_return) throws IOException {
        final String source = acc.getSource();
        final String base_url;
        if (source.equals(Accession.Source.NCBI.toString())) {
            base_url = EMBL_GENBANK;
        } else if (source.equals(Accession.Source.REFSEQ.toString())) {
            base_url = EMBL_REFSEQ;
        } else if (source.equals(Accession.Source.EMBL.toString())) {
            base_url = EMBL_EMBL;
        } else {
            throw new IllegalArgumentException("unable to handle source: " + source);
        }
        return SequenceDbWsTools.queryDb(acc.getValue(), max_lines_to_return, base_url);
    }

    /**
     * Queries the {@code EMBL_REFSEQ} dbfetch URL with the accession's value,
     * regardless of the accession's source.
     *
     * @param id the accession to fetch
     * @param max_lines_to_return line limit handed on to {@link #queryDb}
     * @return the lines returned by the query
     * @throws IOException if the remote query fails
     */
    public static List<String> queryEmblDbForRefSeqEntry(Accession id, int max_lines_to_return) throws IOException {
        final String accession_value = id.getValue();
        return SequenceDbWsTools.queryDb(accession_value, max_lines_to_return, EMBL_REFSEQ);
    }

    /**
     * Runs a query against {@code BASE_UNIPROT_URL} through {@link #queryDb}.
     *
     * @param query the path and query string appended to the base URL
     * @param max_lines_to_return line limit handed on to {@link #queryDb}
     * @return the lines returned by the query
     * @throws IOException if the remote query fails
     */
    public static List<String> queryUniprot(String query, int max_lines_to_return) throws IOException {
        final List<String> lines = SequenceDbWsTools.queryDb(query, max_lines_to_return, BASE_UNIPROT_URL);
        return lines;
    }

    /**
     * Returns the trimmed text that follows the first occurrence of a marker.
     *
     * @param target the text to search
     * @param a the marker after which extraction starts
     * @return the trimmed remainder of {@code target} after {@code a}, or an
     *         empty string when {@code a} does not occur in {@code target}
     */
    static final String extractFrom(String target, String a) {
        final int i_a = target.indexOf(a);
        if (i_a < 0) {
            // Without this guard the -1 from indexOf shifted the start offset and
            // produced an arbitrary slice; mirror extractFromTo and return "".
            return "";
        }
        return target.substring(i_a + a.length()).trim();
    }

    /**
     * Returns the trimmed text between a start marker and the next end marker.
     * <p>
     * The end marker is searched for only after the end of the start marker, so
     * an earlier or overlapping occurrence of {@code b} does not interfere.
     *
     * @param target the text to search
     * @param a the start marker
     * @param b the end marker
     * @return the trimmed text between the first {@code a} and the first
     *         {@code b} that follows it, or an empty string when either marker
     *         is missing
     */
    static final String extractFromTo(String target, String a, String b) {
        final int i_a = target.indexOf(a);
        if (i_a < 0) {
            return "";
        }
        final int start = i_a + a.length();
        // Searching from 'start' avoids matching a 'b' located before or inside 'a'.
        final int i_b = target.indexOf(b, start);
        if (i_b < 0) {
            return "";
        }
        return target.substring(start, i_b).trim();
    }

    /**
     * Returns the trimmed text that precedes the first occurrence of a marker.
     *
     * @param target the text to search
     * @param b the marker at which extraction stops
     * @return the trimmed part of {@code target} before {@code b}, or an empty
     *         string when {@code b} does not occur in {@code target}
     */
    static final String extractTo(String target, String b) {
        final int i_b = target.indexOf(b);
        if (i_b < 0) {
            // substring(0, -1) would throw; mirror extractFromTo and return "".
            return "";
        }
        return target.substring(0, i_b).trim();
    }

    /**
     * Fetches the database entry for an accession and copies its data onto a node.
     * <p>
     * When a non-empty entry is found, the node's existing sequence and taxonomy
     * objects are reused (new ones are created otherwise) and updated with every
     * non-empty field of the entry. The molecular sequence is only set when the
     * node's sequence does not carry one yet. When nothing usable is found, the
     * node's string form is added to {@code not_found}, but only for nodes that
     * are external or not empty. The method always ends with a 200 ms pause.
     *
     * @param allow_to_set_taxonomic_data whether the taxonomy identifier may be
     *        set (the scientific name is set regardless of this flag)
     * @param lines_to_return line limit handed on to the remote query
     * @param not_found collector for nodes without usable data
     * @param node the node to annotate
     * @param acc the accession to look up
     * @throws IOException if the remote query fails for a reason other than a missing resource
     */
    private static void addDataFromDbToNode(boolean allow_to_set_taxonomic_data, int lines_to_return, SortedSet<String> not_found, PhylogenyNode node, Accession acc) throws IOException {
        final SequenceDatabaseEntry db_entry = SequenceDbWsTools.fetchDbEntry(acc, lines_to_return);
        if (db_entry != null && !db_entry.isEmpty()) {
            final Sequence seq = node.getNodeData().isHasSequence() ? node.getNodeData().getSequence() : new Sequence();
            if (!ForesterUtil.isEmpty(db_entry.getAccession())) {
                seq.setAccession(new Accession(db_entry.getAccession(), acc.getSource()));
            }
            if (!ForesterUtil.isEmpty(db_entry.getSequenceName())) {
                seq.setName(db_entry.getSequenceName());
            }
            if (!ForesterUtil.isEmpty(db_entry.getGeneName())) {
                seq.setGeneName(db_entry.getGeneName());
            }
            if (!ForesterUtil.isEmpty(db_entry.getSequenceSymbol())) {
                try {
                    seq.setSymbol(db_entry.getSequenceSymbol());
                }
                catch (PhyloXmlDataFormatException ignored) {
                    // Best effort: a symbol the sequence object rejects is simply not set.
                }
            }
            // Never overwrite a molecular sequence that is already present on the node.
            if (db_entry.getMolecularSequence() != null && !ForesterUtil.isEmpty(db_entry.getMolecularSequence().getMolecularSequenceAsString()) && seq.getMolecularSequence().isEmpty()) {
                seq.setMolecularSequence(db_entry.getMolecularSequence().getMolecularSequenceAsString());
                seq.setMolecularSequenceAligned(false);
                if (db_entry.getMolecularSequence().getType() == MolecularSequence.TYPE.AA) {
                    seq.setType("protein");
                } else if (db_entry.getMolecularSequence().getType() == MolecularSequence.TYPE.DNA) {
                    seq.setType("dna");
                } else if (db_entry.getMolecularSequence().getType() == MolecularSequence.TYPE.RNA) {
                    seq.setType("rna");
                }
            }
            if (db_entry.getGoTerms() != null && !db_entry.getGoTerms().isEmpty()) {
                for (GoTerm go : db_entry.getGoTerms()) {
                    final Annotation ann = new Annotation(go.getGoId().getId());
                    ann.setDesc(go.getName());
                    seq.addAnnotation(ann);
                }
            }
            if (db_entry.getCrossReferences() != null && !db_entry.getCrossReferences().isEmpty()) {
                for (Accession x : db_entry.getCrossReferences()) {
                    seq.addCrossReference(x);
                }
            }
            if (!ForesterUtil.isEmpty(db_entry.getChromosome()) && !ForesterUtil.isEmpty(db_entry.getMap())) {
                seq.setLocation("chr " + db_entry.getChromosome() + ", " + db_entry.getMap());
            } else if (!ForesterUtil.isEmpty(db_entry.getChromosome())) {
                seq.setLocation("chr " + db_entry.getChromosome());
            } else if (!ForesterUtil.isEmpty(db_entry.getMap())) {
                seq.setLocation(db_entry.getMap());
            }
            final Taxonomy tax = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy() : new Taxonomy();
            if (!ForesterUtil.isEmpty(db_entry.getTaxonomyScientificName())) {
                tax.setScientificName(db_entry.getTaxonomyScientificName());
            }
            if (allow_to_set_taxonomic_data && !ForesterUtil.isEmpty(db_entry.getTaxonomyIdentifier())) {
                tax.setIdentifier(new Identifier(db_entry.getTaxonomyIdentifier(), "uniprot"));
            }
            node.getNodeData().setTaxonomy(tax);
            node.getNodeData().setSequence(seq);
        } else if (node.isExternal() || !node.isEmpty()) {
            not_found.add(node.toString());
        }
        try {
            Thread.sleep(200L);
        }
        catch (InterruptedException e) {
            // Keep the interrupt visible to callers instead of silently dropping it.
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Fetches the entry for an accession from the service matching its source.
     *
     * @param acc the accession to look up
     * @param lines_to_return line limit handed on to the remote query
     * @return the fetched entry, or {@code null} when the source is not handled
     *         or the remote resource does not exist
     * @throws IOException if the remote query fails for another reason
     */
    private static SequenceDatabaseEntry fetchDbEntry(Accession acc, int lines_to_return) throws IOException {
        final String source = acc.getSource();
        final String query = acc.getValue();
        try {
            if (source.equals(Accession.Source.UNIPROT.toString())) {
                return SequenceDbWsTools.obtainUniProtEntry(query, lines_to_return);
            }
            if (source.equals(Accession.Source.REFSEQ.toString())) {
                return SequenceDbWsTools.obtainRefSeqEntryFromEmbl(new Accession(query), lines_to_return);
            }
            if (source.equals(Accession.Source.EMBL.toString()) || source.equals(Accession.Source.NCBI.toString())) {
                return SequenceDbWsTools.obtainEmblEntry(acc, lines_to_return);
            }
            if (source.equals(Accession.Source.GI.toString())) {
                return SequenceDbWsTools.obtainRefSeqEntryFromEmbl(new Accession(query), lines_to_return);
            }
        }
        catch (FileNotFoundException ignored) {
            // A missing remote resource means "no entry"; the caller records the node as not found.
        }
        return null;
    }

    /**
     * Trims a string and URL-encodes it using the {@code URL_ENC} encoding.
     *
     * @param str the text to encode; must not be {@code null}
     * @return the URL-encoded form of the trimmed text
     * @throws UnsupportedEncodingException if {@code URL_ENC} is not supported
     */
    private static String encode(String str) throws UnsupportedEncodingException {
        final String trimmed = str.trim();
        return URLEncoder.encode(trimmed, URL_ENC);
    }

    /**
     * Queries the taxonomy service by common name and parses the response.
     *
     * @param cn the common name to look up
     * @param max_taxonomies_return line limit handed on to the query
     * @return the parsed taxonomies, or {@code null} when the query returned no lines
     * @throws IOException if the query or the parsing fails
     */
    private static List<UniProtTaxonomy> getTaxonomiesFromCommonName(String cn, int max_taxonomies_return) throws IOException {
        final List<String> lines = SequenceDbWsTools.getTaxonomyStringFromCommonName(cn, max_taxonomies_return);
        if (lines.isEmpty()) {
            return null;
        }
        return SequenceDbWsTools.parseUniProtTaxonomy(lines);
    }

    /**
     * Queries the taxonomy service by scientific name and parses the response.
     *
     * @param sn the scientific name to look up
     * @param max_taxonomies_return line limit handed on to the query
     * @return the parsed taxonomies, or {@code null} when the query returned no lines
     * @throws IOException if the query or the parsing fails
     */
    private static List<UniProtTaxonomy> getTaxonomiesFromScientificName(String sn, int max_taxonomies_return) throws IOException {
        final List<String> lines = SequenceDbWsTools.getTaxonomyStringFromScientificName(sn, max_taxonomies_return);
        if (lines.isEmpty()) {
            return null;
        }
        return SequenceDbWsTools.parseUniProtTaxonomy(lines);
    }

    /**
     * Requests the tab-formatted taxonomy listing for a common name.
     *
     * @param cn the common name; it is trimmed and URL-encoded before use
     * @param max_lines_to_return line limit handed on to the query
     * @return the raw response lines
     * @throws IOException if the remote query fails
     */
    private static List<String> getTaxonomyStringFromCommonName(String cn, int max_lines_to_return) throws IOException {
        final String query = "taxonomy/?query=common%3a%22" + SequenceDbWsTools.encode(cn) + "%22&format=tab";
        return SequenceDbWsTools.queryUniprot(query, max_lines_to_return);
    }

    /**
     * Requests the tab-formatted taxonomy listing for a taxonomy identifier.
     *
     * @param id the identifier; it is trimmed and URL-encoded before use
     * @param max_lines_to_return line limit handed on to the query
     * @return the raw response lines
     * @throws IOException if the remote query fails
     */
    private static List<String> getTaxonomyStringFromId(String id, int max_lines_to_return) throws IOException {
        final String query = "taxonomy/?query=id%3a%22" + SequenceDbWsTools.encode(id) + "%22&format=tab";
        return SequenceDbWsTools.queryUniprot(query, max_lines_to_return);
    }

    /**
     * Requests the tab-formatted taxonomy listing for a scientific name.
     *
     * @param sn the scientific name; it is trimmed and URL-encoded before use
     * @param max_lines_to_return line limit handed on to the query
     * @return the raw response lines
     * @throws IOException if the remote query fails
     */
    private static List<String> getTaxonomyStringFromScientificName(String sn, int max_lines_to_return) throws IOException {
        final String query = "taxonomy/?query=scientific%3a%22" + SequenceDbWsTools.encode(sn) + "%22&format=tab";
        return SequenceDbWsTools.queryUniprot(query, max_lines_to_return);
    }

    /**
     * Requests the tab-formatted taxonomy listing for a taxonomy code (mnemonic).
     *
     * @param code the taxonomy code; it is trimmed and URL-encoded before use
     * @param max_lines_to_return line limit handed on to the query
     * @return the raw response lines
     * @throws IOException if the remote query fails
     */
    private static List<String> getTaxonomyStringFromTaxonomyCode(String code, int max_lines_to_return) throws IOException {
        final String query = "taxonomy/?query=mnemonic%3a%22" + SequenceDbWsTools.encode(code) + "%22&format=tab";
        return SequenceDbWsTools.queryUniprot(query, max_lines_to_return);
    }

    /**
     * Tells whether an accession carries enough information to be looked up:
     * it must be non-null and have both a non-empty source and a non-empty value.
     * <p>
     * NOTE(review): the previous version ended with a clause of the form
     * "source != UNIPROT || source != EMBL || source != REFSEQ". A single source
     * string cannot equal all three names at once (assuming they are distinct),
     * so that clause was always true and has been dropped; the result is
     * unchanged. If the intent was to restrict the accepted sources, that
     * restriction has never been in effect and should be decided deliberately.
     *
     * @param acc the accession to check, may be {@code null}
     * @return {@code true} if the accession has a non-empty source and value
     */
    private static final boolean isAccessionAcceptable(Accession acc) {
        return acc != null && !ForesterUtil.isEmpty(acc.getSource()) && !ForesterUtil.isEmpty(acc.getValue());
    }

    /**
     * Turns the lines of a tab-separated taxonomy listing into taxonomy objects.
     * <p>
     * Empty lines are skipped. A line starting with {@code "Taxon"} is treated as
     * the header and must carry the expected column names. Any other line with
     * more than four tab-separated columns is handed to the
     * {@code UniProtTaxonomy} constructor; shorter lines are ignored.
     *
     * @param result the raw response lines
     * @return the parsed taxonomies, possibly empty
     * @throws IOException if a header line does not match the expected columns
     */
    private static List<UniProtTaxonomy> parseUniProtTaxonomy(List<String> result) throws IOException {
        final List<UniProtTaxonomy> taxonomies = new ArrayList<UniProtTaxonomy>();
        for (final String line : result) {
            if (ForesterUtil.isEmpty(line)) {
                continue;
            }
            final String[] items = line.split("\t");
            if (line.startsWith("Taxon")) {
                if (!SequenceDbWsTools.isExpectedTaxonomyHeader(items)) {
                    throw new IOException("Unrecognized UniProt Taxonomy format: " + line);
                }
                continue;
            }
            if (items.length > 4) {
                taxonomies.add(new UniProtTaxonomy(line));
            }
        }
        return taxonomies;
    }

    /**
     * Checks that columns 1 to 8 of a header line carry the expected names
     * (compared ignoring case). A header with too few columns is reported as a
     * mismatch instead of failing with an index error.
     */
    private static boolean isExpectedTaxonomyHeader(String[] items) {
        final String[] expected = { "Mnemonic", "Scientific name", "Common name", "Synonym", "Other Names", "Reviewed", "Rank", "Lineage" };
        if (items.length <= expected.length) {
            return false;
        }
        for (int i = 0; i < expected.length; i++) {
            // Column 0 is the "Taxon" column itself, so the names start at index 1.
            if (!items[i + 1].equalsIgnoreCase(expected[i])) {
                return false;
            }
        }
        return true;
    }
}

