/*
 * Decompiled with CFR 0.152.
 */
package org.forester.ws.seqdb;

import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.forester.go.BasicGoTerm;
import org.forester.go.GoTerm;
import org.forester.phylogeny.data.Accession;
import org.forester.phylogeny.data.Annotation;
import org.forester.sequence.BasicSequence;
import org.forester.sequence.MolecularSequence;
import org.forester.util.ForesterUtil;
import org.forester.ws.seqdb.SequenceDatabaseEntry;
import org.forester.ws.seqdb.SequenceDbWsTools;

/**
 * A {@link SequenceDatabaseEntry} populated from the lines of a plain-text
 * sequence record (provider {@code "uniprot"}).
 *
 * <p>Instances are only created through
 * {@link #createInstanceFromPlainText(List)}. The parser looks at lines
 * starting with the two-letter codes {@code AC}, {@code DE}, {@code GN},
 * {@code DR}, {@code OS}, {@code OX} and {@code SQ}, plus the space-prefixed
 * lines that follow an {@code SQ} line.
 */
public final class UniProtEntry
implements SequenceDatabaseEntry {
    public static final Pattern BindingDB_PATTERN = Pattern.compile("BindingDB;\\s+([0-9A-Z]+);");
    public static final Pattern CTD_PATTERN = Pattern.compile("CTD;\\s+(\\d+);");
    public static final Pattern DrugBank_PATTERN = Pattern.compile("DrugBank;\\s+([0-9A-Z]+);\\s+([^\\.]+)");
    public static final Pattern GO_PATTERN = Pattern.compile("GO;\\s+(GO:\\d+);\\s+([PFC]):([^;]+);");
    public static final Pattern KEGG_PATTERN = Pattern.compile("KEGG;\\s+([a-z]+:[0-9]+);");
    public static final Pattern MIM_PATTERN = Pattern.compile("MIM;\\s+(\\d+);");
    public static final Pattern NextBio_PATTERN = Pattern.compile("NextBio;\\s+(\\d+);");
    public static final Pattern Orphanet_PATTERN = Pattern.compile("Orphanet;\\s+(\\d+);\\s+([^\\.]+)");
    public static final Pattern PDB_PATTERN = Pattern.compile("PDB;\\s+([0-9A-Z]{4});\\s+([^;]+)");
    public static final Pattern PharmGKB_PATTERN = Pattern.compile("PharmGKB;\\s+([0-9A-Z]+);");
    public static final Pattern Reactome_PATTERN = Pattern.compile("Reactome;\\s+([0-9A-Z]+);\\s+([^\\.]+)");
    public static final Pattern HGNC_PATTERN = Pattern.compile("HGNC;\\s+HGNC:(\\d+);");
    public static final Pattern NCBI_TAXID_PATTERN = Pattern.compile("NCBI_TaxID=(\\d+)");
    /** Compiled once so sequence lines are not re-parsed as a regex on every iteration. */
    private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
    private String _ac;
    private SortedSet<Accession> _cross_references;
    private String _gene_name;
    private SortedSet<GoTerm> _go_terms;
    private String _name;
    private String _os_scientific_name;
    private String _symbol;
    private String _tax_id;
    private MolecularSequence _mol_seq;

    /**
     * Cross-reference databases recognised on {@code DR} lines.
     *
     * <p>Declaration order is significant: a {@code DR} line is attributed to
     * the first constant whose key occurs in it, and no later constant is
     * tried even if that constant's pattern then fails to match.
     */
    private enum CrossRefSource {
        PDB("PDB;", PDB_PATTERN, "PDB", true),
        KEGG("KEGG;", KEGG_PATTERN, "KEGG", false),
        CTD("CTD;", CTD_PATTERN, "CTD", false),
        MIM("MIM;", MIM_PATTERN, "MIM", false),
        ORPHANET("Orphanet;", Orphanet_PATTERN, "Orphanet", true),
        PHARM_GKB("PharmGKB;", PharmGKB_PATTERN, "PharmGKB", false),
        BINDING_DB("BindingDB;", BindingDB_PATTERN, "BindingDB", false),
        DRUG_BANK("DrugBank;", DrugBank_PATTERN, "DrugBank", true),
        NEXT_BIO("NextBio;", NextBio_PATTERN, "NextBio", false),
        REACTOME("Reactome;", Reactome_PATTERN, "Reactome", true),
        HGNC("HGNC;", HGNC_PATTERN, "HGNC", false);

        /** Substring that must occur (at an index &gt; 0) for the line to belong to this source. */
        private final String _key;
        /** Pattern whose group 1 is the identifier and, if {@link #_has_comment}, group 2 the comment. */
        private final Pattern _pattern;
        /** Source label passed to the {@link Accession} constructor. */
        private final String _source;
        /** Whether group 2 of the pattern is passed on as the accession's third argument. */
        private final boolean _has_comment;

        private CrossRefSource(String key, Pattern pattern, String source, boolean has_comment) {
            this._key = key;
            this._pattern = pattern;
            this._source = source;
            this._has_comment = has_comment;
        }

        /** Builds the accession from a matcher on which {@code find()} has already succeeded. */
        private Accession toAccession(Matcher m) {
            if (this._has_comment) {
                return new Accession(m.group(1), this._source, m.group(2));
            }
            return new Accession(m.group(1), this._source);
        }
    }

    private UniProtEntry() {
    }

    /**
     * Cloning is not supported.
     *
     * @throws CloneNotSupportedException always
     */
    @Override
    public Object clone() throws CloneNotSupportedException {
        throw new CloneNotSupportedException();
    }

    /** @return the accession taken from the first {@code AC} line, or {@code null} if none was seen */
    @Override
    public String getAccession() {
        return this._ac;
    }

    /** @return the internal cross-reference set (not a copy), or {@code null} if none was added */
    @Override
    public SortedSet<Accession> getCrossReferences() {
        return this._cross_references;
    }

    /** @return the gene name, or {@code null} if none was parsed */
    @Override
    public String getGeneName() {
        return this._gene_name;
    }

    /** @return the internal GO term set (not a copy), or {@code null} if none was added */
    @Override
    public SortedSet<GoTerm> getGoTerms() {
        return this._go_terms;
    }

    /** @return the constant provider identifier {@code "uniprot"} */
    @Override
    public String getProvider() {
        return "uniprot";
    }

    /** @return the sequence name, or {@code null} if none was parsed */
    @Override
    public String getSequenceName() {
        return this._name;
    }

    /** @return the sequence symbol, or {@code null} if none was parsed */
    @Override
    public String getSequenceSymbol() {
        return this._symbol;
    }

    /** @return the taxonomy identifier, or {@code null} if none was parsed */
    @Override
    public String getTaxonomyIdentifier() {
        return this._tax_id;
    }

    /** @return the organism scientific name, or {@code null} if none was parsed */
    @Override
    public String getTaxonomyScientificName() {
        return this._os_scientific_name;
    }

    /**
     * Reports whether nothing was parsed into this entry.
     *
     * <p>Considers the accession, sequence name, scientific name, sequence
     * symbol, gene name, taxonomy identifier, GO terms and cross-references.
     * The molecular sequence is not consulted.
     *
     * @return {@code true} if every considered field is empty
     */
    @Override
    public boolean isEmpty() {
        return ForesterUtil.isEmpty(this.getAccession())
            && ForesterUtil.isEmpty(this.getSequenceName())
            && ForesterUtil.isEmpty(this.getTaxonomyScientificName())
            && ForesterUtil.isEmpty(this.getSequenceSymbol())
            && ForesterUtil.isEmpty(this.getGeneName())
            && ForesterUtil.isEmpty(this.getTaxonomyIdentifier())
            && (this.getGoTerms() == null || this.getGoTerms().isEmpty())
            && (this.getCrossReferences() == null || this.getCrossReferences().isEmpty());
    }

    /** Adds a cross-reference, creating the backing set on first use. */
    private void addCrossReference(Accession accession) {
        if (this._cross_references == null) {
            this._cross_references = new TreeSet<Accession>();
        }
        this._cross_references.add(accession);
    }

    /** Adds a GO term, creating the backing set on first use. */
    private void addGoTerm(BasicGoTerm g) {
        if (this._go_terms == null) {
            this._go_terms = new TreeSet<GoTerm>();
        }
        this._go_terms.add(g);
    }

    /** Sets the accession unless one has already been set (first value wins). */
    private void setAc(String ac) {
        if (this._ac == null) {
            this._ac = ac;
        }
    }

    /** Sets the molecular sequence unconditionally. */
    private void setMolecularSequence(MolecularSequence mol_seq) {
        this._mol_seq = mol_seq;
    }

    /** Sets the gene name unless one has already been set (first value wins). */
    private void setGeneName(String gene_name) {
        if (this._gene_name == null) {
            this._gene_name = gene_name;
        }
    }

    /** Sets the organism scientific name unless one has already been set (first value wins). */
    private void setOsScientificName(String os_scientific_name) {
        if (this._os_scientific_name == null) {
            this._os_scientific_name = os_scientific_name;
        }
    }

    /** Sets the sequence name unless one has already been set (first value wins). */
    private void setSequenceName(String name) {
        if (this._name == null) {
            this._name = name;
        }
    }

    /** Sets the sequence symbol unconditionally; the parser guards against overwriting. */
    private void setSequenceSymbol(String symbol) {
        this._symbol = symbol;
    }

    /** Sets the taxonomy identifier unless one has already been set (first value wins). */
    private void setTaxId(String tax_id) {
        if (this._tax_id == null) {
            this._tax_id = tax_id;
        }
    }

    /**
     * Builds an entry from the lines of a plain-text record.
     *
     * <p>Each line is dispatched on its leading two-letter code; lines with
     * any other prefix are ignored, except that space-prefixed lines after an
     * {@code SQ} line are concatenated (whitespace removed) into the sequence.
     * The sequence is created as amino acid if the {@code SQ} line contains
     * {@code "AA;"}, otherwise as DNA.
     *
     * @param lines the record, one element per line
     * @return the populated entry (possibly empty, never {@code null})
     */
    public static SequenceDatabaseEntry createInstanceFromPlainText(List<String> lines) {
        UniProtEntry e = new UniProtEntry();
        boolean saw_sq = false;
        boolean is_aa = false;
        // Local, single-threaded accumulator: no need for StringBuffer's synchronization.
        StringBuilder sq_buffer = new StringBuilder();
        for (String line : lines) {
            if (line.startsWith("AC")) {
                e.setAc(SequenceDbWsTools.extractFromTo(line, "AC", ";"));
            } else if (line.startsWith("DE")) {
                UniProtEntry.parseDescription(e, line);
            } else if (line.startsWith("GN")) {
                if (ForesterUtil.isEmpty(e.getGeneName()) && line.indexOf("Name=") > 0) {
                    e.setGeneName(UniProtEntry.extractUpToBraceOrSemicolon(line, "Name="));
                }
            } else if (line.startsWith("DR")) {
                UniProtEntry.parseDatabaseReference(e, line);
            } else if (line.startsWith("OS")) {
                String end = line.indexOf("(") > 0 ? "(" : ".";
                e.setOsScientificName(SequenceDbWsTools.extractFromTo(line, "OS", end));
            } else if (line.startsWith("OX")) {
                if (line.indexOf("NCBI_TaxID=") > 0) {
                    Matcher m = NCBI_TAXID_PATTERN.matcher(line);
                    if (m.find()) {
                        e.setTaxId(m.group(1));
                    }
                }
            } else if (line.startsWith("SQ")) {
                saw_sq = true;
                if (line.contains("AA;")) {
                    is_aa = true;
                }
            } else if (saw_sq && line.startsWith(" ")) {
                sq_buffer.append(WHITESPACE_PATTERN.matcher(line).replaceAll(""));
            }
        }
        if (sq_buffer.length() > 0) {
            String residues = sq_buffer.toString();
            if (is_aa) {
                e.setMolecularSequence(BasicSequence.createAaSequence(e.getAccession(), residues));
            } else {
                e.setMolecularSequence(BasicSequence.createDnaSequence(e.getAccession(), residues));
            }
        }
        return e;
    }

    /**
     * Extracts the value following {@code key}, ending at {@code "{"} when the
     * line contains one (at an index &gt; 0) and at {@code ";"} otherwise.
     */
    private static String extractUpToBraceOrSemicolon(String line, String key) {
        String end = line.indexOf("{") > 0 ? "{" : ";";
        return SequenceDbWsTools.extractFromTo(line, key, end);
    }

    /**
     * Handles a {@code DE} line.
     *
     * <p>While the sequence name is still empty, only a {@code Full=} value on
     * a {@code RecName:} or {@code SubName:} line is considered. Once a name
     * is present, a {@code Short=} value fills the symbol if that is still
     * empty. A line never contributes to both.
     */
    private static void parseDescription(UniProtEntry e, String line) {
        if (ForesterUtil.isEmpty(e.getSequenceName())) {
            boolean has_name_tag = line.indexOf("RecName:") > 0 || line.indexOf("SubName:") > 0;
            if (has_name_tag && line.indexOf("Full=") > 0) {
                e.setSequenceName(UniProtEntry.extractUpToBraceOrSemicolon(line, "Full="));
            }
        } else if (ForesterUtil.isEmpty(e.getSequenceSymbol())) {
            if (line.indexOf("Short=") > 0) {
                e.setSequenceSymbol(UniProtEntry.extractUpToBraceOrSemicolon(line, "Short="));
            }
        }
    }

    /**
     * Handles a {@code DR} line: a GO term if the line contains {@code "GO;"},
     * otherwise a cross-reference for the first {@link CrossRefSource} whose
     * key occurs in the line. Lines that match no key, or whose pattern does
     * not match, are ignored.
     */
    private static void parseDatabaseReference(UniProtEntry e, String line) {
        if (line.indexOf("GO;") > 0) {
            UniProtEntry.parseGoTerm(e, line);
            return;
        }
        for (CrossRefSource source : CrossRefSource.values()) {
            if (line.indexOf(source._key) > 0) {
                Matcher m = source._pattern.matcher(line);
                if (m.find()) {
                    e.addCrossReference(source.toAccession(m));
                }
                // First matching key decides; do not fall through to later sources.
                return;
            }
        }
    }

    /**
     * Adds the GO term found on a {@code DR} line, mapping the one-letter
     * aspect {@code F}/{@code C} to {@code molecular_function}/
     * {@code cellular_component} and anything else the pattern admits
     * ({@code P}) to {@code biological_process}.
     */
    private static void parseGoTerm(UniProtEntry e, String line) {
        Matcher m = GO_PATTERN.matcher(line);
        if (!m.find()) {
            return;
        }
        String id = m.group(1);
        String aspect = m.group(2);
        String description = m.group(3);
        String namespace;
        if (aspect.equals("F")) {
            namespace = "molecular_function";
        } else if (aspect.equals("C")) {
            namespace = "cellular_component";
        } else {
            namespace = "biological_process";
        }
        e.addGoTerm(new BasicGoTerm(id, description, namespace, false));
    }

    /** @return always {@code null}; annotations are not populated by this implementation */
    @Override
    public SortedSet<Annotation> getAnnotations() {
        return null;
    }

    /** @return always {@code null}; map information is not populated by this implementation */
    @Override
    public String getMap() {
        return null;
    }

    /** @return always {@code null}; chromosome information is not populated by this implementation */
    @Override
    public String getChromosome() {
        return null;
    }

    /** @return the sequence assembled from the lines after {@code SQ}, or {@code null} if there were none */
    @Override
    public MolecularSequence getMolecularSequence() {
        return this._mol_seq;
    }
}

