/*
 * Decompiled with CFR 0.152.
 */
package fr.proline.module.seq.service;

import fr.profi.util.DateUtils;
import fr.profi.util.FileUtils;
import fr.profi.util.StringUtils;
import fr.profi.util.ThreadLogger;
import fr.proline.module.seq.Constants;
import fr.proline.module.seq.DatabaseAccess;
import fr.proline.module.seq.config.ParsingRuleEntry;
import fr.proline.module.seq.config.SeqRepoConfig;
import fr.proline.module.seq.dto.DDatabankInstance;
import fr.proline.module.seq.dto.DDatabankProtein;
import fr.proline.module.seq.orm.Alphabet;
import fr.proline.module.seq.orm.BioSequence;
import fr.proline.module.seq.orm.Databank;
import fr.proline.module.seq.orm.DatabankInstance;
import fr.proline.module.seq.orm.DatabankProtein;
import fr.proline.module.seq.orm.Repository;
import fr.proline.module.seq.orm.RepositoryProtein;
import fr.proline.module.seq.orm.dao.BioSequenceDao;
import fr.proline.module.seq.orm.dao.DatabankDao;
import fr.proline.module.seq.orm.dao.DatabankProteinDao;
import fr.proline.module.seq.orm.dao.RepositoryProteinDao;
import fr.proline.module.seq.service.DataSource;
import fr.proline.module.seq.service.DataSourceBuilder;
import fr.proline.module.seq.service.RetrieverContext;
import fr.proline.module.seq.util.Counters;
import fr.proline.module.seq.util.DatabankInstanceComparator;
import fr.proline.module.seq.util.HashUtil;
import fr.proline.module.seq.util.RegExUtil;
import fr.proline.repository.IDatabaseConnector;
import java.io.File;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;
import javax.persistence.EntityManager;
import javax.persistence.EntityTransaction;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Retrieves protein sequences from sequence sources and stores them in the SEQ database.
 * <p>
 * The public entry points are {@link #retrieveBioSequences(Map)} and {@link #waitExecutorShutdown()}.
 * Everything is static and the class cannot be instantiated. One task per databank instance is
 * submitted to a shared fixed-size thread pool; writes to the SEQ database are serialized on
 * {@link #SEQ_DB_WRITE_LOCK}.
 */
public final class BioSequenceRetriever {
    /** Monitor held around every SEQ database write transaction started by this class. */
    public static final Object SEQ_DB_WRITE_LOCK = new Object();
    private static final Logger LOG = LoggerFactory.getLogger(BioSequenceRetriever.class);
    /** Monitor ensuring that only one {@link #retrieveBioSequences(Map)} run is in progress at a time. */
    private static final Object RUNNING_LOCK = new Object();
    private static final ExecutorService EXECUTOR = Executors.newFixedThreadPool(Constants.calculateNThreads());
    private static final DataSourceBuilder DATA_SOURCE_BUILDER = new DataSourceBuilder();
    private static final DatabankInstanceComparator DATABANK_INSTANCE_COMPARATOR = new DatabankInstanceComparator();

    private BioSequenceRetriever() {
        // Static utility class: no instances.
    }

    /**
     * Retrieves and persists the sequences of the given proteins, one worker task per databank instance.
     * <p>
     * Runs are serialized on an internal lock. At the start of each run the configuration properties are
     * reloaded and the Fasta files are rescanned. Entries whose protein set is {@code null} or empty are
     * skipped.
     * <p>
     * All submitted tasks are awaited before this method returns or throws, so that no worker is still
     * running once the run lock is released. If one or more tasks fail, the first failure is rethrown
     * after all tasks have finished; further failures are logged.
     *
     * @param proteinsByDatabank proteins to look up, grouped by databank instance; must not be
     *                           {@code null} or empty (checked by assertion only)
     * @return the sum of the strictly positive counts returned by the worker tasks
     * @throws Exception if a worker task fails (as {@code ExecutionException}) or the calling thread is
     *                   interrupted while waiting
     */
    public static int retrieveBioSequences(Map<DDatabankInstance, Set<DDatabankProtein>> proteinsByDatabank) throws Exception {
        assert (proteinsByDatabank != null && !proteinsByDatabank.isEmpty()) : "proteinsByDatabank must not be null";
        LOG.info("Start RetrieveBioSequences from {} Databank Instance(s) ", proteinsByDatabank.size());
        int totalPersistedProteinsCount = 0;
        synchronized (RUNNING_LOCK) {
            long start = System.currentTimeMillis();
            SeqRepoConfig.forcePropertiesReload();
            DATA_SOURCE_BUILDER.forceRescanFastaFiles();

            List<Future<Integer>> futures = new ArrayList<Future<Integer>>();
            for (Map.Entry<DDatabankInstance, Set<DDatabankProtein>> entry : proteinsByDatabank.entrySet()) {
                final Set<DDatabankProtein> proteins = entry.getValue();
                if (proteins == null || proteins.isEmpty()) {
                    continue;
                }
                final DDatabankInstance databank = entry.getKey();
                Callable<Integer> task = new Callable<Integer>() {

                    @Override
                    public Integer call() throws Exception {
                        Thread currentThread = Thread.currentThread();
                        if (!(currentThread.getUncaughtExceptionHandler() instanceof ThreadLogger)) {
                            currentThread.setUncaughtExceptionHandler((Thread.UncaughtExceptionHandler) new ThreadLogger(LOG));
                        }
                        return BioSequenceRetriever.retrieveBioSequences(databank, proteins);
                    }
                };
                futures.add(EXECUTOR.submit(task));
            }

            // Drain every future before leaving the synchronized block: bailing out on the first failure
            // would release RUNNING_LOCK while other workers are still reading sources / writing to SEQ Db.
            Exception firstFailure = null;
            for (Future<Integer> future : futures) {
                try {
                    Integer result = future.get();
                    if (result != null && result.intValue() > 0) {
                        totalPersistedProteinsCount += result.intValue();
                    }
                } catch (java.util.concurrent.ExecutionException ex) {
                    if (firstFailure == null) {
                        firstFailure = ex;
                    } else {
                        LOG.error("Additional failure while retrieving BioSequences", ex);
                    }
                }
            }
            if (firstFailure != null) {
                throw firstFailure;
            }

            long duration = System.currentTimeMillis() - start;
            LOG.info("Total retrieveBioSequences() execution : {} Protein Identifiers retrieved from sources in {} ms", totalPersistedProteinsCount, duration);
        }
        return totalPersistedProteinsCount;
    }

    /**
     * Shuts the shared executor down and waits for the already submitted tasks to finish.
     *
     * @return the value returned by {@link ExecutorService#awaitTermination(long, TimeUnit)}
     * @throws Exception if the calling thread is interrupted while waiting
     */
    public static boolean waitExecutorShutdown() throws Exception {
        EXECUTOR.shutdown();
        return EXECUTOR.awaitTermination(Integer.MAX_VALUE, TimeUnit.SECONDS);
    }

    /**
     * Worker entry point: opens an EntityManager on the SEQ database, retrieves the sequences of
     * {@code proteins} for {@code databank}, reports the counters and always closes the EntityManager.
     *
     * @param databank databank instance the proteins belong to; must not be {@code null}
     * @param proteins proteins whose sequence must be retrieved
     * @return {@code counters.sum("persisted")} for this databank
     * @throws Exception on any retrieval or persistence failure
     */
    private static int retrieveBioSequences(DDatabankInstance databank, Set<DDatabankProtein> proteins) throws Exception {
        assert (databank != null) : "retrieveBioSequences() databank is null";
        LOG.info("Start RetrieveBioSequences of {} proteins in file {}", proteins.size(), databank.getSourcePath());
        int persistedProteinsCount = 0;
        EntityManager seqEM = null;
        try {
            IDatabaseConnector seqDb = DatabaseAccess.getSEQDatabaseConnector(true);
            seqEM = seqDb.createEntityManager();
            Counters counters = BioSequenceRetriever.retrieveBioSequences(seqEM, databank, proteins, true);
            persistedProteinsCount = counters.sum("persisted");
            counters.report(LOG);
        } finally {
            if (seqEM != null) {
                try {
                    seqEM.close();
                } catch (Exception exClose) {
                    // Closing is best effort: a failure here must not mask the real outcome.
                    LOG.error("Error closing SEQ Db EntityManager", exClose);
                }
            }
        }
        return persistedProteinsCount;
    }

    /**
     * Core retrieval routine for one databank instance.
     * <ol>
     * <li>Resolves the parsing rule for the source file name (release regex, accession regex, release).</li>
     * <li>Looks the databank instance up in the SEQ database and drops identifiers already stored for it.</li>
     * <li>Builds a Fasta source for the file name; if none is found and {@code doApproximate} is set,
     * retries once (with {@code doApproximate == false}) on the closest matching Fasta file.</li>
     * <li>Persists the sequences found, inside a single transaction guarded by {@link #SEQ_DB_WRITE_LOCK}.</li>
     * </ol>
     *
     * @param seqEM             EntityManager on the SEQ database
     * @param dDatabankInstance databank instance description; its release is updated by this method
     * @param proteins          proteins whose sequence must be retrieved
     * @param doApproximate     whether a nearly matching Fasta file may be used when the exact one is missing
     * @return the counters filled while persisting (those of the retry when an approximate file was used)
     * @throws Exception on any retrieval or persistence failure
     */
    private static Counters retrieveBioSequences(EntityManager seqEM, DDatabankInstance dDatabankInstance, Set<DDatabankProtein> proteins, boolean doApproximate) throws Exception {
        assert (dDatabankInstance != null) : "retrieveBioSequences() databankInstance is null";
        Counters counters = new Counters(dDatabankInstance.getName());
        Map<String, List<DDatabankProtein>> proteinsByIdentifier = BioSequenceRetriever.buildProteinsByIdentifierMap(proteins);
        String databankInstanceName = dDatabankInstance.getName();
        String sourcePath = dDatabankInstance.getSourcePath();
        String sourceFileName = FileUtils.extractFileName(sourcePath);

        String releaseRegex = null;
        String proteinIdentifierRegex = null;
        String release = null;
        ParsingRuleEntry parsingRule = ParsingRuleEntry.getParsingRuleEntry(sourceFileName);
        if (parsingRule != null) {
            releaseRegex = parsingRule.getFastaReleaseRegEx();
            proteinIdentifierRegex = parsingRule.getProteinAccRegEx();
            release = RegExUtil.parseReleaseVersion(sourceFileName, releaseRegex);
        }

        DatabankInstance seqDbDatabankInstance = BioSequenceRetriever.searchDatabankInstance(seqEM, dDatabankInstance, release);
        release = BioSequenceRetriever.getReleaseInformation(release, seqDbDatabankInstance);
        dDatabankInstance.setRelease(release);
        if (seqDbDatabankInstance != null) {
            BioSequenceRetriever.removeAlreadyPersistedIdentifiers(seqEM, seqDbDatabankInstance, proteinsByIdentifier);
        }
        if (proteinsByIdentifier.isEmpty()) {
            // Nothing left to look up.
            return counters;
        }

        Pattern proteinIdentifierPattern = BioSequenceRetriever.buildProteinIdentifierPattern(proteinIdentifierRegex, sourcePath, sourceFileName);
        DataSource fastaSource = DATA_SOURCE_BUILDER.buildFastaSource(sourceFileName, proteinIdentifierPattern, null);
        if (fastaSource == null) {
            LOG.warn("No Fasta file found matching source path [{}]", sourcePath);
            if (doApproximate) {
                LOG.info("Trying to find the closest matching filename to {}", sourceFileName);
                File bestFastaFile = BioSequenceRetriever.selectBestMatchingFastaFile(sourceFileName, release, releaseRegex);
                if (bestFastaFile != null) {
                    LOG.info("Trying to load [{}] sequences from [{}]", sourcePath, bestFastaFile.getAbsolutePath());
                    // Single retry on the substitute file; doApproximate=false prevents further recursion.
                    return BioSequenceRetriever.retrieveBioSequences(seqEM, new DDatabankInstance(databankInstanceName, null, bestFastaFile.getName()), proteins, false);
                }
                LOG.warn("No filename nearly matching to source path [{}] found", sourcePath);
            }
            return counters;
        }

        LOG.info("Searching {} BioSequences from sourcePath [{}]", proteinsByIdentifier.size(), sourcePath);
        Map<DDatabankProtein, String> foundSequences = fastaSource.retrieveSequences(proteinsByIdentifier);
        if (foundSequences == null || foundSequences.isEmpty()) {
            return counters;
        }
        LOG.info("{} BioSequences have been extracted from sourcePath [{}]", foundSequences.size(), sourcePath);

        synchronized (SEQ_DB_WRITE_LOCK) {
            EntityTransaction seqTransac = seqEM.getTransaction();
            boolean transacOK = false;
            try {
                seqTransac.begin();
                LOG.trace("SEQ Db WRITE Transaction begin");
                long start = System.currentTimeMillis();

                if (seqDbDatabankInstance == null) {
                    seqDbDatabankInstance = BioSequenceRetriever.findOrCreateDatabankInstance(seqEM, dDatabankInstance, null, release, fastaSource.getLastModifiedTime());
                } else {
                    seqDbDatabankInstance = seqEM.merge(seqDbDatabankInstance);
                }

                Map<String, List<DatabankProtein>> existingProteins = BioSequenceRetriever.searchExistingProteins(seqEM, databankInstanceName, foundSequences.keySet());
                LOG.debug("{} Proteins already exists in the databank {}", existingProteins.size(), seqDbDatabankInstance);
                Map<String, BioSequence> existingBioSequences = BioSequenceRetriever.findExistingBioSequences(seqEM, foundSequences.values());
                LOG.debug("{} BioSequence already exists in the SeqDB (compared by hash code)", existingBioSequences.size());

                Repository repository = seqDbDatabankInstance.getDatabank().getRepository();
                Map<String, RepositoryProtein> existingRepositoryIdents = null;
                if (repository != null) {
                    String repositoryName = repository.getName();
                    existingRepositoryIdents = BioSequenceRetriever.loadExistingRepositoryIdentifiers(seqEM, repositoryName, foundSequences);
                    LOG.debug("Possible existing RepositoryIdentifiers : {}", existingRepositoryIdents.size());
                }

                RetrieverContext context = new RetrieverContext(seqEM, seqDbDatabankInstance, existingProteins, existingBioSequences, repository, existingRepositoryIdents, counters);
                for (Map.Entry<DDatabankProtein, String> entry : foundSequences.entrySet()) {
                    BioSequenceRetriever.persistProteinIfNeeded(context, entry.getKey(), entry.getValue());
                }

                seqTransac.commit();
                transacOK = true;
                long duration = System.currentTimeMillis() - start;
                LOG.debug("SeqDb WRITE Transaction committed : {} proteins persisted from [{}] in {} ms", new Object[]{counters.sum("persisted"), sourcePath, duration});
            } finally {
                if (seqTransac != null && !transacOK) {
                    try {
                        // Only roll back a transaction that is still open: begin() may have failed, or a
                        // failed commit() may already have ended it.
                        if (seqTransac.isActive()) {
                            seqTransac.rollback();
                        }
                    } catch (Exception ex) {
                        LOG.error("Error rollbacking SEQ Db EntityManager Transaction", ex);
                    }
                }
            }
        }
        return counters;
    }

    /**
     * Compiles the case-insensitive pattern used to extract protein accessions from a sequence source.
     *
     * @param proteinIdentifierRegex regex taken from the parsing rule, or {@code null} to use the
     *                               default regex from {@link SeqRepoConfig}
     * @param sourcePath             source path, used for logging only
     * @param sourceFileName         source file name, used for logging only
     * @return the compiled pattern
     */
    private static Pattern buildProteinIdentifierPattern(String proteinIdentifierRegex, String sourcePath, String sourceFileName) {
        if (proteinIdentifierRegex == null) {
            String defaultRegEx = SeqRepoConfig.getInstance().getDefaultProtAccRegEx();
            LOG.debug("Sequence source [{}] will be parsed with Default Protein Accession Regex {}", sourcePath, defaultRegEx);
            return Pattern.compile(defaultRegEx, Pattern.CASE_INSENSITIVE);
        }
        LOG.debug("Sequence source [{}] will be parsed using Protein Accession Regex {} ", sourceFileName, proteinIdentifierRegex);
        return Pattern.compile(proteinIdentifierRegex, Pattern.CASE_INSENSITIVE);
    }

    /**
     * Reconciles the release parsed from the source file name with the one stored on the databank instance.
     *
     * @param release          release parsed from the file name, may be {@code null}
     * @param databankInstance databank instance found in the SEQ database, may be {@code null}
     * @return {@code release} when there is no databank instance or both agree; the instance's release
     *         when {@code release} is {@code null}
     * @throws RuntimeException if both releases are known and differ
     */
    private static String getReleaseInformation(String release, DatabankInstance databankInstance) {
        if (databankInstance == null) {
            return release;
        }
        String instanceRelease = databankInstance.getRelease();
        if (release == null) {
            return instanceRelease;
        }
        if (!release.equals(instanceRelease)) {
            throw new RuntimeException("Inconsistent Release version: expected [" + release + "] but DatabankInstance release is [" + instanceRelease + "]");
        }
        return release;
    }

    /**
     * Selects the Fasta file whose release is closest to the requested one.
     * <p>
     * The part of {@code sourceFileName} preceding {@code release} is used to locate candidate files.
     * Candidates are keyed by release (parsed with {@code parsingRuleReleaseRegex}, otherwise derived
     * from the file's last modification time); for equal releases the most recently modified file wins.
     * The file with the smallest release key greater than or equal to {@code release} is preferred,
     * otherwise the one with the greatest key less than or equal to it (String ordering).
     *
     * @param sourceFileName          name of the missing source file; must not be {@code null}
     * @param release                 release to approximate
     * @param parsingRuleReleaseRegex regex used to parse a release from candidate file names, may be {@code null}
     * @return the selected file, or {@code null} if the release is empty or absent from the file name,
     *         or if no candidate file is found
     * @throws Exception declared for callers; propagated from the helpers used
     */
    private static File selectBestMatchingFastaFile(String sourceFileName, String release, String parsingRuleReleaseRegex) throws Exception {
        assert (sourceFileName != null) : "selectBestMatchingFastaFile() sourceFileName is null";
        if (StringUtils.isEmpty(release)) {
            return null;
        }
        int releaseIndex = sourceFileName.indexOf(release);
        if (releaseIndex == -1) {
            return null;
        }
        String namePart = sourceFileName.substring(0, releaseIndex);
        if (StringUtils.isEmpty(namePart)) {
            return null;
        }
        List<File> fastaFiles = DATA_SOURCE_BUILDER.locateFastaFile(namePart);
        if (fastaFiles == null || fastaFiles.isEmpty()) {
            return null;
        }

        TreeMap<String, File> sortedFiles = new TreeMap<String, File>();
        for (File f : fastaFiles) {
            long lastModifiedTime = f.lastModified();
            String fRelease = null;
            if (parsingRuleReleaseRegex != null) {
                fRelease = RegExUtil.parseReleaseVersion(f.getName(), parsingRuleReleaseRegex);
            }
            if (fRelease == null || StringUtils.isEmpty(fRelease)) {
                // No release in the file name: fall back to a release derived from the modification date.
                fRelease = DateUtils.formatReleaseDate(new Date(lastModifiedTime));
            }
            File oldFile = sortedFiles.get(fRelease);
            if (oldFile == null) {
                sortedFiles.put(fRelease, f);
            } else if (lastModifiedTime > oldFile.lastModified()) {
                LOG.debug("Use latest version of [{}]", f.getAbsolutePath());
                sortedFiles.put(fRelease, f);
            }
        }

        Map.Entry<String, File> bestEntry = sortedFiles.ceilingEntry(release);
        if (bestEntry == null) {
            bestEntry = sortedFiles.floorEntry(release);
        }
        return (bestEntry == null) ? null : bestEntry.getValue();
    }

    /**
     * Groups the given proteins by their identifier.
     *
     * @param proteins proteins to index; must not be {@code null}
     * @return a mutable map from identifier to the proteins carrying it
     */
    private static Map<String, List<DDatabankProtein>> buildProteinsByIdentifierMap(Set<DDatabankProtein> proteins) {
        assert (proteins != null) : "buildProteinsByIdentifierMap() proteins Set is null";
        Map<String, List<DDatabankProtein>> result = new HashMap<String, List<DDatabankProtein>>();
        for (DDatabankProtein sdi : proteins) {
            String identValue = sdi.getIdentifier();
            List<DDatabankProtein> identifiers = result.get(identValue);
            if (identifiers == null) {
                identifiers = new ArrayList<DDatabankProtein>(1);
                result.put(identValue, identifiers);
            }
            identifiers.add(sdi);
        }
        return result;
    }

    /**
     * Searches the SEQ database for the databank instance, first by (name, source path) and, when that
     * yields nothing or more than one instance, by (name, release).
     *
     * @param seqEM             EntityManager on the SEQ database
     * @param dDatabankInstance databank instance description; must not be {@code null}
     * @param release           release parsed from the Fasta file name, may be {@code null} or empty
     * @return the matching instance, or {@code null} if none could be determined
     */
    private static DatabankInstance searchDatabankInstance(EntityManager seqEM, DDatabankInstance dDatabankInstance, String release) {
        assert (dDatabankInstance != null) : "searchDatabankInstance() dDatabankInstance is null";
        String seDbName = dDatabankInstance.getName();
        String sourcePath = dDatabankInstance.getSourcePath();
        DatabankInstance result = null;

        List<DatabankInstance> foundSEDbInstances = DatabankDao.findSEDbInstanceByNameAndSourcePath(seqEM, seDbName, sourcePath);
        if (foundSEDbInstances != null) {
            int nInstances = foundSEDbInstances.size();
            if (nInstances == 1) {
                result = foundSEDbInstances.get(0);
                LOG.info("DatabankInstance matching name:{} and sourcePath:{} found", seDbName, sourcePath);
            } else if (nInstances > 1) {
                LOG.warn("There are {} DatabankInstances in SeqDB matching name:{} and sourcePath:{}", new Object[]{nInstances, seDbName, sourcePath});
            }
        }
        if (result != null) {
            return result;
        }

        LOG.warn("DatabankInstance (name, sourcePath) not found or ambiguous in SeqDB, trying to search by release extracted from the fasta filename");
        if (release != null && !StringUtils.isEmpty(release)) {
            LOG.warn("Search DatabankInstance in SeqDB, from the name:{} and release:{}", seDbName, release);
            result = DatabankDao.findSEDbInstanceByNameAndRelease(seqEM, seDbName, release);
            if (result == null) {
                LOG.warn("DatabankInstance (name, release) not found in SeqDB");
            }
        } else {
            LOG.warn("No Release information supplied, DatabankInstance cannot be found in the SeqDB");
        }
        return result;
    }

    /**
     * Removes from {@code proteinsByIdentifier} every identifier returned by
     * {@code DatabankProteinDao.findProteinsInDatabank} for the given databank instance.
     *
     * @param seqEM                EntityManager on the SEQ database
     * @param databankInstance     databank instance to search; must not be {@code null}
     * @param proteinsByIdentifier identifiers still to retrieve; modified in place
     */
    private static void removeAlreadyPersistedIdentifiers(EntityManager seqEM, DatabankInstance databankInstance, Map<String, List<DDatabankProtein>> proteinsByIdentifier) {
        assert (databankInstance != null) : "removeAlreadyPersistedIdentifiers() databankInstance is null";
        assert (proteinsByIdentifier != null && !proteinsByIdentifier.isEmpty()) : "removeAlreadyPersistedIdentifiers() invalid proteinsByIdentifier";
        Set<String> distinctIdentifiers = proteinsByIdentifier.keySet();
        List<DatabankProtein> proteinsInDatabank = DatabankProteinDao.findProteinsInDatabank(seqEM, databankInstance, distinctIdentifiers);
        int removedIdentifiersCount = 0;
        if (proteinsInDatabank != null) {
            for (DatabankProtein protein : proteinsInDatabank) {
                if (proteinsByIdentifier.remove(protein.getIdentifier()) != null) {
                    ++removedIdentifiersCount;
                }
            }
        }
        if (removedIdentifiersCount > 0) {
            LOG.info("{} already known identifiers removed from search list for DatabankInstance {}", removedIdentifiersCount, databankInstance);
        }
    }

    /**
     * Finds the databank instance by (name, release) or creates and persists a new one.
     * The release used is {@code release}, or a release derived from {@code lastModifiedTime} when
     * {@code release} is empty.
     *
     * @param seqEM             EntityManager on the SEQ database
     * @param dDatabankInstance databank instance description; must not be {@code null}
     * @param databank          databank to attach a newly created instance to, or {@code null} to find
     *                          or create it by name
     * @param release           release of the source, may be empty
     * @param lastModifiedTime  last modification time of the source; must not be {@code null}
     * @return the existing or newly created databank instance
     */
    private static DatabankInstance findOrCreateDatabankInstance(EntityManager seqEM, DDatabankInstance dDatabankInstance, Databank databank, String release, Date lastModifiedTime) {
        assert (dDatabankInstance != null) : "findOrCreateDatabankInstance() dDatabankInstance is null";
        assert (lastModifiedTime != null) : "findOrCreateDatabankInstance() lastModifiedTime is null";
        String seDbName = dDatabankInstance.getName();
        String seDbRelease = StringUtils.isEmpty(release) ? DateUtils.formatReleaseDate(lastModifiedTime) : release;
        DatabankInstance databankInstance = DatabankDao.findSEDbInstanceByNameAndRelease(seqEM, seDbName, seDbRelease);
        if (databankInstance == null) {
            databankInstance = new DatabankInstance();
            databankInstance.setRelease(seDbRelease);
            databankInstance.setSourcePath(dDatabankInstance.getSourcePath());
            databankInstance.setSourceLastModifiedTime(new Timestamp(lastModifiedTime.getTime()));
            Databank jpaDatabank = (databank == null) ? BioSequenceRetriever.findOrCreateDatabank(seqEM, seDbName) : seqEM.merge(databank);
            databankInstance.setDatabank(jpaDatabank);
            BioSequenceRetriever.persist(seqEM, databankInstance);
        }
        return databankInstance;
    }

    /**
     * Finds the databank with the given name or creates and persists a new one using the
     * {@link Alphabet#AA} alphabet.
     *
     * @param seqEM        EntityManager on the SEQ database
     * @param databankName name of the databank; must not be empty
     * @return the existing or newly created databank
     */
    private static Databank findOrCreateDatabank(EntityManager seqEM, String databankName) {
        assert (!StringUtils.isEmpty(databankName)) : "findOrCreateDatabank() invalid databankName";
        Databank databank = DatabankDao.findSEDbByName(seqEM, databankName);
        if (databank == null) {
            databank = new Databank();
            databank.setName(databankName);
            databank.setAlphabet(Alphabet.AA);
            BioSequenceRetriever.persist(seqEM, databank);
        }
        return databank;
    }

    /**
     * Loads the proteins returned by {@code DatabankProteinDao.findProteinsInDatabankName} for the
     * identifiers of the given proteins, grouped by identifier.
     *
     * @param seqEM        EntityManager on the SEQ database
     * @param databankName name of the databank to search
     * @param proteins     proteins whose identifiers are searched; must not be {@code null}
     * @return a mutable map from identifier to the proteins found; never {@code null}
     */
    private static Map<String, List<DatabankProtein>> searchExistingProteins(EntityManager seqEM, String databankName, Set<DDatabankProtein> proteins) {
        assert (proteins != null) : "searchExistingProteins() proteins Map is null";
        Map<String, List<DatabankProtein>> result = new HashMap<String, List<DatabankProtein>>();
        Set<String> identifiers = new HashSet<String>();
        for (DDatabankProtein protein : proteins) {
            identifiers.add(protein.getIdentifier());
        }
        if (identifiers.isEmpty()) {
            return result;
        }
        List<DatabankProtein> foundProteins = DatabankProteinDao.findProteinsInDatabankName(seqEM, databankName, identifiers);
        if (foundProteins != null) {
            for (DatabankProtein protein : foundProteins) {
                String identifier = protein.getIdentifier();
                List<DatabankProtein> proteinList = result.get(identifier);
                if (proteinList == null) {
                    proteinList = new ArrayList<DatabankProtein>();
                    result.put(identifier, proteinList);
                }
                proteinList.add(protein);
            }
        }
        return result;
    }

    /**
     * Loads the BioSequences returned by {@code BioSequenceDao.findBioSequenceByHashes} for the SHA-256
     * hashes of the given sequences, indexed by hash.
     *
     * @param seqEM     EntityManager on the SEQ database
     * @param sequences sequences to look up; must not be {@code null}
     * @return a mutable map from hash to BioSequence; never {@code null}
     */
    private static Map<String, BioSequence> findExistingBioSequences(EntityManager seqEM, Collection<String> sequences) {
        assert (sequences != null) : "findExistingBioSequences() sequences collection is null";
        Map<String, BioSequence> result = new HashMap<String, BioSequence>();
        Set<String> hashesSet = new HashSet<String>();
        for (String sequence : sequences) {
            hashesSet.add(HashUtil.calculateSHA256(sequence));
        }
        if (hashesSet.isEmpty()) {
            return result;
        }
        List<BioSequence> foundBSs = BioSequenceDao.findBioSequenceByHashes(seqEM, hashesSet);
        if (foundBSs != null) {
            for (BioSequence bs : foundBSs) {
                result.put(bs.getHash(), bs);
            }
        }
        return result;
    }

    /**
     * Loads the repository proteins returned by
     * {@code RepositoryProteinDao.findRepositoryIdentByRepoNameAndValues} for the non-null repository
     * identifiers of the found proteins, indexed by value.
     *
     * @param seqEM          EntityManager on the SEQ database
     * @param repositoryName name of the repository to search
     * @param foundSequences found sequences keyed by protein; must not be {@code null}
     * @return a mutable map from repository identifier value to RepositoryProtein; never {@code null}
     */
    private static Map<String, RepositoryProtein> loadExistingRepositoryIdentifiers(EntityManager seqEM, String repositoryName, Map<DDatabankProtein, String> foundSequences) {
        assert (foundSequences != null) : "loadExistingRepositoryIdentifiers() foundSequences Map is null";
        Map<String, RepositoryProtein> result = new HashMap<String, RepositoryProtein>();
        Set<String> valuesSet = new HashSet<String>();
        for (DDatabankProtein ident : foundSequences.keySet()) {
            String repositoryIdentValue = ident.getRepositoryIdentifier();
            if (repositoryIdentValue != null) {
                valuesSet.add(repositoryIdentValue);
            }
        }
        if (valuesSet.isEmpty()) {
            return result;
        }
        List<RepositoryProtein> foundIdentifiers = RepositoryProteinDao.findRepositoryIdentByRepoNameAndValues(seqEM, repositoryName, valuesSet);
        if (foundIdentifiers != null) {
            for (RepositoryProtein ident : foundIdentifiers) {
                result.put(ident.getValue(), ident);
            }
        }
        return result;
    }

    /**
     * Persists the protein, or updates an already persisted one, depending on what the context knows.
     * <ul>
     * <li>No known protein with this identifier: a new protein is persisted.</li>
     * <li>A known protein has the same sequence: if the comparator orders its databank instance before
     * the current one, the protein is moved to the current instance and its description and repository
     * identifier are refreshed; otherwise it is only counted. Further proteins with the same sequence
     * are reported as an error.</li>
     * <li>Known proteins exist but none has this sequence: a new protein is persisted.</li>
     * </ul>
     *
     * @param context  retrieval context; must not be {@code null}
     * @param protein  protein description; must not be {@code null}
     * @param sequence sequence found for the protein; must not be {@code null}
     */
    private static void persistProteinIfNeeded(RetrieverContext context, DDatabankProtein protein, String sequence) {
        assert (context != null) : "persistProteinIfNeeded() context is null";
        assert (protein != null) : "persistProteinIfNeeded() protein is null";
        assert (sequence != null) : "persistProteinIfNeeded() sequence is null";
        String proteinIdentifier = protein.getIdentifier();
        List<DatabankProtein> matchingProteins = context.getExistingProteins().get(proteinIdentifier);
        if (matchingProteins == null || matchingProteins.isEmpty()) {
            LOG.trace("Persist new Protein Identifier [{}] in Databank {}", proteinIdentifier, context.getDatabankInstance());
            BioSequenceRetriever.persistDatabankProtein(context, protein, sequence);
            context.getCounters().inc("New Proteins persisted");
            return;
        }

        DatabankInstance databankInstance = context.getDatabankInstance();
        boolean sequenceMatched = false;
        for (DatabankProtein matchingProtein : matchingProteins) {
            String matchingSequence = matchingProtein.getBioSequence().getSequence();
            if (!sequence.equals(matchingSequence)) {
                continue;
            }
            if (sequenceMatched) {
                String databankName = databankInstance.getDatabank().getName();
                LOG.error("There are several proteins named [{}] with the same BioSequence for Databank [{}], this should not happen", proteinIdentifier, databankName);
                continue;
            }
            sequenceMatched = true;
            DatabankInstance matchedDatabankInstance = matchingProtein.getDatabankInstance();
            if (DATABANK_INSTANCE_COMPARATOR.compare(matchedDatabankInstance, databankInstance) < 0) {
                matchingProtein.setDatabankInstance(databankInstance);
                if (matchingProtein.getDescription() == null || !matchingProtein.getDescription().equals(protein.getDescription())) {
                    matchingProtein.setDescription(protein.getDescription());
                }
                context.getCounters().inc("Already persisted Proteins was updated");
                BioSequenceRetriever.updateRepositoryIdentifier(context, matchingProtein, protein);
            } else {
                context.getCounters().inc("Already persisted Proteins (but in a newer databank)");
            }
        }
        if (!sequenceMatched) {
            LOG.trace("Persist new Protein [{}] because its Sequence is new", proteinIdentifier);
            BioSequenceRetriever.persistDatabankProtein(context, protein, sequence);
            context.getCounters().inc("New persisted Proteins (because of a new Sequence)");
        }
    }

    /**
     * Creates and persists a new DatabankProtein for the context's databank instance, linked to its
     * BioSequence and, when both a repository and a repository identifier are available, to its
     * RepositoryProtein.
     *
     * @param context  retrieval context; must not be {@code null}
     * @param dProtein protein description; must not be {@code null}
     * @param sequence sequence of the protein; must not be {@code null}
     * @return the newly created protein
     */
    private static DatabankProtein persistDatabankProtein(RetrieverContext context, DDatabankProtein dProtein, String sequence) {
        assert (context != null) : "persistDatabankProtein() context is null";
        assert (dProtein != null) : "persistDatabankProtein() dProtein is null";
        assert (sequence != null) : "persistDatabankProtein() sequence is null";
        DatabankProtein protein = new DatabankProtein();
        protein.setIdentifier(dProtein.getIdentifier());
        protein.setInferred(dProtein.isInferred());
        protein.setDescription(dProtein.getDescription());
        protein.setDatabankInstance(context.getDatabankInstance());
        protein.setBioSequence(BioSequenceRetriever.getOrCreateBioSequence(context, sequence));
        Repository repository = context.getRepository();
        String repositoryIdentValue = dProtein.getRepositoryIdentifier();
        if (repository != null && repositoryIdentValue != null) {
            protein.setRepositoryIdentifier(BioSequenceRetriever.getOrCreateRepositoryIdentifier(context, repositoryIdentValue));
        }
        BioSequenceRetriever.persist(context.getSeqEM(), protein);
        return protein;
    }

    /**
     * Returns the BioSequence registered in the context under the SHA-256 hash of {@code sequence},
     * creating, persisting and registering a new one when absent.
     *
     * @param context  retrieval context; must not be {@code null}
     * @param sequence sequence to look up; must not be {@code null}
     * @return the existing or newly created BioSequence
     */
    private static BioSequence getOrCreateBioSequence(RetrieverContext context, String sequence) {
        assert (context != null) : "getOrCreateBioSequence() context is null";
        assert (sequence != null) : "getOrCreateBioSequence() sequence is null";
        String hash = HashUtil.calculateSHA256(sequence);
        Map<String, BioSequence> existingBioSequences = context.getExistingBioSequences();
        BioSequence bioSequence = existingBioSequences.get(hash);
        if (bioSequence == null) {
            bioSequence = new BioSequence();
            bioSequence.setSequence(sequence);
            bioSequence.setHash(hash);
            BioSequenceRetriever.persist(context.getSeqEM(), bioSequence);
            // Register it so that later proteins with the same sequence reuse this entity.
            existingBioSequences.put(hash, bioSequence);
        }
        return bioSequence;
    }

    /**
     * Returns the RepositoryProtein registered in the context under {@code value}, creating, persisting
     * and registering a new one when absent.
     *
     * @param context retrieval context; must not be {@code null}
     * @param value   repository identifier value; must not be {@code null}
     * @return the existing or newly created RepositoryProtein
     * @throws IllegalArgumentException if the context has no repository identifiers map, or has no
     *                                  repository while a new RepositoryProtein must be created
     */
    private static RepositoryProtein getOrCreateRepositoryIdentifier(RetrieverContext context, String value) {
        assert (context != null) : "getOrCreateRepositoryIdentifier() context is null";
        assert (value != null) : "getOrCreateRepositoryIdentifier() value is null";
        Map<String, RepositoryProtein> existingRepositoryIdents = context.getExistingRepositoryIdents();
        if (existingRepositoryIdents == null) {
            throw new IllegalArgumentException("RetrieverContext.existingRepositoryIdents Map is null");
        }
        RepositoryProtein repositoryProtein = existingRepositoryIdents.get(value);
        if (repositoryProtein == null) {
            repositoryProtein = new RepositoryProtein();
            repositoryProtein.setValue(value);
            Repository repository = context.getRepository();
            if (repository == null) {
                throw new IllegalArgumentException("RetrieverContext.repository is null");
            }
            repositoryProtein.setRepository(repository);
            BioSequenceRetriever.persist(context.getSeqEM(), repositoryProtein);
            existingRepositoryIdents.put(value, repositoryProtein);
        }
        return repositoryProtein;
    }

    /**
     * Aligns the repository identifier of an already persisted protein with the one carried by
     * {@code dProtein}: cleared when {@code dProtein} has none or when the context has no repository,
     * left untouched when the value is unchanged, replaced otherwise.
     *
     * @param context  retrieval context; must not be {@code null}
     * @param protein  persisted protein to update; must not be {@code null}
     * @param dProtein protein description carrying the wanted repository identifier; must not be {@code null}
     */
    private static void updateRepositoryIdentifier(RetrieverContext context, DatabankProtein protein, DDatabankProtein dProtein) {
        assert (context != null) : "updateRepositoryIdentifier() context is null";
        assert (protein != null) : "updateRepositoryIdentifier() protein is null";
        assert (dProtein != null) : "updateRepositoryIdentifier() dProtein is null";
        String repositoryIdentValue = dProtein.getRepositoryIdentifier();
        if (repositoryIdentValue == null) {
            protein.setRepositoryIdentifier(null);
            return;
        }
        RepositoryProtein oldRepositoryIdent = protein.getRepositoryIdentifier();
        if (oldRepositoryIdent != null && repositoryIdentValue.equals(oldRepositoryIdent.getValue())) {
            // Same value as before: nothing to change.
            return;
        }
        Repository repository = context.getRepository();
        if (repository == null) {
            protein.setRepositoryIdentifier(null);
            return;
        }
        String seDbIdentValue = protein.getIdentifier();
        LOG.info("New RepositoryProtein [{}] for DatabankProtein [{}]", repositoryIdentValue, seDbIdentValue);
        protein.setRepositoryIdentifier(BioSequenceRetriever.getOrCreateRepositoryIdentifier(context, repositoryIdentValue));
    }

    /**
     * Persists the entity through the EntityManager, unless persistence is switched off by
     * {@code Constants.PERSISTENCE}.
     *
     * @param em EntityManager to persist with
     * @param o  entity to persist
     */
    private static void persist(EntityManager em, Object o) {
        if (Constants.PERSISTENCE) {
            em.persist(o);
        }
    }
}

