/*
 * Decompiled with CFR 0.152.
 */
package fr.proline.module.seq.service;

import fr.profi.util.StringUtils;
import fr.proline.module.seq.dto.DDatabankProtein;
import fr.proline.module.seq.service.DataSource;
import fr.proline.module.seq.util.PeptideUtils;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class FastaSource
implements DataSource {
    private static final Logger LOG = LoggerFactory.getLogger(FastaSource.class);
    private static final int MESSAGE_BUILDER_SIZE = 1024;
    private final File m_fastaFile;
    private final Date m_sourceCreationTime = new Date();
    private final Pattern m_proteinIdentifierPattern;
    private final Pattern m_repositoryIdentPattern;

    public FastaSource(File fastaFile, Pattern proteinIdentifierPattern, Pattern repositoryIdentPattern) {
        assert (fastaFile != null && fastaFile.isFile()) : "Invalid fastaFile";
        assert (proteinIdentifierPattern != null) : "SEDbIdentPattern is null";
        this.m_fastaFile = fastaFile;
        this.m_proteinIdentifierPattern = proteinIdentifierPattern;
        this.m_repositoryIdentPattern = repositoryIdentPattern;
    }

    @Override
    public Date getLastModifiedTime() {
        Date result = null;
        long lastModified = this.m_fastaFile.lastModified();
        result = lastModified == 0L ? (Date)this.m_sourceCreationTime.clone() : new Date(lastModified);
        return result;
    }

    @Override
    public Map<DDatabankProtein, String> retrieveSequences(Map<String, List<DDatabankProtein>> proteinsByIdentifier) throws IOException {
        return this.parseFile(proteinsByIdentifier);
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    private Map<DDatabankProtein, String> parseFile(Map<String, List<DDatabankProtein>> proteinsByIdentifier) throws IOException {
        String fastaAbsolutePathname = this.m_fastaFile.getAbsolutePath();
        HashMap<DDatabankProtein, String> foundSequences = new HashMap<DDatabankProtein, String>();
        long lineIndex = 0L;
        BufferedReader reader = null;
        try {
            FileInputStream is = new FileInputStream(this.m_fastaFile);
            reader = new BufferedReader(new InputStreamReader((InputStream)is, "ISO-8859-1"));
            HashMap<String, List<DDatabankProtein>> remainingProteinIdentifiers = new HashMap<String, List<DDatabankProtein>>(proteinsByIdentifier);
            int remainingProteinIdentifiersCount = remainingProteinIdentifiers.size();
            DDatabankProtein currentProtein = null;
            StringBuilder sequenceBuilder = null;
            LOG.debug("Searching {} distinct Proteins with \"{}\" Regex in [{}] ", new Object[]{remainingProteinIdentifiersCount, this.m_proteinIdentifierPattern.pattern(), fastaAbsolutePathname});
            long start = System.currentTimeMillis();
            String rawLine = reader.readLine();
            while (rawLine != null) {
                String trimmedLine = rawLine.trim();
                if (!trimmedLine.isEmpty()) {
                    if (trimmedLine.startsWith(">")) {
                        if (currentProtein != null) {
                            FastaSource.addSequence(currentProtein, sequenceBuilder, foundSequences, remainingProteinIdentifiers);
                            currentProtein = null;
                            sequenceBuilder = null;
                        }
                        if (remainingProteinIdentifiers.isEmpty()) {
                            LOG.debug("All identifiers found from [" + fastaAbsolutePathname + ']');
                            break;
                        }
                        DDatabankProtein proteinIdentifier = this.checkHeader(rawLine, remainingProteinIdentifiers);
                        if (proteinIdentifier != null) {
                            currentProtein = proteinIdentifier;
                            sequenceBuilder = new StringBuilder();
                        }
                    } else if (sequenceBuilder != null) {
                        sequenceBuilder.append(trimmedLine);
                    }
                }
                ++lineIndex;
                rawLine = reader.readLine();
            }
            if (currentProtein != null) {
                FastaSource.addSequence(currentProtein, sequenceBuilder, foundSequences, remainingProteinIdentifiers);
            }
            long duration = System.currentTimeMillis() - start;
            String message = String.format("[%s] %d lines parsed in %d ms (%,.1f lines/s) found %d sequences on %d", fastaAbsolutePathname, lineIndex, duration, (double)(lineIndex * 1000L) / (double)duration, foundSequences.size(), remainingProteinIdentifiersCount);
            LOG.info(message);
        }
        finally {
            if (reader != null) {
                try {
                    reader.close();
                }
                catch (IOException exClose) {
                    LOG.error("Error closing [" + fastaAbsolutePathname + ']', (Throwable)exClose);
                }
            }
        }
        return foundSequences;
    }

    private static void addSequence(DDatabankProtein protein, StringBuilder sequenceBuilder, Map<DDatabankProtein, String> foundSequences, Map<String, List<DDatabankProtein>> remainingProteinIdentifiers) {
        int starIndex;
        String identifier = protein.getIdentifier();
        String normalizedSequence = sequenceBuilder.toString().toUpperCase();
        if (normalizedSequence.contains(" ")) {
            LOG.info("White spaces will be replaced by '' in the Sequence for [{}].", (Object)identifier);
            normalizedSequence = normalizedSequence.replaceAll("\\s+", "");
        }
        if ((starIndex = normalizedSequence.indexOf(42)) != -1) {
            normalizedSequence = normalizedSequence.substring(0, starIndex);
        }
        if (PeptideUtils.checkSequence(normalizedSequence)) {
            foundSequences.put(protein, normalizedSequence);
            remainingProteinIdentifiers.remove(identifier);
        } else {
            LOG.warn("Invalid Sequence for [{}] :\n{}", (Object)identifier, (Object)normalizedSequence);
        }
    }

    private DDatabankProtein checkHeader(String header, Map<String, List<DDatabankProtein>> remainingProteinIdentifiers) {
        DDatabankProtein foundProtein = null;
        String descriptionFromFasta = null;
        Matcher matcher = this.m_proteinIdentifierPattern.matcher(header);
        if (matcher.find()) {
            List<DDatabankProtein> possibleIdentifiers;
            if (matcher.groupCount() < 1) {
                throw new IllegalArgumentException("Invalid DatabankProtein Regex");
            }
            String fastaIdentifier = matcher.group(1).trim();
            if (header != null && !header.isEmpty() && header.trim().length() > fastaIdentifier.trim().length()) {
                descriptionFromFasta = header.substring(header.indexOf(fastaIdentifier) + fastaIdentifier.length()).trim();
            }
            if ((possibleIdentifiers = remainingProteinIdentifiers.get(fastaIdentifier)) != null && !possibleIdentifiers.isEmpty()) {
                for (DDatabankProtein sdi : possibleIdentifiers) {
                    String description = sdi.getDescription();
                    if (description == null || !header.contains(description)) continue;
                    foundProtein = sdi;
                    break;
                }
                if (foundProtein == null) {
                    LOG.trace("Cannot find a Protein with a matching description for [{}], trying to search for one with no description", (Object)fastaIdentifier);
                    for (DDatabankProtein sdi : possibleIdentifiers) {
                        if (sdi.getDescription() != null) continue;
                        LOG.trace("A Protein with no description is found for [{}]", (Object)possibleIdentifiers.size(), (Object)fastaIdentifier);
                        if (descriptionFromFasta != null && !descriptionFromFasta.isEmpty()) {
                            foundProtein = new DDatabankProtein(fastaIdentifier, descriptionFromFasta);
                            break;
                        }
                        foundProtein = sdi;
                        int nPossibleIdentifiers = possibleIdentifiers.size();
                        if (nPossibleIdentifiers <= 1) break;
                        foundProtein.setInferred(true);
                        LOG.trace("There are {} Proteins (inferred) for [{}] taking the first one with no description", (Object)nPossibleIdentifiers, (Object)fastaIdentifier);
                        break;
                    }
                }
                if (foundProtein == null) {
                    StringBuilder messageBuilder = new StringBuilder(1024);
                    messageBuilder.append("No valid description match for [").append(fastaIdentifier);
                    messageBuilder.append("] taking first protein (inferred)");
                    messageBuilder.append(StringUtils.LINE_SEPARATOR);
                    messageBuilder.append("Parsed FASTA Header, then expected protein descriptions :");
                    messageBuilder.append(StringUtils.LINE_SEPARATOR);
                    messageBuilder.append(header);
                    messageBuilder.append(StringUtils.LINE_SEPARATOR);
                    for (DDatabankProtein sdi : possibleIdentifiers) {
                        String description = sdi.getDescription();
                        if (description == null) {
                            messageBuilder.append("NULL");
                        } else {
                            messageBuilder.append('[').append(description).append(']');
                        }
                        messageBuilder.append(StringUtils.LINE_SEPARATOR);
                    }
                    LOG.trace(messageBuilder.toString());
                    foundProtein = new DDatabankProtein(fastaIdentifier, descriptionFromFasta);
                    foundProtein.setInferred(true);
                    LOG.warn("Arbitrarily select the first ProteinIdentifier as the one matching to the identifier parsed in the fasta file ??");
                }
                this.parseRepositoryIdent(header, foundProtein);
            }
        }
        return foundProtein;
    }

    protected void parseRepositoryIdent(String header, DDatabankProtein seDbIdentifier) {
        Matcher matcher;
        if (this.m_repositoryIdentPattern != null && (matcher = this.m_repositoryIdentPattern.matcher(header)).find()) {
            if (matcher.groupCount() < 1) {
                throw new IllegalArgumentException("Invalid RepositoryProtein Regex");
            }
            String repositoryIdent = matcher.group(1).trim();
            if (!repositoryIdent.isEmpty()) {
                seDbIdentifier.setRepositoryIdentifier(repositoryIdent);
            }
        }
    }
}

