package org.biopax.paxtools.normalizer;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeSet;
import org.apache.commons.lang3.StringUtils;
import org.biopax.paxtools.controller.ModelUtils;
import org.biopax.paxtools.controller.ShallowCopy;
import org.biopax.paxtools.converter.LevelUpgrader;
import org.biopax.paxtools.io.SimpleIOHandler;
import org.biopax.paxtools.model.BioPAXElement;
import org.biopax.paxtools.model.BioPAXLevel;
import org.biopax.paxtools.model.Model;
import org.biopax.paxtools.model.level3.BioSource;
import org.biopax.paxtools.model.level3.ControlledVocabulary;
import org.biopax.paxtools.model.level3.EntityReference;
import org.biopax.paxtools.model.level3.Named;
import org.biopax.paxtools.model.level3.NucleicAcidReference;
import org.biopax.paxtools.model.level3.ProteinReference;
import org.biopax.paxtools.model.level3.Provenance;
import org.biopax.paxtools.model.level3.PublicationXref;
import org.biopax.paxtools.model.level3.RelationshipTypeVocabulary;
import org.biopax.paxtools.model.level3.RelationshipXref;
import org.biopax.paxtools.model.level3.SimplePhysicalEntity;
import org.biopax.paxtools.model.level3.SmallMoleculeReference;
import org.biopax.paxtools.model.level3.UnificationXref;
import org.biopax.paxtools.model.level3.XReferrable;
import org.biopax.paxtools.model.level3.Xref;
import org.biopax.paxtools.util.BPCollections;
import org.biopax.paxtools.util.ClassFilterSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/biopax/paxtools/normalizer/Normalizer.class */
public final class Normalizer {
    /** Name of the system property that {@link #uri} reads to choose how generated URIs are formed. */
    public static final String PROPERTY_NORMALIZER_URI_STRATEGY = "biopax.normalizer.uri.strategy";

    /** Strategy value: keep the sanitized "db_id" text as the local part of a generated URI. */
    public static final String VALUE_NORMALIZER_URI_STRATEGY_SIMPLE = "simple";

    /** Strategy value (the default used by {@link #uri}): use an MD5 hex digest as the local part. */
    public static final String VALUE_NORMALIZER_URI_STRATEGY_MD5 = "md5";

    private static final Logger log = LoggerFactory.getLogger(Normalizer.class);

    /** Reader used by {@link #normalize(String)} to parse the incoming OWL text. */
    private SimpleIOHandler biopaxReader = new SimpleIOHandler(BioPAXLevel.L3);

    /** Free-form text appended to log and exception messages produced by this instance. */
    private String description = "";

    /** Whether {@link #normalize(Model)} runs the display-name auto-fix step. */
    private boolean fixDisplayName;

    /** Prefix for generated URIs; when non-empty it is also set on the model being normalized. */
    private String xmlBase;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/biopax/paxtools/normalizer/Normalizer$NormalizerMap.class */
    /**
     * Collects "element -> replacement" pairs for a single model and applies them in one pass.
     * Replacements are indexed by URI so that several elements mapped to the same URI share
     * one replacement object.
     */
    public static class NormalizerMap {
        final Model model;
        final Map<BioPAXElement, BioPAXElement> subs = BPCollections.I.createMap();
        final Map<String, BioPAXElement> uriToSub = BPCollections.I.createMap();
        final ShallowCopy copier = new ShallowCopy();

        NormalizerMap(Model model) {
            this.model = model;
        }

        /**
         * Registers a replacement for the given element under the given URI. The replacement is,
         * in order of preference: the object the model already holds under that URI, a replacement
         * registered earlier for that URI, or a fresh shallow copy of the element with the new URI.
         */
        void put(BioPAXElement bioPAXElement, String str) {
            final BioPAXElement replacement;
            if (model.containsID(str)) {
                replacement = model.getByID(str);
            } else if (uriToSub.containsKey(str)) {
                replacement = uriToSub.get(str);
            } else {
                replacement = copier.copy(bioPAXElement, str);
            }
            map(bioPAXElement, replacement);
        }

        /**
         * Applies all registered substitutions: removes the replaced elements from the model,
         * rewires references, adds replacements that are not yet in the model, and finally
         * asks ModelUtils to fix dangling inverse properties on every object.
         * Any exception raised after the removal step is logged and not rethrown.
         */
        void doSubs() {
            for (BioPAXElement replaced : subs.keySet()) {
                model.remove(replaced);
            }
            try {
                ModelUtils.replace(model, subs);
                for (BioPAXElement replacement : subs.values()) {
                    if (model.contains(replacement)) {
                        continue;
                    }
                    model.add(replacement);
                }
                for (BioPAXElement element : model.getObjects()) {
                    ModelUtils.fixDanglingInverseProperties(element, model);
                }
            } catch (Exception e) {
                Normalizer.log.error("Failed to replace BioPAX elements.", (Throwable) e);
            }
        }

        /** Records the pair in both indexes (by original element and by replacement URI). */
        private void map(BioPAXElement bioPAXElement, BioPAXElement bioPAXElement2) {
            subs.put(bioPAXElement, bioPAXElement2);
            uriToSub.put(bioPAXElement2.getUri(), bioPAXElement2);
        }
    }

    /**
     * Creates a normalizer with an empty xml:base, display-name fixing enabled,
     * and a reader configured to merge duplicates.
     */
    public Normalizer() {
        xmlBase = "";
        fixDisplayName = true;
        biopaxReader.mergeDuplicates(true);
    }

    /**
     * Parses BioPAX OWL text, upgrades it to Level 3 if needed, normalizes it and
     * serializes the result back to OWL text.
     *
     * @param str BioPAX OWL content
     * @return normalized BioPAX OWL content
     * @throws IllegalArgumentException if the input is null or empty, or no model could be built
     */
    public String normalize(String str) {
        boolean noData = (str == null) || str.length() == 0;
        if (noData) {
            throw new IllegalArgumentException("no data. " + this.description);
        }
        try {
            // Every "taxonXref" occurrence in the text is rewritten to "xref" before parsing.
            String patched = str.replaceAll("taxonXref", "xref");
            ByteArrayInputStream input = new ByteArrayInputStream(patched.getBytes("UTF-8"));
            Model parsed = this.biopaxReader.convertFromOWL(input);
            if (parsed == null) {
                throw new IllegalArgumentException("Failed to create Model! " + this.description);
            }
            Model level3 = parsed;
            if (parsed.getLevel() != BioPAXLevel.L3) {
                log.info("Converting model to BioPAX Level3...");
                level3 = new LevelUpgrader().filter(parsed);
            }
            normalize(level3);
            return convertToOWL(level3);
        } catch (UnsupportedEncodingException e) {
            throw new IllegalArgumentException("Failed! " + this.description, e);
        }
    }

    /**
     * Lower-cases the db name of every xref that has both db and id, standardizes unification
     * xref db names via MiriamLink (with special handling of UniProt isoform ids), and registers
     * a replacement xref under a generated URI. All replacements are applied at the end.
     *
     * @param model the model whose xrefs are normalized in place
     */
    private void normalizeXrefs(Model model) {
        NormalizerMap normalizerMap = new NormalizerMap(model);
        String xmlBase = getXmlBase(model);
        // Iterate over a typed snapshot: the xrefs themselves are modified inside the loop.
        for (Xref xref : new HashSet<Xref>(model.getObjects(Xref.class))) {
            if (xref.getDb() == null || xref.getId() == null) {
                continue; // not well-defined; leave as is
            }
            xref.setDb(xref.getDb().toLowerCase());
            String id = xref.getId();
            if (xref instanceof RelationshipXref) {
                // NOTE(review): the version / relationship-type suffix is added only when the
                // current URI already starts with identifiers.org - confirm this is intended
                // (and not the negation).
                if (xref.getUri().startsWith("http://identifiers.org/")) {
                    RelationshipTypeVocabulary relationshipType = ((RelationshipXref) xref).getRelationshipType();
                    if (xref.getIdVersion() != null) {
                        id = id + "_" + xref.getIdVersion();
                    }
                    if (relationshipType != null && !relationshipType.getTerm().isEmpty()) {
                        id = id + "_" + StringUtils.join((Iterable<?>) relationshipType.getTerm(), '_').toLowerCase();
                    }
                }
            } else if (xref instanceof UnificationXref) {
                try {
                    xref.setDb(MiriamLink.getName(xref.getDb()).toLowerCase());
                    if (xref.getDb().startsWith("uniprot")) {
                        if (isValidDbId("uniprot isoform", xref.getId()) && xref.getId().contains("-")) {
                            xref.setDb("uniprot isoform");
                        } else if (xref.getDb().equals("uniprot isoform")) {
                            // Try to fold a numeric idVersion into the id ("ID-N" form).
                            if (xref.getIdVersion() != null && xref.getIdVersion().matches("^\\d+$")) {
                                id = xref.getId() + "-" + xref.getIdVersion();
                            }
                            if (isValidDbId(xref.getDb(), id)) {
                                xref.setId(id);
                                xref.setIdVersion(null);
                            } else if (!isValidDbId(xref.getDb(), xref.getId())) {
                                xref.setDb("uniprot knowledgebase");
                            }
                            id = xref.getId();
                        }
                    }
                } catch (IllegalArgumentException e) {
                    // Non-standard db name (or id check failure): fall back to a generated URI
                    // that includes the id version, and do not register the xref a second time.
                    if (xref.getIdVersion() != null) {
                        id = id + "_" + xref.getIdVersion();
                    }
                    normalizerMap.put(xref, uri(xmlBase, xref.getDb(), id, xref.getModelInterface()));
                    continue;
                }
            }
            normalizerMap.put(xref, uri(xmlBase, xref.getDb(), id, xref.getModelInterface()));
        }
        normalizerMap.doSubs();
    }

    /**
     * Delegates to {@code MiriamLink.checkRegExp}; note the argument order is swapped in the call:
     * the second parameter here ({@code str2}) is passed first and {@code str} second.
     * Callers in this class pass a db name as {@code str} and an identifier as {@code str2}.
     */
    private boolean isValidDbId(String str, String str2) {
        return MiriamLink.checkRegExp(str2, str);
    }

    /**
     * Builds a URI for a BioPAX object of the given type from a db name and/or an id part.
     * For selected type/db combinations an identifiers.org URI is obtained from MiriamLink;
     * otherwise the URI is {@code prefix + TypeName + "_" + localPart}, where the local part is
     * either the sanitized "db_id" text (for Xref types or the "simple" strategy) or its MD5 hex.
     *
     * @param str  URI prefix (xml:base); null is treated as empty
     * @param str2 db name; may be null if {@code str3} is not
     * @param str3 id part; may be null if {@code str2} is not
     * @param cls  BioPAX model interface of the object
     * @return the URI
     * @throws IllegalArgumentException if {@code cls} is null, or both {@code str2} and {@code str3} are null
     */
    public static String uri(String str, String str2, String str3, Class<? extends BioPAXElement> cls) {
        if (cls == null || (str2 == null && str3 == null)) {
            throw new IllegalArgumentException("'Either type' is null, or both dbName and idPart are nulls.");
        }
        String idPart = (str3 == null) ? null : str3.trim();
        String dbName = (str2 == null) ? null : str2.trim();

        if (dbName != null) {
            try {
                dbName = MiriamLink.getName(dbName);
                boolean useIdentifiersOrg =
                        (cls.equals(PublicationXref.class) && "pubmed".equalsIgnoreCase(dbName))
                        || cls.equals(RelationshipTypeVocabulary.class)
                        || ProteinReference.class.isAssignableFrom(cls)
                        || SmallMoleculeReference.class.isAssignableFrom(cls)
                        || (cls.equals(BioSource.class) && "taxonomy".equalsIgnoreCase(dbName)
                                && idPart != null && idPart.matches("^\\d+$"));
                if (useIdentifiersOrg) {
                    return MiriamLink.getIdentifiersOrgURI(dbName, idPart);
                }
            } catch (IllegalArgumentException e) {
                // Not a standard db/id: report and fall through to the generated URI below.
                log.info(String.format("uri(for a %s): db:%s, id:%s are not standard; %s)", cls.getSimpleName(), dbName, idPart, e.getMessage()));
            }
        }

        // Local part: lower-cased db name and the id, joined by an underscore when both exist.
        StringBuilder joined = new StringBuilder();
        if (dbName != null) {
            joined.append(dbName.toLowerCase());
            if (idPart != null) {
                joined.append("_");
            }
        }
        if (idPart != null) {
            joined.append(idPart);
        }
        String localPart = joined.toString();

        String prefix = (str == null) ? "" : str;
        String strategy = System.getProperty(PROPERTY_NORMALIZER_URI_STRATEGY, VALUE_NORMALIZER_URI_STRATEGY_MD5);
        boolean keepReadable = VALUE_NORMALIZER_URI_STRATEGY_SIMPLE.equals(strategy) || Xref.class.isAssignableFrom(cls);
        String suffix;
        if (keepReadable) {
            suffix = localPart.replaceAll("[^-\\w]", "_");
        } else {
            suffix = ModelUtils.md5hex(localPart);
        }
        return prefix + cls.getSimpleName() + "_" + suffix;
    }

    /**
     * Returns the description text that this normalizer appends to its log and exception messages.
     *
     * @return the current description (initialized to an empty string)
     */
    public String getDescription() {
        return this.description;
    }

    /**
     * Sets the description text that this normalizer appends to its log and exception messages.
     *
     * @param str the new description; stored as given
     */
    public void setDescription(String str) {
        this.description = str;
    }

    /**
     * Fills in missing display names. A Named object without a display name gets its standard
     * name, or else the shortest of its names. Then every simple physical entity whose display
     * name is null or blank inherits the non-blank display name of its entity reference.
     *
     * @param model the model to fix in place
     */
    private void fixDisplayName(Model model) {
        log.info("Trying to auto-fix 'null' displayName...");
        for (Named named : model.getObjects(Named.class)) {
            if (named.getDisplayName() != null) {
                continue;
            }
            if (named.getStandardName() != null) {
                named.setDisplayName(named.getStandardName());
                log.info(named + " displayName auto-fix: " + named.getDisplayName() + ". " + this.description);
                continue;
            }
            if (named.getName().isEmpty()) {
                continue;
            }
            // Pick the shortest name; on ties the first one encountered wins.
            String shortest = named.getName().iterator().next();
            for (String candidate : named.getName()) {
                if (candidate.length() < shortest.length()) {
                    shortest = candidate;
                }
            }
            named.setDisplayName(shortest);
            log.info(named + " displayName auto-fix: " + shortest + ". " + this.description);
        }

        for (EntityReference entityReference : model.getObjects(EntityReference.class)) {
            for (SimplePhysicalEntity member : entityReference.getEntityReferenceOf()) {
                boolean memberUnnamed = member.getDisplayName() == null
                        || member.getDisplayName().trim().length() == 0;
                if (!memberUnnamed) {
                    continue;
                }
                boolean referenceNamed = entityReference.getDisplayName() != null
                        && entityReference.getDisplayName().trim().length() > 0;
                if (referenceNamed) {
                    member.setDisplayName(entityReference.getDisplayName());
                }
            }
        }
    }

    /**
     * Serializes the model to BioPAX OWL text using a SimpleIOHandler for the model's level.
     * The bytes are decoded explicitly as UTF-8 (the same charset {@link #normalize(String)}
     * uses to encode its input) instead of the platform default charset.
     * NOTE(review): assumes SimpleIOHandler writes UTF-8 - confirm against its implementation.
     *
     * @param model the model to serialize
     * @return the OWL text
     */
    private String convertToOWL(Model model) {
        ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
        new SimpleIOHandler(model.getLevel()).convertToOWL(model, byteArrayOutputStream);
        try {
            return byteArrayOutputStream.toString("UTF-8");
        } catch (UnsupportedEncodingException e) {
            // UTF-8 support is mandatory for every Java platform; this should never happen.
            throw new IllegalStateException("UTF-8 is not supported. " + this.description, e);
        }
    }

    /**
     * Returns the unification xrefs of the given object that have both db and id set,
     * sorted by the concatenation of db and id.
     *
     * @param xReferrable the object whose xrefs are inspected
     * @return a new, sorted list (possibly empty)
     */
    private Collection<UnificationXref> getUnificationXrefsSorted(XReferrable xReferrable) {
        ArrayList<UnificationXref> sorted = new ArrayList<UnificationXref>();
        // Typed filter view: yields only the UnificationXref members of the xref set.
        for (UnificationXref unificationXref
                : new ClassFilterSet<Xref, UnificationXref>(xReferrable.getXref(), UnificationXref.class)) {
            if (unificationXref.getDb() != null && unificationXref.getId() != null) {
                sorted.add(unificationXref);
            }
        }
        Collections.sort(sorted, new Comparator<UnificationXref>() {
            @Override
            public int compare(UnificationXref unificationXref2, UnificationXref unificationXref3) {
                return (unificationXref2.getDb() + unificationXref2.getId()).compareTo(unificationXref3.getDb() + unificationXref3.getId());
            }
        });
        return sorted;
    }

    /**
     * Chooses the unification xref used to build the normalized URI of the given object.
     * Protein references prefer "uniprot" then "refseq"; small molecule references prefer
     * "chebi" then "pubchem"; nucleic acid references prefer "ncbi gene" then "entrez";
     * any other object gets its only unification xref, if it has exactly one.
     *
     * @param xReferrable the object to inspect
     * @return the preferred xref, or null if none qualifies
     */
    private UnificationXref findPreferredUnificationXref(XReferrable xReferrable) {
        Collection<UnificationXref> candidates = getUnificationXrefsSorted(xReferrable);

        if (xReferrable instanceof ProteinReference) {
            UnificationXref primary = findSingleUnificationXref(candidates, "uniprot");
            return primary != null ? primary : findSingleUnificationXref(candidates, "refseq");
        }
        if (xReferrable instanceof SmallMoleculeReference) {
            UnificationXref primary = findSingleUnificationXref(candidates, "chebi");
            return primary != null ? primary : findSingleUnificationXref(candidates, "pubchem");
        }
        if (xReferrable instanceof NucleicAcidReference) {
            UnificationXref primary = findSingleUnificationXref(candidates, "ncbi gene");
            return primary != null ? primary : findSingleUnificationXref(candidates, "entrez");
        }
        if (candidates.size() == 1) {
            return candidates.iterator().next();
        }
        return null;
    }

    /**
     * Finds the first xref whose lower-cased db name starts with the given prefix.
     * If a later matching xref has the same db name (ignoring case) but a different id,
     * the choice is ambiguous and null is returned.
     *
     * @param collection candidate xrefs, in the order they should be considered
     * @param str        lower-case db name prefix
     * @return the selected xref, or null if none matches or the match is ambiguous
     */
    private UnificationXref findSingleUnificationXref(Collection<UnificationXref> collection, String str) {
        UnificationXref selected = null;
        for (UnificationXref candidate : collection) {
            boolean matches = candidate != null
                    && candidate.getId() != null
                    && candidate.getDb() != null
                    && candidate.getDb().toLowerCase().startsWith(str);
            if (!matches) {
                continue;
            }
            if (selected == null) {
                selected = candidate;
                continue;
            }
            boolean sameDb = selected.getDb().equalsIgnoreCase(candidate.getDb());
            if (sameDb && !selected.getId().equals(candidate.getId())) {
                return null;
            }
        }
        return selected;
    }

    /**
     * Normalizes a BioPAX Level 3 model in place: xrefs, (optionally) display names,
     * controlled vocabularies, bio sources and entity references, followed by a model repair.
     *
     * @param model the Level 3 model to normalize
     * @throws IllegalArgumentException if the model is not Level 3
     */
    public void normalize(Model model) {
        if (model.getLevel() != BioPAXLevel.L3) {
            throw new IllegalArgumentException("Not Level3 model. Consider converting it first (e.g., with the PaxTools).");
        }

        // A configured, non-empty xml:base overrides the model's own.
        boolean hasCustomBase = this.xmlBase != null && !this.xmlBase.isEmpty();
        if (hasCustomBase) {
            model.setXmlBase(this.xmlBase);
        }

        log.info("Normalizing xrefs..." + this.description);
        normalizeXrefs(model);

        if (this.fixDisplayName) {
            log.info("Normalizing display names..." + this.description);
            fixDisplayName(model);
        }

        log.info("Normalizing CVs..." + this.description);
        normalizeCVs(model);

        log.info("Normalizing organisms..." + this.description);
        normalizeBioSources(model);

        // Work on a snapshot of the entities while ModelUtils adds missing entity references.
        HashSet<SimplePhysicalEntity> entities =
                new HashSet<SimplePhysicalEntity>(model.getObjects(SimplePhysicalEntity.class));
        for (SimplePhysicalEntity entity : entities) {
            ModelUtils.addMissingEntityReference(model, entity);
        }

        log.info("Normalizing entity references..." + this.description);
        normalizeERs(model);

        log.info("Repairing..." + this.description);
        model.repair();

        log.info("Optional tasks (reasoning)..." + this.description);
    }

    /**
     * Registers a normalized-URI replacement for every controlled vocabulary, based on its
     * preferred unification xref or, failing that, on its first term; then applies them.
     * Vocabularies with neither are only logged.
     *
     * @param model the model to normalize in place
     */
    private void normalizeCVs(Model model) {
        NormalizerMap replacements = new NormalizerMap(model);
        for (ControlledVocabulary vocabulary : model.getObjects(ControlledVocabulary.class)) {
            UnificationXref preferred = findPreferredUnificationXref(vocabulary);
            if (preferred != null) {
                String byXref = uri(this.xmlBase, preferred.getDb(), preferred.getId(), vocabulary.getModelInterface());
                replacements.put(vocabulary, byXref);
                continue;
            }
            if (!vocabulary.getTerm().isEmpty()) {
                String firstTerm = vocabulary.getTerm().iterator().next();
                String byTerm = uri(this.xmlBase, null, firstTerm, vocabulary.getModelInterface());
                replacements.put(vocabulary, byTerm);
                continue;
            }
            log.info("Cannot normalize " + vocabulary.getModelInterface().getSimpleName() + " : no unification xrefs nor terms found in " + vocabulary.getUri() + ". " + this.description);
        }
        replacements.doSubs();
    }

    /**
     * Registers a normalized-URI replacement for every BioSource that has a preferred
     * unification xref to a taxonomy database (db name contains "taxonomy" or equals "newt"),
     * then applies the replacements.
     *
     * A plain numeric taxonomy id maps to {@code http://identifiers.org/taxonomy/<id>}. When
     * tissue and/or cell type terms are appended to make the id unique, the result is no longer
     * a taxonomy identifier, so the URI is produced by {@link #uri} instead of being forced into
     * the identifiers.org taxonomy namespace.
     *
     * @param model the model to normalize in place
     */
    private void normalizeBioSources(Model model) {
        NormalizerMap normalizerMap = new NormalizerMap(model);
        for (BioSource bioSource : model.getObjects(BioSource.class)) {
            UnificationXref preferred = findPreferredUnificationXref(bioSource);
            boolean taxonomyXref = preferred != null
                    && (preferred.getDb().toLowerCase().contains("taxonomy") || preferred.getDb().equalsIgnoreCase("newt"));
            if (!taxonomyXref) {
                log.debug("Won't normalize BioSource : no taxonomy unification xref found in " + bioSource.getUri() + ". " + this.description);
                continue;
            }
            String id = preferred.getId();
            if (bioSource.getTissue() != null && !bioSource.getTissue().getTerm().isEmpty()) {
                id = id + "_" + bioSource.getTissue().getTerm().iterator().next();
            }
            if (bioSource.getCellType() != null && !bioSource.getCellType().getTerm().isEmpty()) {
                id = id + "_" + bioSource.getCellType().getTerm().iterator().next();
            }
            boolean plainTaxonId = id.equals(preferred.getId()) && id.matches("^\\d+$");
            String bioSourceUri = plainTaxonId
                    ? "http://identifiers.org/taxonomy/" + id
                    : uri(this.xmlBase, preferred.getDb(), id, BioSource.class);
            normalizerMap.put(bioSource, bioSourceUri);
        }
        normalizerMap.doSubs();
    }

    /**
     * Registers an identifiers.org replacement URI for every entity reference that is not
     * already under {@code http://identifiers.org/} and has a preferred unification xref,
     * then applies the replacements.
     *
     * A failure to resolve one reference is logged and that reference is skipped; the remaining
     * references are still processed and the collected replacements are still applied.
     *
     * @param model the model to normalize in place
     */
    private void normalizeERs(Model model) {
        NormalizerMap normalizerMap = new NormalizerMap(model);
        for (EntityReference entityReference : model.getObjects(EntityReference.class)) {
            if (entityReference.getUri().startsWith("http://identifiers.org/")) {
                log.info("Skip already normalized: " + entityReference.getUri());
                continue;
            }
            UnificationXref preferred = findPreferredUnificationXref(entityReference);
            if (preferred == null) {
                log.info("Cannot normalize EntityReference: no unification xrefs found in " + entityReference.getUri() + ". " + this.description);
                continue;
            }
            String db = preferred.getDb();
            String id = preferred.getId();
            try {
                String identifiersOrgURI = MiriamLink.getIdentifiersOrgURI(db, id);
                if (identifiersOrgURI != null) {
                    normalizerMap.put(entityReference, identifiersOrgURI);
                }
            } catch (Exception e) {
                // Skip only this reference; returning here would silently drop every
                // replacement collected so far because doSubs() would never run.
                log.error("Cannot get a Miriam standard ID for " + entityReference + " (" + entityReference.getModelInterface().getSimpleName() + ") , using " + db + ":" + id + ". " + e.getMessage(), (Throwable) e);
                continue;
            }
        }
        normalizerMap.doSubs();
    }

    /**
     * Tries to set standard/display names and synonyms of a Provenance from MiriamLink.
     * The lookup key is the URI when it is a urn:miriam: or identifiers.org URI, otherwise the
     * standard name, otherwise the display name. If that yields nothing, each existing name is
     * tried. A Provenance with a non-standard URI and no names is skipped.
     *
     * @param provenance the object to update in place
     */
    public static void autoName(Provenance provenance) {
        boolean standardUri = provenance.getUri().startsWith("urn:miriam:")
                || provenance.getUri().startsWith("http://identifiers.org/");
        if (!standardUri && provenance.getName().isEmpty()) {
            log.info("Skipping: cannot normalize Provenance: " + provenance.getUri());
            return;
        }

        TreeSet<String> synonyms = new TreeSet<String>();

        String key;
        if (standardUri) {
            key = provenance.getUri();
        } else if (provenance.getStandardName() != null) {
            key = provenance.getStandardName();
        } else {
            key = provenance.getDisplayName(); // may be null
        }

        if (key != null) {
            try {
                synonyms.addAll(Arrays.asList(MiriamLink.getNames(key)));
                provenance.setStandardName(MiriamLink.getName(key));
                provenance.addComment(MiriamLink.getDataTypeDef(provenance.getStandardName()));
            } catch (IllegalArgumentException ignored) {
                // lookup failed for this key; fall through to the per-name lookup below
            }
        }

        if (synonyms.isEmpty()) {
            for (String existingName : provenance.getName()) {
                try {
                    synonyms.addAll(Arrays.asList(MiriamLink.getNames(existingName)));
                } catch (IllegalArgumentException ignored) {
                    // lookup failed for this name; try the next one
                }
            }
            if (!synonyms.isEmpty()) {
                provenance.setStandardName(MiriamLink.getName(synonyms.iterator().next()));
            }
        }

        for (String synonym : synonyms) {
            provenance.addName(synonym);
        }

        if (provenance.getDisplayName() == null) {
            provenance.setDisplayName(provenance.getStandardName());
        }
    }

    /**
     * Converts BioPAX OWL text to Level 3. Input that is already Level 3 is returned unchanged.
     * The text is encoded and decoded explicitly as UTF-8 (as {@link #normalize(String)} does)
     * rather than with the platform default charset.
     *
     * @param str BioPAX OWL content
     * @return Level 3 OWL content; an empty string if the upgrade produced no model
     * @throws RuntimeException if reading, upgrading or writing fails for any reason
     */
    public static String convertToLevel3(String str) {
        String str2 = "";
        try {
            ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
            ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(str.getBytes("UTF-8"));
            SimpleIOHandler simpleIOHandler = new SimpleIOHandler();
            simpleIOHandler.mergeDuplicates(true);
            Model convertFromOWL = simpleIOHandler.convertFromOWL(byteArrayInputStream);
            if (convertFromOWL.getLevel() != BioPAXLevel.L3) {
                log.info("Converting to BioPAX Level3... " + convertFromOWL.getXmlBase());
                Model filter = new LevelUpgrader().filter(convertFromOWL);
                if (filter != null) {
                    // Switch the handler to the upgraded model's factory before writing it out.
                    simpleIOHandler.setFactory(filter.getLevel().getDefaultFactory());
                    simpleIOHandler.convertToOWL(filter, byteArrayOutputStream);
                    str2 = byteArrayOutputStream.toString("UTF-8");
                }
            } else {
                str2 = str;
            }
            return str2;
        } catch (Exception e) {
            throw new RuntimeException("Cannot convert to BioPAX Level3", e);
        }
    }

    /**
     * Returns the configured xml:base, or an empty string when none is set.
     * The model argument is not consulted.
     */
    private String getXmlBase(Model model) {
        if (this.xmlBase == null) {
            return "";
        }
        return this.xmlBase;
    }

    /**
     * Tells whether {@link #normalize(Model)} runs the display-name auto-fix step.
     *
     * @return the current flag value (set to true by the constructor)
     */
    public boolean isFixDisplayName() {
        return this.fixDisplayName;
    }

    /**
     * Enables or disables the display-name auto-fix step of {@link #normalize(Model)}.
     *
     * @param z true to run the step, false to skip it
     */
    public void setFixDisplayName(boolean z) {
        this.fixDisplayName = z;
    }

    /**
     * Returns the configured xml:base exactly as stored (may be null if it was set to null).
     *
     * @return the xml:base (set to an empty string by the constructor)
     */
    public String getXmlBase() {
        return this.xmlBase;
    }

    /**
     * Sets the xml:base. In {@link #normalize(Model)} a non-null, non-empty value is set on the
     * model, and the value is passed to {@link #uri} as the prefix of generated URIs.
     *
     * @param str the new xml:base; stored as given
     */
    public void setXmlBase(String str) {
        this.xmlBase = str;
    }
}
