package org.dice_group.grp.compression.rdf;

import com.github.jsonldjava.core.JsonLdConsts;
import grph.Grph;
import grph.in_memory.InMemoryGrph;
import it.unimi.dsi.fastutil.ints.IntIterator;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.concurrent.ExecutionException;
import org.apache.jena.atlas.lib.Chars;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.riot.RDFLanguages;
import org.dice_group.grp.compression.GRPWriter;
import org.dice_group.grp.exceptions.NotAllowedInRDFException;
import org.dice_group.grp.exceptions.NotSupportedException;
import org.dice_group.grp.grammar.Grammar;
import org.dice_group.grp.grammar.GrammarHelper;
import org.dice_group.grp.grammar.digram.Digram;
import org.dice_group.grp.grammar.digram.DigramHelper;
import org.dice_group.grp.grammar.digram.DigramOccurence;
import org.dice_group.grp.index.impl.IntBasedIndexer;
import org.dice_group.grp.util.BlankNodeIDGenerator;
import org.dice_group.grp.util.BoundedList;
import org.dice_group.grp.util.DigramOccurenceComparator;
import org.dice_group.grp.util.IndexedRDFNode;
import org.dice_group.grp.util.PTriple;
import org.dice_group.grp.util.Stats;
import org.rdfhdt.hdt.dictionary.DictionaryFactory;
import org.rdfhdt.hdt.dictionary.TempDictionary;
import org.rdfhdt.hdt.dictionary.impl.section.PFCDictionarySectionBig;
import org.rdfhdt.hdt.enums.RDFNotation;
import org.rdfhdt.hdt.enums.TripleComponentRole;
import org.rdfhdt.hdt.exceptions.ParserException;
import org.rdfhdt.hdt.options.HDTSpecification;
import org.rdfhdt.hdt.rdf.RDFParserCallback;
import org.rdfhdt.hdt.rdf.RDFParserFactory;
import org.rdfhdt.hdt.triples.TripleString;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/dice_group/grp/compression/rdf/RDFCompressor.class */
public class RDFCompressor {
    /** SLF4J logger bound to this class. */
    public static final Logger LOGGER = LoggerFactory.getLogger((Class<?>) RDFCompressor.class);
    // Occurrence cut-off of 2. NOTE(review): presumably the minimum-occurrence limit that
    // createGrammar checks before replacing a digram — confirm.
    private static final long THRESHOLD = 2;
    // Upper limit (equal to Integer.MAX_VALUE) on the occurrence count of a digram that
    // createGrammar is willing to replace.
    private static final long MAX = 2147483647L;
    // Forwarded to GRPWriter.save(...) by compressRDF.
    private final boolean threaded;
    // When true, createGrammar returns right after the graph conversion and skips the digram
    // replacement. Initialised to true by the constructor, overwritten on every compressRDF call.
    private boolean onlyKD;
    // Helper used by createGrammar to find, sort and de-overlap digram occurrences.
    public DigramHelper dh3;
    // Temporary dictionary; (re)created by compressRDF and filled through addObject.
    private TempDictionary dict;

    /**
     * Creates a compressor with the {@code threaded} flag switched off.
     */
    public RDFCompressor() {
        this(false);
    }

    /**
     * Creates a compressor.
     *
     * @param z value of the {@code threaded} flag that is later handed to the writer
     */
    public RDFCompressor(boolean z) {
        this.threaded = z;
        this.dh3 = new DigramHelper();
        // Default until compressRDF overrides it: only convert the graph, no digram replacement.
        this.onlyKD = true;
    }

    /**
     * Reads an RDF file, turns it into a grammar, indexes the grammar and writes the result.
     *
     * @param file  RDF file to compress
     * @param str   path of the output file; also used to build the returned {@link File}
     * @param bool  flag passed through unchanged to {@code GRPWriter.save}
     * @param bool2 value for the {@code onlyKD} switch; when {@code true} the digram replacement
     *              in {@link #createGrammar} is skipped. Must not be {@code null}.
     * @return a {@link File} pointing at {@code str}
     * @throws NotAllowedInRDFException propagated from the grammar creation
     * @throws NotSupportedException    declared for callers; propagated from the called steps
     * @throws IOException              propagated from the called steps
     * @throws ExecutionException       declared for callers; propagated from the called steps
     * @throws InterruptedException     declared for callers; propagated from the called steps
     */
    public File compressRDF(File file, String str, Boolean bool, Boolean bool2) throws NotAllowedInRDFException, NotSupportedException, IOException, ExecutionException, InterruptedException {
        this.onlyKD = bool2.booleanValue();
        Stats.printMemStats();
        Stats.setCurrentFileName(file.getName());
        long readStart = System.currentTimeMillis();
        List<String> soIndex = new ArrayList<>();
        HDTSpecification spec = new HDTSpecification();
        spec.set("tempDictionary.impl", DictionaryFactory.MOD_DICT_IMPL_HASH_PSFC);
        this.dict = DictionaryFactory.createTempDictionary(spec);
        try {
            SortedSet<PTriple> triples = readFileToTriples(file, soIndex);
            System.out.println("reading took " + (System.currentTimeMillis() - readStart) + " ms");
            Grammar grammar = createGrammar(triples, file, soIndex);
            Stats.printMemStats();
            IntBasedIndexer indexer = new IntBasedIndexer(this.dict);
            Grammar indexedGrammar = indexer.indexGrammar(grammar);
            Stats.printMemStats();
            // Explicit GC request kept from the original flow, right before serialization.
            System.gc();
            GRPWriter.save(str, indexedGrammar, indexer.getDict(), bool, Boolean.valueOf(this.threaded));
        } finally {
            // Reset the static grammar state even when a step fails, so that a later run does
            // not start from the leftovers of this one.
            GrammarHelper.reset();
        }
        return new File(str);
    }

    /**
     * Converts the triples into a graph and, unless {@code onlyKD} is set, repeatedly replaces
     * the most frequent digram by a new rule until no digram with more than {@code THRESHOLD}
     * occurrences is left.
     *
     * @param sortedSet triples to convert; the set is emptied by the graph conversion
     * @param file      not used by this method
     * @param list      node index list; filled during the graph conversion and stored in the grammar
     * @return the grammar whose start graph is the (possibly reduced) graph
     * @throws NotAllowedInRDFException declared for callers
     * @throws IOException              declared for callers
     */
    public Grammar createGrammar(SortedSet<PTriple> sortedSet, File file, List<String> list) throws NotAllowedInRDFException, IOException {
        // Must be read before rdfToGrph, which clears the set.
        long originalSize = sortedSet.size();
        long start = System.currentTimeMillis();
        BoundedList boundedList = new BoundedList();
        Grph graph = rdfToGrph(sortedSet, list, boundedList, new InMemoryGrph());
        Grammar grammar = new Grammar(graph);
        grammar.setProps(boundedList);
        grammar.setSOIndex(list);
        GrammarHelper.setStartIndexForNT(boundedList.getHighestBound() + 1);
        System.out.println("converting took " + (System.currentTimeMillis() - start) + " ms");
        if (this.onlyKD) {
            return grammar;
        }

        Map<Digram, Collection<DigramOccurence>> mappingVertex = this.dh3.getMappingVertex(graph, boundedList);
        List<Digram> sortedDigrams = this.dh3.sortDigrambyFrequence(mappingVertex);
        System.out.println("Found " + mappingVertex.size() + " Digrams");
        this.dh3.removeOverlappingOcc(sortedDigrams, mappingVertex);
        System.out.println("Found " + sortedDigrams.size() + " non overlapping digrams");
        int position = 0;
        for (Digram candidate : sortedDigrams) {
            System.out.println("Digram " + position + " has " + mappingVertex.get(candidate).size() + " Occurences.");
            position++;
        }
        System.out.println("Prework done. Onto the algorithm...");

        Set<Digram> seen = new HashSet<>();
        while (!sortedDigrams.isEmpty()) {
            Digram digram = sortedDigrams.get(0);
            if (!seen.add(digram)) {
                // Already handled once (or skipped): drop it so the loop can make progress.
                mappingVertex.remove(digram);
                sortedDigrams.remove(digram);
                continue;
            }
            long occurrences = digram.getNoOfOccurences();
            if (occurrences <= THRESHOLD) {
                // The list is sorted by frequency, so nothing worth replacing follows.
                break;
            }
            if (occurrences > MAX) {
                // Left in place; the next iteration finds it in 'seen' and removes it.
                continue;
            }

            Set<Integer> newEdges = new HashSet<>();
            List<DigramOccurence> replaced = new ArrayList<>();
            Integer ruleId = replaceAllOccurences((List<DigramOccurence>) mappingVertex.get(digram), graph, newEdges, boundedList, replaced);
            System.out.println("Graph size " + graph.getEdges().size());
            if (ruleId == null) {
                mappingVertex.remove(digram);
                sortedDigrams.remove(digram);
                continue;
            }
            // NOTE(review): replaceAllOccurences yields -1 (not null) when every occurrence was
            // skipped; such a result is still registered as a rule here — confirm this is intended.
            grammar.getReplaced().put(digram, replaced);
            grammar.addRule(ruleId, digram);
            mappingVertex.remove(digram);
            sortedDigrams.remove(digram);

            // The replacement edges may form new digrams; merge them in and re-rank.
            mappingVertex.putAll(this.dh3.findNewMappingsVertex(graph, newEdges, boundedList));
            long sortStart = System.currentTimeMillis();
            sortedDigrams = this.dh3.sortDigrambyFrequence(mappingVertex);
            System.out.println("Sorting took " + (System.currentTimeMillis() - sortStart) + " ms");
            long overlapStart = System.currentTimeMillis();
            this.dh3.removeOverlappingOcc(sortedDigrams, mappingVertex);
            System.out.println("Find & Remove Overlapping took " + (System.currentTimeMillis() - overlapStart) + " ms");
            System.out.println("Found " + sortedDigrams.size() + " non overlapping digrams");
            int digramCount = 0;
            int occurrenceCount = 0;
            for (Digram candidate : sortedDigrams) {
                digramCount++;
                occurrenceCount += mappingVertex.get(candidate).size();
            }
            System.out.println(digramCount + " Digrams with " + occurrenceCount + " Occurences.");
        }

        int startSize = grammar.getStart().getEdges().size();
        // Floating-point division: an empty input yields NaN instead of an ArithmeticException.
        double ratio = (startSize * 1.0d) / originalSize;
        System.out.println("Start size " + startSize + " to original size " + originalSize + " [ratio: " + ratio + "]");
        System.out.println("No Of Rules " + grammar.getRules().size());
        System.out.println("Grammar compression took " + (System.currentTimeMillis() - start) + " ms");
        System.out.println("Grammar done. Onto indexing & serialization...");
        grammar.setVSize(grammar.getVSize() + grammar.getRules().size());
        return grammar;
    }

    /**
     * Replaces every usable occurrence of one digram by a single new edge.
     *
     * <p>Occurrences that are {@code null} or have more than two external nodes are skipped.
     * For each remaining occurrence the two original edges are removed from the graph and a new
     * edge with a fresh id is added between its external node(s); an occurrence with exactly one
     * external node becomes a self-loop, one without external nodes gets no new edge.
     *
     * @param list        occurrences of the digram; sorted in place
     * @param grph2       graph that is modified
     * @param set         receives the ids of all newly created edges
     * @param boundedList receives one entry describing the id range of the new edges
     * @param list2       receives every occurrence that was replaced
     * @return {@code null} if {@code list} is empty, {@code -1} if no occurrence was usable,
     *         otherwise the id of the first edge that was created
     */
    protected Integer replaceAllOccurences(List<DigramOccurence> list, Grph grph2, Set<Integer> set, BoundedList boundedList, List<DigramOccurence> list2) {
        if (list.isEmpty()) {
            return null;
        }
        IndexedRDFNode ruleRange = new IndexedRDFNode();
        Integer firstEdgeId = -1;
        boolean firstReplacement = true;
        Collections.sort(list, new DigramOccurenceComparator());
        String nonTerminal = GrammarHelper.getNextNonTerminal();
        for (DigramOccurence occurrence : list) {
            if (occurrence == null) {
                continue;
            }
            List<Integer> externals = occurrence.getExternals();
            if (externals.size() > 2) {
                continue;
            }
            Integer edgeId = Integer.valueOf(GrammarHelper.getNextNonTerminalInt());
            set.add(edgeId);
            if (firstReplacement) {
                // The range entry is registered once and then widened via its upper bound.
                firstEdgeId = edgeId;
                ruleRange.setLowerBound(edgeId.intValue());
                ruleRange.setRDFNode(nonTerminal);
                boundedList.add(ruleRange);
                firstReplacement = false;
            }
            ruleRange.setUpperBound(edgeId.intValue());
            list2.add(occurrence);
            try {
                grph2.removeEdge(occurrence.getOrigE1());
                grph2.removeEdge(occurrence.getOrigE2());
            } catch (IllegalArgumentException | NullPointerException e) {
                // Best effort, as before: report the problem and still insert the new edge.
                LOGGER.error("Could not remove the edges of a digram occurrence (first edge: {})", occurrence.getOrigE1(), e);
            }
            if (externals.size() == 1) {
                int node = externals.get(0).intValue();
                grph2.addSimpleEdge(node, edgeId.intValue(), node, true);
            } else if (externals.size() == 2) {
                grph2.addSimpleEdge(externals.get(0).intValue(), edgeId.intValue(), externals.get(1).intValue(), true);
            }
        }
        return firstEdgeId;
    }

    /**
     * Loads an RDF file into a new Jena default model. The syntax is derived from the file name.
     *
     * @param file RDF file to read
     * @return the model that the file content was read into
     * @throws FileNotFoundException if the file cannot be opened
     */
    private Model readFileToModel(File file) throws FileNotFoundException {
        Model model = ModelFactory.createDefaultModel();
        // try-with-resources so the reader is closed even when parsing fails.
        try (FileReader reader = new FileReader(file)) {
            String lang = RDFLanguages.filenameToLang(file.getName()).getLabel();
            model.read(reader, (String) null, lang);
        } catch (FileNotFoundException e) {
            throw e;
        } catch (IOException e) {
            // Only close() can raise this here; the model content is already loaded.
            LOGGER.warn("Could not close the reader for {}", file, e);
        }
        return model;
    }

    /**
     * Parses an RDF file into a sorted set of triples. The notation is guessed from the file.
     *
     * <p>A {@link ParserException} is logged and not rethrown; in that case the triples collected
     * up to the failure (possibly none) are returned.
     *
     * @param file RDF file to parse
     * @param list not used by this method
     * @return the parsed triples in the natural order of the set's elements
     * @throws FileNotFoundException declared for callers
     */
    private SortedSet<PTriple> readFileToTriples(File file, List<String> list) throws FileNotFoundException {
        RDFNotation notation = RDFNotation.guess(file);
        RDFParserCallback parser = RDFParserFactory.getParserCallback(notation);
        final SortedSet<PTriple> triples = new TreeSet<>();
        try {
            parser.doParse(file.getAbsolutePath(), "", notation, (triple, position) ->
                    triples.add(new PTriple(triple.getSubject().toString(), triple.getPredicate().toString(), triple.getObject().toString())));
        } catch (ParserException e) {
            LOGGER.error("Could not parse {}; continuing with the triples read so far", file, e);
        }
        return triples;
    }

    /**
     * Adds one directed edge per triple to the graph, numbering the edges consecutively from 0.
     *
     * <p>Consecutive triples that share a predicate are recorded as one id range in
     * {@code boundedList}. Afterwards the blank node id generator is reset and the input set is
     * cleared.
     *
     * @param sortedSet   triples to convert; empty when this method returns
     * @param list        node index list, extended through {@code getNodeIndex}
     * @param boundedList receives one range entry per run of equal predicates
     * @param grph2       graph that receives the edges
     * @return {@code grph2}
     */
    private Grph rdfToGrph(SortedSet<PTriple> sortedSet, List<String> list, BoundedList boundedList, Grph grph2) {
        Map<Integer, Integer> nodeMap = new HashMap<>();
        String previousPredicate = null;
        int edgeId = 0;
        for (PTriple triple : sortedSet) {
            // Subject is indexed before the object, as in the original order of calls.
            int subjectId = getNodeIndex(triple.getSubject(), list, nodeMap);
            String predicate = triple.getPredicate();
            if (previousPredicate != null && previousPredicate.equals(predicate)) {
                // Same predicate as the previous triple: widen the current range by one edge.
                IndexedRDFNode currentRange = boundedList.get(boundedList.size() - 1);
                currentRange.setUpperBound(currentRange.getUpperBound() + 1);
            } else {
                IndexedRDFNode newRange = new IndexedRDFNode();
                newRange.setRDFNode(predicate);
                newRange.setLowerBound(edgeId);
                newRange.setUpperBound(edgeId);
                boundedList.add(newRange);
                previousPredicate = predicate;
            }
            int objectId = getNodeIndex(triple.getObject(), list, nodeMap);
            grph2.addSimpleEdge(subjectId, edgeId, objectId, true);
            edgeId++;
            // NOTE(review): the progress interval reuses a constant of an HDT dictionary class;
            // presumably only its numeric value is meant here — confirm.
            if (edgeId % PFCDictionarySectionBig.BLOCK_PER_BUFFER == 0) {
                System.out.println("Converted " + edgeId + " edges");
            }
        }
        BlankNodeIDGenerator.reset();
        sortedSet.clear();
        return grph2;
    }

    /**
     * Returns the zero-based index of a node, registering the node on first sight.
     *
     * <p>The node is inserted into the dictionary; when the returned id is larger than the
     * current size of {@code list}, the node (blank nodes under their generated id) is appended
     * to {@code list}.
     *
     * @param str  node as read from the triple
     * @param list node index list that is extended for unseen nodes
     * @param map  not used by this method
     * @return the dictionary id minus one
     * @throws ArithmeticException if the dictionary id does not fit into an {@code int}
     */
    private int getNodeIndex(String str, List<String> list, Map<Integer, Integer> map) {
        // Fail loudly instead of silently truncating an id that exceeds the int range.
        int id = Math.toIntExact(addObject(str, this.dict).longValue());
        if (id > list.size()) {
            if (str.startsWith(JsonLdConsts.BLANK_NODE_PREFIX)) {
                list.add("_:" + BlankNodeIDGenerator.getID(str));
            } else {
                list.add(str);
            }
        }
        return id - 1;
    }

    /**
     * Inserts a node into the dictionary in the object role and returns the id it was given.
     *
     * <p>Nodes starting with a double or single quote are inserted unchanged. Other nodes that
     * start with the blank node prefix are inserted as {@code "_:"} followed by their generated
     * id. Everything else is inserted unchanged.
     *
     * @param str            node to insert
     * @param tempDictionary dictionary that receives the node
     * @return the value returned by the dictionary's insert call
     */
    private Long addObject(String str, TempDictionary tempDictionary) {
        boolean quoted = str.startsWith("\"") || str.startsWith(Chars.S_QUOTE1);
        boolean blankNode = !quoted && str.startsWith(JsonLdConsts.BLANK_NODE_PREFIX);
        String term = str;
        if (blankNode) {
            term = "_:" + BlankNodeIDGenerator.getID(str);
        }
        long id = tempDictionary.insert(term, TripleComponentRole.OBJECT);
        if (id == -1) {
            System.out.println();
        }
        return Long.valueOf(id);
    }

    /**
     * Prefixes every double quote, underscore and hyphen with a backslash and trims the result.
     *
     * @param str text to escape
     * @return the escaped text without leading or trailing whitespace
     */
    private String escape(String str) {
        String escaped = str;
        for (String symbol : new String[] {"\"", "_", "-"}) {
            escaped = escaped.replace(symbol, "\\" + symbol);
        }
        return escaped.trim();
    }
}
