package org.dice_research.rdf.examples;

import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import org.apache.jena.atlas.lib.ProgressMonitor;
import org.apache.jena.riot.Lang;
import org.apache.jena.riot.RDFDataMgr;
import org.apache.jena.riot.system.ProgressStreamRDF;
import org.apache.jena.riot.system.StreamRDFLib;
import org.apache.jena.vocabulary.RDF;
import org.dice_research.rdf.stream.collect.RDFStreamCollector;
import org.dice_research.rdf.stream.collect.RDFStreamGroupByCollector;
import org.dice_research.rdf.stream.filter.NodeFilterBasedTripleFilter;
import org.dice_research.rdf.stream.filter.RDFStreamTripleFilter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/dice_research/rdf/examples/FCTrainTestSplitter.class */
/**
 * Command line tool that splits an N-Triples file into a training file and a
 * test file.
 *
 * <p>
 * The input is processed in three steps:
 * </p>
 * <ol>
 * <li>The subjects of all triples that have the predicate
 * {@code http://swc2017.aksw.org/hasTruthValue} and a literal object with the
 * lexical form {@code 1.0} are collected ({@link #selectTrueStmts(String)}).
 * The input is split into two temporary files based on these subjects.</li>
 * <li>For each temporary file, the subjects of its {@code rdf:predicate}
 * triples are grouped by the object of that triple
 * ({@link #groupStmtsByPredicate(String, Map)}).</li>
 * <li>From each temporary file, half of the requested number of subjects is
 * sampled ({@link #selectTestStmts(Map, int, long)}). Triples of the sampled
 * subjects go to the test file, all other triples go to the training
 * file.</li>
 * </ol>
 *
 * <p>
 * Usage:
 * {@code FCTrainTestSplitter <input-file> <training-file> <test-file> <test-file-size> [seed]}
 * </p>
 */
public class FCTrainTestSplitter {
    private static final Logger LOGGER = LoggerFactory.getLogger(FCTrainTestSplitter.class);

    /** Predicate URI of the triples that carry the truth value of a statement. */
    private static final String TRUTH_VALUE_URI = "http://swc2017.aksw.org/hasTruthValue";

    /** Lexical form of the truth value literal that marks a statement as true. */
    private static final String TRUE_LEXICAL_FORM = "1.0";

    /**
     * Entry point of the tool.
     *
     * @param args {@code <input-file> <training-file> <test-file> <test-file-size> [seed]};
     *             if the seed is missing, the current time in milliseconds is
     *             used
     * @throws IOException              if one of the files cannot be created or
     *                                  written
     * @throws NumberFormatException    if the test file size or the seed cannot
     *                                  be parsed
     * @throws IllegalArgumentException if one of the two intermediate files does
     *                                  not offer enough candidates for the
     *                                  requested test file size
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 4) {
            System.err.println("Error: wrong usage. FCTrainTestSplitter <input-file> <training-file> <test-file> <test-file-size> [seed]");
            return;
        }
        String inputFile = args[0];
        String trainingFile = args[1];
        String testFile = args[2];
        // The same number of statements is sampled from each of the two
        // intermediate files. Integer division: an odd size yields one
        // statement less in total.
        int testStmtsPerFile = Integer.parseInt(args[3]) / 2;
        long seed = args.length >= 5 ? Long.parseLong(args[4]) : System.currentTimeMillis();

        File trueFile = null;
        File falseFile = null;
        try {
            trueFile = File.createTempFile("True", ".nt");
            falseFile = File.createTempFile("False", ".nt");
            String[] intermediateFiles = { trueFile.getAbsolutePath(), falseFile.getAbsolutePath() };
            splitInputFile(inputFile, intermediateFiles[0], intermediateFiles[1], selectTrueStmts(inputFile));

            for (int i = 0; i < intermediateFiles.length; i++) {
                Map<String, Set<String>> stmtsByPredicate = new HashMap<>();
                groupStmtsByPredicate(intermediateFiles[i], stmtsByPredicate);
                Set<String> testStmts = selectTestStmts(stmtsByPredicate, testStmtsPerFile, seed);
                // The first pass creates/overwrites the output files, the
                // second pass appends to them.
                splitInputFile(intermediateFiles[i], testFile, trainingFile, testStmts, i != 0);
            }
        } finally {
            // The intermediate files are only needed during this run.
            deleteTempFile(trueFile);
            deleteTempFile(falseFile);
        }
        LOGGER.info("Finished");
    }

    /**
     * Streams the given N-Triples file and collects the subject URI of every
     * triple accepted by a filter that checks the predicate against
     * {@code http://swc2017.aksw.org/hasTruthValue} and the object for being a
     * literal with the lexical form {@code 1.0}.
     *
     * @param inputFile the N-Triples file that should be read
     * @return the collected subject URIs
     */
    public static Set<String> selectTrueStmts(String inputFile) {
        Set<String> trueStmts = new HashSet<>();
        // NOTE(review): the argument positions of NodeFilterBasedTripleFilter
        // are presumably (subject, predicate, object); null presumably means
        // "no restriction" - confirm against that class.
        RDFStreamTripleFilter stream = new RDFStreamTripleFilter(
                new NodeFilterBasedTripleFilter(null,
                        node -> TRUTH_VALUE_URI.equals(node.getURI()),
                        node -> node.isLiteral() && TRUE_LEXICAL_FORM.equals(node.getLiteral().getLexicalForm())),
                new RDFStreamCollector(triple -> triple.getSubject().getURI(), trueStmts));
        parseWithProgress(stream, inputFile, "Streaming data to select true triples...");
        return trueStmts;
    }

    /**
     * Streams the given N-Triples file and, for every triple with the predicate
     * {@code rdf:predicate}, hands the object URI (as key) and the subject URI
     * (as value) to a group-by collector that works on the given map.
     *
     * @param inputFile the N-Triples file that should be read
     * @param map       the map that receives the groups (object URI of the
     *                  {@code rdf:predicate} triple to set of subject URIs)
     */
    public static void groupStmtsByPredicate(String inputFile, Map<String, Set<String>> map) {
        RDFStreamGroupByCollector collector = new RDFStreamGroupByCollector(
                triple -> triple.getObject().getURI(),
                triple -> triple.getSubject().getURI(),
                HashSet::new, map);
        String rdfPredicateUri = RDF.predicate.getURI();
        RDFStreamTripleFilter stream = new RDFStreamTripleFilter(
                triple -> rdfPredicateUri.equals(triple.getPredicate().getURI()), collector);
        parseWithProgress(stream, inputFile, "Streaming data to analyze predicates...");
    }

    /**
     * Randomly selects the statements that should be moved to the test set.
     *
     * <p>
     * Groups with at most one statement are ignored. From every other group the
     * first statement (in iteration order of the set) is never selected, so each
     * of these groups keeps at least one statement outside of the test set. All
     * remaining statements are candidates, and exactly {@code testSize} of them
     * are chosen by a single pass of selection sampling. For a fixed seed the
     * result is reproducible as long as the iteration order of the map and its
     * sets is the same.
     * </p>
     *
     * @param stmtsByPredicate the statements grouped by their predicate
     * @param testSize         the number of statements that should be selected
     * @param seed             the seed of the random number generator
     * @return the selected statements
     * @throws IllegalArgumentException if there are fewer candidates than
     *                                  {@code testSize}
     */
    public static Set<String> selectTestStmts(Map<String, Set<String>> stmtsByPredicate, int testSize, long seed) {
        // Every group with more than one statement contributes all but its
        // first statement as candidates.
        int remainingCandidates = 0;
        for (Set<String> stmts : stmtsByPredicate.values()) {
            if (stmts.size() > 1) {
                remainingCandidates += stmts.size() - 1;
            }
        }
        if (remainingCandidates < testSize) {
            throw new IllegalArgumentException("The given test file size is too high. The maximum test file size is " + remainingCandidates + ".");
        }

        Set<String> selected = new HashSet<>();
        Random random = new Random(seed);
        int remainingToSelect = testSize;
        for (Set<String> stmts : stmtsByPredicate.values()) {
            if (stmts.size() <= 1) {
                continue;
            }
            boolean first = true;
            for (String stmt : stmts) {
                if (first) {
                    // Keep one statement of this predicate out of the test set.
                    first = false;
                    continue;
                }
                // Selection sampling: choose the candidate with probability
                // (still needed) / (candidates left).
                if (random.nextInt(remainingCandidates) < remainingToSelect) {
                    selected.add(stmt);
                    remainingToSelect--;
                }
                remainingCandidates--;
            }
        }
        return selected;
    }

    /**
     * Splits the given N-Triples file into two files, overwriting existing
     * output files. See
     * {@link #splitInputFile(String, String, String, Set, boolean)}.
     *
     * @param inputFile    the N-Triples file that should be read
     * @param acceptedFile the file handed to the triple filter as first output
     * @param rejectedFile the file handed to the triple filter as second output
     * @param subjects     the subject URIs the filter checks for
     * @throws IOException if one of the output files cannot be opened or closed
     */
    public static void splitInputFile(String inputFile, String acceptedFile, String rejectedFile, Set<String> subjects)
            throws IOException {
        splitInputFile(inputFile, acceptedFile, rejectedFile, subjects, false);
    }

    /**
     * Splits the given N-Triples file into two files. Every triple is checked by
     * a filter that tests whether the first node is a URI contained in the given
     * set. The two output files are handed to the {@link RDFStreamTripleFilter}
     * as its first and second output stream.
     *
     * <p>
     * NOTE(review): the call sites in this class rely on accepted triples going
     * to the first and all other triples going to the second output - confirm
     * against {@link RDFStreamTripleFilter}.
     * </p>
     *
     * <p>
     * Both files are written as UTF-8 and both writers are closed even if
     * parsing or writing fails.
     * </p>
     *
     * @param inputFile    the N-Triples file that should be read
     * @param acceptedFile the file handed to the triple filter as first output
     * @param rejectedFile the file handed to the triple filter as second output
     * @param subjects     the subject URIs the filter checks for
     * @param append       {@code true} if the triples should be appended to the
     *                     output files, {@code false} if the files should be
     *                     overwritten
     * @throws IOException if one of the output files cannot be opened or closed
     */
    public static void splitInputFile(String inputFile, String acceptedFile, String rejectedFile, Set<String> subjects,
            boolean append) throws IOException {
        try (Writer acceptedWriter = openUtf8Writer(acceptedFile, append);
                Writer rejectedWriter = openUtf8Writer(rejectedFile, append)) {
            RDFStreamTripleFilter stream = new RDFStreamTripleFilter(
                    new NodeFilterBasedTripleFilter(node -> node.isURI() && subjects.contains(node.getURI()), null,
                            null),
                    StreamRDFLib.writer(acceptedWriter), StreamRDFLib.writer(rejectedWriter));
            parseWithProgress(stream, inputFile, "Streaming data to split into two files...");
        }
    }

    /**
     * Parses the given N-Triples file into the given stream while a progress
     * monitor logs the number of processed triples.
     */
    private static void parseWithProgress(RDFStreamTripleFilter stream, String inputFile, String logMessage) {
        ProgressMonitor monitor = ProgressMonitor.create(LOGGER, "Processed triples", 1000L, 10);
        ProgressStreamRDF progressStream = new ProgressStreamRDF(stream, monitor);
        LOGGER.info(logMessage);
        monitor.start();
        progressStream.start();
        RDFDataMgr.parse(progressStream, inputFile, Lang.NT);
        monitor.finish();
        progressStream.finish();
    }

    /**
     * Opens a UTF-8 writer for the given file. The platform default charset is
     * avoided because the written files are N-Triples and are partly read again
     * by this class.
     */
    private static Writer openUtf8Writer(String file, boolean append) throws IOException {
        return new OutputStreamWriter(new FileOutputStream(file, append), StandardCharsets.UTF_8);
    }

    /** Deletes the given temporary file (if any) and logs a warning on failure. */
    private static void deleteTempFile(File file) {
        if (file != null && file.exists() && !file.delete()) {
            LOGGER.warn("Couldn't delete the temporary file {}.", file);
        }
    }
}
