package edu.cmu.parex;

import edu.cmu.parex.PhraseTable;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.StringTokenizer;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

/* loaded from: input_file:edu/cmu/parex/ParaphraseExtractor.class */
/**
 * Static utilities for building a paraphrase table by pivoting through a phrase table.
 *
 * <p>The file formats line up so that the steps can be chained (NOTE(review): inferred from the
 * formats each method reads and writes; confirm against the callers):
 * <ol>
 *   <li>{@link #findCommonWords} writes the words whose relative frequency exceeds a threshold;</li>
 *   <li>{@link #extractParaphrases} writes gzipped lines with four {@code |||}-separated fields;</li>
 *   <li>{@link #groupParaphrases} reads fields 0, 1 and 3 of such lines, sorts them and writes
 *       three-field lines;</li>
 *   <li>{@link #combineParaphrases} sums the third field over adjacent lines that share the
 *       first two fields.</li>
 * </ol>
 *
 * <p>All text files are read and written with the platform default charset.
 */
public class ParaphraseExtractor {
    // Threshold constants exposed to callers; none of them is referenced inside this class.
    public static final double MIN_TRANS_PROB = 0.001d;
    public static final double MIN_REL_FREQ = 0.001d;
    public static final double MIN_FINAL_PROB = 0.01d;
    /** Punctuation characters; not referenced inside this class (presumably passed by callers as the symbol set). */
    public static final String SYMBOLS = "~`!@#$%^&*()-_=+[{]}\\|;:'\",<.>/?";

    /** Regular expression matching the {@code |||} field separator on input lines. */
    private static final String FIELD_SPLIT_REGEX = "\\|\\|\\|";
    /** Separator placed between fields on output lines. */
    private static final String FIELD_JOINER = " ||| ";
    /** Number of phrase-table lines between progress messages in {@link #extractParaphrases}. */
    private static final int PHRASE_PROGRESS_INTERVAL = 10000000;
    /** Number of input lines between progress messages in {@link #groupParaphrases}. */
    private static final int GROUP_PROGRESS_INTERVAL = 1000000;

    /**
     * Trie over word ids holding every contiguous word sequence of the corpus sentences.
     *
     * <p>The node <em>is</em> the child table (it extends {@code Hashtable} instead of wrapping one)
     * so that each trie node costs a single object; the trie stores every suffix of every
     * sentence and can therefore get large.
     */
    private static final class PhraseTrie extends Hashtable<Integer, PhraseTrie> {
        private static final long serialVersionUID = 1L;

        /** Inserts {@code words[start..]} for every start index, creating missing nodes. */
        void insertAllSuffixes(int[] words) {
            for (int start = 0; start < words.length; start++) {
                PhraseTrie node = this;
                for (int k = start; k < words.length; k++) {
                    Integer key = Integer.valueOf(words[k]);
                    PhraseTrie child = node.get(key);
                    if (child == null) {
                        child = new PhraseTrie();
                        node.put(key, child);
                    }
                    node = child;
                }
            }
        }

        /** Returns whether {@code words} can be followed from this node; an empty array matches. */
        boolean containsSequence(int[] words) {
            PhraseTrie node = this;
            for (int word : words) {
                node = node.get(Integer.valueOf(word));
                if (node == null) {
                    return false;
                }
            }
            return true;
        }
    }

    /**
     * The parts of one phrase-table line that {@link #extractParaphrases} needs.
     *
     * <p>Field 0 is called "foreign" and field 1 "english" because they are filtered against the
     * foreign and english common-word lists respectively.
     */
    private static final class PhraseTableLine {
        final String foreign;
        final String english;
        /** First whitespace-separated number of field 2. */
        final double firstScore;
        /** Third whitespace-separated number of field 2. */
        final double thirdScore;

        private PhraseTableLine(String foreign, String english, double firstScore, double thirdScore) {
            this.foreign = foreign;
            this.english = english;
            this.firstScore = firstScore;
            this.thirdScore = thirdScore;
        }

        /**
         * Parses a line of the form {@code foreign ||| english ||| n1 n2 n3 ...}.
         *
         * @throws RuntimeException (index, no-such-element or number-format) if the line has
         *     fewer than three fields, field 2 has fewer than three tokens, or the first or
         *     third token is not a number; the second token is skipped without being parsed
         */
        static PhraseTableLine parse(String line) {
            String[] fields = line.split(FIELD_SPLIT_REGEX);
            String foreign = fields[0].trim();
            String english = fields[1].trim();
            StringTokenizer scores = new StringTokenizer(fields[2]);
            double first = Double.parseDouble(scores.nextToken());
            scores.nextToken();
            double third = Double.parseDouble(scores.nextToken());
            return new PhraseTableLine(foreign, english, first, third);
        }
    }

    /** Returns {@code true} when {@code str} contains none of the characters in {@code hashSet}. */
    private static boolean isClean(String str, HashSet<Character> hashSet) {
        for (int i = 0; i < str.length(); i++) {
            if (hashSet.contains(Character.valueOf(str.charAt(i)))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns {@code true} when at least one id in {@code iArr} is absent from {@code hashSet},
     * i.e. the phrase does not consist solely of common words. An empty array is not usable.
     */
    private static boolean isUsable(int[] iArr, HashSet<Integer> hashSet) {
        for (int id : iArr) {
            if (!hashSet.contains(Integer.valueOf(id))) {
                return true;
            }
        }
        return false;
    }

    /** Element-wise equality of two word-id arrays (neither may be {@code null}). */
    private static boolean eqWords(int[] iArr, int[] iArr2) {
        if (iArr.length != iArr2.length) {
            return false;
        }
        for (int i = 0; i < iArr.length; i++) {
            if (iArr[i] != iArr2[i]) {
                return false;
            }
        }
        return true;
    }

    /** Opens a gzip-compressed text resource for line-wise reading. */
    private static BufferedReader openGzipReader(URL url) throws IOException {
        return new BufferedReader(new InputStreamReader(new GZIPInputStream(url.openStream())));
    }

    /** Opens (creating or truncating) a gzip-compressed text file for writing. */
    private static PrintWriter openGzipWriter(String path) throws IOException {
        return new PrintWriter(new GZIPOutputStream(new FileOutputStream(new File(path))));
    }

    /**
     * Surfaces write failures that {@code PrintWriter} would otherwise swallow. Call after the
     * writer has been closed so that failures during the final flush are seen as well.
     */
    private static void verifyWritten(PrintWriter out, String path) throws IOException {
        if (out.checkError()) {
            throw new IOException("Failed to write output file: " + path);
        }
    }

    /** Reads one word per line from {@code wordFile} and returns the set of their ids. */
    private static HashSet<Integer> loadCommonWords(PhraseTable phraseTable, String wordFile) throws IOException {
        HashSet<Integer> ids = new HashSet<Integer>();
        try (BufferedReader reader = new BufferedReader(new FileReader(wordFile))) {
            String word;
            while ((word = reader.readLine()) != null) {
                ids.add(Integer.valueOf(phraseTable.mapWord(word)));
            }
        }
        return ids;
    }

    /**
     * Extracts paraphrase candidates by reading the gzipped phrase table twice.
     *
     * <p>Pass 1 registers, via {@code PhraseTable.addPhrasePair}, every (foreign, english) pair
     * whose first score is at least {@code d}, whose two phrases contain no character of
     * {@code str6}, neither of which consists solely of common words, and whose english phrase
     * occurs as a contiguous word sequence in the corpus.
     *
     * <p>Pass 2 applies the same score/character/common-word filters using the third score and,
     * for every phrase registered under the line's foreign phrase whose words differ from the
     * line's english phrase, writes
     * {@code registeredPhrase ||| english ||| foreign ||| registeredProb * thirdScore}
     * when that product is at least {@code d}.
     *
     * <p>Lines that cannot be processed are reported on {@code System.err} and skipped.
     *
     * @param str  corpus file, one sentence per line
     * @param str2 gzipped phrase table ({@code foreign ||| english ||| scores} per line)
     * @param str3 file of common foreign words, one per line
     * @param str4 file of common english words, one per line
     * @param str5 gzipped output file
     * @param d    minimum value for the scores and for the written probability
     * @param str6 characters that disqualify a phrase containing any of them
     * @throws IOException if a file cannot be read, or the output cannot be opened or written
     */
    public static void extractParaphrases(String str, String str2, String str3, String str4, String str5, double d, String str6) throws IOException {
        PhraseTable phraseTable = new PhraseTable();

        HashSet<Character> bannedChars = new HashSet<Character>();
        for (int i = 0; i < str6.length(); i++) {
            bannedChars.add(Character.valueOf(str6.charAt(i)));
        }

        System.err.println("Loading corpus");
        PhraseTrie corpusPhrases = new PhraseTrie();
        try (BufferedReader corpus = new BufferedReader(new FileReader(str))) {
            String sentence;
            while ((sentence = corpus.readLine()) != null) {
                corpusPhrases.insertAllSuffixes(phraseTable.mapPhrase(sentence));
            }
        }

        System.err.println("Loading common words (foreign)");
        HashSet<Integer> commonForeign = loadCommonWords(phraseTable, str3);
        System.err.println("Loading common words (english)");
        HashSet<Integer> commonEnglish = loadCommonWords(phraseTable, str4);

        URL phraseTableUrl = new File(str2).toURI().toURL();

        // Pass 1: collect the phrase pairs that survive filtering.
        try (BufferedReader phrases = openGzipReader(phraseTableUrl)) {
            int linesRead = 0;
            int pairsKept = 0;
            System.err.println("Loading phrases");
            String line;
            while ((line = phrases.readLine()) != null) {
                try {
                    linesRead++;
                    if (linesRead % PHRASE_PROGRESS_INTERVAL == 0) {
                        System.err.println(linesRead + " (" + pairsKept + ")");
                    }
                    PhraseTableLine entry = PhraseTableLine.parse(line);
                    // Negated ">=" (rather than "<") so that a NaN score is rejected.
                    if (!(entry.firstScore >= d)) {
                        continue;
                    }
                    if (!isClean(entry.english, bannedChars) || !isClean(entry.foreign, bannedChars)) {
                        continue;
                    }
                    int[] englishIds = phraseTable.mapPhrase(entry.english);
                    int[] foreignIds = phraseTable.mapPhrase(entry.foreign);
                    if (!isUsable(englishIds, commonEnglish) || !isUsable(foreignIds, commonForeign)) {
                        continue;
                    }
                    if (corpusPhrases.containsSequence(englishIds)) {
                        phraseTable.addPhrasePair(foreignIds, englishIds, entry.firstScore);
                        pairsKept++;
                    }
                } catch (Exception e) {
                    // Deliberate best effort: one malformed line must not abort the whole run.
                    System.err.println("Skipping problematic line: " + line);
                }
            }
        }

        // Pass 2: pivot every surviving line through the pairs collected in pass 1.
        PrintWriter out = openGzipWriter(str5);
        try (BufferedReader phrases = openGzipReader(phraseTableUrl)) {
            int linesRead = 0;
            int written = 0;
            System.err.println("Finding paraphrases");
            String line;
            while ((line = phrases.readLine()) != null) {
                try {
                    linesRead++;
                    if (linesRead % PHRASE_PROGRESS_INTERVAL == 0) {
                        System.err.println(linesRead + " (" + written + ")");
                    }
                    PhraseTableLine entry = PhraseTableLine.parse(line);
                    // Negated ">=" (rather than "<") so that a NaN score is rejected.
                    if (!(entry.thirdScore >= d)) {
                        continue;
                    }
                    if (!isClean(entry.english, bannedChars) || !isClean(entry.foreign, bannedChars)) {
                        continue;
                    }
                    int[] englishIds = phraseTable.mapPhrase(entry.english);
                    int[] foreignIds = phraseTable.mapPhrase(entry.foreign);
                    if (!isUsable(englishIds, commonEnglish) || !isUsable(foreignIds, commonForeign)) {
                        continue;
                    }
                    Iterator<PhraseTable.Phrase> candidates = phraseTable.getPhrases(foreignIds).iterator();
                    while (candidates.hasNext()) {
                        PhraseTable.Phrase candidate = candidates.next();
                        if (eqWords(candidate.words, englishIds)) {
                            // A phrase is not a paraphrase of itself.
                            continue;
                        }
                        double pivotProb = candidate.prob * entry.thirdScore;
                        if (pivotProb >= d) {
                            out.println(candidate.phrase + FIELD_JOINER + entry.english + FIELD_JOINER + entry.foreign + FIELD_JOINER + pivotProb);
                            written++;
                        }
                    }
                } catch (Exception e) {
                    // Deliberate best effort: one malformed line must not abort the whole run.
                    System.err.println("Skipping problematic line: " + line);
                }
            }
        } finally {
            // Always close so the gzip trailer is written and the file handle is released.
            out.close();
        }
        verifyWritten(out, str5);
    }

    /**
     * Writes every whitespace-separated token of the corpus whose relative frequency
     * (occurrences divided by the total number of tokens) is strictly greater than {@code d},
     * one token per line, in the iteration order of the underlying hash table.
     *
     * @param str  corpus file to count tokens in
     * @param str2 output file, created or truncated
     * @param d    relative-frequency threshold (exclusive)
     * @throws IOException if the corpus cannot be read, or the output cannot be opened or written
     */
    public static void findCommonWords(String str, String str2, double d) throws IOException {
        Hashtable<String, Integer> counts = new Hashtable<String, Integer>();
        long totalTokens = 0;
        try (BufferedReader corpus = new BufferedReader(new FileReader(str))) {
            String line;
            while ((line = corpus.readLine()) != null) {
                StringTokenizer tokens = new StringTokenizer(line);
                while (tokens.hasMoreTokens()) {
                    String word = tokens.nextToken();
                    Integer seen = counts.get(word);
                    counts.put(word, Integer.valueOf(seen == null ? 1 : seen.intValue() + 1));
                    totalTokens++;
                }
            }
        }

        PrintWriter out = new PrintWriter(str2);
        try {
            Enumeration<String> words = counts.keys();
            while (words.hasMoreElements()) {
                String word = words.nextElement();
                // Must be a floating-point division: with integer operands the quotient
                // truncates to 0 for every word that does not make up the entire corpus.
                double relativeFrequency = (double) counts.get(word).intValue() / totalTokens;
                if (relativeFrequency > d) {
                    out.println(word);
                }
            }
        } finally {
            out.close();
        }
        verifyWritten(out, str2);
    }

    /**
     * Reads gzipped lines {@code ref ||| par ||| (ignored) ||| prob}, sorts them by the text of
     * {@code ref} and then of {@code par}, and writes gzipped lines {@code ref ||| par ||| prob}.
     * The sort is stable, so entries with the same pair keep their input order. All entries are
     * held in memory.
     *
     * @param str  gzipped input file
     * @param str2 gzipped output file
     * @throws IOException if the input cannot be read, or the output cannot be opened or written
     * @throws RuntimeException if a line has fewer than four fields or a non-numeric fourth field
     */
    public static void groupParaphrases(String str, String str2) throws IOException {
        final PhraseTable phraseTable = new PhraseTable();
        ArrayList<Paraphrase> paraphrases = new ArrayList<Paraphrase>();

        try (BufferedReader in = openGzipReader(new File(str).toURI().toURL())) {
            int linesRead = 0;
            String line;
            while ((line = in.readLine()) != null) {
                linesRead++;
                if (linesRead % GROUP_PROGRESS_INTERVAL == 0) {
                    System.err.println(linesRead);
                }
                String[] fields = line.split(FIELD_SPLIT_REGEX);
                paraphrases.add(new Paraphrase(
                        phraseTable.mapPhrase(fields[0].trim()),
                        phraseTable.mapPhrase(fields[1].trim()),
                        Double.parseDouble(fields[3].trim())));
            }
        }

        Collections.sort(paraphrases, new Comparator<Paraphrase>() {
            @Override
            public int compare(Paraphrase left, Paraphrase right) {
                int byRef = phraseTable.unmapPhrase(left.ref).compareTo(phraseTable.unmapPhrase(right.ref));
                if (byRef != 0) {
                    return byRef;
                }
                return phraseTable.unmapPhrase(left.par).compareTo(phraseTable.unmapPhrase(right.par));
            }
        });

        PrintWriter out = openGzipWriter(str2);
        try {
            for (Paraphrase paraphrase : paraphrases) {
                out.println(phraseTable.unmapPhrase(paraphrase.ref) + FIELD_JOINER + phraseTable.unmapPhrase(paraphrase.par) + FIELD_JOINER + paraphrase.prob);
            }
        } finally {
            out.close();
        }
        verifyWritten(out, str2);
    }

    /**
     * Merges adjacent gzipped input lines {@code ref ||| par ||| prob} that share the same
     * {@code ref} and {@code par} by summing their probabilities. A merged entry is written as
     * {@code ref ||| par ||| sum} when {@code ref} is non-empty, the sum is at least {@code d},
     * and neither phrase contains the other (see {@link #subphrase}).
     *
     * <p>Only adjacent lines are merged, so the input has to be grouped by pair beforehand.
     *
     * @param str  gzipped input file
     * @param str2 gzipped output file
     * @param d    minimum summed probability for an entry to be written
     * @throws IOException if the input cannot be read, or the output cannot be opened or written
     * @throws RuntimeException if a line has fewer than three fields or a non-numeric third field
     */
    public static void combineParaphrases(String str, String str2, double d) throws IOException {
        try (BufferedReader in = openGzipReader(new File(str).toURI().toURL())) {
            PrintWriter out = openGzipWriter(str2);
            try {
                String currentRef = "";
                String currentPar = "";
                double sum = 0.0d;
                String line;
                while ((line = in.readLine()) != null) {
                    String[] fields = line.split(FIELD_SPLIT_REGEX);
                    String ref = fields[0].trim();
                    String par = fields[1].trim();
                    double prob = Double.parseDouble(fields[2].trim());
                    if (!ref.equals(currentRef) || !par.equals(currentPar)) {
                        // A new pair starts: flush the previous one and restart the sum.
                        writeCombined(out, currentRef, currentPar, sum, d);
                        currentRef = ref;
                        currentPar = par;
                        sum = 0.0d;
                    }
                    sum += prob;
                }
                // Flush the last pair.
                writeCombined(out, currentRef, currentPar, sum, d);
            } finally {
                out.close();
            }
            verifyWritten(out, str2);
        }
    }

    /** Writes one merged entry if it passes the filters described on {@link #combineParaphrases}. */
    private static void writeCombined(PrintWriter out, String ref, String par, double sum, double minProb) {
        if (!ref.equals("") && sum >= minProb && !subphrase(ref, par)) {
            out.println(ref + FIELD_JOINER + par + FIELD_JOINER + sum);
        }
    }

    /**
     * Returns {@code true} when either phrase occurs inside the other on word boundaries, where
     * words are delimited by single spaces; equal phrases count as subphrases of each other.
     */
    public static boolean subphrase(String str, String str2) {
        String padded = " " + str + " ";
        String padded2 = " " + str2 + " ";
        return padded.contains(padded2) || padded2.contains(padded);
    }
}
