package de.informaticup2012.geocrosswords.crossword;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: classes.dex */
public class WiktionaryCrawler {
    String basePath;
    String lang;
    int numCrawled = 0;
    LinkedList<String> queue = new LinkedList<>();
    LinkedList<String> crawled = new LinkedList<>();
    private Vector<ICrawlerListener> mListeners = new Vector<>();

    /* loaded from: classes.dex */
    class Pair {
        public String hint;
        public String word;

        public Pair(String str, String str2) {
            this.word = str;
            this.hint = str2;
        }
    }

    public WiktionaryCrawler(String str) {
        this.lang = str;
        this.basePath = "http://" + str + ".wiktionary.org/wiki/";
    }

    public void addListener(ICrawlerListener iCrawlerListener) {
        this.mListeners.add(iCrawlerListener);
    }

    public void clearListeners() {
        this.mListeners.clear();
    }

    public void crawl(String str, int i, String str2) {
        Pair pair;
        try {
            BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(str2));
            bufferedWriter.write("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n<root>\n");
            this.queue.add(str);
            while (this.numCrawled < i && this.queue.size() != 0) {
                String first = this.queue.getFirst();
                boolean z = false;
                Iterator<String> it = this.crawled.iterator();
                while (it.hasNext()) {
                    if (it.next().equals(first)) {
                        z = true;
                    }
                }
                System.out.println("Looking at: " + first + ". " + this.numCrawled + "/" + i);
                if (z || first.matches(".*[_/\\-,; ()].*")) {
                    System.out.println("igitt, raus damit");
                    this.queue.remove(0);
                } else {
                    boolean z2 = false;
                    boolean z3 = false;
                    BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new URL(this.basePath + first).openStream()));
                    while (true) {
                        String readLine = bufferedReader.readLine();
                        if (readLine == null) {
                            break;
                        }
                        if (readLine.matches(".*<div id=\"bodyContent\">.*")) {
                            z2 = true;
                        }
                        if (readLine.matches(".*<h4>.*Übersetzungen.*")) {
                            z2 = false;
                        }
                        if (readLine.matches(".*Semantik.*Bedeutunge?n?.*")) {
                            z3 = true;
                        }
                        if (z2) {
                            if (z3 && readLine.matches(".*<dd>\\[1\\].*")) {
                                Matcher matcher = Pattern.compile("<dd>\\[1\\] (.*)</dd>").matcher(readLine);
                                while (matcher.find()) {
                                    String replaceAll = matcher.group(1).replaceAll("<.[^>]*>", "");
                                    if (first.matches(".*\\.")) {
                                        pair = new Pair(first.replaceAll("\\.$", ""), "Abk.: " + replaceAll);
                                        System.out.println("Abkürzung!");
                                    } else {
                                        pair = new Pair(first, replaceAll);
                                    }
                                    this.numCrawled++;
                                    bufferedWriter.write("<word lang=\"" + this.lang + "\" value=\"" + pair.word.replaceAll("\"", "&quot;") + "\" hint=\"" + pair.hint.replaceAll("\"", "&quot;") + "\" />\n");
                                    System.out.println("Hinzugefügt.");
                                    for (int i2 = 0; i2 < this.mListeners.size(); i2++) {
                                        this.mListeners.get(i2).wordCrawled(this.numCrawled, i);
                                    }
                                    if (this.numCrawled >= i) {
                                        break;
                                    }
                                }
                                z3 = false;
                            }
                            Matcher matcher2 = Pattern.compile("<a href=\"/wiki/([^\"]*)\"").matcher(readLine);
                            while (matcher2.find()) {
                                String group = matcher2.group(1);
                                if (group.indexOf(":") <= 0) {
                                    this.queue.add(group);
                                }
                            }
                        }
                    }
                    bufferedReader.close();
                    this.crawled.add(first);
                    this.queue.remove(0);
                }
            }
            bufferedWriter.write("</root>\n");
            bufferedWriter.close();
        } catch (Exception e) {
        }
    }
}
