/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.language.identifier;

import com.optimaize.langdetect.text.TextFilter;
import java.io.IOException;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import lombok.Generated;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.languagetool.DetectedLanguage;
import org.languagetool.Language;
import org.languagetool.language.identifier.detector.CommonWordsDetector;
import org.languagetool.language.identifier.detector.UnicodeBasedDetector;

/**
 * Base class for language identifiers.
 *
 * <p>Provides the shared building blocks visible in this file: text-cleaning
 * filters (URLs, e-mail addresses, e-mail signatures, mentions, non-breaking
 * spaces), normalization of the caller-supplied language lists, and small
 * helpers for ranking score maps. The actual detection is left to subclasses
 * through the abstract {@code detectLanguage} / {@code getDetectedLanguageScores}
 * methods.
 */
public abstract class LanguageIdentifier {
    /** Matches http/https URLs; used by {@link #REMOVE_URL_FILTER}. */
    private static final Pattern URL_REGEX = Pattern.compile("https?://[-_.?&~;+=/#%0-9A-Za-z]+");
    /** Matches e-mail addresses; used by {@link #REMOVE_URL_FILTER}. */
    private static final Pattern MAIL_REGEX = Pattern.compile("[-_.0-9A-Za-z]+@[-_0-9A-Za-z]+[-_.0-9A-Za-z]+");
    /**
     * Matches an e-mail signature: a line consisting of two dashes followed by a
     * space or a non-breaking space (U+00A0), plus everything after it (DOTALL).
     */
    private static final Pattern SIGNATURE = Pattern.compile("\n--[ \u00a0]\n.*", Pattern.DOTALL);
    /** Matches an {@code @name} style mention. */
    private static final Pattern MENTION = Pattern.compile("@[A-Za-z0-9_]+");
    /** Matches runs of U+FEFF / U+2063 characters, which are replaced by a space during cleaning. */
    private static final Pattern NBSP_INVIS_SEPARATOR = Pattern.compile("[\ufeff\u2063]+");
    /**
     * Preferred-language codes that, when present, stop {@link #prepareDetectLanguage}
     * from adding the Unicode-dominant language codes to the language lists.
     */
    private static final List<String> SKIP_DOMINANT_LANGS_FOR = Arrays.asList("ru", "uk", "be", "zh", "hi", "mr");

    /** Score threshold; not used in this class — presumably consumed by subclasses (confirm). */
    protected static final float SCORE_THRESHOLD = 0.85f;
    /** Threshold of 50; not used in this class — presumably a text length consumed by subclasses (confirm). */
    protected static final int CONSIDER_ONLY_PREFERRED_THRESHOLD = 50;
    /** Language codes of languages that are written with non-Latin characters. */
    protected static final List<String> NON_LATIN_CHARS_LANGUAGES = Arrays.asList("ar", "fa", "ru", "uk", "be", "zh", "ja", "km", "ta", "el", "hi", "mr", "th", "he", "ko");
    /** Removes the first e-mail signature (and everything following it) from the text. */
    protected static final TextFilter REMOVE_EMAIL_SIGNATURE_FILTER = text -> SIGNATURE.matcher(text).replaceFirst("");
    /**
     * Removes the first {@code @mention} from the text.
     * NOTE(review): only the first occurrence is removed (replaceFirst) — confirm
     * whether removing all mentions was intended before changing this.
     */
    protected static final TextFilter REMOVE_MENTION_FILTER = text -> MENTION.matcher(text).replaceFirst("");
    /** Replaces every non-breaking space (U+00A0) with a regular space. */
    protected static final TextFilter REMOVE_NON_BREAKING_SPACES_FILTER = text -> text.toString().replace('\u00a0', ' ');
    /** Replaces every URL and then every e-mail address with a single space. */
    protected static final TextFilter REMOVE_URL_FILTER = text -> MAIL_REGEX.matcher(URL_REGEX.matcher(text).replaceAll(" ")).replaceAll(" ");
    /** Shared detector that reports dominant language codes based on the characters of a text. */
    protected static final UnicodeBasedDetector UNICODE_BASED_LANG_IDENTIFIER = new UnicodeBasedDetector();
    /** Shared common-words detector; created once in the static initializer below. */
    protected static final CommonWordsDetector COMMON_WORDS_LANG_IDENTIFIER;
    /** Maximum number of characters of the input that {@link #cleanAndShortenText} keeps. */
    protected int maxLength;

    /**
     * @param maxLength maximum number of characters considered by {@link #cleanAndShortenText}
     * @throws IllegalArgumentException if {@code maxLength} is smaller than 10
     */
    public LanguageIdentifier(int maxLength) {
        if (maxLength < 10) {
            throw new IllegalArgumentException("maxLength must be >= 10 (but values > 100 are recommended): " + maxLength);
        }
        this.maxLength = maxLength;
    }

    /**
     * Detects the language of a text.
     *
     * <p>NOTE(review): the parameter names were lost in decompilation. Judging by
     * {@link #prepareDetectLanguage}, which has the same parameter types, they are
     * presumably the text, the additional ("noop") language codes and the
     * preferred language codes — confirm against the implementations.
     *
     * @return the detected language, or {@code null}
     */
    @Nullable
    public abstract DetectedLanguage detectLanguage(String var1, List<String> var2, List<String> var3);

    /**
     * Variant of {@link #detectLanguage(String, List, List)} with an additional boolean switch.
     *
     * <p>NOTE(review): the meaning of the boolean is not visible in this file —
     * confirm against the implementations.
     *
     * @return the detected language, or {@code null}
     */
    @Nullable
    public abstract DetectedLanguage detectLanguage(String var1, List<String> var2, List<String> var3, boolean var4);

    /**
     * Returns a list of detected languages with their scores.
     *
     * <p>NOTE(review): parameter semantics are not visible in this file; the trailing
     * {@code int} is presumably the maximum number of results — confirm.
     *
     * @return a non-null list of detected languages
     */
    @NotNull
    public abstract List<DetectedLanguage> getDetectedLanguageScores(String var1, List<String> var2, List<String> var3, boolean var4, int var5);

    /**
     * Detects the language of a text without any language hints.
     *
     * @return the detected language, or {@code null}
     */
    @Nullable
    public abstract Language detectLanguage(String var1);

    /**
     * Truncates the text to at most {@link #maxLength} characters and then strips
     * content that is handled by the filters of this class.
     *
     * <p>Order matters: the text is shortened first, then U+FEFF / U+2063 runs are
     * replaced by a space, URLs and e-mail addresses are blanked out, the e-mail
     * signature and the first mention are removed, and finally non-breaking
     * spaces are turned into regular spaces.
     *
     * @param text the raw input text, must not be {@code null}
     * @return the shortened and cleaned text
     */
    public String cleanAndShortenText(String text) {
        String shortText = text.length() > this.maxLength ? text.substring(0, this.maxLength) : text;
        shortText = NBSP_INVIS_SEPARATOR.matcher(shortText).replaceAll(" ");
        shortText = REMOVE_URL_FILTER.filter(shortText);
        shortText = REMOVE_EMAIL_SIGNATURE_FILTER.filter(shortText);
        shortText = REMOVE_MENTION_FILTER.filter(shortText);
        // Must run after the signature filter: the signature pattern itself matches a non-breaking space.
        shortText = REMOVE_NON_BREAKING_SPACES_FILTER.filter(shortText);
        return shortText;
    }

    /**
     * Normalizes the caller-supplied language lists before detection.
     *
     * <p>The code {@code "nb"} is mapped to {@code "no"} in both lists. Unless the
     * preferred languages contain one of {@code ru, uk, be, zh, hi, mr}, the
     * dominant language codes reported by {@link #UNICODE_BASED_LANG_IDENTIFIER}
     * for {@code text} are appended to both lists.
     *
     * @param text the text whose dominant language codes are looked up
     * @param noopLangsTmp additional language codes, must not be {@code null}
     * @param preferredLangsTmp preferred language codes without variants, must not be {@code null}
     * @return the normalized lists, or {@code null} if the dominant language codes
     *         are exactly {@code th}, {@code he}, {@code ko} or {@code hi,mr}
     * @throws NullPointerException if one of the lists is {@code null}
     * @throws IllegalArgumentException if a preferred language code contains a {@code '-'}
     */
    @Nullable
    protected ParsedLanguageLists prepareDetectLanguage(String text, List<String> noopLangsTmp, List<String> preferredLangsTmp) {
        Objects.requireNonNull(noopLangsTmp);
        Objects.requireNonNull(preferredLangsTmp);
        // Both lists are appended to below, so both must be guaranteed-mutable copies.
        List<String> additionalLangs = mapNorwegianBokmaal(noopLangsTmp);
        List<String> preferredLangs = mapNorwegianBokmaal(preferredLangsTmp);
        if (preferredLangs.stream().anyMatch(code -> code.contains("-"))) {
            throw new IllegalArgumentException("preferredLanguages may only contain language codes without variants (e.g. 'en', but not 'en-US'): " + preferredLangs + ". Use 'preferredVariants' to specify variants.");
        }
        List<String> domLangCodes = UNICODE_BASED_LANG_IDENTIFIER.getDominantLangCodes(text);
        String domLangStr = String.join(",", domLangCodes);
        if (domLangStr.equals("th") || domLangStr.equals("he") || domLangStr.equals("ko") || domLangStr.equals("hi,mr")) {
            return null;
        }
        if (Collections.disjoint(preferredLangs, SKIP_DOMINANT_LANGS_FOR)) {
            preferredLangs.addAll(domLangCodes);
            additionalLangs.addAll(domLangCodes);
        }
        return new ParsedLanguageLists(additionalLangs, preferredLangs);
    }

    /** Returns a mutable copy of {@code codes} in which every {@code "nb"} is replaced by {@code "no"}. */
    private static List<String> mapNorwegianBokmaal(List<String> codes) {
        return codes.stream()
                .map(code -> code.equals("nb") ? "no" : code)
                .collect(Collectors.toCollection(ArrayList::new));
    }

    /**
     * Finds the entry with the highest value.
     *
     * @param probs scores per key
     * @return an immutable entry holding the best key and its score; if no value is
     *         greater than {@code -1.0} (e.g. for an empty map) the key is
     *         {@code null} and the score is {@code -1.0}
     */
    protected Map.Entry<String, Double> getHighestScoringResult(Map<String, Double> probs) {
        String bestKey = null;
        double bestScore = -1.0;
        for (Map.Entry<String, Double> entry : probs.entrySet()) {
            double score = entry.getValue();
            if (score > bestScore) {
                bestScore = score;
                bestKey = entry.getKey();
            }
        }
        return new AbstractMap.SimpleImmutableEntry<>(bestKey, bestScore);
    }

    /**
     * Returns the {@code count} highest-scoring entries, best first.
     *
     * @param scores scores per key
     * @param count maximum number of entries to return; values {@code <= 0} yield an empty map
     * @return a new map whose iteration order is by descending score
     */
    protected Map<String, Double> getOrderedScores(Map<String, Double> scores, int count) {
        List<Map.Entry<String, Double>> entries = new ArrayList<>(scores.entrySet());
        entries.sort(Map.Entry.comparingByValue(Collections.reverseOrder()));
        // LinkedHashMap keeps the descending-score insertion order.
        Map<String, Double> sortedScores = new LinkedHashMap<>();
        int limit = Math.min(count, entries.size());
        for (int i = 0; i < limit; i++) {
            Map.Entry<String, Double> entry = entries.get(i);
            sortedScores.put(entry.getKey(), entry.getValue());
        }
        return sortedScores;
    }

    static {
        try {
            COMMON_WORDS_LANG_IDENTIFIER = new CommonWordsDetector();
        }
        catch (IOException ex) {
            // The detector's constructor can fail with an I/O error; there is no way to recover in a static initializer.
            throw new RuntimeException(ex);
        }
    }

    /**
     * Holder for the normalized additional and preferred language code lists
     * produced by {@link LanguageIdentifier#prepareDetectLanguage}.
     */
    protected static class ParsedLanguageLists {
        private final List<String> additionalLangs = new ArrayList<>();
        private final List<String> preferredLangs = new ArrayList<>();

        /**
         * Copies both lists, so later changes to the arguments do not affect this object.
         *
         * @param additionalLangs additional language codes
         * @param preferredLangs preferred language codes
         */
        public ParsedLanguageLists(List<String> additionalLangs, List<String> preferredLangs) {
            this.additionalLangs.addAll(additionalLangs);
            this.preferredLangs.addAll(preferredLangs);
        }

        /** @return the internal (mutable) list of additional language codes */
        @Generated
        public List<String> getAdditionalLangs() {
            return this.additionalLangs;
        }

        /** @return the internal (mutable) list of preferred language codes */
        @Generated
        public List<String> getPreferredLangs() {
            return this.preferredLangs;
        }
    }
}

