/*
 * Decompiled with CFR 0.152.
 */
package com.ibm.team.fulltext.common.internal.query;

import com.ibm.team.fulltext.common.internal.StandardTokenTypes;
import com.ibm.team.fulltext.common.internal.index.IIndexAccess;
import com.ibm.team.fulltext.common.internal.query.Messages;
import com.ibm.team.fulltext.common.internal.query.QueryManagerImpl;
import com.ibm.team.fulltext.common.internal.util.QueryUtils;
import com.ibm.team.repository.common.util.NLS;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.PriorityQueue;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.TFIDFSimilarity;
import org.apache.lucene.util.BytesRef;
import org.eclipse.core.runtime.Assert;

public class FindRelatedQueryBuilder {
    /** Default minimum frequency a term needs in the source before it is ranked at all. */
    public static final int DEFAULT_MIN_TERM_FREQ = 1;
    /** Default minimum term length; shorter terms are dropped unless they are a single CJK or Thai character. */
    public static final int DEFAULT_MIN_WORD_LEN = 3;
    /** Default cap used when limiting the number of content-sourced clauses in the generated query. */
    public static final int DEFAULT_MAX_QUERY_TERMS = 48;
    /** Default fraction of the documents in scope above which a term is considered too common to be useful. */
    public static final float DEFAULT_MAX_DOC_FREQ_RATIO = 0.5f;
    /** Default divisor applied to the clause count to derive the minimum-should-match value for large queries. */
    public static final int DEFAULT_MIN_SHOULD_MATCH_RATIO = 8;
    /** Token types that are ignored when analysing free text (camel-case parts). */
    private static final Set<String> DEFAULT_SKIP_TOKEN_TYPES = new HashSet<String>(Arrays.asList(StandardTokenTypes.CAMELCASE_PART.getId()));
    /** Index fields whose document frequency is consulted when ranking a term. */
    public static final EnumSet<FusionQueryField> DEFAULT_QUERY_FIELDS = EnumSet.of(FusionQueryField.CONTENT, FusionQueryField.NAME);
    /** Parts of the source document (or source text) that terms are extracted from. */
    public static final EnumSet<FusionQueryContent> DEFAULT_QUERY_CONTENT = EnumSet.of(FusionQueryContent.CONTENT, FusionQueryContent.NAME);
    /** Default scoring formula for ranking terms. */
    public static final FusionTermScorer DEFAULT_TERM_SCORER = FusionTermScorer.BM25;
    /** Default term-frequency weighting applied before scoring. */
    public static final FusionWeightFunction DEFAULT_WEIGHT_FUNCTION = FusionWeightFunction.NONE;

    // Active configuration. These are static, i.e. shared by every builder instance,
    // and are (re)assigned by updateConfig().
    private static int fMinTermFreq = DEFAULT_MIN_TERM_FREQ;
    private static int fMinWordLen = DEFAULT_MIN_WORD_LEN;
    private static int fMaxQueryTerms = DEFAULT_MAX_QUERY_TERMS;
    private static float fMaxDocFreqRatio = DEFAULT_MAX_DOC_FREQ_RATIO;
    private static int fMinShouldMatchRatio = DEFAULT_MIN_SHOULD_MATCH_RATIO;
    private static final Set<String> fSkipTokenTypes = DEFAULT_SKIP_TOKEN_TYPES;
    private static FusionTermScorer fTermScorer = DEFAULT_TERM_SCORER;
    private static FusionWeightFunction fWeightFunction = DEFAULT_WEIGHT_FUNCTION;
    // NOTE: these two alias the public DEFAULT_* sets (same object) until updateConfig() replaces them.
    private static EnumSet<FusionQueryField> fQueryFields = DEFAULT_QUERY_FIELDS;
    private static EnumSet<FusionQueryContent> fQueryContent = DEFAULT_QUERY_CONTENT;
    // NOTE(review): presumably the scope size above which the max-doc-frequency ratio filter
    // is applied; the ranking code compares against a literal 100 - confirm they are meant to match.
    private static final int MAX_DOC_FREQ_RATIO_THRESHOLD = 100;

    /** Similarity used to obtain the IDF factor for TF-IDF term scoring. */
    private final TFIDFSimilarity fSimilarity = new ClassicSimilarity();
    /** Number of ranked terms classified as unknown during the last buildQuery call. */
    private int fUnknownTermCount = 0;
    /** Number of ranked terms classified as known during the last buildQuery call. */
    private int fKnownTermCount = 0;

    /**
     * Builds a "find related" query for a document that is already indexed.
     *
     * <p>The document is located through its {@code _id} term; its internal document
     * number is then used to read the terms the query is built from. The known and
     * unknown term counters are reset at the start of every call.
     *
     * @param id            value of the {@code _id} term of the source document; the
     *                      document is also excluded from the results via the filter query
     * @param type          passed to {@code indexAccess} when looking up postings and term vectors
     * @param constrainType if non-null, results are restricted to this artifact/container type
     * @param searchScope   scope handed to {@code indexAccess} for document counts and frequencies
     * @param indexAccess   index access used for all lookups
     * @return a pair of (like query, filter query); the filter query may be {@code null}
     * @throws IOException if an index lookup fails
     */
    public QueryUtils.Pair<BooleanQuery, BooleanQuery> buildQuery(String id, String type, String constrainType, String[] searchScope, IIndexAccess indexAccess) throws IOException {
        this.fUnknownTermCount = 0;
        this.fKnownTermCount = 0;

        PostingsEnum idPostings = indexAccess.postings(type, new Term("_id", id));
        // Integer.MAX_VALUE is the "no document" marker, both for missing postings
        // and for an exhausted postings enumeration.
        int docId;
        if (idPostings == null) {
            docId = Integer.MAX_VALUE;
        } else {
            docId = idPostings.nextDoc();
        }
        boolean documentFound = docId != Integer.MAX_VALUE;
        Assert.isTrue(documentFound, (String)NLS.bind((String)Messages.getString("FindRelatedQueryBuilder.ERROR_UNABLE_FIND_DOCUMENT"), (Object)id, (Object[])new Object[]{type}));

        BooleanQuery relatedQuery = this.internalBuildQuery(docId, type, searchScope, indexAccess);
        BooleanQuery filterQuery = this.buildFilterQuery(relatedQuery, id, constrainType);
        return QueryUtils.Pair.create(relatedQuery, filterQuery);
    }

    /**
     * Builds a "find related" query from free text without excluding any document.
     *
     * <p>Convenience overload: delegates to the seven-argument variant with a
     * {@code null} exclude id.
     *
     * @param name          text analysed as the name part; may be {@code null}
     * @param content       text analysed as the content part; may be {@code null}
     * @param constrainType if non-null, results are restricted to this artifact/container type
     * @param searchScope   scope handed to {@code indexAccess}
     * @param indexAccess   index access used for document counts and frequencies
     * @param analyzer      analyzer used to tokenize {@code name} and {@code content}
     * @return a pair of (like query, filter query); the filter query may be {@code null}
     * @throws IOException if analysis or an index lookup fails
     */
    public QueryUtils.Pair<BooleanQuery, BooleanQuery> buildQuery(String name, String content, String constrainType, String[] searchScope, IIndexAccess indexAccess, Analyzer analyzer) throws IOException {
        final String noExcludedId = null;
        return this.buildQuery(noExcludedId, name, content, constrainType, searchScope, indexAccess, analyzer);
    }

    /**
     * Builds a "find related" query from free text, optionally excluding one document.
     *
     * <p>The known and unknown term counters are reset at the start of every call.
     *
     * @param excludeId     if non-null, documents whose {@code _id} or {@code _containerId}
     *                      equals this value are excluded by the filter query
     * @param name          text analysed as the name part; may be {@code null}
     * @param content       text analysed as the content part; may be {@code null}
     * @param constrainType if non-null, results are restricted to this artifact/container type
     * @param searchScope   scope handed to {@code indexAccess}
     * @param indexAccess   index access used for document counts and frequencies
     * @param analyzer      analyzer used to tokenize {@code name} and {@code content}
     * @return a pair of (like query, filter query); the filter query may be {@code null}
     * @throws IOException if analysis or an index lookup fails
     */
    public QueryUtils.Pair<BooleanQuery, BooleanQuery> buildQuery(String excludeId, String name, String content, String constrainType, String[] searchScope, IIndexAccess indexAccess, Analyzer analyzer) throws IOException {
        this.fUnknownTermCount = 0;
        this.fKnownTermCount = 0;

        BooleanQuery relatedQuery = this.internalBuildQuery(name, content, searchScope, indexAccess, analyzer);
        BooleanQuery filterQuery = this.buildFilterQuery(relatedQuery, excludeId, constrainType);
        return QueryUtils.Pair.create(relatedQuery, filterQuery);
    }

    /**
     * @return the number of terms classified as unknown while ranking during the
     *         most recent {@code buildQuery} call on this instance
     */
    int getUnknownTermCount() {
        return fUnknownTermCount;
    }

    /**
     * @return the number of terms classified as known (and therefore scored) while
     *         ranking during the most recent {@code buildQuery} call on this instance
     */
    int getKnownTermCount() {
        return fKnownTermCount;
    }

    /**
     * Builds the companion filter for a like query.
     *
     * <p>All filter clauses are wrapped in a {@link BoostQuery} with boost {@code 0.0f}.
     *
     * @param likeQuery     the like query; when it has no clauses no filter is built
     * @param excludeId     if non-null, adds MUST_NOT clauses on {@code _id} and {@code _containerId}
     * @param constrainType if non-null, adds a MUST clause requiring {@code _artifactType}
     *                      or {@code _containerType} to equal this value
     * @return the filter query, or {@code null} when the like query is empty or the
     *         built filter ends up without clauses
     */
    private BooleanQuery buildFilterQuery(BooleanQuery likeQuery, String excludeId, String constrainType) {
        if (likeQuery.clauses().isEmpty()) {
            return null;
        }

        BooleanQuery.Builder filterBuilder = new BooleanQuery.Builder();

        if (excludeId != null) {
            // Exclude the document itself ...
            Query notSelf = new BoostQuery((Query)new TermQuery(new Term("_id", excludeId)), 0.0f);
            filterBuilder = QueryUtils.conditionalAdd(filterBuilder, notSelf, BooleanClause.Occur.MUST_NOT);
            // ... and everything that names it as its container.
            Query notContained = new BoostQuery((Query)new TermQuery(new Term("_containerId", excludeId)), 0.0f);
            filterBuilder = QueryUtils.conditionalAdd(filterBuilder, notContained, BooleanClause.Occur.MUST_NOT);
        }

        if (constrainType != null) {
            // Either the artifact type or the container type has to match.
            BooleanQuery.Builder eitherTypeBuilder = new BooleanQuery.Builder();
            Query byArtifactType = new TermQuery(new Term("_artifactType", constrainType));
            eitherTypeBuilder = QueryUtils.conditionalAdd(eitherTypeBuilder, byArtifactType, BooleanClause.Occur.SHOULD);
            Query byContainerType = new TermQuery(new Term("_containerType", constrainType));
            eitherTypeBuilder = QueryUtils.conditionalAdd(eitherTypeBuilder, byContainerType, BooleanClause.Occur.SHOULD);
            Query typeConstraint = new BoostQuery((Query)eitherTypeBuilder.build(), 0.0f);
            filterBuilder = QueryUtils.conditionalAdd(filterBuilder, typeConstraint, BooleanClause.Occur.MUST);
        }

        BooleanQuery filter = filterBuilder.build();
        return filter.clauses().size() == 0 ? null : filter;
    }

    /**
     * Builds the like query from the term vectors of an indexed document.
     *
     * @param docId       internal document number of the source document
     * @param type        passed to {@code indexAccess} when reading term vectors
     * @param searchScope scope used for document counts and frequencies
     * @param indexAccess index access used for all lookups
     * @return the like query (possibly without clauses)
     * @throws IOException if an index lookup fails
     */
    private BooleanQuery internalBuildQuery(int docId, String type, String[] searchScope, IIndexAccess indexAccess) throws IOException {
        // "true": the terms were read from the index rather than analysed from text.
        return this.internalBuildQuery(this.retrieveQueryTerms(docId, type, indexAccess), true, searchScope, indexAccess);
    }

    /**
     * Builds the like query from free text that is tokenized with the given analyzer.
     *
     * @param name        text analysed as the name part; may be {@code null}
     * @param content     text analysed as the content part; may be {@code null}
     * @param searchScope scope used for document counts and frequencies
     * @param indexAccess index access used for all lookups
     * @param analyzer    analyzer used to tokenize the text
     * @return the like query (possibly without clauses)
     * @throws IOException if analysis or an index lookup fails
     */
    private BooleanQuery internalBuildQuery(String name, String content, String[] searchScope, IIndexAccess indexAccess, Analyzer analyzer) throws IOException {
        // "false": the terms come from analysed text, not from the index.
        return this.internalBuildQuery(this.retrieveQueryTerms(name, content, analyzer), false, searchScope, indexAccess);
    }

    /**
     * Ranks the candidate terms and turns the ranking into a like query.
     *
     * @param queryTerms       candidate terms
     * @param isTermsFromIndex whether the terms were read from an indexed document
     * @param searchScope      scope used for document counts and frequencies
     * @param indexAccess      index access used for all lookups
     * @return the like query (possibly without clauses)
     * @throws IOException if an index lookup fails
     */
    private BooleanQuery internalBuildQuery(Collection<QueryTerm> queryTerms, boolean isTermsFromIndex, String[] searchScope, IIndexAccess indexAccess) throws IOException {
        return this.createQuery(this.rankQueryTerms(searchScope, queryTerms, isTermsFromIndex, indexAccess));
    }

    /**
     * Collects candidate terms from the term vectors of an indexed document.
     *
     * <p>One term vector is read per configured query content ({@code _name},
     * {@code _content}, {@code _tags}, {@code _meta}); contents without a term vector
     * are skipped. Terms shorter than the configured minimum word length are dropped
     * unless they consist of a single CJK or Thai character.
     *
     * @param docId       internal document number of the source document
     * @param type        passed to {@code indexAccess} when reading term vectors
     * @param indexAccess index access used to read the term vectors
     * @return the candidate terms, each tagged with the content it came from
     * @throws IOException if reading a term vector fails
     */
    private Collection<QueryTerm> retrieveQueryTerms(int docId, String type, IIndexAccess indexAccess) throws IOException {
        ArrayList<QueryTerm> collected = new ArrayList<QueryTerm>();
        for (FusionQueryContent source : fQueryContent) {
            // Map the content kind to the index field holding its term vector.
            String vectorField = null;
            switch (source) {
                case NAME: {
                    vectorField = "_name";
                    break;
                }
                case CONTENT: {
                    vectorField = "_content";
                    break;
                }
                case TAGS: {
                    vectorField = "_tags";
                    break;
                }
                case META: {
                    vectorField = "_meta";
                    break;
                }
            }
            Terms termVector = vectorField == null ? null : indexAccess.getTermVector(docId, type, vectorField);
            if (termVector == null) {
                continue;
            }

            TermsEnum termsEnum = termVector.iterator();
            for (BytesRef raw = termsEnum.next(); raw != null; raw = termsEnum.next()) {
                String text = raw.utf8ToString();
                int frequency = (int)termsEnum.totalTermFreq();
                if (text.length() < fMinWordLen) {
                    // Short terms survive only as a single CJK or Thai character.
                    if (text.length() != 1) {
                        continue;
                    }
                    char only = text.charAt(0);
                    if (!QueryUtils.isCJK(only) && !QueryUtils.isThai(only)) {
                        continue;
                    }
                }
                QueryTerm candidate = new QueryTerm(text, frequency);
                candidate.setTermSource(source);
                collected.add(candidate);
            }
        }
        return collected;
    }

    /**
     * Collects candidate terms by analysing free text.
     *
     * <p>Only the NAME and CONTENT query contents are analysed (with the {@code _name}
     * and {@code _content} fields respectively); other configured contents, and
     * {@code null} inputs, are skipped. Terms shorter than the configured minimum
     * word length are dropped unless they consist of a single CJK or Thai character,
     * and tokens whose type is in the skip set are dropped as well. Each distinct
     * (term, content) combination yields one {@code QueryTerm} whose frequency is the
     * number of accepted occurrences.
     *
     * <p>The token stream is closed even when attribute lookup or {@code reset()}
     * fails, so a failure here does not leave the stream open.
     *
     * @param name     text analysed as the name part; may be {@code null}
     * @param content  text analysed as the content part; may be {@code null}
     * @param analyzer analyzer used to tokenize the text
     * @return the candidate terms, each tagged with the content it came from
     * @throws IOException if tokenizing fails
     */
    private Collection<QueryTerm> retrieveQueryTerms(String name, String content, Analyzer analyzer) throws IOException {
        HashMap<String, QueryTerm> queryTerms = new HashMap<String, QueryTerm>();
        for (FusionQueryContent queryContent : fQueryContent) {
            TokenStream tokenStream = null;
            switch (queryContent) {
                case NAME: {
                    if (name == null) break;
                    tokenStream = analyzer.tokenStream("_name", (Reader)new StringReader(name));
                    break;
                }
                case CONTENT: {
                    if (content == null) break;
                    tokenStream = analyzer.tokenStream("_content", (Reader)new StringReader(content));
                    break;
                }
                default: {
                    // TAGS and META have no free-text counterpart here.
                    break;
                }
            }
            if (tokenStream == null) {
                continue;
            }
            // Everything that touches the stream sits inside the try so that close()
            // is guaranteed once the stream has been obtained.
            try {
                CharTermAttribute termAtt = (CharTermAttribute)tokenStream.getAttribute(CharTermAttribute.class);
                TypeAttribute typeAtt = (TypeAttribute)tokenStream.getAttribute(TypeAttribute.class);
                tokenStream.reset();
                while (tokenStream.incrementToken()) {
                    String term = QueryUtils.termText(termAtt);
                    if (term.length() < fMinWordLen) {
                        // Short terms survive only as a single CJK or Thai character.
                        if (term.length() != 1) {
                            continue;
                        }
                        char c = term.charAt(0);
                        if (!QueryUtils.isCJK(c) && !QueryUtils.isThai(c)) {
                            continue;
                        }
                    }
                    if (fSkipTokenTypes.contains(typeAtt.type())) {
                        continue;
                    }
                    // The same term text is counted separately per content kind.
                    String key = String.valueOf(term) + queryContent.ordinal();
                    QueryTerm queryTerm = queryTerms.get(key);
                    if (queryTerm == null) {
                        queryTerm = new QueryTerm(term);
                        queryTerm.setTermSource(queryContent);
                        queryTerms.put(key, queryTerm);
                    }
                    queryTerm.incrementTermFrequency();
                }
                tokenStream.end();
            }
            finally {
                tokenStream.close();
            }
        }
        return queryTerms.values();
    }

    /**
     * Scores the candidate terms and returns them in a priority queue ordered by
     * descending score (per {@code QueryTerm.compareTo}).
     *
     * <p>For every term with at least the configured minimum term frequency, the
     * document frequency is looked up in each configured query field; the field with
     * the highest frequency becomes the term's top field (CONTENT when none is
     * positive). Terms that occur in more than the configured ratio of the documents
     * in scope are dropped, but only when the scope holds more than
     * {@code MAX_DOC_FREQ_RATIO_THRESHOLD} documents.
     *
     * <p>A term is counted as unknown, and scored 0, when its document frequency is
     * 0 for analysed text, or exactly 1 for index-sourced terms (presumably because
     * only the source document itself contains it). All other terms are counted as
     * known and scored with the configured scorer.
     *
     * @param searchScope      scope used for document counts and frequencies
     * @param queryTerms       candidate terms
     * @param isTermsFromIndex whether the terms were read from an indexed document
     * @param indexAccess      index access used for all lookups
     * @return the scored terms
     * @throws IOException if an index lookup fails
     */
    private PriorityQueue<QueryTerm> rankQueryTerms(String[] searchScope, Collection<QueryTerm> queryTerms, boolean isTermsFromIndex, IIndexAccess indexAccess) throws IOException {
        PriorityQueue<QueryTerm> queue = new PriorityQueue<QueryTerm>();
        int numDocs = indexAccess.numDocs(searchScope);

        // Max-TF normalization needs the largest term frequency among the candidates.
        // This has to scan the input terms: the result queue is still empty here.
        int maxTf = 0;
        if (fWeightFunction == FusionWeightFunction.MAX_TF_NORMALIZATION) {
            for (QueryTerm queryTerm : queryTerms) {
                maxTf = Math.max(queryTerm.getTermFrequency(), maxTf);
            }
        }

        for (QueryTerm queryTerm : queryTerms) {
            int tf = queryTerm.getTermFrequency();
            if (tf < fMinTermFreq) {
                continue;
            }

            // Pick the field in which the term is most frequent.
            int docFreq = 0;
            FusionQueryField topQueryField = FusionQueryField.CONTENT;
            for (FusionQueryField queryField : fQueryFields) {
                int currentDocFreq = indexAccess.docFreq(searchScope, new Term(queryField.getId(), queryTerm.getValue()));
                if (currentDocFreq <= docFreq) {
                    continue;
                }
                docFreq = currentDocFreq;
                topQueryField = queryField;
            }
            queryTerm.setTopField(topQueryField);

            // Too common to discriminate; only enforced on sufficiently large scopes.
            if (numDocs > MAX_DOC_FREQ_RATIO_THRESHOLD && (float)docFreq > (float)numDocs * fMaxDocFreqRatio) {
                continue;
            }

            double score = 0.0;
            boolean unknown = docFreq == 0 && !isTermsFromIndex || docFreq == 1 && isTermsFromIndex;
            if (unknown) {
                ++this.fUnknownTermCount;
            } else {
                ++this.fKnownTermCount;
                switch (fTermScorer) {
                    case TF_IDF: {
                        score = this.scoreTFIDF(numDocs, docFreq, this.getTermFrequency(tf, maxTf));
                        break;
                    }
                    case BM25: {
                        score = this.scoreBM25(numDocs, docFreq, this.getTermFrequency(tf, maxTf));
                        break;
                    }
                }
            }
            queryTerm.setScore(score);
            queue.add(queryTerm);
        }
        return queue;
    }

    /**
     * Applies the configured weight function to a raw term frequency.
     *
     * <ul>
     * <li>NONE: the raw frequency.</li>
     * <li>MAX_TF_NORMALIZATION: {@code 0.5 + 0.5 * tf / maxTf} (computed in float
     *     precision), or 0 when {@code maxTf} is 0.</li>
     * <li>SUBLINEAR_TF_SCALING: {@code 1 + ln(tf)}, or 0 when {@code tf} is 0.</li>
     * </ul>
     *
     * @param tf    raw term frequency
     * @param maxTf largest term frequency among the candidates; only used for max-TF normalization
     * @return the weighted term frequency
     */
    private double getTermFrequency(int tf, int maxTf) {
        switch (fWeightFunction) {
            case NONE: {
                return tf;
            }
            case MAX_TF_NORMALIZATION: {
                if (maxTf == 0) {
                    return 0.0f;
                }
                float smoothing = 0.5f;
                float normalized = smoothing + (1.0f - smoothing) * (float)tf / (float)maxTf;
                return normalized;
            }
            case SUBLINEAR_TF_SCALING: {
                if (tf == 0) {
                    return 0.0;
                }
                return 1.0 + Math.log(tf);
            }
        }
        return tf;
    }

    /**
     * Scores a term as weighted term frequency times the IDF reported by the
     * configured similarity.
     *
     * @param numDocs number of documents in scope
     * @param docFreq number of documents containing the term
     * @param tf      weighted term frequency
     * @return the TF-IDF score
     */
    private double scoreTFIDF(int numDocs, int docFreq, double tf) {
        return tf * (double)this.fSimilarity.idf((long)docFreq, (long)numDocs);
    }

    /**
     * Scores a term with a BM25-style formula:
     * {@code idf * tf * (k1 + 1) / (tf + k1 * (1 - b + b * ndl))} with
     * {@code idf = ln(numDoc) - ln(docFreq)}, {@code k1 = 1.2}, {@code b = 0} and a
     * normalized document length of 0 (so document length has no influence).
     *
     * <p>When {@code numDoc} or {@code docFreq} is not positive the logarithm is
     * undefined (it would yield an infinite or NaN score), so 0 is returned instead.
     * A term that no document in scope contains cannot match anything and must not
     * outrank real terms.
     *
     * @param numDoc  number of documents in scope
     * @param docFreq number of documents containing the term
     * @param tf      weighted term frequency
     * @return the score, or 0 when {@code numDoc} or {@code docFreq} is not positive
     */
    private double scoreBM25(int numDoc, int docFreq, double tf) {
        if (numDoc <= 0 || docFreq <= 0) {
            return 0.0;
        }
        double b = 0.0;
        double k1 = 1.2;
        double ndl = 0.0;
        double idf = Math.log(numDoc) - Math.log(docFreq);
        double saturation = tf * (k1 + 1.0) / (tf + k1 * (1.0 - b + b * ndl));
        return idf * saturation;
    }

    /**
     * Turns the ranked terms into a boolean query of SHOULD clauses.
     *
     * <p>Terms are consumed best-score-first. Content-sourced terms stop being added
     * once the content clause count exceeds the configured maximum; name-sourced
     * terms are always added. A name-sourced term whose top field is not NAME is
     * searched in both the NAME and the CONTENT field; every other term is searched
     * in its top field only. Finally the minimum-should-match value is derived from
     * the clause counts.
     *
     * <p>The terms are drained from a copy of the queue with {@code poll()}:
     * iterating a {@link PriorityQueue} directly does not visit elements in
     * priority order, which would make the cap keep arbitrary terms.
     *
     * @param rankedQueryTerms scored terms; not modified
     * @return the like query (possibly without clauses)
     */
    private BooleanQuery createQuery(PriorityQueue<QueryTerm> rankedQueryTerms) {
        BooleanQuery.Builder qb = new BooleanQuery.Builder();
        int contentClauseCount = 0;
        int nameClauseCount = 0;
        int queryClauseCount = 0;

        PriorityQueue<QueryTerm> remaining = new PriorityQueue<QueryTerm>(rankedQueryTerms);
        QueryTerm queryTerm;
        while ((queryTerm = remaining.poll()) != null) {
            boolean isNameSource = queryTerm.getTermSource() == FusionQueryContent.NAME;
            // NOTE(review): ">" admits one content clause more than fMaxQueryTerms;
            // left unchanged - confirm whether ">=" was intended.
            if (contentClauseCount > fMaxQueryTerms && !isNameSource) {
                continue;
            }
            if (isNameSource && queryTerm.fTopField != FusionQueryField.NAME) {
                // Name term that is more frequent elsewhere: search name and content.
                BooleanQuery.Builder nameContentQb = new BooleanQuery.Builder();
                TermQuery nameQuery = new TermQuery(queryTerm.toLuceneTerm(FusionQueryField.NAME.getId()));
                nameContentQb = nameContentQb.add((Query)nameQuery, BooleanClause.Occur.SHOULD);
                TermQuery contentQuery = new TermQuery(queryTerm.toLuceneTerm(FusionQueryField.CONTENT.getId()));
                nameContentQb = nameContentQb.add((Query)contentQuery, BooleanClause.Occur.SHOULD);
                qb = qb.add((Query)nameContentQb.build(), BooleanClause.Occur.SHOULD);
            } else {
                TermQuery termQuery = new TermQuery(queryTerm.toLuceneTerm());
                qb = qb.add((Query)termQuery, BooleanClause.Occur.SHOULD);
            }
            ++queryClauseCount;
            if (isNameSource) {
                ++nameClauseCount;
            } else {
                ++contentClauseCount;
            }
        }
        this.setMinClausesCount(qb, queryClauseCount, nameClauseCount, contentClauseCount);
        return qb.build();
    }

    /**
     * Derives the minimum number of SHOULD clauses that must match from the clause
     * counts and sets it on the builder.
     *
     * <p>Rules, first match wins (integer arithmetic throughout):
     * <ul>
     * <li>at least half the configured maximum query terms: count divided by the
     *     configured min-should-match ratio;</li>
     * <li>at least 10: a third of the count when name clauses make up at least half
     *     as many as content clauses, otherwise half of the count;</li>
     * <li>at least 5: three quarters of the count;</li>
     * <li>more than 2: all but one;</li>
     * <li>otherwise: all of them.</li>
     * </ul>
     *
     * @param qb                 builder to configure
     * @param clausesCount       total number of top-level clauses
     * @param nameClauseCount    number of clauses from name-sourced terms
     * @param contentClauseCount number of clauses from non-name-sourced terms
     */
    private void setMinClausesCount(BooleanQuery.Builder qb, int clausesCount, int nameClauseCount, int contentClauseCount) {
        int minShouldMatch;
        if (clausesCount >= fMaxQueryTerms / 2) {
            minShouldMatch = clausesCount / fMinShouldMatchRatio;
        } else if (clausesCount >= 10) {
            if (nameClauseCount * 2 >= contentClauseCount) {
                minShouldMatch = clausesCount / 3;
            } else {
                minShouldMatch = clausesCount / 2;
            }
        } else if (clausesCount >= 5) {
            minShouldMatch = clausesCount * 3 / 4;
        } else if (clausesCount > 2) {
            minShouldMatch = clausesCount - 1;
        } else {
            minShouldMatch = clausesCount;
        }
        qb.setMinimumNumberShouldMatch(minShouldMatch);
    }

    /**
     * Resets the shared (static) configuration to the built-in default values and
     * sets {@code QueryManagerImpl.MIN_SCORE_FACTOR_DUPLICATE} to {@code 0.3f}.
     *
     * <p>The query field and query content sets are replaced by freshly created sets.
     *
     * <p>NOTE(review): the name suggests the values may originally have been read
     * from an external configuration; as written only constants are assigned - confirm.
     */
    public static void updateConfig() {
        // Term selection thresholds.
        fMinTermFreq = DEFAULT_MIN_TERM_FREQ;
        fMinWordLen = DEFAULT_MIN_WORD_LEN;
        fMaxQueryTerms = DEFAULT_MAX_QUERY_TERMS;
        fMaxDocFreqRatio = DEFAULT_MAX_DOC_FREQ_RATIO;
        fMinShouldMatchRatio = DEFAULT_MIN_SHOULD_MATCH_RATIO;

        // Fresh sets, independent of the public DEFAULT_* instances.
        fQueryFields = EnumSet.of(FusionQueryField.CONTENT, FusionQueryField.NAME);
        fQueryContent = EnumSet.of(FusionQueryContent.CONTENT, FusionQueryContent.NAME);

        // Scoring.
        fTermScorer = DEFAULT_TERM_SCORER;
        fWeightFunction = DEFAULT_WEIGHT_FUNCTION;

        QueryManagerImpl.MIN_SCORE_FACTOR_DUPLICATE = 0.3f;
    }

    /**
     * Parts of a source document (or of the supplied source text) from which query
     * terms can be extracted.
     *
     * <p>The declaration order is significant: the ordinal is used when keying
     * analysed terms per content kind.
     */
    public enum FusionQueryContent {
        NAME,
        CONTENT,
        TAGS,
        META
    }

    /**
     * Index fields a query term can be searched in, each carrying the name of the
     * corresponding index field.
     */
    public enum FusionQueryField {
        NAME("_name"),
        CONTENT("_content"),
        TAGS("_tags"),
        META("_meta");

        /** Name of the index field. */
        private final String fId;

        FusionQueryField(String id) {
            fId = id;
        }

        /**
         * @return the name of the index field
         */
        String getId() {
            return fId;
        }
    }

    /** Scoring formulas available for ranking query terms. */
    public enum FusionTermScorer {
        TF_IDF,
        BM25
    }

    /** Weightings that can be applied to a raw term frequency before scoring. */
    public enum FusionWeightFunction {
        NONE,
        SUBLINEAR_TF_SCALING,
        MAX_TF_NORMALIZATION
    }

    /**
     * A candidate query term: its text, how often it occurs in the source, the
     * content it was extracted from, and (once ranked) its score and the index field
     * it should be searched in.
     *
     * <p>The natural ordering is by descending score, so a priority queue of query
     * terms yields the highest-scored term first.
     */
    private static class QueryTerm
    implements Comparable<QueryTerm> {
        private int fTermFrequency;
        private final String fValue;
        private double fScore = 0.0;
        private FusionQueryField fTopField;
        public FusionQueryContent fTermSource;

        /** Creates a term with frequency 0; occurrences are counted via {@link #incrementTermFrequency()}. */
        QueryTerm(String term) {
            this(term, 0);
        }

        /** Creates a term with a known frequency. */
        QueryTerm(String term, int termFrequency) {
            fValue = term;
            fTermFrequency = termFrequency;
        }

        /** Counts one more occurrence of this term. */
        void incrementTermFrequency() {
            fTermFrequency++;
        }

        /** @return the number of occurrences of this term in the source */
        int getTermFrequency() {
            return fTermFrequency;
        }

        /** @return the term text */
        String getValue() {
            return fValue;
        }

        /** Sets the ranking score. */
        void setScore(double score) {
            fScore = score;
        }

        /** Sets the index field this term should be searched in by default. */
        void setTopField(FusionQueryField topField) {
            fTopField = topField;
        }

        /** @return this term as a Lucene term in its top field; the top field must have been set */
        Term toLuceneTerm() {
            return toLuceneTerm(fTopField.getId());
        }

        /** @return this term as a Lucene term in the given field */
        Term toLuceneTerm(String field) {
            return new Term(field, fValue);
        }

        /** Orders by descending score. */
        @Override
        public int compareTo(QueryTerm other) {
            return Double.compare(other.fScore, fScore);
        }

        /** @return the content this term was extracted from */
        FusionQueryContent getTermSource() {
            return fTermSource;
        }

        /** Records the content this term was extracted from. */
        void setTermSource(FusionQueryContent termSource) {
            fTermSource = termSource;
        }
    }
}

