/*
 * Decompiled with CFR 0.152.
 */
package org.carrot2.text.preprocessing;

import java.util.stream.Stream;
import org.carrot2.attrs.AttrComposite;
import org.carrot2.attrs.AttrInteger;
import org.carrot2.clustering.Document;
import org.carrot2.language.LanguageComponents;
import org.carrot2.text.preprocessing.CaseNormalizer;
import org.carrot2.text.preprocessing.ContextPreprocessor;
import org.carrot2.text.preprocessing.InputTokenizer;
import org.carrot2.text.preprocessing.LanguageModelStemmer;
import org.carrot2.text.preprocessing.PreprocessingContext;
import org.carrot2.text.preprocessing.StopListMarker;

/**
 * A {@link ContextPreprocessor} that fills a {@link PreprocessingContext} by invoking, in order,
 * an {@link InputTokenizer}, a {@link CaseNormalizer}, a {@link LanguageModelStemmer} and a
 * {@link StopListMarker}.
 *
 * <p>The individual components are created once per pipeline instance and are exposed to
 * subclasses through protected fields.
 */
public class BasicPreprocessingPipeline extends AttrComposite implements ContextPreprocessor {
    /**
     * Attribute registered under the key {@code "wordDfThreshold"}; declared with a minimum of 1,
     * a maximum of 100 and a default of 1. Its current value is handed to the case normalizer on
     * every {@link #preprocess} call.
     */
    public final AttrInteger wordDfThreshold =
        attributes.register(
            "wordDfThreshold",
            AttrInteger.builder()
                .min(1)
                .max(100)
                .label("Word document frequency threshold")
                .defaultValue(1));

    /** Case normalization step; runs second, right after tokenization. */
    protected final CaseNormalizer caseNormalizer = new CaseNormalizer();

    /** Stemming step; runs third and is the only step that receives the query. */
    protected final LanguageModelStemmer stemming = new LanguageModelStemmer();

    /** Stop list marking step; runs last. */
    protected final StopListMarker stopListMarker = new StopListMarker();

    /** Tokenization step; runs first and is the only step that consumes the document stream. */
    protected final InputTokenizer tokenizer = new InputTokenizer();

    /**
     * Creates a fresh context for the given language components and passes it through every
     * pipeline step.
     *
     * <p>NOTE(review): the context is returned from inside the try-with-resources statement, so
     * the caller receives it after {@code close()} has already run. Presumably {@code close()}
     * only releases temporary build-time state and leaves the results readable - confirm against
     * {@link PreprocessingContext}.
     *
     * @param documents the documents handed to the tokenizer
     * @param query the query handed to the stemmer
     * @param langModel the language components the context is constructed with
     * @return the context after all four steps have been applied to it
     */
    @Override
    public PreprocessingContext preprocess(
            Stream<? extends Document> documents, String query, LanguageComponents langModel) {
        try (PreprocessingContext ctx = new PreprocessingContext(langModel)) {
            tokenizer.tokenize(ctx, documents);

            // Read the threshold only after tokenization, right where it is needed.
            final int dfThreshold = wordDfThreshold.get();
            caseNormalizer.normalize(ctx, dfThreshold);

            stemming.stem(ctx, query);
            stopListMarker.mark(ctx);
            return ctx;
        }
    }
}

