/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ctakes.temporal.ae.feature;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.ctakes.relationextractor.ae.features.TokenFeaturesExtractor;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.syntax.WordToken;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.cleartk.ml.Feature;

/**
 * Produces features describing the "head" terms that two mentions have in common.
 *
 * <p>A head term is a {@link WordToken} covered by a mention whose part-of-speech tag
 * starts with {@code NN} or {@code VB}. Two head terms match when their canonical
 * forms are non-null and equal. All feature names are prefixed with
 * {@code overlappingHeadTerms_}.
 */
public class OverlappedHeadFeaturesExtractor
extends TokenFeaturesExtractor {
    private static final String FEATURE_NAME_ROOT = "overlappingHeadTerms";
    /** Sentinel returned by {@link #getNnVbPos(BaseToken)} for tokens that are not head terms. */
    private static final String NOT_NN_VB_POS = "NOT_NN_VB_POS";

    /**
     * Compares every head term of {@code mention1} with every head term of {@code mention2}.
     *
     * <p>For each matching pair three features are added: {@code CanonicalForm} (the shared
     * canonical form), {@code length} (its character length) and {@code POS} (the tag of the
     * token from {@code mention1}). When at least one pair matched, three summary features
     * follow: {@code count} (number of matching pairs), {@code shortRatio} (count divided by
     * the smaller head-term count of the two mentions) and {@code longRatio} (count divided
     * by the larger head-term count).
     *
     * @param jCas the CAS holding the annotations
     * @param mention1 first mention; its covered word tokens form the outer comparison set
     * @param mention2 second mention; its covered word tokens form the inner comparison set
     * @return the extracted features; empty when either mention covers no word tokens or
     *         when no head terms match
     * @throws AnalysisEngineProcessException declared by the extractor contract; not thrown
     *         directly by this implementation
     */
    public List<Feature> extract(JCas jCas, IdentifiedAnnotation mention1, IdentifiedAnnotation mention2) throws AnalysisEngineProcessException {
        List<WordToken> currentTokens = JCasUtil.selectCovered(jCas, WordToken.class, mention1);
        List<WordToken> nextTokens = JCasUtil.selectCovered(jCas, WordToken.class, mention2);
        if (currentTokens == null || currentTokens.isEmpty() || nextTokens == null || nextTokens.isEmpty()) {
            return Collections.emptyList();
        }

        // Collect the head terms of mention2 once, up front. Counting them inside the
        // pairwise loop would inflate the count by the number of head terms in mention1
        // and distort both ratio features.
        // NOTE(review): ratio values differ from the previous implementation whenever
        // mention1 has more than one head term; models trained on the old values should
        // be retrained.
        List<WordToken> nextHeads = new ArrayList<>();
        for (WordToken candidate : nextTokens) {
            if (!NOT_NN_VB_POS.equals(OverlappedHeadFeaturesExtractor.getNnVbPos(candidate))) {
                nextHeads.add(candidate);
            }
        }
        int headSize2 = nextHeads.size();

        List<Feature> features = new ArrayList<>();
        int headSize1 = 0;
        int matches = 0;
        for (WordToken t1 : currentTokens) {
            String t1Pos = OverlappedHeadFeaturesExtractor.getNnVbPos(t1);
            if (NOT_NN_VB_POS.equals(t1Pos)) {
                continue;
            }
            ++headSize1;
            String t1Str = t1.getCanonicalForm();
            if (t1Str == null) {
                // Without a canonical form this head term can never match anything.
                continue;
            }
            for (WordToken t2 : nextHeads) {
                if (!t1Str.equals(t2.getCanonicalForm())) {
                    continue;
                }
                features.add(OverlappedHeadFeaturesExtractor.createFeature("CanonicalForm", t1Str));
                features.add(OverlappedHeadFeaturesExtractor.createFeature("length", t1Str.length()));
                features.add(OverlappedHeadFeaturesExtractor.createFeature("POS", t1Pos));
                ++matches;
            }
        }

        if (matches > 0) {
            // A match implies both head counts are at least 1, so neither division is by zero.
            features.add(OverlappedHeadFeaturesExtractor.createFeature("count", matches));
            float matchShortRatio = (float)matches / (float)Math.min(headSize1, headSize2);
            features.add(OverlappedHeadFeaturesExtractor.createFeature("shortRatio", Float.valueOf(matchShortRatio)));
            float matchLongRatio = (float)matches / (float)Math.max(headSize1, headSize2);
            features.add(OverlappedHeadFeaturesExtractor.createFeature("longRatio", Float.valueOf(matchLongRatio)));
        }
        return features;
    }

    /**
     * Returns the token's part-of-speech tag when it starts with {@code NN} or {@code VB},
     * otherwise the {@link #NOT_NN_VB_POS} sentinel. A token with no tag ({@code null}) is
     * treated as a non-head token rather than causing a {@link NullPointerException}.
     */
    private static String getNnVbPos(BaseToken baseToken) {
        String pos = baseToken.getPartOfSpeech();
        if (pos != null && (pos.startsWith("NN") || pos.startsWith("VB"))) {
            return pos;
        }
        return NOT_NN_VB_POS;
    }

    /** Builds a feature named {@code overlappingHeadTerms_<suffix>} carrying {@code value}. */
    private static Feature createFeature(String suffix, Object value) {
        return new Feature(FEATURE_NAME_ROOT + "_" + suffix, value);
    }
}

