/*
 * Decompiled with CFR 0.152.
 */
package org.nuxeo.ecm.platform.categorization.categorizer.tfidf;

import java.io.Serializable;
import java.util.List;

/**
 * Converts a list of tokens into a fixed-length vector of occurrence counts by
 * hashing each feature to an index instead of keeping a vocabulary.
 *
 * <p>Two kinds of features are counted: every token on its own and, when
 * {@link #window(int) window} is positive, every pair made of a token and one
 * of the tokens that precede it within the window. Each feature increments
 * {@link #probes(int) probes} slots, one per probe number.
 *
 * <p>The configuration methods return {@code this} so calls can be chained,
 * e.g. {@code new HashingVectorizer().dimension(1024).window(2)}.
 */
public class HashingVectorizer implements Serializable {
    private static final long serialVersionUID = 1L;

    /** Length of the vectors built by {@link #count(List)}; modulus applied to every hash. */
    protected int dim = 524288;

    /** Number of slots incremented for each feature. */
    protected int probes = 2;

    /** Number of preceding tokens paired with each token; pairs are skipped unless positive. */
    protected int window = 0;

    /**
     * Sets the length of the count vectors.
     *
     * @param dim the vector length, must be strictly positive
     * @return this vectorizer
     * @throws IllegalArgumentException if {@code dim} is zero or negative
     */
    public HashingVectorizer dimension(int dim) {
        // Fail here rather than later: a non-positive size would otherwise only
        // surface as a NegativeArraySizeException in count() or as a division
        // by zero inside hash(), far from the faulty configuration call.
        if (dim <= 0) {
            throw new IllegalArgumentException("dimension must be positive, got " + dim);
        }
        this.dim = dim;
        return this;
    }

    /**
     * Sets how many preceding tokens are combined with each token to form pair
     * features. Pair features are only produced when the value is positive.
     *
     * @param window the number of preceding tokens to pair with
     * @return this vectorizer
     */
    public HashingVectorizer window(int window) {
        this.window = window;
        return this;
    }

    /**
     * Sets how many slots are incremented for each feature. With a value of
     * zero or less nothing is counted at all.
     *
     * @param probes the number of probes per feature
     * @return this vectorizer
     */
    public HashingVectorizer probes(int probes) {
        this.probes = probes;
        return this;
    }

    /**
     * Builds a new count vector for the given tokens.
     *
     * @param tokens the tokens to vectorize, in document order
     * @return a freshly allocated array of length {@code dim} holding the counts
     */
    public long[] count(List<String> tokens) {
        long[] counts = new long[this.dim];
        this.addCounts(tokens, counts);
        return counts;
    }

    /**
     * Adds the counts for the given tokens to an existing vector, so several
     * token lists can be accumulated into the same array.
     *
     * @param tokens the tokens to vectorize, in document order; preceding
     *        tokens are fetched by index when pair features are enabled
     * @param counts the vector to update in place; indices up to
     *        {@code dim - 1} may be written, so a shorter array can fail with
     *        an {@link ArrayIndexOutOfBoundsException}
     */
    public void addCounts(List<String> tokens, long[] counts) {
        // Index of the current token, tracked by hand because the for-each
        // loop does not expose it.
        int position = 0;
        for (String token : tokens) {
            // The token on its own.
            for (int probe = 0; probe < this.probes; ++probe) {
                counts[this.hash(token, probe)]++;
            }
            // The token paired with each of the (at most 'window') tokens before it.
            if (this.window > 0) {
                int firstPrevious = Math.max(0, position - this.window);
                for (int j = firstPrevious; j < position; ++j) {
                    // Looked up once per pair rather than once per probe.
                    String previous = tokens.get(j);
                    for (int probe = 0; probe < this.probes; ++probe) {
                        counts[this.hash(token, previous, probe)]++;
                    }
                }
            }
            ++position;
        }
    }

    /**
     * Computes the index of a single-token feature by delegating to
     * {@link #hash(String, String, int)} with no previous token.
     *
     * @param token the token
     * @param probe the probe number
     * @return the index for this token and probe
     */
    protected int hash(String token, int probe) {
        return this.hash(token, null, probe);
    }

    /**
     * Computes the index of a feature from the hash code of the string
     * {@code token + " " + prevToken + " " + probe}. A {@code null}
     * {@code prevToken} is rendered as the text {@code "null"} by the string
     * concatenation, which is how single-token features are keyed.
     *
     * @param token the current token
     * @param prevToken the preceding token, or {@code null} for a single-token feature
     * @param probe the probe number
     * @return the index, in {@code [0, dim)} for a positive {@code dim}
     */
    protected int hash(String token, String prevToken, int probe) {
        String key = token + " " + prevToken + " " + probe;
        int index = key.hashCode() % this.dim;
        // The remainder keeps the sign of the hash code, so a negative hash
        // gives a negative remainder that has to be shifted back into range.
        return index < 0 ? index + this.dim : index;
    }
}

