Skip to content
Snippets Groups Projects

[feature] document frequency count functions and broadcasting

Merged Tinsaye Abye requested to merge 2-document-frequency-pre-calculation into develop
9 files
+ 445
49
Compare changes
  • Side-by-side
  • Inline
Files
9
@@ -3,18 +3,31 @@ package org.gradoop.famer.linking.similarityMeasuring.dataStructures;
import org.gradoop.famer.linking.similarityMeasuring.methods.SimilarityComputation;
import org.gradoop.famer.linking.similarityMeasuring.methods.TFIDF;
import java.util.Map;
public class TFIDFSimilarityComponent extends SimilarityComponent {
public static final String DOCUMENT_FREQUENCY_BROADCAST = "documentFrequency";
private final String tokenizer;
private Map<String, Integer> documentFrequency;
/**
* Creates an instance of TFIDFSimilarityComponent
*
* @param baseConfig The base configuration for the similarity component
* @param tokenizer Word tokenizer
*/
public TFIDFSimilarityComponent(SimilarityComponentBaseConfig baseConfig) {
public TFIDFSimilarityComponent(SimilarityComponentBaseConfig baseConfig, String tokenizer) {
super(baseConfig);
this.tokenizer = tokenizer;
}
@Override
public SimilarityComputation<String> buildSimilarityComputation() throws Exception {
return new TFIDF();
return new TFIDF(this.documentFrequency, tokenizer);
}
public void setDocumentFrequency(Map<String, Integer> documentFrequency) {
this.documentFrequency = documentFrequency;
}
}
Loading