Filter example: add metadata to any document
Description
Filters can be used to add metadata to a document regardless of the document's type.
In this example a general filter (Filter) is implemented because the filter does not need access to the document content. If the metadata were being extracted from the content, then either a StringDocumentFilter or a ByteDocumentFilter would need to be implemented. Using a general filter avoids unnecessary conversion of the document to a String or bytes, resulting in a more efficient filter.
Example
In this example the filter-time-stamp metadata is set to a human-readable date for all documents. The example implements the Filter interface, which only requires implementation of the filter() method. The filter can still be skipped by returning FilterResult.skipped().
This example also has a simple test method, which can be executed by running the main method; see Groovy filters for details.
The above example adds metadata with the name 'filter-time-stamp' to the document. For the metadata to be available in the search index, it needs to be added to the metadata mappings.
package com.myfilters;
import java.util.*;
import org.junit.*;
import org.junit.Test;
import com.funnelback.filter.api.*;
import com.funnelback.filter.api.documents.*;
import com.funnelback.filter.api.filters.*;
import com.funnelback.filter.api.mock.*;
import com.google.common.collect.ListMultimap;
/**
 * Filter that stamps every document with the time at which it was filtered.
 *
 * <p>The general {@code Filter} interface is implemented here, rather than
 * {@code StringDocumentFilter} or {@code ByteDocumentFilter}, because this filter
 * never reads the document content; that avoids any unnecessary conversion of
 * the document to a String or bytes.</p>
 */
@groovy.util.logging.Log4j2
public class AddDocumentDateToAllDocuments implements Filter {

    /**
     * Replaces any existing 'filter-time-stamp' metadata values on the document
     * with a single value holding the current date and time.
     *
     * @param document the document whose metadata is to be stamped
     * @param context the filter context; not used by this filter
     * @return a result wrapping a clone of the document that carries the updated metadata
     */
    @Override
    public FilterResult filter(FilterableDocument document, FilterContext context) throws RuntimeException,
            FilterException {
        final String timeStampKey = "filter-time-stamp";

        // Work on a copy of the metadata so that every other existing entry is carried over.
        ListMultimap<String, String> updatedMetadata = document.getCopyOfMetadata();

        // A key can hold several values, so clear out any old stamps before adding the new one.
        updatedMetadata.removeAll(timeStampKey);
        String now = new Date().toString();
        updatedMetadata.put(timeStampKey, now);

        log.debug("Adding date: '" + now + "' to : '" + document.getURI() + "'");

        // Hand back a clone of the document that carries the updated metadata.
        return FilterResult.of(document.cloneWithMetadata(updatedMetadata));
    }

    /*
     * Tests for the filter live in the nested class below.
     */
    public static class FilterTest {

        @Test
        public void checkTimeStampIsAddedTest() {
            // Run the filter over an empty mock document.
            FilterableDocument emptyDoc = MockDocuments.mockEmptyByteDoc();
            AddDocumentDateToAllDocuments filter = new AddDocumentDateToAllDocuments();
            FilterResult result = filter.filter(emptyDoc, MockFilterContext.getEmptyContext());

            // Inspect the first document the filter produced.
            FilterableDocument stampedDoc = result.getFilteredDocuments().get(0);

            // Metadata can be multi-valued, so the lookup yields a list of values.
            List<String> stamps = stampedDoc.getCopyOfMetadata().get("filter-time-stamp");

            // The filter should leave exactly one time stamp on the document.
            Assert.assertEquals("Expected to see exactly one time stamp", 1, stamps.size());
        }
    }

    // Entry point: hands the nested test class to the filter test runner.
    public static void main(String[] args) throws Exception {
        FilterTestRunner.runTests(FilterTest.class);
    }
}