Skip to content

Filter example: string document filter

Description

Filters can be used to manipulate documents as a string. This is typically the simplest way of implementing a custom filter.

Example

In this example plain text documents (based on having a .txt file extension) have their content converted to lower case.

This example implements the StringDocumentFilter interface. We are required to implement canFilter(), which is used to check that the document's extension is .txt, as well as filterAsStringDocument(), which contains the logic for the filter.

This example also has some simple test methods, which can be executed by running the main method (see testing Groovy filters).

package com.myfilters;

import java.net.URI;
import org.junit.*;
import org.junit.Test;
import static com.funnelback.filter.api.DocumentType.*;
import com.funnelback.filter.api.*;
import com.funnelback.filter.api.documents.*;
import com.funnelback.filter.api.filters.*;
import com.funnelback.filter.api.mock.*;

/**
 * Example filter that converts the content of plain text documents to lower case.
 *
 * <p>A document is treated as plain text when the path of its URI ends with
 * ".txt". All other documents are skipped and left untouched.</p>
 *
 * <p>The {@code log} field used below is supplied by the
 * {@code @groovy.util.logging.Log4j2} annotation on this class.</p>
 */
@groovy.util.logging.Log4j2
public class LowerCaseFilter implements StringDocumentFilter {

    /**
     * Decides whether this filter should be attempted on the given document.
     *
     * @param document the document being considered; only its URI is inspected here
     * @param context  the filter context (not used by this filter)
     * @return {@code PreFilterCheck.ATTEMPT_FILTER} when the URI path ends with ".txt",
     *         otherwise {@code PreFilterCheck.SKIP_FILTER}
     */
    @Override
    public PreFilterCheck canFilter(NoContentDocument document, FilterContext context) {
        // Only lower case documents that end in ".txt", for example file://file/share/foo.txt
        // java.net.URI#getPath() returns null for opaque URIs (for example "mailto:a@b.com"),
        // so guard against null to skip such documents instead of throwing.
        String path = document.getURI().getPath();
        if (path != null && path.endsWith(".txt")) {
            return PreFilterCheck.ATTEMPT_FILTER;
        }

        log.debug("Skipping filter as the URL path of: '"+document.getURI() + "' does not end with '.txt'");

        return PreFilterCheck.SKIP_FILTER;
    }

    /**
     * Produces a copy of the document whose content is lower cased.
     *
     * @param document the document to filter
     * @param context  the filter context (not used by this filter)
     * @return a {@code FilterResult} wrapping the lower cased copy of {@code document}
     */
    @Override
    public FilterResult filterAsStringDocument(StringDocument document, FilterContext context) {
        // Lower case with a fixed locale so the result does not depend on the JVM's
        // default locale (for example, a Turkish default locale maps 'I' to a dotless 'ı').
        String lowerCasedContent = document.getContentAsString().toLowerCase(java.util.Locale.ROOT);

        // Create a new document with the new lower cased content, keeping the document type.
        StringDocument filteredDocument = document.cloneWithStringContent(document.getDocumentType(),
                                                                            lowerCasedContent);
        return FilterResult.of(filteredDocument);
    }

    /*
     * Below are filter test methods.
     */
    public static class FilterTest {

        /** A ".txt" document should come back with its content lower cased. */
        @Test
        public void lowerCasingTest() throws Exception {
            // Create a txt document with all upper case letters.
            StringDocument inputDoc = MockDocuments.mockEmptyStringDoc()
                                                    .cloneWithURI(new URI("file://file/share/foo.txt"))
                                                    .cloneWithStringContent(MIME_UNKNOWN, "HELLO");

            // Create and run the filter.
            FilterResult filterResult = new LowerCaseFilter().filter(inputDoc, MockFilterContext.getEmptyContext());

            // Get the resulting document.
            StringDocument filteredDocument = (StringDocument) filterResult.getFilteredDocuments().get(0);

            Assert.assertEquals("Content should have been converted to lower case",
                                    "hello", filteredDocument.getContentAsString());
        }

        /** A document whose URI path does not end with ".txt" should be skipped. */
        @Test
        public void nonTxtDocumentsAreSkippedTest() throws Exception {
            // Create a non-txt (.cfg) document containing upper case letters.
            StringDocument inputDoc = MockDocuments.mockEmptyStringDoc()
                                                    .cloneWithURI(new URI("file://file/share/foo.cfg"))
                                                    .cloneWithStringContent(MIME_UNKNOWN, "Hi");

            // Create and run the filter.
            FilterResult filterResult = new LowerCaseFilter().filter(inputDoc, MockFilterContext.getEmptyContext());

            Assert.assertTrue("Filter should not have been run on a file which does not have '.txt' extension",
                                filterResult.isSkipped());
        }
    }

    // Running the main method will execute the test methods.
    public static void main(String[] args) throws Exception {
        FilterTestRunner.runTests(FilterTest.class);
    }
}

See also:

top

Funnelback logo
v15.16.0