Skip to content

Filter example: modify the document URL

Description

Filters can be used to modify the URI of a document regardless of document type. In this example the Filter interface is implemented because, unlike StringDocumentFilter and ByteDocumentFilter, it avoids any unnecessary conversion of the document to a String or to bytes respectively. Implementing Filter may be preferable in cases where the document content is not important.

Example

In this example we change any URL that has the protocol http to use the protocol https. To do this we implement the Filter interface, which only requires us to implement the filter method. The filter can still be skipped by returning FilterResult.skipped().

This example also has simple test methods, which can be run by executing the main method; see testing Groovy filters.

package com.myfilters;

import java.net.URI;
import org.junit.*;
import org.junit.Test;
import com.funnelback.filter.api.*;
import com.funnelback.filter.api.documents.*;
import com.funnelback.filter.api.filters.*;
import com.funnelback.filter.api.mock.*;

/**
 * Filter that rewrites the URI of a document from the http scheme to the https scheme.
 *
 * Only the document URI is inspected and replaced; the document content is never read
 * by this class. Documents whose URI does not begin with "http://" are reported as
 * skipped.
 *
 * The nested FilterTest class holds the tests for this filter, and main runs them.
 */
@groovy.util.logging.Log4j2
public class ModifyURLFilter implements Filter {

    /** Prefix identifying the URIs this filter rewrites (matched ignoring case). */
    private static final String HTTP_PREFIX = "http://";

    /** Prefix written in place of HTTP_PREFIX on rewritten URIs. */
    private static final String HTTPS_PREFIX = "https://";

    /**
     * Replaces a leading "http://" on the document URI with "https://".
     *
     * @param document the document whose URI is examined
     * @param context the filter context; not used by this implementation
     * @return a result wrapping the document obtained from cloneWithURI with the https URI,
     *         or FilterResult.skipped() when the URI does not begin with "http://"
     * @throws FilterException declared in the signature; this implementation does not
     *         throw it itself
     */
    @Override
    public FilterResult filter(FilterableDocument document, FilterContext context) throws FilterException {
        //Get the document URI
        String uri = document.getURI().toASCIIString();

        //URI schemes are case-insensitive (RFC 3986 section 3.1) and java.net.URI keeps the
        //scheme exactly as it was written, so compare ignoring case: "HTTP://..." must be
        //upgraded just like "http://...".
        boolean isHttp = uri.regionMatches(true, 0, HTTP_PREFIX, 0, HTTP_PREFIX.length());

        //If the URI does not start with http:// mark this filter as being skipped.
        if (!isHttp) {
            return FilterResult.skipped();
        }

        //Keep everything after the scheme untouched and only swap the prefix.
        String newUrl = HTTPS_PREFIX + uri.substring(HTTP_PREFIX.length());

        log.debug("Changing URL '" + document.getURI() + "' to '" + newUrl + "'");

        //Return the document with the modified URL
        FilterableDocument filteredDocument = document.cloneWithURI(URI.create(newUrl));
        return FilterResult.of(filteredDocument);
    }

    /*
     * Below are filter test methods.
     */

    public static class FilterTest {
        @Test
        public void checkHttpUrl() throws Exception {
            //Create input document with http:// URL
            FilterableDocument inputDocument = MockDocuments.mockEmptyStringDoc()
                                                            .cloneWithURI(new URI("http://foo.com/"));

            FilterResult filterResult = new ModifyURLFilter().filter(inputDocument, MockFilterContext.getEmptyContext());

            FilterableDocument filteredDocument = filterResult.getFilteredDocuments().get(0);

            Assert.assertEquals("https://foo.com/", filteredDocument.getURI().toASCIIString());
        }

        @Test
        public void checkUpperCaseSchemeUrl() throws Exception {
            //Create input document whose scheme is written in upper case
            FilterableDocument inputDocument = MockDocuments.mockEmptyStringDoc()
                                                            .cloneWithURI(new URI("HTTP://foo.com/"));

            FilterResult filterResult = new ModifyURLFilter().filter(inputDocument, MockFilterContext.getEmptyContext());

            FilterableDocument filteredDocument = filterResult.getFilteredDocuments().get(0);

            Assert.assertEquals("https://foo.com/", filteredDocument.getURI().toASCIIString());
        }

        @Test
        public void checkNonHttpURL() throws Exception {
            //Create input document with a non-http (file://) URL
            FilterableDocument inputDocument = MockDocuments.mockEmptyStringDoc()
                                                            .cloneWithURI(new URI("file://foo.com/"));

            FilterResult filterResult = new ModifyURLFilter().filter(inputDocument, MockFilterContext.getEmptyContext());

            Assert.assertTrue("As the document url did not start with http:// the filter should have been skipped", 
                filterResult.isSkipped());
        }
    }

    //Running the main method will execute the test methods.
    public static void main(String[] args) throws Exception {
        FilterTestRunner.runTests(FilterTest.class);
    }
}

See also:

top

Funnelback logo
v15.16.0