package org.modeshape.jcr.api.text;

import javax.jcr.Node;
import javax.jcr.Property;
import javax.jcr.RepositoryException;

/**
 * Base class for components that extract text from binary content. A subclass states which MIME types it can handle via
 * {@link #supportsMimeType(String)} and records the text it finds to a {@link TextExtractor.Output} from within
 * {@link #extractFrom(Binary, TextExtractor.Output, Context)}.
 */
public abstract class TextExtractor {

    ...

    /**
     * Determine if this extractor is capable of processing content with the supplied MIME type.
     *
     * @param mimeType the MIME type; never null
     * @return true if this extractor can process content with the supplied MIME type, or false otherwise.
     */
    public abstract boolean supportsMimeType( String mimeType );

    /**
     * Extract text from the given {@link Binary}, using the given output to record the results.
     *
     * @param binary the binary value that can be used in the extraction process; never <code>null</code>
     * @param output the output to which the extracted text should be recorded; never <code>null</code>
     * @param context the context providing additional information during the extraction operation; never <code>null</code>
     * @throws Exception if there is a problem during the extraction process
     */
    public abstract void extractFrom( Binary binary, TextExtractor.Output output, Context context ) throws Exception;

    /**
     * Allows subclasses to process the stream of a binary value in a "safe" fashion, making sure the stream is closed at
     * the end of the operation.
     *
     * @param binary a {@link org.modeshape.jcr.api.Binary} that is expected to contain a non-null binary value.
     * @param operation a {@link org.modeshape.jcr.api.text.TextExtractor.BinaryOperation} which should work with the stream
     * @param <T> the return type of the binary operation
     * @return whatever type of result the stream operation returns
     * @throws Exception if there is an error processing the stream
     */
    protected final <T> T processStream( Binary binary, BinaryOperation<T> operation ) throws Exception { ... }

    /**
     * Interface which can be used by subclasses to process the input stream of a binary property.
     *
     * @param <T> the return type of the binary operation
     */
    protected interface BinaryOperation<T> {
        /**
         * Perform the operation on the supplied stream.
         *
         * @param stream the input stream to work with; presumably the stream of the binary handed to
         *        {@link TextExtractor#processStream(Binary, BinaryOperation)} (that method's body is not shown here, so
         *        confirm)
         * @return the result of the operation
         * @throws Exception if there is a problem while working with the stream
         */
        T execute( InputStream stream ) throws Exception;
    }

    /**
     * Interface which provides additional information to the text extractors, during the extraction operation.
     */
    public interface Context {
        /**
         * Presumably determines the MIME type of the supplied binary value; only the signature is visible here, so confirm
         * the exact contract against the implementation.
         *
         * @param name a name associated with the binary value; NOTE(review): it is not clear from this file whether this is
         *        a file name or a property name, or whether it may be null
         * @param binaryValue the binary value whose MIME type is requested
         * @return the MIME type as a string; NOTE(review): confirm whether null is returned when the type cannot be
         *         determined
         * @throws RepositoryException declared by the signature; presumably raised when the repository cannot be accessed
         * @throws IOException declared by the signature; presumably raised when the binary content cannot be read
         */
        String mimeTypeOf( String name, Binary binaryValue ) throws RepositoryException, IOException;
    }

    /**
     * The interface passed to a TextExtractor to which the extractor should record all text content.
     */
    public interface Output {
        /**
         * Record the text as being extracted. This method can be called multiple times during a single extract.
         *
         * @param text the text extracted from the content.
         */
        void recordText( String text );
    }
}