001 /*
002 * JBoss, Home of Professional Open Source.
003 * Copyright 2008, Red Hat Middleware LLC, and individual contributors
004 * as indicated by the @author tags. See the copyright.txt file in the
005 * distribution for a full listing of individual contributors.
006 *
007 * This is free software; you can redistribute it and/or modify it
008 * under the terms of the GNU Lesser General Public License as
009 * published by the Free Software Foundation; either version 2.1 of
010 * the License, or (at your option) any later version.
011 *
012 * This software is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * You should have received a copy of the GNU Lesser General Public
018 * License along with this software; if not, write to the Free
019 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
021 */
022 package org.jboss.dna.sequencer.msoffice.excel;
023
024 import java.io.IOException;
025 import java.io.InputStream;
026 import java.util.ArrayList;
027 import java.util.List;
028 import org.apache.poi.hssf.extractor.ExcelExtractor;
029 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
030 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
031
032 /**
033 * @author Michael Trezzi
034 * @description Extracts data and some metadata from excel files
035 */
036 public class ExcelMetadataReader {
037
038 public static ExcelMetadata instance( InputStream stream ) throws IOException {
039 ExcelMetadata metadata = new ExcelMetadata();
040 HSSFWorkbook wb = new HSSFWorkbook(new POIFSFileSystem(stream));
041 ExcelExtractor extractor = new ExcelExtractor(wb);
042
043 extractor.setFormulasNotResults(true);
044 extractor.setIncludeSheetNames(false);
045 metadata.setText(extractor.getText());
046 List<String> sheets = new ArrayList<String>();
047 for (int i = 0; i < wb.getNumberOfSheets(); i++) {
048 sheets.add(wb.getSheetName(i));
049 }
050 metadata.setSheets(sheets);
051 return metadata;
052 }
053 }