001 /*
002 * JBoss DNA (http://www.jboss.org/dna)
003 * See the COPYRIGHT.txt file distributed with this work for information
004 * regarding copyright ownership. Some portions may be licensed
005 * to Red Hat, Inc. under one or more contributor license agreements.
006 * See the AUTHORS.txt file in the distribution for a full listing of
007 * individual contributors.
008 *
009 * JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA
010 * is licensed to you under the terms of the GNU Lesser General Public License as
011 * published by the Free Software Foundation; either version 2.1 of
012 * the License, or (at your option) any later version.
013 *
014 * JBoss DNA is distributed in the hope that it will be useful,
015 * but WITHOUT ANY WARRANTY; without even the implied warranty of
016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
017 * Lesser General Public License for more details.
018 *
019 * You should have received a copy of the GNU Lesser General Public
020 * License along with this software; if not, write to the Free
021 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
022 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
023 */
024 package org.jboss.dna.sequencer.msoffice.excel;
025
026 import java.io.IOException;
027 import java.io.InputStream;
028 import java.util.ArrayList;
029 import java.util.List;
030
031 import org.apache.poi.hssf.usermodel.HSSFCell;
032 import org.apache.poi.hssf.usermodel.HSSFComment;
033 import org.apache.poi.hssf.usermodel.HSSFRow;
034 import org.apache.poi.hssf.usermodel.HSSFSheet;
035 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
036 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
037
038 /**
039 * @author Michael Trezzi
040 * @description Extracts data and some metadata from excel files
041 */
042 public class ExcelMetadataReader {
043
044 /** The character to output after each row. */
045 private static final char ROW_DELIMITER_CHAR = '\n';
046 /** The character to output after each cell (column). */
047 private static final char CELL_DELIMITER_CHAR = '\t';
048
049 public static ExcelMetadata instance( InputStream stream ) throws IOException {
050 ExcelMetadata metadata = new ExcelMetadata();
051 HSSFWorkbook wb = new HSSFWorkbook(new POIFSFileSystem(stream));
052
053 StringBuffer buff = new StringBuffer();
054 List<String> sheets = new ArrayList<String>();
055
056 for (int sheetInd = 0; sheetInd < wb.getNumberOfSheets(); sheetInd++) {
057 sheets.add(wb.getSheetName(sheetInd));
058
059 HSSFSheet worksheet = wb.getSheetAt(sheetInd);
060 int lastRowNum = worksheet.getLastRowNum();
061
062 for (int rowNum = worksheet.getFirstRowNum(); rowNum <= lastRowNum; rowNum++) {
063 HSSFRow row = worksheet.getRow(rowNum);
064
065 // Empty rows are returned as null
066 if (row == null) {
067 continue;
068 }
069
070 int lastCellNum = row.getLastCellNum();
071 for (int cellNum = row.getFirstCellNum(); cellNum < lastCellNum; cellNum++) {
072 HSSFCell cell = row.getCell(cellNum);
073
074 /*
075 * Builds a string of body content from all string, numeric,
076 * and formula values in the body of each worksheet.
077 *
078 * This code currently duplicates the POI 3.1 ExcelExtractor behavior of
079 * combining the body text from all worksheets into a single string.
080 */
081 switch (cell.getCellType()) {
082 case HSSFCell.CELL_TYPE_STRING:
083 buff.append(cell.getRichStringCellValue().getString());
084 break;
085 case HSSFCell.CELL_TYPE_NUMERIC:
086 buff.append(cell.getNumericCellValue());
087 break;
088 case HSSFCell.CELL_TYPE_FORMULA:
089 buff.append(cell.getCellFormula());
090 break;
091 }
092
093 HSSFComment comment = cell.getCellComment();
094 if (comment != null) {
095 // Filter out row delimiter characters from comment
096 String commentText = comment.getString().getString().replace(ROW_DELIMITER_CHAR, ' ');
097
098 buff.append(" [");
099 buff.append(commentText);
100 buff.append(" by ");
101 buff.append(comment.getAuthor());
102 buff.append(']');
103 }
104
105 if (cellNum < lastCellNum - 1) {
106 buff.append(CELL_DELIMITER_CHAR);
107 }
108 else {
109 buff.append(ROW_DELIMITER_CHAR);
110 }
111 }
112 }
113 }
114
115 metadata.setText(buff.toString());
116 metadata.setSheets(sheets);
117 return metadata;
118 }
119 }