001 /* 002 * JBoss DNA (http://www.jboss.org/dna) 003 * See the COPYRIGHT.txt file distributed with this work for information 004 * regarding copyright ownership. Some portions may be licensed 005 * to Red Hat, Inc. under one or more contributor license agreements. 006 * See the AUTHORS.txt file in the distribution for a full listing of 007 * individual contributors. 008 * 009 * JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA 010 * is licensed to you under the terms of the GNU Lesser General Public License as 011 * published by the Free Software Foundation; either version 2.1 of 012 * the License, or (at your option) any later version. 013 * 014 * JBoss DNA is distributed in the hope that it will be useful, 015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 017 * Lesser General Public License for more details. 018 * 019 * You should have received a copy of the GNU Lesser General Public 020 * License along with this software; if not, write to the Free 021 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 022 * 02110-1301 USA, or see the FSF site: http://www.fsf.org. 023 */ 024 package org.jboss.dna.sequencer.msoffice.excel; 025 026 import java.io.IOException; 027 import java.io.InputStream; 028 import java.util.ArrayList; 029 import java.util.List; 030 031 import org.apache.poi.hssf.usermodel.HSSFCell; 032 import org.apache.poi.hssf.usermodel.HSSFComment; 033 import org.apache.poi.hssf.usermodel.HSSFRow; 034 import org.apache.poi.hssf.usermodel.HSSFSheet; 035 import org.apache.poi.hssf.usermodel.HSSFWorkbook; 036 import org.apache.poi.poifs.filesystem.POIFSFileSystem; 037 038 /** 039 * @author Michael Trezzi 040 * @description Extracts data and some metadata from excel files 041 */ 042 public class ExcelMetadataReader { 043 044 /** The character to output after each row. */ 045 private static final char ROW_DELIMITER_CHAR = '\n'; 046 /** The character to output after each cell (column). */ 047 private static final char CELL_DELIMITER_CHAR = '\t'; 048 049 public static ExcelMetadata instance( InputStream stream ) throws IOException { 050 ExcelMetadata metadata = new ExcelMetadata(); 051 HSSFWorkbook wb = new HSSFWorkbook(new POIFSFileSystem(stream)); 052 053 StringBuffer buff = new StringBuffer(); 054 List<String> sheets = new ArrayList<String>(); 055 056 for (int sheetInd = 0; sheetInd < wb.getNumberOfSheets(); sheetInd++) { 057 sheets.add(wb.getSheetName(sheetInd)); 058 059 HSSFSheet worksheet = wb.getSheetAt(sheetInd); 060 int lastRowNum = worksheet.getLastRowNum(); 061 062 for (int rowNum = worksheet.getFirstRowNum(); rowNum <= lastRowNum; rowNum++) { 063 HSSFRow row = worksheet.getRow(rowNum); 064 065 // Empty rows are returned as null 066 if (row == null) { 067 continue; 068 } 069 070 int lastCellNum = row.getLastCellNum(); 071 for (int cellNum = row.getFirstCellNum(); cellNum < lastCellNum; cellNum++) { 072 HSSFCell cell = row.getCell(cellNum); 073 074 /* 075 * Builds a string of body content from all string, numeric, 076 * and formula values in the body of each worksheet. 077 * 078 * This code currently duplicates the POI 3.1 ExcelExtractor behavior of 079 * combining the body text from all worksheets into a single string. 080 */ 081 switch (cell.getCellType()) { 082 case HSSFCell.CELL_TYPE_STRING: 083 buff.append(cell.getRichStringCellValue().getString()); 084 break; 085 case HSSFCell.CELL_TYPE_NUMERIC: 086 buff.append(cell.getNumericCellValue()); 087 break; 088 case HSSFCell.CELL_TYPE_FORMULA: 089 buff.append(cell.getCellFormula()); 090 break; 091 } 092 093 HSSFComment comment = cell.getCellComment(); 094 if (comment != null) { 095 // Filter out row delimiter characters from comment 096 String commentText = comment.getString().getString().replace(ROW_DELIMITER_CHAR, ' '); 097 098 buff.append(" ["); 099 buff.append(commentText); 100 buff.append(" by "); 101 buff.append(comment.getAuthor()); 102 buff.append(']'); 103 } 104 105 if (cellNum < lastCellNum - 1) { 106 buff.append(CELL_DELIMITER_CHAR); 107 } 108 else { 109 buff.append(ROW_DELIMITER_CHAR); 110 } 111 } 112 } 113 } 114 115 metadata.setText(buff.toString()); 116 metadata.setSheets(sheets); 117 return metadata; 118 } 119 }