View Javadoc

1   /*
2    * ModeShape (http://www.modeshape.org)
3    * See the COPYRIGHT.txt file distributed with this work for information
4    * regarding copyright ownership.  Some portions may be licensed
5    * to Red Hat, Inc. under one or more contributor license agreements.
6   * See the AUTHORS.txt file in the distribution for a full listing of 
7   * individual contributors.
8    *
9    * ModeShape is free software. Unless otherwise indicated, all code in ModeShape
10   * is licensed to you under the terms of the GNU Lesser General Public License as
11   * published by the Free Software Foundation; either version 2.1 of
12   * the License, or (at your option) any later version.
13   *
14   * ModeShape is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17   * Lesser General Public License for more details.
18   *
19   * You should have received a copy of the GNU Lesser General Public
20   * License along with this software; if not, write to the Free
21   * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
22   * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
23   */
24  package org.modeshape.sequencer.msoffice.excel;
25  
26  import java.io.IOException;
27  import java.io.InputStream;
28  import java.util.ArrayList;
29  import java.util.List;
30  import org.apache.poi.hssf.usermodel.HSSFCell;
31  import org.apache.poi.hssf.usermodel.HSSFComment;
32  import org.apache.poi.hssf.usermodel.HSSFRow;
33  import org.apache.poi.hssf.usermodel.HSSFSheet;
34  import org.apache.poi.hssf.usermodel.HSSFWorkbook;
35  import org.apache.poi.poifs.filesystem.POIFSFileSystem;
36  
37  /**
38   * Extracts data and some metadata from Excel files
39   */
40  public class ExcelMetadataReader {
41  
42      /** The character to output after each row. */
43      private static final char ROW_DELIMITER_CHAR = '\n';
44      /** The character to output after each cell (column). */
45      private static final char CELL_DELIMITER_CHAR = '\t';
46  
47      public static ExcelMetadata instance( InputStream stream ) throws IOException {
48          ExcelMetadata metadata = new ExcelMetadata();
49          HSSFWorkbook wb = new HSSFWorkbook(new POIFSFileSystem(stream));
50  
51          StringBuffer buff = new StringBuffer();
52          List<String> sheets = new ArrayList<String>();
53  
54          for (int sheetInd = 0; sheetInd < wb.getNumberOfSheets(); sheetInd++) {
55              sheets.add(wb.getSheetName(sheetInd));
56  
57              HSSFSheet worksheet = wb.getSheetAt(sheetInd);
58              int lastRowNum = worksheet.getLastRowNum();
59  
60              for (int rowNum = worksheet.getFirstRowNum(); rowNum <= lastRowNum; rowNum++) {
61                  HSSFRow row = worksheet.getRow(rowNum);
62  
63                  // Empty rows are returned as null
64                  if (row == null) {
65                      continue;
66                  }
67  
68                  int lastCellNum = row.getLastCellNum();
69                  for (int cellNum = row.getFirstCellNum(); cellNum < lastCellNum; cellNum++) {
70                      HSSFCell cell = row.getCell(cellNum);
71  
72                      /*
73                       * Builds a string of body content from all string, numeric,
74                       * and formula values in the body of each worksheet.
75                       * 
76                       *  This code currently duplicates the POI 3.1 ExcelExtractor behavior of
77                       *  combining the body text from all worksheets into a single string.
78                       */
79                      switch (cell.getCellType()) {
80                          case HSSFCell.CELL_TYPE_STRING:
81                              buff.append(cell.getRichStringCellValue().getString());
82                              break;
83                          case HSSFCell.CELL_TYPE_NUMERIC:
84                              buff.append(cell.getNumericCellValue());
85                              break;
86                          case HSSFCell.CELL_TYPE_FORMULA:
87                              buff.append(cell.getCellFormula());
88                              break;
89                      }
90  
91                      HSSFComment comment = cell.getCellComment();
92                      if (comment != null) {
93                          // Filter out row delimiter characters from comment
94                          String commentText = comment.getString().getString().replace(ROW_DELIMITER_CHAR, ' ');
95  
96                          buff.append(" [");
97                          buff.append(commentText);
98                          buff.append(" by ");
99                          buff.append(comment.getAuthor());
100                         buff.append(']');
101                     }
102 
103                     if (cellNum < lastCellNum - 1) {
104                         buff.append(CELL_DELIMITER_CHAR);
105                     } else {
106                         buff.append(ROW_DELIMITER_CHAR);
107                     }
108                 }
109             }
110         }
111 
112         metadata.setText(buff.toString());
113         metadata.setSheets(sheets);
114         return metadata;
115     }
116 }