Commit e08d1b7a authored by Jens X Augustsson's avatar Jens X Augustsson
Browse files

Updated package names to se.redpill.poiUtility

parent 8e9b3c2d
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven build descriptor for the POI Utility library. -->
<modelVersion>4.0.0</modelVersion>
<!-- Artifact coordinates: se.redpill.poiUtility:poi-utility:1.0, packaged as a plain jar. -->
<groupId>se.redpill.poiUtility</groupId>
<artifactId>poi-utility</artifactId>
<version>1.0</version>
<packaging>jar</packaging>
<name>POI Utility</name>
<dependencies>
<!-- Compile-scope dependencies (no explicit scope element is declared). -->
<dependency>
<!-- Apache POI OOXML support, used by the XLSX to CSV conversion code. -->
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.13</version>
<exclusions>
<!-- NOTE(review): stax-api is excluded from the transitive dependencies; presumably because the JDK already ships the StAX API - confirm. -->
<exclusion>
<groupId>stax</groupId>
<artifactId>stax-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- Provided-scope dependencies: none declared. -->
<!-- Test-scope dependencies: none declared. -->
</dependencies>
</project>
//https://svn.apache.org/repos/asf/poi/trunk/src/examples/src/org/apache/poi/xssf/eventusermodel/XLSX2CSV.java
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.eventusermodel;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.util.SAXHelper;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
/**
 * A rudimentary XLSX -> CSV processor modeled on the
 * POI sample program XLS2CSVmra from the package
 * org.apache.poi.hssf.eventusermodel.examples.
 * As with the HSSF version, this tries to spot missing
 * rows and cells, and output empty entries for them.
 * <p>
 * Data sheets are read using a SAX parser to keep the
 * memory footprint relatively small, so this should be
 * able to read enormous workbooks. The styles table and
 * the shared-string table must be kept in memory. The
 * standard POI styles table class is used, but a custom
 * (read-only) class is used for the shared string table
 * because the standard POI SharedStringsTable grows very
 * quickly with the number of unique strings.
 * <p>
 * For a more advanced implementation of SAX event parsing
 * of XLSX files, see {@link XSSFEventBasedExcelExtractor}
 * and {@link XSSFSheetXMLHandler}. Note that for many cases,
 * it may be possible to simply use those with a custom
 * {@link SheetContentsHandler} and no SAX code needed of
 * your own!
 * <p>
 * Output format: every sheet is introduced by an empty line followed by
 * a heading line {@code <sheet name> [index=<n>]:}; each row is one line,
 * cells are joined by the configured separator, values that parse as a
 * double are written verbatim and all other values are wrapped in double
 * quotes (embedded quotes are NOT escaped).
 */
public class XLSX2CSV {

    /**
     * Uses the XSSF Event SAX helpers to do most of the work
     * of parsing the Sheet XML, and outputs the contents
     * as a (basic) CSV.
     */
    private class SheetToCSV implements SheetContentsHandler {
        /** True until the first cell of the current row has been written. */
        private boolean firstCellOfRow = false;
        /** Index of the row currently being written, -1 before the first row. */
        private int currentRow = -1;
        /** Index of the last column written in the current row, -1 if none. */
        private int currentCol = -1;
        /** Separator written between cells. */
        private char separator = ',';

        /**
         * Writes {@code number} empty rows, each consisting of
         * {@code minColumns} separators and a line break.
         */
        private void outputMissingRows(int number) {
            for (int i = 0; i < number; i++) {
                for (int j = 0; j < minColumns; j++) {
                    output.append(this.separator);
                }
                output.append('\n');
            }
        }

        public void startRow(int rowNum) {
            // If there were gaps, output the missing rows
            outputMissingRows(rowNum - currentRow - 1);
            // Prepare for this row
            firstCellOfRow = true;
            currentRow = rowNum;
            currentCol = -1;
        }

        public void endRow(int rowNum) {
            // Ensure the minimum number of columns
            for (int i = currentCol; i < minColumns; i++) {
                output.append(this.separator);
            }
            output.append('\n');
        }

        public void cell(String cellReference, String formattedValue,
                XSSFComment comment) {
            if (firstCellOfRow) {
                firstCellOfRow = false;
            } else {
                output.append(this.separator);
            }

            // A cell without a reference is assumed to directly follow the
            // previous one instead of failing with a NullPointerException.
            int thisCol = cellReference != null
                    ? new CellReference(cellReference).getCol()
                    : currentCol + 1;

            // Did we miss any cells?
            int missedCols = thisCol - currentCol - 1;
            for (int i = 0; i < missedCols; i++) {
                output.append(this.separator);
            }
            currentCol = thisCol;

            // Nothing to write for a cell without a value; the column
            // position has already been accounted for above.
            if (formattedValue == null) {
                return;
            }

            // Number or string?
            try {
                Double.parseDouble(formattedValue);
                output.append(formattedValue);
            } catch (NumberFormatException e) {
                output.append('"');
                output.append(formattedValue);
                output.append('"');
            }
        }

        public void headerFooter(String text, boolean isHeader, String tagName) {
            // Skip, no headers or footers in CSV
        }
    }

    ///////////////////////////////////////

    private final OPCPackage xlsxPackage;

    /**
     * Number of columns to read starting with leftmost
     */
    private final int minColumns;

    /**
     * Destination for data
     */
    private final PrintStream output;

    /**
     * The separator between the cells in the output. Defaults to ','
     * (the NUL character means "not set").
     */
    private char separator;

    /**
     * Creates a new XLSX -> CSV converter
     *
     * @param pkg        The XLSX package to process
     * @param output     The PrintStream to output the CSV to
     * @param minColumns The minimum number of columns to output, or -1 for no minimum
     */
    public XLSX2CSV(OPCPackage pkg, PrintStream output, int minColumns) {
        this.xlsxPackage = pkg;
        this.output = output;
        this.minColumns = minColumns;
    }

    /**
     * Parses and shows the content of one sheet
     * using the specified styles and shared-strings tables.
     *
     * @param styles           styles table of the workbook
     * @param strings          shared-strings table of the workbook
     * @param sheetHandler     receiver of the row/cell events
     * @param sheetInputStream raw XML of the sheet; not closed by this method
     * @throws IOException      if reading the sheet fails
     * @throws SAXException     if the sheet XML cannot be parsed
     * @throws RuntimeException if no SAX parser can be created (the
     *                          {@link ParserConfigurationException} is kept as cause)
     */
    public void processSheet(
            StylesTable styles,
            ReadOnlySharedStringsTable strings,
            SheetContentsHandler sheetHandler,
            InputStream sheetInputStream)
            throws IOException, ParserConfigurationException, SAXException {
        DataFormatter formatter = new DataFormatter();
        InputSource sheetSource = new InputSource(sheetInputStream);
        try {
            XMLReader sheetParser = SAXHelper.newXMLReader();
            ContentHandler handler = new XSSFSheetXMLHandler(
                    styles, null, strings, sheetHandler, formatter, false);
            sheetParser.setContentHandler(handler);
            sheetParser.parse(sheetSource);
        } catch (ParserConfigurationException e) {
            // Keep the original exception as cause so the stack trace is not lost.
            throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage(), e);
        }
    }

    /**
     * Initiates the processing of the XLS workbook file to CSV.
     * Every sheet stream is closed even when parsing that sheet fails.
     */
    public void process()
            throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
        ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);
        XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
        StylesTable styles = xssfReader.getStylesTable();
        XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
        int index = 0;
        while (iter.hasNext()) {
            InputStream stream = iter.next();
            try {
                String sheetName = iter.getSheetName();
                this.output.println();
                this.output.println(sheetName + " [index=" + index + "]:");
                SheetToCSV sheetToCSV = new SheetToCSV();
                if (this.separator != '\u0000') {
                    sheetToCSV.separator = this.separator;
                }
                processSheet(styles, strings, sheetToCSV, stream);
            } finally {
                stream.close();
            }
            ++index;
        }
    }

    /**
     * Command line entry point: {@code XLSX2CSV <xlsx file> [min columns]}.
     * Writes the CSV to standard output.
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            System.err.println("Use:");
            System.err.println(" XLSX2CSV <xlsx file> [min columns]");
            return;
        }
        File xlsxFile = new File(args[0]);
        if (!xlsxFile.exists()) {
            System.err.println("Not found or not a file: " + xlsxFile.getPath());
            return;
        }
        int minColumns = -1;
        if (args.length >= 2) {
            minColumns = Integer.parseInt(args[1]);
        }
        // The package open is instantaneous, as it should be.
        OPCPackage p = OPCPackage.open(xlsxFile.getPath(), PackageAccess.READ);
        try {
            XLSX2CSV xlsx2csv = new XLSX2CSV(p, System.out, minColumns);
            xlsx2csv.process();
        } finally {
            // Release the package even when the conversion fails.
            p.close();
        }
    }

    /**
     * Sets the separator written between cells.
     *
     * @param separator the separator character; the NUL character keeps the default ','
     */
    public void setSeparator(char separator) {
        this.separator = separator;
    }
}
\ No newline at end of file
package se.redpill.poiUtility;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.regex.Pattern;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XLSX2CSV;
import org.xml.sax.SAXException;
/**
 * Convenience helpers for reading one sheet of an XLSX workbook as
 * separator-delimited text lines, built on top of {@link XLSX2CSV}.
 * <p>
 * An instance created through the constructor is a stateful row cursor:
 * call {@link #readLine()} to advance and the {@code getCell...} methods to
 * read values of the current row. Instances keep mutable per-row state
 * without any synchronization and must not be shared between threads.
 * <p>
 * Limitation: lines are split on every occurrence of the separator, so a
 * cell value that itself contains the separator is split into several
 * columns.
 *
 * @author jens.augustsson@redpill-linpro.com
 */
public class PoiUtility {

    /** Charset used for both writing and re-reading the intermediate CSV. */
    private static final String CSV_CHARSET = "UTF-8";

    private BufferedReader bufferedReader;
    private List<String> headers = new ArrayList<String>();
    private Character separator;
    /** Literal (quoted) pattern of the separator, so regex meta characters are safe. */
    private Pattern separatorPattern;
    private String[] lineSplits;

    /**
     * If this constructor is used, non-static (stateful) methods can be used
     *
     * @param inputStream       the XLSX workbook content
     * @param sheetName         name of the sheet to read
     * @param minColumns        minimum number of columns to emit per row, or -1 for no minimum
     * @param separator         column separator - defaults to tab when {@code null}
     * @param firstLineIsHeader whether the first row of the sheet holds the column headers
     * @throws RuntimeException if the workbook cannot be read, the sheet is missing,
     *                          or a header row is expected but the sheet output is empty
     */
    public PoiUtility(InputStream inputStream, String sheetName, int minColumns, Character separator, boolean firstLineIsHeader) {
        try {
            this.separator = separator != null ? separator : Character.valueOf('\t');
            this.separatorPattern = Pattern.compile(Pattern.quote(this.separator.toString()));
            this.bufferedReader = PoiUtility.getCsvBufferedReader(inputStream, sheetName, minColumns, this.separator);
            if (firstLineIsHeader) {
                String headerLine = this.bufferedReader.readLine();
                if (headerLine == null) {
                    // Previously surfaced as an unexplained NullPointerException.
                    throw new RuntimeException("Sheet " + sheetName + " has no header row");
                }
                String[] splits = splitLine(headerLine);
                for (int i = 0; i < splits.length; i++) {
                    splits[i] = getString(splits[i]);
                }
                this.headers = Arrays.asList(splits);
            }
        } catch (IOException e) {
            throw new RuntimeException("Failed to construct PoiUtility for sheet " + sheetName, e);
        }
    }

    //Public nonStatic

    /**
     * Advances to the next row.
     *
     * @return {@code true} if a non-blank row was read; {@code false} on a blank
     *         line or at end of input (the reader is closed at end of input)
     * @throws RuntimeException if reading fails
     */
    public boolean readLine() {
        try {
            String line = this.bufferedReader.readLine();
            if (line == null) {
                this.bufferedReader.close();
                return false;
            }
            this.lineSplits = splitLine(line);
            return line.trim().length() > 0;
        } catch (IOException e) {
            throw new RuntimeException("Could not readLine", e);
        }
    }

    /**
     * Returns the cell at the given column of the current row.
     *
     * @param columnIndex zero-based column index
     * @return the unquoted value, or {@code null} if the cell is blank or the row is shorter
     */
    public String getCell(int columnIndex) {
        return getString(rawCell(columnIndex));
    }

    /**
     * @param header header text of the wanted column
     * @return the unquoted value, or {@code null} if the cell is blank or missing
     * @throws RuntimeException if the header is unknown
     */
    public String getCellAsString(String header) {
        return getString(rawCell(getColumnIndex(header)));
    }

    /**
     * @param header header text of the wanted column
     * @return the cell value as Integer
     * @throws RuntimeException if the header is unknown or the cell is empty or not numeric
     */
    public Integer getCellAsInteger(String header) {
        return getInteger(rawCell(getColumnIndex(header)));
    }

    /**
     * @param header header text of the wanted column
     * @return the cell value as Long
     * @throws RuntimeException if the header is unknown or the cell is empty or not numeric
     */
    public Long getCellAsLong(String header) {
        return getLong(rawCell(getColumnIndex(header)));
    }

    /**
     * Reads a date cell of the current row.
     *
     * @param header              header text of the wanted column
     * @param sdf                 format to parse with; a default {@link SimpleDateFormat} is used when {@code null}
     * @param nullifyNonParseable if {@code true} an unparseable value yields {@code null} instead of an exception
     * @return the parsed date, or {@code null} for a blank or missing cell
     * @throws RuntimeException if the header is unknown, or the value cannot be parsed
     *                          and {@code nullifyNonParseable} is {@code false}
     */
    public Date getCellAsDate(String header, SimpleDateFormat sdf, boolean nullifyNonParseable) {
        String raw = rawCell(getColumnIndex(header));
        String value = getString(raw);
        if (value == null) {
            return null;
        }
        SimpleDateFormat format = sdf != null ? sdf : new SimpleDateFormat();
        try {
            return format.parse(value);
        } catch (ParseException e) {
            if (nullifyNonParseable) {
                return null;
            }
            throw new RuntimeException("Could not parse date from data " + raw, e);
        }
    }

    /**
     * @param header     header text of the wanted column
     * @param trueValue  extra text that means TRUE (may be {@code null})
     * @param falseValue extra text that means FALSE (may be {@code null})
     * @return the cell value as Boolean, see {@link #getBoolean(String, String, String)}
     * @throws RuntimeException if the header is unknown or the value is not a recognised boolean
     */
    public Boolean getCellAsBoolean(String header, String trueValue, String falseValue) {
        return getBoolean(rawCell(getColumnIndex(header)), trueValue, falseValue);
    }

    // Public static

    /**
     * Capitalises the first character and lower-cases the rest.
     *
     * @param data the text to normalise
     * @return the normalised text; {@code null} and the empty string are returned unchanged
     */
    public static String cleanCases(String data) {
        if (data != null && data.length() > 0) {
            return data.substring(0, 1).toUpperCase() + data.substring(1).toLowerCase();
        }
        return data;
    }

    /**
     * Removes one pair of surrounding double quotes, if present.
     *
     * @param data the raw cell text
     * @return the unquoted text, or {@code null} if it is {@code null} or blank
     */
    public static String getString(String data) {
        if (data == null) {
            return null;
        }
        // Only strip when both quotes are there: a lone quote must neither
        // crash nor cost the value its last character.
        if (data.length() >= 2 && data.startsWith("\"") && data.endsWith("\"")) {
            data = data.substring(1, data.length() - 1);
        }
        if (data.trim().length() == 0) {
            return null;
        }
        return data;
    }

    /**
     * Parses an integer, also accepting decimal notation such as {@code 12.0}
     * (the fraction is truncated).
     *
     * @param data the text to parse
     * @return the parsed value
     * @throws RuntimeException      if the text is {@code null} or blank
     * @throws NumberFormatException if the text is not numeric
     */
    public static Integer getInteger(String data) {
        if (data != null && data.trim().length() > 0) {
            try {
                //Should be an Integer
                return Integer.parseInt(data);
            } catch (NumberFormatException e) {
                //Can be a decimal; double represents every int exactly
                return Double.valueOf(Double.parseDouble(data)).intValue();
            }
        }
        throw new RuntimeException("Data cannot be parsed to Integer: " + data);
    }

    /**
     * Parses text that must be a plain integer.
     *
     * @param data the text to parse
     * @return the parsed value
     * @throws RuntimeException if the text is not an integer (the parse failure is kept as cause)
     */
    public static Integer getIntegerFromInt(String data) {
        //Should be an int
        try {
            return Integer.valueOf(data);
        } catch (NumberFormatException e) {
            throw new RuntimeException("Data cannot be parsed to Integer: " + data, e);
        }
    }

    /**
     * Parses a long, also accepting decimal notation such as {@code 12.0}
     * (the fraction is truncated).
     *
     * @param data the text to parse
     * @return the parsed value
     * @throws RuntimeException      if the text is {@code null} or blank
     * @throws NumberFormatException if the text is not numeric
     */
    public static Long getLong(String data) {
        if (data != null && data.trim().length() > 0) {
            try {
                //Should be a Long
                return Long.parseLong(data);
            } catch (NumberFormatException e) {
                //Can be a decimal; double keeps far more digits than float
                return Double.valueOf(Double.parseDouble(data)).longValue();
            }
        }
        throw new RuntimeException("Data cannot be parsed to Long: " + data);
    }

    /**
     * Parses a boolean. Surrounding double quotes are removed first.
     *
     * @param data       the text to parse
     * @param trueValue  - this string or 1 or true will return TRUE
     * @param falseValue - this string or 0 or false will return FALSE
     * @return the parsed value, never {@code null}
     * @throws RuntimeException if the text matches neither alternative
     */
    public static Boolean getBoolean(String data, String trueValue, String falseValue) {
        String value = getString(data);
        if (value != null) {
            if ((trueValue != null && trueValue.equals(value)) || "1".equals(value) || "true".equalsIgnoreCase(value)) {
                return Boolean.TRUE;
            }
            if ((falseValue != null && falseValue.equals(value)) || "0".equals(value) || "false".equalsIgnoreCase(value)) {
                return Boolean.FALSE;
            }
        }
        throw new RuntimeException("Data cannot be parsed to Boolean: " + value);
    }

    /**
     * Parses a date, swallowing format errors.
     *
     * @param data the text to parse
     * @param sdf  format to parse with; a default {@link SimpleDateFormat} is used when {@code null}
     * @return the parsed date, or {@code null} if the text is {@code null} or cannot be parsed
     */
    public static Date getDateOrNull(String data, SimpleDateFormat sdf) {
        if (data == null) {
            return null;
        }
        SimpleDateFormat format = sdf != null ? sdf : new SimpleDateFormat();
        try {
            return format.parse(data);
        } catch (ParseException ignored) {
            // Badly formatted dates are deliberately reported as null.
            return null;
        }
    }

    /**
     * Utility for Apache POI XLSX2CSV: converts the whole workbook to CSV in
     * memory and returns a line-based reader positioned on the first row of
     * the requested sheet (the sheet heading line has already been consumed).
     * The reader continues into the output of any following sheets, which are
     * separated by an empty line.
     *
     * @param inputStream the XLSX workbook content; not closed by this method
     * @param sheetName   name of the sheet to position on
     * @param minColumns  minimum number of columns to emit per row, or -1 for no minimum
     * @param separator   column separator
     * @return a reader positioned after the sheet heading
     * @throws RuntimeException if the workbook cannot be read or the sheet does not exist
     */
    public static BufferedReader getCsvBufferedReader(InputStream inputStream, String sheetName, int minColumns, char separator) {
        OPCPackage pkg = null;
        try {
            pkg = OPCPackage.open(inputStream);
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            // Write and read with one explicit charset so that the round trip
            // does not depend on (or get mangled by) the platform default.
            PrintStream ps = new PrintStream(baos, false, CSV_CHARSET);
            XLSX2CSV xlsx2csv = new XLSX2CSV(pkg, ps, minColumns);
            xlsx2csv.setSeparator(separator);
            xlsx2csv.process();
            ps.flush();
            ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
            BufferedReader br = new BufferedReader(new InputStreamReader(bais, CSV_CHARSET));
            // XLSX2CSV introduces each sheet with "<name> [index=<n>]:". Matching
            // the full prefix keeps "Sheet1" from matching "Sheet10".
            String headingPrefix = sheetName + " [index=";
            String line = br.readLine();
            while (line != null) {
                if (line.startsWith(headingPrefix)) {
                    return br;
                }
                line = br.readLine();
            }
            throw new RuntimeException("Could not find sheet " + sheetName + " in uploaded file");
        } catch (InvalidFormatException e) {
            throw new RuntimeException(e);
        } catch (IOException e) {
            throw new RuntimeException(e);
        } catch (OpenXML4JException e) {
            throw new RuntimeException(e);
        } catch (ParserConfigurationException e) {
            throw new RuntimeException(e);
        } catch (SAXException e) {
            throw new RuntimeException(e);
        } finally {
            if (pkg != null) {
                // All data has been copied to memory; discard the package
                // without saving to release what it holds.
                pkg.revert();
            }
        }
    }

    //Privates

    /** Splits a line on the literal separator, keeping trailing empty columns. */
    private String[] splitLine(String line) {
        return this.separatorPattern.split(line, -1);
    }

    /**
     * Returns the raw text of a column of the current row, or {@code null}
     * when the row has fewer columns than requested.
     */
    private String rawCell(int columnIndex) {
        if (this.lineSplits == null) {
            throw new IllegalStateException("No current row, call readLine() first");
        }
        if (columnIndex >= this.lineSplits.length) {
            return null;
        }
        return this.lineSplits[columnIndex];
    }

    /** Resolves a header text to its zero-based column index. */
    private int getColumnIndex(String header) {
        int idx = this.headers.indexOf(header);
        if (idx > -1) {
            return idx;
        } else {
            throw new RuntimeException("Could not find header " + header);
        }
    }
}
package se.redpill.poiUtility;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
/**
*
* @author andreas.arvidsson@redpill-linpro.com
*/
public class SheetPage implements Iterable<SheetRow> {
private final Sheet sheet;
private final Map<String, Integer> colNames = new HashMap<String, Integer>();
private final int rowCount, colCount;
/**
 * Wraps the given sheet and captures its dimensions up front.
 * The row count is the sheet's physical number of rows; the column count
 * and the column-name lookup come from helpers of this class.
 *
 * @param sheet the POI sheet to wrap
 */
public SheetPage(Sheet sheet) {
    // The sheet must be assigned first: the calc* helpers run afterwards.
    this.sheet = sheet;
    this.rowCount = this.sheet.getPhysicalNumberOfRows();
    this.colCount = calcColumnCount();
    calcColumnNames();
}
/**
 * @return the number of rows captured when this page was constructed
 */
public int getRowCount() {
    return this.rowCount;
}
/**
 * @return the number of columns calculated when this page was constructed
 */
public int getColCount() {
    return this.colCount;
}
/**
 * Wraps the sheet row at the given index.
 *
 * @param index row index, passed straight to the underlying sheet
 * @return a new SheetRow built from that row and this page's column count
 */
public SheetRow getRow(int index) {
    final Row row = this.sheet.getRow(index);
    return new SheetRow(row, this.colCount);
}
public int getColIndex(String colName) {
return colNames.get(colName);