Copying an Excel file to a table's dataset

JordanCClark · January 22, 2021, 9:26pm

Got my paperwork done early, so here's a sample to read from an Excel file
EDIT: Reworked a bit to work with both .xls and .xlsx files
EDIT: Modified to properly cast data types
EDIT: Added handler for formula cell types
EDIT: Added parameters to optionally specify sheet, row, and column numbers

Reading back a file created from my other example:

def excelToDataSet(fileName, hasHeaders = False, sheetNum = 0, firstRow = None, lastRow = None, firstCol = None, lastCol = None):
	import org.apache.poi.ss.usermodel.WorkbookFactory as WorkbookFactory
	import org.apache.poi.ss.usermodel.DateUtil as DateUtil
	from java.io import FileInputStream
	from java.util import Date
	
	"""
	   Function to create a dataset from an Excel spreadsheet. It will try to automatically detect the boundaries of the data,
	   but helper parameters are available:
	   params:
	   		fileName   - The path to the Excel spreadsheet. (required)
	   		hasHeaders - If true, uses the first row of the spreadsheet as column names.
	   		sheetNum   - select the sheet to process. defaults to the first sheet.
	   		firstRow   - select first row to process. 
	   		lastRow    - select last row to process.
	   		firstCol   - select first column to process
	   		lastCol    - select last column toprocess
	"""
	
	fileStream = FileInputStream(fileName)

	wb = WorkbookFactory.create(fileStream)
	
	sheet = wb.getSheetAt(sheetNum)

	if firstRow is None:
		firstRow = sheet.getFirstRowNum()
	if lastRow is None:
		lastRow = sheet.getLastRowNum()

	data = []
	for i in range(firstRow , lastRow + 1):
		row = sheet.getRow(i)
		print str(i).zfill(3), list(row)
		if i == firstRow:
			if firstCol is None:
				firstCol = row.getFirstCellNum()

			if lastCol is None:
				lastCol  = row.getLastCellNum()
			else:
				# if lastCol is specified add 1 to it.
				lastCol += 1
			if hasHeaders:
				headers = list(row)[firstCol:lastCol]
			else:
				headers = ['Col'+str(cNum) for cNum in range(firstCol, lastCol)] 
		rowOut = []
		for j in range(firstCol, lastCol):
			if i == firstRow and hasHeaders:
				pass
			else:
				cell = row.getCell(j)
				cellType = cell.getCellType().toString()
				if cellType == 'NUMERIC':
					if DateUtil.isCellDateFormatted(cell):
						value =  cell.dateCellValue
					else:
						value = cell.getNumericCellValue()
						if value == int(value):
							value = int(value)
				elif cellType == 'STRING':
					value = cell.getStringCellValue()
				elif cellType == 'BOOLEAN':
					value = cell.getBooleanCellValue()
				elif cellType == 'BLANK':
					value = None	
				elif cellType == 'FORMULA':
					formulatype=str(cell.getCachedFormulaResultType())
					if formulatype == 'NUMERIC':
						if DateUtil.isCellDateFormatted(cell):
							value =  cell.dateCellValue
						else:
							value = cell.getNumericCellValue()
							if value == int(value):
								value = int(value)
					elif formulatype == 'STRING':
						value = cell.getStringCellValue()
					elif formulatype == 'BOOLEAN':
						value = cell.getBooleanCellValue()
					elif formulatype == 'BLANK':
						value = None
				else:
					value = None	
				rowOut.append(value)
		if len(rowOut) > 0:
			data.append(rowOut)

	fileStream.close()
	
	return system.dataset.toDataSet(headers, data)

# Read .xlsx file
fName = 'C:/Test/poi1.xlsx'
ds1 = excelToDataSet(fName, True)		
util.printDataSet(ds1)

#Read .xls file
fName = 'C:/Test/poi2.xls'
ds2 = excelToDataSet(fName, True)		
util.printDataSet(ds2)

row | t_stamp                      | sValue | iValue | fValue
-------------------------------------------------------------
0   | Thu Jan 21 08:00:00 EST 2021 | A      | 1      | 1.23  
1   | Fri Jan 22 08:30:00 EST 2021 | B      | 2      | 4.56  
2   | Sat Jan 23 09:24:00 EST 2021 | C      | 3      | 7.89  
3   | Sun Jan 24 19:37:00 EST 2021 | D      | 4      | 0.12  
4   | Mon Jan 25 18:42:00 EST 2021 | E      | 5      | 3.45 
 
row | t_stamp                      | sValue | iValue | fValue
-------------------------------------------------------------
0   | Thu Jan 21 08:00:00 EST 2021 | Z      | 5      | 3.45  
1   | Fri Jan 22 08:30:00 EST 2021 | Y      | 3      | 0.12  
2   | Sat Jan 23 09:24:00 EST 2021 | X      | 2      | 7.89  
3   | Sun Jan 24 19:37:00 EST 2021 | W      | 4      | 4.56  
4   | Mon Jan 25 18:42:00 EST 2021 | V      | 1      | 1.23