@@ -26,6 +26,10 @@ import nextflow.Nextflow
2626@CompileStatic
2727class WorkbookConverter {
2828
29+ // Constants for Excel file formats
30+ private static final List<String > EXCEL_EXTENSIONS = [' xlsx' , ' xlsm' , ' xlsb' , ' xls' ]
31+ private static final int DEFAULT_SHEET_INDEX = 0
32+
2933 private ValidationConfig config
3034
3135 WorkbookConverter (ValidationConfig config ) {
@@ -94,7 +98,9 @@ class WorkbookConverter {
9498 // Use WorkbookFactory for automatic format detection
9599 return WorkbookFactory . create(workbookFile. toFile())
96100 } catch (Exception e) {
97- throw new SchemaValidationException (" Failed to open Excel file ${ workbookFile} : ${ e.message} " )
101+ def colors = Utils . logColours(false ) // Use false for default since config may not be available here
102+ def msg = " ${ colors.red} Failed to open Excel file ${ workbookFile} : ${ e.message} \n ${ colors.reset} \n "
103+ throw new SchemaValidationException (msg)
98104 }
99105 }
100106
@@ -107,22 +113,28 @@ class WorkbookConverter {
107113
108114 if (sheetSelector == null ) {
109115 // Default to first sheet
110- sheet = workbook. getSheetAt(0 )
116+ sheet = workbook. getSheetAt(DEFAULT_SHEET_INDEX )
111117 } else if (sheetSelector instanceof String ) {
112118 // Select by sheet name
113119 sheet = workbook. getSheet(sheetSelector as String )
114120 if (sheet == null ) {
115- throw new SchemaValidationException (" Sheet '${ sheetSelector} ' not found in workbook" )
121+ def colors = Utils . logColours(config. monochromeLogs)
122+ def msg = " ${ colors.red} Sheet '${ sheetSelector} ' not found in workbook\n ${ colors.reset} \n "
123+ throw new SchemaValidationException (msg)
116124 }
117125 } else if (sheetSelector instanceof Integer ) {
118126 // Select by sheet index
119127 def sheetIndex = sheetSelector as Integer
120128 if (sheetIndex < 0 || sheetIndex >= workbook. getNumberOfSheets()) {
121- throw new SchemaValidationException (" Sheet index ${ sheetIndex} is out of range (0-${ workbook.getNumberOfSheets()-1} )" )
129+ def colors = Utils . logColours(config. monochromeLogs)
130+ def msg = " ${ colors.red} Sheet index ${ sheetIndex} is out of range (0-${ workbook.getNumberOfSheets()-1} )\n ${ colors.reset} \n "
131+ throw new SchemaValidationException (msg)
122132 }
123133 sheet = workbook. getSheetAt(sheetIndex)
124134 } else {
125- throw new SchemaValidationException (" Sheet selector must be either a String (sheet name) or Integer (sheet index)" )
135+ def colors = Utils . logColours(config. monochromeLogs)
136+ def msg = " ${ colors.red} Sheet selector must be either a String (sheet name) or Integer (sheet index)\n ${ colors.reset} \n "
137+ throw new SchemaValidationException (msg)
126138 }
127139
128140 return sheet
@@ -132,35 +144,52 @@ class WorkbookConverter {
132144 * Convert Excel sheet to list of maps
133145 */
134146 private List<Map<String , Object > > convertSheetToList (Sheet sheet ) {
135- List<Map<String , Object > > result = []
136- List<String > headers = []
137- boolean hasHeader = true
138-
139147 if (sheet. getPhysicalNumberOfRows() == 0 ) {
140- return result
148+ return []
141149 }
142150
143- // Get header row (first row)
151+ // Process headers
152+ List<String > headers = processHeaders(sheet)
153+ boolean hasHeader = headers. any { it != null && ! it. trim(). isEmpty() }
154+
155+ // Process data rows
156+ return processDataRows(sheet, headers, hasHeader)
157+ }
158+
159+ /**
160+ * Process headers from the sheet
161+ */
162+ private List<String > processHeaders (Sheet sheet ) {
144163 Row headerRow = sheet. getRow(sheet. getFirstRowNum())
145- if (headerRow != null ) {
146- headers = extractHeaders(headerRow)
147- hasHeader = headers. any { it != null && ! it. trim(). isEmpty() }
164+ if (headerRow == null ) {
165+ return []
148166 }
149167
150- // If no headers, create generic column names
151- if (! hasHeader && headerRow != null ) {
168+ List<String > headers = extractHeaders(headerRow)
169+ boolean hasValidHeaders = headers. any { it != null && ! it. trim(). isEmpty() }
170+
171+ // If no valid headers, create generic column names
172+ if (! hasValidHeaders) {
152173 def colCount = headerRow. getLastCellNum()
153174 headers = (0 .. < colCount). collect { " column_${ it} " . toString() }
154175 }
155176
156- // Process data rows
157- def startRow = hasHeader ? sheet. getFirstRowNum() + 1 : sheet. getFirstRowNum()
158- def endRow = sheet. getLastRowNum()
177+ return headers
178+ }
179+
180+ /**
181+ * Process data rows from the sheet
182+ */
183+ private List<Map<String , Object > > processDataRows (Sheet sheet , List<String > headers , boolean hasHeader ) {
184+ List<Map<String , Object > > result = []
185+
186+ int startRow = hasHeader ? sheet. getFirstRowNum() + 1 : sheet. getFirstRowNum()
187+ int endRow = sheet. getLastRowNum()
159188
160189 for (int rowIndex = startRow; rowIndex <= endRow; rowIndex++ ) {
161190 Row row = sheet. getRow(rowIndex)
162191 if (row != null ) {
163- Map<String , Object > rowData = processRow(row, headers as List< String > )
192+ Map<String , Object > rowData = processRow(row, headers)
164193 if (rowData && ! rowData. isEmpty()) {
165194 result. add(rowData)
166195 }
@@ -178,7 +207,7 @@ class WorkbookConverter {
178207
179208 for (int colIndex = 0 ; colIndex < headerRow. getLastCellNum(); colIndex++ ) {
180209 Cell cell = headerRow. getCell(colIndex)
181- String headerValue = getCellValueAsString (cell)
210+ String headerValue = getCellValue (cell, true ) as String
182211 headers. add(headerValue ?: " column_${ colIndex} " . toString())
183212 }
184213
@@ -209,88 +238,53 @@ class WorkbookConverter {
209238
210239 /**
211240 * Extract cell value based on cell type
241+ * @param cell The cell to extract value from
242+ * @param asString If true, returns string representation for headers
212243 */
213- private Object getCellValue (Cell cell ) {
244+ private Object getCellValue (Cell cell , boolean asString = false ) {
214245 if (cell == null ) {
215- return null
246+ return asString ? " " : null
216247 }
217248
218249 try {
219250 switch (cell. getCellType()) {
220251 case CellType . STRING :
221- return cell. getStringCellValue()?. trim()
252+ return cell. getStringCellValue()?. trim() ?: (asString ? " " : null )
222253 case CellType . NUMERIC :
223254 if (DateUtil . isCellDateFormatted(cell)) {
224255 // Handle date cells
225256 Date date = cell. getDateCellValue()
226257 return new SimpleDateFormat (" yyyy-MM-dd" ). format(date)
227258 } else {
228259 double numValue = cell. getNumericCellValue()
229- // Return as integer if it's a whole number
230- if (numValue == Math . floor(numValue) && ! Double . isInfinite(numValue)) {
231- return (int )numValue
260+ if (asString) {
261+ // For string representation, format appropriately
262+ return (numValue == Math . floor(numValue) && ! Double . isInfinite(numValue)) ?
263+ String . valueOf((int )numValue) : String . valueOf(numValue)
232264 } else {
233- return numValue
265+ // Return as integer if it's a whole number
266+ return (numValue == Math . floor(numValue) && ! Double . isInfinite(numValue)) ?
267+ (int )numValue : numValue
234268 }
235269 }
236270 case CellType . BOOLEAN :
237- return cell. getBooleanCellValue()
271+ return asString ? String . valueOf(cell . getBooleanCellValue()) : cell. getBooleanCellValue()
238272 case CellType . FORMULA :
239- // Evaluate formula and get the result
240- return evaluateFormula(cell)
273+ Object result = evaluateFormula(cell)
274+ return asString ? String . valueOf(result) : result
241275 case CellType . BLANK :
242- return null
276+ return asString ? " " : null
243277 case CellType . ERROR :
244278 return " #ERROR#"
245279 default :
246- return cell. toString()?. trim()
280+ return cell. toString()?. trim() ?: (asString ? " " : null )
247281 }
248282 } catch (Exception e) {
249283 log. warn(" Error reading cell value at row ${ cell.getRowIndex()} , column ${ cell.getColumnIndex()} : ${ e.message} " )
250- return cell. toString()?. trim()
284+ return cell. toString()?. trim() ?: (asString ? " " : null )
251285 }
252286 }
253287
254- /**
255- * Get cell value as string (for headers)
256- */
257- private String getCellValueAsString (Cell cell ) {
258- if (cell == null ) {
259- return null
260- }
261-
262- try {
263- switch (cell. getCellType()) {
264- case CellType . STRING :
265- return cell. getStringCellValue()?. trim()
266- case CellType . NUMERIC :
267- if (DateUtil . isCellDateFormatted(cell)) {
268- Date date = cell. getDateCellValue()
269- return new SimpleDateFormat (" yyyy-MM-dd" ). format(date)
270- } else {
271- double numValue = cell. getNumericCellValue()
272- if (numValue == Math . floor(numValue) && ! Double . isInfinite(numValue)) {
273- return String . valueOf((int )numValue)
274- } else {
275- return String . valueOf(numValue)
276- }
277- }
278- case CellType . BOOLEAN :
279- return String . valueOf(cell. getBooleanCellValue())
280- case CellType . FORMULA :
281- return String . valueOf(evaluateFormula(cell))
282- case CellType . BLANK :
283- return " "
284- case CellType . ERROR :
285- return " #ERROR#"
286- default :
287- return cell. toString()?. trim() ?: " "
288- }
289- } catch (Exception e) {
290- log. warn(" Error reading cell value as string: ${ e.message} " )
291- return cell. toString()?. trim() ?: " "
292- }
293- }
294288
295289 /**
296290 * Evaluate formula in a cell
@@ -321,26 +315,19 @@ class WorkbookConverter {
321315 }
322316 }
323317
324- /**
325- * Get file extension from filename
326- */
327- private String getFileExtension (String filename ) {
328- int lastDotIndex = filename. lastIndexOf(' .' )
329- return lastDotIndex >= 0 ? filename. substring(lastDotIndex + 1 ) : " "
330- }
331318
332319 /**
333320 * Check if file is an Excel format
334321 */
335322 public static boolean isExcelFile (Path file ) {
336- def extension = getStaticFileExtension (file. toString(). toLowerCase())
337- return extension in [ ' xlsx ' , ' xlsm ' , ' xlsb ' , ' xls ' ]
323+ def extension = getFileExtension (file. toString(). toLowerCase())
324+ return extension in EXCEL_EXTENSIONS
338325 }
339326
340327 /**
341- * Static method to get file extension
328+ * Get file extension from filename
342329 */
343- public static String getStaticFileExtension (String filename ) {
330+ public static String getFileExtension (String filename ) {
344331 int lastDotIndex = filename. lastIndexOf(' .' )
345332 return lastDotIndex >= 0 ? filename. substring(lastDotIndex + 1 ) : " "
346333 }
0 commit comments