diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml new file mode 100644 index 0000000..c743950 --- /dev/null +++ b/.JuliaFormatter.toml @@ -0,0 +1 @@ +style = "blue" \ No newline at end of file diff --git a/Project.toml b/Project.toml index a500550..5812ae9 100644 --- a/Project.toml +++ b/Project.toml @@ -5,13 +5,21 @@ version = "0.11.0" [deps] BufferedStreams = "e1450e63-4bb3-523b-b2a4-4ffa8c0fd77d" CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193" +Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" +PooledArrays = "2dfb63ee-cc39-5dd5-95bd-886bf059d720" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +StringEncodings = "69024149-9ee7-55f6-a4c4-859efe599b68" +Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" [compat] BufferedStreams = "0.4.1, 1" CodecZlib = "0.5, 0.6, 0.7" +Dates = "1" HDF5 = "0.16, 0.17" +PooledArrays = "1.4.3" +StringEncodings = "0.3.7" +Tables = "1.12.1" julia = "1.6" [extras] diff --git a/docs/make.jl b/docs/make.jl index e48e71c..20eab35 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -25,6 +25,7 @@ makedocs(; pages = [ "Home" => "index.md", "Object Arrays" => "object_arrays.md", + "Types" => "types.md", "Methods" => "methods.md", ], warnonly = [:missing_docs,], diff --git a/docs/src/types.md b/docs/src/types.md new file mode 100644 index 0000000..4a14b6b --- /dev/null +++ b/docs/src/types.md @@ -0,0 +1,25 @@ +# Types and conversions + +MAT.jl uses the following type conversions from MATLAB types to Julia types: + +| MATLAB | Julia | +| -------- | ------- | +| numerical array | `Array{T}` | +| cell array | `Array{Any}` | +| char array | `String` | +| `struct` | `Dict{String,Any}` | +| `struct` array | `MAT.MatlabStructArray` | +| old class object | `MAT.MatlabClassObject` | +| new (opaque) class | `MAT.MatlabOpaque` | + +A few of the `MatlabOpaque` classes are automatically converted upon reading: + +| MATLAB | Julia | +| -------- | ------- | +| `string` | `String` | +| `datetime` | 
`Dates.DateTime` | +| `duration` | `Dates.Millisecond` | +| `categorical` | `PooledArrays.PooledArray` | +| `table` | `MAT.MatlabTable` (or any other table) | + +Note that single element arrays are typically converted to scalars in Julia, because MATLAB cannot distinguish between scalars and `1x1` sized arrays. \ No newline at end of file diff --git a/src/MAT.jl b/src/MAT.jl index b38126c..e0a4b6a 100644 --- a/src/MAT.jl +++ b/src/MAT.jl @@ -29,22 +29,23 @@ using HDF5, SparseArrays include("MAT_types.jl") using .MAT_types +include("MAT_subsys.jl") include("MAT_HDF5.jl") include("MAT_v5.jl") include("MAT_v4.jl") -using .MAT_HDF5, .MAT_v5, .MAT_v4 +using .MAT_HDF5, .MAT_v5, .MAT_v4, .MAT_subsys export matopen, matread, matwrite, @read, @write -export MatlabStructArray, MatlabClassObject +export MatlabStructArray, MatlabClassObject, MatlabOpaque, MatlabTable # Open a MATLAB file const HDF5_HEADER = UInt8[0x89, 0x48, 0x44, 0x46, 0x0d, 0x0a, 0x1a, 0x0a] -function matopen(filename::AbstractString, rd::Bool, wr::Bool, cr::Bool, tr::Bool, ff::Bool, compress::Bool) +function matopen(filename::AbstractString, rd::Bool, wr::Bool, cr::Bool, tr::Bool, ff::Bool, compress::Bool; table::Type=MatlabTable) # When creating new files, create as HDF5 by default fs = filesize(filename) if cr && (tr || fs == 0) - return MAT_HDF5.matopen(filename, rd, wr, cr, tr, ff, compress) + return MAT_HDF5.matopen(filename, rd, wr, cr, tr, ff, compress, Base.ENDIAN_BOM == 0x04030201; table=table) elseif fs == 0 error("File \"$filename\" does not exist and create was not specified") end @@ -72,7 +73,7 @@ function matopen(filename::AbstractString, rd::Bool, wr::Bool, cr::Bool, tr::Boo if wr || cr || tr || ff error("creating or appending to MATLAB v5 files is not supported") end - return MAT_v5.matopen(rawfid, endian_indicator) + return MAT_v5.matopen(rawfid, endian_indicator; table=table) end # Check for HDF5 file @@ -80,7 +81,7 @@ function matopen(filename::AbstractString, rd::Bool, wr::Bool, cr::Bool, 
tr::Boo seek(rawfid, offset) if read!(rawfid, Vector{UInt8}(undef, 8)) == HDF5_HEADER close(rawfid) - return MAT_HDF5.matopen(filename, rd, wr, cr, tr, ff, compress) + return MAT_HDF5.matopen(filename, rd, wr, cr, tr, ff, compress, endian_indicator == 0x494D; table=table) end end @@ -88,10 +89,10 @@ function matopen(filename::AbstractString, rd::Bool, wr::Bool, cr::Bool, tr::Boo error("\"$filename\" is not a MAT file") end -function matopen(fname::AbstractString, mode::AbstractString; compress::Bool = false) - mode == "r" ? matopen(fname, true , false, false, false, false, false) : - mode == "r+" ? matopen(fname, true , true , false, false, false, compress) : - mode == "w" ? matopen(fname, false, true , true , true , false, compress) : +function matopen(fname::AbstractString, mode::AbstractString; compress::Bool = false, table::Type = MatlabTable) + mode == "r" ? matopen(fname, true , false, false, false, false, false; table=table) : + mode == "r+" ? matopen(fname, true , true , false, false, false, compress; table=table) : + mode == "w" ? matopen(fname, false, true , true , true , false, compress; table=table) : # mode == "w+" ? matopen(fname, true , true , true , true , false, compress) : # mode == "a" ? matopen(fname, false, true , true , false, true, compress) : # mode == "a+" ? matopen(fname, true , true , true , false, true, compress) : @@ -110,8 +111,8 @@ function matopen(f::Function, args...; kwargs...) end """ - matopen(filename [, mode]; compress = false) -> handle - matopen(f::Function, filename [, mode]; compress = false) -> f(handle) + matopen(filename [, mode]; compress = false, table = MatlabTable) -> handle + matopen(f::Function, filename [, mode]; compress = false, table = MatlabTable) -> f(handle) Mode defaults to `"r"` for read. It can also be `"w"` for write, @@ -121,18 +122,71 @@ Compression on reading is detected/handled automatically; the `compress` keyword argument only affects write operations. 
Use with `read`, `write`, `close`, `keys`, and `haskey`. + +Optional keyword argument is the `table` type, for automatic conversion of Matlab tables. +Note that Matlab tables may contain non-vector columns which cannot always be converted to a Julia table, like `DataFrame`. + +# Example + +```julia +using MAT, DataFrames +filepath = abspath(pkgdir(MAT), "./test/v7.3/struct_table_datetime.mat") +fid = matopen(filepath; table = DataFrame) +keys(fid) + +# outputs + +1-element Vector{String}: + "s" + +``` + +Now you can read any of the keys +``` +s = read(fid, "s") +close(fid) +s + +# outputs + +Dict{String, Any} with 2 entries: + "testDatetime" => DateTime("2019-12-02T16:42:49.634") + "testTable" => 3×5 DataFrame… + +``` """ matopen # Read all variables from a MATLAB file """ - matread(filename) -> Dict + matread(filename; table = MatlabTable) -> Dict Return a dictionary of all the variables and values in a Matlab file, opening and closing it automatically. + +Optionally provide the `table` type to convert Matlab tables into. Default uses a simple `MatlabTable` type. + +# Example + +```julia +using MAT, DataFrames +filepath = abspath(pkgdir(MAT), "./test/v7.3/struct_table_datetime.mat") +vars = matread(filepath; table = DataFrame) +vars["s"]["testTable"] + +# outputs + +3×5 DataFrame + Row │ FlightNum Customer Date Rating Comment + │ Float64 String DateTime String String +─────┼───────────────────────────────────────────────────────────────────────────────────── + 1 │ 1261.0 Jones 2016-12-20T00:00:00 Good Flight left on time, not crowded + 2 │ 547.0 Brown 2016-12-21T00:00:00 Poor Late departure, ran out of dinne… + 3 │ 3489.0 Smith 2016-12-22T00:00:00 Fair Late, but only by half an hour. 
… +``` """ -function matread(filename::AbstractString) - file = matopen(filename) +function matread(filename::AbstractString; table::Type=MatlabTable) + file = matopen(filename; table=table) local vars try vars = read(file) @@ -178,18 +232,4 @@ function _write_dict(fileio, dict::AbstractDict) end end -### -### v0.10.0 deprecations -### - -export exists -@noinline function exists(matfile::Union{MAT_v4.Matlabv4File,MAT_v5.Matlabv5File,MAT_HDF5.MatlabHDF5File}, varname::String) - Base.depwarn("`exists(matfile, varname)` is deprecated, use `haskey(matfile, varname)` instead.", :exists) - return haskey(matfile, varname) -end -@noinline function Base.names(matfile::Union{MAT_v4.Matlabv4File,MAT_v5.Matlabv5File,MAT_HDF5.MatlabHDF5File}) - Base.depwarn("`names(matfile)` is deprecated, use `keys(matfile)` instead.", :names) - return keys(matfile) -end - end diff --git a/src/MAT_HDF5.jl b/src/MAT_HDF5.jl index 023b7fe..66f7361 100644 --- a/src/MAT_HDF5.jl +++ b/src/MAT_HDF5.jl @@ -29,10 +29,14 @@ module MAT_HDF5 using HDF5, SparseArrays +using ..MAT_subsys import Base: names, read, write, close import HDF5: Reference -import ..MAT_types: MatlabStructArray, StructArrayField, convert_struct_array, MatlabClassObject +import Dates +import Tables +import PooledArrays: PooledArray +import ..MAT_types: MatlabStructArray, StructArrayField, convert_struct_array, MatlabClassObject, MatlabOpaque, MatlabTable const HDF5Parent = Union{HDF5.File, HDF5.Group} const HDF5BitsOrBool = Union{HDF5.BitsType,Bool} @@ -43,9 +47,10 @@ mutable struct MatlabHDF5File <: HDF5.H5DataStore writeheader::Bool refcounter::Int compress::Bool + subsystem::Subsystem function MatlabHDF5File(plain, toclose::Bool=true, writeheader::Bool=false, refcounter::Int=0, compress::Bool=false) - f = new(plain, toclose, writeheader, refcounter, compress) + f = new(plain, toclose, writeheader, refcounter, compress, Subsystem()) if toclose finalizer(close, f) end @@ -53,6 +58,15 @@ mutable struct MatlabHDF5File <: 
HDF5.H5DataStore end end +function Base.show(io::IO, f::MatlabHDF5File) + print(io, "MatlabHDF5File(") + print(io, f.plain, ", ") + print(io, f.toclose, ", ") + print(io, f.writeheader, ", ") + print(io, f.refcounter, ", ") + print(io, f.compress, ")") +end + """ close(matfile_handle) @@ -70,8 +84,13 @@ function close(f::MatlabHDF5File) unsafe_copyto!(magicptr, idptr, length(identifier)) end magic[126] = 0x02 - magic[127] = 0x49 - magic[128] = 0x4d + if Base.ENDIAN_BOM == 0x04030201 + magic[127] = 0x49 + magic[128] = 0x4d + else + magic[127] = 0x4d + magic[128] = 0x49 + end rawfid = open(f.plain.filename, "r+") write(rawfid, magic) close(rawfid) @@ -81,7 +100,7 @@ function close(f::MatlabHDF5File) nothing end -function matopen(filename::AbstractString, rd::Bool, wr::Bool, cr::Bool, tr::Bool, ff::Bool, compress::Bool) +function matopen(filename::AbstractString, rd::Bool, wr::Bool, cr::Bool, tr::Bool, ff::Bool, compress::Bool, endian_indicator::Bool; table::Type=MatlabTable) local f if ff && !wr error("Cannot append to a read-only file") @@ -110,6 +129,12 @@ function matopen(filename::AbstractString, rd::Bool, wr::Bool, cr::Bool, tr::Boo fid.refcounter = length(g)-1 close(g) end + subsys_refs = "#subsystem#" + if haskey(fid.plain, subsys_refs) + fid.subsystem.table_type = table + subsys_data = m_read(fid.plain[subsys_refs], fid.subsystem) + MAT_subsys.load_subsys!(fid.subsystem, subsys_data, endian_indicator) + end fid end @@ -119,6 +144,7 @@ const name_type_attr_matlab = "MATLAB_class" const empty_attr_matlab = "MATLAB_empty" const sparse_attr_matlab = "MATLAB_sparse" const int_decode_attr_matlab = "MATLAB_int_decode" +const object_type_attr_matlab = "MATLAB_object_decode" const object_decode_attr_matlab = "MATLAB_object_decode" ### Reading @@ -130,14 +156,14 @@ function read_complex(dtype::HDF5.Datatype, dset::HDF5.Dataset, ::Type{T}) where return read(dset, Complex{T}) end -function read_references(dset::HDF5.Dataset) +function read_cell(dset::HDF5.Dataset, 
subsys::Subsystem) refs = read(dset, Reference) out = Array{Any}(undef, size(refs)) f = HDF5.file(dset) for i = 1:length(refs) dset = f[refs[i]] try - out[i] = m_read(dset) + out[i] = m_read(dset, subsys) finally close(dset) end @@ -145,7 +171,7 @@ function read_references(dset::HDF5.Dataset) return out end -function m_read(dset::HDF5.Dataset) +function m_read(dset::HDF5.Dataset, subsys::Subsystem) if haskey(dset, empty_attr_matlab) # Empty arrays encode the dimensions as the dataset dims = convert(Vector{Int}, read(dset)) @@ -167,29 +193,50 @@ function m_read(dset::HDF5.Dataset) end end + objecttype = haskey(dset, object_type_attr_matlab) ? read_attribute(dset, object_type_attr_matlab) : nothing mattype = haskey(dset, name_type_attr_matlab) ? read_attribute(dset, name_type_attr_matlab) : "struct_array_field" - if mattype == "cell" + if mattype == "cell" && objecttype === nothing # Cell arrays, represented as an array of refs - return read_references(dset) + return read_cell(dset, subsys) + elseif objecttype !== nothing + if objecttype != 3 + @warn "MATLAB Object Type $mattype is currently not supported." + return missing + end + if mattype == "FileWrapper__" + return read_cell(dset, subsys) + end + if haskey(dset, "MATLAB_fields") + @warn "Enumeration Instances are not supported currently." + return missing + end elseif mattype == "struct_array_field" # This will be converted into MatlabStructArray in `m_read(g::HDF5.Group)` - return StructArrayField(read_references(dset)) + return StructArrayField(read_cell(dset, subsys)) elseif !haskey(str2type_matlab,mattype) - @warn "MATLAB $mattype values are currently not supported" + @warn "MATLAB $mattype values are currently not supported." return missing end # Regular arrays of values # Convert to Julia type - T = str2type_matlab[mattype] + if objecttype === nothing + T = str2type_matlab[mattype] + else + T = UInt32 # FIXME: Default for MATLAB objects? 
+ end # Check for a COMPOUND data set, and if so handle complex numbers specially dtype = datatype(dset) try class_id = HDF5.API.h5t_get_class(dtype.id) d = class_id == HDF5.API.H5T_COMPOUND ? read_complex(dtype, dset, T) : read(dset, T) - length(d) == 1 ? d[1] : d + if objecttype !== nothing + return MAT_subsys.load_mcos_object(d, "MCOS", subsys) + else + return length(d) == 1 ? d[1] : d + end finally close(dtype) end @@ -232,7 +279,7 @@ function read_sparse_matrix(g::HDF5.Group, mattype::String) return SparseMatrixCSC(convert(Int, read_attribute(g, sparse_attr_matlab)), length(jc)-1, jc, ir, data) end -function read_struct_as_dict(g::HDF5.Group) +function read_struct_as_dict(g::HDF5.Group, subsys::Subsystem) if haskey(g, "MATLAB_fields") fn = [join(f) for f in read_attribute(g, "MATLAB_fields")] else @@ -242,7 +289,7 @@ function read_struct_as_dict(g::HDF5.Group) for i = 1:length(fn) dset = g[fn[i]] try - s[fn[i]] = m_read(dset) + s[fn[i]] = m_read(dset, subsys) finally close(dset) end @@ -251,8 +298,12 @@ function read_struct_as_dict(g::HDF5.Group) end # reading a struct, struct array, or sparse matrix -function m_read(g::HDF5.Group) - mattype = read_attribute(g, name_type_attr_matlab) +function m_read(g::HDF5.Group, subsys::Subsystem) + if HDF5.name(g) == "/#subsystem#" + mattype = "#subsystem#" + else + mattype = read_attribute(g, name_type_attr_matlab) + end is_object = false if mattype != "struct" attr = attributes(g) @@ -260,13 +311,12 @@ function m_read(g::HDF5.Group) if haskey(attr, sparse_attr_matlab) return read_sparse_matrix(g, mattype) elseif mattype == "function_handle" - @warn "MATLAB $mattype values are currently not supported" - return missing + # TODO: fall through for now, will become a Dict else if haskey(attr, object_decode_attr_matlab) && read_attribute(g, object_decode_attr_matlab)==2 # I think this means it's an old object class similar to mXOBJECT_CLASS in MAT_v5 is_object = true - else + elseif mattype != "#subsystem#" @warn "Unknown 
non-struct group of type $mattype detected; attempting to read as struct" end end @@ -276,7 +326,7 @@ function m_read(g::HDF5.Group) else class = "" end - s = read_struct_as_dict(g) + s = read_struct_as_dict(g, subsys) out = convert_struct_array(s, class) return out end @@ -292,7 +342,7 @@ function read(f::MatlabHDF5File, name::String) local val obj = f.plain[name] try - val = m_read(obj) + val = m_read(obj, f.subsystem) finally close(obj) end @@ -555,8 +605,7 @@ end # Struct array: Array of Dict => MATLAB struct array -function m_write(mfile::MatlabHDF5File, parent::HDF5Parent, name::String, - arr::AbstractArray{<:AbstractDict}) +function m_write(mfile::MatlabHDF5File, parent::HDF5Parent, name::String, arr::AbstractArray{<:AbstractDict}) m_write(mfile, parent, name, MatlabStructArray(arr)) end @@ -649,15 +698,38 @@ end m_write(mfile::MatlabHDF5File, parent::HDF5Parent, name::String, s::AbstractDict) = m_write(mfile, parent, name, check_struct_keys(collect(keys(s))), collect(values(s))) +# Write named tuple as a struct +function m_write(mfile::MatlabHDF5File, parent::HDF5Parent, name::String, nt::NamedTuple) + m_write(mfile, parent, name, [string(x) for x in keys(nt)], collect(nt)) +end + # Write generic CompositeKind as a struct function m_write(mfile::MatlabHDF5File, parent::HDF5Parent, name::String, s) if isbits(s) error("This is the write function for CompositeKind, but the input doesn't fit") + elseif Tables.istable(s) + error("writing tables is not yet supported") end T = typeof(s) m_write(mfile, parent, name, check_struct_keys([string(x) for x in fieldnames(T)]), [getfield(s, x) for x in fieldnames(T)]) end +function m_write(mfile::MatlabHDF5File, parent::HDF5Parent, name::String, dat::Dates.AbstractTime) + error("writing of Dates types is not yet supported") +end + +function m_write(mfile::MatlabHDF5File, parent::HDF5Parent, name::String, obj::MatlabOpaque) + error("writing of MatlabOpaque types is not yet supported") +end + +function 
m_write(mfile::MatlabHDF5File, parent::HDF5Parent, name::String, obj::AbstractArray{MatlabOpaque}) + error("writing of MatlabOpaque types is not yet supported") +end + +function m_write(mfile::MatlabHDF5File, parent::HDF5Parent, name::String, arr::PooledArray) + error("writing of PooledArray types as categorical is not yet supported") +end + # Check whether a variable name is valid, then write it """ write(matfile_handle, varname, value) diff --git a/src/MAT_subsys.jl b/src/MAT_subsys.jl new file mode 100644 index 0000000..d9396b1 --- /dev/null +++ b/src/MAT_subsys.jl @@ -0,0 +1,397 @@ +# MAT_subsys.jl +# Tools for processing MAT-file subsystem data in Julia +# +# Copyright (C) 2025 Nithin Lakshmisha +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +# For reference +# https://github.com/foreverallama/matio/blob/main/docs/subsystem_data_format.md + +module MAT_subsys + +import ..MAT_types: MatlabStructArray, MatlabOpaque, convert_opaque + +export Subsystem + +const FWRAP_VERSION = 4 +const MCOS_IDENTIFIER = 0xdd000000 + +mutable struct Subsystem + object_cache::Dict{UInt32,MatlabOpaque} + num_names::UInt32 # number of mcos_names + mcos_names::Vector{String} # Class and Property Names + class_id_metadata::Vector{UInt32} + object_id_metadata::Vector{UInt32} + saveobj_prop_metadata::Vector{UInt32} + obj_prop_metadata::Vector{UInt32} + dynprop_metadata::Vector{UInt32} + _u6_metadata::Vector{UInt32} + _u7_metadata::Vector{UInt32} + prop_vals_saved::Vector{Any} + _c3::Any + _c2::Any + prop_vals_defaults::Any + handle_data::Any + java_data::Any + table_type::Type # Julia type to convert Matlab tables into + + function Subsystem() + return new( + Dict{UInt32,MatlabOpaque}(), + UInt32(0), + String[], + UInt32[], + UInt32[], + UInt32[], + UInt32[], + UInt32[], + UInt32[], + UInt32[], + Any[], + nothing, + nothing, + nothing, + nothing, + nothing, + Nothing, + ) + end +end + +function get_object!(subsys::Subsystem, oid::UInt32, classname::String) + if haskey(subsys.object_cache, oid) + # object is already cached, just retrieve it + obj = subsys.object_cache[oid] + else # it's a new object + prop_dict = Dict{String,Any}() + obj = MatlabOpaque(prop_dict, classname) + # cache the new object + subsys.object_cache[oid] = obj + # caching must be done before a next call to `get_properties` to avoid any infinite recursion + merge!(prop_dict, get_properties(subsys, oid)) + end + return obj +end + +function load_subsys!(subsystem_data::Dict{String,Any}, swap_bytes::Bool) + subsys = Subsystem() + return load_subsys!(subsys, subsystem_data, swap_bytes) +end + +function load_subsys!(subsys::Subsystem, subsystem_data::Dict{String,Any}, swap_bytes::Bool) + subsys.handle_data = get(subsystem_data, "handle", nothing) + subsys.java_data 
= get(subsystem_data, "java", nothing) + mcos_data = get(subsystem_data, "MCOS", nothing) + if mcos_data === nothing + return nothing + end + + if mcos_data isa Tuple + # Backward compatibility with MAT_v5 + mcos_data = mcos_data[2] + end + fwrap_metadata::Vector{UInt8} = vec(mcos_data[1, 1]) + + version = swapped_reinterpret(fwrap_metadata[1:4], swap_bytes)[1] + if version <= 1 || version > FWRAP_VERSION + error("Cannot read subsystem: Unsupported FileWrapper version: $version") + end + + subsys.num_names = swapped_reinterpret(fwrap_metadata[5:8], swap_bytes)[1] + load_mcos_names!(subsys, fwrap_metadata) + + load_mcos_regions!(subsys, fwrap_metadata, swap_bytes) + + if version == 2 + subsys.prop_vals_saved = mcos_data[3:(end - 1), 1] + elseif version == 3 + subsys.prop_vals_saved = mcos_data[3:(end - 2), 1] + subsys._c2 = mcos_data[end - 1, 1] + else + subsys.prop_vals_saved = mcos_data[3:(end - 3), 1] + subsys._c3 = mcos_data[end - 2, 1] + end + + subsys.prop_vals_defaults = mcos_data[end, 1] + return subsys +end + +# Class and Property Names are stored as list of null-terminated strings +function load_mcos_names!(subsys::Subsystem, fwrap_metadata::AbstractArray{UInt8}) + start = 41 + pos = start + name_count = 0 + while name_count < subsys.num_names + if fwrap_metadata[pos] == 0x00 + push!(subsys.mcos_names, String(fwrap_metadata[start:(pos - 1)])) + name_count += 1 + start = pos + 1 + if name_count == subsys.num_names + break + end + end + pos += 1 + end +end + +function load_mcos_regions!( + subsys::Subsystem, fwrap_metadata::AbstractArray{UInt8}, swap_bytes::Bool +) + region_offsets = swapped_reinterpret(fwrap_metadata[9:40], swap_bytes) + + subsys.class_id_metadata = swapped_reinterpret( + get_region(fwrap_metadata, region_offsets, 1), swap_bytes + ) + subsys.saveobj_prop_metadata = swapped_reinterpret( + get_region(fwrap_metadata, region_offsets, 2), swap_bytes + ) + subsys.object_id_metadata = swapped_reinterpret( + get_region(fwrap_metadata, 
region_offsets, 3), swap_bytes + ) + subsys.obj_prop_metadata = swapped_reinterpret( + get_region(fwrap_metadata, region_offsets, 4), swap_bytes + ) + subsys.dynprop_metadata = swapped_reinterpret( + get_region(fwrap_metadata, region_offsets, 5), swap_bytes + ) + + if region_offsets[7] != 0 + subsys._u6_metadata = swapped_reinterpret( + get_region(fwrap_metadata, region_offsets, 6), swap_bytes + ) + end + + if region_offsets[8] != 0 + subsys._u7_metadata = swapped_reinterpret( + get_region(fwrap_metadata, region_offsets, 7), swap_bytes + ) + end +end + +function get_region( + fwrap_metadata::Vector{UInt8}, region_offsets::AbstractVector{UInt32}, region::Integer +) + return fwrap_metadata[(region_offsets[region] + 1):region_offsets[region + 1]] +end + +function swapped_reinterpret(T::Type, A::AbstractArray{UInt8}, swap_bytes::Bool) + return reinterpret(T, swap_bytes ? reverse(A) : A) +end +# integers are written as uint8 (with swap), interpret as uint32 +function swapped_reinterpret(A::AbstractArray{UInt8}, swap_bytes::Bool) + return swapped_reinterpret(UInt32, A, swap_bytes) +end + +function get_classname(subsys::Subsystem, class_id::UInt32) + namespace_idx = subsys.class_id_metadata[class_id * 4 + 1] + classname_idx = subsys.class_id_metadata[class_id * 4 + 2] + + namespace = if namespace_idx == 0 + "" + else + subsys.mcos_names[namespace_idx] * "." 
+ end + + classname = namespace * subsys.mcos_names[classname_idx] + return classname +end + +function get_object_metadata(subsys::Subsystem, object_id::UInt32) + return subsys.object_id_metadata[(object_id * 6 + 1):(object_id * 6 + 6)] +end + +function get_default_properties(subsys::Subsystem, class_id::UInt32) + default_props = Dict{String,Any}(subsys.prop_vals_defaults[class_id + 1, 1]) + for (key, value) in default_props + default_props[key] = update_nested_props!(value, subsys) + end + return default_props +end + +function get_property_idxs(subsys::Subsystem, obj_type_id::UInt32, saveobj_ret_type::Bool) + prop_field_idxs = + saveobj_ret_type ? subsys.saveobj_prop_metadata : subsys.obj_prop_metadata + nfields = 3 + offset = 1 + while obj_type_id > 0 + nprops = prop_field_idxs[offset] + offset += 1 + (nfields * nprops) + offset += (offset + 1) % 2 # Padding + obj_type_id -= 1 + end + nprops = prop_field_idxs[offset] + offset += 1 + return prop_field_idxs[offset:(offset + nprops * nfields - 1)] +end + +update_nested_props!(prop_value, subsys::Subsystem) = prop_value + +function update_nested_props!( + prop_value::Union{AbstractDict,MatlabStructArray}, subsys::Subsystem +) + # Handle nested objects in structs + for (key, value) in prop_value + prop_value[key] = update_nested_props!(value, subsys) + end + return prop_value +end + +function update_nested_props!(prop_value::Array{Any}, subsys::Subsystem) + # Handle nested objects in a Cell + for i in eachindex(prop_value) + prop_value[i] = update_nested_props!(prop_value[i], subsys) + end + return prop_value +end + +function update_nested_props!(prop_value::Array{UInt32}, subsys::Subsystem) + # Hacky way to find and update nested objects + # Nested objects are stored as a uint32 Matrix with a unique signature + # MATLAB probably uses some kind of placeholders to decode + # But this should work here + + if first(prop_value) == MCOS_IDENTIFIER + # MATLAB identifies any uint32 array with first value 0xdd000000 as an 
MCOS object + return load_mcos_object(prop_value, "MCOS", subsys) + else + return prop_value + end +end + +function get_saved_properties( + subsys::Subsystem, obj_type_id::UInt32, saveobj_ret_type::Bool +) + save_prop_map = Dict{String,Any}() + prop_field_idxs = get_property_idxs(subsys, obj_type_id, saveobj_ret_type) + nprops = length(prop_field_idxs) ÷ 3 + for i in 0:(nprops - 1) + prop_name = subsys.mcos_names[prop_field_idxs[i * 3 + 1]] + prop_type = prop_field_idxs[i * 3 + 2] + if prop_type == 0 + prop_value = subsys.mcos_names[prop_field_idxs[i * 3 + 3]] + elseif prop_type == 1 + prop_value = subsys.prop_vals_saved[prop_field_idxs[i * 3 + 3] + 1] + elseif prop_type == 2 + prop_value = prop_field_idxs[i * 3 + 3] + else + @warn "Unknown property type ID: $prop_type for property $prop_name encountered during deserialization" + prop_value = prop_field_idxs[i * 3 + 3] + end + save_prop_map[prop_name] = update_nested_props!(prop_value, subsys) + end + return save_prop_map +end + +function get_dynamic_properties(subsys::Subsystem, dep_id::UInt32) + offset = 1 + while dep_id > 0 + nprops = subsys.dynprop_metadata[offset] + offset += 1 + nprops + offset += (offset + 1) % 2 # Padding + dep_id -= 1 + end + + ndynprops = subsys.dynprop_metadata[offset] + offset += 1 + dyn_prop_obj_ids = subsys.dynprop_metadata[offset:(offset + ndynprops - 1)] + + if dyn_prop_obj_ids == UInt32[] + return Dict{String,Any}() + end + dyn_prop_map = Dict{String,Any}() + for (i, obj_id) in enumerate(dyn_prop_obj_ids) + dyn_class_id = get_object_metadata(subsys, obj_id)[1] + classname = get_classname(subsys, dyn_class_id) + dynobj_props = Dict{String,Any}() + dynobj = MatlabOpaque(dynobj_props, classname) + merge!(dynobj_props, get_properties(subsys, obj_id)) + dyn_prop_map["__dynamic_property_$(i)__"] = dynobj + end + return dyn_prop_map +end + +function get_properties(subsys::Subsystem, object_id::UInt32) + if object_id == 0 + return Dict{String,Any}() + end + + class_id, _, _, saveobj_id, 
normobj_id, _ = get_object_metadata(subsys, object_id) + if saveobj_id != 0 + saveobj_ret_type = true + obj_type_id = saveobj_id + else + saveobj_ret_type = false + obj_type_id = normobj_id + end + + defaults = get_default_properties(subsys, class_id) + prop_map = merge(defaults, get_saved_properties(subsys, obj_type_id, saveobj_ret_type)) + dyn_props = get_dynamic_properties(subsys, object_id) + merge!(prop_map, dyn_props) + return prop_map +end + +function load_mcos_object(metadata::Any, type_name::String, subsys::Subsystem) + @warn "Expected MCOS metadata to be an Array{UInt32}, got $(typeof(metadata)). Returning metadata." + return metadata +end + +function load_mcos_object(metadata::Dict, type_name::String, subsys::Subsystem) + @warn "Loading enumeration instances are not supported. Returning Metadata" + return metadata +end + +function load_mcos_object(metadata::Array{UInt32}, type_name::String, subsys::Subsystem) + if type_name != "MCOS" + @warn "Loading Type:$type_name is not implemented. Returning metadata." + return metadata + end + + if metadata[1, 1] != MCOS_IDENTIFIER + @warn "MCOS object metadata is corrupted. Returning raw data." + return metadata + end + + ndims = metadata[2, 1] + dims = metadata[3:(2 + ndims), 1] + nobjects = prod(dims) + object_ids = metadata[(3 + ndims):(2 + ndims + nobjects), 1] + + class_id = metadata[end, 1] + classname = get_classname(subsys, class_id) + + if nobjects == 1 + oid = object_ids[1] + obj = get_object!(subsys, oid, classname) + return convert_opaque(obj; table=subsys.table_type) + else + # no need to convert_opaque, matlab wraps object arrays in a single class normally + object_arr = Array{MatlabOpaque}(undef, convert(Vector{Int}, dims)...) 
+ for i in 1:length(object_arr) + oid = object_ids[i] + obj = get_object!(subsys, oid, classname) + object_arr[i] = obj + end + return object_arr + end +end + +end \ No newline at end of file diff --git a/src/MAT_types.jl b/src/MAT_types.jl index 1ca8e08..a7c0674 100644 --- a/src/MAT_types.jl +++ b/src/MAT_types.jl @@ -28,252 +28,472 @@ module MAT_types - export MatlabStructArray, StructArrayField, convert_struct_array - export MatlabClassObject - - # struct arrays are stored as columns per field name - """ - MatlabStructArray{N}( - names::Vector{String}, - values::Vector{Array{Any,N}}, - class::String = "", - ) - - Data structure to store matlab struct arrays, which stores the field names separate from the field values. - The field values are stored as columns of `Array{Any,N}` per Matlab field, which is how MAT files store these structures. - - These are distinct from cell arrays of structs, - which are handled as in MAT.jl as `Array{Any,N}` with `Dict{String,Any}` inside, - for example `Any[Dict("x"=>1), Dict("x"=>2)]`. - - Old class object arrays can be handled by providing a non-empty class name. 
- - # Example - - ```julia - using MAT - - s_arr = MatlabStructArray(["a", "b"], [[1, 2],["foo", 5]]) - - # write-read - matwrite("matfile.mat", Dict("struct_array" => s_arr)) - read_s_arr = matread("matfile.mat")["struct_array"] - - # convert to Dict Array - dict_array = Array{Dict{String,Any}}(s_arr) - - # convert to Dict (with arrays as fields) - dict = Dict{String,Any}(s_arr) - ``` - """ - struct MatlabStructArray{N} - names::Vector{String} - values::Vector{Array{Any,N}} - class::String - function MatlabStructArray(names::Vector{String}, values::Vector{Array{Any,N}}, class::String=""; check::Bool=true) where N - check && check_struct_array(names, values) - return new{N}(names, values, class) - end - function MatlabStructArray{N}(names::Vector{String}, values::Vector{Array{Any,N}}, class::String="") where N - return new{N}(names, values, class) - end +using StringEncodings: StringEncodings +import StringEncodings: Encoding +import Dates: DateTime, Second, Millisecond +import PooledArrays: PooledArray, RefArray +using Tables: Tables + +export MatlabStructArray, StructArrayField, convert_struct_array +export MatlabClassObject +export MatlabOpaque, convert_opaque +export MatlabTable + +# struct arrays are stored as columns per field name +""" + MatlabStructArray{N}( + names::Vector{String}, + values::Vector{Array{Any,N}}, + class::String = "", + ) + +Data structure to store matlab struct arrays, which stores the field names separate from the field values. +The field values are stored as columns of `Array{Any,N}` per Matlab field, which is how MAT files store these structures. + +These are distinct from cell arrays of structs, +which are handled in MAT.jl as `Array{Any,N}` with `Dict{String,Any}` inside, +for example `Any[Dict("x"=>1), Dict("x"=>2)]`. + +Old class object arrays can be handled by providing a non-empty class name. 
+ +# Example + +```julia +using MAT + +s_arr = MatlabStructArray(["a", "b"], [[1, 2],["foo", 5]]) + +# write-read +matwrite("matfile.mat", Dict("struct_array" => s_arr)) +read_s_arr = matread("matfile.mat")["struct_array"] + +# convert to Dict Array +dict_array = Array{Dict{String,Any}}(s_arr) + +# convert to Dict (with arrays as fields) +dict = Dict{String,Any}(s_arr) +``` +""" +struct MatlabStructArray{N} + names::Vector{String} + values::Vector{Array{Any,N}} + class::String + function MatlabStructArray( + names::Vector{String}, + values::Vector{Array{Any,N}}, + class::String=""; + check::Bool=true, + ) where {N} + check && check_struct_array(names, values) + return new{N}(names, values, class) end - - function check_struct_array(names::Vector{String}, values::Vector{Array{Any,N}}) where N - if length(names) != length(values) - error("MatlabStructArray requires equal number of names and values") - end - first_value, rest_values = Iterators.peel(values) - first_len = length(first_value) - if !all(x->length(x)==first_len, rest_values) - error("MatlabStructArray requires all value columns to be of equal length") - end + function MatlabStructArray{N}( + names::Vector{String}, values::Vector{Array{Any,N}}, class::String="" + ) where {N} + return new{N}(names, values, class) end +end - function MatlabStructArray(names::AbstractVector{<:AbstractString}, values::AbstractArray{A}, class=""; check::Bool=true) where {N, A<:AbstractArray{T, N} where {T}} - MatlabStructArray(string.(names), Vector{Array{Any,N}}(values), string(class); check=check) +function check_struct_array(names::Vector{String}, values::Vector{Array{Any,N}}) where {N} + if length(names) != length(values) + error("MatlabStructArray requires equal number of names and values") end - function MatlabStructArray(names::Vector{String}, values::AbstractArray{A}, class=""; check::Bool=true) where {N, A<:AbstractArray{T, N} where {T}} - MatlabStructArray(names, Vector{Array{Any,N}}(values), string(class); 
check=check) + first_value, rest_values = Iterators.peel(values) + first_len = length(first_value) + if !all(x -> length(x) == first_len, rest_values) + error("MatlabStructArray requires all value columns to be of equal length") end - - # empty array - function MatlabStructArray(names::AbstractVector{<:AbstractString}, dims::Tuple) - N = length(dims) - return MatlabStructArray{N}(names, [Array{Any, N}(undef, dims...) for n in names]) +end + +function MatlabStructArray( + names::AbstractVector{<:AbstractString}, + values::AbstractArray{A}, + class=""; + check::Bool=true, +) where {N,A<:AbstractArray{T,N} where {T}} + return MatlabStructArray( + string.(names), Vector{Array{Any,N}}(values), string(class); check=check + ) +end +function MatlabStructArray( + names::Vector{String}, values::AbstractArray{A}, class=""; check::Bool=true +) where {N,A<:AbstractArray{T,N} where {T}} + return MatlabStructArray( + names, Vector{Array{Any,N}}(values), string(class); check=check + ) +end + +# empty array +function MatlabStructArray(names::AbstractVector{<:AbstractString}, dims::Tuple) + N = length(dims) + return MatlabStructArray{N}(names, [Array{Any,N}(undef, dims...) 
for n in names]) +end +function MatlabStructArray(names::AbstractVector{<:AbstractString}) + return MatlabStructArray(names, (0, 0)) +end + +Base.eltype(::Type{MatlabStructArray{N}}) where {N} = Pair{String,Array{Any,N}} +Base.length(arr::MatlabStructArray) = length(arr.names) +Base.keys(arr::MatlabStructArray) = arr.names +Base.values(arr::MatlabStructArray) = arr.values +Base.haskey(arr::MatlabStructArray, k::AbstractString) = k in keys(arr) +function Base.copy(arr::MatlabStructArray{N}) where {N} + return MatlabStructArray{N}(copy(arr.names), copy(arr.values)) +end + +function Base.iterate(arr::T, i=next_state(arr)) where {T<:MatlabStructArray} + if i == 0 + return nothing + else + return (eltype(T)(arr.names[i], arr.values[i]), next_state(arr, i)) end - MatlabStructArray(names::AbstractVector{<:AbstractString}) = MatlabStructArray(names, (0,0)) - - Base.eltype(::Type{MatlabStructArray{N}}) where N = Pair{String, Array{Any,N}} - Base.length(arr::MatlabStructArray) = length(arr.names) - Base.keys(arr::MatlabStructArray) = arr.names - Base.values(arr::MatlabStructArray) = arr.values - Base.haskey(arr::MatlabStructArray, k::AbstractString) = k in keys(arr) - - function Base.iterate(arr::T, i=next_state(arr)) where T<:MatlabStructArray - if i == 0 - return nothing - else - return (eltype(T)(arr.names[i], arr.values[i]), next_state(arr,i)) - end +end +next_state(arr, i=0) = length(arr) == i ? 0 : i + 1 + +function Base.show(io::IO, ::MIME"text/plain", arr::MatlabStructArray) + summary(io, arr) + ncol = length(arr.values) + print(io, " with $(ncol) ") + col_word = ncol == 1 ? "column" : "columns" + print(io, col_word, ":") + for (k, v) in arr + print(io, "\n \"$k\": ") + summary(io, v) end - next_state(arr, i=0) = length(arr)==i ? 0 : i+1 - - function Base.show(io::IO, ::MIME"text/plain", arr::MatlabStructArray) - summary(io, arr) - ncol = length(arr.values) - print(io, " with $(ncol) ") - col_word = ncol==1 ? 
"column" : "columns" - print(io, col_word, ":") - for (k,v) in arr - print(io, "\n \"$k\": $v") - end +end + +function Base.:(==)(m1::MatlabStructArray{N}, m2::MatlabStructArray{N}) where {N} + return isequal(m1.names, m2.names) && + isequal(m1.values, m2.values) && + isequal(m1.class, m2.class) +end + +function Base.isapprox(m1::MatlabStructArray, m2::MatlabStructArray; kwargs...) + return isequal(m1.names, m2.names) && isapprox(m1.values, m2.values; kwargs...) +end + +function find_index(m::MatlabStructArray, s::AbstractString) + idx = findfirst(isequal(s), m.names) + if isnothing(idx) + error("field \"$s\" not found in MatlabStructArray") end - - function Base.:(==)(m1::MatlabStructArray{N},m2::MatlabStructArray{N}) where N - return isequal(m1.names, m2.names) && isequal(m1.values, m2.values) && isequal(m1.class, m2.class) + return idx +end + +function Base.getindex(m::MatlabStructArray, s::AbstractString) + idx = find_index(m, s) + return getindex(m.values, idx) +end + +function Base.get(m::MatlabStructArray, s::AbstractString, default) + idx = findfirst(isequal(s), m.names) + if isnothing(idx) + return default + else + return getindex(m.values, idx) end - - function Base.isapprox(m1::MatlabStructArray,m2::MatlabStructArray; kwargs...) - return isequal(m1.names, m2.names) && isapprox(m1.values, m2.values; kwargs...) +end + +# convert Dict array to MatlabStructArray +function MatlabStructArray(arr::AbstractArray{<:AbstractDict,N}, class::String="") where {N} + first_dict, remaining_dicts = Iterators.peel(arr) + first_keys = keys(first_dict) + field_names = string.(first_keys) + # Ensure same field set for all elements + for d in remaining_dicts + if !issetequal(keys(d), first_keys) + error( + "Cannot convert Dict array to MatlabStructArray. 
# Convert to a Dict that maps each field name to its full column of values.
function Base.Dict(arr::MatlabStructArray)
    return Base.Dict{String,Any}(arr)
end
function Base.Dict{String,Any}(arr::MatlabStructArray)
    return Base.Dict{String,Any}(arr.names .=> arr.values)
end

Base.Array{D}(arr::MatlabStructArray{N}) where {D<:AbstractDict,N} = Array{D,N}(arr)

# Convert to an array with one dictionary-like element per struct element.
function Base.Array{D,N}(arr::MatlabStructArray{N}) where {D<:AbstractDict,N}
    # a struct array without fields carries no size information
    isempty(arr.values) && return Array{D,N}(undef, ntuple(_ -> 0, N))
    first_field = first(arr.values)
    result = Array{D,N}(undef, size(first_field))
    for idx in eachindex(first_field)
        element_values = (v[idx] for v in arr.values)
        result[idx] = create_struct(D, arr.names, element_values, arr.class)
    end
    return result
end

# Build one element of type `D` from parallel keys/values; `class` is unused
# for plain dictionaries.
function create_struct(::Type{D}, keys, values, class::String) where {T,D<:AbstractDict{T}}
    return D(T.(keys) .=> values)
end

# 1D MatlabStructArray also counts as table (mostly for testing purposes)
# The struct array itself acts as the columns object: columns are looked up by
# name through `getcolumn`, and column names are reported as Symbols.
Tables.istable(::Type{MatlabStructArray{1}}) = true
Tables.columnaccess(::Type{MatlabStructArray{1}}) = true
Tables.columns(t::MatlabStructArray{1}) = t
Tables.columnnames(t::MatlabStructArray{1}) = Symbol.(t.names)
Tables.getcolumn(t::MatlabStructArray{1}, nm::String) = t[nm]
Tables.getcolumn(t::MatlabStructArray{1}, nm::Symbol) = Tables.getcolumn(t, string(nm))
Tables.getcolumn(t::MatlabStructArray{1}, i::Int) = t.values[i]
# Build a 1D struct array from any Tables.jl column source wrapped in
# `Tables.CopiedColumns`; every column is copied into a `Vector{Any}`.
function MatlabStructArray{1}(t::Tables.CopiedColumns)
    col_names = Tables.columnnames(t)
    # column names may arrive as a tuple or a vector, so build the vectors explicitly
    names = String[string(nm) for nm in col_names]
    columns = Vector{Any}[Vector{Any}(Tables.getcolumn(t, nm)) for nm in col_names]
    return MatlabStructArray{1}(names, columns)
end
MatlabStructArray(t::Tables.CopiedColumns) = MatlabStructArray{1}(t)

# Wrapper marking a Dict value as one column of a struct array
struct StructArrayField{N}
    values::Array{Any,N}
end
dimension(::StructArrayField{N}) where {N} = N

"""
    MatlabClassObject(
        d::Dict{String, Any},
        class::String,
    ) <: AbstractDict{String, Any}

Type to store old class objects. Inside MATLAB a class named \"TestClassOld\" would be defined within `@TestClassOld` folders.

If you want to write these objects you have to make sure the keys in the Dict match the class defined properties/fields.
"""
struct MatlabClassObject <: AbstractDict{String,Any}
    d::Dict{String,Any}
    class::String
end

# AbstractDict interface: everything is forwarded to the wrapped Dict
Base.eltype(::Type{MatlabClassObject}) = Pair{String,Any}
Base.length(m::MatlabClassObject) = length(m.d)
Base.keys(m::MatlabClassObject) = keys(m.d)
Base.values(m::MatlabClassObject) = values(m.d)
Base.getindex(m::MatlabClassObject, i) = getindex(m.d, i)
Base.setindex!(m::MatlabClassObject, v, k) = setindex!(m.d, v, k)
Base.iterate(m::MatlabClassObject, i) = iterate(m.d, i)
Base.iterate(m::MatlabClassObject) = iterate(m.d)
Base.haskey(m::MatlabClassObject, k) = haskey(m.d, k)
Base.get(m::MatlabClassObject, k, default) = get(m.d, k, default)

# Convert an array of class objects (all of one class) to a struct array.
function MatlabStructArray(arr::AbstractArray{MatlabClassObject})
    if isempty(arr)
        # neither the class nor the field names can be taken from an empty array
        error("cannot convert an empty MatlabClassObject array to MatlabStructArray")
    end
    class = first(arr).class
    if !all(x -> isequal(class, x.class), arr)
        error(
            "to write a MatlabClassObject array all classes must be equal. Use `Array{Any}` to write a cell array",
        )
    end
    return MatlabStructArray(arr, class)
end

# Turn a freshly read field Dict into its final representation:
# a `MatlabStructArray` when the fields are `StructArrayField` columns,
# otherwise the Dict itself or, with a class name, a `MatlabClassObject`.
function convert_struct_array(d::Dict{String,Any}, class::String="")
    # there is no possibility of having cell arrays mixed with struct arrays (afaik)
    # a struct without any fields cannot be a struct array; `first` would throw on it
    if !isempty(d) && first(values(d)) isa StructArrayField
        N = dimension(first(values(d)))
        columns = Array{Any,N}[field.values for field in values(d)]
        return MatlabStructArray{N}(collect(keys(d)), columns, class)
    end
    return isempty(class) ? d : MatlabClassObject(d, class)
end

# Element type follows the class name: plain Dicts for structs,
# `MatlabClassObject` for old-style class object arrays.
function Base.Array(arr::MatlabStructArray{N}) where {N}
    if isempty(arr.class)
        return Array{Dict{String,Any},N}(arr)
    else
        return Array{MatlabClassObject,N}(arr)
    end
end

function create_struct(::Type{D}, keys, values, class::String) where {D<:MatlabClassObject}
    d = Dict{String,Any}(string.(keys) .=> values)
    return MatlabClassObject(d, class)
end
"""
    MatlabOpaque(
        d::Dict{String, Any},
        class::String,
    ) <: AbstractDict{String, Any}

Type to store opaque class objects.
These are the 'modern' Matlab classes, different from the old `MatlabClassObject` types.

"""
struct MatlabOpaque <: AbstractDict{String,Any}
    d::Dict{String,Any}
    class::String
end

# AbstractDict interface: everything is forwarded to the wrapped Dict
Base.eltype(::Type{MatlabOpaque}) = Pair{String,Any}
Base.length(m::MatlabOpaque) = length(m.d)
Base.keys(m::MatlabOpaque) = keys(m.d)
Base.values(m::MatlabOpaque) = values(m.d)
Base.getindex(m::MatlabOpaque, i) = getindex(m.d, i)
Base.setindex!(m::MatlabOpaque, v, k) = setindex!(m.d, v, k)
Base.iterate(m::MatlabOpaque, i) = iterate(m.d, i)
Base.iterate(m::MatlabOpaque) = iterate(m.d)
Base.haskey(m::MatlabOpaque, k) = haskey(m.d, k)
Base.get(m::MatlabOpaque, k, default) = get(m.d, k, default)

"""
    convert_opaque(obj::MatlabOpaque; table::Type=Nothing)

Convert the known opaque classes (`string`, `datetime`, `duration`, `categorical`,
`table`) to Julia values. Objects of any other class are returned unchanged.
`table` is the type that `table` objects are converted to; `Nothing` keeps them
as `MatlabOpaque`.
"""
function convert_opaque(obj::MatlabOpaque; table::Type=Nothing)
    if obj.class == "string"
        return from_string(obj)
    elseif obj.class == "datetime"
        return from_datetime(obj)
    elseif obj.class == "duration"
        return from_duration(obj)
    elseif obj.class == "categorical"
        return from_categorical(obj)
    elseif obj.class == "table"
        return from_table(obj, table)
    else
        return obj
    end
end

# for reference: https://github.com/foreverallama/matio/blob/main/matio/utils/converters/matstring.py
# Layout of the "any" row as read below: [version, ndims, dims..., char counts..., packed UTF-16 data...]
function from_string(obj::MatlabOpaque, encoding::Encoding=Encoding(Symbol("UTF-16LE")))
    data = obj["any"]
    if isnothing(data) || isempty(data)
        return String[]
    end
    if data[1, 1] != 1
        @warn "String saved from a different MAT-file version. Returning empty string"
        return ""
    end
    # named `n_dims` so that `Base.ndims` is not shadowed
    n_dims = Int(data[1, 2])
    shape = Int.(data[1, 3:(2 + n_dims)])
    num_strings = prod(shape)
    char_counts = data[1, (3 + n_dims):(2 + n_dims + num_strings)]
    byte_data = data[1, (3 + n_dims + num_strings):end]
    bytes = reinterpret(UInt8, byte_data)

    strings = String[]
    pos = 1
    for char_count in char_counts
        byte_length = 2 * Int(char_count) # UTF-16 encoding
        extracted_bytes = bytes[pos:(pos + byte_length - 1)]
        push!(strings, StringEncodings.decode(extracted_bytes, encoding))
        pos += byte_length
    end

    # a single-element string array is returned as a plain String
    if num_strings == 1
        return first(strings)
    else
        return reshape(strings, shape...)
    end
end

# Convert a `datetime` object; the "data" field holds milliseconds counted
# from 1970-01-01 (see `ms_to_datetime`). A non-empty timezone is ignored
# with a warning.
function from_datetime(obj::MatlabOpaque)
    dat = obj["data"]
    if isnothing(dat) || isempty(dat)
        return DateTime[]
    end
    if haskey(obj, "tz") && !isempty(obj["tz"])
        tz = obj["tz"]
        @warn "no timezone conversion yet for datetime objects. timezone of \"$tz\" ignored"
    end
    #isdate = obj["isDateOnly"] # optional: convert to Date instead of DateTime?
    return map_or_not(ms_to_datetime, dat)
end

# is the complex part the submilliseconds?
ms_to_datetime(ms::Complex) = ms_to_datetime(real(ms))
function ms_to_datetime(ms::Real)
    # `DateTime` has millisecond resolution: round to whole milliseconds first,
    # otherwise a fractional value raises an InexactError in the Period constructors
    whole_ms = round(Int64, ms)
    s, ms_rem = fldmod(whole_ms, 1_000) # whole seconds and remainder milliseconds
    return DateTime(1970, 1, 1) + Second(s) + Millisecond(ms_rem)
end

# Floating point values are rounded to whole milliseconds; anything else is
# passed to the `Millisecond` constructor unchanged.
to_millisecond(x::AbstractFloat) = Millisecond(round(Int64, x))
to_millisecond(x) = Millisecond(x)

# Convert a `duration` object; the "millis" field holds the duration(s) in milliseconds.
function from_duration(obj::MatlabOpaque)
    dat = obj["millis"]
    #fmt = obj["fmt"] # TODO: format, e.g. 'd' to Day
    if isnothing(dat) || isempty(dat)
        return Millisecond[]
    end
    return map_or_not(to_millisecond, dat)
end

# Convert a `categorical` object to a `PooledArray` that reuses the stored codes
# as reference array and the category names as pool.
# NOTE(review): assumes "codes" is an array of 1-based indices into the category
# names; scalar codes or a code of 0 are not handled here - confirm.
function from_categorical(obj::MatlabOpaque)
    category_names = obj["categoryNames"]
    codes = obj["codes"]
    pool = vec(Array{promoted_eltype(category_names)}(category_names))
    code_type = eltype(codes)
    pool_type = eltype(pool)
    invpool = Dict{pool_type,code_type}(pool .=> code_type.(1:length(pool)))
    RA = typeof(codes)
    N = ndims(codes)
    return PooledArray{pool_type,code_type,N,RA}(RefArray(codes), invpool, pool)
end
# Narrowest common element type of an `Any`-typed array, obtained by promoting
# the runtime types of all elements. An empty array has no elements to inspect,
# so its declared element type `Any` is returned.
function promoted_eltype(v::AbstractArray{Any})
    isempty(v) && return Any
    return mapreduce(typeof, promote_type, v)
end
# Arrays that already have a specific element type report it directly.
promoted_eltype(::AbstractArray{T}) where {T} = T

# Apply `f` element-wise to arrays and directly to anything else, so that
# converters accept both array and scalar payloads.
map_or_not(f, dat::AbstractArray) = map(f, dat)
map_or_not(f, dat) = f(dat)
"""
    MatlabTable(names::Vector{Symbol}, columns::Vector)

Minimal column table used as the default conversion target for MATLAB `table`
objects. `columns[i]` holds the data of the column named `names[i]`.
Columns can be accessed with `t[:name]` or `t["name"]`, and the type supports
the Tables.jl column interface.
"""
struct MatlabTable
    names::Vector{Symbol}
    columns::Vector
end
# The table itself acts as the columns object; columns are looked up by name
# or position. The explicit `Int` method keeps the Tables.jl fallback from
# reading a struct field instead of a column.
Tables.istable(::Type{MatlabTable}) = true
Tables.columnaccess(::Type{MatlabTable}) = true
Tables.columns(t::MatlabTable) = t
Tables.columnnames(t::MatlabTable) = t.names
Tables.getcolumn(t::MatlabTable, nm::Symbol) = t[nm]
Tables.getcolumn(t::MatlabTable, i::Int) = t.columns[i]

# Position of column `s`; throws when the column does not exist.
function find_index(m::MatlabTable, s::Symbol)
    idx = findfirst(isequal(s), m.names)
    if isnothing(idx)
        error("column :$s not found in MatlabTable")
    end
    return idx
end
function Base.getindex(m::MatlabTable, s::Symbol)
    idx = find_index(m, s)
    return getindex(m.columns, idx)
end
Base.getindex(m::MatlabTable, s::AbstractString) = getindex(m, Symbol(s))
# Converting a wrapped MatlabTable back to MatlabTable just unwraps it
MatlabTable(t::Tables.CopiedColumns{MatlabTable}) = Tables.source(t)

# Convert a `table` object to the table type `T` (any type constructible from
# a `Tables.CopiedColumns`), using the "varnames" and "data" fields.
function from_table(obj::MatlabOpaque, ::Type{T}=MatlabTable) where {T}
    names = vec(Symbol.(obj["varnames"]))
    cols = vec([try_vec(c) for c in obj["data"]])
    t = MatlabTable(names, cols)
    return T(Tables.CopiedColumns(t))
end
# option to not convert and get the MatlabOpaque as table
from_table(obj::MatlabOpaque, ::Type{Nothing}) = obj

# Normalise one table variable to a column: vectors pass through, scalars are
# wrapped, and n-by-1 matrices are flattened. Wider or higher-dimensional
# arrays are kept unchanged.
try_vec(c::Vector) = c
try_vec(c) = [c]
function try_vec(c::AbstractArray)
    return (ndims(c) <= 2 && size(c, 2) == 1) ? vec(c) : c
end
# Read a cell array of the given dimensions: every cell is read with
# `read_matrix`, and only the data part of each returned tuple is kept.
function read_cell(f::IO, swap_bytes::Bool, dimensions::Vector{Int32}, subsys::Subsystem)
    cells = Array{Any}(undef, Int.(dimensions)...)
    for i in eachindex(cells)
        _, cells[i] = read_matrix(f, swap_bytes, subsys)
    end
    return cells
end
# Read an opaque (MCOS) element: two UInt8-encoded strings (type name, class
# name) followed by one nested matrix. A "FileWrapper__" element is handed back
# exactly as `read_matrix` returns it; for any other class the nested matrix is
# the object metadata, which is resolved through the subsystem.
function read_opaque(f::IO, swap_bytes::Bool, subsys::Subsystem)
    type_name = String(read_element(f, swap_bytes, UInt8))
    classname = String(read_element(f, swap_bytes, UInt8))

    nested = read_matrix(f, swap_bytes, subsys)
    classname == "FileWrapper__" && return nested

    metadata = nested[2]
    return MAT_subsys.load_mcos_object(metadata, type_name, subsys)
end
# Open MAT file for reading
# The UInt64 at byte offset 116 is read as the subsystem offset. A value of
# zero or of eight space characters (0x20) means there is no subsystem;
# otherwise the subsystem is loaded right away with `table` as its table type.
function matopen(ios::IOStream, endian_indicator::UInt16; table::Type=MatlabTable)
    matfile = Matlabv5File(ios, endian_indicator == 0x494D)

    seek(matfile.ios, 116)
    offset = read_bswap(matfile.ios, matfile.swap_bytes, UInt64)
    has_subsystem = offset != 0 && offset != 0x2020202020202020
    if has_subsystem
        matfile.subsystem_position = offset
        matfile.subsystem.table_type = table
        read_subsystem!(matfile)
    end

    return matfile
end
# Read whole MAT file
# Elements are read from byte 128 until the end of the stream. The element
# located at the subsystem position is skipped, so it is not returned as a variable.
function read(matfile::Matlabv5File)
    ios = matfile.ios
    vars = Dict{String, Any}()
    seek(ios, 128)
    while !eof(ios)
        if position(ios) == matfile.subsystem_position
            # Skip reading subsystem again
            _, nbytes = read_header(ios, matfile.swap_bytes)
            skip(ios, nbytes)
        else
            name, data = read_matrix(ios, matfile.swap_bytes, matfile.subsystem)
            vars[name] = data
        end
    end
    return vars
end

# Load the subsystem element stored at `matfile.subsystem_position`.
# The element's data is itself parsed as an embedded stream: the first 8 bytes
# are skipped and the matrix that follows is passed to `MAT_subsys.load_subsys!`.
function read_subsystem!(matfile::Matlabv5File)
    swap_bytes = matfile.swap_bytes
    subsys = matfile.subsystem
    seek(matfile.ios, matfile.subsystem_position)
    raw = read_matrix(matfile.ios, swap_bytes, subsys)[2]
    buf = IOBuffer(vec(raw))
    seek(buf, 8) # Skip subsystem header
    subsys_data = read_matrix(buf, swap_bytes, subsys)[2]
    return MAT_subsys.load_subsys!(subsys, subsys_data, swap_bytes)
end
"sentinel", "separator", "matlabroot"]) @test "anonymous" in keys(vars) - @test ismissing(vars["anonymous"]) + @test typeof(vars["anonymous"]) == Dict{String, Any} + @test Set(keys(vars["anonymous"])) == Set(["function_handle", "sentinel", "separator", "matlabroot"]) end -let objtestfile = "struct_table_datetime.mat" - vars = matread(joinpath(dirname(@__FILE__), "v7.3", objtestfile))["s"] - @test "testTable" in keys(vars) - @test ismissing(vars["testTable"]) - @test "testDatetime" in keys(vars) - @test ismissing(vars["testDatetime"]) + +for format in ["v7", "v7.3"] + @testset "struct_table_datetime $format" begin + let objtestfile = "struct_table_datetime.mat" + filepath = joinpath(dirname(@__FILE__), format, objtestfile) + + # make sure read(matopen(filepath), ::String) works + fid = matopen(filepath) + @test haskey(fid, "s") + var_s = read(fid, "s") + @test haskey(var_s, "testTable") + @test haskey(var_s, "testDatetime") + close(fid) + + # matread interface + vars = matread(filepath)["s"] + @test haskey(vars, "testTable") + t = vars["testTable"] + @test t isa MatlabTable + @test t.names == [:FlightNum, :Customer, :Date, :Rating, :Comment] + @test t[:Date] isa Vector{DateTime} + @test t[:Rating] isa AbstractVector{String} + @test all(x->length(x)==3, t.columns) + + # using Nothing will keep the MatlabOpaque + vars = matread(filepath; table=Nothing)["s"] + t = vars["testTable"] + @test Set(keys(t)) == Set(["props", "varnames", "nrows", "data", "rownames", "ndims", "nvars"]) + @test t.class == "table" + @test t["ndims"] === 2.0 + @test t["nvars"] === 5.0 + @test t["nrows"] === 3.0 + @test t["data"][1] == reshape([1261.0, 547.0, 3489.0], 3, 1) + @test t["data"][2] isa Matrix{String} + @test t["data"][3] isa Matrix{DateTime} + @test t["data"][4] isa AbstractMatrix{String} + @test t["data"][5] isa Matrix{String} + @test all(x->size(x)==(3,1), t["data"]) + + @test "testDatetime" in keys(vars) + dt = vars["testDatetime"] + @test dt isa DateTime + @test dt - 
DateTime(2019, 12, 2, 16, 42, 49) < Second(1) + end + end + + @testset "user defined classdef $format" begin + let objtestfile = "user_defined_classdefs.mat" + filepath = joinpath(dirname(@__FILE__), format, objtestfile) + + vars = matread(filepath) + @test haskey(vars, "obj_no_vals") + obj_no_vals = vars["obj_no_vals"] + @test obj_no_vals isa MatlabOpaque + @test obj_no_vals.class == "TestClasses.BasicClass" + @test obj_no_vals["a"] isa Matrix{Float64} + + @test haskey(vars, "obj_with_vals") + obj_with_vals = vars["obj_with_vals"] + @test obj_with_vals isa MatlabOpaque + @test obj_with_vals.class == "TestClasses.BasicClass" + @test obj_with_vals["a"] == 10.0 + + @test haskey(vars, "obj_with_default_val") + obj_with_default_val = vars["obj_with_default_val"] + @test obj_with_default_val isa MatlabOpaque + @test obj_with_default_val.class == "TestClasses.DefaultClass" + @test obj_with_default_val["a"] == "Default String" + @test obj_with_default_val["b"] == 10.0 + + @test haskey(vars, "obj_array") + obj_array = vars["obj_array"] + @test obj_array isa Array{MatlabOpaque} + @test size(obj_array) == (2, 2) + @test obj_array[1, 1] isa MatlabOpaque + @test obj_array[1, 1]["a"] == 1.0 + @test obj_array[1, 2]["a"] == 2.0 + + @test haskey(vars, "obj_with_nested_props") + obj_with_nested_props = vars["obj_with_nested_props"] + @test obj_with_nested_props isa MatlabOpaque + @test obj_with_nested_props.class == "TestClasses.BasicClass" + @test obj_with_nested_props["a"] isa MatlabOpaque + @test obj_with_nested_props["a"]["a"] == 1.0 + + @test obj_with_nested_props["b"] isa Matrix{Any} + @test obj_with_nested_props["b"][1] isa MatlabOpaque + @test obj_with_nested_props["b"][1]["b"] == "Obj1" + + @test obj_with_nested_props["c"] isa Dict{String, Any} + @test obj_with_nested_props["c"]["InnerProp"] isa MatlabOpaque + @test obj_with_nested_props["c"]["InnerProp"]["a"] == 2.0 + + @test haskey(vars, "obj_handle_1") + @test haskey(vars, "obj_handle_2") + obj_handle_1 = 
vars["obj_handle_1"] + obj_handle_2 = vars["obj_handle_2"] + @test obj_handle_1 === obj_handle_2 + @test obj_handle_1 isa MatlabOpaque + + end + end + + @testset "dynamic property" begin + let objtestfile = "dynamicprops.mat" + filepath = joinpath(dirname(@__FILE__), format, objtestfile) + + vars = matread(filepath) + @test haskey(vars, "obj") + obj = vars["obj"] + @test obj isa MatlabOpaque + @test obj.class == "TestClasses.BasicDynamic" + @test haskey(obj, "__dynamic_property_1__") + @test obj["__dynamic_property_1__"]["Name"] == "DynamicData" + @test obj["__dynamic_property_1__"]["DynamicValue_"] == 42.0 + end + end + end # test reading of old-style Matlab object in v7.3 format diff --git a/test/types.jl b/test/types.jl index aaa87da..b1cf01d 100644 --- a/test/types.jl +++ b/test/types.jl @@ -1,4 +1,5 @@ using MAT, Test +using Dates @testset "MatlabStructArray" begin d_arr = Dict{String, Any}[ @@ -85,4 +86,164 @@ end wrong_arr = [MatlabClassObject(d, "TestClassOld"), MatlabClassObject(d, "Bah")] @test_throws ErrorException MatlabStructArray(wrong_arr) +end + +@testset "MatlabOpaque string" begin + dat = UInt64[ + 0x0000000000000001 + 0x0000000000000002 + 0x0000000000000003 + 0x0000000000000001 + 0x0000000000000005 + 0x0000000000000005 + 0x0000000000000005 + 0x0065006e006f004a + 0x006f007200420073 + 0x006d0053006e0077 + 0x0000006800740069 + ] + dat = reshape(dat, 1, length(dat)) + obj = MatlabOpaque(Dict{String, Any}("any" => dat), "string") + str = MAT.convert_opaque(obj) + @test size(str) == (3,1) + @test vec(str) == ["Jones", "Brown", "Smith"] + + # single element string array is a single string in matlab, ofcourse + dat = [ + 0x0000000000000001 + 0x0000000000000002 + 0x0000000000000001 + 0x0000000000000001 + 0x0000000000000005 + 0x0065006e006f004a + 0x0000000000000073 + ] + dat = reshape(dat, 1, length(dat)) + obj = MatlabOpaque(Dict{String, Any}("any" => dat), "string") + str = MAT.convert_opaque(obj) + @test str == "Jones" +end + +@testset "MatlabOpaque 
datetime" begin + d = Dict{String, Any}( + "tz" => "", + "data" => ComplexF64[ + 1482192000000.0+0.0im; + 1482278400000.0+0.0im; + 1482364800000.0+0.0im;; + ], + "fmt" => "", + "isDateOnly" => true, # Note: "isDateOnly" not in all versions + ) + obj = MatlabOpaque(d, "datetime") + expected_dates = [ + DateTime(2016, 12, 20) # 20-Dec-2016 + DateTime(2016, 12, 21) # 21-Dec-2016 + DateTime(2016, 12, 22) # 22-Dec-2016 + ] + @test all(MAT.convert_opaque(obj) .== expected_dates) + + d = Dict{String, Any}( + "tz" => "", + "data" => 1575304969634.0+0.0im, + "fmt" => "", + "isDateOnly" => false, + ) + obj = MatlabOpaque(d, "datetime") + # "02-Dec-2019 16:42:49" + expected_dt = DateTime(2019, 12, 2, 16, 42, 49) + # still have some millisecond rounding issue? + @test MAT.convert_opaque(obj) - expected_dt < Second(1) +end + +@testset "MatlabOpaque duration" begin + d = Dict( + "millis" => [3.6e6 7.2e6], + "fmt" => 'h', + ) + obj = MatlabOpaque(d, "duration") + @test MAT.convert_opaque(obj) == map(Millisecond, d["millis"]) + + d = Dict( + "millis" => 12000.0, + "fmt" => 'h', + ) + obj = MatlabOpaque(d, "duration") + @test MAT.convert_opaque(obj) == Millisecond(d["millis"]) +end + +@testset "MatlabOpaque categorical" begin + d = Dict( + "isProtected" => false, + "codes" => reshape(UInt8[0x02, 0x03, 0x01, 0x01, 0x01, 0x02], 3, 2), + "categoryNames" => Any["Fair"; "Good"; "Poor";;], + "isOrdinal" => false, + ) + obj = MatlabOpaque(d, "categorical") + + c = MAT.convert_opaque(obj) + @test c == [ + "Good" "Fair" + "Poor" "Fair" + "Fair" "Good" + ] + +end + +@testset "MatlabOpaque table" begin + # simplified table struct; there's some other properties as well + d = Dict{String,Any}( + "varnames" => Any["FlightNum" "Customer"], + "nrows" => 3.0, + "data" => reshape(Any[[1261.0; 547.0; 3489.0;;], ["Jones"; "Brown"; "Smith";;]], 1, 2), + "ndims" => 2.0, + "nvars" => 2.0, + ) + obj = MatlabOpaque(d, "table") + + # Note: this should work with DataFrames.DataFrame, but that's a big 
dependency to add for testing + t = MAT.convert_opaque(obj; table = MatlabTable) + @test t.names == [:FlightNum, :Customer] + @test t[:FlightNum] isa Vector{Float64} + @test t[:FlightNum] == [1261.0, 547.0, 3489.0] + @test t[:Customer] isa Vector{String} + @test t["Customer"] == ["Jones", "Brown", "Smith"] + + t = MAT.convert_opaque(obj; table = MatlabStructArray{1}) + @test t isa MatlabStructArray{1} + @test t["FlightNum"] == [1261.0, 547.0, 3489.0] + @test t["Customer"] == ["Jones", "Brown", "Smith"] + + t = MAT.convert_opaque(obj; table = Nothing) + @test t === obj + + nd_array = reshape(1:12, 2, 3, 2) + + # ND-arrays as columns + # Note: does not convert to DataFrame + d = Dict{String,Any}( + "varnames" => Any["Floats" "NDArray"], + "nrows" => 2.0, + "data" => reshape(Any[[1261.0; 547.0;;], nd_array], 1, 2), + "ndims" => 2.0, + "nvars" => 2.0, + ) + obj = MatlabOpaque(d, "table") + t = MAT.convert_opaque(obj; table = MatlabTable) + @test size(t[:Floats]) == (2,) + @test size(t[:NDArray]) == (2,3,2) + + # single row table + d = Dict{String,Any}( + "varnames" => Any["Age" "Name" "Matrix"], + "nrows" => 1.0, + "data" => reshape([25.0, "Smith", [1.0 2.0]], 1, 3), + "ndims" => 2.0, + "nvars" => 2.0, + ) + obj = MatlabOpaque(d, "table") + t = MAT.convert_opaque(obj; table = MatlabTable) + @test t[:Age] == [25.0] + @test t[:Name] == ["Smith"] + @test t[:Matrix] == [1.0 2.0] end \ No newline at end of file diff --git a/test/v7.3/dynamicprops.mat b/test/v7.3/dynamicprops.mat new file mode 100644 index 0000000..abee0cb Binary files /dev/null and b/test/v7.3/dynamicprops.mat differ diff --git a/test/v7.3/user_defined_classdefs.mat b/test/v7.3/user_defined_classdefs.mat new file mode 100644 index 0000000..6444c06 Binary files /dev/null and b/test/v7.3/user_defined_classdefs.mat differ diff --git a/test/v7/dynamicprops.mat b/test/v7/dynamicprops.mat new file mode 100644 index 0000000..1d42bd5 Binary files /dev/null and b/test/v7/dynamicprops.mat differ diff --git 
a/test/v7/struct_table_datetime.mat b/test/v7/struct_table_datetime.mat new file mode 100644 index 0000000..6a4885a Binary files /dev/null and b/test/v7/struct_table_datetime.mat differ diff --git a/test/v7/user_defined_classdefs.mat b/test/v7/user_defined_classdefs.mat new file mode 100644 index 0000000..419ca1e Binary files /dev/null and b/test/v7/user_defined_classdefs.mat differ diff --git a/test/write.jl b/test/write.jl index 02acc62..21f32b3 100644 --- a/test/write.jl +++ b/test/write.jl @@ -152,6 +152,13 @@ test_write(Dict("reshape_arr"=>reshape([1 2 3;4 5 6;7 8 9]',1,9))) test_write(Dict("adjoint_arr"=>Any[1 2 3;4 5 6;7 8 9]')) test_write(Dict("reshape_arr"=>reshape(Any[1 2 3;4 5 6;7 8 9]',1,9))) +# named tuple +nt = (x = 5, y = Any[6, "string"]) +matwrite(tmpfile, Dict("nt" => nt)) +nt_read = matread(tmpfile)["nt"] +@test nt_read["x"] == 5 +@test nt_read["y"] == nt.y + # test nested struct array - interface via Dict array @testset "MatlabStructArray writing" begin sarr = Dict{String, Any}[