Export to ‘.tmat.h5’ format with R

Author

baptiste

Published

July 4, 2024

This document showcases a basic R script to export T-matrices in the .tmat.h5 HDF5 format. For illustration, we start by producing a dummy dataset. This minimal reproducible example file is available for download as a standalone script: export_R.R.

Mockup input data

Consistent with the other examples, we generate a 50x30x30 array of dummy data, which repeats 50 times (50 wavelengths) a matrix of 900 entries ranging from \(1+1i\) (first element, top left) to \(900 + 900i\) (last element, bottom right). Note the expectation of row-major ordering in HDF5. The 3x3 top-left block is

   1.0000 + 1.0000i   2.0000 + 2.0000i   3.0000 + 3.0000i ...
  31.0000 +31.0000i  32.0000 +32.0000i  33.0000 +33.0000i ...
  61.0000 +61.0000i  62.0000 +62.0000i  63.0000 +63.0000i ...
   ...               ...                ...               ...

The rhdf5 package has support for a variety of R objects, including lists which are automatically written as grouped objects in HDF5.

library(rhdf5) # note: dev version to support complex
library(uuid) # uuid
library(glue) # string interpolation
library(purrr) # mapping functions

## dummy data

# possibly multiple wavelengths
wavelength <- seq(400, 800, by=50)
Nl <- length(wavelength)

Lmax <- 3
qmax <- 2*(Lmax*(Lmax+1)+Lmax) # T-matrix size

# dummy 30x30 matrix values for each wavelength
# note the byrow due to HDF5 expecting
# row-major ordering vs R's default column-major
tdata <- matrix(1:qmax^2 + 1i*(1:qmax^2), qmax, qmax, byrow=TRUE)

tmatrix <- array(NA_complex_, c(Nl,qmax,qmax))
for(i in seq_len(Nl))
  tmatrix[i,,] <- tdata

tmatrix[1,1:3,1:3]

modes <- list(l = rep(NA_integer_, qmax),
              m = rep(NA_integer_, qmax),
              polarization = rep(NA_character_, qmax))

i <- 1
for (li in 1:Lmax){
  for (mi in -li:li){
    for (si in c("electric", "magnetic")){
      modes$l[i] <- li
      modes$m[i] <- mi
      modes$polarization[i] <- si
      i <-  i+1
    }
  }
}

embedding <- list('relative_permeability' = 1.0, 
                  'relative_permittivity' = 1.33^2)
scatterer <- list(material = list('relative_permeability' = 1.0, 
                                  'relative_permittivity' =
                                    rep(-11.4+1.181i,Nl)),
                  geometry = list('radiusxy' = 20.0, 
                                  'radiusz' = 40.0))

computation <- list(method_parameters = list('Lmax' = Lmax, 
                                             'Ntheta' = 100),
                    files = list(script = paste(readLines('export_R.R'), collapse = "\n")))

Saving to HDF5

The rhdf5 package provides support for reading/writing HDF5 files into/from R objects. Until my recent request, complex arrays were not supported, but this is now implemented in the dev version of the package.

library(rhdf5) # note: requires dev version to support complex arrays
# cf https://support.bioconductor.org/p/9156305/#9156408
# install.packages("remotes")
# remotes::install_github("grimbough/rhdf5@devel")
# may need 'crypto' library from openSSL to compile
# e.g. via brew install openssl on macos

We can then write the different objects defined above using hwrite. Note the important native = TRUE parameter for the T-matrix data: R stores arrays column-wise, while HDF5 uses row-major ordering. To avoid confusion between different programming languages, we suggest sticking with the native HDF5 convention (native = TRUE ensures that the array is written transposed).

Attributes don’t seem to have a convenient high-level interface, and unfortunately they need to be written slightly differently for the root level, datasets, and groups.

f <- 'ar.tmat.h5'
software = sprintf("SMARTIES=1.1, R=%s, rhdf5=%s", paste0(R.version$major, R.version$minor), packageVersion("rhdf5"))

unlink(f) # delete previous file if it exists
h5createFile(f)
h5closeAll() # in case connections open

h5write(wavelength, file=f, name="/vacuum_wavelength")

h5write(tmatrix, file=f, name="/tmatrix", native=TRUE) # store rowwise

h5write(modes, file=f, name='/modes')
h5write(embedding, file=f, name="/embedding")
h5write(scatterer, file=f, name="/scatterer")
h5write(computation, file=f, name='/computation')

## write attributes
fid <- H5Fopen(f)
# root level
h5writeAttribute("Au prolate spheroid in water", fid, "name")
h5writeAttribute("Computation using SMARTIES, a numerically robust EBCM implementation for spheroids", fid, "description")
h5writeAttribute("gold, spheroid, ebcm, passive, reciprocal, czinfinity, mirrorxyz", fid, "keywords")
h5writeAttribute("v0.01", fid, "storage_format_version")

# wavelength
did <- H5Dopen(fid, "vacuum_wavelength")
h5writeAttribute("nm", did, "unit")
H5Dclose(did)

# embedding
gid <- H5Gopen(fid, "embedding")
h5writeAttribute("H2O, Water", gid, "name")
h5writeAttribute("non-dispersive", gid, "keywords")
H5Gclose(gid)

# material
gid <- H5Gopen(fid, "scatterer/material")
h5writeAttribute("Au, Gold", gid, "name")
h5writeAttribute("dispersive, plasmonic", gid, "keywords")
h5writeAttribute("Au from Raschke et al 10.1103/PhysRevB.86.235147", gid, "reference")
H5Gclose(gid)

# geometry
gid <- H5Gopen(fid, "scatterer/geometry")
h5writeAttribute("nm", gid, "unit")
h5writeAttribute("spheroid", gid, "shape")
h5writeAttribute("homogeneous spheroid with symmetry axis z", gid, "name")
H5Gclose(gid)

# computation
gid <- H5Gopen(fid, "computation")
h5writeAttribute("EBCM, Extended Boundary Condition Method", gid, "method")
h5writeAttribute("Computation using SMARTIES, a numerically robust EBCM implementation for spheroids", gid, "description")
h5writeAttribute(software, gid, "software")
h5writeAttribute("SMARTIES", gid, "name")
H5Gclose(gid)

H5Fclose(fid)

Summary of the file:

<list>
├─data: <list>
│ ├─computation: <list>
│ │ ├─files: <list>
│ │ │ └─script: "## ----eval=FALSE---------------..."
│ │ └─method_parameters: <list>
│ │   ├─Lmax: 3
│ │   └─Ntheta: 100
│ ├─embedding: <list>
│ │ ├─relative_permeability: 1
│ │ └─relative_permittivity: 1.7689
│ ├─modes: <list>
│ │ ├─l<int [30]>: 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, ...
│ │ ├─m<int [30]>: -1, -1, 0, 0, 1, 1, -2, -2, -1, -1, ...
│ │ └─polarization<chr [30]>: "electric", "magnetic", "electric", "magnetic", "electric", "magnetic", "electric", "magnetic", "electric", "magnetic", ...
│ ├─scatterer: <list>
│ │ ├─geometry: <list>
│ │ │ ├─radiusxy: 20
│ │ │ └─radiusz: 40
│ │ └─material: <list>
│ │   ├─relative_permeability: 1
│ │   └─relative_permittivity<cpl [9]>: -11.4+1.181i, -11.4+1.181i, -11.4+1.181i, -11.4+1.181i, -11.4+1.181i, -11.4+1.181i, -11.4+1.181i, -11.4+1.181i, -11.4+1.181i
│ ├─tmatrix<cpl [8,100]>: 1+1i, 1+1i, 1+1i, 1+1i, 1+1i, 1+1i, 1+1i, 1+1i, 1+1i, 31+31i, ...
│ └─vacuum_wavelength<dbl [9]>: 400, 450, 500, 550, 600, 650, 700, 750, 800
└─attributes: <list>
  ├─root: <list>
  │ ├─description: "Computation using SMARTIES, a nu..."
  │ ├─keywords: "gold, spheroid, ebcm, passive, r..."
  │ ├─name: "Au prolate spheroid in water"
  │ └─storage_format_version: "v0.01"
  ├─vacuum_wavelength: <list>
  │ ├─rhdf5-NA.OK: 1
  │ └─unit: "nm"
  ├─computation: <list>
  │ ├─description: "Computation using SMARTIES, a nu..."
  │ ├─method: "EBCM, Extended Boundary Conditio..."
  │ ├─name: "SMARTIES"
  │ └─software: "SMARTIES=1.1, R=43.1, rhdf5=2.47.3"
  ├─embedding: <list>
  │ ├─keywords: "non-dispersive"
  │ └─name: "H2O, Water"
  ├─material: <list>
  │ ├─keywords: "dispersive, plasmonic"
  │ ├─name: "Au, Gold"
  │ └─reference: "Au from Raschke et al 10.1103/Ph..."
  └─geometry: <list>
    ├─name: "homogeneous spheroid with symmet..."
    ├─shape: "spheroid"
    └─unit: "nm"