From 05baf26c8125433e35745cef8082cc3799c7646c Mon Sep 17 00:00:00 2001 From: Gerd Duscher <50049264+gduscher@users.noreply.github.com> Date: Tue, 8 Sep 2020 18:55:47 -0400 Subject: [PATCH 1/3] Nion and dm3reader, with examples. --- ScopeReaders/__init__.py | 12 + ScopeReaders/em/tem/__init__.py | 16 + ScopeReaders/em/tem/dm3_reader.py | 715 +++++++++++++++++++++++++++++ ScopeReaders/em/tem/nion_reader.py | 274 +++++++++++ examples/plot_dm3_reader.py | 77 ++++ examples/plot_nion_reader.py | 81 ++++ 6 files changed, 1175 insertions(+) create mode 100644 ScopeReaders/em/tem/dm3_reader.py create mode 100644 ScopeReaders/em/tem/nion_reader.py create mode 100644 examples/plot_dm3_reader.py create mode 100644 examples/plot_nion_reader.py diff --git a/ScopeReaders/__init__.py b/ScopeReaders/__init__.py index b6b90a4..8745c17 100644 --- a/ScopeReaders/__init__.py +++ b/ScopeReaders/__init__.py @@ -7,8 +7,20 @@ .. autosummary:: :toctree: _autosummary + dm3_reader + """ from .__version__ import version as __version__ +""" +Tools to read, write data in h5NSID files + +Submodules +---------- + +.. autosummary:: + :toctree: _autosummary + +""" __all__ = ['__version__'] diff --git a/ScopeReaders/em/tem/__init__.py b/ScopeReaders/em/tem/__init__.py index e69de29..2ac6941 100644 --- a/ScopeReaders/em/tem/__init__.py +++ b/ScopeReaders/em/tem/__init__.py @@ -0,0 +1,16 @@ +""" +Tools to read, data in from TEM files + +Submodules +---------- + +.. 
autosummary:: + :toctree: _autosummary + + dm3_reader + nion_reader +""" +from .dm3_reader import DM3Reader +from .nion_reader import NionReader + +__all__ = ['DM3Reader', 'NionReader'] \ No newline at end of file diff --git a/ScopeReaders/em/tem/dm3_reader.py b/ScopeReaders/em/tem/dm3_reader.py new file mode 100644 index 0000000..a9547a2 --- /dev/null +++ b/ScopeReaders/em/tem/dm3_reader.py @@ -0,0 +1,715 @@ +#!/usr/bin/env python +# -*- coding: iso-8859-1 -*- + +################################################################################ +# Python class for reading GATAN DM3 (DigitalMicrograph) files +# and extracting all metadata +# -- +# tested on EELS spectra, spectrum images and single-image files and image-stacks +# -- +# based on the DM3_Reader plug-in (v 1.3.4) for ImageJ by Greg Jefferis +# http://rsb.info.nih.gov/ij/plugins/DM3_Reader.html +# -- +# Python adaptation: Pierre-Ivan Raynal +# http://microscopies.med.univ-tours.fr/ +# +# Extended for EELS support by Gerd Duscher, UTK 2012 +# Rewritten for integration of sidpy 2020 +# +# Works for python 3 +# +################################################################################ +from __future__ import division, print_function, absolute_import, unicode_literals + +import struct +import time +import numpy + +from warnings import warn +import sys +import numpy as np +import os + +from sidpy import Reader +from sidpy.sid import Dimension, Dataset +from sidpy.base.dict_utils import nest_dict + +__all__ = ["DM3Reader", "version"] + +version = '0.1beta' + +debugLevel = 0 # 0=none, 1-3=basic, 4-5=simple, 6-10 verbose + +if sys.version_info.major == 3: + unicode = str + +# ### utility functions ### + +# ## binary data reading functions ### + + +def read_long(f): + """Read 4 bytes as integer in file f""" + read_bytes = f.read(4) + return struct.unpack('>l', read_bytes)[0] + + +def read_short(f): + """Read 2 bytes as integer in file f""" + read_bytes = f.read(2) + return struct.unpack('>h', read_bytes)[0] 
def read_byte(f):
    """Read 1 byte as a signed integer from file f."""
    read_bytes = f.read(1)
    return struct.unpack('>b', read_bytes)[0]


def read_bool(f):
    """Read 1 byte as a boolean from file f (non-zero -> True)."""
    read_val = read_byte(f)
    return read_val != 0


def read_char(f):
    """Read 1 byte as a char from file f."""
    read_bytes = f.read(1)
    return struct.unpack('c', read_bytes)[0]


def read_string(f, length=1):
    """Read `length` bytes as a byte-string from file f."""
    read_bytes = f.read(length)
    str_fmt = '>' + str(length) + 's'
    return struct.unpack(str_fmt, read_bytes)[0]


# NOTE(review): the little-endian readers and the encoded-type constants below
# were garbled in the flattened source (every '<...>' span was eaten, most
# likely by an HTML sanitizer).  They are reconstructed from the standard
# dm3lib type table -- confirm against a known-good copy of dm3lib.

def read_le_short(f):
    """Read 2 bytes as a *little endian* signed integer from file f."""
    return struct.unpack('<h', f.read(2))[0]


def read_le_long(f):
    """Read 4 bytes as a *little endian* signed integer from file f."""
    return struct.unpack('<l', f.read(4))[0]


def read_leu_short(f):
    """Read 2 bytes as a *little endian* unsigned integer from file f."""
    return struct.unpack('<H', f.read(2))[0]


def read_leu_long(f):
    """Read 4 bytes as a *little endian* unsigned integer from file f."""
    return struct.unpack('<L', f.read(4))[0]


def read_leu_float(f):
    """Read 4 bytes as a *little endian* float from file f."""
    return struct.unpack('<f', f.read(4))[0]


def read_leu_double(f):
    """Read 8 bytes as a *little endian* double from file f."""
    return struct.unpack('<d', f.read(8))[0]


# constants for encoded data types (DM3 tag type codes)
SHORT = 2
LONG = 3
USHORT = 4
ULONG = 5
FLOAT = 6
DOUBLE = 7
BOOLEAN = 8
CHAR = 9
OCTET = 10
STRUCT = 15
STRING = 18
ARRAY = 20

# encoded type -> reading function
readFunc = {
    SHORT: read_le_short,
    LONG: read_le_long,
    USHORT: read_leu_short,
    ULONG: read_leu_long,
    FLOAT: read_leu_float,
    DOUBLE: read_leu_double,
    BOOLEAN: read_bool,
    CHAR: read_char,
    OCTET: read_char,  # difference with char???
}

# other constants ##
IMGLIST = "root.ImageList."
OBJLIST = "root.DocumentObjectList."
MAXDEPTH = 64

# END constants ##


class DM3Reader(Reader):
    """
    Reader for Gatan DigitalMicrograph .dm3 files.

    The constructor validates the file header; read() parses the complete
    tag tree and returns the payload as a sidpy.Dataset with all metadata
    attached in original_metadata.
    """
    debugLevel = -1

    def __init__(self, file_path, verbose=False):
        """
        file_path: filepath to dm3 file.
        verbose: print parsing progress if True.
        """
        warn('This Reader will eventually be moved to the ScopeReaders package'
             '. Be prepared to change your import statements',
             FutureWarning)

        super(DM3Reader, self).__init__(file_path)

        # initialize variables ##
        self.verbose = verbose
        self.__filename = file_path
        self.__chosenImage = 1
        # - track currently read group
        self.__cur_group_level = -1
        self.__cur_group_at_level_x = [0 for _ in range(MAXDEPTH)]
        self.__cur_group_name_at_level_x = ['' for _ in range(MAXDEPTH)]
        # - track current tag
        self.__cur_tag_at_level_x = ['' for _ in range(MAXDEPTH)]
        self.__curTagName = ''
        # - open file for reading
        try:
            self.__f = open(self.__filename, 'rb')
        except FileNotFoundError:
            raise FileNotFoundError('File not found')

        # - create Tags repositories
        self.__storedTags = []
        self.__tagDict = {'DM': {}}

        # check if this is a valid DM3 file: header is three big-endian
        # 4-byte ints (version, file size, byte-ordering flag)
        is_dm3 = True
        file_version = read_long(self.__f)
        if file_version not in (3, 4):
            is_dm3 = False
        file_size = read_long(self.__f)
        le = read_long(self.__f)
        little_endian = (le == 1)
        if not little_endian:
            is_dm3 = False
        # check file header, raise Exception if not DM3
        if not is_dm3:
            raise TypeError("%s does not appear to be a DM3 or DM4 file." % os.path.split(self.__filename)[1])
        elif self.verbose:
            print("%s appears to be a DM3 file" % self.__filename)
        # NOTE(review): version 4 is accepted here but the tag parser below
        # assumes DM3 field sizes -- confirm DM4 files actually parse.
        self.file_version = file_version
        self.file_size = file_size

        if self.verbose:
            print("Header info.:")
            print("- file version:", file_version)
            print("- le:", le)
            print("- file size:", file_size, "bytes")

        # set name of root group (contains all data)...
        self.__cur_group_name_at_level_x[0] = "root"
        # file is reopened (and the tag tree parsed) in read()
        self.__f.close()

    def read(self):
        """Parse the complete tag tree and return the data as a sidpy.Dataset."""
        try:
            self.__f = open(self.__filename, 'rb')
        except FileNotFoundError:
            raise FileNotFoundError('File not found')

        # skip over the 12-byte header that was already validated in __init__
        file_version = read_long(self.__f)
        file_size = read_long(self.__f)
        le = read_long(self.__f)
        little_endian = (le == 1)

        # FIX: t1 was never assigned in the original, so verbose mode raised
        # NameError at the timing printout below.
        t1 = time.time()
        # ... then read the root tag group (recursively reads everything)
        self.__read_tag_group()

        if self.verbose:
            print("-- %s Tags read --" % len(self.__storedTags))
            t2 = time.time()
            print("| parse DM3 file: %.3g s" % (t2 - t1))

        dataset = Dataset.from_array(self.data_cube)
        # nest the flat dotted tag names into a dictionary tree
        original_tags = nest_dict(self.get_tags(), separator='.')
        dataset.original_metadata.update(original_tags['root'])

        dataset.quantity = 'intensity'
        dataset.units = 'counts'
        self.set_dimensions(dataset)
        self.set_data_type(dataset)

        path, file_name = os.path.split(self.__filename)
        basename, extension = os.path.splitext(file_name)
        dataset.title = basename

        dataset.modality = 'generic'
        dataset.source = 'DM3Reader'

        return dataset

    def set_data_type(self, dataset):
        """Infer dataset.data_type (image, spectrum, spectrum_image, ...) from shape and tags."""
        image_number = len(dataset.original_metadata['ImageList']) - 1
        spectral_dim = False
        for axis in dataset.axes.values():
            if axis.dimension_type == 'spectral':
                spectral_dim = True

        dataset.data_type = 'unknown'
        if 'ImageTags' in dataset.original_metadata['ImageList'][str(image_number)]:
            image_tags = dataset.original_metadata['ImageList'][str(image_number)]['ImageTags']

            if 'SI' in image_tags:
                if len(dataset.shape) == 3:
                    dataset.data_type = 'spectrum_image'
                else:
                    if spectral_dim:
                        dataset.data_type = 'spectrum_image'  # 'linescan'
                    else:
                        dataset.data_type = 'image'
                        dataset.metadata['image_type'] = 'survey image'

        if dataset.data_type == 'unknown':
            if len(dataset.shape) > 3:
                raise NotImplementedError('Data_type not implemented yet')
            elif len(dataset.shape) == 3:
                if spectral_dim:
                    dataset.data_type = 'spectrum_image'
                else:
                    dataset.data_type = 'image_stack'
            elif len(dataset.shape) == 2:
                if spectral_dim:
                    dataset.data_type = 'spectrum_image'
                else:
                    dataset.data_type = 'image'
            elif len(dataset.shape) == 1:
                if spectral_dim:
                    dataset.data_type = 'spectrum'
                else:
                    dataset.data_type = 'line_plot'

    def set_dimensions(self, dataset):
        """Attach calibrated sidpy Dimensions to the dataset from the DM3 calibration tags."""
        image_number = len(dataset.original_metadata['ImageList']) - 1
        dimensions_dict = dataset.original_metadata['ImageList'][str(image_number)]['ImageData']['Calibrations']['Dimension']

        reciprocal_name = 'u'
        spatial_name = 'x'

        for dim, dimension_tags in dimensions_dict.items():
            # Fix annoying scale of spectrum_images in Zeiss and SEM images
            if dimension_tags['Units'] == 'µm':
                dimension_tags['Units'] = 'nm'
                dimension_tags['Scale'] *= 1000.0

            units = dimension_tags['Units']
            values = (np.arange(dataset.shape[int(dim)]) - dimension_tags['Origin']) * dimension_tags['Scale']

            if 'eV' == units:
                dataset.set_dimension(int(dim), Dimension('energy_loss', values, units=units,
                                                          quantity='energy-loss', dimension_type='spectral'))
            elif 'eV' in units:
                dataset.set_dimension(int(dim), Dimension('energy', values, units=units,
                                                          quantity='energy', dimension_type='spectral'))
            elif '1/' in units or units in ['mrad', 'rad']:
                dataset.set_dimension(int(dim), Dimension(reciprocal_name, values, units=units,
                                                          quantity='reciprocal distance',
                                                          dimension_type='reciprocal'))
                reciprocal_name = chr(ord(reciprocal_name) + 1)
            else:
                dataset.set_dimension(int(dim), Dimension(spatial_name, values, units=units,
                                                          quantity='distance', dimension_type='spatial'))
                spatial_name = chr(ord(spatial_name) + 1)

    # utility functions
    def __make_group_string(self):
        # FIX: group counters are ints; the original concatenated them to a
        # str without conversion (TypeError if this helper were ever called).
        t_string = str(self.__cur_group_at_level_x[0])
        for i in range(1, self.__cur_group_level + 1):
            t_string += '.' + str(self.__cur_group_at_level_x[i])
        return t_string

    def __make_group_name_string(self):
        t_string = self.__cur_group_name_at_level_x[0]
        for i in range(1, self.__cur_group_level + 1):
            t_string += '.' + str(self.__cur_group_name_at_level_x[i])
        return t_string

    def __read_tag_group(self):
        """Recursively read one tag group (a directory node of the tag tree)."""
        # go down a level
        self.__cur_group_level += 1
        # increment group counter
        self.__cur_group_at_level_x[self.__cur_group_level] += 1
        # set number of current tag to -1 --- readTagEntry() pre-increments => first gets 0
        self.__cur_tag_at_level_x[self.__cur_group_level] = -1
        # is the group sorted?
        g_sorted = read_byte(self.__f)
        is_sorted = (g_sorted == 1)
        # is the group open?
        opened = read_byte(self.__f)
        is_open = (opened == 1)
        # number of Tags
        n_tags = read_long(self.__f)
        # read Tags
        for i in range(n_tags):
            self.__read_rag_entry()
        # go back up one level as reading group is finished
        self.__cur_group_level += -1
        return 1

    def __read_rag_entry(self):
        """Read one tag entry: either a data tag or a nested tag group."""
        # is data or a new group?
        data = read_byte(self.__f)
        is_data = (data == 21)
        self.__cur_tag_at_level_x[self.__cur_group_level] += 1
        # get tag label if it exists; anonymous tags are named by their index
        len_tag_label = read_short(self.__f)
        if len_tag_label != 0:
            tag_label = read_string(self.__f, len_tag_label).decode('latin-1')
        else:
            tag_label = str(self.__cur_tag_at_level_x[self.__cur_group_level])
        if is_data:
            # give it a dotted name rooted at 'root'
            self.__curTagName = self.__make_group_name_string() + "." + tag_label
            # read it
            self.__read_tag_type()
        else:
            # it is a tag group
            self.__cur_group_name_at_level_x[self.__cur_group_level + 1] = tag_label
            self.__read_tag_group()  # increments curGroupLevel
        return 1

    def __read_tag_type(self):
        delim = read_string(self.__f, 4)
        if delim != b"%%%%":
            raise Exception(hex(self.__f.tell()) + ": Tag Type delimiter not %%%%")
        # field must be read to advance the stream even though the value is unused
        n_in_tag = read_long(self.__f)
        self.__read_any_data()
        return 1

    def __encoded_type_size(self, et):
        """Return the size in bytes of encoded data type `et` (-1 if unknown)."""
        if et == 0:
            width = 0
        elif et in (BOOLEAN, CHAR, OCTET):
            width = 1
        elif et in (SHORT, USHORT):
            width = 2
        elif et in (LONG, ULONG, FLOAT):
            width = 4
        elif et == DOUBLE:
            width = 8
        else:
            # returns -1 for unrecognised types
            width = -1
        return width

    def __read_any_data(self):
        """Dispatch on the encoded type and read (or index) one tag value."""
        # - get Type category (short, long, array...)
        encoded_type = read_long(self.__f)
        # - calc size of encoded_type
        et_size = self.__encoded_type_size(encoded_type)
        if debugLevel > 5:
            print(": Tag Type = " + str(encoded_type) + ", Tag Size = " + str(et_size))
        if et_size > 0:
            self.__store_tag(self.__curTagName, self.__read_native_data(encoded_type, et_size))
        elif encoded_type == STRING:
            string_size = read_long(self.__f)
            data = self.__read_string_data(string_size)
        elif encoded_type == STRUCT:
            # GD does store tags now
            struct_types = self.__read_struct_types()
            data = self.__read_struct_data(struct_types)
            self.__store_tag(self.__curTagName, data)
        elif encoded_type == ARRAY:
            # GD does store tags now; large blocks are indexed, not read
            array_types = self.__read_array_types()
            data = self.__read_array_data(array_types)
            self.__store_tag(self.__curTagName, data)
        else:
            raise Exception("rAnD, " + hex(self.__f.tell()) + ": Can't understand encoded type")
        return 1

    def __read_native_data(self, encoded_type, et_size):
        """Read one value of an ordinary (fixed-size) data type."""
        if encoded_type in readFunc.keys():
            val = readFunc[encoded_type](self.__f)
        else:
            raise Exception("rND, " + hex(self.__f.tell()) + ": Unknown data type " + str(encoded_type))
        return val

    def __read_string_data(self, string_size):
        """Read string data; DM3 strings are UTF-16 little endian."""
        if string_size <= 0:
            r_string = ""
        else:
            # !!! *Unicode* string (UTF-16)... convert to Python unicode str
            r_string = read_string(self.__f, string_size)
            r_string = str(r_string, "utf_16_le")
        self.__store_tag(self.__curTagName, r_string)
        return r_string

    def __read_array_types(self):
        """Determine the (possibly nested) data types in an array data type."""
        array_type = read_long(self.__f)
        item_types = []
        if array_type == STRUCT:
            item_types = self.__read_struct_types()
        elif array_type == ARRAY:
            item_types = self.__read_array_types()
        else:
            item_types.append(array_type)
        return item_types

    def __read_array_data(self, array_types):
        """Read array data; large binary blocks are skipped and indexed by offset/size tags."""
        array_size = read_long(self.__f)

        item_size = 0
        encoded_type = 0
        for i in range(len(array_types)):
            encoded_type = int(array_types[i])
            et_size = self.__encoded_type_size(encoded_type)
            item_size += et_size

        buf_size = array_size * item_size

        if ((not self.__curTagName.endswith("ImageData.Data"))
                and (len(array_types) == 1)
                and (encoded_type == USHORT)
                and (array_size < 256)):
            # short ushort arrays are labels etc. -- treat as string
            val = self.__read_string_data(buf_size)
        else:
            # treat as binary data
            # - store data size and offset as tags
            self.__store_tag(self.__curTagName + ".Size", buf_size)
            self.__store_tag(self.__curTagName + ".Offset", self.__f.tell())
            # - skip data w/o reading
            self.__f.seek(self.__f.tell() + buf_size)
            val = 1

        return val

    def __read_struct_types(self):
        """Analyse the field types of a struct data type."""
        struct_name_length = read_long(self.__f)
        n_fields = read_long(self.__f)

        field_types = []
        for i in range(n_fields):
            name_length = read_long(self.__f)
            field_type = read_long(self.__f)
            field_types.append(field_type)

        return field_types

    def __read_struct_data(self, struct_types):
        """Read struct data based on the type info from __read_struct_types."""
        data = []
        for i in range(len(struct_types)):
            encoded_type = struct_types[i]
            et_size = self.__encoded_type_size(encoded_type)
            # get data
            data.append(self.__read_native_data(encoded_type, et_size))
        return data

    def __store_tag(self, tag_name, tag_value):
        # NB: all tag names stored as unicode objects;
        # tag names may contain non-ascii chars (e.g. '\xb5', i.e. MICRO SIGN)
        tag_name = str(tag_name)
        # GD: Changed this over to store real values and not strings in dictionary
        self.__tagDict[tag_name] = tag_value
        # - convert tag value to unicode for the flat string log
        tag_value = str(tag_value)
        # store Tags as list and dict
        self.__storedTags.append(tag_name + " = " + tag_value)

    # ## END utility functions ###

    def get_filename(self):
        return self.__filename

    filename = property(get_filename)

    def get_tags(self):
        return self.__tagDict

    tags = property(get_tags)

    def get_raw(self):
        """Extract the image/spectrum data as a numpy array.

        NOTE(review): the dtype table and tag lookups below were garbled in
        the flattened source and are reconstructed from the standard dm3lib
        DataType table -- confirm against a known-good copy.
        """
        # DM3 DataType codes -> numpy dtype strings (little endian)
        data_types = {
            1: '<i2',    # 2-byte signed integer
            2: '<f4',    # 4-byte float
            3: '<c8',    # 8-byte complex
            6: '<u1',    # 1-byte unsigned integer
            7: '<i4',    # 4-byte signed integer
            9: '<i1',    # 1-byte signed integer
            10: '<u2',   # 2-byte unsigned integer
            11: '<u4',   # 4-byte unsigned integer
            12: '<f8',   # 8-byte float
            13: '<c16',  # 16-byte complex
            14: '<u1',   # binary
            23: '<u4',   # RGBA packed into 4 bytes
        }
        image_root = IMGLIST + str(self.__chosenImage) + '.ImageData.'
        data_offset = int(self.tags[image_root + 'Data.Offset'])
        data_size = int(self.tags[image_root + 'Data.Size'])
        data_type = int(self.tags[image_root + 'DataType'])
        im_width = int(self.tags[image_root + 'Dimensions.0'])
        im_height = 1
        im_length = 1
        data_dim = 1
        if image_root + 'Dimensions.1' in self.tags:
            im_height = int(self.tags[image_root + 'Dimensions.1'])
            data_dim = 2
        if image_root + 'Dimensions.2' in self.tags:
            im_length = int(self.tags[image_root + 'Dimensions.2'])
            data_dim = 3
        if data_type not in data_types:
            raise TypeError('Unsupported DM3 data type: %s' % data_type)
        dt = numpy.dtype(data_types[data_type])

        self.__f.seek(data_offset)
        rawdata = self.__f.read(data_size)

        if data_dim > 2:
            shape = (im_width, im_height, im_length)
        elif data_dim == 2:
            shape = (im_width, im_height)
        else:
            shape = (im_width,)

        # FIX: numpy.fromstring is deprecated; frombuffer is the supported
        # replacement.  .copy() makes the array writable (frombuffer views
        # the read-only bytes object).
        raw_data = numpy.frombuffer(rawdata, dtype=dt,
                                    count=numpy.prod(shape)).reshape(shape, order='F').copy()
        return raw_data

    data_cube = property(get_raw)


if __name__ == '__main__':
    pass  # print("DM3lib v.%s" % version)
#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-

################################################################################
# Python class for reading Nion Swift files into sidpy Dataset
# and extracting all metadata
#
# Written by Gerd Duscher, UTK 2020
#
# Works for python 3
################################################################################
# NOTE(review): the original `from __future__ import ...` line was removed --
# the package is python-3 only and a mid-file __future__ import is illegal.

import json
import struct
import h5py
from warnings import warn
import sys
import numpy as np
import os


from sidpy import Reader
from sidpy.sid import Dimension, Dataset

__all__ = ["NionReader", "version"]

version = '0.1beta'

debugLevel = 0  # 0=none, 1-3=basic, 4-5=simple, 6-10 verbose

if sys.version_info.major == 3:
    unicode = str

# ### utility functions ###
def parse_zip(fp):
    """
    Parse the zip file headers at fp

    :param fp: the file pointer from which to parse the zip file
    :return: A tuple of local files, directory headers, and end of central directory

    The local files are a dictionary where the keys are the local file offset and the
    values are each a tuple consisting of the name, data position, data length, and crc32.
    The directory headers are a dictionary where the keys are the names of the files
    and the values are a tuple consisting of the directory header position, and the
    associated local file position.
    The end of central directory is a tuple consisting of the location of the end of
    central directory header and the location of the first directory header.

    This method will seek to location 0 of fp and leave fp at end of file.

    This function is copied from nionswift/nion/swift/model/NDataHandler.py
    """
    local_files = {}
    dir_files = {}
    eocd = None
    fp.seek(0)
    while True:
        pos = fp.tell()
        signature = struct.unpack('I', fp.read(4))[0]
        if signature == 0x04034b50:
            # local file header
            fp.seek(pos + 14)
            crc32 = struct.unpack('I', fp.read(4))[0]
            fp.seek(pos + 18)
            data_len = struct.unpack('I', fp.read(4))[0]
            fp.seek(pos + 26)
            name_len = struct.unpack('H', fp.read(2))[0]
            extra_len = struct.unpack('H', fp.read(2))[0]
            name_bytes = fp.read(name_len)
            fp.seek(extra_len, os.SEEK_CUR)
            data_pos = fp.tell()
            fp.seek(data_len, os.SEEK_CUR)
            local_files[pos] = (name_bytes, data_pos, data_len, crc32)
        elif signature == 0x02014b50:
            # central directory header
            fp.seek(pos + 28)
            name_len = struct.unpack('H', fp.read(2))[0]
            extra_len = struct.unpack('H', fp.read(2))[0]
            comment_len = struct.unpack('H', fp.read(2))[0]
            fp.seek(pos + 42)
            pos2 = struct.unpack('I', fp.read(4))[0]
            name_bytes = fp.read(name_len)
            fp.seek(pos + 46 + name_len + extra_len + comment_len)
            dir_files[name_bytes] = (pos, pos2)
        elif signature == 0x06054b50:
            # end of central directory
            fp.seek(pos + 16)
            pos2 = struct.unpack('I', fp.read(4))[0]
            eocd = (pos, pos2)
            break
        else:
            raise IOError()
    return local_files, dir_files, eocd


class NionReader(Reader):
    """Reader for Nion Swift files (.ndata zip containers and .h5 files)."""

    def __init__(self, file_path, verbose=False):
        """
        file_path: filepath to an .ndata or .h5 Nion Swift file.
        verbose: print progress information if True.
        """
        warn('This Reader will eventually be moved to the ScopeReaders package'
             '. Be prepared to change your import statements',
             FutureWarning)

        super(NionReader, self).__init__(file_path)

        # initialize variables ##
        self.verbose = verbose
        self.__filename = file_path

        path, file_name = os.path.split(self.__filename)
        self.basename, self.extension = os.path.splitext(file_name)
        self.data_cube = None
        self.original_metadata = {}
        if self.extension == '.ndata':
            # - open file for reading and verify it is a parseable zip
            try:
                self.__f = open(self.__filename, "rb")
            except FileNotFoundError:
                raise FileNotFoundError('File not found')
            try:
                local_files, dir_files, eocd = parse_zip(self.__f)
            except IOError:
                raise IOError("File {} does not seem to be of Nion`s .ndata format".format(self.__filename))
            self.__f.close()
        elif self.extension == '.h5':
            try:
                # FIX: open read-only ('r'); the original used append mode
                # ('a'), which can create or modify the file during a check.
                fp = h5py.File(self.__filename, mode='r')
                if 'data' not in fp:
                    raise IOError("File {} does not seem to be of Nion`s .h5 format".format(self.__filename))
                fp.close()
            except IOError:
                raise IOError("File {} does not seem to be of Nion`s .h5 format".format(self.__filename))

    def read(self):
        """Load data and metadata and return them as a sidpy.Dataset."""
        if self.extension == '.ndata':
            try:
                self.__f = open(self.__filename, "rb")
            except FileNotFoundError:
                raise FileNotFoundError('File not found')
            local_files, dir_files, eocd = parse_zip(self.__f)

            contains_data = b"data.npy" in dir_files
            contains_metadata = b"metadata.json" in dir_files
            # FIX: the original computed this count but never used it, then
            # failed with a bare KeyError on malformed archives.  Raise the
            # IOError the constructor already promises for bad files.
            if not (contains_data and contains_metadata):
                raise IOError("File {} does not seem to be of Nion`s .ndata format".format(self.__filename))

            self.__f.seek(local_files[dir_files[b"data.npy"][1]][1])
            self.data_cube = np.load(self.__f)

            json_pos = local_files[dir_files[b"metadata.json"][1]][1]
            json_len = local_files[dir_files[b"metadata.json"][1]][2]
            self.__f.seek(json_pos)
            json_properties = self.__f.read(json_len)

            self.original_metadata = json.loads(json_properties.decode("utf-8"))
            self.__f.close()
        elif self.extension == '.h5':
            # TODO: use lazy load for large datasets
            # FIX: read-only mode; this method never writes to the file.
            self.__f = h5py.File(self.__filename, 'r')
            if 'data' in self.__f:
                json_properties = self.__f['data'].attrs.get("properties", "")
                self.data_cube = self.__f['data'][:]
                self.original_metadata = json.loads(json_properties)

        dataset = Dataset.from_array(self.data_cube)

        dataset.original_metadata = self.original_metadata
        if 'dimensional_calibrations' in dataset.original_metadata:
            # dimensions without units are plain pixel axes
            for dim in dataset.original_metadata['dimensional_calibrations']:
                if dim['units'] == '':
                    dim['units'] = 'pixels'

        dataset.quantity = 'intensity'
        dataset.units = 'counts'
        if 'description' in dataset.original_metadata:
            dataset.title = dataset.original_metadata['description']['title']
        elif 'title' in dataset.original_metadata:
            dataset.title = dataset.original_metadata['title']
        else:
            path, file_name = os.path.split(self.__filename)
            basename, extension = os.path.splitext(file_name)
            dataset.title = basename

        if 'data_source' in dataset.original_metadata:
            dataset.source = dataset.original_metadata['data_source']
        else:
            dataset.source = 'NionReader'

        self.set_dimensions(dataset)
        self.set_data_type(dataset)

        dataset.modality = 'generic'

        return dataset

    def set_data_type(self, dataset):
        """Infer dataset.data_type from the shape and the spectral dimensions."""
        spectral_dim = False
        for axis in dataset.axes.values():
            if axis.dimension_type == 'spectral':
                spectral_dim = True

        if len(dataset.shape) > 3:
            raise NotImplementedError('Data_type not implemented yet')
        elif len(dataset.shape) == 3:
            if spectral_dim:
                dataset.data_type = 'spectrum_image'
            else:
                dataset.data_type = 'image_stack'
                # the first non-spatial axis of a stack is the frame axis
                for dim, axis in dataset.axes.items():
                    if axis.dimension_type != 'spatial':
                        dataset.set_dimension(int(dim), Dimension('frame', axis.values, units='frame',
                                                                  quantity='stack',
                                                                  dimension_type='frame'))
                        break
        elif len(dataset.shape) == 2:
            if spectral_dim:
                dataset.data_type = 'spectrum_image'
            else:
                dataset.data_type = 'image'
        elif len(dataset.shape) == 1:
            if spectral_dim:
                dataset.data_type = 'spectrum'
            else:
                dataset.data_type = 'line_plot'
+ + def set_dimensions(self, dataset): + dic = dataset.original_metadata + + reciprocal_name = 'u' + spatial_name = 'x' + + if 'dimensional_calibrations' in dic: + for dim in range(len(dic['dimensional_calibrations'])): + dimension_tags = dic['dimensional_calibrations'][dim] + units = dimension_tags['units'] + values = (np.arange(dataset.shape[int(dim)]) - dimension_tags['offset']) * dimension_tags['scale'] + + if 'eV' == units: + dataset.set_dimension(int(dim), Dimension('energy_loss', values, units=units, + quantity='energy-loss', dimension_type='spectral')) + elif 'eV' in units: + dataset.set_dimension(int(dim), Dimension('energy', values, units=units, + quantity='energy', dimension_type='spectral')) + elif '1/' in units or units in ['mrad', 'rad']: + dataset.set_dimension(int(dim), Dimension(reciprocal_name, values, units=units, + quantity='reciprocal distance', + dimension_type='reciprocal')) + reciprocal_name = chr(ord(reciprocal_name) + 1) + elif 'nm' in units: + dataset.set_dimension(int(dim), Dimension(spatial_name, values, units=units, + quantity='distance', dimension_type='spatial')) + spatial_name = chr(ord(spatial_name) + 1) + + + def get_filename(self): + return self.__filename + + filename = property(get_filename) + + def get_raw(self): + return self.data + + data = property(get_raw) + + def get_tags(self): + return self.original_metadata + + tags = property(get_tags) diff --git a/examples/plot_dm3_reader.py b/examples/plot_dm3_reader.py new file mode 100644 index 0000000..22a0f47 --- /dev/null +++ b/examples/plot_dm3_reader.py @@ -0,0 +1,77 @@ +""" +====================================== +Reader for proprietary dm3 file format +====================================== + +**Gerd Duscher** + +9/08/2020 + +This document illustrates an example of extracting data out of dm3 +(Digirtal Micrograph) file. + + +Introduction +------------ +Digital Micrograph from Gatan runs on many TEMs for data acquisition. +We read and plot such files here. 
+ +Import all necessary packages +============================= +There are a few setup procedures that need to be followed before any code is written. In this step, we simply load a +few python packages that will be necessary in the later steps. +""" +import numpy as np +import matplotlib.pyplot as plt +#import file_tools_nsid as ft + + +import sys +sys.path.append('../../sidpy') +from sidpy.io.interface_utils import openfile_dialog, get_QT_app +sys.path.append('../../pyNSID') +import pyNSID +sys.path.append('../') +from ScopeReaders.em.tem.dm3_reader import DM3Reader + +#################################################################################### +# Open a file dialog +# =================== +# Here we select the name of the file to open. We will be using the sidpy interface to do that. +# We start QT as a backend for the dialog first (in a notebook the magic command ``%gui qt5``) + +app = get_QT_app() + +# Then we can open QT file dialog to select a file + +file_name = openfile_dialog() +print(file_name) + +# catch a bad selection or cancelling of file selection +if len(file_name)<3 or file_name[-4:]!='.dm3': + print('File not supported') + exit() + +#################################################################################### + +#################################################################################### +# Read file +# ========= +# We use the ScopeReader to read the file into a sidpy dataset. +# All metadata (absolutely everything) is saved in the ``original_metadata`` attribute +# of the sidpy Dataset. If the selected file is not a dm3 File you get an ``IOError``. + +dm3_reader = DM3Reader(file_name) +dataset = dm3_reader.read() + +#################################################################################### + +################################################################################### +# Plot file +# ========== +# Only one command is necessary to plot the file. 
+ +dataset.plot() + +#################################################################################### + diff --git a/examples/plot_nion_reader.py b/examples/plot_nion_reader.py new file mode 100644 index 0000000..24f9948 --- /dev/null +++ b/examples/plot_nion_reader.py @@ -0,0 +1,81 @@ +""" +====================================== +Reader for proprietary Nion file format +====================================== + +**Gerd Duscher** + +9/08/2020 + +This document illustrates an example of extracting data out of dm3 +(Digirtal Micrograph) file. + + +Introduction +------------ +The Nion Swift software stores the data in two different data formats with the extenson ``.ndata`` and ``.h5``. +Both can be read with the nion_reader of the ScopeReaders package +We read and plot such files here. + +Import all necessary packages +============================= +There are a few setup procedures that need to be followed before any code is written. In this step, we simply load a +few python packages that will be necessary in the later steps. +""" +import numpy as np +import matplotlib.pyplot as plt +#import file_tools_nsid as ft + + +import sys +sys.path.append('../../sidpy') +from sidpy.io.interface_utils import openfile_dialog, get_QT_app +sys.path.append('../../pyNSID') +import pyNSID +sys.path.append('../') +from ScopeReaders.em.tem.nion_reader import NionReader + +#################################################################################### +# Open a file dialog +# =================== +# Here we select the name of the file to open. We will be using the sidpy interface to do that. 
+# We start QT as a backend for the dialog first (in a notebook the magic command ``%gui qt5``) + +app = get_QT_app() + +# Then we can open QT file dialog to select a file + +file_name = openfile_dialog() +print(file_name) + +# catch a bad selection or cancelling of file selection +if len(file_name)<3: + print('File selection canceled') + exit() + + +#################################################################################### + +#################################################################################### +# Read file +# ========= +# We use the ScopeReader to read the file into a sidpy dataset. +# All metadata (absolutely everything) is saved in the ``original_metadata`` attribute +# of the sidpy Dataset. If the selected file is not a Nion File you get an ``IOError``. +# either you selected a file not with the right extension (``.h5`` or ``.ndata``) or the +# file is not consistent with the Swift file format. +nion_reader = NionReader(file_name) +dataset = nion_reader.read() +print(dataset) + +#################################################################################### + +################################################################################### +# Plot file +# ========== +# Only one command is necessary to plot the file. + +dataset.plot() + +#################################################################################### + From 5979d11d7d34eb228f21a625cd366a0aa6494f4c Mon Sep 17 00:00:00 2001 From: Gerd Duscher <50049264+gduscher@users.noreply.github.com> Date: Wed, 9 Sep 2020 18:43:46 -0400 Subject: [PATCH 2/3] Nion and dm3reader, with examples. 
Deleted nion_reader_example per request of Suhas --- examples/plot_nion_reader.py | 81 ------------------------------------ 1 file changed, 81 deletions(-) delete mode 100644 examples/plot_nion_reader.py diff --git a/examples/plot_nion_reader.py b/examples/plot_nion_reader.py deleted file mode 100644 index 24f9948..0000000 --- a/examples/plot_nion_reader.py +++ /dev/null @@ -1,81 +0,0 @@ -""" -====================================== -Reader for proprietary Nion file format -====================================== - -**Gerd Duscher** - -9/08/2020 - -This document illustrates an example of extracting data out of dm3 -(Digirtal Micrograph) file. - - -Introduction ------------- -The Nion Swift software stores the data in two different data formats with the extenson ``.ndata`` and ``.h5``. -Both can be read with the nion_reader of the ScopeReaders package -We read and plot such files here. - -Import all necessary packages -============================= -There are a few setup procedures that need to be followed before any code is written. In this step, we simply load a -few python packages that will be necessary in the later steps. -""" -import numpy as np -import matplotlib.pyplot as plt -#import file_tools_nsid as ft - - -import sys -sys.path.append('../../sidpy') -from sidpy.io.interface_utils import openfile_dialog, get_QT_app -sys.path.append('../../pyNSID') -import pyNSID -sys.path.append('../') -from ScopeReaders.em.tem.nion_reader import NionReader - -#################################################################################### -# Open a file dialog -# =================== -# Here we select the name of the file to open. We will be using the sidpy interface to do that. 
-# We start QT as a backend for the dialog first (in a notebook the magic command ``%gui qt5``) - -app = get_QT_app() - -# Then we can open QT file dialog to select a file - -file_name = openfile_dialog() -print(file_name) - -# catch a bad selection or cancelling of file selection -if len(file_name)<3: - print('File selection canceled') - exit() - - -#################################################################################### - -#################################################################################### -# Read file -# ========= -# We use the ScopeReader to read the file into a sidpy dataset. -# All metadata (absolutely everything) is saved in the ``original_metadata`` attribute -# of the sidpy Dataset. If the selected file is not a Nion File you get an ``IOError``. -# either you selected a file not with the right extension (``.h5`` or ``.ndata``) or the -# file is not consistent with the Swift file format. -nion_reader = NionReader(file_name) -dataset = nion_reader.read() -print(dataset) - -#################################################################################### - -################################################################################### -# Plot file -# ========== -# Only one command is necessary to plot the file. - -dataset.plot() - -#################################################################################### - From 660d49b2931bd675e86664bd540925c1c24035cf Mon Sep 17 00:00:00 2001 From: Gerd Duscher <50049264+gduscher@users.noreply.github.com> Date: Wed, 9 Sep 2020 18:53:30 -0400 Subject: [PATCH 3/3] Nion and dm3reader, with examples. 
Deleted nion_reader_example per request of Suhas Cleaned-up some code Plotting works only with sidpy --- examples/plot_dm3_reader.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/examples/plot_dm3_reader.py b/examples/plot_dm3_reader.py index 22a0f47..5169dad 100644 --- a/examples/plot_dm3_reader.py +++ b/examples/plot_dm3_reader.py @@ -8,7 +8,7 @@ 9/08/2020 This document illustrates an example of extracting data out of dm3 -(Digirtal Micrograph) file. +(Digital Micrograph) file. Introduction @@ -23,14 +23,10 @@ """ import numpy as np import matplotlib.pyplot as plt -#import file_tools_nsid as ft - import sys sys.path.append('../../sidpy') from sidpy.io.interface_utils import openfile_dialog, get_QT_app -sys.path.append('../../pyNSID') -import pyNSID sys.path.append('../') from ScopeReaders.em.tem.dm3_reader import DM3Reader @@ -48,7 +44,7 @@ print(file_name) # catch a bad selection or cancelling of file selection -if len(file_name)<3 or file_name[-4:]!='.dm3': +if len(file_name) < 3 or file_name[-4:] != '.dm3': print('File not supported') exit() @@ -74,4 +70,3 @@ dataset.plot() #################################################################################### -