Source code for trixi.logger.experiment.pytorchexperimentlogger

from __future__ import print_function

import atexit
import fnmatch
import os
import warnings
from multiprocessing import Process
# import cv2
from PIL import Image

import torch
import numpy as np

from trixi.logger.abstractlogger import threaded
from trixi.logger.experiment import ExperimentLogger
from trixi.logger.file.pytorchplotfilelogger import PytorchPlotFileLogger
from trixi.util import name_and_iter_to_filename
from trixi.util.metrics import get_classification_metrics, get_pr_curve, get_roc_curve
from trixi.util.pytorchutils import update_model, get_vanilla_image_gradient, get_guided_image_gradient, \
    get_smooth_image_gradient, get_input_gradient


[docs]class PytorchExperimentLogger(ExperimentLogger):
    """
    A single class for logging your pytorch experiments to file.
    Extends the ExperimentLogger and also creates an experiment folder with the following file structure:

    The folder structure is :
        base_dir/
            new_experiment_folder/
                checkpoint/
                config/
                img/
                log/
                plot/
                result/
                save/


    """

    def __init__(self, *args, **kwargs):
        """
        Initializes the underlying ExperimentLogger and attaches a file-based plot logger.

        Args:
            *args: Positional arguments handed unchanged to ExperimentLogger
            **kwargs: Keyword arguments handed unchanged to ExperimentLogger

        """
        super(PytorchExperimentLogger, self).__init__(*args, **kwargs)

        # img_dir / plot_dir are read only after the parent initializer has run
        # (presumably that is where they get set up - verify in ExperimentLogger).
        img_dir, plot_dir = self.img_dir, self.plot_dir
        self.plot_logger = PytorchPlotFileLogger(img_dir, plot_dir)

[docs]    def show_images(self, images, name, **kwargs):
        """
        Saves images in the img folder

        Args:
            images: The images to be saved
            name: file name of the new image file

        """
        self.plot_logger.show_images(images, name, **kwargs)

[docs]    def show_image_grid(self, image, name, **kwargs):
        """
        Saves images in the img folder as a image grid

        Args:
            images: The images to be saved
            name: file name of the new image file

        """
        self.plot_logger.show_image_grid(image, name, **kwargs)

[docs]    def show_image_grid_heatmap(self, heatmap, background=None, name="heatmap", **kwargs):
        """
        Saves images in the img folder as a image grid

        Args:
            heatmap: The images to be converted to a heatmap
            background: Context of the heatmap (to be underlayed)
            name: file name of the new image file

        """
        self.plot_logger.show_image_grid_heatmap(heatmap=heatmap, background=background, name=name, **kwargs)

[docs]    def show_video(self, frame_list=None, name="video", dim="LxHxWxC", scale=1.0, fps=25,
                   extension=".mp4", codec="THEO"):
        """
        Saves video in the img folder. Should be a list of arrays with dimension HxWxC.

        Args:
            frame_list: The list of image tensors/arrays to be saved as a video
            name: Filename of the video
            dim: Dimension of the tensor - should be either LxHxWxC or LxCxHxW
            fps: FPS of the video
            extension: File extension - should be mp4, ogc, avi or webm
        """
        # TODO: trixi browser currently can't show videos, so using GIF instead - work in progress
        self.show_gif(frame_list, name=name, scale=scale, fps=fps)
        """
        tensor = np.array(frame_list)
        assert tensor.ndim == 4, "video should be a 4d tensor"
        assert dim == "LxHxWxC" or  dim == "LxCxHxW", "dimension argument should be LxHxWxC or LxCxHxW"
        if dim == "LxCxHxW":
            tensor = tensor.transpose([0, 2, 3, 1])
        filename = os.path.join(self.img_dir, name + extension)
        fourcc = cv2.VideoWriter_fourcc(*codec)
        writer = cv2.VideoWriter(filename, fourcc, fps, (tensor.shape[2], tensor.shape[1]))
        assert writer.isOpened(), "video writer could not be opened"
        for i in range(tensor.shape[0]):
            writer.write(tensor[i, :, :, :])
        writer.release()
        writer = None
        """

[docs]    def show_gif(self, frame_list=None, name="frames", scale=1.0, fps=25):
        """
        Saves gif in the img folder. Should be a list of arrays with dimension HxWxC.

        Args:
            frame_list: The list of image tensors/arrays to be saved as a gif
            name: Filename of the gif
            scale: Scaling factor of the individual frames
            fps: FPS of the gif
        """
        w, h = Image.fromarray(np.uint8(frame_list[0])).size
        image_list = []
        for i in range(len(frame_list)):
            image_list.append(Image.fromarray(np.uint8(frame_list[i])).resize((w * int(scale), h * int(scale))))
        filename = os.path.join(self.img_dir, name + ".gif")
        image_list[0].save(filename, save_all=True, append_images=image_list[1:], duration=int(1e3 / fps), loop=0)

    @staticmethod
    @threaded
    def save_model_static(model, model_dir, name):
        """
        Writes the state dict of a pytorch model to a file in the given directory (using torch.save).

        Args:
            model: The model to be stored (only its state_dict() is serialized)
            model_dir: The directory in which the model file should be written
            name: The file name of the model file

        """
        destination = os.path.join(model_dir, name)
        state = model.state_dict()
        torch.save(state, destination)

[docs]    def save_model(self, model, name, n_iter=None, iter_format="{:05d}", prefix=False):
        """
        Saves a pytorch model in the model directory of the experiment folder

        Args:
            model: The model to be stored
            name: The file name of the model file
            n_iter: The iteration number, formatted with the iter_format and added to the model name (if not None)
            iter_format: The format string, which indicates how n_iter will be formated as a string
            prefix: If True, the formated n_iter will be appended as a prefix, otherwise as a suffix

        """

        if n_iter is not None:
            name = name_and_iter_to_filename(name,
                                             n_iter,
                                             ".pth",
                                             iter_format=iter_format,
                                             prefix=prefix)

        if not name.endswith(".pth"):
            name += ".pth"

        self.save_model_static(model=model,
                               model_dir=self.checkpoint_dir,
                               name=name)

    @staticmethod
    @threaded
    def load_model_static(model, model_file, exclude_layers=(), warnings=True):
        """
        Restores the parameters of a pytorch model from a given file (using torch.load).

        Args:
            model: The model to be loaded (whose parameters should be restored)
            model_file: The file from which the model parameters should be loaded
            exclude_layers: List of layer names which should be excluded from restoring
            warnings (bool): Flag which indicates if method should warn if not everything went perfectly

        Returns: The model which was passed in

        Raises:
            IOError: If model_file does not exist

        """
        if not os.path.exists(model_file):
            raise IOError("Model file does not exist!")

        # NOTE(review): torch.load unpickles the file - only load model files from trusted sources.
        # The identity map_location returns each storage as it was deserialized instead of
        # remapping it to the device it was saved from.
        stored_state = torch.load(model_file, map_location=lambda storage, loc: storage)
        update_model(model, stored_state, exclude_layers, warnings)
        return model

[docs]    def load_model(self, model, name, exclude_layers=(), warnings=True):
        """
        Loads a pytorch model from the model directory of the experiment folder


        Args:
            model: The model to be loaded (whose parameters should be restored)
            name: The file name of the model file
            exclude_layers: List of layer names which should be excluded from restoring
            warnings: Flag which indicates if method should warn if not everything went perfectlys


        """

        if not name.endswith(".pth"):
            name += ".pth"

        self.load_model_static(model=model,
                               model_file=os.path.join(self.checkpoint_dir, name),
                               exclude_layers=exclude_layers,
                               warnings=warnings)

    @staticmethod
    @threaded
    def save_checkpoint_static(checkpoint_dir, name, move_to_cpu=False, **kwargs):
        """
        Saves a checkpoint/dict in a given directory (using pytorch)

        Modules and optimizers passed as values are replaced by their state_dict() before saving.

        Args:
            checkpoint_dir: The directory in which the checkpoint file should be written
            name: The file name of the checkpoint file
            move_to_cpu (bool): Flag, if all pytorch tensors should be moved to cpu before storing.
                Dicts, lists and tuples are searched recursively.
            **kwargs: dict which is actually saved

        """

        def to_cpu(obj):
            # Anything exposing .cpu() (e.g. tensors) is moved directly; dicts, lists and tuples
            # are rebuilt with their contents moved; everything else is stored as is.
            if hasattr(obj, "cpu"):
                return obj.cpu()
            if isinstance(obj, dict):
                return {key: to_cpu(val) for key, val in obj.items()}
            # Exact type check so tuple subclasses (e.g. namedtuples) are left untouched
            if type(obj) in (list, tuple):
                return type(obj)(to_cpu(val) for val in obj)
            return obj

        stateful_types = (torch.nn.Module, torch.optim.Optimizer)
        # Build a fresh payload instead of rewriting kwargs while iterating over it
        payload = {key: value.state_dict() if isinstance(value, stateful_types) else value
                   for key, value in kwargs.items()}

        checkpoint_file = os.path.join(checkpoint_dir, name)
        torch.save(to_cpu(payload) if move_to_cpu else payload, checkpoint_file)

[docs]    def save_checkpoint(self, name, n_iter=None, iter_format="{:05d}", prefix=False, **kwargs):
        """
        Saves a checkpoint in the checkpoint directory of the experiment folder

        Args:
            name: The file name of the checkpoint file
            n_iter: The iteration number, formatted with the iter_format and added to the checkpoint name (if not None)
            iter_format: The format string, which indicates how n_iter will be formated as a string
            prefix: If True, the formated n_iter will be appended as a prefix, otherwise as a suffix
            **kwargs:  dict which is actually saved (key=name, value=variable to be stored)

        """

        if n_iter is not None:
            name = name_and_iter_to_filename(name,
                                             n_iter,
                                             ".pth.tar",
                                             iter_format=iter_format,
                                             prefix=prefix)

        if not name.endswith(".pth.tar"):
            name += ".pth.tar"

        self.save_checkpoint_static(self.checkpoint_dir, name=name, **kwargs)

[docs]    @staticmethod
    def load_checkpoint_static(checkpoint_file, exclude_layer_dict=None, warnings=True, **kwargs):
        """
        Loads a checkpoint/dict in a given directory (using pytorch)

        Args:
            checkpoint_file: The checkpoint from which the checkpoint/dict should be loaded
            exclude_layer_dict: A dict with key 'model_name' and a list of all layers of 'model_name' which should
            not be restored
            warnings: Flag which indicates if method should warn if not everything went perfectlys
            **kwargs: dict which is actually loaded (key=name (used to save the checkpoint) , value=variable to be
            loaded/ overwritten)

        Returns: The kwargs dict with the loaded/ overwritten values

        """

        if exclude_layer_dict is None:
            exclude_layer_dict = {}

        checkpoint = torch.load(checkpoint_file, map_location=lambda storage, loc: storage)

        for key, value in kwargs.items():
            if key in checkpoint:
                if isinstance(value, torch.nn.Module) or isinstance(value, torch.optim.Optimizer):
                    exclude_layers = exclude_layer_dict.get(key, [])
                    update_model(value, checkpoint[key], exclude_layers, warnings)
                else:
                    kwargs[key] = checkpoint[key]

        return kwargs

[docs]    def load_checkpoint(self, name, exclude_layer_dict=None, warnings=True, **kwargs):
        """
        Loads a checkpoint from the checkpoint directory of the experiment folder

        Args:
            name: The name of the checkpoint file
            exclude_layer_dict: A dict with key 'model_name' and a list of all layers of 'model_name' which should
            not be restored
            warnings: Flag which indicates if method should warn if not everything went perfectlys
            **kwargs: dict which is actually loaded (key=name (used to save the checkpoint) , value=variable to be
            loaded/ overwritten)

        Returns: The kwargs dict with the loaded/ overwritten values

        """

        if not name.endswith(".pth.tar"):
            name += ".pth.tar"

        checkpoint_file = os.path.join(self.checkpoint_dir, name)
        return self.load_checkpoint_static(checkpoint_file=checkpoint_file,
                                           exclude_layer_dict=exclude_layer_dict,
                                           warnings=warnings,
                                           **kwargs)

[docs]    def save_at_exit(self, name="checkpoint_end", **kwargs):
        """
        Saves a dict as checkpoint if the program exits (not garanteed to work 100%)

        Args:
            name: Name of the checkpoint file
            **kwargs: dict which is actually saved (key=name, value=variable to be stored)

        """

        if not name.endswith(".pth.tar"):
            name += ".pth.tar"

        def save_fnc():
            self.save_checkpoint(name, **kwargs)
            print("Checkpoint saved securely... =)")

        atexit.register(save_fnc)

[docs]    def get_save_checkpoint_fn(self, name="checkpoint", **kwargs):
        """
        A function which returns a function which takes n_iter as arguments and saves the current values of the
        variables given as kwargs as a checkpoint file.


        Args:
            name: Base-name of the checkpoint file
            **kwargs:  dict which is actually saved, when the returned function is called

        Returns: Function which takes n_iter as arguments and saves a checkpoint file
        """

        def save_fnc(n_iter, iter_format="{:05d}", prefix=False):
            self.save_checkpoint(name=name,
                                 n_iter=n_iter,
                                 iter_format=iter_format,
                                 prefix=prefix,
                                 **kwargs)

        return save_fnc

[docs]    @staticmethod
    def load_last_checkpoint_static(dir_, name=None, **kwargs):
        """
        Loads the (alphabetically) last checkpoint file in a given directory

        Args:
            dir_: The directory to look for the (alphabetically) last checkpoint
            name: String pattern which indicates the files to look form
            **kwargs: dict which is actually loaded (key=name (used to save the checkpoint) , value=variable to be
            loaded/ overwritten)

        Returns:  The kwargs dict with the loaded/ overwritten values

        """

        if name is None:
            name = "*checkpoint*.pth.tar"

        checkpoint_files = []

        for root, dirs, files in os.walk(dir_):
            for filename in fnmatch.filter(files, name):
                checkpoint_file = os.path.join(root, filename)
                checkpoint_files.append(checkpoint_file)

        if len(checkpoint_files) == 0:
            return None

        last_file = sorted(checkpoint_files, reverse=True)[0]

        return PytorchExperimentLogger.load_checkpoint_static(last_file, **kwargs)

[docs]    def load_last_checkpoint(self, **kwargs):
        """
                Loads the (alphabetically) last checkpoint file in the checkpoint directory in the experiment folder

                Args:
                    **kwargs: dict which is actually loaded (key=name (used to save the checkpoint) , value=variable to be
                    loaded/ overwritten)

                Returns:  The kwargs dict with the loaded/ overwritten values

                """
        return self.load_last_checkpoint_static(self.checkpoint_dir, **kwargs)

[docs]    def print(self, *args):
        """
        Prints the given arguments using the text logger print function

        Args:
            *args: Things to be printed

        """
        self.text_logger.print(*args)

[docs]    @staticmethod
    def get_roc_curve(tensor, labels, reduce_to_n_samples=None, use_sub_process=False, results_fn=lambda
            x, *y, **z: None):
        """
        Displays a roc curve given a tensor with scores and the coresponding labels

        Args:
            tensor: Tensor with scores (e.g class probability )
            labels: Labels of the samples to which the scores match
            reduce_to_n_samples: Reduce/ downsample to to n samples for fewer data points
            use_sub_process: Use a sub process to do the processing, if true nothing is returned
            results_fn: function which is called with the results/ return values. Expected f(tpr, fpr)

        """
        warnings.warn("This method is deprecated !!! Please use the util.metrics method")
        return get_roc_curve(tensor, labels, reduce_to_n_samples, use_sub_process, results_fn)

[docs]    @staticmethod
    def get_pr_curve(tensor, labels, reduce_to_n_samples=None, use_sub_process=False,
                     results_fn=lambda x, *y, **z: None):
        """
        Displays a precision recall curve given a tensor with scores and the coresponding labels

        Args:
            tensor: Tensor with scores (e.g class probability )
            labels: Labels of the samples to which the scores match
            reduce_to_n_samples: Reduce/ downsample to to n samples for fewer data points
            use_sub_process: Use a sub process to do the processing, if true nothing is returned
            results_fn: function which is called with the results/ return values. Expected f(precision, recall)

        """
        warnings.warn("This method is deprecated !!! Please use the util.metrics method")
        return get_pr_curve(tensor, labels, reduce_to_n_samples, use_sub_process, results_fn)

[docs]    @staticmethod
    def get_classification_metrics(tensor, labels, name="", metric=("roc-auc", "pr-score"), use_sub_process=False,
                                   tag_name=None, results_fn=lambda x, *y, **z: None):
        """
        Displays some classification metrics as line plots in a graph (similar to show value (also uses show value
        for the caluclated values))

        Args:
            tensor: Tensor with scores (e.g class probability )
            labels: Labels of the samples to which the scores match
            name: The name of the window
            metric: List of metrics to calculate. Options are: roc-auc, pr-auc, pr-score, mcc, f1
            tag_name: Name for the tag, if no given use name
            use_sub_process: Use a sub process to do the processing, if true nothing is returned
            results_fn: function which is called with the results/ return values. Expected f(val, name, tag)

        Returns:

        """
        warnings.warn("This method is deprecated !!! Please use the util.metrics method")
        return get_classification_metrics(tensor, labels, name, metric, use_sub_process, tag_name, results_fn)

[docs]    @staticmethod
    def get_input_gradient(model, inpt, err_fn, grad_type="vanilla", n_runs=20, eps=0.1,
                           abs=False, results_fn=lambda x, *y, **z: None):
        """
        Given a model creates calculates the error and backpropagates it to the image and saves it (saliency map).

        Args:
            model: The model to be evaluated
            inpt: Input to the model
            err_fn: The error function the evaluate the output of the model on
            grad_type: Gradient calculation method, currently supports (vanilla, vanilla-smooth, guided,
            guided-smooth) ( the guided backprob can lead to segfaults -.-)
            n_runs: Number of runs for the smooth variants
            eps: noise scaling to be applied on the input image (noise is drawn from N(0,1))
            abs (bool): Flag, if the gradient should be a absolute value
            results_fn: function which is called with the results/ return values. Expected f(grads)

        """
        warnings.warn("This method is deprecated !!! Please use the util.pytorchutils method")
        return get_input_gradient(model, inpt, err_fn, grad_type, n_runs, eps, abs, results_fn)

[docs]    def show_image_gradient(self, name, *args, **kwargs):
        """
        Given a model creates calculates the error and backpropagates it to the image and saves it.

        Args:
            name: Name of the file
            model: The model to be evaluated
            inpt: Input to the model
            err_fn: The error function the evaluate the output of the model on
            grad_type: Gradient calculation method, currently supports (vanilla, vanilla-smooth, guided,
            guided-smooth) ( the guided backprob can lead to segfaults -.-)
            n_runs: Number of runs for the smooth variants
            eps: noise scaling to be applied on the input image (noise is drawn from N(0,1))
            abs (bool): Flag, if the gradient should be a absolute value


        """
        grad = self.get_input_gradient(*args, **kwargs)
        self.show_image_grid(grad, name)