200行代码实现CNN卷积结果的可视化

吃醋不吃辣的雷儿 2023-03-28 原文

from PIL import Image
import os
import numpy as np
import torch
import torch.nn as nn
import copy
from torch.autograd import Variable
from torchvision import models
import matplotlib.cm as mpl_color_map


def preprocess(pil_im, resize=True):
    """
        Processes image for CNNs

    Args:
        PIL_img (PIL_img): PIL Image or numpy array to process
        resize_im (bool): Resize to 224 or not
    returns:
        im_as_var (torch variable): Variable that contains processed float tensor
    """
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225] # mean and std for RGB channels in ImageNet
    if type(pil_im) != Image.Image:
        pil_im = Image.fromarray(pil_im) # convert input image to Image.image
    if resize:
        pil_im = pil_im.resize((224, 224), Image.ANTIALIAS) # resize image as width 224 and height 224
    image_array = np.float32(pil_im)
    image_array = image_array.transpose(2, 0, 1) # transpose to (D, W, H) form
    
    for channel, _ in enumerate(image_array):
        image_array[channel] /= 255
        image_array[channel] -= mean[channel]
        image_array[channel] /= std[channel] # normalize image array
    
    image_tensor = torch.from_numpy(image_array).float()
    image_tensor.unsqueeze_(0) # add one channel shaped as 1, 3, 224, 224
    image_variable = Variable(image_tensor, requires_grad=True)
    return image_variable


def get_example_params(list_index):
    """
        Gets used variables for almost all visualizations, like the image, model etc.

    Args:
        example_index (int): Image id to use from examples

    returns:
        original_image (numpy arr): Original image read from the file
        prep_img (numpy_arr): Processed image
        file_name_to_export (string): File name to export the visualizations
        pretrained_model(Pytorch model): Model to use for the operations
    """
    examples = ['../input_images/cat10.png', '../input_images/cat134.png', '../input_images/dog10014.png', '../input_images/panda1.png', '../input_images/tiger1.png']
    img_path = examples[list_index]
    file_name_to_export = img_path[img_path.rfind('/')+1:img_path.rfind('.')]
    original_image = Image.open(img_path).convert('RGB') # open as RGB format
    prep_img = preprocess(original_image)
    pretrained_model = models.alexnet(pretrained = True)
    return (original_image, prep_img, file_name_to_export, pretrained_model)


def format_np_output(np_arr):
    """
        This is a (kind of) bandaid fix to streamline saving procedure.
        It converts all the outputs to the same format which is 3xWxH with using sucecssive if clauses.
        
    Args:
        im_as_arr (Numpy array): Matrix of shape 1xWxH or WxH or 3xWxH
    """
    if len(np_arr.shape) == 2:
        np_arr = np.expand_dims(np_arr, axis=0) # case 1: append one dimension
    if np_arr.shape[0] == 1:
        np_arr = np.repeat(np_arr, 3, axis=0) # case 2: 1xWxH --> 3xWxH
    if np_arr.shape[0] == 3:
        np_arr = np_arr.transpose(1, 2, 0) # case 3: WxHx3
    if np.max(np_arr) <= 1:
        np_arr = (np_arr * 255).astype(np.uint8) # case 4: if normalized then x255
    return np_arr


def save_img(im_to_save, save_path):
    """
        Saves a numpy matrix or PIL image as an image
        
    Args:
        im_as_arr (Numpy array): Matrix of shape DxWxH
        path (str): Path to the image
    """
    if isinstance(im_to_save, np.ndarray):
        im_to_save = format_np_output(im_to_save)
        im_to_save = Image.fromarray(im_to_save)
    im_to_save.save(save_path)
    
    
def apply_colormap_to_image(origin_img, activation_map, colormap_type):
    """
        Apply heatmap on image
        
    Args:
        org_img (PIL img): Original image
        activation_map (numpy arr): Activation map (grayscale) 0-255
        colormap_name (str): Name of the colormap
    """
    color_map = mpl_color_map.get_cmap(colormap_type) # get colormap of hsv format
    no_trans_heatmap = color_map(activation_map)
    
    heatmap = copy.deepcopy(no_trans_heatmap)
    heatmap[:, :, 3] = 0.4 # change alpha
    heatmap = Image.fromarray((heatmap * 255).astype(np.uint8)) # heatmap image
    no_trans_heatmap = Image.fromarray((no_trans_heatmap*255).astype(np.uint8)) # no_trans_heatmap image
    
    heatmap_on_image = Image.new("RGBA", origin_img.size)
    heatmap_on_image = Image.alpha_composite(heatmap_on_image, origin_img.convert("RGBA"))
    heatmap_on_image = Image.alpha_composite(heatmap_on_image, heatmap) # heatmap + original image
    return no_trans_heatmap, heatmap_on_image
    
    
def save_class_activation_images(origin_img, activation_map, file_name):
    """
        Save cam activation map and activation map on the original image

    Args:
        org_img (PIL img): Original image
        activation_map (numpy arr): Activation map (grayscale) 0-255
        file_name (str): File name of the exported image
    """
    if not os.path.exists("../results"):
        os.makedirs("../results")
    heatmap, heatmap_on_image = apply_colormap_to_image(origin_img, activation_map, "hsv")
    heatmap_path = os.path.join("../results", file_name + "heatmap.png")
    save_img(heatmap, heatmap_path)
    heatmap_on_image_path = os.path.join("../results", file_name + "heatmap_on_image.png")
    save_img(heatmap_on_image, heatmap_on_image_path)
    activation_path = os.path.join("../results", file_name + "activation_map.png")
    save_img(activation_map, activation_path)


class Camextractor():
    """
        Class activation map extractor: to extract the feature at target layer
    """
    
    def __init__(self, model, target_layer):
        self.model = model
        self.target_layer = int(target_layer)
        self.gradient = None
        
    def save_gradient(self, grad):
        self.gradient = grad
        
    def conv_output(self, x):
        # forward pass and save conv result at target layer
        conv_out = None
        for layer_index, layer in self.model.features._modules.items():
            print("layer_index:", layer_index, "layer:", layer)
            x = layer(x) # forward for layer at layer_index
            if int(layer_index) == self.target_layer:
                x.register_hook(self.save_gradient) # register hook and save gradients
                conv_out = x
        return conv_out, x
    
    def forward_pass(self, x):
        # forward pass for the whole model
        conv_out, x = self.conv_output(x)
        x = x.view(x.size(0), -1) # flatten
        x = self.model.classifier(x) # classifier and if softmax added behind, then output probability of each class
        return conv_out, x
    
class Layercam():
    """
        Produces class activation map using LayerCam method
    """
    
    def __init__(self, model, target_layer):
        
        self.model = model
        self.model.eval() # evaluation patten, not to activate BatchNorm and Dropout
        self.target_layer = int(target_layer)
        self.extractor = Camextractor(self.model, self.target_layer)
        
    def generate_cam(self, input_image):
        
        conv_out, model_out = self.extractor.forward_pass(input_image) # forward pass and save conv result at target layer
        target_class = np.argmax(model_out.data.numpy()) # classify and get the result with maximum probability
        one_hot_out = torch.FloatTensor(1, model_out.size()[-1]).zero_()
        one_hot_out[0][target_class] = 1 # target for back propagation
        self.model.features.zero_grad()
        self.model.classifier.zero_grad() # zero gradient
        model_out.backward(gradient = one_hot_out, retain_graph = True)
        target_out = conv_out.data.numpy()[0] # target layer output
        weight = self.extractor.gradient.data.numpy()[0] # weight for gradient
        weight[weight < 0] = 0 # relu
        cam = np.sum(weight * target_out, axis=0) # element multiply between weight and target layer output, then sum
        cam = (cam - np.min(cam)) / (np.max(cam) - np.min(cam)) # normalize cam to [0, 1]
        cam = np.uint8(cam * 255) # [0, 255]
        cam = np.uint8(Image.fromarray(cam).resize((input_image.shape[2], input_image.shape[3]), Image.ANTIALIAS)) / 255
        return cam
    
    
if __name__ == "__main__":
    
    target_example = 4  # Tiger '../input_images/tiger1.png'
    (original_image, prep_img, file_name_to_export, pretrained_model) = get_example_params(target_example)
    layercam = Layercam(pretrained_model, target_layer=9)
    cam = layercam.generate_cam(prep_img)
    save_class_activation_images(original_image, cam, file_name_to_export)
    print('Layer cam completed')

tiger1heatmap_on_image.png

tiger1heatmap.png

tiger1activation_map.png

tiger1.png

图一到图四分别为：热力图+原图、热力图、cam、原图。
这里推荐：
https://github.com/utkuozbulak/pytorch-cnn-visualizations
本篇是根据该github上的layercam方法魔改的，也就是把常用的函数封装到了一个文件里，并且不需要对target class的预测，而是根据输入图片自行调用训练好的alexnet进行预测，取预测概率最大的类别作为输出，而且可以随意调用本地的图片进行预测，该本地图片最好来自于ImageNet且resize为224x224的。imageresize的代码很简单，调用Image库几行代码即可此处不再粘贴。

有关200行代码实现CNN卷积结果的可视化的更多相关文章

ruby - 如何在 buildr 项目中使用 Ruby 代码？ - 2
如何在buildr项目中使用Ruby？我在很多不同的项目中使用过Ruby、JRuby、Java和Clojure。我目前正在使用我的标准Ruby开发一个模拟应用程序，我想尝试使用Clojure后端(我确实喜欢功能代码)以及JRubygui和测试套件。我还可以看到在未来的不同项目中使用Scala作为后端。我想我要为我的项目尝试一下buildr(http://buildr.apache.org/)，但我注意到buildr似乎没有设置为在项目中使用JRuby代码本身!这看起来有点傻，因为该工具旨在统一通用的JVM语言并且是在ruby中构建的。除了将输出的jar包含在一个独特的、仅限ruby
ruby-on-rails - Rails 源代码 : initialize hash in a weird way? - 2
在rails源中:https://github.com/rails/rails/blob/master/activesupport/lib/active_support/lazy_load_hooks.rb可以看到以下内容@load_hooks=Hash.new{|h,k|h[k]=[]}在IRB中，它只是初始化一个空哈希。和做有什么区别@load_hooks=Hash.new 最佳答案查看rubydocumentationforHashnew→new_hashclicktotogglesourcenew(obj)→new_has
ruby - 如何根据特征实现 FactoryGirl 的条件行为 - 2
我有一个用户工厂。我希望默认情况下确认用户。但是鉴于unconfirmed特征，我不希望它们被确认。虽然我有一个基于实现细节而不是抽象的工作实现，但我想知道如何正确地做到这一点。factory:userdoafter(:create)do|user,evaluator|#unwantedimplementationdetailshereunlessFactoryGirl.factories[:user].defined_traits.map(&:name).include?(:unconfirmed)user.confirm!endendtrait:unconfirmeddoenden
ruby-on-rails - 浏览 Ruby 源代码 - 2
我的主要目标是能够完全理解我正在使用的库/gem。我尝试在Github上从头到尾阅读源代码，但这真的很难。我认为更有趣、更温和的踏脚石就是在使用时阅读每个库/gem方法的源代码。例如，我想知道RubyonRails中的redirect_to方法是如何工作的:如何查找redirect_to方法的源代码？我知道在pry中我可以执行类似show-methodmethod的操作，但我如何才能对Rails框架中的方法执行此操作？您对我如何更好地理解Gem及其API有什么建议吗？仅仅阅读源代码似乎真的很难，尤其是对于框架。谢谢! 最佳答案 Ru
ruby - 模块嵌套代码风格偏好 - 2
我的假设是moduleAmoduleBendend和moduleA::Bend是一样的。我能够从thisblog找到解决方案,thisSOthread和andthisSOthread.为什么以及什么时候应该更喜欢紧凑语法A::B而不是另一个，因为它显然有一个缺点？我有一种直觉，它可能与性能有关，因为在更多命名空间中查找常量需要更多计算。但是我无法通过对普通类进行基准测试来验证这一点。最佳答案这两种写作方法经常被混淆。首先要说的是，据我所知，没有可衡量的性能差异。(在下面的书面示例中不断查找)最明显的区别，可能也是最著名的，是你的
ruby - Ruby 中的波形可视化 - 2
我即将开始一个将录制和编辑音频文件的项目，我正在寻找一个好的库(最好是Ruby，但会考虑Java或.NET以外的任何库)以进行实时可视化波形。有人知道我应该从哪里开始搜索吗？最佳答案要流入浏览器的数据量很大。Flash或Flex图表可能是唯一能提高内存效率的解决方案。Javascript图表往往会因大型数据集而崩溃。关于ruby-Ruby中的波形可视化，我们在StackOverflow上找到一个类似的问题： https://stackoverflow.c
ruby - 寻找通过阅读代码确定编程语言的ruby gem？ - 2
几个月前，我读了一篇关于rubygem的博客文章，它可以通过阅读代码本身来确定编程语言。对于我的生活，我不记得博客或gem的名称。谷歌搜索“ruby编程语言猜测”及其变体也无济于事。有人碰巧知道相关gem的名称吗？最佳答案是这个吗:http://github.com/chrislo/sourceclassifier/tree/master 关于ruby-寻找通过阅读代码确定编程语言的rubygem？，我们在StackOverflow上找到一个类似的问题：
ruby - Net::HTTP 获取源代码和状态 - 2
我目前正在使用以下方法获取页面的源代码:Net::HTTP.get(URI.parse(page.url))我还想获取HTTP状态，而无需发出第二个请求。有没有办法用另一种方法做到这一点？我一直在查看文档，但似乎找不到我要找的东西。最佳答案在我看来，除非您需要一些真正的低级访问或控制，否则最好使用Ruby的内置Open::URI模块:require'open-uri'io=open('http://www.example.org/')#=>#body=io.read[0,50]#=>"["200","OK"]io.base_ur
报告回顾丨模型进化狂飙，DetectGPT能否识别最新模型生成结果？ - 2
导读语言模型给我们的生产生活带来了极大便利，但同时不少人也利用他们从事作弊工作。如何规避这些难辨真伪的文字所产生的负面影响也成为一大难题。在3月9日智源Live第33期活动「DetectGPT：判断文本是否为机器生成的工具」中，主讲人Eric为我们讲解了DetectGPT工作背后的思路——一种基于概率曲率检测的用于检测模型生成文本的工具，它可以帮助我们更好地分辨文章的来源和可信度，对保护信息真实、防止欺诈等方面具有重要意义。本次报告主要围绕其功能，实现和效果等展开。（文末点击“阅读原文”，查看活动回放。）Ericmitchell斯坦福大学计算机系四年级博士生，由ChelseaFinn和Chri
程序员如何提高代码能力？ - 2
前言作为一名程序员，自己的本质工作就是做程序开发，那么程序开发的时候最直接的体现就是代码，检验一个程序员技术水平的一个核心环节就是开发时候的代码能力。众所周知，程序开发的水平提升是一个循序渐进的过程，每一位程序员都是从“菜鸟”变成“大神”的，所以程序员在程序开发过程中的代码能力也是根据平时开发中的业务实践来积累和提升的。提高代码能力核心要素程序员要想提高自身代码能力，尤其是新晋程序员的代码能力有很大的提升空间的时候，需要针对性的去提高自己的代码能力。提高代码能力其实有几个比较关键的点，只要把握住这些方面，就能很好的、快速的提高自己的一部分代码能力。1、多去阅读开源项目，如有机会可以亲自参与开源

200行代码实现CNN卷积结果的可视化

有关200行代码实现CNN卷积结果的可视化的更多相关文章

随机推荐