Python · 2018/06/03

一部にピントを合わせた画像の生成(Python)

（著）山たー

次のような画像があるとする。

画像はMIT Saliency BenchmarkのデータセットCAT2000からお借りした。(Stimuli/OutdoorNatural/197.jpg)

これの右側のペンギンだけに注目した画像を生成する。

このような画像をFoveated image（フォービエイテッド画像）と言ったりする。Foveateは「対象を中心窩でとらえる」という意味で、要は注視点から離れるにつれてぼやけていく画像のことである。VRではフォービエイテッド・レンダリング（Foveated Rendering）といって、没入感を高めるのにも用いられているそうだ。

実際の処理は、ぼやけた画像と元画像を次のようなマスクに従って重ね合わせている。

今回はマスクをガウシアンにしたが、値が0〜1のグレースケール画像であれば何でもマスクとして適用可能である。

コード

import cv2
import numpy as np

def GaussianMask(sizex,sizey, sigma=50, center=None):
    """Build a 2-D Gaussian-shaped weight map whose peak value is 1.

    Args:
        sizex: Width of the map in pixels (number of columns).
        sizey: Height of the map in pixels (number of rows).
        sigma: Width parameter of the bell. With the formula used here the
            weight drops to 0.5 at a distance of ``sigma / 2`` from the
            peak, so it behaves as a full width at half maximum rather
            than as a standard deviation.
        center: ``(x, y)`` position of the peak. When ``None`` the peak is
            placed at ``(sizex // 2, sizey // 2)``.

    Returns:
        A float array of shape ``(sizey, sizex)`` with values in (0, 1].
    """
    # A row vector of column indices and a column vector of row indices;
    # broadcasting them yields the full (sizey, sizex) grid.
    cols = np.arange(sizex, dtype=float)[np.newaxis, :]
    rows = np.arange(sizey, dtype=float)[:, np.newaxis]

    if center is None:
        peak_x, peak_y = sizex // 2, sizey // 2
    else:
        peak_x, peak_y = center[0], center[1]

    dist_sq = (cols - peak_x)**2 + (rows - peak_y)**2
    return np.exp(-4*np.log(2) * dist_sq / sigma**2)

# Load the input image.
imgdir="test.png"
img = cv2.imread(imgdir)
# cv2.imread returns None (it does not raise) when the file is missing or
# cannot be decoded; fail here with a clear message instead of crashing
# later on img.shape.
if img is None:
    raise IOError("cv2.imread failed to load image: {}".format(imgdir))

# Image height and width in pixels.
H,W = img.shape[:2]

# Gaussian weight map: (width, height, width parameter, centre (x, y)).
# NOTE: with the formula in GaussianMask the third argument acts as a full
# width at half maximum (weight 0.5 at distance sigma / 2), not as a
# standard deviation.
mask = GaussianMask(W,H,100,(800,200))

# Replicate the 2-D mask along a third axis so it matches a 3-channel image.
alpha = np.repeat(mask[:, :, np.newaxis], 3, axis=2)

# Blurred copy of the whole image.
blured = cv2.GaussianBlur(img, (21,21), 11)

# Per-pixel blend: the original image where alpha is close to 1, the
# blurred image where alpha is close to 0. convertScaleAbs converts the
# floating-point result back to 8-bit.
blended = cv2.convertScaleAbs(img*alpha + blured*(1-alpha))

# Save the result.
cv2.imwrite("blended.jpg",blended)

# Show the result and wait for a key press before closing the window.
cv2.imshow("blended", blended)
cv2.waitKey()
cv2.destroyAllWindows()

基本的にはOpenCVを用いた。もう少しいい実装がありそうにも思える。

その他の実装

参考にしたサイトにあったソースコードに手を加えたもの。

実装例1

import cv2
import numpy as np

def alphaBlend(img1, img2, mask):
    """Blend two images using a per-pixel 8-bit weight mask.

    Args:
        img1: Image (CV_8UC3) shown where the mask is 255.
        img2: Image (CV_8UC3) shown where the mask is 0.
        mask: Weight image, CV_8UC1 or CV_8UC3, with values 0..255.

    Returns:
        The blended image, converted back to 8-bit by
        ``cv2.convertScaleAbs``.
    """
    has_three_channels = mask.ndim == 3 and mask.shape[-1] == 3
    # Any mask that is not already 3-channel is expanded to BGR so it has
    # one weight per image channel.
    mask_bgr = mask if has_three_channels else cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
    weight = mask_bgr / 255.0
    return cv2.convertScaleAbs(img1 * weight + img2 * (1 - weight))

# Load the input image.
imgdir="test.png"
img = cv2.imread(imgdir)
# cv2.imread returns None (it does not raise) when the file is missing or
# cannot be decoded; fail here with a clear message instead of crashing
# later on img.shape.
if img is None:
    raise IOError("cv2.imread failed to load image: {}".format(imgdir))

H,W = img.shape[:2]

# Single-channel 8-bit mask, all zeros to start with.
mask = np.zeros((H,W), np.uint8)

# Draw a filled, anti-aliased white disc: the region to keep sharp.
cv2.circle(mask,
           (800, 200), #center
           100,        #radius
           (255,255,255), -1, cv2.LINE_AA)

# Blur the mask so the disc's edge fades gradually instead of being a hard
# step.
mask = cv2.GaussianBlur(mask, (21,21),11 )

# Blurred copy of the whole image.
blured = cv2.GaussianBlur(img, (21,21), 11)

# Original image inside the disc, blurred image outside.
# Passing `255 - mask` instead would invert this: blurred inside, sharp
# outside.
blended1 = alphaBlend(img, blured, mask)

# Save the result.
cv2.imwrite("blended1.png",blended1)

# Show the result and wait for a key press before closing the window.
cv2.imshow("blened1", blended1)
cv2.waitKey()
cv2.destroyAllWindows()

結果：

円形のマスクをかけたもの。実はこっちの方が速い。

ガウシアンマスク　…　elapsed_time:0.10277056694030762[sec]

円形マスク　　　　…　elapsed_time:0.07920980453491211[sec]

よく見ればマスクの境界が分かる、という程度の違いなので、こっちの方が良いかも。

実装例2

import cv2
import numpy as np

# Path to the input image
imgdir="test.png"
img = cv2.imread(imgdir) # Read in image
# cv2.imread returns None (it does not raise) when the file is missing or
# cannot be decoded; fail here with a clear message instead of crashing
# later on img.shape.
if img is None:
    raise IOError("cv2.imread failed to load image: {}".format(imgdir))

height = img.shape[0] # Get the dimensions
width = img.shape[1]

# Define mask: 255 everywhere ...
mask = 255*np.ones((height,width), dtype='uint8')

# ... except a filled circle at x = 800, y = 200 of radius 100, set to 0.
# This circle is the region that stays untouched.
cv2.circle(mask, (800, 200), 100, 0, -1)

# Apply distance transform to mask: every pixel gets its L2 distance to
# the nearest zero pixel, i.e. to the circle (0 inside the circle).
out = cv2.distanceTransform(mask, cv2.DIST_L2, 3)

# Define scale factor: the averaging window grows by one pixel for every
# `scale_factor` pixels of distance from the circle.
scale_factor = 10

# Create output image that is the same as the original
filtered = img.copy()

# Create floating point copy for precision (astype already copies)
img_float = img.astype('float')

# Number of channels
if len(img_float.shape) == 3:
    num_chan = img_float.shape[2]
else:
    # If there is a single channel, make the images 3D with a singleton
    # dimension to allow for loop to work properly
    num_chan = 1
    img_float = img_float[:,:,None]
    filtered = filtered[:,:,None]

# For each pixel in the input...
# NOTE: this is a pure-Python loop over every pixel, so it is slow on
# large images.
for y in range(height):
    for x in range(width):

        # If distance transform is 0 (inside the circle), keep the
        # original pixel
        if out[y,x] == 0.0:
            continue

        # Calculate M = d / S, with 3 as the smallest window size
        mask_val = max(np.ceil(out[y,x] / scale_factor), 3)
        half = int(mask_val/2)

        # Beginning and ending x and y coordinates of the neighbourhood,
        # clamped to the image bounds (end coordinates are inclusive)
        beginx = max(x - half, 0)
        beginy = max(y - half, 0)
        endx = min(x + half, width - 1)
        endy = min(y + half, height - 1)

        # All pixels of the neighbourhood, every channel
        window = img_float[beginy:endy+1, beginx:endx+1, :]

        # For each channel, the output is the neighbourhood average
        for j in range(num_chan):
            filtered[y,x,j] = int(np.mean(window[:,:,j]))

# Remove singleton dimension if required for display and saving
if num_chan == 1:
    filtered = filtered[:,:,0]

# Save the image
cv2.imwrite("Output.png",filtered)

# Show the image
cv2.imshow('Output', filtered)
cv2.waitKey(0)
cv2.destroyAllWindows()

結果：