使用Python进行图像字幕

图像字幕（image captioning）是深度学习领域中一个非常经典且具有挑战性的问题：根据图像的特征为其生成文本描述。不过本文不会使用深度学习，而只是学习如何使用 PIL 为图像添加文字说明。

图像预处理是 Python PIL 库提供的一项非常实用的功能。我们不仅可以更改图像的大小、模式和方向，还可以在图像上绘图或书写文字。

所需的库（urllib、glob 和 shutil 属于 Python 标准库，无需安装；PIL 需通过 `pip install Pillow` 安装）：

urllib
requests
PIL
glob
shutil

首先要遵循的步骤 -

在运行代码之前，先通过此链接下载字体文件（.ttf），并以代码中使用的文件名“Arial Bold.ttf”保存在当前工作目录。
预先制作名称为“CaptionedImages”的文件夹，将存储输出的字幕图像。

下面是使用Python的逐步实现：

第1步：

# importing required libraries
# NOTE: a bare `import urllib` does not load the `urllib.request`
# submodule; importing it explicitly guarantees that
# `urllib.request.urlretrieve` is available (this also binds `urllib`).
import urllib.request
import requests
import os

# (url, local file name) pairs of the sample images
image_sources = [
    ("https://i.ibb.co/xY4DJJ5/img1.jpg", "img1.jpg"),
    ("https://i.ibb.co/Gnd1Y1L/img2.jpg", "img2.jpg"),
    ("https://i.ibb.co/Z6JgS1L/img3.jpg", "img3.jpg"),
]

# retrieving each image using its url and storing it under the
# given file name in the current working directory
for image_url, file_name in image_sources:
    urllib.request.urlretrieve(image_url, file_name)

print('Images downloaded')

# get current working directory path
path = os.getcwd()


# captions that will be written on the images
captionarr = [
    "This is the first caption",
    "This is the second caption",
    "This is the third caption",
]

第2步：

# importing necessary functions from PIL
from PIL import Image
from PIL import ImageFont
from PIL import ImageDraw 
  
# print(os.getcwd())
  
# checking the file extension:
# .jpg, .jpeg or .png
def ext(file):
    """Return the tail of *file* starting at its first ".jpg".

    For a name such as "img1.jpg" the result is ".jpg". When *file* does
    not contain ".jpg" at all, an empty string is returned.
    """
    index = file.find(".jpg")
    # str.find returns -1 when the substring is absent; slicing from -1
    # would hand back the last character of the name instead of nothing.
    if index == -1:
        return ""
    return file[index:]
  
def ext2(file):
    """Return the tail of *file* starting at its first ".jpeg".

    For a name such as "photo.jpeg" the result is ".jpeg". When *file*
    does not contain ".jpeg" at all, an empty string is returned.
    """
    index = file.find(".jpeg")
    # str.find returns -1 when the substring is absent; slicing from -1
    # would hand back the last character of the name instead of nothing.
    if index == -1:
        return ""
    return file[index:]
  
def ext3(file):
    """Return the tail of *file* starting at its first ".png".

    For a name such as "logo.png" the result is ".png". When *file* does
    not contain ".png" at all, an empty string is returned.
    """
    index = file.find(".png")
    # str.find returns -1 when the substring is absent; slicing from -1
    # would hand back the last character of the name instead of nothing.
    if index == -1:
        return ""
    return file[index:]
  
  
# converting text from lowercase to uppercase
def convert(words):
    """Return the items of *words* upper-cased and joined without a separator.

    Iterating a string yields its characters, so passing a plain string
    gives back that string in upper case.
    """
    return "".join(piece.upper() for piece in words)
  
# Upper-case the first three captions in one pass.
caption_first, caption_second, caption_third = (
    convert(captionarr[position]) for position in range(3)
)

# Show the converted captions, one per line.
print(caption_first, caption_second, caption_third, sep="\n")
  
  
# Folder that receives the captioned copies. It is created on demand so
# that saving cannot fail merely because the folder is missing.
output_dir = "CaptionedImages"
os.makedirs(output_dir, exist_ok=True)

# Every image is scaled to this width; the height follows the aspect ratio.
basewidth = 1200

# Captions in the order they are handed out; any image after the third
# one reuses the third caption.
captions = [caption_first, caption_second, caption_third]

# Number of images captioned so far; selects the caption for the next one.
count = 0

for f in os.listdir('.'):
    # Checking for file types if jpg, png
    # or jpeg excluding other files
    if not (ext(f) == '.jpg' or ext2(f) == '.jpeg' or ext3(f) == '.png'):
        continue

    try:
        # The context manager closes the source file once it has been read.
        with Image.open(f) as source:
            # Resizing images to the same width, keeping the aspect ratio
            wpercent = basewidth / float(source.size[0])
            hsize = int(float(source.size[1]) * wpercent)
            # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is
            # the same filter under its current name.
            img = source.resize((basewidth, hsize), Image.LANCZOS)
        new_width, new_height = img.size

        # changing image mode if not in RGB
        if img.mode != 'RGB':
            img = img.convert('RGB')

        draw = ImageDraw.Draw(img)
        # initializing which font will be chosen by us
        font = ImageFont.truetype("Arial Bold.ttf", 35)

        # Red caption near the bottom-left corner of the resized image.
        caption = captions[min(count, len(captions) - 1)]
        draw.text((new_width / 15 + 25, new_height - 100),
                  caption, (255, 0, 0), font=font,
                  align="center")

        img.save(os.path.join(output_dir, f))
        print('done')
        count = count + 1

    except OSError as error:
        # Carry on with the remaining files, but report what was skipped
        # instead of silently hiding the failure.
        print('skipped {}: {}'.format(f, error))

第 3 步：
根据上次修改时间对输出文件进行排序，避免它们按字母顺序或其他杂乱的顺序排列。

import os
import glob
import shutil
  
# changing directory to CaptionedImages
# (a plain relative name works on every OS, unlike a ".\\" prefix)
os.chdir("CaptionedImages")

# collecting the names of all entries in the directory
fnames = os.listdir('.')

# sorting the file names on the basis of last modified time;
# reverse=True sorts in descending order, i.e. the most recently
# modified file comes first
fnames.sort(key=os.path.getmtime, reverse=True)

输出：