📜  使用Python Selenium模块下载 Instagram 帖子

📅  最后修改于: 2022-05-13 01:55:11.509000             🧑  作者: Mango

使用Python Selenium模块下载 Instagram 帖子

在本文中,我们将学习如何使用Python Selenium模块下载个人资料的 Instagram 帖子。


  • 谷歌浏览器或火狐浏览器
  • Chrome 驱动程序(适用于 Google Chrome)或 Gecko 驱动程序(适用于 Mozilla Firefox)
  • Selenium 包:它是通过程序控制 Web 浏览器的强大工具。它适用于所有浏览器和所有主要操作系统,其脚本可以用多种语言编写,例如 Python、Java、C# 等。可以使用以下命令安装:
pip install selenium 
  • Beautiful Soup 包:它是一个 Python 库,用于从 HTML 和 XML 文件中提取数据。它与您最喜欢的解析器一起工作,提供导航、搜索和修改解析树的惯用方式。可以使用以下命令安装:
pip install bs4
  • Requests 包:Requests 库是 Python 中常用的 HTTP 库,用于向指定的 URL 发出 HTTP 请求。可以使用以下命令安装:
pip install requests



# import required modules
import getpass
import os
import time

import requests
import selenium.common.exceptions
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
# get instagram account credentials
username = input('Enter Your User Name ')
# read the password without echoing it to the terminal
password = getpass.getpass('Enter Your Password ')
# assign URL of the profile whose posts will be downloaded; surrounding
# whitespace and slashes are removed so that url.split('/')[-1] still
# yields the bare user name
url = 'https://instagram.com/' + \
    input('Enter User Name Of User For Downloading Posts ').strip().strip('/')

# get URL path
def path():
    """Launch a Chrome session and expose it as the global ``chrome``."""
    global chrome
    # starts a new chrome session; hand the driver location to
    # webdriver.Chrome() here if it is required by the installation
    session = webdriver.Chrome()
    chrome = session

# extract URL
def url_name(url):
    """Open *url* in the shared ``chrome`` session and wait for it to load.

    Args:
        url: Address of the page to open.
    """
    # the web page opens up
    chrome.get(url)
    # pause for 4 sec so the page can finish rendering; otherwise the
    # element lookups that follow may raise NoSuchElementException
    # because the element is not detected yet.
    time.sleep(4)

# login to access post
def login(username, your_password):
    """Log in through the page currently open in the shared ``chrome`` session.

    Args:
        username: User name typed into the ``username`` field.
        your_password: Password typed into the ``password`` field.

    Raises:
        selenium.common.exceptions.NoSuchElementException: if one of the
            looked-up page elements is not present.
    """
    # presumably the "Log In" button (judging by the name log_but);
    # clicking it brings up the login form
    log_but = chrome.find_element_by_class_name("L3NKy")
    log_but.click()
    # let the login form render before looking up its fields
    time.sleep(4)
    # finds the username box
    usern = chrome.find_element_by_name("username")
    # sends the entered username
    usern.send_keys(username)
    # finds the password box
    passw = chrome.find_element_by_name("password")
    # sends the entered password
    passw.send_keys(your_password)
    # sends the enter key
    passw.send_keys(Keys.RETURN)
    # wait for the login to complete and the follow-up dialog to show
    time.sleep(5)
    # Find Not Now  Button and dismiss the dialog
    notn = chrome.find_element_by_class_name("yWX7d")
    notn.click()
    time.sleep(3)

# function to get first post
def first_post():
    """Click the first element with class ``kIKUG`` to open the first post.

    Raises:
        selenium.common.exceptions.NoSuchElementException: if no such
            element exists on the current page.
    """
    # click() returns None, so its result is not kept
    chrome.find_element_by_class_name("kIKUG").click()
    # give the opened post time to render before callers inspect it
    time.sleep(2)

def download_allposts():
    """Download every post of the profile currently open in ``chrome``.

    Files are written into a folder named after the last path segment of
    the global ``url``. A single-media post number N is stored as
    ``contentN``; the items of a multi-media post are stored as
    ``contentN.0``, ``contentN.1`` and so on. The function returns when no
    next-post arrow is found or when an expected element is missing.
    """
    # open First Post
    first_post()
    user_name = url.split('/')[-1]
    # create the folder corresponding to the user name unless it exists
    os.makedirs(user_name, exist_ok=True)

    _save_post(user_name+'/'+'content1')
    c = 2
    while True:
        next_el = next_post()
        if not next_el:
            # no arrow to a further post: everything has been saved
            break
        next_el.click()
        # let the next post load before reading it
        time.sleep(1.3)
        try:
            _save_post(user_name+'/'+'content'+str(c))
        except selenium.common.exceptions.NoSuchElementException:
            print("finished")
            return
        c += 1


def _save_post(prefix):
    """Save the post that is currently open, using *prefix* as file name base."""
    # Check if Posts contains multiple images or videos
    nescheck = nested_check()
    if not nescheck:
        save_content('_97aPb', prefix)
        return
    count_img = 0
    while nescheck:
        elem_img = chrome.find_element_by_class_name('rQDP3')
        # Function to save nested images
        save_multiple(prefix+'.'+str(count_img), elem_img)
        count_img += 1
        # advance to the next item; without this click the loop would
        # keep saving the same item forever
        nescheck.click()
        time.sleep(1)
        nescheck = nested_check()
    # no arrow is left, so the last item is showing: pass last_img_flag True
    save_multiple(prefix+'.'+str(count_img), elem_img, last_img_flag=1)

# function to get next post
def next_post():
    """Look up the arrow that leads to the next post.

    Returns:
        The element with class ``coreSpriteRightPaginationArrow`` when it
        is present, otherwise ``0`` (falsy and equal to ``False``).
    """
    try:
        nex = chrome.find_element_by_class_name("coreSpriteRightPaginationArrow")
    except selenium.common.exceptions.NoSuchElementException:
        return 0
    return nex

# Function to save content of the current post
def save_content(class_name,img_name):
    """Download the video or image of the current post into *img_name*.

    Args:
        class_name: Class of the element whose inner HTML holds the media.
        img_name: Path of the file to write.

    If no element with *class_name* exists, a message is printed and
    nothing is written.
    """
    try:
        pic = chrome.find_element_by_class_name(class_name)
    except selenium.common.exceptions.NoSuchElementException:
        print("Either This user has no images or you haven't followed this user or something went wrong")
        # without the element there is nothing to download
        return
    html = pic.get_attribute('innerHTML')
    soup = bs(html,'html.parser')
    # prefer the <video> source; fall back to the <img> source
    link = soup.find('video')
    if link:
        link = link['src']
    else:
        link = soup.find('img')['src']
    response = requests.get(link)
    with open(img_name, 'wb') as f:
        f.write(response.content)

# Function to save multiple posts
def save_multiple(img_name,elem,last_img_flag = False):
    """Download one item of a multi-media post into *img_name*.

    Args:
        img_name: Path of the file to write.
        elem: Element whose inner HTML contains the ``<ul>`` of items.
        last_img_flag: When true, the last ``<li>`` is saved; otherwise
            the middle ``<li>`` is saved.
    """
    inner = elem.get_attribute('innerHTML')
    html = bs(inner,'html.parser')
    # only the first <ul> is considered
    biglist = html.find_all('ul')[0]
    list_images = biglist.find_all('li')
    if last_img_flag:
        user_image = list_images[-1]
    else:
        # presumably the visible item sits in the middle of the <li>
        # list - TODO confirm against the page markup
        user_image = list_images[(len(list_images)//2)]
    # prefer the <video> source; fall back to the <img> source
    video = user_image.find('video')
    if video:
        link = video['src']
    else:
        link = user_image.find('img')['src']
    response = requests.get(link)
    with open(img_name, 'wb') as f:
        f.write(response.content)

# function to check if the post is nested
def nested_check():
    """Look up the arrow that leads to the next item inside the current post.

    Returns:
        The element with class ``coreSpriteRightChevron`` when it is
        present, otherwise ``0`` (falsy).
    """
    try:
        # NOTE(review): the class-name literal ends with two spaces -
        # confirm whether they are intended
        nes_nex = chrome.find_element_by_class_name('coreSpriteRightChevron  ')
    except selenium.common.exceptions.NoSuchElementException:
        return 0
    return nes_nex

# Driver Code
# the browser session has to exist before any other step can use ``chrome``
path()
try:
    url_name(url)
    login(username, password)
    download_allposts()
finally:
    # always shut the browser down, even when a step above fails
    chrome.quit()

第 2 步:启动浏览器新会话的函数。您可能需要在 webdriver.Chrome() 函数中添加 Web 驱动程序的路径,这取决于您的安装方式。


# get URL path
def path():
    """Launch a Chrome session and expose it as the global ``chrome``."""
    global chrome
    # starts a new chrome session; hand the driver location to
    # webdriver.Chrome() here if it is required by the installation
    session = webdriver.Chrome()
    chrome = session



# extract URL
def url_name(url):
    """Open *url* in the shared ``chrome`` session and wait for it to load.

    Args:
        url: Address of the page to open.
    """
    # the web page opens up
    chrome.get(url)
    # pause for 4 sec so the page can finish rendering; otherwise the
    # element lookups that follow may raise NoSuchElementException
    # because the element is not detected yet.
    time.sleep(4)

第 4 步:输入您的登录信息的函数。


# login to access post
def login(username, your_password):
    """Log in through the page currently open in the shared ``chrome`` session.

    Args:
        username: User name typed into the ``username`` field.
        your_password: Password typed into the ``password`` field.

    Raises:
        selenium.common.exceptions.NoSuchElementException: if one of the
            looked-up page elements is not present.
    """
    # presumably the "Log In" button (judging by the name log_but);
    # clicking it brings up the login form
    log_but = chrome.find_element_by_class_name("L3NKy")
    log_but.click()
    # let the login form render before looking up its fields
    time.sleep(4)
    # finds the username box
    usern = chrome.find_element_by_name("username")
    # sends the entered username
    usern.send_keys(username)
    # finds the password box
    passw = chrome.find_element_by_name("password")
    # sends the entered password
    passw.send_keys(your_password)
    # sends the enter key
    passw.send_keys(Keys.RETURN)
    # wait for the login to complete and the follow-up dialog to show
    time.sleep(5)
    # Find Not Now  Button and dismiss the dialog
    notn = chrome.find_element_by_class_name("yWX7d")
    notn.click()
    time.sleep(3)



# function to get first post
def first_post():
    """Click the first element with class ``kIKUG`` to open the first post.

    Raises:
        selenium.common.exceptions.NoSuchElementException: if no such
            element exists on the current page.
    """
    # click() returns None, so its result is not kept
    chrome.find_element_by_class_name("kIKUG").click()
    # give the opened post time to render before callers inspect it
    time.sleep(2)



def download_allposts():
    """Download every post of the profile currently open in ``chrome``.

    Files are written into a folder named after the last path segment of
    the global ``url``. A single-media post number N is stored as
    ``contentN``; the items of a multi-media post are stored as
    ``contentN.0``, ``contentN.1`` and so on. The function returns when no
    next-post arrow is found or when an expected element is missing.
    """
    # open First Post
    first_post()
    user_name = url.split('/')[-1]
    # create the folder corresponding to the user name unless it exists
    os.makedirs(user_name, exist_ok=True)

    _save_post(user_name+'/'+'content1')
    c = 2
    while True:
        next_el = next_post()
        if not next_el:
            # no arrow to a further post: everything has been saved
            break
        next_el.click()
        # let the next post load before reading it
        time.sleep(1.3)
        try:
            _save_post(user_name+'/'+'content'+str(c))
        except selenium.common.exceptions.NoSuchElementException:
            print("finished")
            return
        c += 1


def _save_post(prefix):
    """Save the post that is currently open, using *prefix* as file name base."""
    # Check if Posts contains multiple images or videos
    nescheck = nested_check()
    if not nescheck:
        save_content('_97aPb', prefix)
        return
    count_img = 0
    while nescheck:
        elem_img = chrome.find_element_by_class_name('rQDP3')
        # Function to save nested images
        save_multiple(prefix+'.'+str(count_img), elem_img)
        count_img += 1
        # advance to the next item; without this click the loop would
        # keep saving the same item forever
        nescheck.click()
        time.sleep(1)
        nescheck = nested_check()
    # no arrow is left, so the last item is showing: pass last_img_flag True
    save_multiple(prefix+'.'+str(count_img), elem_img, last_img_flag=1)



# function to get next post
def next_post():
    """Look up the arrow that leads to the next post.

    Returns:
        The element with class ``coreSpriteRightPaginationArrow`` when it
        is present, otherwise ``0`` (falsy and equal to ``False``).
    """
    try:
        nex = chrome.find_element_by_class_name("coreSpriteRightPaginationArrow")
    except selenium.common.exceptions.NoSuchElementException:
        return 0
    return nex



# Function to save content of the current post
def save_content(class_name,img_name):
    """Download the video or image of the current post into *img_name*.

    Args:
        class_name: Class of the element whose inner HTML holds the media.
        img_name: Path of the file to write.

    If no element with *class_name* exists, a message is printed and
    nothing is written.
    """
    try:
        pic = chrome.find_element_by_class_name(class_name)
    except selenium.common.exceptions.NoSuchElementException:
        print("Either This user has no images or you haven't followed this user or something went wrong")
        # without the element there is nothing to download
        return
    html = pic.get_attribute('innerHTML')
    soup = bs(html,'html.parser')
    # prefer the <video> source; fall back to the <img> source
    link = soup.find('video')
    if link:
        link = link['src']
    else:
        link = soup.find('img')['src']
    response = requests.get(link)
    with open(img_name, 'wb') as f:
        f.write(response.content)

第 9 步:保存嵌套帖子的函数。


# Function to save multiple posts
def save_multiple(img_name,elem,last_img_flag = False):
    """Download one item of a multi-media post into *img_name*.

    Args:
        img_name: Path of the file to write.
        elem: Element whose inner HTML contains the ``<ul>`` of items.
        last_img_flag: When true, the last ``<li>`` is saved; otherwise
            the middle ``<li>`` is saved.
    """
    inner = elem.get_attribute('innerHTML')
    html = bs(inner,'html.parser')
    # only the first <ul> is considered
    biglist = html.find_all('ul')[0]
    list_images = biglist.find_all('li')
    if last_img_flag:
        user_image = list_images[-1]
    else:
        # presumably the visible item sits in the middle of the <li>
        # list - TODO confirm against the page markup
        user_image = list_images[(len(list_images)//2)]
    # prefer the <video> source; fall back to the <img> source
    video = user_image.find('video')
    if video:
        link = video['src']
    else:
        link = user_image.find('img')['src']
    response = requests.get(link)
    with open(img_name, 'wb') as f:
        f.write(response.content)

第 10 步:检查帖子是否嵌套的函数。


# function to check if the post is nested
def nested_check():
    """Look up the arrow that leads to the next item inside the current post.

    Returns:
        The element with class ``coreSpriteRightChevron`` when it is
        present, otherwise ``0`` (falsy).
    """
    try:
        # NOTE(review): the class-name literal ends with two spaces -
        # confirm whether they are intended
        nes_nex = chrome.find_element_by_class_name('coreSpriteRightChevron  ')
    except selenium.common.exceptions.NoSuchElementException:
        return 0
    return nes_nex

第 11 步:在驱动程序代码中调用所需的函数。


# Driver Code
# the browser session has to exist before any other step can use ``chrome``
path()
try:
    url_name(url)
    login(username, password)
    download_allposts()
finally:
    # always shut the browser down, even when a step above fails
    chrome.quit()



注意:如果您是 Windows 用户,帖子将以 .file 扩展名保存;请使用可以打开图像和视频的应用程序打开这些文件(Instagram 帖子只包含媒体,即图像或视频)。