如何使用Python在 Excel 中创建文件、文件夹和子文件夹的列表?
在本文中,我们将学习如何创建文件、文件夹和子文件夹的列表,然后使用Python将它们导出到 Excel。我们将使用下面解释的一些文件夹遍历方法创建名称和路径列表,并使用 openpyxl 或 pandas 模块将它们存储在 Excel 工作表中。
输入:下图表示目录的结构。
遍历文件、文件夹和子文件夹
以下函数是遍历文件夹的方法,并将文件/文件夹的名称和路径存储在列表中。
方法一:使用append_path_name(path, name_list, path_list, glob)
这是下面各个文件夹遍历函数都会用到的一个重要辅助函数。它的作用是判断给定路径使用的是 Windows 还是 Linux 风格的路径分隔符(两者的分隔符不同),并将文件或文件夹的名称和路径分别追加到 name_list 和 path_list。
注意: Windows 使用“\”,Linux 使用“/”作为路径分隔符。由于“\”在Python字符串字面量中是转义字符,我们需要在路径中使用“\\”来表示一个“\”。
方法:
- 该函数将首先使用以下命令查找路径是否包含“\\”:
# Returns the index of the first "\\" found
# in the given path, or -1 if there is none.
path.find("\\")
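注意:find() 返回的是“\\”首次出现的索引(未找到时返回 -1),而不是出现次数。如果路径中包含“\\”,则表示这是 Windows 风格的路径,将执行第一个代码块;否则将执行代表 Linux 的第二个代码块。因此更稳妥的判断是检查结果是否不等于 -1(或直接使用 "\\" in path):若只判断“大于零”,以“\”开头的路径会被误判为 Linux 路径。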
- 我们将根据当前的操作系统拆分路径并将其存储在一个临时列表中。
# Windows
temp = path.split("\\")
# Linux
temp = path.split("/")
- 我们将分别将文件或文件夹的名称和路径附加到 name_list 和 path_list。
# temp[-1] gets the last value present in
# the temporary list that represents
# the file or folder name.
name_list.append(temp[-1])
path_list.append(path)
- 如果 glob 变量为 True,则父路径将与 glob.iglob() 方法递归遍历所需的通配符模式(“**”加“*”)连接。
# Windows
path = os.path.join(path, "**\\*")
# Linux
path = os.path.join(path, "**/*")
例子:
Python3
import os
def append_path_name(path, name_list, path_list, glob):
    """Record the last component of ``path`` and optionally build a glob pattern.

    The path is treated as Windows-style when it contains a backslash and as
    Linux-style otherwise. The text after the last separator is appended to
    ``name_list`` and the unmodified ``path`` is appended to ``path_list``;
    both lists are modified in place.

    Args:
        path: File or folder path as a string.
        name_list: List that receives the file/folder name.
        path_list: List that receives ``path``.
        glob: When true, also build the recursive pattern (``**`` followed
            by ``*``) that ``glob.iglob`` needs to walk everything below
            ``path``.

    Returns:
        ``(pattern, name_list, path_list)`` when ``glob`` is true, otherwise
        ``(name_list, path_list)``.
    """
    # str.find() returns an index (-1 when absent), not a count, so testing
    # "> 0" misclassified paths that start with a backslash. A membership
    # test states the intent directly.
    separator = "\\" if "\\" in path else "/"

    # The text after the final separator is the file/folder name.
    name_list.append(path.split(separator)[-1])
    path_list.append(path)

    if glob:
        # Only find_using_glob asks for this: it needs a pattern that iglob
        # can expand recursively.
        pattern = os.path.join(path, "**" + separator + "*")
        return pattern, name_list, path_list
    return name_list, path_list
name_list, path_list = append_path_name("/content/sample_data", [], [], False)
print(name_list)
print(path_list)
Python3
import os
def find_using_os_walk(path, name_list, path_list):
    """Collect the names and paths of everything below ``path`` via os.walk.

    For each folder that ``os.walk`` yields, the folder itself is recorded
    through ``append_path_name`` and then every file directly inside it is
    recorded. Both lists are extended in place and also returned.

    Args:
        path: Root folder to traverse.
        name_list: List that receives file/folder names.
        path_list: List that receives the matching paths.

    Returns:
        The tuple ``(name_list, path_list)``.
    """
    for folder, _subfolders, file_names in os.walk(path):
        # Record the folder first, then the files it contains.
        name_list, path_list = append_path_name(
            folder, name_list, path_list, False)
        name_list.extend(file_names)
        path_list.extend(
            os.path.join(folder, entry) for entry in file_names)
    return name_list, path_list
name_list, path_list = find_using_os_walk("/content/sample_data", [], [])
print(name_list)
print(path_list)
Python3
import os
def find_using_scandir(path, name_list, path_list):
    """Collect the names and paths of everything below ``path`` via os.scandir.

    The folder ``path`` is recorded through ``append_path_name``; each entry
    inside it is then either recursed into (directories) or recorded
    directly (everything else). Both lists are extended in place and also
    returned.

    Args:
        path: Folder to traverse.
        name_list: List that receives file/folder names.
        path_list: List that receives the matching paths.

    Returns:
        The tuple ``(name_list, path_list)``.
    """
    name_list, path_list = append_path_name(
        path, name_list, path_list, False)
    # Use scandir as a context manager so the directory handle is released
    # promptly, even if an error interrupts the traversal.
    with os.scandir(path) as entries:
        for entry in entries:
            if entry.is_dir():
                # Sub-folder: the recursive call records it and its contents.
                find_using_scandir(entry.path, name_list, path_list)
            else:
                name_list.append(entry.name)
                path_list.append(entry.path)
    return name_list, path_list
name_list, path_list = find_using_scandir("/content/sample_data", [], [])
print(name_list)
print(path_list)
Python3
import os
def find_using_listdir(path, name_list, path_list):
    """Collect the names and paths of everything below ``path`` via os.listdir.

    The folder ``path`` is recorded through ``append_path_name``. Every name
    returned by ``os.listdir`` is joined to ``path``; files are recorded
    directly and directories are handled by a recursive call. Both lists are
    extended in place and also returned.

    Args:
        path: Folder to traverse.
        name_list: List that receives file/folder names.
        path_list: List that receives the matching paths.

    Returns:
        The tuple ``(name_list, path_list)``.
    """
    name_list, path_list = append_path_name(
        path, name_list, path_list, False)
    for entry in os.listdir(path):
        full_path = os.path.join(path, entry)
        if not os.path.isdir(full_path):
            # Plain file: record it and move on to the next entry.
            name_list.append(entry)
            path_list.append(full_path)
            continue
        # Directory: the recursive call records it and everything inside.
        find_using_listdir(full_path, name_list, path_list)
    return name_list, path_list
name_list, path_list = find_using_listdir("/content/sample_data", [], [])
print(name_list)
print(path_list)
Python3
import glob
def find_using_glob(path, name_list, path_list):
    """Collect the names and paths of everything below ``path`` via glob.iglob.

    ``append_path_name`` records the parent folder and returns the recursive
    pattern built from it; every path that ``glob.iglob`` yields for that
    pattern is then recorded through ``append_path_name`` as well. Both lists
    are extended in place and also returned.

    NOTE(review): per the glob documentation, wildcard patterns do not match
    names that start with a dot by default, so hidden entries may be absent
    from the result -- confirm this is acceptable.

    Args:
        path: Root folder to traverse.
        name_list: List that receives file/folder names.
        path_list: List that receives the matching paths.

    Returns:
        The tuple ``(name_list, path_list)``.
    """
    # Passing True asks the helper for the pattern iglob should expand.
    pattern, name_list, path_list = append_path_name(
        path, name_list, path_list, True)
    for match in glob.iglob(pattern, recursive=True):
        name_list, path_list = append_path_name(
            match, name_list, path_list, False)
    return name_list, path_list
name_list, path_list = find_using_glob("/content/sample_data", [], [])
print(name_list)
print(path_list)
Python3
def create_excel_using_openpyxl(name_list, path_list,
                                path):
    """Write file/folder names and paths to ``Final.xlsx`` using openpyxl.

    Row 1 holds the headings "Name" and "Path"; each following row holds one
    entry of ``name_list`` (column A) and the entry of ``path_list`` at the
    same position (column B). Columns A and B are widened to their longest
    value.

    Args:
        name_list: File/folder names.
        path_list: Paths matching ``name_list`` by position.
        path: Unused. Kept so existing three-argument calls keep working;
            the workbook is always saved as ``Final.xlsx``.

    Raises:
        IndexError: If ``path_list`` is shorter than ``name_list``.
    """
    # Imported here so the example runs on its own.
    from openpyxl import Workbook

    work_book = Workbook()
    work_sheet = work_book.active

    # Header row first, then one row per entry. Rows are appended directly,
    # so no local variable shadows the ``path`` parameter any more.
    work_sheet.append(["Name", "Path"])
    for index, entry_name in enumerate(name_list):
        work_sheet.append([entry_name, path_list[index]])

    # Size each column once, to its longest value, instead of reassigning
    # the width on every row. Nothing to size when there is no data.
    if name_list:
        written_paths = path_list[:len(name_list)]
        work_sheet.column_dimensions["A"].width = max(map(len, name_list))
        work_sheet.column_dimensions["B"].width = max(map(len, written_paths))

    work_book.save(filename="Final.xlsx")
# The function does not use its third argument; pass the scanned folder
# explicitly because no module-level ``path`` variable is defined here.
create_excel_using_openpyxl(name_list, path_list, "/content/sample_data")
Python3
def create_excel_using_pandas_dataframe(name_list,
                                        path_list, path):
    """Write file/folder names and paths to ``Final.xlsx`` using pandas.

    A DataFrame with the columns "Name" and "Path" is built from the two
    lists and exported without the index column.

    Args:
        name_list: File/folder names.
        path_list: Paths matching ``name_list`` by position.
        path: Unused. Kept so existing three-argument calls keep working;
            the data is always saved as ``Final.xlsx``.
    """
    # Imported here so the example runs on its own; the snippet never
    # imported pandas under the ``pd`` alias it relies on.
    import pandas as pd

    frame = {"Name": name_list, "Path": path_list}
    df_data = pd.DataFrame(frame)
    # index=False keeps the DataFrame's row index out of the sheet.
    df_data.to_excel('Final.xlsx', index=False)
# The function does not use its third argument; pass the scanned folder
# explicitly because no module-level ``path`` variable is defined here.
create_excel_using_pandas_dataframe(name_list,
                                    path_list, "/content/sample_data")
输出:
[‘sample_data’, ‘anscombe.json’, ‘california_housing_train.csv’, ‘F2’, ‘SF2’, ‘california_housing_test.csv’,
‘.ipynb_checkpoints’, ‘.ipynb_checkpoints’, ‘F1’, ‘mnist_test.csv’, ‘README.md’, ‘.ipynb_checkpoints’, ‘SF1’,
‘mnist_train_small.csv’]
[‘/content/sample_data’, ‘/content/sample_data/anscombe.json’,
‘/content/sample_data/california_housing_train.csv’, ‘/content/sample_data/F2’,
‘/content/sample_data/F2/SF2’, ‘/content/sample_data/F2/SF2/california_housing_test.csv’,
‘/content/sample_data/F2/.ipynb_checkpoints’, ‘/content/sample_data/.ipynb_checkpoints’,
‘/content/sample_data/F1’, ‘/content/sample_data/F1/mnist_test.csv’, ‘/content/sample_data/F1/README.md’,
‘/content/sample_data/F1/.ipynb_checkpoints’, ‘/content/sample_data/F1/SF1’,
‘/content/sample_data/F1/SF1/mnist_train_small.csv’]
方法二:使用 find_using_os_walk(path, name_list, path_list)
此方法通过在给定路径中自上而下或自下而上遍历树来生成目录树中的文件名。
Syntax : os.walk(path)
方法:
1.使用 os.walk(path) 方法启动一个 for 循环,它生成一个包含当前目录在 root 中的路径和 files 中的文件列表的元组。
for root, _, files in os.walk(path):
2.调用append_path_name函数,通过传递当前目录路径来存储目录的名称和路径。
name_list, path_list = append_path_name(
root, name_list, path_list, False)
3.迭代文件并将找到的文件的名称和路径存储在文件夹中。
# Joins the folder path and the
# file name to generate file path
file_path = os.path.join(root, file_name)
# Appends file name and file path to
# name_list and path_list respectively.
name_list.append(file_name)
path_list.append(file_path)
例子:
Python3
import os
def find_using_os_walk(path, name_list, path_list):
    """Collect the names and paths of everything below ``path`` via os.walk.

    For each folder that ``os.walk`` yields, the folder itself is recorded
    through ``append_path_name`` and then every file directly inside it is
    recorded. Both lists are extended in place and also returned.

    Args:
        path: Root folder to traverse.
        name_list: List that receives file/folder names.
        path_list: List that receives the matching paths.

    Returns:
        The tuple ``(name_list, path_list)``.
    """
    for folder, _subfolders, file_names in os.walk(path):
        # Record the folder first, then the files it contains.
        name_list, path_list = append_path_name(
            folder, name_list, path_list, False)
        name_list.extend(file_names)
        path_list.extend(
            os.path.join(folder, entry) for entry in file_names)
    return name_list, path_list
name_list, path_list = find_using_os_walk("/content/sample_data", [], [])
print(name_list)
print(path_list)
输出:
[‘sample_data’, ‘anscombe.json’, ‘california_housing_train.csv’, ‘F2’, ‘SF2’, ‘california_housing_test.csv’,
‘.ipynb_checkpoints’, ‘.ipynb_checkpoints’, ‘F1’, ‘mnist_test.csv’, ‘README.md’, ‘.ipynb_checkpoints’, ‘SF1’,
‘mnist_train_small.csv’]
[‘/content/sample_data’, ‘/content/sample_data/anscombe.json’,
‘/content/sample_data/california_housing_train.csv’, ‘/content/sample_data/F2’,
‘/content/sample_data/F2/SF2’, ‘/content/sample_data/F2/SF2/california_housing_test.csv’,
‘/content/sample_data/F2/.ipynb_checkpoints’, ‘/content/sample_data/.ipynb_checkpoints’,
‘/content/sample_data/F1’, ‘/content/sample_data/F1/mnist_test.csv’, ‘/content/sample_data/F1/README.md’,
‘/content/sample_data/F1/.ipynb_checkpoints’, ‘/content/sample_data/F1/SF1’,
‘/content/sample_data/F1/SF1/mnist_train_small.csv’]
方法三:使用 find_using_scandir(path, name_list, path_list)
此函数返回与路径给定的目录中的条目对应的 os.DirEntry 对象的迭代器。
Syntax : os.scandir(path)
方法:
1 、调用append_path_name函数,通过传递当前目录路径来存储目录的名称和路径。
name_list, path_list = append_path_name(
path, name_list, path_list, False)
2.使用 os.scandir(path) 方法启动一个 for 循环,该方法返回一个包含文件/文件夹当前名称和路径的对象。
for curr_path_obj in os.scandir(path):
3.如果当前路径是目录,则函数调用自身递归遍历文件夹并存储步骤 1 中的文件夹名称和路径。
if curr_path_obj.is_dir() == True:
file_path = curr_path_obj.path
find_using_scandir(file_path, name_list, path_list)
4.否则文件名和路径分别存储在name_list 和path_list 中。
file_name = curr_path_obj.name
file_path = curr_path_obj.path
name_list.append(file_name)
path_list.append(file_path)
例子:
Python3
import os
def find_using_scandir(path, name_list, path_list):
    """Collect the names and paths of everything below ``path`` via os.scandir.

    The folder ``path`` is recorded through ``append_path_name``; each entry
    inside it is then either recursed into (directories) or recorded
    directly (everything else). Both lists are extended in place and also
    returned.

    Args:
        path: Folder to traverse.
        name_list: List that receives file/folder names.
        path_list: List that receives the matching paths.

    Returns:
        The tuple ``(name_list, path_list)``.
    """
    name_list, path_list = append_path_name(
        path, name_list, path_list, False)
    # Use scandir as a context manager so the directory handle is released
    # promptly, even if an error interrupts the traversal.
    with os.scandir(path) as entries:
        for entry in entries:
            if entry.is_dir():
                # Sub-folder: the recursive call records it and its contents.
                find_using_scandir(entry.path, name_list, path_list)
            else:
                name_list.append(entry.name)
                path_list.append(entry.path)
    return name_list, path_list
name_list, path_list = find_using_scandir("/content/sample_data", [], [])
print(name_list)
print(path_list)
输出:
[‘sample_data’, ‘anscombe.json’, ‘california_housing_train.csv’, ‘F2’, ‘SF2’, ‘california_housing_test.csv’,
‘.ipynb_checkpoints’, ‘.ipynb_checkpoints’, ‘F1’, ‘mnist_test.csv’, ‘README.md’, ‘.ipynb_checkpoints’, ‘SF1’,
‘mnist_train_small.csv’]
[‘/content/sample_data’, ‘/content/sample_data/anscombe.json’,
‘/content/sample_data/california_housing_train.csv’, ‘/content/sample_data/F2’,
‘/content/sample_data/F2/SF2’, ‘/content/sample_data/F2/SF2/california_housing_test.csv’,
‘/content/sample_data/F2/.ipynb_checkpoints’, ‘/content/sample_data/.ipynb_checkpoints’,
‘/content/sample_data/F1’, ‘/content/sample_data/F1/mnist_test.csv’, ‘/content/sample_data/F1/README.md’,
‘/content/sample_data/F1/.ipynb_checkpoints’, ‘/content/sample_data/F1/SF1’,
‘/content/sample_data/F1/SF1/mnist_train_small.csv’]
方法四:使用 find_using_listdir(path, name_list, path_list)
此函数获取给定路径中所有文件和目录的列表。
Syntax : os.listdir(path)
方法:
1 、调用append_path_name函数,通过传递当前目录路径来存储目录的名称和路径。
name_list, path_list = append_path_name(
path, name_list, path_list, False)
2.使用 os.listdir(path) 方法启动 for 循环,该方法返回当前路径中存在的文件和文件夹名称列表。
for curr_name in os.listdir(path):
3.将文件夹或文件的名称与当前路径连接起来。
curr_path = os.path.join(path, curr_name)
4.如果当前路径是目录,则函数调用自身递归遍历文件夹并存储步骤 1 中的文件夹名称和路径。
if os.path.isdir(curr_path) == True:
find_using_listdir(curr_path, name_list, path_list)
5.否则文件名和路径分别存储在 name_list 和 path_list 中。
name_list.append(curr_name)
path_list.append(curr_path)
上述函数的代码:
Python3
import os
def find_using_listdir(path, name_list, path_list):
    """Collect the names and paths of everything below ``path`` via os.listdir.

    The folder ``path`` is recorded through ``append_path_name``. Every name
    returned by ``os.listdir`` is joined to ``path``; files are recorded
    directly and directories are handled by a recursive call. Both lists are
    extended in place and also returned.

    Args:
        path: Folder to traverse.
        name_list: List that receives file/folder names.
        path_list: List that receives the matching paths.

    Returns:
        The tuple ``(name_list, path_list)``.
    """
    name_list, path_list = append_path_name(
        path, name_list, path_list, False)
    for entry in os.listdir(path):
        full_path = os.path.join(path, entry)
        if not os.path.isdir(full_path):
            # Plain file: record it and move on to the next entry.
            name_list.append(entry)
            path_list.append(full_path)
            continue
        # Directory: the recursive call records it and everything inside.
        find_using_listdir(full_path, name_list, path_list)
    return name_list, path_list
name_list, path_list = find_using_listdir("/content/sample_data", [], [])
print(name_list)
print(path_list)
输出:
[‘sample_data’, ‘anscombe.json’, ‘california_housing_train.csv’, ‘F2’, ‘SF2’, ‘california_housing_test.csv’,
‘.ipynb_checkpoints’, ‘.ipynb_checkpoints’, ‘F1’, ‘mnist_test.csv’, ‘README.md’, ‘.ipynb_checkpoints’, ‘SF1’,
‘mnist_train_small.csv’]
[‘/content/sample_data’, ‘/content/sample_data/anscombe.json’,
‘/content/sample_data/california_housing_train.csv’, ‘/content/sample_data/F2’,
‘/content/sample_data/F2/SF2’, ‘/content/sample_data/F2/SF2/california_housing_test.csv’,
‘/content/sample_data/F2/.ipynb_checkpoints’, ‘/content/sample_data/.ipynb_checkpoints’,
‘/content/sample_data/F1’, ‘/content/sample_data/F1/mnist_test.csv’, ‘/content/sample_data/F1/README.md’,
‘/content/sample_data/F1/.ipynb_checkpoints’, ‘/content/sample_data/F1/SF1’,
‘/content/sample_data/F1/SF1/mnist_train_small.csv’]
方法五:使用 find_using_glob(path, name_list, path_list)
此函数返回一个迭代器,该迭代器产生与 glob() 相同的值,但实际上并未同时存储它们。
Syntax : glob.iglob(path, recursive=True)
方法:
1.调用append_path_name函数存储父目录的名称和路径,并返回glob方法需要的修改后的路径,因为最后一个参数为True。
path, name_list, path_list = append_path_name(
path, name_list, path_list, True)
2.使用 glob.iglob(path, recursive=True) 方法启动 for 循环,递归遍历文件夹并返回文件/文件夹的当前路径。
for curr_path in glob.iglob(path, recursive=True):
3.调用append_path_name函数,通过传递当前路径来存储文件/文件夹的名称和路径。
name_list, path_list = append_path_name(
curr_path, name_list, path_list, False)
上述函数的代码:
Python3
import glob
def find_using_glob(path, name_list, path_list):
    """Collect the names and paths of everything below ``path`` via glob.iglob.

    ``append_path_name`` records the parent folder and returns the recursive
    pattern built from it; every path that ``glob.iglob`` yields for that
    pattern is then recorded through ``append_path_name`` as well. Both lists
    are extended in place and also returned.

    NOTE(review): per the glob documentation, wildcard patterns do not match
    names that start with a dot by default, so hidden entries may be absent
    from the result -- confirm this is acceptable.

    Args:
        path: Root folder to traverse.
        name_list: List that receives file/folder names.
        path_list: List that receives the matching paths.

    Returns:
        The tuple ``(name_list, path_list)``.
    """
    # Passing True asks the helper for the pattern iglob should expand.
    pattern, name_list, path_list = append_path_name(
        path, name_list, path_list, True)
    for match in glob.iglob(pattern, recursive=True):
        name_list, path_list = append_path_name(
            match, name_list, path_list, False)
    return name_list, path_list
name_list, path_list = find_using_glob("/content/sample_data", [], [])
print(name_list)
print(path_list)
输出:
[‘sample_data’, ‘anscombe.json’, ‘california_housing_train.csv’, ‘F2’, ‘SF2’, ‘california_housing_test.csv’,
‘.ipynb_checkpoints’, ‘.ipynb_checkpoints’, ‘F1’, ‘mnist_test.csv’, ‘README.md’, ‘.ipynb_checkpoints’, ‘SF1’,
‘mnist_train_small.csv’]
[‘/content/sample_data’, ‘/content/sample_data/anscombe.json’,
‘/content/sample_data/california_housing_train.csv’, ‘/content/sample_data/F2’,
‘/content/sample_data/F2/SF2’, ‘/content/sample_data/F2/SF2/california_housing_test.csv’,
‘/content/sample_data/F2/.ipynb_checkpoints’, ‘/content/sample_data/.ipynb_checkpoints’,
‘/content/sample_data/F1’, ‘/content/sample_data/F1/mnist_test.csv’, ‘/content/sample_data/F1/README.md’,
‘/content/sample_data/F1/.ipynb_checkpoints’, ‘/content/sample_data/F1/SF1’,
‘/content/sample_data/F1/SF1/mnist_train_small.csv’]
在 Excel 工作表中存储数据
方法一:使用openpyxl
该模块用于读取/写入数据到 excel。它具有广泛的功能,但在这里我们将使用它来创建和写入数据到 excel。您需要在系统中通过 pip 安装 openpyxl。
pip install openpyxl
方法:
1.导入需要的模块
# imports workbook from openpyxl module
from openpyxl import Workbook
2.创建工作簿对象
work_book = Workbook()
3.获取工作簿的活动工作表对象,并将以下变量初始化为 0。
row, col1_width, col2_width = 0, 0, 0
work_sheet = work_book.active
4.循环遍历行号,直到达到 name_list 的长度,因为这些条目将被写入 Excel 表格(第 0 次迭代用于写入表头)。
while row <= len(name_list):
5 、获取同一行的第1列和第2列的单元格对象,将name_list和path_list的值分别存储到各自的单元格中。
name = work_sheet.cell(row=row+1, column=1)
path = work_sheet.cell(row=row+1, column=2)
# This block will execute only once and
# add the Heading of column 1 and column 2
if row == 0:
name.value = "Name"
path.value = "Path"
row += 1
continue
# Storing the values from name_list and path_list
# to the specified cell objects
name.value = name_list[row-1]
path.value = path_list[row-1]
6. (可选)使用openpyxl的列尺寸调整Excel表格中单元格的宽度。
col1_width = max(col1_width, len(name_list[row-1]))
col2_width = max(col2_width, len(path_list[row-1]))
work_sheet.column_dimensions["A"].width = col1_width
work_sheet.column_dimensions["B"].width = col2_width
7.迭代结束后用文件名保存工作簿。
work_book.save(filename="Final.xlsx")
例子:
Python3
def create_excel_using_openpyxl(name_list, path_list,
                                path):
    """Write file/folder names and paths to ``Final.xlsx`` using openpyxl.

    Row 1 holds the headings "Name" and "Path"; each following row holds one
    entry of ``name_list`` (column A) and the entry of ``path_list`` at the
    same position (column B). Columns A and B are widened to their longest
    value.

    Args:
        name_list: File/folder names.
        path_list: Paths matching ``name_list`` by position.
        path: Unused. Kept so existing three-argument calls keep working;
            the workbook is always saved as ``Final.xlsx``.

    Raises:
        IndexError: If ``path_list`` is shorter than ``name_list``.
    """
    # Imported here so the example runs on its own.
    from openpyxl import Workbook

    work_book = Workbook()
    work_sheet = work_book.active

    # Header row first, then one row per entry. Rows are appended directly,
    # so no local variable shadows the ``path`` parameter any more.
    work_sheet.append(["Name", "Path"])
    for index, entry_name in enumerate(name_list):
        work_sheet.append([entry_name, path_list[index]])

    # Size each column once, to its longest value, instead of reassigning
    # the width on every row. Nothing to size when there is no data.
    if name_list:
        written_paths = path_list[:len(name_list)]
        work_sheet.column_dimensions["A"].width = max(map(len, name_list))
        work_sheet.column_dimensions["B"].width = max(map(len, written_paths))

    work_book.save(filename="Final.xlsx")
# The function does not use its third argument; pass the scanned folder
# explicitly because no module-level ``path`` variable is defined here.
create_excel_using_openpyxl(name_list, path_list, "/content/sample_data")
输出:
方法二:使用 pandas
1.创建一个框架(一个字典),键分别为 'Name' 和 'Path',值分别为 name_list 和 path_list:
frame = {'Name': name_list,
'Path': path_list
}
2.在导出之前,我们需要创建一个名为 df_data 的数据框,其中列作为 Name 和 Path。
df_data = pd.DataFrame(frame)
3.使用以下代码将数据导出到excel:
df_data.to_excel('Final.xlsx', index=False)
上述函数的代码:
Python3
def create_excel_using_pandas_dataframe(name_list,
                                        path_list, path):
    """Write file/folder names and paths to ``Final.xlsx`` using pandas.

    A DataFrame with the columns "Name" and "Path" is built from the two
    lists and exported without the index column.

    Args:
        name_list: File/folder names.
        path_list: Paths matching ``name_list`` by position.
        path: Unused. Kept so existing three-argument calls keep working;
            the data is always saved as ``Final.xlsx``.
    """
    # Imported here so the example runs on its own; the snippet never
    # imported pandas under the ``pd`` alias it relies on.
    import pandas as pd

    frame = {"Name": name_list, "Path": path_list}
    df_data = pd.DataFrame(frame)
    # index=False keeps the DataFrame's row index out of the sheet.
    df_data.to_excel('Final.xlsx', index=False)
# The function does not use its third argument; pass the scanned folder
# explicitly because no module-level ``path`` variable is defined here.
create_excel_using_pandas_dataframe(name_list,
                                    path_list, "/content/sample_data")
输出: