os.walk() 是Python中用于遍历目录树的强大函数,它可以递归地访问指定目录下的所有子目录和文件。
基本语法
python
import os
for root, dirs, files in os.walk(top, topdown=True, onerror=None, followlinks=False):
# 处理逻辑

参数说明:
- top:要遍历的根目录路径
- topdown:如果为True,先遍历顶级目录再子目录;如果为False,先遍历子目录
- onerror:错误处理函数
- followlinks:是否跟随符号链接
返回值说明
每次迭代返回一个三元组:
- root:当前正在遍历的目录路径
- dirs:当前目录下的子目录列表
- files:当前目录下的文件列表
基础用法示例
1. 基本遍历
python
import os
# 遍历当前目录
for root, dirs, files in os.walk('.'):
print(f"当前目录: {root}")
print(f"子目录: {dirs}")
print(f"文件: {files}")
print("-" * 50)

2. 获取所有文件路径
python
import os
def get_all_files(directory):
    """Return the full path of every file found under *directory*.

    The tree is walked recursively with ``os.walk``; each file name is
    joined onto the directory it was found in. Directories themselves are
    not included in the result.
    """
    return [
        os.path.join(current_dir, name)
        for current_dir, _subdirs, filenames in os.walk(directory)
        for name in filenames
    ]
# 使用示例
files = get_all_files('.')
for file in files:
print(file)

3. 查找特定类型文件
python
import os
def find_files_by_extension(directory, extensions):
    """Return full paths of files under *directory* ending in one of *extensions*.

    Args:
        directory: Root of the tree to search (walked with ``os.walk``).
        extensions: Either a single suffix string such as ``'.py'`` or any
            iterable of suffix strings. Matching is case-sensitive and uses
            ``str.endswith`` on the file name.

    Returns:
        A list of matching file paths, in the order ``os.walk`` yields them.
    """
    # A bare string must be treated as ONE suffix; iterating it would match
    # on its individual characters instead.
    if isinstance(extensions, str):
        suffixes = (extensions,)
    else:
        # Materialize once so one-shot iterables (generators) are not
        # exhausted after the first file, and so endswith gets a tuple.
        suffixes = tuple(extensions)

    found_files = []
    for root, _dirs, files in os.walk(directory):
        for file in files:
            if file.endswith(suffixes):
                found_files.append(os.path.join(root, file))
    return found_files
# 查找所有Python和文本文件
python_and_txt_files = find_files_by_extension('.', ['.py', '.txt', '.md'])
for file in python_and_txt_files:
print(file)

高级用法示例
4. 控制遍历顺序(topdown参数)
python
import os
print("自上而下遍历 (默认):")
for root, dirs, files in os.walk('.', topdown=True):
print(f"访问: {root}")
print("\n自下而上遍历:")
for root, dirs, files in os.walk('.', topdown=False):
print(f"访问: {root}")

5. 过滤目录
python
import os
def walk_with_filter(directory, exclude_dirs=None):
    """Walk *directory* and print each level, skipping excluded directories.

    Args:
        directory: Root of the tree to walk.
        exclude_dirs: Directory names to skip. When ``None``, the names
            ``.git``, ``__pycache__`` and ``node_modules`` are skipped.
    """
    if exclude_dirs is None:
        skipped = ['.git', '__pycache__', 'node_modules']
    else:
        skipped = exclude_dirs

    for current_dir, subdirs, filenames in os.walk(directory):
        # Slice-assign so the very list object os.walk handed out is
        # modified; that is what stops it from descending into these names.
        subdirs[:] = [name for name in subdirs if name not in skipped]

        print(f"当前目录: {current_dir}")
        print(f"过滤后子目录: {subdirs}")
        print(f"文件: {filenames}")
        print("-" * 50)
# 使用示例
walk_with_filter('.')

6. 错误处理
python
import os
def handle_walk_error(error):
    """Report a traversal error by printing it.

    Intended to be passed as the ``onerror`` callback of ``os.walk``; the
    error is only printed, never re-raised.
    """
    message = f"遍历错误: {error}"
    print(message)
# 带有错误处理的遍历
try:
for root, dirs, files in os.walk('/some/path', onerror=handle_walk_error):
print(f"处理: {root}")
except Exception as e:
print(f"严重错误: {e}")

7. 计算目录大小
python
import os
def get_directory_size(directory):
    """Return the combined size in bytes of all files under *directory*.

    Files whose size cannot be read (``os.path.getsize`` raises
    ``OSError``) are skipped and contribute nothing to the total.
    """
    byte_count = 0
    for current_dir, _subdirs, filenames in os.walk(directory):
        for name in filenames:
            try:
                file_bytes = os.path.getsize(os.path.join(current_dir, name))
            except OSError:
                # Best effort: an unreadable file is simply left out.
                continue
            byte_count += file_bytes
    return byte_count
# 使用示例
size = get_directory_size('.')
print(f"目录总大小: {size} bytes ({size / 1024 / 1024:.2f} MB)")

8. 查找重复文件
python
import os
import hashlib
def get_file_hash(filepath):
    """Return the hexadecimal MD5 digest of the file at *filepath*.

    The file is opened in binary mode and read in 4096-byte pieces, so the
    whole content is never held in memory at once.
    """
    digest = hashlib.md5()
    with open(filepath, "rb") as stream:
        while True:
            piece = stream.read(4096)
            if piece == b"":
                # An empty read marks end of file.
                break
            digest.update(piece)
    return digest.hexdigest()
def find_duplicate_files(directory):
    """Find files under *directory* whose content hash was already seen.

    Every file is hashed with ``get_file_hash``. The first file seen with a
    given hash is remembered; each later file with the same hash is reported
    together with that first file.

    Returns:
        A list of ``(duplicate_path, first_seen_path)`` tuples. Files that
        cannot be read are skipped.
    """
    first_seen_by_hash = {}
    duplicate_pairs = []
    for current_dir, _subdirs, filenames in os.walk(directory):
        for name in filenames:
            candidate = os.path.join(current_dir, name)
            try:
                digest = get_file_hash(candidate)
            except OSError:
                # Unreadable file: leave it out of the comparison.
                continue
            if digest not in first_seen_by_hash:
                first_seen_by_hash[digest] = candidate
            else:
                duplicate_pairs.append((candidate, first_seen_by_hash[digest]))
    return duplicate_pairs
# 使用示例
duplicates = find_duplicate_files('.')
for dup in duplicates:
print(f"重复文件: {dup[0]} 和 {dup[1]}")

9. 文件统计
python
import os
def analyze_directory(directory):
    """Collect simple statistics about the tree rooted at *directory*.

    Returns:
        A dict with four keys:
        ``total_dirs`` (number of subdirectories encountered),
        ``total_files`` (number of files encountered),
        ``file_types`` (lower-cased extension -> count, with a placeholder
        label for files that have no extension), and
        ``largest_file`` (a ``(path, size_in_bytes)`` tuple, ``('', 0)``
        when no file has a size greater than zero).
    """
    dir_count = 0
    file_count = 0
    type_counts = {}
    biggest_path, biggest_size = '', 0

    for current_dir, subdirs, filenames in os.walk(directory):
        dir_count += len(subdirs)
        file_count += len(filenames)

        for name in filenames:
            # Tally by extension, case-insensitively.
            suffix = os.path.splitext(name)[1]
            label = suffix.lower() or '无扩展名'
            type_counts[label] = type_counts.get(label, 0) + 1

            # Track the largest file; unreadable files are still counted
            # above but cannot compete for "largest".
            path = os.path.join(current_dir, name)
            try:
                size = os.path.getsize(path)
            except OSError:
                continue
            if size > biggest_size:
                biggest_path, biggest_size = path, size

    return {
        'total_dirs': dir_count,
        'total_files': file_count,
        'file_types': type_counts,
        'largest_file': (biggest_path, biggest_size),
    }
# 使用示例
stats = analyze_directory('.')
print(f"总目录数: {stats['total_dirs']}")
print(f"总文件数: {stats['total_files']}")
print("文件类型分布:")
for file_type, count in stats['file_types'].items():
print(f" {file_type}: {count}")
print(f"最大文件: {stats['largest_file'][0]} ({stats['largest_file'][1]} bytes)")

10. 批量重命名文件
python
import os
def rename_files_in_directory(directory, old_ext, new_ext):
    """Rename every file under *directory* whose name ends with *old_ext*.

    Only the trailing ``old_ext`` is swapped for ``new_ext``; the same text
    appearing earlier in the file name is left untouched (``a.txt.bak.txt``
    becomes ``a.txt.bak.md``, not ``a.md.bak.md``).

    Args:
        directory: Root of the tree to walk.
        old_ext: Suffix to look for, e.g. ``'.txt'``. An empty string
            matches every file, in which case ``new_ext`` is appended.
        new_ext: Suffix to put in its place, e.g. ``'.md'``.

    Returns:
        The number of files renamed successfully. A failed rename is
        printed and skipped; it does not stop the walk.

    Note:
        ``os.rename`` is used as-is, so whether an already existing
        destination is replaced or reported as an error is platform
        dependent (see the ``os.rename`` documentation).
    """
    renamed_count = 0
    for root, _dirs, files in os.walk(directory):
        for file in files:
            if not file.endswith(old_ext):
                continue

            # Cut the suffix off by length instead of str.replace(), which
            # would also rewrite matches in the middle of the name. The
            # explicit end index keeps this correct when old_ext is empty.
            new_file = file[:len(file) - len(old_ext)] + new_ext
            old_path = os.path.join(root, file)
            new_path = os.path.join(root, new_file)

            try:
                os.rename(old_path, new_path)
            except OSError as e:
                print(f"重命名失败 {old_path}: {e}")
            else:
                print(f"重命名: {old_path} -> {new_path}")
                renamed_count += 1
    return renamed_count
# 使用示例:将所有的.txt文件改为.md
# renamed = rename_files_in_directory('.', '.txt', '.md')
# print(f"重命名了 {renamed} 个文件")

注意事项
- 性能考虑:对于非常大的目录树,`os.walk()` 可能会消耗较多内存
- 权限问题:可能会遇到权限不足无法访问的目录
- 符号链接:默认不跟随符号链接,设置 `followlinks=True` 可以改变此行为
- 实时修改:在遍历过程中原地修改 `dirs` 列表可以控制遍历行为(仅在 `topdown=True` 时生效)
os.walk() 是文件系统操作中非常有用的工具,特别适合需要递归处理目录结构的场景。


