py-os-walk - 几何为欢的云站

os.walk() 是Python中用于遍历目录树的强大函数，它可以递归地访问指定目录下的所有子目录和文件。

基本语法#

1
import os
2

3
# Signature template: `top` is a placeholder for the root directory to walk.
for root, dirs, files in os.walk(top, topdown=True, onerror=None, followlinks=False):
    pass  # processing logic goes here

参数说明：

top：要遍历的根目录路径
topdown：如果为True，先遍历顶级目录再子目录；如果为False，先遍历子目录
onerror：错误处理函数；列目录出错时会以 OSError 实例为参数被调用，默认（None）则忽略错误并继续遍历
followlinks：是否跟随符号链接

返回值说明#

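每次迭代返回一个三元组 (root, dirs, files)：root 是当前正在遍历的目录路径（字符串），dirs 是该目录下的子目录名列表，files 是该目录下的非目录文件名列表。列表中的名称不含路径，需要用 os.path.join(root, name) 拼出完整路径。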

基础用法示例#

1. 基本遍历#

1
import os
2

3
# Walk the current directory and print what each visited directory contains.
for root, dirs, files in os.walk('.'):
    print(f"当前目录: {root}")
    print(f"子目录: {dirs}")
    print(f"文件: {files}")
    print("-" * 50)

2. 获取所有文件路径#

1
import os
2

3
def get_all_files(directory):
    """Return the full path of every file found under *directory*.

    Args:
        directory: Root directory to walk recursively.

    Returns:
        A list of paths, each built by joining the directory that
        ``os.walk`` reported with the file name. The list is empty when the
        directory contains no files or cannot be listed (``os.walk`` ignores
        listing errors by default).
    """
    return [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(directory)
        for name in names
    ]
11

12
# Example usage: list every file below the current directory.
files = get_all_files('.')
for file in files:
    print(file)

3. 查找特定类型文件#

1
import os
2

3
def find_files_by_extension(directory, extensions):
    """Find files under *directory* whose names end with a given suffix.

    Args:
        directory: Root directory to walk recursively.
        extensions: An iterable of suffixes such as ``['.py', '.txt']``, or a
            single suffix string. Matching is case-sensitive.

    Returns:
        A list of full paths of the matching files.
    """
    # A bare string would otherwise be iterated character by character,
    # so '.py' would match any name ending in '.', 'p' or 'y'.
    if isinstance(extensions, str):
        extensions = (extensions,)
    # str.endswith accepts a tuple, so one call checks all suffixes.
    suffixes = tuple(extensions)

    found_files = []
    for root, _dirs, files in os.walk(directory):
        for name in files:
            if name.endswith(suffixes):
                found_files.append(os.path.join(root, name))
    return found_files
12

13
# Example usage: find all Python, text and Markdown files.
python_and_txt_files = find_files_by_extension('.', ['.py', '.txt', '.md'])
for file in python_and_txt_files:
    print(file)

高级用法示例#

4. 控制遍历顺序（topdown参数）#

1
import os
2

3
# topdown=True (the default) yields a directory before its subdirectories.
print("自上而下遍历 (默认):")
for root, dirs, files in os.walk('.', topdown=True):
    print(f"访问: {root}")

# topdown=False yields a directory only after all of its subdirectories.
print("\n自下而上遍历:")
for root, dirs, files in os.walk('.', topdown=False):
    print(f"访问: {root}")

5. 过滤目录#

1
import os
2

3
def walk_with_filter(directory, exclude_dirs=None):
    """Walk *directory*, printing each directory while skipping excluded ones.

    Args:
        directory: Root directory to walk recursively.
        exclude_dirs: Directory names (not paths) to skip, as an iterable of
            strings or a single string. Defaults to ``.git``,
            ``__pycache__`` and ``node_modules``.

    Returns:
        None. The current directory, its remaining subdirectories and its
        files are printed for every directory visited.
    """
    if exclude_dirs is None:
        exclude_dirs = ['.git', '__pycache__', 'node_modules']
    elif isinstance(exclude_dirs, str):
        # A bare string would turn the membership test into a substring test.
        exclude_dirs = [exclude_dirs]
    excluded = frozenset(exclude_dirs)

    for root, dirs, files in os.walk(directory):
        # Assign through the slice so the list os.walk holds is changed in
        # place; that is what stops it from descending into these directories.
        dirs[:] = [d for d in dirs if d not in excluded]

        print(f"当前目录: {root}")
        print(f"过滤后子目录: {dirs}")
        print(f"文件: {files}")
        print("-" * 50)
16

17
# Example usage: walk the current directory with the default exclusions.
walk_with_filter('.')

6. 错误处理#

1
import os
2

3
def handle_walk_error(error):
    """Print an error reported by ``os.walk``.

    Intended to be passed as the ``onerror`` callback of ``os.walk``.

    Args:
        error: The exception describing the failure; it is formatted with
            ``str()`` into the printed message.
    """
    print(f"遍历错误: {error}")
6

7
# Walk with error handling: directory-listing errors go to handle_walk_error,
# and anything else that escapes the loop is reported by the except clause.
try:
    for root, dirs, files in os.walk('/some/path', onerror=handle_walk_error):
        print(f"处理: {root}")
except Exception as e:
    print(f"严重错误: {e}")

7. 计算目录大小#

1
import os
2

3
def get_directory_size(directory):
    """Return the total size in bytes of the files under *directory*.

    Sizes are read with ``os.lstat``, so a symbolic link contributes the size
    of the link itself rather than the size of its target. This avoids
    counting the same data twice, or counting data that lives outside the
    tree being measured.

    Args:
        directory: Root directory to walk recursively.

    Returns:
        The summed size in bytes; 0 when no file could be measured.
    """
    total_size = 0
    for root, _dirs, files in os.walk(directory):
        for name in files:
            try:
                total_size += os.lstat(os.path.join(root, name)).st_size
            except OSError:
                # Best effort: skip files that vanished or cannot be accessed.
                continue
    return total_size
15

16
# Example usage: report the size of the current directory in bytes and MB.
size = get_directory_size('.')
print(f"目录总大小: {size} bytes ({size / 1024 / 1024:.2f} MB)")

8. 查找重复文件#

1
import os
2
import hashlib
3

4
def get_file_hash(filepath, chunk_size=65536):
    """Return the MD5 hex digest of the file at *filepath*.

    The file is read in binary mode in fixed-size chunks, so it is never
    loaded into memory in one piece.

    Args:
        filepath: Path of the file to hash.
        chunk_size: Number of bytes to read per chunk.

    Returns:
        The digest as a hexadecimal string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    hash_md5 = hashlib.md5()
    with open(filepath, "rb") as f:
        # iter() with a sentinel keeps calling read() until it returns b"".
        for chunk in iter(lambda: f.read(chunk_size), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()
11

12
def find_duplicate_files(directory):
    """Find files under *directory* whose content hash matches an earlier file.

    Args:
        directory: Root directory to walk recursively.

    Returns:
        A list of ``(duplicate_path, first_seen_path)`` tuples, where
        ``first_seen_path`` is the first file encountered with that hash.
        Files that cannot be read are skipped.
    """
    file_hashes = {}  # hash -> path of the first file seen with that hash
    duplicates = []

    for root, _dirs, files in os.walk(directory):
        for name in files:
            file_path = os.path.join(root, name)
            try:
                file_hash = get_file_hash(file_path)
            except OSError:
                # Unreadable file: leave it out of the comparison.
                continue

            if file_hash in file_hashes:
                duplicates.append((file_path, file_hashes[file_hash]))
            else:
                file_hashes[file_hash] = file_path

    return duplicates
30

31
# Example usage: report each duplicate next to the file it duplicates.
duplicates = find_duplicate_files('.')
for dup in duplicates:
    print(f"重复文件: {dup[0]} 和 {dup[1]}")

9. 文件统计#

1
import os
2

3
def analyze_directory(directory):
    """Collect summary statistics about the tree under *directory*.

    Args:
        directory: Root directory to walk recursively.

    Returns:
        A dict with these keys:

        * ``'total_dirs'``: number of subdirectories found (the root itself
          is not counted).
        * ``'total_files'``: number of files found.
        * ``'file_types'``: dict mapping each lower-cased extension (or
          ``'无扩展名'`` for names without one) to its file count.
        * ``'largest_file'``: ``(path, size_in_bytes)`` of the largest file
          whose size could be read, or ``('', 0)`` if there is none.
    """
    stats = {
        'total_dirs': 0,
        'total_files': 0,
        'file_types': {},
        'largest_file': ('', 0),
    }
    file_types = stats['file_types']

    for root, dirs, files in os.walk(directory):
        stats['total_dirs'] += len(dirs)
        stats['total_files'] += len(files)

        for name in files:
            # Tally the file type by lower-cased extension.
            ext = os.path.splitext(name)[1]
            file_type = ext.lower() if ext else '无扩展名'
            file_types[file_type] = file_types.get(file_type, 0) + 1

            # Track the largest file; unreadable files are only counted.
            file_path = os.path.join(root, name)
            try:
                file_size = os.path.getsize(file_path)
            except OSError:
                continue

            largest_path, largest_size = stats['largest_file']
            # The empty-path check lets the first measurable file win even
            # when it is 0 bytes, so a tree of empty files still reports one.
            if not largest_path or file_size > largest_size:
                stats['largest_file'] = (file_path, file_size)

    return stats
32

33
# Example usage: print a summary of the current directory.
stats = analyze_directory('.')
print(f"总目录数: {stats['total_dirs']}")
print(f"总文件数: {stats['total_files']}")
print("文件类型分布:")
for file_type, count in stats['file_types'].items():
    print(f"  {file_type}: {count}")
print(f"最大文件: {stats['largest_file'][0]} ({stats['largest_file'][1]} bytes)")

10. 批量重命名文件#

1
import os
2

3
def rename_files_in_directory(directory, old_ext, new_ext):
    """Rename every file under *directory* ending in *old_ext* to *new_ext*.

    Only the trailing suffix is replaced; other occurrences of *old_ext*
    inside the file name are left alone. A file is skipped, with a message,
    when the target name already exists, so nothing is overwritten.

    Args:
        directory: Root directory to walk recursively.
        old_ext: Suffix to match, e.g. ``'.txt'``. Must not be empty.
        new_ext: Suffix to put in its place, e.g. ``'.md'``.

    Returns:
        The number of files that were renamed.

    Raises:
        ValueError: If *old_ext* is empty, since it would match every file.
    """
    if not old_ext:
        raise ValueError("old_ext must be a non-empty suffix")

    renamed_count = 0

    for root, _dirs, files in os.walk(directory):
        for name in files:
            if not name.endswith(old_ext):
                continue

            old_path = os.path.join(root, name)
            # Slice off the suffix; str.replace would also rewrite matches
            # earlier in the name ("my.txt_notes.txt" -> "my.md_notes.md").
            new_path = os.path.join(root, name[:-len(old_ext)] + new_ext)

            # os.rename can silently replace an existing file, so refuse to
            # clobber. lexists also catches dangling symlinks.
            if os.path.lexists(new_path):
                print(f"跳过 {old_path}: 目标已存在 {new_path}")
                continue

            try:
                os.rename(old_path, new_path)
            except OSError as e:
                print(f"重命名失败 {old_path}: {e}")
            else:
                print(f"重命名: {old_path} -> {new_path}")
                renamed_count += 1

    return renamed_count
22

23
# 使用示例：将所有的.txt文件改为.md（会真实修改文件，确认后再取消注释运行）
# renamed = rename_files_in_directory('.', '.txt', '.md')
# print(f"重命名了 {renamed} 个文件")

注意事项#

性能考虑：os.walk() 是生成器，逐个目录惰性地产生结果；但对于非常大的目录树，遍历本身耗时较长，若把所有路径收集到列表中也会消耗较多内存
权限问题：可能会遇到权限不足无法访问的目录
符号链接：默认不跟随符号链接，设置 followlinks=True 可以改变此行为
实时修改：当 topdown=True 时，在遍历过程中原地修改 dirs 列表（如 dirs[:] = ... 或 del dirs[i]）可以控制后续进入哪些子目录；topdown=False 时修改 dirs 不影响遍历

os.walk() 是文件系统操作中非常有用的工具，特别适合需要递归处理目录结构的场景。

音乐

音乐

基本语法#

返回值说明#

基础用法示例#

1. 基本遍历#

2. 获取所有文件路径#

3. 查找特定类型文件#

高级用法示例#

4. 控制遍历顺序（topdown参数）#

5. 过滤目录#

6. 错误处理#

7. 计算目录大小#

8. 查找重复文件#

9. 文件统计#

10. 批量重命名文件#

注意事项#

支持与分享

评论区

音乐

目录