Files
linux-practice/COURSE_TASKS.json

2040 lines
83 KiB
JSON
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"meta": {
"version": "4.1",
"title": "Linux 系统学习课程(运维全场景版)",
"author": "OpenClaw Dev",
"updated": "2026-03-10",
"description": "强调知识理解、场景迁移与运维全场景覆盖的 Linux 学习课程",
"module_count": 10,
"total_lessons": 30,
"total_exercises": 90,
"pedagogy": "learning-first",
"orientation": "ops-full-scenarios",
"source_style": "classic-linux-textbook-inspired"
},
"modules": [
{
"id": "module_1_foundation",
"title": "模块 1:建立 Linux 基本认知",
"summary": "先理解终端、目录、路径和最基础命令,建立 Linux 使用的空间感。",
"lessons": [
{
"id": "m1_l1_pwd",
"title": "认识当前目录:pwd",
"goal": "理解当前工作目录的意义,知道自己在文件系统中的位置。",
"why_it_matters": "很多 Linux 操作依赖路径。如果不知道自己当前在哪,后续命令容易出错。",
"concepts": [
"当前工作目录",
"绝对路径与相对路径",
"为什么要先定位再操作"
],
"command": "pwd",
"examples": [
"pwd",
"cd /tmp && pwd"
],
"pitfalls": [
"以为终端默认总在同一个目录",
"不分清当前目录和目标目录"
],
"scenarios": [
"切目录后确认自己到了哪里",
"写脚本前确认当前运行位置"
],
"exercises": [
{
"id": "m1_l1_e1",
"type": "understanding",
"question": "查看当前工作目录应该使用什么命令?",
"answer": "pwd"
},
{
"id": "m1_l1_e2",
"type": "operation",
"title": "输出当前目录",
"hint": "直接输入 pwd",
"success_test": "cmd == 'pwd'",
"solution": [
"pwd"
],
"success_msg": "你已经能确认自己所在的位置了。"
},
{
"id": "m1_l1_e3",
"type": "scenario",
"question": "如果你不确定自己当前在哪个目录,第一反应应该做什么?",
"answer": "先执行 pwd 确认当前目录"
}
],
"related_commands": [
"pwd"
],
"classic_view": "教材视角:Linux 入门首先不是背命令,而是建立“目录、路径、文件”这套基础空间感。",
"takeaways": [
"学完后应能做到:理解当前工作目录的意义,知道自己在文件系统中的位置。",
"易错提醒:以为终端默认总在同一个目录",
"迁移场景:切目录后确认自己到了哪里"
],
"after_class": "课后建议:回到真实或模拟环境里,再用 pwd 做一次独立练习,并尝试自己解释每条输出的含义。"
},
{
"id": "m1_l2_ls",
"title": "看见目录内容:ls",
"goal": "理解 ls 的作用,并掌握查看隐藏文件和详细信息的基本方式。",
"why_it_matters": "Linux 下很多探索行为都从 ls 开始,它决定你如何观察目录结构。",
"concepts": [
"目录内容查看",
"隐藏文件",
"长列表信息"
],
"command": "ls",
"examples": [
"ls",
"ls -la",
"ls -lh /etc"
],
"pitfalls": [
"误以为 ls 看不到的文件就不存在",
"不会区分普通 ls 和 ls -l 的用途"
],
"scenarios": [
"排查目录里到底有哪些文件",
"检查配置目录中是否有隐藏文件"
],
"exercises": [
{
"id": "m1_l2_e1",
"type": "understanding",
"question": "为什么 ls -a 会比 ls 多看到一些文件?",
"answer": "因为它会显示隐藏文件,包括以点开头的文件"
},
{
"id": "m1_l2_e2",
"type": "operation",
"title": "列出当前目录内容",
"hint": "输入 ls",
"success_test": "cmd == 'ls'",
"solution": [
"ls"
],
"success_msg": "你已经会观察目录内容了。"
},
{
"id": "m1_l2_e3",
"type": "operation",
"title": "显示隐藏文件和详细信息",
"hint": "使用 ls -la",
"success_test": "cmd == 'ls -la' or cmd == 'ls -al'",
"solution": [
"ls -la",
"ls -al"
],
"success_msg": "你已经会用更完整的方式查看目录了。"
}
],
"related_commands": [
"ls"
],
"classic_view": "教材视角:Linux 入门首先不是背命令,而是建立“目录、路径、文件”这套基础空间感。",
"takeaways": [
"学完后应能做到:理解 ls 的作用,并掌握查看隐藏文件和详细信息的基本方式。",
"易错提醒:误以为 ls 看不到的文件就不存在",
"迁移场景:排查目录里到底有哪些文件"
],
"after_class": "课后建议:回到真实或模拟环境里,再用 ls 做一次独立练习,并尝试自己解释每条输出的含义。"
},
{
"id": "m1_l3_cd_cat_echo",
"title": "移动、读文件、输出文本",
"goal": "掌握 cd、cat、echo 这些最基础但最常用的命令。",
"why_it_matters": "这三个命令几乎贯穿 Linux 入门阶段的所有练习。",
"concepts": [
"切换目录",
"读取文件",
"输出文本与变量"
],
"command": "cd / cat / echo",
"examples": [
"cd /tmp",
"cat /etc/hosts",
"echo Hello Linux"
],
"pitfalls": [
"把 cd 和 ls 混用",
"用 cat 去看过大的文件",
"不知道 echo 也常用于脚本调试"
],
"scenarios": [
"进入指定目录继续操作",
"快速读取配置文件",
"验证变量和命令输出"
],
"exercises": [
{
"id": "m1_l3_e1",
"type": "operation",
"title": "进入 /tmp 目录",
"hint": "cd /tmp",
"success_test": "cmd == 'cd /tmp' and cwd == '/tmp'",
"solution": [
"cd /tmp"
],
"success_msg": "你已经能切换到目标目录了。"
},
{
"id": "m1_l3_e2",
"type": "operation",
"title": "读取 hosts 文件",
"hint": "cat /etc/hosts",
"success_test": "cmd == 'cat /etc/hosts' and 'localhost' in output",
"solution": [
"cat /etc/hosts"
],
"success_msg": "你已经会读取基础文本文件了。"
},
{
"id": "m1_l3_e3",
"type": "operation",
"title": "输出 Hello Linux",
"hint": "echo Hello Linux",
"success_test": "cmd == 'echo Hello Linux' and 'Hello Linux' in output",
"solution": [
"echo Hello Linux"
],
"success_msg": "你已经掌握了最基础的文本输出命令。"
}
],
"related_commands": [
"cd",
"cat",
"echo"
],
"classic_view": "教材视角:Linux 入门首先不是背命令,而是建立“目录、路径、文件”这套基础空间感。",
"takeaways": [
"学完后应能做到:掌握 cd、cat、echo 这些最基础但最常用的命令。",
"易错提醒:把 cd 和 ls 混用",
"迁移场景:进入指定目录继续操作"
],
"after_class": "课后建议:回到真实或模拟环境里,再用 cd、cat、echo 做一次独立练习,并尝试自己解释每条输出的含义。"
}
]
},
{
"id": "module_2_filesystem",
"title": "模块 2:文件与目录操作",
"summary": "围绕创建、复制、移动、删除和查看文件属性建立文件系统操作能力。",
"lessons": [
{
"id": "m2_l1_create",
"title": "创建文件与目录:mkdir / touch",
"goal": "理解目录和文件的创建逻辑,学会递归创建多级目录。",
"why_it_matters": "很多项目初始化、环境准备都从创建目录结构开始。",
"concepts": [
"目录创建",
"多级目录",
"空文件创建"
],
"command": "mkdir / touch",
"examples": [
"mkdir demo",
"mkdir -p /tmp/a/b/c",
"touch notes.txt"
],
"pitfalls": [
"忘记使用 -p 创建多级目录",
"目标父目录不存在时 touch 失败"
],
"scenarios": [
"初始化项目目录结构",
"创建占位文件和日志文件"
],
"exercises": [
{
"id": "m2_l1_e1",
"type": "operation",
"title": "递归创建目录",
"hint": "mkdir -p /tmp/a/b/c",
"success_test": "cmd == 'mkdir -p /tmp/a/b/c' and exists('/tmp/a/b/c')",
"solution": [
"mkdir -p /tmp/a/b/c"
],
"success_msg": "多级目录创建成功。"
},
{
"id": "m2_l1_e2",
"type": "operation",
"title": "创建空文件",
"hint": "touch /tmp/a/b/c/readme.txt",
"success_test": "cmd == 'touch /tmp/a/b/c/readme.txt' and exists('/tmp/a/b/c/readme.txt')",
"solution": [
"touch /tmp/a/b/c/readme.txt"
],
"success_msg": "空文件创建成功。"
},
{
"id": "m2_l1_e3",
"type": "scenario",
"question": "为什么 mkdir -p 适合项目初始化?",
"answer": "因为它可以一次创建多级目录,即使上层目录不存在也能自动补齐"
}
],
"related_commands": [
"mkdir",
"touch"
],
"classic_view": "教材视角:文件系统操作是 Linux 使用的基本手艺,关键不只是会敲命令,而是知道每一步在改变什么。",
"takeaways": [
"学完后应能做到:理解目录和文件的创建逻辑,学会递归创建多级目录。",
"易错提醒:忘记使用 -p 创建多级目录",
"迁移场景:初始化项目目录结构"
],
"after_class": "课后建议:回到真实或模拟环境里,再用 mkdir、touch 做一次独立练习,并尝试自己解释每条输出的含义。"
},
{
"id": "m2_l2_move_copy_delete",
"title": "复制、移动与删除:cp / mv / rm",
"goal": "理解文件操作中的备份、迁移、重命名和清理。",
"why_it_matters": "日常 Linux 使用里最常见的就是处理文件的生命周期。",
"concepts": [
"复制与备份",
"移动与重命名",
"删除风险"
],
"command": "cp / mv / rm",
"examples": [
"cp /etc/hosts /tmp/hosts.bak",
"mv old.txt new.txt",
"rm -r /tmp/testdir"
],
"pitfalls": [
"把删除当成移动",
"对目录使用 cp 却忘记 -r",
"rm -rf 风险极高"
],
"scenarios": [
"做配置备份",
"整理日志文件",
"清理无用目录"
],
"exercises": [
{
"id": "m2_l2_e1",
"type": "operation",
"title": "复制 hosts 文件",
"hint": "cp /etc/hosts /tmp/hosts.bak",
"success_test": "cmd == 'cp /etc/hosts /tmp/hosts.bak' and exists('/tmp/hosts.bak')",
"solution": [
"cp /etc/hosts /tmp/hosts.bak"
],
"success_msg": "文件备份成功。"
},
{
"id": "m2_l2_e2",
"type": "operation",
"title": "重命名备份文件",
"hint": "mv /tmp/hosts.bak /tmp/hosts.backup",
"success_test": "cmd == 'mv /tmp/hosts.bak /tmp/hosts.backup' and exists('/tmp/hosts.backup')",
"solution": [
"mv /tmp/hosts.bak /tmp/hosts.backup"
],
"success_msg": "文件重命名成功。"
},
{
"id": "m2_l2_e3",
"type": "understanding",
"question": "为什么 rm -rf 是高风险命令?",
"answer": "因为它会递归并强制删除文件和目录,执行错误会造成不可恢复的数据丢失"
}
],
"related_commands": [
"cp",
"mv",
"rm"
],
"classic_view": "教材视角:文件系统操作是 Linux 使用的基本手艺,关键不只是会敲命令,而是知道每一步在改变什么。",
"takeaways": [
"学完后应能做到:理解文件操作中的备份、迁移、重命名和清理。",
"易错提醒:把删除当成移动",
"迁移场景:做配置备份"
],
"after_class": "课后建议:回到真实或模拟环境里,再用 cp、mv、rm 做一次独立练习,并尝试自己解释每条输出的含义。"
},
{
"id": "m2_l3_stat_permissions",
"title": "认识文件属性:stat 与权限基础",
"goal": "开始理解文件属性和权限表达。",
"why_it_matters": "文件权限是 Linux 系统安全和协作的重要基础。",
"concepts": [
"文件元信息",
"权限三元组",
"目录与文件权限差异"
],
"command": "stat / chmod",
"examples": [
"stat /etc/hosts",
"chmod 755 script.sh",
"chmod +x run.sh"
],
"pitfalls": [
"不了解 755 / 644 的含义",
"给不该执行的文件随意加执行权限"
],
"scenarios": [
"检查脚本是否可执行",
"排查权限导致的运行失败"
],
"exercises": [
{
"id": "m2_l3_e1",
"type": "operation",
"title": "查看 hosts 属性",
"hint": "stat /etc/hosts",
"success_test": "cmd == 'stat /etc/hosts' and 'File:' in output",
"solution": [
"stat /etc/hosts"
],
"success_msg": "你已经会查看文件属性了。"
},
{
"id": "m2_l3_e2",
"type": "understanding",
"question": "755 和 644 最核心的区别是什么?",
"answer": "755 允许拥有者读、写、执行,其他人读和执行;644 对任何人都没有执行权限"
},
{
"id": "m2_l3_e3",
"type": "operation",
"title": "给文件添加执行权限",
"hint": "chmod +x /tmp/a/b/c/readme.txt",
"success_test": "cmd == 'chmod +x /tmp/a/b/c/readme.txt'",
"solution": [
"chmod +x /tmp/a/b/c/readme.txt"
],
"success_msg": "你已经完成了权限修改练习。"
}
],
"related_commands": [
"stat",
"chmod"
],
"classic_view": "教材视角:文件系统操作是 Linux 使用的基本手艺,关键不只是会敲命令,而是知道每一步在改变什么。",
"takeaways": [
"学完后应能做到:开始理解文件属性和权限表达。",
"易错提醒:不了解 755 / 644 的含义",
"迁移场景:检查脚本是否可执行"
],
"after_class": "课后建议:回到真实或模拟环境里,再用 stat、chmod 做一次独立练习,并尝试自己解释每条输出的含义。"
}
]
},
{
"id": "module_3_searching",
"title": "模块 3:阅读与筛选信息",
"summary": "把 Linux 当成信息检索工具来学,围绕日志、配置和统计建立阅读能力。",
"lessons": [
{
"id": "m3_l1_read_logs",
"title": "看文件头尾:head / tail",
"goal": "学会快速读取大文件的局部内容。",
"why_it_matters": "日志通常很大,不可能总是整份去看。",
"concepts": [
"查看前几行",
"查看后几行",
"实时追踪"
],
"command": "head / tail",
"examples": [
"head -n 5 /var/log/syslog",
"tail -n 20 /var/log/syslog",
"tail -f /var/log/syslog"
],
"pitfalls": [
"大文件直接 cat 影响阅读效率",
"不会区分查看历史和跟踪新增日志"
],
"scenarios": [
"看配置文件开头",
"盯日志尾部排查实时错误"
],
"exercises": [
{
"id": "m3_l1_e1",
"type": "operation",
"title": "查看 syslog 前 5 行",
"hint": "head -n 5 /var/log/syslog",
"success_test": "(cmd == 'head -n 5 /var/log/syslog' or cmd == 'head -5 /var/log/syslog') and len(output.split('\\n')) >= 5",
"solution": [
"head -n 5 /var/log/syslog",
"head -5 /var/log/syslog"
],
"success_msg": "你已经会局部查看大文件开头了。"
},
{
"id": "m3_l1_e2",
"type": "operation",
"title": "查看 syslog 最后 3 行",
"hint": "tail -n 3 /var/log/syslog",
"success_test": "(cmd == 'tail -n 3 /var/log/syslog' or cmd == 'tail -3 /var/log/syslog') and len(output.split('\\n')) >= 3",
"solution": [
"tail -n 3 /var/log/syslog",
"tail -3 /var/log/syslog"
],
"success_msg": "你已经会快速查看日志尾部了。"
},
{
"id": "m3_l1_e3",
"type": "scenario",
"question": "为什么排查线上报错时更常先用 tail 而不是 cat?",
"answer": "因为日志通常很大,tail 可以更快聚焦最近发生的问题"
}
],
"related_commands": [
"head",
"tail"
],
"classic_view": "教材视角:日志与文本处理是运维的核心阅读能力,grep / find / tail 不是零散命令,而是一套信息筛选工具链。",
"takeaways": [
"学完后应能做到:学会快速读取大文件的局部内容。",
"易错提醒:大文件直接 cat 影响阅读效率",
"迁移场景:看配置文件开头"
],
"after_class": "课后建议:回到真实或模拟环境里,再用 head、tail 做一次独立练习,并尝试自己解释每条输出的含义。"
},
{
"id": "m3_l2_grep",
"title": "关键词搜索:grep",
"goal": "理解 grep 作为日志排障和文本定位核心工具的价值。",
"why_it_matters": "没有 grep,查日志和配置会慢很多。",
"concepts": [
"大小写忽略",
"显示行号",
"反向匹配",
"递归搜索"
],
"command": "grep",
"examples": [
"grep error /var/log/syslog",
"grep -in root /etc/passwd",
"grep -v nologin /etc/passwd"
],
"pitfalls": [
"不会结合 -n 定位行号",
"不知道 -i 和 -v 的常见用途"
],
"scenarios": [
"查错误日志",
"找配置项",
"过滤无效行"
],
"exercises": [
{
"id": "m3_l2_e1",
"type": "operation",
"title": "查找 syslog 中的 error",
"hint": "grep error /var/log/syslog",
"success_test": "cmd == 'grep error /var/log/syslog' and 'error' in output.lower()",
"solution": [
"grep error /var/log/syslog"
],
"success_msg": "你已经会在日志里搜关键词了。"
},
{
"id": "m3_l2_e2",
"type": "operation",
"title": "忽略大小写搜索 root",
"hint": "grep -i root /etc/passwd",
"success_test": "cmd == 'grep -i root /etc/passwd'",
"solution": [
"grep -i root /etc/passwd"
],
"success_msg": "你已经知道如何处理大小写差异了。"
},
{
"id": "m3_l2_e3",
"type": "understanding",
"question": "grep -n 的意义是什么?",
"answer": "显示匹配结果所在的行号,方便快速定位原文位置"
}
],
"related_commands": [
"grep"
],
"classic_view": "教材视角:日志与文本处理是运维的核心阅读能力,grep / find / tail 不是零散命令,而是一套信息筛选工具链。",
"takeaways": [
"学完后应能做到:理解 grep 作为日志排障和文本定位核心工具的价值。",
"易错提醒:不会结合 -n 定位行号",
"迁移场景:查错误日志"
],
"after_class": "课后建议:回到真实或模拟环境里,再用 grep 做一次独立练习,并尝试自己解释每条输出的含义。"
},
{
"id": "m3_l3_find_wc_sort",
"title": "查找与统计:find / wc / sort",
"goal": "建立查找文件和做基础统计的能力。",
"why_it_matters": "Linux 的很多效率来自组合式查找与统计。",
"concepts": [
"按名称查找",
"行数字数统计",
"排序输出"
],
"command": "find / wc / sort",
"examples": [
"find /etc -name '*.conf'",
"wc -l /var/log/syslog",
"ls | sort"
],
"pitfalls": [
"把 find 和 grep 混淆",
"不会根据任务选文件查找还是内容查找"
],
"scenarios": [
"找配置文件",
"统计日志行数",
"整理输出结果"
],
"exercises": [
{
"id": "m3_l3_e1",
"type": "operation",
"title": "查找 /etc 下所有 .conf 文件",
"hint": "find /etc -name '*.conf'",
"success_test": "cmd == \"find /etc -name '*.conf'\" and '.conf' in output",
"solution": [
"find /etc -name '*.conf'"
],
"success_msg": "你已经会用 find 定位文件了。"
},
{
"id": "m3_l3_e2",
"type": "operation",
"title": "统计 syslog 行数",
"hint": "wc -l /var/log/syslog",
"success_test": "cmd == 'wc -l /var/log/syslog' and output.strip().split(' ')[0].isdigit()",
"solution": [
"wc -l /var/log/syslog"
],
"success_msg": "你已经会做基础统计了。"
},
{
"id": "m3_l3_e3",
"type": "understanding",
"question": "找文件位置应该优先想到 find 还是 grep?为什么?",
"answer": "优先用 find,因为这是文件定位问题,不是文件内容搜索问题"
}
],
"related_commands": [
"find",
"wc",
"sort"
],
"classic_view": "教材视角:日志与文本处理是运维的核心阅读能力,grep / find / tail 不是零散命令,而是一套信息筛选工具链。",
"takeaways": [
"学完后应能做到:建立查找文件和做基础统计的能力。",
"易错提醒:把 find 和 grep 混淆",
"迁移场景:找配置文件"
],
"after_class": "课后建议:回到真实或模拟环境里,再用 find、wc、sort 做一次独立练习,并尝试自己解释每条输出的含义。"
}
]
},
{
"id": "module_4_system_state",
"title": "模块 4:系统状态与资源认知",
"summary": "学习如何看进程、负载、磁盘、内存和挂载信息,建立系统状态判断能力。",
"lessons": [
{
"id": "m4_l1_process",
"title": "看进程:ps / top",
"goal": "理解 Linux 中的进程概念,知道如何查看系统正在运行什么。",
"why_it_matters": "绝大多数服务故障、卡顿和异常都要先看进程。",
"concepts": [
"进程与服务",
"前台与后台",
"ps 和 top 的区别"
],
"command": "ps / top",
"examples": [
"ps aux",
"ps -ef",
"top"
],
"pitfalls": [
"只会看进程名,不会看状态",
"把存在进程等同于服务可用"
],
"scenarios": [
"确认服务进程是否存在",
"定位高 CPU 进程"
],
"exercises": [
{
"id": "m4_l1_e1",
"type": "operation",
"title": "查看所有进程",
"hint": "ps aux",
"success_test": "cmd == 'ps aux' and 'PID' in output",
"solution": [
"ps aux"
],
"success_msg": "你已经会查看系统进程了。"
},
{
"id": "m4_l1_e2",
"type": "understanding",
"question": "为什么看到进程存在,不代表服务一定可用?",
"answer": "因为进程存在只说明程序在运行,不代表端口监听、配置、依赖或接口一定正常"
},
{
"id": "m4_l1_e3",
"type": "scenario",
"question": "排查“服务似乎没启动”时,第一步通常可以用什么命令?",
"answer": "先用 ps aux 或 ps -ef 查看相关进程是否存在"
}
],
"related_commands": [
"ps",
"top"
],
"classic_view": "教材视角:系统状态认知是运维的基本盘,先学会“看懂机器”,再谈优化和修复。",
"takeaways": [
"学完后应能做到:理解 Linux 中的进程概念,知道如何查看系统正在运行什么。",
"易错提醒:只会看进程名,不会看状态",
"迁移场景:确认服务进程是否存在"
],
"after_class": "课后建议:回到真实或模拟环境里,再用 ps、top 做一次独立练习,并尝试自己解释每条输出的含义。"
},
{
"id": "m4_l2_disk_memory",
"title": "看磁盘与内存:df / du / free",
"goal": "掌握查看磁盘使用、目录占用和内存情况的基础方法。",
"why_it_matters": "磁盘爆满、内存紧张是最常见的线上问题之一。",
"concepts": [
"磁盘空间 vs 目录占用",
"物理内存与可用内存",
"df 和 du 的区别"
],
"command": "df / du / free",
"examples": [
"df -h",
"du -sh /var/log",
"free -h"
],
"pitfalls": [
"只会看总磁盘,不会看哪个目录占用大",
"误把 free 的 used 当成唯一关键指标"
],
"scenarios": [
"排查磁盘已满",
"定位大目录",
"查看内存是否紧张"
],
"exercises": [
{
"id": "m4_l2_e1",
"type": "operation",
"title": "查看磁盘空间",
"hint": "df -h",
"success_test": "cmd == 'df -h' and 'Filesystem' in output",
"solution": [
"df -h"
],
"success_msg": "你已经会看磁盘使用情况了。"
},
{
"id": "m4_l2_e2",
"type": "operation",
"title": "查看 /sandbox 目录大小",
"hint": "du -sh /sandbox",
"success_test": "cmd == 'du -sh /sandbox' and '/sandbox' in output",
"solution": [
"du -sh /sandbox"
],
"success_msg": "你已经会看目录占用了。"
},
{
"id": "m4_l2_e3",
"type": "understanding",
"question": "df 和 du 的核心区别是什么?",
"answer": "df 看文件系统层面的磁盘使用,du 看目录或文件占用大小"
}
],
"related_commands": [
"df",
"du",
"free"
],
"classic_view": "教材视角:系统状态认知是运维的基本盘,先学会“看懂机器”,再谈优化和修复。",
"takeaways": [
"学完后应能做到:掌握查看磁盘使用、目录占用和内存情况的基础方法。",
"易错提醒:只会看总磁盘,不会看哪个目录占用大",
"迁移场景:排查磁盘已满"
],
"after_class": "课后建议:回到真实或模拟环境里,再用 df、du、free 做一次独立练习,并尝试自己解释每条输出的含义。"
},
{
"id": "m4_l3_mount_history",
"title": "运行时间、挂载点与历史命令",
"goal": "建立系统运行时间、挂载结构与命令习惯认知。",
"why_it_matters": "理解机器运行了多久、磁盘挂载在哪里、最近执行过什么命令,是运维日常的基础信息。",
"concepts": [
"uptime 的含义",
"挂载点",
"history 复盘"
],
"command": "uptime / mount / history",
"examples": [
"uptime",
"mount",
"history 5"
],
"pitfalls": [
"不看历史重复犯错",
"忽略挂载点导致排查路径错位"
],
"scenarios": [
"查看机器是否重启过",
"判断目录属于哪个挂载点",
"复盘最近操作"
],
"exercises": [
{
"id": "m4_l3_e1",
"type": "operation",
"title": "查看系统运行时间",
"hint": "uptime",
"success_test": "cmd == 'uptime' and 'load average' in output",
"solution": [
"uptime"
],
"success_msg": "你已经会看系统运行时间和负载了。"
},
{
"id": "m4_l3_e2",
"type": "operation",
"title": "查看最近命令历史",
"hint": "history 5",
"success_test": "(cmd == 'history 5' or cmd == 'history -n 5') and output != ''",
"solution": [
"history 5",
"history -n 5"
],
"success_msg": "你已经会利用历史命令回顾操作了。"
},
{
"id": "m4_l3_e3",
"type": "scenario",
"question": "为什么排查问题时查看 history 很有价值?",
"answer": "因为它可以帮助回溯最近做过什么操作,快速定位变更和可能的触发点"
}
],
"related_commands": [
"uptime",
"mount",
"history"
],
"classic_view": "教材视角:系统状态认知是运维的基本盘,先学会“看懂机器”,再谈优化和修复。",
"takeaways": [
"学完后应能做到:建立系统运行时间、挂载结构与命令习惯认知。",
"易错提醒:不看历史重复犯错",
"迁移场景:查看机器是否重启过"
],
"after_class": "课后建议:回到真实或模拟环境里,再用 uptime、mount、history 做一次独立练习,并尝试自己解释每条输出的含义。"
}
]
},
{
"id": "module_5_service_logs",
"title": "模块 5:服务与日志排障",
"summary": "围绕服务启动、运行状态、日志报错和后台执行建立排障链路。",
"lessons": [
{
"id": "m5_l1_systemctl",
"title": "服务管理:systemctl 基础",
"goal": "理解 Linux 服务的查看、启动、停止和重启。",
"why_it_matters": "现代 Linux 发行版大量使用 systemd 管理服务。",
"concepts": [
"服务状态",
"启动与重启",
"systemd 基础"
],
"command": "systemctl",
"examples": [
"systemctl status nginx",
"systemctl restart nginx",
"systemctl enable nginx",
"systemctl status app.service",
"systemctl restart app.service",
"systemctl is-enabled nginx"
],
"pitfalls": [
"改完配置却忘记重启服务",
"只看页面,不看服务状态",
"把 restart 当成排障终点,而不是排查起点"
],
"scenarios": [
"排查服务没起来",
"改配置后让服务生效",
"配置变更后重新加载服务",
"确认服务是否开机自启"
],
"exercises": [
{
"id": "m5_l1_e1",
"type": "understanding",
"question": "为什么改完服务配置后常常要 restart 或 reload?",
"answer": "因为配置文件变化不会自动生效,需要让服务重新加载配置"
},
{
"id": "m5_l1_e2",
"type": "scenario",
"question": "排查“网站打不开”时,为什么应该先看 systemctl status?",
"answer": "因为要先确认服务是否真的在运行,以及是否有明显启动失败信息"
},
{
"id": "m5_l1_e3",
"type": "understanding",
"question": "enable 和 start 的区别是什么?",
"answer": "start 是当前立即启动,enable 是设置开机自动启动"
}
],
"related_commands": [
"systemctl"
],
"classic_view": "教材视角:服务排障要形成链路思维——状态、进程、端口、日志、请求,要分层观察。",
"takeaways": [
"学完后应能做到:理解 Linux 服务的查看、启动、停止和重启。",
"易错提醒:改完配置却忘记重启服务",
"迁移场景:排查服务没起来",
"服务问题先看状态,再决定下一步看日志、端口还是配置。"
],
"after_class": "课后建议:回到真实或模拟环境里,再用 systemctl 做一次独立练习,并尝试自己解释每条输出的含义。"
},
{
"id": "m5_l2_journalctl",
"title": "看系统日志:journalctl",
"goal": "理解如何查看服务日志和系统日志。",
"why_it_matters": "很多 systemd 管理的服务排障入口就是 journalctl。",
"concepts": [
"单服务日志",
"最近日志",
"实时跟踪日志"
],
"command": "journalctl",
"examples": [
"journalctl -u nginx -n 50",
"journalctl -xe",
"journalctl -u app -f",
"journalctl -u app.service -n 100",
"journalctl -u nginx -f",
"journalctl --since today"
],
"pitfalls": [
"只看应用日志,不看 systemd 日志",
"看太多日志却抓不到最近错误",
"不限定服务名导致日志范围过大,难以定位"
],
"scenarios": [
"查看服务启动失败原因",
"查看最近报错",
"定位服务启动失败的关键报错",
"观察重启前后日志变化"
],
"exercises": [
{
"id": "m5_l2_e1",
"type": "understanding",
"question": "为什么 journalctl 对 systemd 服务排障特别重要?",
"answer": "因为它能直接查看服务生命周期和 systemd 记录的日志"
},
{
"id": "m5_l2_e2",
"type": "scenario",
"question": "服务启动失败后,下一步除了看 status 还应该看什么?",
"answer": "看 journalctl -u 服务名 的日志,确认具体报错"
},
{
"id": "m5_l2_e3",
"type": "understanding",
"question": "为什么实时排查时常用 -f?",
"answer": "因为 -f 可以持续跟踪新增日志,适合边操作边观察"
}
],
"related_commands": [
"journalctl"
],
"classic_view": "教材视角:服务排障要形成链路思维——状态、进程、端口、日志、请求,要分层观察。",
"takeaways": [
"学完后应能做到:理解如何查看服务日志和系统日志。",
"易错提醒:只看应用日志,不看 systemd 日志",
"迁移场景:查看服务启动失败原因",
"日志不是越多越好,关键是缩小范围看最近、看目标服务。"
],
"after_class": "课后建议:回到真实或模拟环境里,再用 journalctl 做一次独立练习,并尝试自己解释每条输出的含义。"
},
{
"id": "m5_l3_process_control",
"title": "进程控制:kill / pkill / nohup",
"goal": "理解如何控制进程和让任务脱离终端运行。",
"why_it_matters": "部署、排障和临时任务常会遇到进程管理问题。",
"concepts": [
"发送信号",
"按名称结束进程",
"后台运行"
],
"command": "kill / pkill / nohup",
"examples": [
"kill 1234",
"pkill nginx",
"nohup python3 app.py &",
"kill -9 1234",
"pkill -f python",
"nohup bash backup.sh &"
],
"pitfalls": [
"直接粗暴 kill 掉关键进程",
"不知道后台任务输出去哪了",
"不了解信号差异就直接使用 -9"
],
"scenarios": [
"结束卡死进程",
"让脚本后台运行",
"结束僵死任务",
"让临时脚本脱离终端继续执行"
],
"exercises": [
{
"id": "m5_l3_e1",
"type": "understanding",
"question": "为什么 kill 进程前要先确认 PID 和进程身份?",
"answer": "因为误杀错误进程可能导致服务中断或数据问题"
},
{
"id": "m5_l3_e2",
"type": "understanding",
"question": "nohup 的作用是什么?",
"answer": "让命令在退出终端后继续运行,适合后台任务"
},
{
"id": "m5_l3_e3",
"type": "scenario",
"question": "如果你想让一个脚本关掉 SSH 后仍然继续跑,应该想到什么?",
"answer": "使用 nohup 或其他后台运行方式"
}
],
"related_commands": [
"kill",
"pkill",
"nohup"
],
"classic_view": "教材视角:服务排障要形成链路思维——状态、进程、端口、日志、请求,要分层观察。",
"takeaways": [
"学完后应能做到:理解如何控制进程和让任务脱离终端运行。",
"易错提醒:直接粗暴 kill 掉关键进程",
"迁移场景:结束卡死进程",
"进程控制的重点是知道为什么结束、结束谁、结束后系统会怎样。"
],
"after_class": "课后建议:回到真实或模拟环境里,再用 kill、pkill、nohup 做一次独立练习,并尝试自己解释每条输出的含义。"
}
]
},
{
"id": "module_6_network",
"title": "模块 6:网络与连接排查",
"summary": "建立 IP、端口、监听、连通性和请求验证等运维网络基础。",
"lessons": [
{
"id": "m6_l1_ip_ping",
"title": "网络基础:ip addr / ifconfig / ping",
"goal": "理解网卡、IP 和连通性的基本概念。",
"why_it_matters": "服务是否可达,首先是网络问题还是应用问题,需要先分清。",
"concepts": [
"网卡",
"IP 地址",
"连通性测试"
],
"command": "ip addr / ifconfig / ping",
"examples": [
"ip addr",
"ifconfig",
"ping 127.0.0.1",
"ip addr show eth0",
"ping 192.168.1.1",
"ping -c 4 example.com"
],
"pitfalls": [
"能 ping 通就以为服务一定可用",
"只会看 IP,不理解监听端口",
"把 DNS 解析失败误判成网络完全不通"
],
"scenarios": [
"确认机器是否有正确 IP",
"测试目标是否能连通",
"确认目标机器有无 IP",
"初步判断网络层是否通"
],
"exercises": [
{
"id": "m6_l1_e1",
"type": "operation",
"title": "查看网卡地址",
"hint": "ip addr",
"success_test": "cmd == 'ip addr' and 'inet' in output",
"solution": [
"ip addr"
],
"success_msg": "你已经会看基本网卡信息了。"
},
{
"id": "m6_l1_e2",
"type": "operation",
"title": "测试本机回环连通性",
"hint": "ping 127.0.0.1",
"success_test": "cmd == 'ping 127.0.0.1' and 'packet loss' in output",
"solution": [
"ping 127.0.0.1"
],
"success_msg": "你已经做了一次基础连通性验证。"
},
{
"id": "m6_l1_e3",
"type": "understanding",
"question": "为什么 ping 通不等于服务一定可用?",
"answer": "因为 ping 只说明网络层连通,不代表应用端口和接口层面正常"
}
],
"related_commands": [
"ip addr",
"ifconfig",
"ping"
],
"classic_view": "教材视角:网络问题最怕混层,学习要区分链路、端口、协议、请求,不要一股脑都归为“网络不通”。",
"takeaways": [
"学完后应能做到:理解网卡、IP 和连通性的基本概念。",
"易错提醒:能 ping 通就以为服务一定可用",
"迁移场景:确认机器是否有正确 IP",
"网络排查第一步是先确认链路和地址,再看更上层。"
],
"after_class": "课后建议:回到真实或模拟环境里,再用 ip addr、ifconfig、ping 做一次独立练习,并尝试自己解释每条输出的含义。"
},
{
"id": "m6_l2_ss_curl",
"title": "端口与请求:ss / netstat / curl / wget",
"goal": "建立监听端口和服务请求验证的能力。",
"why_it_matters": "运维排障很多时候要回答两个问题:端口开没开?接口通不通?",
"concepts": [
"端口监听",
"TCP 层可达性",
"HTTP 请求验证"
],
"command": "ss / netstat / curl / wget",
"examples": [
"ss -ltnp",
"netstat -tunlp",
"curl http://127.0.0.1:8080/health",
"ss -ltnp | grep 80",
"curl -I http://127.0.0.1",
"wget http://127.0.0.1"
],
"pitfalls": [
"只看页面打不开,不查监听",
"只看监听,不测实际请求",
"只看 LISTEN 不看实际响应码和返回体"
],
"scenarios": [
"查服务是否监听端口",
"测试接口是否返回 200",
"确认 Web 服务是否监听 80 端口",
"确认 HTTP 健康检查是否正常"
],
"exercises": [
{
"id": "m6_l2_e1",
"type": "operation",
"title": "查看监听端口",
"hint": "ss -ltnp",
"success_test": "cmd == 'ss -ltnp' and 'LISTEN' in output",
"solution": [
"ss -ltnp"
],
"success_msg": "你已经会看监听端口了。"
},
{
"id": "m6_l2_e2",
"type": "operation",
"title": "请求本地页面",
"hint": "curl http://127.0.0.1",
"success_test": "cmd == 'curl http://127.0.0.1' and '<html' in output.lower()",
"solution": [
"curl http://127.0.0.1"
],
"success_msg": "你已经会做基本 HTTP 探测了。"
},
{
"id": "m6_l2_e3",
"type": "scenario",
"question": "排查“服务起了但访问失败”时,为什么要同时看 ss 和 curl?",
"answer": "因为 ss 看端口监听,curl 看应用层响应,两者结合才能判断问题在哪一层"
}
],
"related_commands": [
"ss",
"netstat",
"curl",
"wget"
],
"classic_view": "教材视角:网络问题最怕混层,学习要区分链路、端口、协议、请求,不要一股脑都归为“网络不通”。",
"takeaways": [
"学完后应能做到:建立监听端口和服务请求验证的能力。",
"易错提醒:只看页面打不开,不查监听",
"迁移场景:查服务是否监听端口",
"监听正常不代表业务正常,请求失败也不一定是服务没启动。"
],
"after_class": "课后建议:回到真实或模拟环境里,再用 ss、netstat、curl、wget 做一次独立练习,并尝试自己解释每条输出的含义。"
},
{
"id": "m6_l3_name_route",
"title": "路由与名称解析:traceroute / dig / whereis 命令定位",
"goal": "建立链路定位和名称解析基础认知。",
"why_it_matters": "有些故障不是服务本身坏了,而是路径或解析出了问题。",
"concepts": [
"路由路径",
"DNS 解析",
"命令位置定位"
],
"command": "traceroute / dig / which / whereis",
"examples": [
"traceroute 8.8.8.8",
"dig example.com",
"which curl",
"whereis nginx",
"dig api.example.com",
"traceroute example.com"
],
"pitfalls": [
"把 DNS 问题误判成应用问题",
"不知道命令来自哪里",
"忽略 DNS TTL 和缓存带来的影响"
],
"scenarios": [
"排查域名异常",
"确认命令路径和来源",
"排查域名切换未生效",
"确认命令实际安装位置"
],
"exercises": [
{
"id": "m6_l3_e1",
"type": "operation",
"title": "定位 curl 命令路径",
"hint": "which curl",
"success_test": "cmd == 'which curl' and '/usr/bin/curl' in output",
"solution": [
"which curl"
],
"success_msg": "你已经会定位命令路径了。"
},
{
"id": "m6_l3_e2",
"type": "understanding",
"question": "为什么 DNS 出问题时,服务本身可能没坏但用户仍然访问失败?",
"answer": "因为域名解析不到正确 IP,流量根本到不了目标服务"
},
{
"id": "m6_l3_e3",
"type": "scenario",
"question": "排查“域名不通”时除了 curl,还应该想到什么?",
"answer": "还应该检查 dig/nslookup 和网络路径,确认是不是解析或链路问题"
}
],
"related_commands": [
"traceroute",
"dig",
"which",
"whereis"
],
"classic_view": "教材视角:网络问题最怕混层,学习要区分链路、端口、协议、请求,不要一股脑都归为“网络不通”。",
"takeaways": [
"学完后应能做到:建立链路定位和名称解析基础认知。",
"易错提醒:把 DNS 问题误判成应用问题",
"迁移场景:排查域名异常",
"命令定位、解析路径和网络链路,都是“看不见的问题”的排查入口。"
],
"after_class": "课后建议:回到真实或模拟环境里,再用 traceroute、dig、which、whereis 做一次独立练习,并尝试自己解释每条输出的含义。"
}
]
},
{
"id": "module_7_security",
"title": "模块 7:权限、用户与安全基础",
"summary": "围绕用户、组、权限和高风险操作建立 Linux 安全基本认知。",
"lessons": [
{
"id": "m7_l1_users",
"title": "用户与身份:whoami / id / passwd / su",
"goal": "理解当前身份、用户组和密码变更的意义。",
"why_it_matters": "你是谁、你属于谁、你能做什么,是 Linux 安全的最基础问题。",
"concepts": [
"当前用户",
"用户组",
"身份切换"
],
"command": "whoami / id / passwd / su",
"examples": [
"whoami",
"id",
"passwd",
"id sandbox_user",
"su - root",
"passwd sandbox_user"
],
"pitfalls": [
"不知道自己当前权限边界",
"以为所有命令都能执行",
"不知道服务身份和登录用户身份可能不同"
],
"scenarios": [
"确认当前身份",
"看自己属于哪些组",
"确认程序以什么身份运行",
"确认某用户是否属于目标组"
],
"exercises": [
{
"id": "m7_l1_e1",
"type": "operation",
"title": "查看当前用户",
"hint": "whoami",
"success_test": "cmd == 'whoami' and 'sandbox_user' in output",
"solution": [
"whoami"
],
"success_msg": "你已经会确认当前登录身份了。"
},
{
"id": "m7_l1_e2",
"type": "operation",
"title": "查看当前用户组信息",
"hint": "id",
"success_test": "cmd == 'id' and 'uid=' in output",
"solution": [
"id"
],
"success_msg": "你已经会查看用户与组信息了。"
},
{
"id": "m7_l1_e3",
"type": "understanding",
"question": "为什么运维排障前先确认 whoami 很重要?",
"answer": "因为不同身份决定你能看到什么、改什么,以及排障时会不会被权限挡住"
}
],
"related_commands": [
"whoami",
"id",
"passwd",
"su"
],
"classic_view": "教材视角:权限和身份是 Linux 安全边界的基础,能执行不代表应该执行。",
"takeaways": [
"学完后应能做到:理解当前身份、用户组和密码变更的意义。",
"易错提醒:不知道自己当前权限边界",
"迁移场景:确认当前身份",
"身份问题常常决定你能看什么、改什么、执行什么。"
],
"after_class": "课后建议:回到真实或模拟环境里,再用 whoami、id、passwd、su 做一次独立练习,并尝试自己解释每条输出的含义。"
},
{
"id": "m7_l2_permissions",
"title": "权限控制:chmod / chown / chgrp",
"goal": "理解 Linux 权限控制的基本模型和常见修改动作。",
"why_it_matters": "很多“无法访问、无法执行、无法写入”本质上都是权限问题。",
"concepts": [
"读写执行",
"拥有者与属组",
"最小权限原则"
],
"command": "chmod / chown / chgrp",
"examples": [
"chmod 644 file.txt",
"chmod +x run.sh",
"chown app:app app.log",
"chmod 644 app.conf",
"chown app:app /var/log/app.log",
"chgrp deploy script.sh"
],
"pitfalls": [
"图省事直接给 777",
"不了解属组导致协作混乱",
"只改 chmod,不看属主属组"
],
"scenarios": [
"修脚本执行权限",
"调整日志文件归属",
"修复配置文件权限",
"调整日志文件归属方便服务写入"
],
"exercises": [
{
"id": "m7_l2_e1",
"type": "operation",
"title": "给文件添加执行权限",
"hint": "chmod +x /tmp/a/b/c/readme.txt",
"success_test": "cmd == 'chmod +x /tmp/a/b/c/readme.txt'",
"solution": [
"chmod +x /tmp/a/b/c/readme.txt"
],
"success_msg": "你已经会做最基础的执行权限修改。"
},
{
"id": "m7_l2_e2",
"type": "understanding",
"question": "为什么生产环境里不应该随手给 777?",
"answer": "因为 777 让所有人都有读写执行权限,风险过高,容易造成安全和误操作问题"
},
{
"id": "m7_l2_e3",
"type": "scenario",
"question": "脚本提示 Permission denied 时,你会先想到什么?",
"answer": "先检查文件是否有执行权限,以及当前用户是否有访问权限"
}
],
"related_commands": [
"chmod",
"chown",
"chgrp"
],
"classic_view": "教材视角:权限和身份是 Linux 安全边界的基础,能执行不代表应该执行。",
"takeaways": [
"学完后应能做到:理解 Linux 权限控制的基本模型和常见修改动作。",
"易错提醒:图省事直接给 777",
"迁移场景:修脚本执行权限",
"权限排障常常不是只看一个数字,而是同时看权限、属主、属组和执行身份。"
],
"after_class": "课后建议:回到真实或模拟环境里,再用 chmod、chown、chgrp 做一次独立练习,并尝试自己解释每条输出的含义。"
},
{
"id": "m7_l3_risk",
"title": "高风险命令与最小权限原则",
"goal": "建立运维中“能做”不等于“该做”的安全意识。",
"why_it_matters": "很多事故不是因为不会,而是因为过度权限和冒险操作。",
"concepts": [
"sudo 的边界",
"危险删除",
"最小权限"
],
"command": "sudo / rm -rf / 安全习惯",
"examples": [
"sudo systemctl restart nginx",
"rm -rf /tmp/testdir",
"rm -rf /tmp/old-release"
],
"pitfalls": [
"把 sudo 当默认前缀",
"不确认路径就执行递归删除",
"高权限操作前不做确认和备份"
],
"scenarios": [
"高权限改系统配置",
"清理目录前先确认路径",
"修改系统级配置前先评估影响",
"清理目录前先校验路径"
],
"exercises": [
{
"id": "m7_l3_e1",
"type": "understanding",
"question": "为什么最小权限原则在运维里很重要?",
"answer": "因为权限越大,误操作和被利用的风险越高,应只给完成任务所需的最小权限"
},
{
"id": "m7_l3_e2",
"type": "scenario",
"question": "执行 rm -rf 之前最应该确认什么?",
"answer": "确认目标路径是否正确,以及是否真的需要递归强制删除"
},
{
"id": "m7_l3_e3",
"type": "understanding",
"question": "为什么不应该把 sudo 当成“万能解决方案”?",
"answer": "因为它绕过权限边界,容易掩盖根因并扩大误操作风险"
}
],
"related_commands": [
"sudo",
"rm -rf",
"安全习惯"
],
"classic_view": "教材视角:权限和身份是 Linux 安全边界的基础,能执行不代表应该执行。",
"takeaways": [
"学完后应能做到:建立运维中“能做”不等于“该做”的安全意识。",
"易错提醒:把 sudo 当默认前缀",
"迁移场景:高权限改系统配置",
"高风险命令需要形成“先确认、后执行、再验证”的习惯。"
],
"after_class": "课后建议:回到模拟或测试环境里(不要在生产环境练习),用 sudo 和 rm -rf 各做一次独立练习,操作时坚持“先确认、后执行、再验证”的安全习惯,并尝试自己解释每条输出的含义。"
}
]
},
{
"id": "module_8_packages_env",
"title": "模块 8:软件包、环境与命令定位",
"summary": "理解命令从哪里来、环境变量如何影响执行、软件包如何管理。",
"lessons": [
{
"id": "m8_l1_path_env",
"title": "环境变量与命令定位:env / export / which / whereis",
"goal": "理解 PATH、环境变量和命令查找机制。",
"why_it_matters": "很多“命令找不到”“版本不对”“环境不生效”都和环境变量有关。",
"concepts": [
"PATH",
"环境变量",
"命令来源"
],
"command": "env / export / which / whereis",
"examples": [
"env",
"export APP_ENV=prod",
"which python3",
"whereis nginx"
],
"pitfalls": [
"以为命令名唯一对应一个位置",
"不知道 PATH 顺序会影响执行结果"
],
"scenarios": [
"排查命令找不到",
"排查执行到错误版本"
],
"exercises": [
{
"id": "m8_l1_e1",
"type": "operation",
"title": "查看环境变量",
"hint": "env",
"success_test": "cmd == 'env' and 'PATH=' in output",
"solution": [
"env"
],
"success_msg": "你已经会查看环境变量了。"
},
{
"id": "m8_l1_e2",
"type": "operation",
"title": "定位 ls 命令",
"hint": "which ls",
"success_test": "cmd == 'which ls' and '/bin/ls' in output",
"solution": [
"which ls"
],
"success_msg": "你已经会查命令来源了。"
},
{
"id": "m8_l1_e3",
"type": "understanding",
"question": "为什么 PATH 顺序会影响命令执行结果?",
"answer": "因为系统会按 PATH 的顺序查找同名命令,先找到哪个就执行哪个"
}
],
"related_commands": [
"env",
"export",
"which",
"whereis"
],
"classic_view": "教材视角:很多“环境问题”本质是命令来源、变量配置和包版本问题,不是应用本身坏了。",
"takeaways": [
"学完后应能做到:理解 PATH、环境变量和命令查找机制。",
"易错提醒:以为命令名唯一对应一个位置",
"迁移场景:排查命令找不到"
],
"after_class": "课后建议:回到真实或模拟环境里,再用 env、export、which、whereis 做一次独立练习,并尝试自己解释每条输出的含义。"
},
{
"id": "m8_l2_package_mgr",
"title": "包管理基础:apt / yum / dpkg / rpm",
"goal": "理解 Linux 软件安装与查询的基本方式。",
"why_it_matters": "软件装没装、版本对不对,是环境排障的重要基础。",
"concepts": [
"Debian 与 RedHat 系包管理差异",
"包查询",
"版本核对"
],
"command": "apt / yum / dpkg / rpm",
"examples": [
"apt list --installed",
"yum list installed",
"rpm -qa | grep nginx"
],
"pitfalls": [
"不知道发行版不同,命令体系也不同",
"只会装包,不会查版本"
],
"scenarios": [
"确认软件已安装",
"核对线上版本"
],
"exercises": [
{
"id": "m8_l2_e1",
"type": "understanding",
"question": "为什么 apt 和 yum 不能混着理解?",
"answer": "因为它们属于不同发行版的包管理体系,命令、仓库和包格式都有差异"
},
{
"id": "m8_l2_e2",
"type": "scenario",
"question": "排查“命令不存在”时,除了 which 还会想到什么?",
"answer": "还要确认对应软件包是否已安装,必要时用包管理工具查询"
},
{
"id": "m8_l2_e3",
"type": "understanding",
"question": "为什么确认软件版本在运维里很重要?",
"answer": "因为不同版本的配置、行为和兼容性可能不同,排障和发布都依赖版本信息"
}
],
"related_commands": [
"apt",
"yum",
"dpkg",
"rpm"
],
"classic_view": "教材视角:很多“环境问题”本质是命令来源、变量配置和包版本问题,不是应用本身坏了。",
"takeaways": [
"学完后应能做到:理解 Linux 软件安装与查询的基本方式。",
"易错提醒:不知道发行版不同,命令体系也不同",
"迁移场景:确认软件已安装"
],
"after_class": "课后建议:回到真实或模拟环境里,再用 apt、yum、dpkg、rpm 做一次独立练习,并尝试自己解释每条输出的含义。"
},
{
"id": "m8_l3_alias_habit",
"title": "alias 与命令行习惯",
"goal": "建立更高效、更安全的日常命令行习惯。",
"why_it_matters": "很多效率差异来自长期习惯,而不是单个命令是否会敲。",
"concepts": [
"alias",
"常用缩写",
"习惯的收益与风险"
],
"command": "alias",
"examples": [
"alias ll='ls -l'",
"alias gs='git status'"
],
"pitfalls": [
"别名过多反而混乱",
"依赖个人别名导致跨机器不一致"
],
"scenarios": [
"提高常用命令效率",
"统一个人命令习惯"
],
"exercises": [
{
"id": "m8_l3_e1",
"type": "understanding",
"question": "为什么 alias 既能提升效率,也可能带来问题?",
"answer": "它能简化命令,但如果过度依赖,换环境或和他人协作时可能造成理解和一致性问题"
},
{
"id": "m8_l3_e2",
"type": "understanding",
"question": "为什么运维平台环境中不建议胡乱定义复杂 alias?",
"answer": "因为可能影响命令可预期性,增加排障和协作成本"
},
{
"id": "m8_l3_e3",
"type": "scenario",
"question": "什么时候 alias 是好的,什么时候需要克制?",
"answer": "高频、简单、个人明确的命令可以用 alias;涉及生产、协作和高风险操作应尽量保持显式命令"
}
],
"related_commands": [
"alias"
],
"classic_view": "教材视角:很多“环境问题”本质是命令来源、变量配置和包版本问题,不是应用本身坏了。",
"takeaways": [
"学完后应能做到:建立更高效、更安全的日常命令行习惯。",
"易错提醒:别名过多反而混乱",
"迁移场景:提高常用命令效率"
],
"after_class": "课后建议:回到真实或模拟环境里,再用 alias 做一次独立练习,并尝试自己解释每条输出的含义。"
}
]
},
{
"id": "module_9_automation",
"title": "模块 9:自动化、归档与运维习惯",
"summary": "建立重定向、管道、定时任务、归档备份和命令复盘习惯。",
"lessons": [
{
"id": "m9_l1_pipe_redirect",
"title": "组合能力:管道与重定向",
"goal": "理解为什么 Linux 强调小命令组合,而不是一个命令包办一切。",
"why_it_matters": "运维效率往往来自命令组合,而不是单个命令本身。",
"concepts": [
"标准输入输出",
"重定向",
"管道组合"
],
"command": "| / > / >>",
"examples": [
"grep error /var/log/syslog | wc -l",
"echo hello > note.txt",
"cat file >> backup.txt"
],
"pitfalls": [
"覆盖写和追加写不分",
"不会把命令组合成链路"
],
"scenarios": [
"统计错误行数",
"生成结果文件"
],
"exercises": [
{
"id": "m9_l1_e1",
"type": "understanding",
"question": "为什么管道是 Unix/Linux 的核心思想之一?",
"answer": "因为它让小工具可以彼此组合,快速拼出解决问题的命令链路"
},
{
"id": "m9_l1_e2",
"type": "scenario",
"question": "如果想统计日志里 error 出现了多少次,为什么 grep 配合 wc 很自然?",
"answer": "因为 grep 负责筛选,wc 负责统计,两者分工清晰又容易组合"
},
{
"id": "m9_l1_e3",
"type": "understanding",
"question": "> 和 >> 的区别是什么?",
"answer": "> 是覆盖写入,>> 是追加写入"
}
],
"related_commands": [
"|",
">",
">>"
],
"classic_view": "教材视角:自动化不是炫技,而是把重复工作做成稳定、可复用、可回溯的流程。",
"takeaways": [
"学完后应能做到:理解为什么 Linux 强调小命令组合,而不是一个命令包办一切。",
"易错提醒:覆盖写和追加写不分",
"迁移场景:统计错误行数"
],
"after_class": "课后建议:回到真实或模拟环境里,再用 |、>、>> 做一次独立练习,并尝试自己解释每条输出的含义。"
},
{
"id": "m9_l2_tar_backup",
"title": "归档与备份:tar / gzip",
"goal": "理解打包压缩和备份的基本思路。",
"why_it_matters": "备份不是把文件复制一下,而是要考虑归档、压缩和恢复。",
"concepts": [
"归档 vs 压缩",
"备份与恢复",
"打包文件"
],
"command": "tar / gzip",
"examples": [
"tar -czf backup.tar.gz /etc",
"tar -xzf backup.tar.gz -C /tmp"
],
"pitfalls": [
"只会备份,不会恢复验证",
"不知道 tar 和 gzip 各自扮演什么角色"
],
"scenarios": [
"备份配置目录",
"迁移文件集合"
],
"exercises": [
{
"id": "m9_l2_e1",
"type": "understanding",
"question": "为什么备份后最好做一次恢复验证?",
"answer": "因为只有验证过能恢复,备份才真正有意义"
},
{
"id": "m9_l2_e2",
"type": "scenario",
"question": "为什么很多运维备份会用 tar.gz?",
"answer": "因为它适合把多个文件归档后再压缩,便于传输和保存"
},
{
"id": "m9_l2_e3",
"type": "understanding",
"question": "tar 和 gzip 的角色区别是什么?",
"answer": "tar 负责打包归档,gzip 负责压缩"
}
],
"related_commands": [
"tar",
"gzip"
],
"classic_view": "教材视角:自动化不是炫技,而是把重复工作做成稳定、可复用、可回溯的流程。",
"takeaways": [
"学完后应能做到:理解打包压缩和备份的基本思路。",
"易错提醒:只会备份,不会恢复验证",
"迁移场景:备份配置目录"
],
"after_class": "课后建议:回到真实或模拟环境里,再用 tar、gzip 做一次独立练习,并尝试自己解释每条输出的含义。"
},
{
"id": "m9_l3_crontab_history",
"title": "定时任务与操作复盘:crontab / history",
"goal": "理解自动化执行与命令历史复盘的价值。",
"why_it_matters": "运维很多工作是周期性的,同时排障也离不开复盘。",
"concepts": [
"周期任务",
"命令历史",
"自动化意识"
],
"command": "crontab / history",
"examples": [
"crontab -l",
"history 10"
],
"pitfalls": [
"写了定时任务却不记录输出",
"不会利用 history 回顾近期操作"
],
"scenarios": [
"定时备份",
"回顾误操作"
],
"exercises": [
{
"id": "m9_l3_e1",
"type": "understanding",
"question": "为什么定时任务不只要能跑,还要关注日志和输出?",
"answer": "因为无人值守任务如果失败却没有输出记录,很难排查问题"
},
{
"id": "m9_l3_e2",
"type": "scenario",
"question": "复盘线上事故时,history 能提供什么帮助?",
"answer": "帮助确认最近执行过哪些命令,判断是否有变更触发了问题"
},
{
"id": "m9_l3_e3",
"type": "understanding",
"question": "为什么自动化不是“偷懒”,而是运维能力的一部分?",
"answer": "因为自动化能减少重复劳动、降低人为失误并提升稳定性"
}
],
"related_commands": [
"crontab",
"history"
],
"classic_view": "教材视角:自动化不是炫技,而是把重复工作做成稳定、可复用、可回溯的流程。",
"takeaways": [
"学完后应能做到:理解自动化执行与命令历史复盘的价值。",
"易错提醒:写了定时任务却不记录输出",
"迁移场景:定时备份"
],
"after_class": "课后建议:回到真实或模拟环境里,再用 crontab、history 做一次独立练习,并尝试自己解释每条输出的含义。"
}
]
},
{
"id": "module_10_incidents",
"title": "模块 10:运维综合实战场景",
"summary": "把前面所有命令和认知串起来,围绕真实故障场景形成排查链路。",
"lessons": [
{
"id": "m10_l1_service_down",
"title": "场景:服务无法访问排查",
"goal": "建立“先服务、再端口、再日志、再请求”的排查顺序。",
"why_it_matters": "这是最经典的运维问题之一。",
"concepts": [
"服务状态",
"端口监听",
"日志定位",
"HTTP 验证"
],
"command": "systemctl / ps / ss / journalctl / curl",
"examples": [
"systemctl status nginx",
"ss -ltnp",
"curl http://127.0.0.1:8080/health",
"systemctl status nginx && ss -ltnp | grep 80",
"journalctl -u nginx -n 50",
"curl -I http://127.0.0.1"
],
"pitfalls": [
"只看浏览器打不开,不看服务状态",
"没有层次地乱查",
"没有层次感地同时改服务、改配置、重启,导致问题更难定位"
],
"scenarios": [
"应用服务无法访问",
"线上服务返回 502/504",
"站点页面打不开但机器正常"
],
"exercises": [
{
"id": "m10_l1_e1",
"type": "scenario",
"question": "遇到“网站打不开”,一个合理的排查顺序是什么?",
"answer": "先看服务状态,再看进程和端口,再看日志,最后用 curl 验证接口"
},
{
"id": "m10_l1_e2",
"type": "understanding",
"question": "为什么不应该一上来就改配置?",
"answer": "因为先确认问题在哪一层更重要,盲改配置可能扩大问题"
},
{
"id": "m10_l1_e3",
"type": "scenario",
"question": "如果端口没监听,你下一步更应该看什么?",
"answer": "看服务状态和日志,确认是否启动失败或启动后立即退出"
}
],
"related_commands": [
"systemctl",
"ps",
"ss",
"journalctl",
"curl"
],
"classic_view": "教材视角:综合场景训练的重点不是记住某条命令,而是建立分层排障顺序和判断习惯。",
"takeaways": [
"学完后应能做到:建立“先服务、再端口、再日志、再请求”的排查顺序。",
"易错提醒:只看浏览器打不开,不看服务状态",
"迁移场景:应用服务无法访问",
"服务不可用时,排障要按层进行:服务 → 进程 → 端口 → 日志 → 请求。"
],
"after_class": "课后建议:回到真实或模拟环境里,再用 systemctl、ps、ss、journalctl、curl 做一次独立练习,并尝试自己解释每条输出的含义。"
},
{
"id": "m10_l2_disk_full",
"title": "场景:磁盘爆满排查",
"goal": "建立从 df 到 du 再到 find 的磁盘问题定位思路。",
"why_it_matters": "磁盘满会直接导致服务报错、写入失败和日志异常。",
"concepts": [
"文件系统空间",
"目录占用",
"大文件定位"
],
"command": "df / du / find / sort",
"examples": [
"df -h",
"du -sh /var/log",
"find /var/log -type f",
"df -h /var",
"du -sh /var/log/* | sort -h"
],
"pitfalls": [
"只看 df 不继续追目录",
"删文件前不确认用途",
"直接删除不熟悉的大文件,可能破坏恢复和排障线索"
],
"scenarios": [
"排查磁盘 100%",
"日志目录暴涨导致磁盘满",
"发布产物堆积导致空间不足"
],
"exercises": [
{
"id": "m10_l2_e1",
"type": "scenario",
"question": "磁盘爆满时,为什么通常先 df 再 du?",
"answer": "因为先确认哪个文件系统满了,再定位具体哪个目录占用大"
},
{
"id": "m10_l2_e2",
"type": "understanding",
"question": "为什么删除日志前要先确认是否还能用于排障?",
"answer": "因为日志可能是定位故障的关键证据,盲删会丢失排障线索"
},
{
"id": "m10_l2_e3",
"type": "scenario",
"question": "如果 /var/log 特别大,你会想到哪些命令组合?",
"answer": "df、du、find、sort 组合起来定位大文件和大目录"
}
],
"related_commands": [
"df",
"du",
"find",
"sort"
],
"classic_view": "教材视角:综合场景训练的重点不是记住某条命令,而是建立分层排障顺序和判断习惯。",
"takeaways": [
"学完后应能做到:建立从 df 到 du 再到 find 的磁盘问题定位思路。",
"易错提醒:只看 df 不继续追目录",
"迁移场景:排查磁盘 100%",
"磁盘排查的关键是先找文件系统,再找目录,再找大文件。"
],
"after_class": "课后建议:回到真实或模拟环境里,再用 df、du、find、sort 做一次独立练习,并尝试自己解释每条输出的含义。"
},
{
"id": "m10_l3_login_fail",
"title": "场景:登录失败与权限异常排查",
"goal": "把身份、权限、日志三者串起来理解。",
"why_it_matters": "很多系统问题最终都落在权限与身份边界上。",
"concepts": [
"当前身份",
"认证日志",
"权限边界"
],
"command": "whoami / id / passwd / grep / tail",
"examples": [
"whoami",
"id",
"tail -n 20 /var/log/auth.log",
"grep sandbox_user /etc/passwd",
"id sandbox_user"
],
"pitfalls": [
"只怀疑密码错误,不看日志",
"忽略组权限问题",
"只盯着密码,不看账号状态和权限配置"
],
"scenarios": [
"SSH 登录失败",
"执行权限不足",
"脚本执行提示无权限"
],
"exercises": [
{
"id": "m10_l3_e1",
"type": "scenario",
"question": "排查登录失败时,除了用户名密码,还要想到什么?",
"answer": "要看认证日志、用户身份、组信息和权限配置"
},
{
"id": "m10_l3_e2",
"type": "understanding",
"question": "为什么权限异常常常不能只靠肉眼猜?",
"answer": "因为真实问题可能同时涉及用户、组、文件权限和服务身份,需要结合命令验证"
},
{
"id": "m10_l3_e3",
"type": "scenario",
"question": "如果脚本明明存在却执行不了,你会从哪几类信息开始看?",
"answer": "先看 whoami/id,再看文件权限和属主属组,必要时看相关日志"
}
],
"related_commands": [
"whoami",
"id",
"passwd",
"grep",
"tail"
],
"classic_view": "教材视角:综合场景训练的重点不是记住某条命令,而是建立分层排障顺序和判断习惯。",
"takeaways": [
"学完后应能做到:把身份、权限、日志三者串起来理解。",
"易错提醒:只怀疑密码错误,不看日志",
"迁移场景:SSH 登录失败",
"登录失败排查要把身份、日志和权限一起看,不能只猜密码。"
],
"after_class": "课后建议:回到真实或模拟环境里,再用 whoami、id、passwd、grep、tail 做一次独立练习,并尝试自己解释每条输出的含义。"
}
]
}
]
}