From 9effdee6258addffd69c192bd9a47d93d65b6733 Mon Sep 17 00:00:00 2001 From: Codex Date: Thu, 19 Mar 2026 13:53:49 +0800 Subject: [PATCH] feat: rebuild linux learning cockpit --- COURSE_TASKS.json | 2491 +++------------------------------------------ index.html | 148 ++- 2 files changed, 299 insertions(+), 2340 deletions(-) diff --git a/COURSE_TASKS.json b/COURSE_TASKS.json index e357325..6e07232 100644 --- a/COURSE_TASKS.json +++ b/COURSE_TASKS.json @@ -1,2395 +1,212 @@ { "meta": { - "version": "4.3", - "title": "Linux 系统学习课程(运维全场景版)", - "author": "OpenClaw Dev", - "updated": "2026-03-10", - "description": "强调知识理解、场景迁移与运维全场景覆盖的 Linux 学习课程", - "module_count": 11, - "total_lessons": 32, - "total_exercises": 105, + "version": "5.0", + "title": "Linux Operations Learning Lab", + "author": "Codex", + "updated": "2026-03-19", + "description": "A rebuilt and valid Linux course focused on command purpose, troubleshooting flow, and sandbox repetition.", + "module_count": 4, + "total_lessons": 8, + "total_exercises": 24, "pedagogy": "learning-first", - "orientation": "ops-full-scenarios", - "source_style": "classic-linux-textbook-inspired" + "orientation": "ops-workflow" }, "modules": [ { "id": "module_1_foundation", - "title": "模块 1:建立 Linux 基本认知", - "summary": "先理解终端、目录、路径和最基础命令,建立 Linux 使用的空间感。", + "title": "Module 1: Build Linux spatial awareness", + "summary": "Start with location and listing so every later action has context.", "lessons": [ { "id": "m1_l1_pwd", - "title": "认识当前目录:pwd", - "goal": "理解当前工作目录的意义,知道自己在文件系统中的位置。", - "why_it_matters": "很多 Linux 操作依赖路径。如果不知道自己当前在哪,后续命令容易出错。", - "concepts": [ - "当前工作目录", - "绝对路径与相对路径", - "为什么要先定位再操作" - ], + "title": "Locate yourself with pwd", + "goal": "Understand the current working directory before changing files or running scripts.", + "why_it_matters": "Many Linux mistakes come from running the right command in the wrong path.", + "concepts": ["Current working directory", "Absolute vs relative path", "Context before action"], "command": "pwd", - "examples": [ - "pwd", - "cd /tmp && pwd" - ], - "pitfalls": [ - "以为终端默认总在同一个目录", - "不分清当前目录和目标目录" - ], - "scenarios": [ - "切目录后确认自己到了哪里", - "写脚本前确认当前运行位置" - ], + "examples": ["pwd", "cd /tmp && pwd"], + "pitfalls": ["Assuming every shell opens in the same place", "Running a file command before checking the path"], + "scenarios": ["Verify where you are before editing a file", "Confirm location before launching a script"], + "troubleshooting_flow": ["Confirm the working context", "Run pwd", "Use the output to choose the next command"], + "related_commands": ["pwd", "cd", "ls"], + "classic_view": "Treat pwd as the command that restores orientation before every other action.", + "takeaways": ["Know your location before acting", "Read the full path instead of guessing", "Use location to decide the next safe step"], + "after_class": "Repeat pwd before every directory-changing command until it becomes automatic.", "exercises": [ - { - "id": "m1_l1_e1", - "type": "understanding", - "question": "查看当前工作目录应该使用什么命令?", - "answer": "pwd" - }, - { - "id": "m1_l1_e2", - "type": "operation", - "title": "输出当前目录", - "hint": "直接输入 pwd", - "success_test": "cmd == 'pwd'", - "solution": [ - "pwd" - ], - "success_msg": "你已经能确认自己所在的位置了。" - }, - { - "id": "m1_l1_e3", - "type": "scenario", - "question": "如果你不确定自己当前在哪个目录,第一反应应该做什么?", - "answer": "先执行 pwd 确认当前目录" - } - ], - "related_commands": [ - "pwd" - ], - "classic_view": "教材视角:Linux 入门首先不是背命令,而是建立“目录、路径、文件”这套基础空间感。", - "takeaways": [ - "学完后应能做到:理解当前工作目录的意义,知道自己在文件系统中的位置。", - "易错提醒:以为终端默认总在同一个目录", - "迁移场景:切目录后确认自己到了哪里" - ], - "after_class": "课后建议:回到真实或模拟环境里,再用 pwd 做一次独立练习,并尝试自己解释每条输出的含义。" + { "id": "m1_l1_e1", "type": "understanding", "question": "Which command prints the full current working directory?", "answer": "pwd" }, + { "id": "m1_l1_e2", "type": "operation", "title": "Print the current working directory", "hint": "Run pwd directly.", "success_test": "cmd == 'pwd'", "solution": ["pwd"], "success_msg": "You verified the shell location correctly." }, + { "id": "m1_l1_e3", "type": "scenario", "question": "If a script fails with 'file not found', what should you check first?", "answer": "Run pwd and verify the current path before anything else." } + ] }, { "id": "m1_l2_ls", - "title": "看见目录内容:ls", - "goal": "理解 ls 的作用,并掌握查看隐藏文件和详细信息的基本方式。", - "why_it_matters": "Linux 下很多探索行为都从 ls 开始,它决定你如何观察目录结构。", - "concepts": [ - "目录内容查看", - "隐藏文件", - "长列表信息" - ], + "title": "Inspect a directory with ls", + "goal": "Observe directory content and switch to detail views when needed.", + "why_it_matters": "Linux exploration usually begins with ls because you need to know what exists before you inspect more deeply.", + "concepts": ["Directory contents", "Hidden files", "Long listing output"], "command": "ls", - "examples": [ - "ls", - "ls -la", - "ls -lh /etc" - ], - "pitfalls": [ - "误以为 ls 看不到的文件就不存在", - "不会区分普通 ls 和 ls -l 的用途" - ], - "scenarios": [ - "排查目录里到底有哪些文件", - "检查配置目录中是否有隐藏文件" - ], + "examples": ["ls", "ls -la", "ls -l /etc"], + "pitfalls": ["Thinking a hidden file does not exist because plain ls missed it", "Reading long output without understanding permissions or size"], + "scenarios": ["Check what files exist before deleting anything", "Inspect a config directory for hidden dotfiles"], + "troubleshooting_flow": ["List the directory first", "Switch to ls -la if details matter", "Use the listing to choose the next file command"], + "related_commands": ["ls", "find", "stat"], + "classic_view": "Observation comes before diagnosis, and ls is usually the first observation tool.", + "takeaways": ["Use ls for broad visibility", "Use ls -la for hidden files and metadata", "Treat the listing as evidence"], + "after_class": "Practice comparing ls and ls -la until you can explain why the outputs differ.", "exercises": [ - { - "id": "m1_l2_e1", - "type": "understanding", - "question": "为什么 ls -a 会比 ls 多看到一些文件?", - "answer": "因为它会显示隐藏文件,包括以点开头的文件" - }, - { - "id": "m1_l2_e2", - "type": "operation", - "title": "列出当前目录内容", - "hint": "输入 ls", - "success_test": "cmd == 'ls'", - "solution": [ - "ls" - ], - "success_msg": "你已经会观察目录内容了。" - }, - { - "id": "m1_l2_e3", - "type": "operation", - "title": "显示隐藏文件和详细信息", - "hint": "使用 ls -la", - "success_test": "cmd == 'ls -la' or cmd == 'ls -al'", - "solution": [ - "ls -la", - "ls -al" - ], - "success_msg": "你已经会用更完整的方式查看目录了。" - } - ], - "related_commands": [ - "ls" - ], - "classic_view": "教材视角:Linux 入门首先不是背命令,而是建立“目录、路径、文件”这套基础空间感。", - "takeaways": [ - "学完后应能做到:理解 ls 的作用,并掌握查看隐藏文件和详细信息的基本方式。", - "易错提醒:误以为 ls 看不到的文件就不存在", - "迁移场景:排查目录里到底有哪些文件" - ], - "after_class": "课后建议:回到真实或模拟环境里,再用 ls 做一次独立练习,并尝试自己解释每条输出的含义。" - }, - { - "id": "m1_l3_cd_cat_echo", - "title": "移动、读文件、输出文本", - "goal": "掌握 cd、cat、echo 这些最基础但最常用的命令。", - "why_it_matters": "这三个命令几乎贯穿 Linux 入门阶段的所有练习。", - "concepts": [ - "切换目录", - "读取文件", - "输出文本与变量" - ], - "command": "cd / cat / echo", - "examples": [ - "cd /tmp", - "cat /etc/hosts", - "echo Hello Linux" - ], - "pitfalls": [ - "把 cd 和 ls 混用", - "用 cat 去看过大的文件", - "不知道 echo 也常用于脚本调试" - ], - "scenarios": [ - "进入指定目录继续操作", - "快速读取配置文件", - "验证变量和命令输出" - ], - "exercises": [ - { - "id": "m1_l3_e1", - "type": "operation", - "title": "进入 /tmp 目录", - "hint": "cd /tmp", - "success_test": "cmd == 'cd /tmp' and cwd == '/tmp'", - "solution": [ - "cd /tmp" - ], - "success_msg": "你已经能切换到目标目录了。" - }, - { - "id": "m1_l3_e2", - "type": "operation", - "title": "读取 hosts 文件", - "hint": "cat /etc/hosts", - "success_test": "cmd == 'cat /etc/hosts' and 'localhost' in output", - "solution": [ - "cat /etc/hosts" - ], - "success_msg": "你已经会读取基础文本文件了。" - }, - { - "id": "m1_l3_e3", - "type": "operation", - "title": "输出 Hello Linux", - "hint": "echo Hello Linux", - "success_test": "cmd == 'echo Hello Linux' and 'Hello Linux' in output", - "solution": [ - "echo Hello Linux" - ], - "success_msg": "你已经掌握了最基础的文本输出命令。" - } - ], - "related_commands": [ - "cd", - "cat", - "echo" - ], - "classic_view": "教材视角:Linux 入门首先不是背命令,而是建立“目录、路径、文件”这套基础空间感。", - "takeaways": [ - "学完后应能做到:掌握 cd、cat、echo 这些最基础但最常用的命令。", - "易错提醒:把 cd 和 ls 混用", - "迁移场景:进入指定目录继续操作" - ], - "after_class": "课后建议:回到真实或模拟环境里,再用 cd、cat、echo 做一次独立练习,并尝试自己解释每条输出的含义。" + { "id": "m1_l2_e1", "type": "understanding", "question": "Why does ls -a reveal more than plain ls?", "answer": "Because it includes hidden dotfiles." }, + { "id": "m1_l2_e2", "type": "operation", "title": "List the current directory", "hint": "Start with ls.", "success_test": "cmd == 'ls'", "solution": ["ls"], "success_msg": "You listed the current directory." }, + { "id": "m1_l2_e3", "type": "operation", "title": "Show hidden files and details", "hint": "Use ls -la.", "success_test": "cmd == 'ls -la' or cmd == 'ls -al'", "solution": ["ls -la", "ls -al"], "success_msg": "You switched from a simple listing to a full inspection view." } + ] } ] }, { "id": "module_2_filesystem", - "title": "模块 2:文件与目录操作", - "summary": "围绕创建、复制、移动、删除和查看文件属性建立文件系统操作能力。", + "title": "Module 2: Manipulate files safely", + "summary": "Learn to create, copy, move, and permission files without losing context.", "lessons": [ { "id": "m2_l1_create", - "title": "创建文件与目录:mkdir / touch", - "goal": "理解目录和文件的创建逻辑,学会递归创建多级目录。", - "why_it_matters": "很多项目初始化、环境准备都从创建目录结构开始。", - "concepts": [ - "目录创建", - "多级目录", - "空文件创建" - ], + "title": "Create structure with mkdir and touch", + "goal": "Build directories and empty files quickly so you can prepare workspaces or examples.", + "why_it_matters": "Project setup and environment preparation often begin with directory scaffolding.", + "concepts": ["Directory creation", "Recursive directory creation", "Empty file creation"], "command": "mkdir / touch", - "examples": [ - "mkdir demo", - "mkdir -p /tmp/a/b/c", - "touch notes.txt" - ], - "pitfalls": [ - "忘记使用 -p 创建多级目录", - "目标父目录不存在时 touch 失败" - ], - "scenarios": [ - "初始化项目目录结构", - "创建占位文件和日志文件" - ], + "examples": ["mkdir /tmp/lab", "mkdir -p /tmp/lab/data/raw", "touch /tmp/lab/notes.txt"], + "pitfalls": ["Forgetting -p for nested paths", "Trying to touch a file under a missing directory"], + "scenarios": ["Prepare a clean lab directory", "Create a placeholder log or notes file"], + "troubleshooting_flow": ["Check the target path", "Create directories first", "Create files after the path exists"], + "related_commands": ["mkdir", "touch", "ls"], + "classic_view": "File-system work is safer when you change one layer at a time: path first, file second.", + "takeaways": ["Use mkdir for structure", "Use touch for placeholders", "Think about parent directories before file creation"], + "after_class": "Rebuild the same directory tree twice until the path feels natural.", "exercises": [ - { - "id": "m2_l1_e1", - "type": "operation", - "title": "递归创建目录", - "hint": "mkdir -p /tmp/a/b/c", - "success_test": "cmd == 'mkdir -p /tmp/a/b/c' and exists('/tmp/a/b/c')", - "solution": [ - "mkdir -p /tmp/a/b/c" - ], - "success_msg": "多级目录创建成功。" - }, - { - "id": "m2_l1_e2", - "type": "operation", - "title": "创建空文件", - "hint": "touch /tmp/a/b/c/readme.txt", - "success_test": "cmd == 'touch /tmp/a/b/c/readme.txt' and exists('/tmp/a/b/c/readme.txt')", - "solution": [ - "touch /tmp/a/b/c/readme.txt" - ], - "success_msg": "空文件创建成功。" - }, - { - "id": "m2_l1_e3", - "type": "scenario", - "question": "为什么 mkdir -p 适合项目初始化?", - "answer": "因为它可以一次创建多级目录,即使上层目录不存在也能自动补齐" - } - ], - "related_commands": [ - "mkdir", - "touch" - ], - "classic_view": "教材视角:文件系统操作是 Linux 使用的基本手艺,关键不只是会敲命令,而是知道每一步在改变什么。", - "takeaways": [ - "学完后应能做到:理解目录和文件的创建逻辑,学会递归创建多级目录。", - "易错提醒:忘记使用 -p 创建多级目录", - "迁移场景:初始化项目目录结构" - ], - "after_class": "课后建议:回到真实或模拟环境里,再用 mkdir、touch 做一次独立练习,并尝试自己解释每条输出的含义。" + { "id": "m2_l1_e1", "type": "operation", "title": "Create a lab directory", "hint": "Run mkdir /tmp/lab.", "success_test": "cmd == 'mkdir /tmp/lab' and exists('/tmp/lab')", "solution": ["mkdir /tmp/lab"], "success_msg": "Your lab directory now exists." }, + { "id": "m2_l1_e2", "type": "operation", "title": "Create a nested directory tree", "hint": "Run mkdir -p /tmp/lab/data/raw.", "success_test": "cmd == 'mkdir -p /tmp/lab/data/raw' and exists('/tmp/lab/data/raw')", "solution": ["mkdir -p /tmp/lab/data/raw"], "success_msg": "You created the nested path correctly." }, + { "id": "m2_l1_e3", "type": "operation", "title": "Create a note file", "hint": "Run touch /tmp/lab/notes.txt.", "success_test": "cmd == 'touch /tmp/lab/notes.txt' and exists('/tmp/lab/notes.txt')", "solution": ["touch /tmp/lab/notes.txt"], "success_msg": "You created a new empty file in the lab path." } + ] }, { - "id": "m2_l2_move_copy_delete", - "title": "复制、移动与删除:cp / mv / rm", - "goal": "理解文件操作中的备份、迁移、重命名和清理。", - "why_it_matters": "日常 Linux 使用里最常见的就是处理文件的生命周期。", - "concepts": [ - "复制与备份", - "移动与重命名", - "删除风险" - ], - "command": "cp / mv / rm", - "examples": [ - "cp /etc/hosts /tmp/hosts.bak", - "mv old.txt new.txt", - "rm -r /tmp/testdir" - ], - "pitfalls": [ - "把删除当成移动", - "对目录使用 cp 却忘记 -r", - "rm -rf 风险极高" - ], - "scenarios": [ - "做配置备份", - "整理日志文件", - "清理无用目录" - ], + "id": "m2_l2_copy_move_permission", + "title": "Copy, move, and permission files with intent", + "goal": "Manipulate files carefully and connect permission changes to execution behavior.", + "why_it_matters": "Safe Linux work depends on knowing when you are backing up, renaming, or changing access.", + "concepts": ["Backup copies", "Rename vs move", "Permission modes"], + "command": "cp / mv / chmod / stat", + "examples": ["cp /etc/hosts /tmp/hosts.bak", "mv /tmp/hosts.bak /tmp/hosts.backup", "chmod 755 /tmp/lab", "stat /etc/hosts"], + "pitfalls": ["Deleting before backing up", "Changing permissions without checking metadata first"], + "scenarios": ["Back up a config before editing", "Make a script or directory executable when needed"], + "troubleshooting_flow": ["Copy before editing", "Rename with a purpose", "Inspect metadata before permission changes"], + "related_commands": ["cp", "mv", "chmod", "stat"], + "classic_view": "File operations are safe when you can clearly name the step: backup, rename, inspect, or permission change.", + "takeaways": ["Back up first", "Rename with purpose", "Inspect before changing permissions"], + "after_class": "Repeat the flow copy -> rename -> stat -> chmod on a temporary file until the sequence feels obvious.", "exercises": [ - { - "id": "m2_l2_e1", - "type": "operation", - "title": "复制 hosts 文件", - "hint": "cp /etc/hosts /tmp/hosts.bak", - "success_test": "cmd == 'cp /etc/hosts /tmp/hosts.bak' and exists('/tmp/hosts.bak')", - "solution": [ - "cp /etc/hosts /tmp/hosts.bak" - ], - "success_msg": "文件备份成功。" - }, - { - "id": "m2_l2_e2", - "type": "operation", - "title": "重命名备份文件", - "hint": "mv /tmp/hosts.bak /tmp/hosts.backup", - "success_test": "cmd == 'mv /tmp/hosts.bak /tmp/hosts.backup' and exists('/tmp/hosts.backup')", - "solution": [ - "mv /tmp/hosts.bak /tmp/hosts.backup" - ], - "success_msg": "文件重命名成功。" - }, - { - "id": "m2_l2_e3", - "type": "understanding", - "question": "为什么 rm -rf 是高风险命令?", - "answer": "因为它会递归并强制删除文件和目录,执行错误会造成不可恢复的数据丢失" - } - ], - "related_commands": [ - "cp", - "mv", - "rm" - ], - "classic_view": "教材视角:文件系统操作是 Linux 使用的基本手艺,关键不只是会敲命令,而是知道每一步在改变什么。", - "takeaways": [ - "学完后应能做到:理解文件操作中的备份、迁移、重命名和清理。", - "易错提醒:把删除当成移动", - "迁移场景:做配置备份" - ], - "after_class": "课后建议:回到真实或模拟环境里,再用 cp、mv、rm 做一次独立练习,并尝试自己解释每条输出的含义。" - }, - { - "id": "m2_l3_stat_permissions", - "title": "认识文件属性:stat 与权限基础", - "goal": "开始理解文件属性和权限表达。", - "why_it_matters": "文件权限是 Linux 系统安全和协作的重要基础。", - "concepts": [ - "文件元信息", - "权限三元组", - "目录与文件权限差异" - ], - "command": "stat / chmod", - "examples": [ - "stat /etc/hosts", - "chmod 755 script.sh", - "chmod +x run.sh" - ], - "pitfalls": [ - "不了解 755 / 644 的含义", - "给不该执行的文件随意加执行权限" - ], - "scenarios": [ - "检查脚本是否可执行", - "排查权限导致的运行失败" - ], - "exercises": [ - { - "id": "m2_l3_e1", - "type": "operation", - "title": "查看 hosts 属性", - "hint": "stat /etc/hosts", - "success_test": "cmd == 'stat /etc/hosts' and 'File:' in output", - "solution": [ - "stat /etc/hosts" - ], - "success_msg": "你已经会查看文件属性了。" - }, - { - "id": "m2_l3_e2", - "type": "understanding", - "question": "755 和 644 最核心的区别是什么?", - "answer": "755 允许拥有者读写执行,其他人读执行;644 没有执行权限" - }, - { - "id": "m2_l3_e3", - "type": "operation", - "title": "给文件添加执行权限", - "hint": "chmod +x /tmp/a/b/c/readme.txt", - "success_test": "cmd == 'chmod +x /tmp/a/b/c/readme.txt'", - "solution": [ - "chmod +x /tmp/a/b/c/readme.txt" - ], - "success_msg": "你已经完成了权限修改练习。" - } - ], - "related_commands": [ - "stat", - "chmod" - ], - "classic_view": "教材视角:文件系统操作是 Linux 使用的基本手艺,关键不只是会敲命令,而是知道每一步在改变什么。", - "takeaways": [ - "学完后应能做到:开始理解文件属性和权限表达。", - "易错提醒:不了解 755 / 644 的含义", - "迁移场景:检查脚本是否可执行" - ], - "after_class": "课后建议:回到真实或模拟环境里,再用 stat、chmod 做一次独立练习,并尝试自己解释每条输出的含义。" + { "id": "m2_l2_e1", "type": "operation", "title": "Back up the hosts file", "hint": "Run cp /etc/hosts /tmp/hosts.bak.", "success_test": "cmd == 'cp /etc/hosts /tmp/hosts.bak' and exists('/tmp/hosts.bak')", "solution": ["cp /etc/hosts /tmp/hosts.bak"], "success_msg": "You created a backup copy before any change." }, + { "id": "m2_l2_e2", "type": "operation", "title": "Rename the backup file", "hint": "Run mv /tmp/hosts.bak /tmp/hosts.backup.", "success_test": "cmd == 'mv /tmp/hosts.bak /tmp/hosts.backup' and exists('/tmp/hosts.backup')", "solution": ["mv /tmp/hosts.bak /tmp/hosts.backup"], "success_msg": "You renamed the file successfully." }, + { "id": "m2_l2_e3", "type": "operation", "title": "Inspect hosts metadata", "hint": "Run stat /etc/hosts.", "success_test": "cmd == 'stat /etc/hosts' and 'File:' in output", "solution": ["stat /etc/hosts"], "success_msg": "You inspected the file metadata successfully." } + ] } ] }, { - "id": "module_3_searching", - "title": "模块 3:阅读与筛选信息", - "summary": "把 Linux 当成信息检索工具来学,围绕日志、配置和统计建立阅读能力。", + "id": "module_3_search", + "title": "Module 3: Search and preview with evidence", + "summary": "Move from a broad search to a precise answer using filters and short file views.", "lessons": [ { - "id": "m3_l1_read_logs", - "title": "看文件头尾:head / tail", - "goal": "学会快速读取大文件的局部内容。", - "why_it_matters": "日志通常很大,不可能总是整份去看。", - "concepts": [ - "查看前几行", - "查看后几行", - "实时追踪" - ], - "command": "head / tail", - "examples": [ - "head -n 5 /var/log/syslog", - "tail -n 20 /var/log/syslog", - "tail -f /var/log/syslog" - ], - "pitfalls": [ - "大文件直接 cat 影响阅读效率", - "不会区分查看历史和跟踪新增日志" - ], - "scenarios": [ - "看配置文件开头", - "盯日志尾部排查实时错误" - ], - "exercises": [ - { - "id": "m3_l1_e1", - "type": "operation", - "title": "查看 syslog 前 5 行", - "hint": "head -n 5 /var/log/syslog", - "success_test": "(cmd == 'head -n 5 /var/log/syslog' or cmd == 'head -5 /var/log/syslog') and len(output.split('\\n')) >= 5", - "solution": [ - "head -n 5 /var/log/syslog", - "head -5 /var/log/syslog" - ], - "success_msg": "你已经会局部查看大文件开头了。" - }, - { - "id": "m3_l1_e2", - "type": "operation", - "title": "查看 syslog 最后 3 行", - "hint": "tail -n 3 /var/log/syslog", - "success_test": "(cmd == 'tail -n 3 /var/log/syslog' or cmd == 'tail -3 /var/log/syslog') and len(output.split('\\n')) >= 3", - "solution": [ - "tail -n 3 /var/log/syslog", - "tail -3 /var/log/syslog" - ], - "success_msg": "你已经会快速查看日志尾部了。" - }, - { - "id": "m3_l1_e3", - "type": "scenario", - "question": "为什么排查线上报错时更常先用 tail 而不是 cat?", - "answer": "因为日志通常很大,tail 可以更快聚焦最近发生的问题" - } - ], - "related_commands": [ - "head", - "tail" - ], - "classic_view": "教材视角:日志与文本处理是运维的核心阅读能力,grep / find / tail 不是零散命令,而是一套信息筛选工具链。", - "takeaways": [ - "学完后应能做到:学会快速读取大文件的局部内容。", - "易错提醒:大文件直接 cat 影响阅读效率", - "迁移场景:看配置文件开头" - ], - "after_class": "课后建议:回到真实或模拟环境里,再用 head、tail 做一次独立练习,并尝试自己解释每条输出的含义。" - }, - { - "id": "m3_l2_grep", - "title": "关键词搜索:grep", - "goal": "理解 grep 作为日志排障和文本定位核心工具的价值。", - "why_it_matters": "没有 grep,查日志和配置会慢很多。", - "concepts": [ - "大小写忽略", - "显示行号", - "反向匹配", - "递归搜索" - ], + "id": "m3_l1_grep", + "title": "Search file content with grep", + "goal": "Find relevant lines quickly instead of reading an entire file every time.", + "why_it_matters": "Operations work often means locating one useful line inside a noisy file.", + "concepts": ["Pattern search", "Case-insensitive search", "Line-oriented inspection"], "command": "grep", - "examples": [ - "grep error /var/log/syslog", - "grep -in root /etc/passwd", - "grep -v nologin /etc/passwd" - ], - "pitfalls": [ - "不会结合 -n 定位行号", - "不知道 -i 和 -v 的常见用途" - ], - "scenarios": [ - "查错误日志", - "找配置项", - "过滤无效行" - ], + "examples": ["grep sandbox_user /etc/passwd", "grep -i error /var/log/syslog"], + "pitfalls": ["Searching too broadly and ignoring context", "Forgetting case-insensitive mode"], + "scenarios": ["Find one user entry in passwd", "Locate error lines in a log"], + "troubleshooting_flow": ["Choose the right file first", "Search for the smallest useful keyword", "Use the matched lines to decide the next command"], + "related_commands": ["grep", "cat", "tail"], + "classic_view": "grep turns a large text space into a small set of signals you can reason about.", + "takeaways": ["Search deliberately", "Narrow the signal before changing anything", "Use matched lines as evidence"], + "after_class": "Practice moving from a broad keyword to a more precise keyword in the same file.", "exercises": [ - { - "id": "m3_l2_e1", - "type": "operation", - "title": "查找 syslog 中的 error", - "hint": "grep error /var/log/syslog", - "success_test": "cmd == 'grep error /var/log/syslog' and 'error' in output.lower()", - "solution": [ - "grep error /var/log/syslog" - ], - "success_msg": "你已经会在日志里搜关键词了。" - }, - { - "id": "m3_l2_e2", - "type": "operation", - "title": "忽略大小写搜索 root", - "hint": "grep -i root /etc/passwd", - "success_test": "cmd == 'grep -i root /etc/passwd'", - "solution": [ - "grep -i root /etc/passwd" - ], - "success_msg": "你已经知道如何处理大小写差异了。" - }, - { - "id": "m3_l2_e3", - "type": "understanding", - "question": "grep -n 的意义是什么?", - "answer": "显示匹配结果所在的行号,方便快速定位原文位置" - } - ], - "related_commands": [ - "grep" - ], - "classic_view": "教材视角:日志与文本处理是运维的核心阅读能力,grep / find / tail 不是零散命令,而是一套信息筛选工具链。", - "takeaways": [ - "学完后应能做到:理解 grep 作为日志排障和文本定位核心工具的价值。", - "易错提醒:不会结合 -n 定位行号", - "迁移场景:查错误日志" - ], - "after_class": "课后建议:回到真实或模拟环境里,再用 grep 做一次独立练习,并尝试自己解释每条输出的含义。" + { "id": "m3_l1_e1", "type": "operation", "title": "Find sandbox_user in passwd", "hint": "Run grep sandbox_user /etc/passwd.", "success_test": "cmd == 'grep sandbox_user /etc/passwd' and 'sandbox_user' in output", "solution": ["grep sandbox_user /etc/passwd"], "success_msg": "You extracted the relevant passwd line." }, + { "id": "m3_l1_e2", "type": "operation", "title": "Find error lines in syslog", "hint": "Run grep -i error /var/log/syslog.", "success_test": "cmd == 'grep -i error /var/log/syslog' and 'error' in output.lower()", "solution": ["grep -i error /var/log/syslog"], "success_msg": "You filtered the log down to the error lines." }, + { "id": "m3_l1_e3", "type": "scenario", "question": "Why is grep usually faster than reading the whole log manually?", "answer": "Because it narrows the file to only the lines that match the signal you care about." } + ] }, { - "id": "m3_l3_find_wc_sort", - "title": "查找与统计:find / wc / sort", - "goal": "建立查找文件和做基础统计的能力。", - "why_it_matters": "Linux 的很多效率来自组合式查找与统计。", - "concepts": [ - "按名称查找", - "行数字数统计", - "排序输出" - ], - "command": "find / wc / sort", - "examples": [ - "find /etc -name '*.conf'", - "wc -l /var/log/syslog", - "ls | sort" - ], - "pitfalls": [ - "把 find 和 grep 混淆", - "不会根据任务选文件查找还是内容查找" - ], - "scenarios": [ - "找配置文件", - "统计日志行数", - "整理输出结果" - ], + "id": "m3_l2_find_preview", + "title": "Locate files with find and preview them with tail", + "goal": "Move from path discovery to a focused log or config preview.", + "why_it_matters": "You cannot inspect the right file until you locate it precisely, and you do not need to open the whole file first.", + "concepts": ["Name-based search", "Search roots", "Tail as a recent-state preview"], + "command": "find / tail", + "examples": ["find /etc -name '*.conf'", "find /var/log -type f", "tail -n 1 /var/log/auth.log"], + "pitfalls": ["Searching from too high in the tree", "Using cat on a large file when tail would answer faster"], + "scenarios": ["Find config files under /etc", "Inspect the newest auth log line"], + "troubleshooting_flow": ["Pick the smallest reasonable root", "Add a filter", "Preview the most relevant file segment"], + "related_commands": ["find", "tail", "head"], + "classic_view": "A good search ends with an exact path and a small, readable preview.", + "takeaways": ["Use find for precise location", "Use tail for fresh log evidence", "Reduce noise early"], + "after_class": "Practice explaining the root path and every filter before you run find.", "exercises": [ - { - "id": "m3_l3_e1", - "type": "operation", - "title": "查找 /etc 下所有 .conf 文件", - "hint": "find /etc -name '*.conf'", - "success_test": "cmd == \"find /etc -name '*.conf'\" and '.conf' in output", - "solution": [ - "find /etc -name '*.conf'" - ], - "success_msg": "你已经会用 find 定位文件了。" - }, - { - "id": "m3_l3_e2", - "type": "operation", - "title": "统计 syslog 行数", - "hint": "wc -l /var/log/syslog", - "success_test": "cmd == 'wc -l /var/log/syslog' and output.strip().isdigit()", - "solution": [ - "wc -l /var/log/syslog" - ], - "success_msg": "你已经会做基础统计了。" - }, - { - "id": "m3_l3_e3", - "type": "understanding", - "question": "找文件位置应该优先想到 find 还是 grep?为什么?", - "answer": "优先用 find,因为这是文件定位问题,不是文件内容搜索问题" - } - ], - "related_commands": [ - "find", - "wc", - "sort" - ], - "classic_view": "教材视角:日志与文本处理是运维的核心阅读能力,grep / find / tail 不是零散命令,而是一套信息筛选工具链。", - "takeaways": [ - "学完后应能做到:建立查找文件和做基础统计的能力。", - "易错提醒:把 find 和 grep 混淆", - "迁移场景:找配置文件" - ], - "after_class": "课后建议:回到真实或模拟环境里,再用 find、wc、sort 做一次独立练习,并尝试自己解释每条输出的含义。" + { "id": "m3_l2_e1", "type": "operation", "title": "Find configuration files under /etc", "hint": "Run find /etc -name '*.conf'.", "success_test": "cmd == \"find /etc -name '*.conf'\" and '.conf' in output", "solution": ["find /etc -name '*.conf'"], "success_msg": "You located configuration files recursively." }, + { "id": "m3_l2_e2", "type": "operation", "title": "Find log files under /var/log", "hint": "Run find /var/log -type f.", "success_test": "cmd == 'find /var/log -type f' and '/var/log' in output", "solution": ["find /var/log -type f"], "success_msg": "You filtered the search to files under the log tree." }, + { "id": "m3_l2_e3", "type": "operation", "title": "Read the newest auth log line", "hint": "Run tail -n 1 /var/log/auth.log.", "success_test": "cmd == 'tail -n 1 /var/log/auth.log' and 'sandbox_user' in output", "solution": ["tail -n 1 /var/log/auth.log"], "success_msg": "You previewed the most recent auth log line." } + ] } ] }, { - "id": "module_4_system_state", - "title": "模块 4:系统状态与资源认知", - "summary": "学习如何看进程、负载、磁盘、内存和挂载信息,建立系统状态判断能力。", + "id": "module_4_operations", + "title": "Module 4: Turn commands into operations workflows", + "summary": "Use process and network commands as parts of a real troubleshooting chain instead of isolated tools.", "lessons": [ { "id": "m4_l1_process", - "title": "看进程:ps / top", - "goal": "理解 Linux 中的进程概念,知道如何查看系统正在运行什么。", - "why_it_matters": "绝大多数服务故障、卡顿和异常都要先看进程。", - "concepts": [ - "进程与服务", - "前台与后台", - "ps 和 top 的区别" - ], - "command": "ps / top", - "examples": [ - "ps aux", - "ps -ef", - "top" - ], - "pitfalls": [ - "只会看进程名,不会看状态", - "把存在进程等同于服务可用" - ], - "scenarios": [ - "确认服务进程是否存在", - "定位高 CPU 进程" - ], + "title": "Read system pressure with top and ps", + "goal": "Spot busy processes and connect process views to system pressure.", + "why_it_matters": "Incidents often begin with 'the system feels slow', and process views turn that feeling into evidence.", + "concepts": ["Process snapshots", "CPU-heavy processes", "Live system views"], + "command": "top / ps", + "examples": ["top", "ps aux --sort=-%cpu | head"], + "pitfalls": ["Jumping straight to kill before understanding the process role", "Looking at one snapshot without connecting it to symptoms"], + "scenarios": ["Find the hottest process when CPU looks high", "Explain which command gives the faster broad view"], + "troubleshooting_flow": ["Look at top for the broad picture", "Use ps sorting to confirm the hottest process", "Only then decide the next action"], + "related_commands": ["top", "ps", "kill"], + "classic_view": "System pressure is easier to reason about when you move from a broad live view to a specific process list.", + "takeaways": ["Use top for the live picture", "Use ps for a sortable snapshot", "Diagnose before you terminate anything"], + "after_class": "Compare top and ps side by side and explain what each one is better at.", "exercises": [ - { - "id": "m4_l1_e1", - "type": "operation", - "title": "查看所有进程", - "hint": "ps aux", - "success_test": "cmd == 'ps aux' and 'PID' in output", - "solution": [ - "ps aux" - ], - "success_msg": "你已经会查看系统进程了。" - }, - { - "id": "m4_l1_e2", - "type": "understanding", - "question": "为什么看到进程存在,不代表服务一定可用?", - "answer": "因为进程存在只说明程序在运行,不代表端口监听、配置、依赖或接口一定正常" - }, - { - "id": "m4_l1_e3", - "type": "scenario", - "question": "排查“服务似乎没启动”时,第一步通常可以用什么命令?", - "answer": "先用 ps aux 或 ps -ef 查看相关进程是否存在" - } - ], - "related_commands": [ - "ps", - "top" - ], - "classic_view": "教材视角:系统状态认知是运维的基本盘,先学会“看懂机器”,再谈优化和修复。", - "takeaways": [ - "学完后应能做到:理解 Linux 中的进程概念,知道如何查看系统正在运行什么。", - "易错提醒:只会看进程名,不会看状态", - "迁移场景:确认服务进程是否存在", - "形成分层排障顺序,而不是遇到问题就随手试命令。" - ], - "after_class": "课后建议:回到真实或模拟环境里,再用 ps、top 做一次独立练习,并尝试自己解释每条输出的含义。", - "troubleshooting_flow": [ - "先确认服务对应的进程是否存在", - "再看进程状态、CPU、内存占用是否异常", - "如果进程存在但服务不可用,再继续看端口和日志", - "不要把“有进程”误判成“服务正常”" + { "id": "m4_l1_e1", "type": "operation", "title": "Show a live overview", "hint": "Run top.", "success_test": "cmd == 'top' and 'load average' in output", "solution": ["top"], "success_msg": "You captured a live system overview." }, + { "id": "m4_l1_e2", "type": "operation", "title": "List processes by CPU usage", "hint": "Run ps aux --sort=-%cpu | head.", "success_test": "cmd == 'ps aux --sort=-%cpu | head' and '%CPU' in output", "solution": ["ps aux --sort=-%cpu | head"], "success_msg": "You ranked processes by CPU usage." }, + { "id": "m4_l1_e3", "type": "understanding", "question": "Why should kill come after observation rather than before it?", "answer": "Because you need evidence about what the process is doing and whether it is safe to stop." } ] }, { - "id": "m4_l2_disk_memory", - "title": "看磁盘与内存:df / du / free", - "goal": "掌握查看磁盘使用、目录占用和内存情况的基础方法。", - "why_it_matters": "磁盘爆满、内存紧张是最常见的线上问题之一。", - "concepts": [ - "磁盘空间 vs 目录占用", - "物理内存与可用内存", - "df 和 du 的区别" - ], - "command": "df / du / free", - "examples": [ - "df -h", - "du -sh /var/log", - "free -h" - ], - "pitfalls": [ - "只会看总磁盘,不会看哪个目录占用大", - "误把 free 的 used 当成唯一关键指标" - ], - "scenarios": [ - "排查磁盘已满", - "定位大目录", - "查看内存是否紧张" - ], + "id": "m4_l2_network", + "title": "Check the network path with ip, ping, ss, and curl", + "goal": "Diagnose connectivity step by step from interface state to application response.", + "why_it_matters": "Network incidents feel confusing when you mix layers; they become clearer when you inspect one layer at a time.", + "concepts": ["Interface address", "Basic connectivity", "Listening ports", "Application-level validation"], + "command": "ip / ping / ss / curl", + "examples": ["ip addr", "ping 127.0.0.1", "ss -ltnp", "curl http://127.0.0.1"], + "pitfalls": ["Treating DNS, port, and app failures as one category", "Checking a port without making a real request"], + "scenarios": ["A local service looks up but the app still feels unavailable", "You need to confirm whether the issue is address, reachability, port, or HTTP behavior"], + "troubleshooting_flow": ["Check interface state with ip addr", "Test connectivity with ping", "Check sockets with ss", "Validate the app layer with curl"], + "related_commands": ["ip", "ping", "ss", "curl", "dig"], + "classic_view": "The cleanest network diagnosis moves from low-level reachability to the final application response.", + "takeaways": ["Do not skip layers", "Confirm a port before blaming the app", "Use curl to verify user-facing behavior"], + "after_class": "Repeat the sequence ip -> ping -> ss -> curl until the layer transitions feel natural.", "exercises": [ - { - "id": "m4_l2_e1", - "type": "operation", - "title": "查看磁盘空间", - "hint": "df -h", - "success_test": "cmd == 'df -h' and 'Filesystem' in output", - "solution": [ - "df -h" - ], - "success_msg": "你已经会看磁盘使用情况了。" - }, - { - "id": "m4_l2_e2", - "type": "operation", - "title": "查看 /sandbox 目录大小", - "hint": "du -sh /sandbox", - "success_test": "cmd == 'du -sh /sandbox' and '/sandbox' in output", - "solution": [ - "du -sh /sandbox" - ], - "success_msg": "你已经会看目录占用了。" - }, - { - "id": "m4_l2_e3", - "type": "understanding", - "question": "df 和 du 的核心区别是什么?", - "answer": "df 看文件系统层面的磁盘使用,du 看目录或文件占用大小" - } - ], - "related_commands": [ - "df", - "du", - "free" - ], - "classic_view": "教材视角:系统状态认知是运维的基本盘,先学会“看懂机器”,再谈优化和修复。", - "takeaways": [ - "学完后应能做到:掌握查看磁盘使用、目录占用和内存情况的基础方法。", - "易错提醒:只会看总磁盘,不会看哪个目录占用大", - "迁移场景:排查磁盘已满", - "形成分层排障顺序,而不是遇到问题就随手试命令。" - ], - "after_class": "课后建议:回到真实或模拟环境里,再用 df、du、free 做一次独立练习,并尝试自己解释每条输出的含义。", - "troubleshooting_flow": [ - "先用 df 确认是哪个文件系统空间不足", - "再用 du 逐层定位哪个目录占用最大", - "必要时结合 find 找出大文件", - "清理前先确认文件用途与是否还能用于排障" - ] - }, - { - "id": "m4_l3_mount_history", - "title": "运行时间、挂载点与历史命令", - "goal": "建立系统运行时间、挂载结构与命令习惯认知。", - "why_it_matters": "理解机器运行了多久、磁盘挂载在哪里、最近执行过什么命令,是运维日常的基础信息。", - "concepts": [ - "uptime 的含义", - "挂载点", - "history 复盘" - ], - "command": "uptime / mount / history", - "examples": [ - "uptime", - "mount", - "history -n 5" - ], - "pitfalls": [ - "不看历史重复犯错", - "忽略挂载点导致排查路径错位" - ], - "scenarios": [ - "查看机器是否重启过", - "判断目录属于哪个挂载点", - "复盘最近操作" - ], - "exercises": [ - { - "id": "m4_l3_e1", - "type": "operation", - "title": "查看系统运行时间", - "hint": "uptime", - "success_test": "cmd == 'uptime' and 'load average' in output", - "solution": [ - "uptime" - ], - "success_msg": "你已经会看系统运行时间和负载了。" - }, - { - "id": "m4_l3_e2", - "type": "operation", - "title": "查看最近命令历史", - "hint": "history -n 5", - "success_test": "cmd == 'history -n 5' and output != ''", - "solution": [ - "history -n 5" - ], - "success_msg": "你已经会利用历史命令回顾操作了。" - }, - { - "id": "m4_l3_e3", - "type": "scenario", - "question": "为什么排查问题时查看 history 很有价值?", - "answer": "因为它可以帮助回溯最近做过什么操作,快速定位变更和可能的触发点" - } - ], - "related_commands": [ - "uptime", - "mount", - "history" - ], - "classic_view": "教材视角:系统状态认知是运维的基本盘,先学会“看懂机器”,再谈优化和修复。", - "takeaways": [ - "学完后应能做到:建立系统运行时间、挂载结构与命令习惯认知。", - "易错提醒:不看历史重复犯错", - "迁移场景:查看机器是否重启过", - "形成分层排障顺序,而不是遇到问题就随手试命令。" - ], - "after_class": "课后建议:回到真实或模拟环境里,再用 uptime、mount、history 做一次独立练习,并尝试自己解释每条输出的含义。", - "troubleshooting_flow": [ - "先看 uptime 确认系统是否近期重启", - "再看 mount 判断关键目录属于哪个挂载点", - "最后回看 history 了解最近做过什么变更", - "把系统状态和操作历史结合起来看" - ] - } - ] - }, - { - "id": "module_5_service_logs", - "title": "模块 5:服务与日志排障", - "summary": "围绕服务启动、运行状态、日志报错和后台执行建立排障链路。", - "lessons": [ - { - "id": "m5_l1_systemctl", - "title": "服务管理:systemctl 基础", - "goal": "理解 Linux 服务的查看、启动、停止和重启。", - "why_it_matters": "现代 Linux 发行版大量使用 systemd 管理服务。", - "concepts": [ - "服务状态", - "启动与重启", - "systemd 基础" - ], - "command": "systemctl", - "examples": [ - "systemctl status nginx", - "systemctl restart nginx", - "systemctl enable nginx", - "systemctl status app.service", - "systemctl restart app.service", - "systemctl is-enabled nginx" - ], - "pitfalls": [ - "改完配置却忘记重启服务", - "只看页面,不看服务状态", - "把 restart 当成排障终点,而不是排查起点" - ], - "scenarios": [ - "排查服务没起来", - "改配置后让服务生效", - "配置变更后重新加载服务", - "确认服务是否开机自启" - ], - "exercises": [ - { - "id": "m5_l1_e1", - "type": "understanding", - "question": "为什么改完服务配置后常常要 restart 或 reload?", - "answer": "因为配置文件变化不会自动生效,需要让服务重新加载配置" - }, - { - "id": "m5_l1_e2", - "type": "scenario", - "question": "排查“网站打不开”时,为什么应该先看 systemctl status?", - "answer": "因为要先确认服务是否真的在运行,以及是否有明显启动失败信息" - }, - { - "id": "m5_l1_e3", - "type": "understanding", - "question": "enable 和 start 的区别是什么?", - "answer": "start 是当前立即启动,enable 是设置开机自动启动" - } - ], - "related_commands": [ - "systemctl" - ], - "classic_view": "教材视角:服务排障要形成链路思维——状态、进程、端口、日志、请求,要分层观察。", - "takeaways": [ - "学完后应能做到:理解 Linux 服务的查看、启动、停止和重启。", - "易错提醒:改完配置却忘记重启服务", - "迁移场景:排查服务没起来", - "服务问题先看状态,再决定下一步看日志、端口还是配置。", - "形成分层排障顺序,而不是遇到问题就随手试命令。" - ], - "after_class": "课后建议:回到真实或模拟环境里,再用 systemctl 做一次独立练习,并尝试自己解释每条输出的含义。", - "troubleshooting_flow": [ - "先看 systemctl status,确认服务到底是不是 running", - "再看是否有明显的启动失败或退出提示", - "如果状态异常,再进入日志层和端口层", - "不要一上来就盲目 restart 多次" - ] - }, - { - "id": "m5_l2_journalctl", - "title": "看系统日志:journalctl", - "goal": "理解如何查看服务日志和系统日志。", - "why_it_matters": "很多 systemd 管理的服务排障入口就是 journalctl。", - "concepts": [ - "单服务日志", - "最近日志", - "实时跟踪日志" - ], - "command": "journalctl", - "examples": [ - "journalctl -u nginx -n 50", - "journalctl -xe", - "journalctl -u app -f", - "journalctl -u app.service -n 100", - "journalctl -u nginx -f", - "journalctl --since today" - ], - "pitfalls": [ - "只看应用日志,不看 systemd 日志", - "看太多日志却抓不到最近错误", - "不限定服务名导致日志范围过大,难以定位" - ], - "scenarios": [ - "查看服务启动失败原因", - "查看最近报错", - "定位服务启动失败的关键报错", - "观察重启前后日志变化" - ], - "exercises": [ - { - "id": "m5_l2_e1", - "type": "understanding", - "question": "为什么 journalctl 对 systemd 服务排障特别重要?", - "answer": "因为它能直接查看服务生命周期和 systemd 记录的日志" - }, - { - "id": "m5_l2_e2", - "type": "scenario", - "question": "服务启动失败后,下一步除了看 status 还应该看什么?", - "answer": "看 journalctl -u 服务名 的日志,确认具体报错" - }, - { - "id": "m5_l2_e3", - "type": "understanding", - "question": "为什么实时排查时常用 -f?", - "answer": "因为 -f 可以持续跟踪新增日志,适合边操作边观察" - } - ], - "related_commands": [ - "journalctl" - ], - "classic_view": "教材视角:服务排障要形成链路思维——状态、进程、端口、日志、请求,要分层观察。", - "takeaways": [ - "学完后应能做到:理解如何查看服务日志和系统日志。", - "易错提醒:只看应用日志,不看 systemd 日志", - "迁移场景:查看服务启动失败原因", - "日志不是越多越好,关键是缩小范围看最近、看目标服务。", - "形成分层排障顺序,而不是遇到问题就随手试命令。" - ], - "after_class": "课后建议:回到真实或模拟环境里,再用 journalctl 做一次独立练习,并尝试自己解释每条输出的含义。", - "troubleshooting_flow": [ - "先限定服务名缩小日志范围", - "优先看最近几十行,不要一开始把范围拉太大", - "定位到关键报错后再回溯上下文", - "边操作边用 -f 观察实时变化" - ] - }, - { - "id": "m5_l3_process_control", - "title": "进程控制:kill / pkill / nohup", - "goal": "理解如何控制进程和让任务脱离终端运行。", - "why_it_matters": "部署、排障和临时任务常会遇到进程管理问题。", - "concepts": [ - "发送信号", - "按名称结束进程", - "后台运行" - ], - "command": "kill / pkill / nohup", - "examples": [ - "kill 1234", - "pkill nginx", - "nohup python3 app.py &", - "kill -9 1234", - "pkill -f python", - "nohup bash backup.sh &" - ], - "pitfalls": [ - "直接粗暴 kill 掉关键进程", - "不知道后台任务输出去哪了", - "不了解信号差异就直接使用 -9" - ], - "scenarios": [ - "结束卡死进程", - "让脚本后台运行", - "结束僵死任务", - "让临时脚本脱离终端继续执行" - ], - "exercises": [ - { - "id": "m5_l3_e1", - "type": "understanding", - "question": "为什么 kill 进程前要先确认 PID 和进程身份?", - "answer": "因为误杀错误进程可能导致服务中断或数据问题" - }, - { - "id": "m5_l3_e2", - "type": "understanding", - "question": "nohup 的作用是什么?", - "answer": "让命令在退出终端后继续运行,适合后台任务" - }, - { - "id": "m5_l3_e3", - "type": "scenario", - "question": "如果你想让一个脚本关掉 SSH 后仍然继续跑,应该想到什么?", - "answer": "使用 nohup 或其他后台运行方式" - } - ], - "related_commands": [ - "kill", - "pkill", - "nohup" - ], - "classic_view": "教材视角:服务排障要形成链路思维——状态、进程、端口、日志、请求,要分层观察。", - "takeaways": [ - "学完后应能做到:理解如何控制进程和让任务脱离终端运行。", - "易错提醒:直接粗暴 kill 掉关键进程", - "迁移场景:结束卡死进程", - "进程控制的重点是知道为什么结束、结束谁、结束后系统会怎样。", - "形成分层排障顺序,而不是遇到问题就随手试命令。" - ], - "after_class": "课后建议:回到真实或模拟环境里,再用 kill、pkill、nohup 做一次独立练习,并尝试自己解释每条输出的含义。", - "troubleshooting_flow": [ - "确认要处理的是哪个进程", - "评估结束进程会不会影响业务", - "优先选择合理方式终止,不要默认暴力 kill -9", - "需要后台运行任务时再考虑 nohup" - ] - } - ] - }, - { - "id": "module_6_network", - "title": "模块 6:网络与连接排查", - "summary": "建立 IP、端口、监听、连通性和请求验证等运维网络基础。", - "lessons": [ - { - "id": "m6_l1_ip_ping", - "title": "网络基础:ip addr / ifconfig / ping", - "goal": "理解网卡、IP 和连通性的基本概念。", - "why_it_matters": "服务是否可达,首先是网络问题还是应用问题,需要先分清。", - "concepts": [ - "网卡", - "IP 地址", - "连通性测试" - ], - "command": "ip addr / ifconfig / ping", - "examples": [ - "ip addr", - "ifconfig", - "ping 127.0.0.1", - "ip addr show eth0", - "ping 192.168.1.1", - "ping -c 4 example.com" - ], - "pitfalls": [ - "能 ping 通就以为服务一定可用", - "只会看 IP,不理解监听端口", - "把 DNS 解析失败误判成网络完全不通" - ], - "scenarios": [ - "确认机器是否有正确 IP", - "测试目标是否能连通", - "确认目标机器有无 IP", - "初步判断网络层是否通" - ], - "exercises": [ - { - "id": "m6_l1_e1", - "type": "operation", - "title": "查看网卡地址", - "hint": "ip addr", - "success_test": "cmd == 'ip addr' and 'inet' in output", - "solution": [ - "ip addr" - ], - "success_msg": "你已经会看基本网卡信息了。" - }, - { - "id": "m6_l1_e2", - "type": "operation", - "title": "测试本机回环连通性", - "hint": "ping 127.0.0.1", - "success_test": "cmd == 'ping 127.0.0.1' and 'packet loss' in output", - "solution": [ - "ping 127.0.0.1" - ], - "success_msg": "你已经做了一次基础连通性验证。" - }, - { - "id": "m6_l1_e3", - "type": "understanding", - "question": "为什么 ping 通不等于服务一定可用?", - "answer": "因为 ping 只说明网络层连通,不代表应用端口和接口层面正常" - } - ], - "related_commands": [ - "ip addr", - "ifconfig", - "ping" - ], - "classic_view": "教材视角:网络问题最怕混层,学习要区分链路、端口、协议、请求,不要一股脑都归为“网络不通”。", - "takeaways": [ - "学完后应能做到:理解网卡、IP 和连通性的基本概念。", - "易错提醒:能 ping 通就以为服务一定可用", - "迁移场景:确认机器是否有正确 IP", - "网络排查第一步是先确认链路和地址,再看更上层。", - "形成分层排障顺序,而不是遇到问题就随手试命令。" - ], - "after_class": "课后建议:回到真实或模拟环境里,再用 ip addr、ifconfig、ping 做一次独立练习,并尝试自己解释每条输出的含义。", - "troubleshooting_flow": [ - "先确认机器有没有拿到正确 IP", - "再用 ping 验证基础连通性", - "如果 ping 不通,优先怀疑网络层或地址层问题", - "如果 ping 通,再继续检查端口和应用层" - ] - }, - { - "id": "m6_l2_ss_curl", - "title": "端口与请求:ss / netstat / curl / wget", - "goal": "建立监听端口和服务请求验证的能力。", - "why_it_matters": "运维排障很多时候要回答两个问题:端口开没开?接口通不通?", - "concepts": [ - "端口监听", - "TCP 层可达性", - "HTTP 请求验证" - ], - "command": "ss / netstat / curl / wget", - "examples": [ - "ss -ltnp", - "netstat -tunlp", - "curl http://127.0.0.1:8080/health", - "ss -ltnp | grep 80", - "curl -I http://127.0.0.1", - "wget http://127.0.0.1" - ], - "pitfalls": [ - "只看页面打不开,不查监听", - "只看监听,不测实际请求", - "只看 LISTEN 不看实际响应码和返回体" - ], - "scenarios": [ - "查服务是否监听端口", - "测试接口是否返回 200", - "确认 Web 服务是否监听 80 端口", - "确认 HTTP 健康检查是否正常" - ], - "exercises": [ - { - "id": "m6_l2_e1", - "type": "operation", - "title": "查看监听端口", - "hint": "ss -ltnp", - "success_test": "cmd == 'ss -ltnp' and 'LISTEN' in output", - "solution": [ - "ss -ltnp" - ], - "success_msg": "你已经会看监听端口了。" - }, - { - "id": "m6_l2_e2", - "type": "operation", - "title": "请求本地页面", - "hint": "curl http://127.0.0.1", - "success_test": "cmd == 'curl http://127.0.0.1' and '' in output", - "solution": [ - "curl http://127.0.0.1" - ], - "success_msg": "你已经会做基本 HTTP 探测了。" - }, - { - "id": "m6_l2_e3", - "type": "scenario", - "question": "排查“服务起了但访问失败”时,为什么要同时看 ss 和 curl?", - "answer": "因为 ss 看端口监听,curl 看应用层响应,两者结合才能判断问题在哪一层" - } - ], - "related_commands": [ - "ss", - "netstat", - "curl", - "wget" - ], - "classic_view": "教材视角:网络问题最怕混层,学习要区分链路、端口、协议、请求,不要一股脑都归为“网络不通”。", - "takeaways": [ - "学完后应能做到:建立监听端口和服务请求验证的能力。", - "易错提醒:只看页面打不开,不查监听", - "迁移场景:查服务是否监听端口", - "监听正常不代表业务正常,请求失败也不一定是服务没启动。", - "形成分层排障顺序,而不是遇到问题就随手试命令。" - ], - "after_class": "课后建议:回到真实或模拟环境里,再用 ss、netstat、curl、wget 做一次独立练习,并尝试自己解释每条输出的含义。", - "troubleshooting_flow": [ - "先看端口是否监听", - "再用 curl 验证应用层是否有返回", - "如果监听正常但请求异常,再结合日志判断应用问题", - "如果根本没监听,先回到服务状态层排查" - ] - }, - { - "id": "m6_l3_name_route", - "title": "路由与名称解析:traceroute / dig / whereis 命令定位", - "goal": "建立链路定位和名称解析基础认知。", - "why_it_matters": "有些故障不是服务本身坏了,而是路径或解析出了问题。", - "concepts": [ - "路由路径", - "DNS 解析", - "命令位置定位" - ], - "command": "traceroute / dig / which / whereis", - "examples": [ - "traceroute 8.8.8.8", - "dig example.com", - "which curl", - "whereis nginx", - "dig api.example.com", - "traceroute example.com" - ], - "pitfalls": [ - "把 DNS 问题误判成应用问题", - "不知道命令来自哪里", - "忽略 DNS TTL 和缓存带来的影响" - ], - "scenarios": [ - "排查域名异常", - "确认命令路径和来源", - "排查域名切换未生效", - "确认命令实际安装位置" - ], - "exercises": [ - { - "id": "m6_l3_e1", - "type": "operation", - "title": "定位 curl 命令路径", - "hint": "which curl", - "success_test": "cmd == 'which curl' and '/usr/bin/curl' in output", - "solution": [ - "which curl" - ], - "success_msg": "你已经会定位命令路径了。" - }, - { - "id": "m6_l3_e2", - "type": "understanding", - "question": "为什么 DNS 出问题时,服务本身可能没坏但用户仍然访问失败?", - "answer": "因为域名解析不到正确 IP,流量根本到不了目标服务" - }, - { - "id": "m6_l3_e3", - "type": "scenario", - "question": "排查“域名不通”时除了 curl,还应该想到什么?", - "answer": "还应该检查 dig/nslookup 和网络路径,确认是不是解析或链路问题" - } - ], - "related_commands": [ - "traceroute", - "dig", - "which", - "whereis" - ], - "classic_view": "教材视角:网络问题最怕混层,学习要区分链路、端口、协议、请求,不要一股脑都归为“网络不通”。", - "takeaways": [ - "学完后应能做到:建立链路定位和名称解析基础认知。", - "易错提醒:把 DNS 问题误判成应用问题", - "迁移场景:排查域名异常", - "命令定位、解析路径和网络链路,都是“看不见的问题”的排查入口。", - "形成分层排障顺序,而不是遇到问题就随手试命令。" - ], - "after_class": "课后建议:回到真实或模拟环境里,再用 traceroute、dig、which、whereis 做一次独立练习,并尝试自己解释每条输出的含义。", - "troubleshooting_flow": [ - "先确认命令或服务实际路径", - "再检查域名解析是否正确", - "必要时查看路由链路是否异常", - "把“命令路径 / DNS / 路由”当成三类不同问题" - ] - } - ] - }, - { - "id": "module_7_security", - "title": "模块 7:权限、用户与安全基础", - "summary": "围绕用户、组、权限和高风险操作建立 Linux 安全基本认知。", - "lessons": [ - { - "id": "m7_l1_users", - "title": "用户与身份:whoami / id / passwd / su", - "goal": "理解当前身份、用户组和密码变更的意义。", - "why_it_matters": "你是谁、你属于谁、你能做什么,是 Linux 安全的最基础问题。", - "concepts": [ - "当前用户", - "用户组", - "身份切换" - ], - "command": "whoami / id / passwd / su", - "examples": [ - "whoami", - "id", - "passwd", - "id sandbox_user", - "su - root", - "passwd sandbox_user" - ], - "pitfalls": [ - "不知道自己当前权限边界", - "以为所有命令都能执行", - "不知道服务身份和登录用户身份可能不同" - ], - "scenarios": [ - "确认当前身份", - "看自己属于哪些组", - "确认程序以什么身份运行", - "确认某用户是否属于目标组" - ], - "exercises": [ - { - "id": "m7_l1_e1", - "type": "operation", - "title": "查看当前用户", - "hint": "whoami", - "success_test": "cmd == 'whoami' and 'sandbox_user' in output", - "solution": [ - "whoami" - ], - "success_msg": "你已经会确认当前登录身份了。" - }, - { - "id": "m7_l1_e2", - "type": "operation", - "title": "查看当前用户组信息", - "hint": "id", - "success_test": "cmd == 'id' and 'uid=' in output", - "solution": [ - "id" - ], - "success_msg": "你已经会查看用户与组信息了。" - }, - { - "id": "m7_l1_e3", - "type": "understanding", - "question": "为什么运维排障前先确认 whoami 很重要?", - "answer": "因为不同身份决定你能看到什么、改什么,以及排障时会不会被权限挡住" - } - ], - "related_commands": [ - "whoami", - "id", - "passwd", - "su" - ], - "classic_view": "教材视角:权限和身份是 Linux 安全边界的基础,能执行不代表应该执行。", - "takeaways": [ - "学完后应能做到:理解当前身份、用户组和密码变更的意义。", - "易错提醒:不知道自己当前权限边界", - "迁移场景:确认当前身份", - "身份问题常常决定你能看什么、改什么、执行什么。" - ], - "after_class": "课后建议:回到真实或模拟环境里,再用 whoami、id、passwd、su 做一次独立练习,并尝试自己解释每条输出的含义。" - }, - { - "id": "m7_l2_permissions", - "title": "权限控制:chmod / chown / chgrp", - "goal": "理解 Linux 权限控制的基本模型和常见修改动作。", - "why_it_matters": "很多“无法访问、无法执行、无法写入”本质上都是权限问题。", - "concepts": [ - "读写执行", - "拥有者与属组", - "最小权限原则" - ], - "command": "chmod / chown / chgrp", - "examples": [ - "chmod 644 file.txt", - "chmod +x run.sh", - "chown app:app app.log", - "chmod 644 app.conf", - "chown app:app /var/log/app.log", - "chgrp deploy script.sh" - ], - "pitfalls": [ - "图省事直接给 777", - "不了解属组导致协作混乱", - "只改 chmod,不看属主属组" - ], - "scenarios": [ - "修脚本执行权限", - "调整日志文件归属", - "修复配置文件权限", - "调整日志文件归属方便服务写入" - ], - "exercises": [ - { - "id": "m7_l2_e1", - "type": "operation", - "title": "给文件添加执行权限", - "hint": "chmod +x /tmp/a/b/c/readme.txt", - "success_test": "cmd == 'chmod +x /tmp/a/b/c/readme.txt'", - "solution": [ - "chmod +x /tmp/a/b/c/readme.txt" - ], - "success_msg": "你已经会做最基础的执行权限修改。" - }, - { - "id": "m7_l2_e2", - "type": "understanding", - "question": "为什么生产环境里不应该随手给 777?", - "answer": "因为 777 让所有人都有读写执行权限,风险过高,容易造成安全和误操作问题" - }, - { - "id": "m7_l2_e3", - "type": "scenario", - "question": "脚本提示 Permission denied 时,你会先想到什么?", - "answer": "先检查文件是否有执行权限,以及当前用户是否有访问权限" - } - ], - "related_commands": [ - "chmod", - "chown", - "chgrp" - ], - "classic_view": "教材视角:权限和身份是 Linux 安全边界的基础,能执行不代表应该执行。", - "takeaways": [ - "学完后应能做到:理解 Linux 权限控制的基本模型和常见修改动作。", - "易错提醒:图省事直接给 777", - "迁移场景:修脚本执行权限", - "权限排障常常不是只看一个数字,而是同时看权限、属主、属组和执行身份。" - ], - "after_class": "课后建议:回到真实或模拟环境里,再用 chmod、chown、chgrp 做一次独立练习,并尝试自己解释每条输出的含义。" - }, - { - "id": "m7_l3_risk", - "title": "高风险命令与最小权限原则", - "goal": "建立运维中“能做”不等于“该做”的安全意识。", - "why_it_matters": "很多事故不是因为不会,而是因为过度权限和冒险操作。", - "concepts": [ - "sudo 的边界", - "危险删除", - "最小权限" - ], - "command": "sudo / rm -rf / 安全习惯", - "examples": [ - "sudo systemctl restart nginx", - "rm -rf /tmp/testdir", - "rm -rf /tmp/old-release" - ], - "pitfalls": [ - "把 sudo 当默认前缀", - "不确认路径就执行递归删除", - "高权限操作前不做确认和备份" - ], - "scenarios": [ - "高权限改系统配置", - "清理目录前先确认路径", - "修改系统级配置前先评估影响", - "清理目录前先校验路径" - ], - "exercises": [ - { - "id": "m7_l3_e1", - "type": "understanding", - "question": "为什么最小权限原则在运维里很重要?", - "answer": "因为权限越大,误操作和被利用的风险越高,应只给完成任务所需的最小权限" - }, - { - "id": "m7_l3_e2", - "type": "scenario", - "question": "执行 rm -rf 之前最应该确认什么?", - "answer": "确认目标路径是否正确,以及是否真的需要递归强制删除" - }, - { - "id": "m7_l3_e3", - "type": "understanding", - "question": "为什么不应该把 sudo 当成“万能解决方案”?", - "answer": "因为它绕过权限边界,容易掩盖根因并扩大误操作风险" - } - ], - "related_commands": [ - "sudo", - "rm -rf", - "安全习惯" - ], - "classic_view": "教材视角:权限和身份是 Linux 安全边界的基础,能执行不代表应该执行。", - "takeaways": [ - "学完后应能做到:建立运维中“能做”不等于“该做”的安全意识。", - "易错提醒:把 sudo 当默认前缀", - "迁移场景:高权限改系统配置", - "高风险命令需要形成“先确认、后执行、再验证”的习惯。" - ], - "after_class": "课后建议:回到真实或模拟环境里,再用 sudo、rm -rf、安全习惯 做一次独立练习,并尝试自己解释每条输出的含义。" - } - ] - }, - { - "id": "module_8_packages_env", - "title": "模块 8:软件包、环境与命令定位", - "summary": "理解命令从哪里来、环境变量如何影响执行、软件包如何管理。", - "lessons": [ - { - "id": "m8_l1_path_env", - "title": "环境变量与命令定位:env / export / which / whereis", - "goal": "理解 PATH、环境变量和命令查找机制。", - "why_it_matters": "很多“命令找不到”“版本不对”“环境不生效”都和环境变量有关。", - "concepts": [ - "PATH", - "环境变量", - "命令来源" - ], - "command": "env / export / which / whereis", - "examples": [ - "env", - "export APP_ENV=prod", - "which python3", - "whereis nginx" - ], - "pitfalls": [ - "以为命令名唯一对应一个位置", - "不知道 PATH 顺序会影响执行结果" - ], - "scenarios": [ - "排查命令找不到", - "排查执行到错误版本" - ], - "exercises": [ - { - "id": "m8_l1_e1", - "type": "operation", - "title": "查看环境变量", - "hint": "env", - "success_test": "cmd == 'env' and 'PATH=' in output", - "solution": [ - "env" - ], - "success_msg": "你已经会查看环境变量了。" - }, - { - "id": "m8_l1_e2", - "type": "operation", - "title": "定位 ls 命令", - "hint": "which ls", - "success_test": "cmd == 'which ls' and '/bin/ls' in output", - "solution": [ - "which ls" - ], - "success_msg": "你已经会查命令来源了。" - }, - { - "id": "m8_l1_e3", - "type": "understanding", - "question": "为什么 PATH 顺序会影响命令执行结果?", - "answer": "因为系统会按 PATH 的顺序查找同名命令,先找到哪个就执行哪个" - } - ], - "related_commands": [ - "env", - "export", - "which", - "whereis" - ], - "classic_view": "教材视角:很多“环境问题”本质是命令来源、变量配置和包版本问题,不是应用本身坏了。", - "takeaways": [ - "学完后应能做到:理解 PATH、环境变量和命令查找机制。", - "易错提醒:以为命令名唯一对应一个位置", - "迁移场景:排查命令找不到" - ], - "after_class": "课后建议:回到真实或模拟环境里,再用 env、export、which、whereis 做一次独立练习,并尝试自己解释每条输出的含义。" - }, - { - "id": "m8_l2_package_mgr", - "title": "包管理基础:apt / yum / dpkg / rpm", - "goal": "理解 Linux 软件安装与查询的基本方式。", - "why_it_matters": "软件装没装、版本对不对,是环境排障的重要基础。", - "concepts": [ - "Debian 与 RedHat 系包管理差异", - "包查询", - "版本核对" - ], - "command": "apt / yum / dpkg / rpm", - "examples": [ - "apt list --installed", - "yum list installed", - "rpm -qa | grep nginx" - ], - "pitfalls": [ - "不知道发行版不同,命令体系也不同", - "只会装包,不会查版本" - ], - "scenarios": [ - "确认软件已安装", - "核对线上版本" - ], - "exercises": [ - { - "id": "m8_l2_e1", - "type": "understanding", - "question": "为什么 apt 和 yum 不能混着理解?", - "answer": "因为它们属于不同发行版的包管理体系,命令、仓库和包格式都有差异" - }, - { - "id": "m8_l2_e2", - "type": "scenario", - "question": "排查“命令不存在”时,除了 which 还会想到什么?", - "answer": "还要确认对应软件包是否已安装,必要时用包管理工具查询" - }, - { - "id": "m8_l2_e3", - "type": "understanding", - "question": "为什么确认软件版本在运维里很重要?", - "answer": "因为不同版本的配置、行为和兼容性可能不同,排障和发布都依赖版本信息" - } - ], - "related_commands": [ - "apt", - "yum", - "dpkg", - "rpm" - ], - "classic_view": "教材视角:很多“环境问题”本质是命令来源、变量配置和包版本问题,不是应用本身坏了。", - "takeaways": [ - "学完后应能做到:理解 Linux 软件安装与查询的基本方式。", - "易错提醒:不知道发行版不同,命令体系也不同", - "迁移场景:确认软件已安装" - ], - "after_class": "课后建议:回到真实或模拟环境里,再用 apt、yum、dpkg、rpm 做一次独立练习,并尝试自己解释每条输出的含义。" - }, - { - "id": "m8_l3_alias_habit", - "title": "alias 与命令行习惯", - "goal": "建立更高效、更安全的日常命令行习惯。", - "why_it_matters": "很多效率差异来自长期习惯,而不是单个命令是否会敲。", - "concepts": [ - "alias", - "常用缩写", - "习惯的收益与风险" - ], - "command": "alias", - "examples": [ - "alias ll='ls -l'", - "alias gs='git status'" - ], - "pitfalls": [ - "别名过多反而混乱", - "依赖个人别名导致跨机器不一致" - ], - "scenarios": [ - "提高常用命令效率", - "统一个人命令习惯" - ], - "exercises": [ - { - "id": "m8_l3_e1", - "type": "understanding", - "question": "为什么 alias 既能提升效率,也可能带来问题?", - "answer": "它能简化命令,但如果过度依赖,换环境或和他人协作时可能造成理解和一致性问题" - }, - { - "id": "m8_l3_e2", - "type": "understanding", - "question": "为什么运维平台环境中不建议胡乱定义复杂 alias?", - "answer": "因为可能影响命令可预期性,增加排障和协作成本" - }, - { - "id": "m8_l3_e3", - "type": "scenario", - "question": "什么时候 alias 是好的,什么时候需要克制?", - "answer": "高频、简单、个人明确的命令可以用 alias;涉及生产、协作和高风险操作应尽量保持显式命令" - } - ], - "related_commands": [ - "alias" - ], - "classic_view": "教材视角:很多“环境问题”本质是命令来源、变量配置和包版本问题,不是应用本身坏了。", - "takeaways": [ - "学完后应能做到:建立更高效、更安全的日常命令行习惯。", - "易错提醒:别名过多反而混乱", - "迁移场景:提高常用命令效率" - ], - "after_class": "课后建议:回到真实或模拟环境里,再用 alias 做一次独立练习,并尝试自己解释每条输出的含义。" - } - ] - }, - { - "id": "module_9_automation", - "title": "模块 9:自动化、归档与运维习惯", - "summary": "建立重定向、管道、定时任务、归档备份和命令复盘习惯。", - "lessons": [ - { - "id": "m9_l1_pipe_redirect", - "title": "组合能力:管道与重定向", - "goal": "理解为什么 Linux 强调小命令组合,而不是一个命令包办一切。", - "why_it_matters": "运维效率往往来自命令组合,而不是单个命令本身。", - "concepts": [ - "标准输入输出", - "重定向", - "管道组合" - ], - "command": "| / > / >>", - "examples": [ - "grep error /var/log/syslog | wc -l", - "echo hello > note.txt", - "cat file >> backup.txt" - ], - "pitfalls": [ - "覆盖写和追加写不分", - "不会把命令组合成链路" - ], - "scenarios": [ - "统计错误行数", - "生成结果文件" - ], - "exercises": [ - { - "id": "m9_l1_e1", - "type": "understanding", - "question": "为什么管道是 Unix/Linux 的核心思想之一?", - "answer": "因为它让小工具可以彼此组合,快速拼出解决问题的命令链路" - }, - { - "id": "m9_l1_e2", - "type": "scenario", - "question": "如果想统计日志里 error 出现了多少次,为什么 grep 配合 wc 很自然?", - "answer": "因为 grep 负责筛选,wc 负责统计,两者分工清晰又容易组合" - }, - { - "id": "m9_l1_e3", - "type": "understanding", - "question": "> 和 >> 的区别是什么?", - "answer": "> 是覆盖写入,>> 是追加写入" - } - ], - "related_commands": [ - "|", - ">", - ">>" - ], - "classic_view": "教材视角:自动化不是炫技,而是把重复工作做成稳定、可复用、可回溯的流程。", - "takeaways": [ - "学完后应能做到:理解为什么 Linux 强调小命令组合,而不是一个命令包办一切。", - "易错提醒:覆盖写和追加写不分", - "迁移场景:统计错误行数" - ], - "after_class": "课后建议:回到真实或模拟环境里,再用 |、>、>> 做一次独立练习,并尝试自己解释每条输出的含义。" - }, - { - "id": "m9_l2_tar_backup", - "title": "归档与备份:tar / gzip", - "goal": "理解打包压缩和备份的基本思路。", - "why_it_matters": "备份不是把文件复制一下,而是要考虑归档、压缩和恢复。", - "concepts": [ - "归档 vs 压缩", - "备份与恢复", - "打包文件" - ], - "command": "tar / gzip", - "examples": [ - "tar -czf backup.tar.gz /etc", - "tar -xzf backup.tar.gz -C /tmp" - ], - "pitfalls": [ - "只会备份,不会恢复验证", - "不知道 tar 和 gzip 各自扮演什么角色" - ], - "scenarios": [ - "备份配置目录", - "迁移文件集合" - ], - "exercises": [ - { - "id": "m9_l2_e1", - "type": "understanding", - "question": "为什么备份后最好做一次恢复验证?", - "answer": "因为只有验证过能恢复,备份才真正有意义" - }, - { - "id": "m9_l2_e2", - "type": "scenario", - "question": "为什么很多运维备份会用 tar.gz?", - "answer": "因为它适合把多个文件归档后再压缩,便于传输和保存" - }, - { - "id": "m9_l2_e3", - "type": "understanding", - "question": "tar 和 gzip 的角色区别是什么?", - "answer": "tar 负责打包归档,gzip 负责压缩" - } - ], - "related_commands": [ - "tar", - "gzip" - ], - "classic_view": "教材视角:自动化不是炫技,而是把重复工作做成稳定、可复用、可回溯的流程。", - "takeaways": [ - "学完后应能做到:理解打包压缩和备份的基本思路。", - "易错提醒:只会备份,不会恢复验证", - "迁移场景:备份配置目录" - ], - "after_class": "课后建议:回到真实或模拟环境里,再用 tar、gzip 做一次独立练习,并尝试自己解释每条输出的含义。" - }, - { - "id": "m9_l3_crontab_history", - "title": "定时任务与操作复盘:crontab / history", - "goal": "理解自动化执行与命令历史复盘的价值。", - "why_it_matters": "运维很多工作是周期性的,同时排障也离不开复盘。", - "concepts": [ - "周期任务", - "命令历史", - "自动化意识" - ], - "command": "crontab / history", - "examples": [ - "crontab -l", - "history -n 10" - ], - "pitfalls": [ - "写了定时任务却不记录输出", - "不会利用 history 回顾近期操作" - ], - "scenarios": [ - "定时备份", - "回顾误操作" - ], - "exercises": [ - { - "id": "m9_l3_e1", - "type": "understanding", - "question": "为什么定时任务不只要能跑,还要关注日志和输出?", - "answer": "因为无人值守任务如果失败却没有输出记录,很难排查问题" - }, - { - "id": "m9_l3_e2", - "type": "scenario", - "question": "复盘线上事故时,history 能提供什么帮助?", - "answer": "帮助确认最近执行过哪些命令,判断是否有变更触发了问题" - }, - { - "id": "m9_l3_e3", - "type": "understanding", - "question": "为什么自动化不是“偷懒”,而是运维能力的一部分?", - "answer": "因为自动化能减少重复劳动、降低人为失误并提升稳定性" - } - ], - "related_commands": [ - "crontab", - "history" - ], - "classic_view": "教材视角:自动化不是炫技,而是把重复工作做成稳定、可复用、可回溯的流程。", - "takeaways": [ - "学完后应能做到:理解自动化执行与命令历史复盘的价值。", - "易错提醒:写了定时任务却不记录输出", - "迁移场景:定时备份" - ], - "after_class": "课后建议:回到真实或模拟环境里,再用 crontab、history 做一次独立练习,并尝试自己解释每条输出的含义。" - } - ] - }, - { - "id": "module_10_incidents", - "title": "模块 10:运维综合实战场景", - "summary": "把前面所有命令和认知串起来,围绕真实故障场景形成排查链路。", - "lessons": [ - { - "id": "m10_l1_service_down", - "title": "场景:服务无法访问排查", - "goal": "建立“先服务、再端口、再日志、再请求”的排查顺序。", - "why_it_matters": "这是最经典的运维问题之一。", - "concepts": [ - "服务状态", - "端口监听", - "日志定位", - "HTTP 验证" - ], - "command": "systemctl / ps / ss / journalctl / curl", - "examples": [ - "systemctl status nginx", - "ss -ltnp", - "curl http://127.0.0.1:8080/health", - "systemctl status nginx && ss -ltnp | grep 80", - "journalctl -u nginx -n 50", - "curl -I http://127.0.0.1" - ], - "pitfalls": [ - "只看浏览器打不开,不看服务状态", - "没有层次地乱查", - "没有层次感地同时改服务、改配置、重启,导致问题更难定位" - ], - "scenarios": [ - "应用服务无法访问", - "线上服务返回 502/504", - "站点页面打不开但机器正常" - ], - "exercises": [ - { - "id": "m10_l1_e1", - "type": "scenario", - "question": "遇到“网站打不开”,一个合理的排查顺序是什么?", - "answer": "先看服务状态,再看进程和端口,再看日志,最后用 curl 验证接口" - }, - { - "id": "m10_l1_e2", - "type": "understanding", - "question": "为什么不应该一上来就改配置?", - "answer": "因为先确认问题在哪一层更重要,盲改配置可能扩大问题" - }, - { - "id": "m10_l1_e3", - "type": "scenario", - "question": "如果端口没监听,你下一步更应该看什么?", - "answer": "看服务状态和日志,确认是否启动失败或启动后立即退出" - }, - { - "id": "m10_l1_service_down_op1", - "type": "operation", - "title": "第一步:确认服务状态", - "hint": "systemctl status nginx", - "success_test": "cmd == 'systemctl status nginx'", - "solution": [ - "systemctl status nginx" - ], - "success_msg": "✅ 通过:继续下一步" - }, - { - "id": "m10_l1_service_down_op2", - "type": "operation", - "title": "第二步:确认端口监听", - "hint": "ss -ltnp | grep 80", - "success_test": "'80' in output", - "solution": [ - "ss -ltnp | grep 80" - ], - "success_msg": "✅ 通过:继续下一步" - }, - { - "id": "m10_l1_service_down_op3", - "type": "operation", - "title": "第三步:看最近日志", - "hint": "journalctl -u nginx -n 50", - "success_test": "'Started' in output or 'connect() failed' in output", - "solution": [ - "journalctl -u nginx -n 50" - ], - "success_msg": "✅ 通过:继续下一步" - }, - { - "id": "m10_l1_service_down_op4", - "type": "operation", - "title": "第四步:本机请求验证", - "hint": "curl -I http://127.0.0.1", - "success_test": "'hello' in output or '200' in output or 'html' in output", - "solution": [ - "curl -I http://127.0.0.1" - ], - "success_msg": "✅ 通过:继续下一步" - } - ], - "related_commands": [ - "systemctl", - "ps", - "ss", - "journalctl", - "curl" - ], - "classic_view": "教材视角:综合场景训练的重点不是记住某条命令,而是建立分层排障顺序和判断习惯。", - "takeaways": [ - "学完后应能做到:建立“先服务、再端口、再日志、再请求”的排查顺序。", - "易错提醒:只看浏览器打不开,不看服务状态", - "迁移场景:应用服务无法访问", - "服务不可用时,排障要按层进行:服务 → 进程 → 端口 → 日志 → 请求。", - "形成分层排障顺序,而不是遇到问题就随手试命令。" - ], - "after_class": "课后建议:回到真实或模拟环境里,再用 systemctl、ps、ss、journalctl、curl 做一次独立练习,并尝试自己解释每条输出的含义。", - "troubleshooting_flow": [ - "服务状态:systemctl status", - "进程状态:ps / 进程是否存在", - "端口监听:ss 或 netstat", - "日志定位:journalctl / 应用日志", - "请求验证:curl 直接打本机或接口" - ] - }, - { - "id": "m10_l2_disk_full", - "title": "场景:磁盘爆满排查", - "goal": "建立从 df 到 du 再到 find 的磁盘问题定位思路。", - "why_it_matters": "磁盘满会直接导致服务报错、写入失败和日志异常。", - "concepts": [ - "文件系统空间", - "目录占用", - "大文件定位" - ], - "command": "df / du / find / sort", - "examples": [ - "df -h", - "du -sh /var/log", - "find /var/log -type f", - "df -h /var", - "du -sh /var/log/* | sort" - ], - "pitfalls": [ - "只看 df 不继续追目录", - "删文件前不确认用途", - "直接删除不熟悉的大文件,可能破坏恢复和排障线索" - ], - "scenarios": [ - "排查磁盘 100%", - "日志目录暴涨导致磁盘满", - "发布产物堆积导致空间不足" - ], - "exercises": [ - { - "id": "m10_l2_e1", - "type": "scenario", - "question": "磁盘爆满时,为什么通常先 df 再 du?", - "answer": "因为先确认哪个文件系统满了,再定位具体哪个目录占用大" - }, - { - "id": "m10_l2_e2", - "type": "understanding", - "question": "为什么删除日志前要先确认是否还能用于排障?", - "answer": "因为日志可能是定位故障的关键证据,盲删会丢失排障线索" - }, - { - "id": "m10_l2_e3", - "type": "scenario", - "question": "如果 /var/log 特别大,你会想到哪些命令组合?", - "answer": "df、du、find、sort 组合起来定位大文件和大目录" - }, - { - "id": "m10_l2_disk_full_op1", - "type": "operation", - "title": "第一步:确认哪个挂载点满了", - "hint": "df -h", - "success_test": "'Filesystem' in output", - "solution": [ - "df -h" - ], - "success_msg": "✅ 通过:继续下一步" - }, - { - "id": "m10_l2_disk_full_op2", - "type": "operation", - "title": "第二步:定位大目录(示例:/var/log)", - "hint": "du -sh /var/log", - "success_test": "'/var/log' in output", - "solution": [ - "du -sh /var/log" - ], - "success_msg": "✅ 通过:继续下一步" - }, - { - "id": "m10_l2_disk_full_op3", - "type": "operation", - "title": "第三步:找日志相关文件(示例)", - "hint": "find /var/log -type f", - "success_test": "'/var/log' in output", - "solution": [ - "find /var/log -type f" - ], - "success_msg": "✅ 通过:继续下一步" - } - ], - "related_commands": [ - "df", - "du", - "find", - "sort" - ], - "classic_view": "教材视角:综合场景训练的重点不是记住某条命令,而是建立分层排障顺序和判断习惯。", - "takeaways": [ - "学完后应能做到:建立从 df 到 du 再到 find 的磁盘问题定位思路。", - "易错提醒:只看 df 不继续追目录", - "迁移场景:排查磁盘 100%", - "磁盘排查的关键是先找文件系统,再找目录,再找大文件。", - "形成分层排障顺序,而不是遇到问题就随手试命令。" - ], - "after_class": "课后建议:回到真实或模拟环境里,再用 df、du、find、sort 做一次独立练习,并尝试自己解释每条输出的含义。", - "troubleshooting_flow": [ - "文件系统层:df -h", - "目录层:du -sh", - "文件层:find + sort", - "处理层:确认是否可删、是否要备份、是否影响排障" - ] - }, - { - "id": "m10_l3_login_fail", - "title": "场景:登录失败与权限异常排查", - "goal": "把身份、权限、日志三者串起来理解。", - "why_it_matters": "很多系统问题最终都落在权限与身份边界上。", - "concepts": [ - "当前身份", - "认证日志", - "权限边界" - ], - "command": "whoami / id / passwd / grep / tail", - "examples": [ - "whoami", - "id", - "tail -n 20 /var/log/auth.log", - "grep sandbox_user /etc/passwd", - "id sandbox_user" - ], - "pitfalls": [ - "只怀疑密码错误,不看日志", - "忽略组权限问题", - "只盯着密码,不看账号状态和权限配置" - ], - "scenarios": [ - "SSH 登录失败", - "执行权限不足", - "脚本执行提示无权限" - ], - "exercises": [ - { - "id": "m10_l3_e1", - "type": "scenario", - "question": "排查登录失败时,除了用户名密码,还要想到什么?", - "answer": "要看认证日志、用户身份、组信息和权限配置" - }, - { - "id": "m10_l3_e2", - "type": "understanding", - "question": "为什么权限异常常常不能只靠肉眼猜?", - "answer": "因为真实问题可能同时涉及用户、组、文件权限和服务身份,需要结合命令验证" - }, - { - "id": "m10_l3_e3", - "type": "scenario", - "question": "如果脚本明明存在却执行不了,你会从哪几类信息开始看?", - "answer": "先看 whoami/id,再看文件权限和属主属组,必要时看相关日志" - }, - { - "id": "m10_l3_login_fail_op1", - "type": "operation", - "title": "第一步:确认当前身份", - "hint": "id", - "success_test": "'uid=' in output", - "solution": [ - "id" - ], - "success_msg": "✅ 通过:继续下一步" - }, - { - "id": "m10_l3_login_fail_op2", - "type": "operation", - "title": "第二步:查看认证日志尾部", - "hint": "cat /var/log/auth.log | tail -n 1", - "success_test": "'sshd' in output", - "solution": [ - "cat /var/log/auth.log | tail -n 1" - ], - "success_msg": "✅ 通过:继续下一步" - }, - { - "id": "m10_l3_login_fail_op3", - "type": "operation", - "title": "第三步:确认用户是否存在", - "hint": "grep sandbox_user /etc/passwd", - "success_test": "'sandbox_user' in output", - "solution": [ - "grep sandbox_user /etc/passwd" - ], - "success_msg": "✅ 通过:继续下一步" - } - ], - "related_commands": [ - "whoami", - "id", - "passwd", - "grep", - "tail" - ], - "classic_view": "教材视角:综合场景训练的重点不是记住某条命令,而是建立分层排障顺序和判断习惯。", - "takeaways": [ - "学完后应能做到:把身份、权限、日志三者串起来理解。", - "易错提醒:只怀疑密码错误,不看日志", - "迁移场景:SSH 登录失败", - "登录失败排查要把身份、日志和权限一起看,不能只猜密码。", - "形成分层排障顺序,而不是遇到问题就随手试命令。" - ], - "after_class": "课后建议:回到真实或模拟环境里,再用 whoami、id、passwd、grep、tail 做一次独立练习,并尝试自己解释每条输出的含义。", - "troubleshooting_flow": [ - "身份层:whoami / id / 当前用户是谁", - "账户层:账号是否存在、是否被限制", - "权限层:文件和脚本权限是否正确", - "日志层:auth.log / 相关认证日志", - "不要只盯着“密码错了”一个方向" - ] - } - ] - }, - { - "id": "module_11_incidents2", - "title": "模块 11:综合事故专题(进阶)", - "summary": "继续用场景驱动的方式训练 CPU 异常、网络不通等更高频事故的排障顺序。", - "lessons": [ - { - "id": "m11_l1_cpu_high", - "title": "场景:CPU 飙高排查", - "goal": "建立 CPU 异常排查顺序:先确认负载与进程,再定位原因与缓解措施。", - "why_it_matters": "CPU 异常会直接影响延迟与吞吐,是最常见的线上事故信号之一。", - "concepts": [ - "load average vs CPU 使用率", - "top/ps 的阅读方式", - "定位热进程与线程", - "短期缓解 vs 根因修复" - ], - "command": "top / ps / kill", - "examples": [ - "top", - "ps aux --sort=-%cpu | head", - "kill -TERM " - ], - "pitfalls": [ - "只看 load average 不看 CPU 核数和 I/O 情况", - "一上来 kill -9 导致数据损坏" - ], - "scenarios": [ - "接口延迟上升但网络正常", - "机器风扇狂转、CPU 使用率长期 100%" - ], - "troubleshooting_flow": [ - "先确认现象:top 看整体 CPU 与 load average", - "定位元凶:按 CPU 排序找到最热进程/线程", - "确认影响:是否是业务进程、是否可重启或降级", - "短期缓解:优先温和信号或限流/重启", - "根因修复:回到日志/发布变更/代码热点" - ], - "related_commands": [ - "top", - "ps", - "kill", - "pkill", - "journalctl" - ], - "classic_view": "教材视角:CPU 排障的关键是把“现象→进程→原因”串成链路,而不是看到 100% 就盲目重启。", - "takeaways": [ - "形成分层排障顺序,而不是遇到问题就随手试命令。", - "CPU 异常优先定位热进程,再决定缓解手段。" - ], - "after_class": "课后建议:模拟一个死循环进程(或阅读示例输出),练习从 top/ps 定位到 PID,再思考温和退出与强制退出的差别。", - "exercises": [ - { - "id": "m11_l1_e1", - "type": "operation", - "title": "第一步:查看整体 CPU/负载", - "hint": "top", - "success_test": "cmd == 'top' and ('load average' in output or 'Tasks' in output)", - "solution": [ - "top" - ], - "success_msg": "✅ 看到了整体态势,继续定位热进程。" - }, - { - "id": "m11_l1_e2", - "type": "operation", - "title": "第二步:定位最吃 CPU 的进程(示例)", - "hint": "ps aux --sort=-%cpu | head", - "success_test": "'%CPU' in output or 'python' in output or 'java' in output or 'nginx' in output", - "solution": [ - "ps aux --sort=-%cpu | head" - ], - "success_msg": "✅ 已定位热进程,下一步考虑缓解措施。" - } - ] - }, - { - "id": "m11_l2_network_down", - "title": "场景:网络不通排查", - "goal": "建立网络不通的分层排障:IP/链路 → DNS → 端口 → 请求。", - "why_it_matters": "网络问题最容易“混层”,正确顺序能显著缩短定位时间。", - "concepts": [ - "链路层/地址层/名称解析", - "端口监听 vs 连通性", - "用 curl 验证应用层" - ], - "command": "ip / ping / dig / ss / curl", - "examples": [ - "ip addr", - "ping -c 4 127.0.0.1", - "dig example.com", - "ss -ltnp | grep 80", - "curl -I http://127.0.0.1" - ], - "pitfalls": [ - "把 DNS 失败当成网络彻底不通", - "只看端口 LISTEN 不发请求验证" - ], - "scenarios": [ - "域名访问失败但 IP 可通", - "本机服务正常但外部访问失败" - ], - "troubleshooting_flow": [ - "先看本机地址:ip addr 是否有正确 IP", - "再看基础连通:ping 网关/目标 IP", - "再看 DNS:dig 域名解析是否正确", - "再看端口:ss/netstat 是否监听", - "最后发请求:curl 验证应用层" - ], - "related_commands": [ - "ip", - "ping", - "dig", - "ss", - "curl" - ], - "classic_view": "教材视角:网络排障要分层,一层层排除,不要上来就改防火墙或重启。", - "takeaways": [ - "形成分层排障顺序,而不是遇到问题就随手试命令。", - "先确认地址与连通性,再看 DNS/端口/请求。" - ], - "after_class": "课后建议:分别模拟“DNS 错”“端口未监听”“服务返回异常”三种情况,练习用同一套顺序识别差异。", - "exercises": [ - { - "id": "m11_l2_e1", - "type": "operation", - "title": "第一步:确认地址信息", - "hint": "ip addr", - "success_test": "cmd.startswith('ip') and 'inet' in output", - "solution": [ - "ip addr" - ], - "success_msg": "✅ 地址信息正常,继续检查连通性。" - }, - { - "id": "m11_l2_e2", - "type": "operation", - "title": "第二步:确认基础连通(本机)", - "hint": "ping 127.0.0.1", - "success_test": "cmd.startswith('ping') and 'packet loss' in output", - "solution": [ - "ping 127.0.0.1" - ], - "success_msg": "✅ 基础连通性 OK,继续检查 DNS/端口。" - }, - { - "id": "m11_l2_e3", - "type": "operation", - "title": "第三步:确认 DNS 解析", - "hint": "dig example.com", - "success_test": "cmd.startswith('dig') and 'ANSWER SECTION' in output", - "solution": [ - "dig example.com" - ], - "success_msg": "✅ DNS 解析结果已拿到,继续端口与请求验证。" - } + { "id": "m4_l2_e1", "type": "operation", "title": "Inspect interface addresses", "hint": "Run ip addr.", "success_test": "cmd == 'ip addr' and 'inet' in output", "solution": ["ip addr"], "success_msg": "You confirmed interface and address information." }, + { "id": "m4_l2_e2", "type": "operation", "title": "Test local connectivity", "hint": "Run ping 127.0.0.1.", "success_test": "cmd == 'ping 127.0.0.1' and 'packet loss' in output", "solution": ["ping 127.0.0.1"], "success_msg": "You verified the local connectivity layer." }, + { "id": "m4_l2_e3", "type": "operation", "title": "Validate the application response", "hint": "Run curl http://127.0.0.1.", "success_test": "cmd == 'curl http://127.0.0.1' and 'hello localhost' in output", "solution": ["curl http://127.0.0.1"], "success_msg": "You completed the network path all the way to the application layer." } ] } ] } ] -} \ No newline at end of file +} diff --git a/index.html b/index.html index 2cd6532..221ba47 100644 --- a/index.html +++ b/index.html @@ -94,6 +94,25 @@ } .stat span { display: block; font-size: 12px; color: var(--muted); margin-bottom: 6px; } .stat strong { font-size: 22px; } + .mini-stats { + display: grid; + grid-template-columns: repeat(4, minmax(0, 1fr)); + gap: 10px; + margin-top: 12px; + } + .mini-stat { + padding: 12px; + border: 1px solid var(--line); + border-radius: 16px; + background: rgba(255,255,255,0.2); + } + .mini-stat span { + display: block; + font-size: 12px; + color: var(--muted); + margin-bottom: 6px; + } + .mini-stat strong { font-size: 20px; } .search { display: flex; gap: 8px; @@ -231,16 +250,37 @@ .feedback.show { display: block; } .feedback.success { background: rgba(29,155,108,0.1); border: 1px solid rgba(29,155,108,0.22); color: #187653; } .feedback.warn { background: rgba(240,180,41,0.1); border: 1px solid rgba(240,180,41,0.24); color: #a26b04; } + .mastery-note { + margin-top: 12px; + padding: 12px 14px; + border-radius: 14px; + border: 1px solid var(--line); + background: rgba(255,255,255,0.18); + color: var(--muted); + line-height: 1.8; + } + .lesson-btn.done { + border-color: rgba(29, 155, 108, 0.4); + background: rgba(29, 155, 108, 0.1); + } + .ordered { + margin: 0; + padding-left: 18px; + color: var(--muted); + line-height: 1.9; + } .empty { padding: 12px; border: 1px dashed var(--line); border-radius: 14px; color: var(--muted); text-align: center; } @media (max-width: 1180px) { .layout { grid-template-columns: 1fr; } .sidebar { position: static; } .hero-grid, .detail-grid { grid-template-columns: 1fr; } + .mini-stats { grid-template-columns: repeat(2, minmax(0, 1fr)); } } @media (max-width: 720px) { .shell { padding: 14px; } .topbar, .hero-head, .search, .terminal-input { flex-direction: column; align-items: stretch; } .stats { grid-template-columns: 1fr; } + .mini-stats { grid-template-columns: 1fr; } } @@ -270,6 +310,8 @@
Lessons0
Exercises0
Commands0
+
Mastered0
+
Completion0%

Version --

@@ -301,6 +343,7 @@

Choose a lesson from the left to see goals, examples, common pitfalls, and runnable exercises.

+
@@ -320,6 +363,31 @@ +
Master a lesson after you can explain the command, predict the output, and connect it to a real operations step.
+ + +
+
Learning cockpit
+

Visual study map for the current lesson

+

These cards help you move from memorizing a command to understanding the workflow around it.

+
+
+

Lesson radar

+
+
+
+

Experiment ladder

+
    +
    +
    +

    Observation checklist

    +
      +
      +
      +

      Related commands

      +
      +
      +
      @@ -371,7 +439,12 @@