diff --git a/COURSE_TASKS.json b/COURSE_TASKS.json index 6076f78..a392ef6 100644 --- a/COURSE_TASKS.json +++ b/COURSE_TASKS.json @@ -860,15 +860,21 @@ "examples": [ "systemctl status nginx", "systemctl restart nginx", - "systemctl enable nginx" + "systemctl enable nginx", + "systemctl status app.service", + "systemctl restart app.service", + "systemctl is-enabled nginx" ], "pitfalls": [ "改完配置却忘记重启服务", - "只看页面,不看服务状态" + "只看页面,不看服务状态", + "把 restart 当成排障终点,而不是排查起点" ], "scenarios": [ "排查服务没起来", - "改配置后让服务生效" + "改配置后让服务生效", + "配置变更后重新加载服务", + "确认服务是否开机自启" ], "exercises": [ { @@ -897,7 +903,8 @@ "takeaways": [ "学完后应能做到:理解 Linux 服务的查看、启动、停止和重启。", "易错提醒:改完配置却忘记重启服务", - "迁移场景:排查服务没起来" + "迁移场景:排查服务没起来", + "服务问题先看状态,再决定下一步看日志、端口还是配置。" ], "after_class": "课后建议:回到真实或模拟环境里,再用 systemctl 做一次独立练习,并尝试自己解释每条输出的含义。" }, @@ -915,15 +922,21 @@ "examples": [ "journalctl -u nginx -n 50", "journalctl -xe", - "journalctl -u app -f" + "journalctl -u app -f", + "journalctl -u app.service -n 100", + "journalctl -u nginx -f", + "journalctl --since today" ], "pitfalls": [ "只看应用日志,不看 systemd 日志", - "看太多日志却抓不到最近错误" + "看太多日志却抓不到最近错误", + "不限定服务名导致日志范围过大,难以定位" ], "scenarios": [ "查看服务启动失败原因", - "查看最近报错" + "查看最近报错", + "定位服务启动失败的关键报错", + "观察重启前后日志变化" ], "exercises": [ { @@ -952,7 +965,8 @@ "takeaways": [ "学完后应能做到:理解如何查看服务日志和系统日志。", "易错提醒:只看应用日志,不看 systemd 日志", - "迁移场景:查看服务启动失败原因" + "迁移场景:查看服务启动失败原因", + "日志不是越多越好,关键是缩小范围看最近、看目标服务。" ], "after_class": "课后建议:回到真实或模拟环境里,再用 journalctl 做一次独立练习,并尝试自己解释每条输出的含义。" }, @@ -970,15 +984,21 @@ "examples": [ "kill 1234", "pkill nginx", - "nohup python3 app.py &" + "nohup python3 app.py &", + "kill -9 1234", + "pkill -f python", + "nohup bash backup.sh &" ], "pitfalls": [ "直接粗暴 kill 掉关键进程", - "不知道后台任务输出去哪了" + "不知道后台任务输出去哪了", + "不了解信号差异就直接使用 -9" ], "scenarios": [ "结束卡死进程", - "让脚本后台运行" + "让脚本后台运行", + "结束僵死任务", + "让临时脚本脱离终端继续执行" ], "exercises": [ { @@ -1009,7 +1029,8 @@ "takeaways": [ "学完后应能做到:理解如何控制进程和让任务脱离终端运行。", "易错提醒:直接粗暴 kill 掉关键进程", - "迁移场景:结束卡死进程" + "迁移场景:结束卡死进程", + 
"进程控制的重点是知道为什么结束、结束谁、结束后系统会怎样。" ], "after_class": "课后建议:回到真实或模拟环境里,再用 kill、pkill、nohup 做一次独立练习,并尝试自己解释每条输出的含义。" } @@ -1034,15 +1055,21 @@ "examples": [ "ip addr", "ifconfig", - "ping 127.0.0.1" + "ping 127.0.0.1", + "ip addr show eth0", + "ping 192.168.1.1", + "ping -c 4 example.com" ], "pitfalls": [ "能 ping 通就以为服务一定可用", - "只会看 IP,不理解监听端口" + "只会看 IP,不理解监听端口", + "把 DNS 解析失败误判成网络完全不通" ], "scenarios": [ "确认机器是否有正确 IP", - "测试目标是否能连通" + "测试目标是否能连通", + "确认目标机器有无 IP", + "初步判断网络层是否通" ], "exercises": [ { @@ -1083,7 +1110,8 @@ "takeaways": [ "学完后应能做到:理解网卡、IP 和连通性的基本概念。", "易错提醒:能 ping 通就以为服务一定可用", - "迁移场景:确认机器是否有正确 IP" + "迁移场景:确认机器是否有正确 IP", + "网络排查第一步是先确认链路和地址,再看更上层。" ], "after_class": "课后建议:回到真实或模拟环境里,再用 ip addr、ifconfig、ping 做一次独立练习,并尝试自己解释每条输出的含义。" }, @@ -1101,15 +1129,21 @@ "examples": [ "ss -ltnp", "netstat -tunlp", - "curl http://127.0.0.1:8080/health" + "curl http://127.0.0.1:8080/health", + "ss -ltnp | grep 80", + "curl -I http://127.0.0.1", + "wget http://127.0.0.1" ], "pitfalls": [ "只看页面打不开,不查监听", - "只看监听,不测实际请求" + "只看监听,不测实际请求", + "只看 LISTEN 不看实际响应码和返回体" ], "scenarios": [ "查服务是否监听端口", - "测试接口是否返回 200" + "测试接口是否返回 200", + "确认 Web 服务是否监听 80 端口", + "确认 HTTP 健康检查是否正常" ], "exercises": [ { @@ -1151,7 +1185,8 @@ "takeaways": [ "学完后应能做到:建立监听端口和服务请求验证的能力。", "易错提醒:只看页面打不开,不查监听", - "迁移场景:查服务是否监听端口" + "迁移场景:查服务是否监听端口", + "监听正常不代表业务正常,请求失败也不一定是服务没启动。" ], "after_class": "课后建议:回到真实或模拟环境里,再用 ss、netstat、curl、wget 做一次独立练习,并尝试自己解释每条输出的含义。" }, @@ -1169,15 +1204,21 @@ "examples": [ "traceroute 8.8.8.8", "dig example.com", - "which curl" + "which curl", + "whereis nginx", + "dig api.example.com", + "traceroute example.com" ], "pitfalls": [ "把 DNS 问题误判成应用问题", - "不知道命令来自哪里" + "不知道命令来自哪里", + "忽略 DNS TTL 和缓存带来的影响" ], "scenarios": [ "排查域名异常", - "确认命令路径和来源" + "确认命令路径和来源", + "排查域名切换未生效", + "确认命令实际安装位置" ], "exercises": [ { @@ -1214,7 +1255,8 @@ "takeaways": [ "学完后应能做到:建立链路定位和名称解析基础认知。", "易错提醒:把 DNS 问题误判成应用问题", - "迁移场景:排查域名异常" + "迁移场景:排查域名异常", + "命令定位、解析路径和网络链路,都是“看不见的问题”的排查入口。" ], 
"after_class": "课后建议:回到真实或模拟环境里,再用 traceroute、dig、which、whereis 做一次独立练习,并尝试自己解释每条输出的含义。" } @@ -1239,15 +1281,21 @@ "examples": [ "whoami", "id", - "passwd" + "passwd", + "id sandbox_user", + "su - root", + "passwd sandbox_user" ], "pitfalls": [ "不知道自己当前权限边界", - "以为所有命令都能执行" + "以为所有命令都能执行", + "不知道服务身份和登录用户身份可能不同" ], "scenarios": [ "确认当前身份", - "看自己属于哪些组" + "看自己属于哪些组", + "确认程序以什么身份运行", + "确认某用户是否属于目标组" ], "exercises": [ { @@ -1289,7 +1337,8 @@ "takeaways": [ "学完后应能做到:理解当前身份、用户组和密码变更的意义。", "易错提醒:不知道自己当前权限边界", - "迁移场景:确认当前身份" + "迁移场景:确认当前身份", + "身份问题常常决定你能看什么、改什么、执行什么。" ], "after_class": "课后建议:回到真实或模拟环境里,再用 whoami、id、passwd、su 做一次独立练习,并尝试自己解释每条输出的含义。" }, @@ -1307,15 +1356,21 @@ "examples": [ "chmod 644 file.txt", "chmod +x run.sh", - "chown app:app app.log" + "chown app:app app.log", + "chmod 644 app.conf", + "chown app:app /var/log/app.log", + "chgrp deploy script.sh" ], "pitfalls": [ "图省事直接给 777", - "不了解属组导致协作混乱" + "不了解属组导致协作混乱", + "只改 chmod,不看属主属组" ], "scenarios": [ "修脚本执行权限", - "调整日志文件归属" + "调整日志文件归属", + "修复配置文件权限", + "调整日志文件归属方便服务写入" ], "exercises": [ { @@ -1351,7 +1406,8 @@ "takeaways": [ "学完后应能做到:理解 Linux 权限控制的基本模型和常见修改动作。", "易错提醒:图省事直接给 777", - "迁移场景:修脚本执行权限" + "迁移场景:修脚本执行权限", + "权限排障常常不是只看一个数字,而是同时看权限、属主、属组和执行身份。" ], "after_class": "课后建议:回到真实或模拟环境里,再用 chmod、chown、chgrp 做一次独立练习,并尝试自己解释每条输出的含义。" }, @@ -1368,15 +1424,19 @@ "command": "sudo / rm -rf / 安全习惯", "examples": [ "sudo systemctl restart nginx", - "rm -rf /tmp/testdir" + "rm -rf /tmp/testdir", + "rm -rf /tmp/old-release" ], "pitfalls": [ "把 sudo 当默认前缀", - "不确认路径就执行递归删除" + "不确认路径就执行递归删除", + "高权限操作前不做确认和备份" ], "scenarios": [ "高权限改系统配置", - "清理目录前先确认路径" + "清理目录前先确认路径", + "修改系统级配置前先评估影响", + "清理目录前先校验路径" ], "exercises": [ { @@ -1407,7 +1467,8 @@ "takeaways": [ "学完后应能做到:建立运维中“能做”不等于“该做”的安全意识。", "易错提醒:把 sudo 当默认前缀", - "迁移场景:高权限改系统配置" + "迁移场景:高权限改系统配置", + "高风险命令需要形成“先确认、后执行、再验证”的习惯。" ], "after_class": "课后建议:回到真实或模拟环境里,再用 sudo、rm -rf、安全习惯 做一次独立练习,并尝试自己解释每条输出的含义。" } @@ -1795,14 +1856,20 @@ "examples": [ "systemctl 
status nginx", "ss -ltnp", - "curl http://127.0.0.1:8080/health" + "curl http://127.0.0.1:8080/health", + "systemctl status nginx && ss -ltnp | grep 80", + "journalctl -u nginx -n 50", + "curl -I http://127.0.0.1" ], "pitfalls": [ "只看浏览器打不开,不看服务状态", - "没有层次地乱查" + "没有层次地乱查", + "没有层次感地同时改服务、改配置、重启,导致问题更难定位" ], "scenarios": [ - "应用服务无法访问" + "应用服务无法访问", + "线上服务返回 502/504", + "站点页面打不开但机器正常" ], "exercises": [ { @@ -1835,7 +1902,8 @@ "takeaways": [ "学完后应能做到:建立“先服务、再端口、再日志、再请求”的排查顺序。", "易错提醒:只看浏览器打不开,不看服务状态", - "迁移场景:应用服务无法访问" + "迁移场景:应用服务无法访问", + "服务不可用时,排障要按层进行:服务 → 进程 → 端口 → 日志 → 请求。" ], "after_class": "课后建议:回到真实或模拟环境里,再用 systemctl、ps、ss、journalctl、curl 做一次独立练习,并尝试自己解释每条输出的含义。" }, @@ -1853,14 +1921,19 @@ "examples": [ "df -h", "du -sh /var/log", - "find /var/log -type f" + "find /var/log -type f", + "df -h /var", + "du -sh /var/log/* | sort -h" ], "pitfalls": [ "只看 df 不继续追目录", - "删文件前不确认用途" + "删文件前不确认用途", + "直接删除不熟悉的大文件,可能破坏恢复和排障线索" ], "scenarios": [ - "排查磁盘 100%" + "排查磁盘 100%", + "日志目录暴涨导致磁盘满", + "发布产物堆积导致空间不足" ], "exercises": [ { @@ -1892,7 +1965,8 @@ "takeaways": [ "学完后应能做到:建立从 df 到 du 再到 find 的磁盘问题定位思路。", "易错提醒:只看 df 不继续追目录", - "迁移场景:排查磁盘 100%" + "迁移场景:排查磁盘 100%", + "磁盘排查的关键是先找文件系统,再找目录,再找大文件。" ], "after_class": "课后建议:回到真实或模拟环境里,再用 df、du、find、sort 做一次独立练习,并尝试自己解释每条输出的含义。" }, @@ -1910,15 +1984,19 @@ "examples": [ "whoami", "id", - "tail -n 20 /var/log/auth.log" + "tail -n 20 /var/log/auth.log", + "grep sandbox_user /etc/passwd", + "id sandbox_user" ], "pitfalls": [ "只怀疑密码错误,不看日志", - "忽略组权限问题" + "忽略组权限问题", + "只盯着密码,不看账号状态和权限配置" ], "scenarios": [ "SSH 登录失败", - "执行权限不足" + "执行权限不足", + "脚本执行提示无权限" ], "exercises": [ { @@ -1951,7 +2029,8 @@ "takeaways": [ "学完后应能做到:把身份、权限、日志三者串起来理解。", "易错提醒:只怀疑密码错误,不看日志", - "迁移场景:SSH 登录失败" + "迁移场景:SSH 登录失败", + "登录失败排查要把身份、日志和权限一起看,不能只猜密码。" ], "after_class": "课后建议:回到真实或模拟环境里,再用 whoami、id、passwd、grep、tail 做一次独立练习,并尝试自己解释每条输出的含义。" } diff --git a/sandbox.py b/sandbox.py index 1601b03..158e085 100644 --- a/sandbox.py +++ b/sandbox.py @@ -560,6 
+560,9 @@ class LinuxSandbox: "kill": "signal sent (simulated)", "pkill": "matched processes terminated (simulated)", "nohup": "appending output to nohup.out", + "systemctl": "● nginx.service - A high performance web server\n Loaded: loaded (/usr/lib/systemd/system/nginx.service; enabled)\n Active: active (running) since Mon 2026-03-06 10:00:00 CST; 5 days ago\n Main PID: 1042 (nginx)\n Tasks: 3\n Memory: 12.5M", + "service": "nginx is running", + "journalctl": "Mar 06 10:00:00 systemd[1]: Started nginx.service\nMar 06 10:00:01 nginx[1042]: worker process started\nMar 06 10:04:12 nginx[1042]: connect() failed (111: Connection refused)", "ifconfig": "eth0: flags=4163 mtu 1500\n inet 192.168.1.20 netmask 255.255.255.0\nlo: flags=73 mtu 65536\n inet 127.0.0.1 netmask 255.0.0.0", "ip": "1: lo: mtu 65536\n inet 127.0.0.1/8 scope host lo\n2: eth0: mtu 1500\n inet 192.168.1.20/24 scope global eth0", "ping": "PING 127.0.0.1 (127.0.0.1): 56 data bytes\n64 bytes from 127.0.0.1: icmp_seq=0 ttl=64 time=0.025 ms\n--- 127.0.0.1 ping statistics ---\n4 packets transmitted, 4 received, 0% packet loss", @@ -589,12 +592,26 @@ class LinuxSandbox: } if cmd_name == "ip" and args[:1] == ["addr"]: return canned["ip"] + if cmd_name == "systemctl": + if args[:1] == ["status"]: + target = args[1] if len(args) > 1 else 'nginx' + return canned["systemctl"].replace('nginx', target) + if args[:1] in (["restart"], ["start"], ["stop"], ["reload"], ["enable"]): + target = args[1] if len(args) > 1 else 'service' + return f"{args[0]} operation completed for {target} (simulated)" + return canned["systemctl"] + if cmd_name == "service": + return canned["service"] + if cmd_name == "journalctl": + return canned["journalctl"] if cmd_name == "tail" or cmd_name == "head": return self._simulate_head_tail(cmd_name, args) if cmd_name == "which": return canned["which"] if cmd_name == "whereis": return canned["whereis"] + if cmd_name == "dig": + return canned.get("dig", ";; ANSWER SECTION:\nexample.com. 
300 IN A 93.184.216.34") if cmd_name == "export" and args and "=" in args[0]: key, value = args[0].split("=", 1) self.env[key] = value @@ -681,6 +698,8 @@ class LinuxSandbox: output = self.user elif cmd_name == "history": output = self._simulate_history(args) + elif cmd_name in {"systemctl", "service", "journalctl", "dig"}: + output = self._simulate_system_text(cmd_name, args) else: output = self._simulate_system_text(cmd_name, args) except Exception as e: