{ "meta": { "version": "5.1", "title": "Linux Operations Learning Lab", "author": "Codex", "updated": "2026-03-19", "description": "A rebuilt Linux learning path that moves from command basics to real operations troubleshooting with sandbox exercises.", "module_count": 5, "total_lessons": 15, "total_exercises": 48, "pedagogy": "learning-first", "orientation": "ops-workflow" }, "modules": [ { "id": "module_1_foundation", "title": "Module 1: Build Linux spatial awareness", "summary": "Start with location and listing so every later action has context.", "lessons": [ { "id": "m1_l1_pwd", "title": "Locate yourself with pwd", "goal": "Understand the current working directory before changing files or running scripts.", "why_it_matters": "Many Linux mistakes come from running the right command in the wrong path.", "concepts": ["Current working directory", "Absolute vs relative path", "Context before action"], "command": "pwd", "examples": ["pwd", "cd /tmp && pwd"], "pitfalls": ["Assuming every shell opens in the same place", "Running a file command before checking the path"], "scenarios": ["Verify where you are before editing a file", "Confirm location before launching a script"], "troubleshooting_flow": ["Confirm the working context", "Run pwd", "Use the output to choose the next command"], "related_commands": ["pwd", "cd", "ls"], "classic_view": "Treat pwd as the command that restores orientation before every other action.", "takeaways": ["Know your location before acting", "Read the full path instead of guessing", "Use location to decide the next safe step"], "after_class": "Repeat pwd before every directory-changing command until it becomes automatic.", "exercises": [ { "id": "m1_l1_e1", "type": "understanding", "question": "Which command prints the full current working directory?", "answer": "pwd" }, { "id": "m1_l1_e2", "type": "operation", "title": "Print the current working directory", "hint": "Run pwd directly.", "success_test": "cmd == 'pwd'", "solution": ["pwd"], 
"success_msg": "You verified the shell location correctly." }, { "id": "m1_l1_e3", "type": "scenario", "question": "If a script fails with 'file not found', what should you check first?", "answer": "Run pwd and verify the current path before anything else." } ] }, { "id": "m1_l2_ls", "title": "Inspect a directory with ls", "goal": "Observe directory content and switch to detail views when needed.", "why_it_matters": "Linux exploration usually begins with ls because you need to know what exists before you inspect more deeply.", "concepts": ["Directory contents", "Hidden files", "Long listing output"], "command": "ls", "examples": ["ls", "ls -la", "ls -l /etc"], "pitfalls": ["Thinking a hidden file does not exist because plain ls missed it", "Reading long output without understanding permissions or size"], "scenarios": ["Check what files exist before deleting anything", "Inspect a config directory for hidden dotfiles"], "troubleshooting_flow": ["List the directory first", "Switch to ls -la if details matter", "Use the listing to choose the next file command"], "related_commands": ["ls", "find", "stat"], "classic_view": "Observation comes before diagnosis, and ls is usually the first observation tool.", "takeaways": ["Use ls for broad visibility", "Use ls -la for hidden files and metadata", "Treat the listing as evidence"], "after_class": "Practice comparing ls and ls -la until you can explain why the outputs differ.", "exercises": [ { "id": "m1_l2_e1", "type": "understanding", "question": "Why does ls -a reveal more than plain ls?", "answer": "Because it includes hidden dotfiles." }, { "id": "m1_l2_e2", "type": "operation", "title": "List the current directory", "hint": "Start with ls.", "success_test": "cmd == 'ls'", "solution": ["ls"], "success_msg": "You listed the current directory." 
}, { "id": "m1_l2_e3", "type": "operation", "title": "Show hidden files and details", "hint": "Use ls -la.", "success_test": "cmd == 'ls -la' or cmd == 'ls -al'", "solution": ["ls -la", "ls -al"], "success_msg": "You switched from a simple listing to a full inspection view." } ] }, { "id": "m1_l3_cd_cat_echo", "title": "Move, read, and confirm with cd, cat, and echo", "goal": "Use navigation, file reads, and quick output together as one everyday workflow.", "why_it_matters": "Many Linux tasks are just a chain of navigate, inspect, and confirm.", "concepts": ["Changing directories", "Reading text files", "Using echo for quick verification"], "command": "cd / cat / echo", "examples": ["cd /tmp", "cat /etc/hosts", "echo Hello Linux"], "pitfalls": ["Using cat on a directory", "Changing directories without verifying where you landed", "Ignoring echo as a debugging aid"], "scenarios": ["Enter a workspace and inspect a config file", "Print quick checkpoints in a shell session"], "troubleshooting_flow": ["Change to the right location", "Read the right file", "Echo the key assumption you want to verify"], "related_commands": ["cd", "cat", "echo", "pwd"], "classic_view": "These commands make context and state visible before you do more powerful operations.", "takeaways": ["Use cd intentionally", "Use cat for quick text inspection", "Use echo to make invisible state explicit"], "after_class": "Practice saying out loud what each command changed in your working context.", "exercises": [ { "id": "m1_l3_e1", "type": "operation", "title": "Enter /tmp", "hint": "Use cd /tmp.", "success_test": "cmd == 'cd /tmp' and cwd == '/tmp'", "solution": ["cd /tmp"], "success_msg": "You moved to the target working directory." }, { "id": "m1_l3_e2", "type": "operation", "title": "Read the hosts file", "hint": "Run cat /etc/hosts.", "success_test": "cmd == 'cat /etc/hosts' and 'localhost' in output", "solution": ["cat /etc/hosts"], "success_msg": "You inspected a real text file in the sandbox." 
}, { "id": "m1_l3_e3", "type": "operation", "title": "Print a confirmation message", "hint": "Run echo Hello Linux.", "success_test": "cmd == 'echo Hello Linux' and 'Hello Linux' in output", "solution": ["echo Hello Linux"], "success_msg": "You used echo to make the current step explicit." } ] } ] }, { "id": "module_2_filesystem", "title": "Module 2: Manipulate files safely", "summary": "Learn to create, copy, move, and permission files without losing context.", "lessons": [ { "id": "m2_l1_create", "title": "Create structure with mkdir and touch", "goal": "Build directories and empty files quickly so you can prepare workspaces or examples.", "why_it_matters": "Project setup and environment preparation often begin with directory scaffolding.", "concepts": ["Directory creation", "Recursive directory creation", "Empty file creation"], "command": "mkdir / touch", "examples": ["mkdir /tmp/lab", "mkdir -p /tmp/lab/data/raw", "touch /tmp/lab/notes.txt"], "pitfalls": ["Forgetting -p for nested paths", "Trying to touch a file under a missing directory"], "scenarios": ["Prepare a clean lab directory", "Create a placeholder log or notes file"], "troubleshooting_flow": ["Check the target path", "Create directories first", "Create files after the path exists"], "related_commands": ["mkdir", "touch", "ls"], "classic_view": "File-system work is safer when you change one layer at a time: path first, file second.", "takeaways": ["Use mkdir for structure", "Use touch for placeholders", "Think about parent directories before file creation"], "after_class": "Rebuild the same directory tree twice until the path feels natural.", "exercises": [ { "id": "m2_l1_e1", "type": "operation", "title": "Create a lab directory", "hint": "Run mkdir /tmp/lab.", "success_test": "cmd == 'mkdir /tmp/lab' and exists('/tmp/lab')", "solution": ["mkdir /tmp/lab"], "success_msg": "Your lab directory now exists." 
}, { "id": "m2_l1_e2", "type": "operation", "title": "Create a nested directory tree", "hint": "Run mkdir -p /tmp/lab/data/raw.", "success_test": "cmd == 'mkdir -p /tmp/lab/data/raw' and exists('/tmp/lab/data/raw')", "solution": ["mkdir -p /tmp/lab/data/raw"], "success_msg": "You created the nested path correctly." }, { "id": "m2_l1_e3", "type": "operation", "title": "Create a note file", "hint": "Run touch /tmp/lab/notes.txt.", "success_test": "cmd == 'touch /tmp/lab/notes.txt' and exists('/tmp/lab/notes.txt')", "solution": ["touch /tmp/lab/notes.txt"], "success_msg": "You created a new empty file in the lab path." } ] }, { "id": "m2_l2_copy_move_permission", "title": "Copy, move, and permission files with intent", "goal": "Manipulate files carefully and connect permission changes to execution behavior.", "why_it_matters": "Safe Linux work depends on knowing when you are backing up, renaming, or changing access.", "concepts": ["Backup copies", "Rename vs move", "Permission modes"], "command": "cp / mv / chmod / stat", "examples": ["cp /etc/hosts /tmp/hosts.bak", "mv /tmp/hosts.bak /tmp/hosts.backup", "chmod 755 /tmp/lab", "stat /etc/hosts"], "pitfalls": ["Deleting before backing up", "Changing permissions without checking metadata first"], "scenarios": ["Back up a config before editing", "Make a script or directory executable when needed"], "troubleshooting_flow": ["Copy before editing", "Rename with a purpose", "Inspect metadata before permission changes"], "related_commands": ["cp", "mv", "chmod", "stat"], "classic_view": "File operations are safe when you can clearly name the step: backup, rename, inspect, or permission change.", "takeaways": ["Back up first", "Rename with purpose", "Inspect before changing permissions"], "after_class": "Repeat the flow copy -> rename -> stat -> chmod on a temporary file until the sequence feels obvious.", "exercises": [ { "id": "m2_l2_e1", "type": "operation", "title": "Back up the hosts file",
"hint": "Run cp /etc/hosts /tmp/hosts.bak.", "success_test": "cmd == 'cp /etc/hosts /tmp/hosts.bak' and exists('/tmp/hosts.bak')", "solution": ["cp /etc/hosts /tmp/hosts.bak"], "success_msg": "You created a backup copy before any change." }, { "id": "m2_l2_e2", "type": "operation", "title": "Rename the backup file", "hint": "Run mv /tmp/hosts.bak /tmp/hosts.backup.", "success_test": "cmd == 'mv /tmp/hosts.bak /tmp/hosts.backup' and exists('/tmp/hosts.backup')", "solution": ["mv /tmp/hosts.bak /tmp/hosts.backup"], "success_msg": "You renamed the file successfully." }, { "id": "m2_l2_e3", "type": "operation", "title": "Inspect hosts metadata", "hint": "Run stat /etc/hosts.", "success_test": "cmd == 'stat /etc/hosts' and 'File:' in output", "solution": ["stat /etc/hosts"], "success_msg": "You inspected the file metadata successfully." } ] }, { "id": "m2_l3_cleanup_and_modes", "title": "Clean up and change modes with rm and chmod", "goal": "Delete temporary artifacts safely and connect permission changes to execution behavior.", "why_it_matters": "Cleanup and permission changes are common, but both can create damage if you skip inspection.", "concepts": ["Cleanup order", "Symbolic vs numeric modes", "Execution permission"], "command": "rm / chmod", "examples": ["rm /tmp/hosts.backup", "chmod 755 /tmp/lab", "chmod +x /tmp/lab/notes.txt"], "pitfalls": ["Deleting before confirming the file is disposable", "Changing permissions without knowing why"], "scenarios": ["Remove temporary files after verification", "Make a script or directory executable when needed"], "troubleshooting_flow": ["Verify the target path", "Remove only temporary files", "Inspect and then change permissions with intent"], "related_commands": ["rm", "chmod", "stat", "ls"], "classic_view": "Cleanup and permission work should be deliberate, not reactive.", "takeaways": ["Delete last", "Use numeric and symbolic modes intentionally", "Connect permissions to a real need"],
"after_class": "Repeat the sequence inspect -> remove or inspect -> chmod until it feels like a safe habit.", "exercises": [ { "id": "m2_l3_e1", "type": "operation", "title": "Remove the renamed backup", "hint": "Run rm /tmp/hosts.backup.", "success_test": "cmd == 'rm /tmp/hosts.backup' and not exists('/tmp/hosts.backup')", "solution": ["rm /tmp/hosts.backup"], "success_msg": "You cleaned up the temporary backup safely." }, { "id": "m2_l3_e2", "type": "operation", "title": "Set the lab directory mode to 755", "hint": "Run chmod 755 /tmp/lab.", "success_test": "cmd == 'chmod 755 /tmp/lab'", "solution": ["chmod 755 /tmp/lab"], "success_msg": "You applied a numeric permission change." }, { "id": "m2_l3_e3", "type": "operation", "title": "Add execute permission to notes.txt", "hint": "Run chmod +x /tmp/lab/notes.txt.", "success_test": "cmd == 'chmod +x /tmp/lab/notes.txt'", "solution": ["chmod +x /tmp/lab/notes.txt"], "success_msg": "You used a symbolic permission change." } ] } ] }, { "id": "module_3_search", "title": "Module 3: Search and preview with evidence", "summary": "Move from a broad search to a precise answer using filters and short file views.", "lessons": [ { "id": "m3_l1_grep", "title": "Search file content with grep", "goal": "Find relevant lines quickly instead of reading an entire file every time.", "why_it_matters": "Operations work often means locating one useful line inside a noisy file.", "concepts": ["Pattern search", "Case-insensitive search", "Line-oriented inspection"], "command": "grep", "examples": ["grep sandbox_user /etc/passwd", "grep -i error /var/log/syslog"], "pitfalls": ["Searching too broadly and ignoring context", "Forgetting case-insensitive mode"], "scenarios": ["Find one user entry in passwd", "Locate error lines in a log"], "troubleshooting_flow": ["Choose the right file first", "Search for the smallest useful keyword", "Use the matched lines to decide the next command"], "related_commands": ["grep", "cat", "tail"],
"classic_view": "grep turns a large text space into a small set of signals you can reason about.", "takeaways": ["Search deliberately", "Narrow the signal before changing anything", "Use matched lines as evidence"], "after_class": "Practice moving from a broad keyword to a more precise keyword in the same file.", "exercises": [ { "id": "m3_l1_e1", "type": "operation", "title": "Find sandbox_user in passwd", "hint": "Run grep sandbox_user /etc/passwd.", "success_test": "cmd == 'grep sandbox_user /etc/passwd' and 'sandbox_user' in output", "solution": ["grep sandbox_user /etc/passwd"], "success_msg": "You extracted the relevant passwd line." }, { "id": "m3_l1_e2", "type": "operation", "title": "Find error lines in syslog", "hint": "Run grep -i error /var/log/syslog.", "success_test": "cmd == 'grep -i error /var/log/syslog' and 'error' in output.lower()", "solution": ["grep -i error /var/log/syslog"], "success_msg": "You filtered the log down to the error lines." }, { "id": "m3_l1_e3", "type": "scenario", "question": "Why is grep usually faster than reading the whole log manually?", "answer": "Because it narrows the file to only the lines that match the signal you care about."
} ] }, { "id": "m3_l2_find_preview", "title": "Locate files with find and preview them with tail", "goal": "Move from path discovery to a focused log or config preview.", "why_it_matters": "You cannot inspect the right file until you locate it precisely, and you do not need to open the whole file first.", "concepts": ["Name-based search", "Search roots", "Tail as a recent-state preview"], "command": "find / tail", "examples": ["find /etc -name '*.conf'", "find /var/log -type f", "tail -n 1 /var/log/auth.log"], "pitfalls": ["Searching from too high in the tree", "Using cat on a large file when tail would answer faster"], "scenarios": ["Find config files under /etc", "Inspect the newest auth log line"], "troubleshooting_flow": ["Pick the smallest reasonable root", "Add a filter", "Preview the most relevant file segment"], "related_commands": ["find", "tail", "head"], "classic_view": "A good search ends with an exact path and a small, readable preview.", "takeaways": ["Use find for precise location", "Use tail for fresh log evidence", "Reduce noise early"], "after_class": "Practice explaining the root path and every filter before you run find.", "exercises": [ { "id": "m3_l2_e1", "type": "operation", "title": "Find configuration files under /etc", "hint": "Run find /etc -name '*.conf'.", "success_test": "cmd == \"find /etc -name '*.conf'\" and '.conf' in output", "solution": ["find /etc -name '*.conf'"], "success_msg": "You located configuration files recursively." }, { "id": "m3_l2_e2", "type": "operation", "title": "Find log files under /var/log", "hint": "Run find /var/log -type f.", "success_test": "cmd == 'find /var/log -type f' and '/var/log' in output", "solution": ["find /var/log -type f"], "success_msg": "You filtered the search to files under the log tree." 
}, { "id": "m3_l2_e3", "type": "operation", "title": "Read the newest auth log line", "hint": "Run tail -n 1 /var/log/auth.log.", "success_test": "cmd == 'tail -n 1 /var/log/auth.log' and 'sandbox_user' in output", "solution": ["tail -n 1 /var/log/auth.log"], "success_msg": "You previewed the most recent auth log line." } ] }, { "id": "m3_l3_preview_and_pipeline", "title": "Preview files with head, tail, and a simple pipeline", "goal": "Read only the part of a file that answers your current question.", "why_it_matters": "Large files are easier to scan when you only open the beginning or end first.", "concepts": ["Top lines", "Bottom lines", "Simple pipelines for narrowing output"], "command": "head / tail / cat | tail", "examples": ["head /etc/passwd", "tail -n 1 /var/log/auth.log", "cat /var/log/auth.log | tail -n 1"], "pitfalls": ["Using cat on a large file when a preview is enough", "Forgetting to change the line count for a focused question"], "scenarios": ["Check whether a file starts with the right structure", "Inspect the last auth event in a log"], "troubleshooting_flow": ["Choose the right end of the file", "Preview a small number of lines", "Switch to grep or full reads only if needed"], "related_commands": ["head", "tail", "cat", "grep"], "classic_view": "Preview commands reduce noise so you can decide whether the file deserves deeper inspection.", "takeaways": ["Use head for structure", "Use tail for recent activity", "Use a pipeline when one command is not enough"], "after_class": "Practice explaining why you chose head or tail before you run the command.", "exercises": [ { "id": "m3_l3_e1", "type": "operation", "title": "Preview the top of passwd", "hint": "Run head /etc/passwd.", "success_test": "cmd == 'head /etc/passwd' and 'root' in output", "solution": ["head /etc/passwd"], "success_msg": "You previewed the beginning of the file." 
}, { "id": "m3_l3_e2", "type": "operation", "title": "Inspect the newest auth log line", "hint": "Run tail -n 1 /var/log/auth.log.", "success_test": "cmd == 'tail -n 1 /var/log/auth.log' and 'sandbox_user' in output", "solution": ["tail -n 1 /var/log/auth.log"], "success_msg": "You narrowed the file to the most recent line." }, { "id": "m3_l3_e3", "type": "operation", "title": "Use a simple pipeline to preview auth.log", "hint": "Run cat /var/log/auth.log | tail -n 1.", "success_test": "cmd == 'cat /var/log/auth.log | tail -n 1' and 'sandbox_user' in output", "solution": ["cat /var/log/auth.log | tail -n 1"], "success_msg": "You combined two commands to narrow the log quickly." } ] } ] }, { "id": "module_4_operations", "title": "Module 4: Turn commands into operations workflows", "summary": "Use process and network commands as parts of a real troubleshooting chain instead of isolated tools.", "lessons": [ { "id": "m4_l1_process", "title": "Read system pressure with top and ps", "goal": "Spot busy processes and connect process views to system pressure.", "why_it_matters": "Incidents often begin with 'the system feels slow', and process views turn that feeling into evidence.", "concepts": ["Process snapshots", "CPU-heavy processes", "Live system views"], "command": "top / ps", "examples": ["top", "ps aux --sort=-%cpu | head"], "pitfalls": ["Jumping straight to kill before understanding the process role", "Looking at one snapshot without connecting it to symptoms"], "scenarios": ["Find the hottest process when CPU looks high", "Explain which command gives the faster broad view"], "troubleshooting_flow": ["Look at top for the broad picture", "Use ps sorting to confirm the hottest process", "Only then decide the next action"], "related_commands": ["top", "ps", "kill"], "classic_view": "System pressure is easier to reason about when you move from a broad live view to a specific process list.",
"takeaways": ["Use top for the live picture", "Use ps for a sortable snapshot", "Diagnose before you terminate anything"], "after_class": "Compare top and ps side by side and explain what each one is better at.", "exercises": [ { "id": "m4_l1_e1", "type": "operation", "title": "Show a live overview", "hint": "Run top.", "success_test": "cmd == 'top' and 'load average' in output", "solution": ["top"], "success_msg": "You captured a live system overview." }, { "id": "m4_l1_e2", "type": "operation", "title": "List processes by CPU usage", "hint": "Run ps aux --sort=-%cpu | head.", "success_test": "cmd == 'ps aux --sort=-%cpu | head' and '%CPU' in output", "solution": ["ps aux --sort=-%cpu | head"], "success_msg": "You ranked processes by CPU usage." }, { "id": "m4_l1_e3", "type": "understanding", "question": "Why should kill come after observation rather than before it?", "answer": "Because you need evidence about what the process is doing and whether it is safe to stop." } ] }, { "id": "m4_l2_network", "title": "Check the network path with ip, ping, ss, and curl", "goal": "Diagnose connectivity step by step from interface state to application response.", "why_it_matters": "Network incidents feel confusing when you mix layers; they become clearer when you inspect one layer at a time.", "concepts": ["Interface address", "Basic connectivity", "Listening ports", "Application-level validation"], "command": "ip / ping / ss / curl", "examples": ["ip addr", "ping 127.0.0.1", "ss -ltnp", "curl http://127.0.0.1"], "pitfalls": ["Treating DNS, port, and app failures as one category", "Checking a port without making a real request"], "scenarios": ["A local service appears to be up but the app still feels unavailable", "You need to confirm whether the issue is address, reachability, port, or HTTP behavior"], "troubleshooting_flow": ["Check interface state with ip addr", "Test connectivity with ping", "Check sockets with ss", "Validate the app layer with curl"], "related_commands": ["ip", "ping", "ss", "curl", "dig"],
"classic_view": "The cleanest network diagnosis moves from low-level reachability to the final application response.", "takeaways": ["Do not skip layers", "Confirm a port before blaming the app", "Use curl to verify user-facing behavior"], "after_class": "Repeat the sequence ip -> ping -> ss -> curl until the layer transitions feel natural.", "exercises": [ { "id": "m4_l2_e1", "type": "operation", "title": "Inspect interface addresses", "hint": "Run ip addr.", "success_test": "cmd == 'ip addr' and 'inet' in output", "solution": ["ip addr"], "success_msg": "You confirmed interface and address information." }, { "id": "m4_l2_e2", "type": "operation", "title": "Test local connectivity", "hint": "Run ping 127.0.0.1.", "success_test": "cmd == 'ping 127.0.0.1' and 'packet loss' in output", "solution": ["ping 127.0.0.1"], "success_msg": "You verified the local connectivity layer." }, { "id": "m4_l2_e3", "type": "operation", "title": "Validate the application response", "hint": "Run curl http://127.0.0.1.", "success_test": "cmd == 'curl http://127.0.0.1' and 'hello localhost' in output", "solution": ["curl http://127.0.0.1"], "success_msg": "You completed the network path all the way to the application layer."
} ] }, { "id": "m4_l3_service", "title": "Trace service health with systemctl and journalctl", "goal": "Move from service state to recent logs in a deliberate sequence.", "why_it_matters": "Service incidents are easier to solve when you separate service state from application logs.", "concepts": ["Service unit state", "Recent journal lines", "From status to explanation"], "command": "systemctl / journalctl", "examples": ["systemctl status nginx", "journalctl -n 20", "systemctl restart nginx"], "pitfalls": ["Restarting a service without checking why it failed", "Reading status without following the logs"], "scenarios": ["A service is reported down or unstable", "You need the last error before considering a restart"], "troubleshooting_flow": ["Read service state with systemctl status", "Open the recent journal lines", "Use the message to guide the next repair"], "related_commands": ["systemctl", "journalctl", "ss"], "classic_view": "Service status tells you what is happening; logs tell you why.", "takeaways": ["Start with status", "Use logs for explanation", "Restart only after you understand the failure"], "after_class": "Say out loud what systemctl tells you that journalctl does not, and vice versa.", "exercises": [ { "id": "m4_l3_e1", "type": "operation", "title": "Check nginx service status", "hint": "Run systemctl status nginx.", "success_test": "cmd == 'systemctl status nginx' and 'Active:' in output", "solution": ["systemctl status nginx"], "success_msg": "You inspected the service state." }, { "id": "m4_l3_e2", "type": "operation", "title": "Read recent journal lines", "hint": "Run journalctl -n 20.", "success_test": "cmd == 'journalctl -n 20' and 'Started nginx.service' in output", "solution": ["journalctl -n 20"], "success_msg": "You pulled recent journal evidence." 
}, { "id": "m4_l3_e3", "type": "operation", "title": "Simulate a service restart command", "hint": "Run systemctl restart nginx.", "success_test": "cmd == 'systemctl restart nginx' and 'completed for nginx' in output", "solution": ["systemctl restart nginx"], "success_msg": "You practiced the restart command after inspection." } ] } ] }, { "id": "module_5_incidents", "title": "Module 5: Practice incident playbooks", "summary": "Use the commands you learned in layered incident drills so the tools feel connected to real operations work.", "lessons": [ { "id": "m5_l1_disk", "title": "Incident drill: investigate disk pressure", "goal": "Move from broad disk visibility to specific directory evidence.", "why_it_matters": "Disk pressure can break writes, logs, and services, so you need a repeatable inspection path.", "concepts": ["Filesystem usage", "Directory usage", "Large-file hunting"], "command": "df / du / find", "examples": ["df -h", "du -sh /var/log", "find /var/log -type f"], "pitfalls": ["Stopping at df without drilling into the heavy directory", "Deleting files before confirming what they are for"], "scenarios": ["A host reports that its disk is full", "A log directory appears to be growing too quickly"], "troubleshooting_flow": ["Confirm which filesystem is full", "Measure the likely heavy directory", "Locate concrete files before deciding cleanup"], "related_commands": ["df", "du", "find", "sort"], "classic_view": "Disk incidents are easier when you move from filesystem view to directory view to file view in order.", "takeaways": ["Use df for the broad picture", "Use du for directory evidence", "Use find before deleting anything"], "after_class": "Practice saying which layer each command belongs to: filesystem, directory, or file.", "exercises": [ { "id": "m5_l1_e1", "type": "operation", "title": "Check filesystem usage", "hint": "Run df -h.", "success_test": "cmd == 'df -h' and 'Filesystem' in output", "solution": ["df -h"],
"success_msg": "You captured the broad disk usage picture." }, { "id": "m5_l1_e2", "type": "operation", "title": "Measure /var/log usage", "hint": "Run du -sh /var/log.", "success_test": "cmd == 'du -sh /var/log' and '/var/log' in output", "solution": ["du -sh /var/log"], "success_msg": "You narrowed the issue to a directory-level view." }, { "id": "m5_l1_e3", "type": "operation", "title": "Locate log files under /var/log", "hint": "Run find /var/log -type f.", "success_test": "cmd == 'find /var/log -type f' and '/var/log' in output", "solution": ["find /var/log -type f"], "success_msg": "You drilled down to the file layer." }, { "id": "m5_l1_e4", "type": "scenario", "question": "Why is deleting files before directory and file inspection risky?", "answer": "Because you can remove useful evidence or the wrong data before you understand what is consuming space." } ] }, { "id": "m5_l2_auth", "title": "Incident drill: investigate login or permission failures", "goal": "Link identity, account records, and recent auth logs into one diagnosis sequence.", "why_it_matters": "Access problems often mix user identity, account state, and authentication evidence.", "concepts": ["Current identity", "Account presence", "Recent authentication history"], "command": "whoami / id / grep / tail", "examples": ["whoami", "id", "grep sandbox_user /etc/passwd", "cat /var/log/auth.log | tail -n 1"], "pitfalls": ["Guessing a password issue without checking logs", "Ignoring account records and group membership"], "scenarios": ["A login fails repeatedly", "A script exists but still cannot be used by the current user"], "troubleshooting_flow": ["Confirm the current identity", "Verify the target account exists", "Read the newest auth evidence"], "related_commands": ["whoami", "id", "grep", "tail"], "classic_view": "Access issues are easier when you inspect identity, account records, and logs as one chain.", "takeaways": ["Confirm the user context first", "Use account files as evidence", "Trust logs more than guesses"],
"after_class": "Repeat the identity -> account -> log sequence until it feels like one habit.", "exercises": [ { "id": "m5_l2_e1", "type": "operation", "title": "Confirm your current identity", "hint": "Run whoami.", "success_test": "cmd == 'whoami' and 'sandbox_user' in output", "solution": ["whoami"], "success_msg": "You confirmed the current shell identity." }, { "id": "m5_l2_e2", "type": "operation", "title": "Inspect uid and group information", "hint": "Run id.", "success_test": "cmd == 'id' and 'uid=' in output", "solution": ["id"], "success_msg": "You inspected the uid and group context." }, { "id": "m5_l2_e3", "type": "operation", "title": "Check whether sandbox_user exists in passwd", "hint": "Run grep sandbox_user /etc/passwd.", "success_test": "cmd == 'grep sandbox_user /etc/passwd' and 'sandbox_user' in output", "solution": ["grep sandbox_user /etc/passwd"], "success_msg": "You verified the account record." }, { "id": "m5_l2_e4", "type": "operation", "title": "Read the newest auth log line", "hint": "Run cat /var/log/auth.log | tail -n 1.", "success_test": "cmd == 'cat /var/log/auth.log | tail -n 1' and 'sandbox_user' in output", "solution": ["cat /var/log/auth.log | tail -n 1"], "success_msg": "You retrieved recent authentication evidence." 
} ] }, { "id": "m5_l3_service_path", "title": "Incident drill: service is up, but the application still feels broken", "goal": "Check service state, listening sockets, and application output in one guided drill.", "why_it_matters": "A service can look healthy at one layer but still fail at another layer.", "concepts": ["Service state", "Socket listening state", "Application response validation"], "command": "systemctl / ss / curl / journalctl", "examples": ["systemctl status nginx", "ss -ltnp", "curl http://127.0.0.1", "journalctl -n 20"], "pitfalls": ["Trusting systemctl alone without checking sockets or a real request", "Stopping at ss without making an application-layer call"], "scenarios": ["A team says the service is up but the page still feels unavailable", "You need to prove whether the problem is service state, port state, or HTTP behavior"], "troubleshooting_flow": ["Check systemctl for service state", "Check ss for listening ports", "Use curl to verify the real response", "Open journal logs if behavior still looks wrong"], "related_commands": ["systemctl", "ss", "curl", "journalctl"], "classic_view": "Healthy service state does not guarantee healthy application behavior, so finish the chain with a real request.", "takeaways": ["Check multiple layers", "Do not confuse a listening port with a healthy app", "End the chain with a real request and logs when needed"], "after_class": "Practice explaining which layer each command validates: service, socket, application, or logs.", "exercises": [ { "id": "m5_l3_e1", "type": "operation", "title": "Check nginx service state", "hint": "Run systemctl status nginx.", "success_test": "cmd == 'systemctl status nginx' and 'Active:' in output", "solution": ["systemctl status nginx"], "success_msg": "You confirmed the service layer state." 
}, { "id": "m5_l3_e2", "type": "operation", "title": "Check listening sockets", "hint": "Run ss -ltnp.", "success_test": "cmd == 'ss -ltnp' and 'LISTEN' in output", "solution": ["ss -ltnp"], "success_msg": "You confirmed the socket layer." }, { "id": "m5_l3_e3", "type": "operation", "title": "Make a real application request", "hint": "Run curl http://127.0.0.1.", "success_test": "cmd == 'curl http://127.0.0.1' and 'hello localhost' in output", "solution": ["curl http://127.0.0.1"], "success_msg": "You verified the final application response layer." }, { "id": "m5_l3_e4", "type": "operation", "title": "Open recent journal lines", "hint": "Run journalctl -n 20.", "success_test": "cmd == 'journalctl -n 20' and 'Started nginx.service' in output", "solution": ["journalctl -n 20"], "success_msg": "You added log evidence to the service drill." } ] } ] } ] }