diff --git a/README.md b/README.md index 2c2d975..31f68a0 100644 --- a/README.md +++ b/README.md @@ -24,10 +24,12 @@ Designed for **homelab** and **server** environments. Script file is named `cifs ## How It Works 1. **Discovery** — scans `/etc/fstab` for uncommented `cifs` entries like: - ``` + + ```ini //nas.local/media /mnt/media cifs vers=3.0,credentials=/root/.smbcreds 0 0 //192.168.1.50/share /mnt/share cifs credentials=/root/.creds,iocharset=utf8 0 0 ``` + 2. **Reachability** — ensures the server resolves, optionally pings, and has TCP/445 open. 3. **Health probe** — times a quick `ls` against the mountpoint. 4. **Repair** — remount or unmount/remount as needed, with retries and logging. @@ -37,34 +39,44 @@ Designed for **homelab** and **server** environments. Script file is named `cifs ## First-Run Setup 1. **Install prerequisites** + ```bash sudo apt install cifs-utils ``` + 2. **Credentials file** + ```bash sudo nano /root/.smbcreds ``` - ``` + + ```ini username=myuser password=mypassword domain=MYDOMAIN # optional ``` + ```bash sudo chmod 600 /root/.smbcreds ``` + 3. **Add to `/etc/fstab`** - ``` + + ```ini //192.168.1.10/media /mnt/media cifs vers=3.0,credentials=/root/.smbcreds,uid=1000,gid=1000,file_mode=0644,dir_mode=0755 0 0 ``` + > Avoid `noauto` if you want the script to manage the mount. 4. **Test manually** + ```bash sudo mount -a sudo ls /mnt/media ``` 5. 
**Verify connectivity** + ```bash ping -c 2 192.168.1.10 nc -zv 192.168.1.10 445 @@ -80,12 +92,14 @@ sudo touch /var/log/cifs-remount.log && sudo chmod 600 /var/log/cifs-remount.log ``` **Test it:** + ```bash sudo /usr/local/sbin/cifs-watch --dry-run --verbose sudo /usr/local/sbin/cifs-watch --verbose ``` Logs: + - `/var/log/cifs-remount.log` - `journalctl -t cifs-watch` or `journalctl -u cifs-watch.service` @@ -96,6 +110,7 @@ Logs: Create the following two files: **`/etc/systemd/system/cifs-watch.service`** + ```ini [Unit] Description=Monitor and repair CIFS mounts @@ -107,6 +122,7 @@ Nice=10 ``` **`/etc/systemd/system/cifs-watch.timer`** + ```ini [Unit] Description=Run cifs-watch periodically @@ -122,6 +138,7 @@ WantedBy=timers.target ``` Enable and start: + ```bash sudo systemctl daemon-reload sudo systemctl enable --now cifs-watch.timer @@ -149,7 +166,8 @@ Prevents hammering an offline NAS with constant retries. - Backoff state is reset on reboot (uses `/var/tmp`). Example log flow: -``` + +```log [WARN] Server NOT reachable: nas.local (skipping /mnt/media) [INFO] Backoff started: will not retry nas.local for 10 minutes ... @@ -171,7 +189,7 @@ Example log flow: ## Cron Alternative -``` +```ini */5 * * * * /usr/local/sbin/cifs-watch >/dev/null 2>&1 ``` @@ -188,4 +206,5 @@ Example log flow: ## License -MIT — see `LICENSE`. +Released under the [Unlicense](https://unlicense.org/). +You can do whatever you want with this code. No warranty provided. diff --git a/cifs-watch b/cifs-watch new file mode 100644 index 0000000..a5f4dff --- /dev/null +++ b/cifs-watch @@ -0,0 +1,319 @@ +#!/usr/bin/env bash +# Monitor CIFS mounts from /etc/fstab and (re)mount if needed. +# Designed for cron/systemd timer. Requires root. +# Includes per-server backoff to avoid hammering offline hosts. 

set -Eeuo pipefail
IFS=$'\n\t'

# ---------------------------
# Config (edit if desired)
# ---------------------------
LOGFILE="/var/log/cifs-remount.log"   # Set empty ("") to disable file logging.
PING_COUNT=1
PING_TIMEOUT=1       # seconds
TCP_TIMEOUT=2        # seconds for port 445 check
PROBE_TIMEOUT=2      # seconds to test mount health (listing)
REMOUNT_RETRY=2      # attempts
SLEEP_BETWEEN=1      # seconds between retries
SYSLOG_TAG="cifs-watch"

DRY_RUN=0
VERBOSE=0

# ---------------------------
# Optional backoff (per-server)
# ---------------------------
# Line-delimited JSON; last entry wins for each host
BACKOFF_FILE="/var/tmp/cifs-watch-backoff.json"
BACKOFF_MINUTES=10   # set to 0 to disable

# ---------------------------
# Helpers
# ---------------------------

#######################################
# Test whether a command is available on PATH.
# Arguments: $1 - command name
# Returns:   0 if found, non-zero otherwise
#######################################
have_cmd() { command -v "$1" >/dev/null 2>&1; }

#######################################
# Emit one log record to stdout, syslog and (optionally) the logfile.
# Globals:   VERBOSE, SYSLOG_TAG, LOGFILE (read)
# Arguments: $1 - level (DEBUG, INFO, WARN, ERROR); remaining args - message
# Outputs:   "[timestamp] [LEVEL] message" on stdout; DEBUG records are
#            printed only when VERBOSE=1
# Returns:   0 (syslog and logfile writes are best-effort)
#######################################
log() {
  local level="$1"
  shift
  local msg="$*"
  local ts
  ts="$(date '+%Y-%m-%d %H:%M:%S')"

  # stdout when verbose or for non-debug levels
  if [[ "$VERBOSE" -eq 1 || "$level" != "DEBUG" ]]; then
    printf '[%s] [%s] %s\n' "$ts" "$level" "$msg"
  fi

  # syslog: skipped when logger is not installed; a failing logger must
  # never abort the run, hence the deliberate "|| true".
  if have_cmd logger; then
    logger -t "${SYSLOG_TAG}[$$]" -p "user.${level,,}" -- "$msg" || true
  fi

  # logfile: the subshell keeps the restrictive umask from leaking out.
  if [[ -n "$LOGFILE" ]]; then
    ( umask 0077; printf '[%s] [%s] %s\n' "$ts" "$level" "$msg" >> "$LOGFILE" ) || true
  fi
}

#######################################
# Log a DEBUG record, but only in verbose mode.
# Written as an explicit "if" so the function returns 0 when verbose mode
# is off: a bare `[[ ... ]] && log ...` returns 1 in that case, which
# terminates the script under `set -e` whenever vdbg is called as a plain
# statement.
# Globals:   VERBOSE (read)
# Arguments: message words
# Returns:   0
#######################################
vdbg() {
  if [[ "$VERBOSE" -eq 1 ]]; then
    log "DEBUG" "$*"
  fi
}

#######################################
# Log an ERROR record and terminate the script with status 1.
# Arguments: message words
#######################################
fail() {
  log "ERROR" "$*"
  exit 1
}

# ----- Backoff helpers -----
# Store one JSON object per line; we always use the last one for a host.
+# Example line: {"host":"nas.local","last_unreachable":1729030000} +now_epoch() { date +%s; } + +# Return last unreachable epoch for a host (or 0 if none) +get_backoff_epoch() { + local host="$1" + [[ -f "$BACKOFF_FILE" ]] || { echo 0; return; } + local line epoch + line="$(grep -F ""host":"$host"" "$BACKOFF_FILE" | tail -n1 || true)" + if [[ -z "$line" ]]; then + echo 0; return + fi + epoch="$(sed -n 's/.*"last_unreachable":[[:space:]]*\([0-9]\+\).*/\1/p' <<<"$line")" + [[ -n "$epoch" ]] && echo "$epoch" || echo 0 +} + +# Record an unreachable event for the host +set_backoff_epoch() { + local host="$1" now + now="$(now_epoch)" + ( umask 0077; printf '{"host":"%s","last_unreachable":%s}\n' "$host" "$now" >> "$BACKOFF_FILE" ) || true +} + +# Return 0 if we should BACK OFF (i.e., skip trying this host), else 1 +backoff_active() { + [[ "${BACKOFF_MINUTES:-0}" -le 0 ]] && return 1 + local host="$1" last now cutoff + last="$(get_backoff_epoch "$host")" + [[ "$last" -eq 0 ]] && return 1 + now="$(now_epoch)" + cutoff=$(( BACKOFF_MINUTES * 60 )) + if (( now - last < cutoff )); then + return 0 + fi + return 1 +} + +announce_backoff_window() { + if [[ "${BACKOFF_MINUTES:-0}" -gt 0 ]]; then + log "DEBUG" "Backoff window is ${BACKOFF_MINUTES} minute(s); state file: ${BACKOFF_FILE}" + fi +} + +init_backoff_state() { + if [[ "${BACKOFF_MINUTES:-0}" -gt 0 && ! -f "$BACKOFF_FILE" ]]; then + ( umask 0077; : > "$BACKOFF_FILE" ) || true + fi +} + +# Return 0 if TCP port is open, else 1 +tcp_open_445() { + local host="$1" + if have_cmd nc; then + nc -z -w "$TCP_TIMEOUT" "$host" 445 >/dev/null 2>&1 + return $? + else + # shellcheck disable=SC3020 + timeout "$TCP_TIMEOUT" bash -c "cat < /dev/null > /dev/tcp/$host/445" 2>/dev/null + return $? + fi +} + +# Return 0 if host is reachable enough to try mount +host_reachable() { + local host="$1" + if ! 
getent ahosts "$host" >/dev/null 2>&1; then + vdbg "DNS resolution failed for $host" + return 1 + fi + if have_cmd ping; then + ping -c "$PING_COUNT" -W "$PING_TIMEOUT" "$host" >/dev/null 2>&1 || vdbg "Ping to $host failed" + fi + if tcp_open_445 "$host"; then + return 0 + else + vdbg "TCP/445 closed on $host" + return 1 + fi +} + +# Return 0 if mountpoint is currently mounted as CIFS +is_cifs_mounted() { + local mnt="$1" + if findmnt -no FSTYPE -T "$mnt" 2>/dev/null | grep -qi '^cifs$'; then + return 0 + fi + return 1 +} + +# Return 0 if mounted share appears healthy (no hang/transport error) +mount_healthy() { + local mnt="$1" + timeout "$PROBE_TIMEOUT" bash -c 'ls -1A -- "$0" >/dev/null 2>&1' "$mnt" +} + +# Try remount, else unmount+mount. Returns 0 on success. +repair_mount() { + local mnt="$1" + local attempt=1 + while (( attempt <= REMOUNT_RETRY )); do + vdbg "Attempt $attempt: remounting $mnt" + if (( DRY_RUN )); then + log "INFO" "DRY-RUN: would remount $mnt" + return 0 + fi + if mount -o remount "$mnt" >/dev/null 2>&1; then + if mount_healthy "$mnt"; then + log "INFO" "Remounted healthy: $mnt" + return 0 + fi + vdbg "Remount completed but health probe failed: $mnt" + fi + sleep "$SLEEP_BETWEEN" + (( attempt++ )) + done + + log "WARN" "Remount failed/unhealthy for $mnt; trying forced unmount + clean mount" + if (( DRY_RUN )); then + log "INFO" "DRY-RUN: would umount -f $mnt && mount $mnt" + return 0 + fi + + if umount -f "$mnt" >/dev/null 2>&1 || umount -l "$mnt" >/dev/null 2>&1; then + : + else + log "WARN" "Unable to unmount $mnt; will still attempt a mount" + fi + + if mount "$mnt" >/dev/null 2>&1; then + if mount_healthy "$mnt"; then + log "INFO" "Mounted healthy: $mnt" + return 0 + else + log "WARN" "Mounted but health probe failed: $mnt" + return 1 + fi + else + log "ERROR" "Mount failed for $mnt" + return 1 + fi +} + +usage() { + cat <<'USAGE' +cifs-watch [-n|--dry-run] [-v|--verbose] [--logfile PATH] + +Monitors CIFS entries in /etc/fstab, checks 
server reachability, and (re)mounts as needed. +- Processes uncommented lines with type "cifs". +- Accepts fstab lines with 4–6 fields. +- Skips entries containing "noauto". + +Options: + -n, --dry-run Show actions without changing anything + -v, --verbose More detailed output + --logfile P Override logfile path (empty to disable file logging) +USAGE +} + +# --------------------------- +# Parse args +# --------------------------- +while [[ $# -gt 0 ]]; do + case "$1" in + -n|--dry-run) DRY_RUN=1; shift ;; + -v|--verbose) VERBOSE=1; shift ;; + --logfile) LOGFILE="${2:-}"; shift 2 ;; + -h|--help) usage; exit 0 ;; + *) echo "Unknown arg: $1"; usage; exit 2 ;; + esac +done + +# Ensure tools present +for bin in findmnt mount umount awk grep sed timeout; do + have_cmd "$bin" || fail "Required command not found: $bin" +done + +# --------------------------- +# Main: parse /etc/fstab +# --------------------------- +init_backoff_state +announce_backoff_window +mapfile -t CIFS_LINES < <(awk ' + $0 !~ /^[[:space:]]*#/ && NF>=4 && tolower($3)=="cifs" { print } +' /etc/fstab) + +if [[ ${#CIFS_LINES[@]} -eq 0 ]]; then + log "INFO" "No CIFS entries found in /etc/fstab. Nothing to do." 
+ exit 0 +fi + +overall_rc=0 + +for line in "${CIFS_LINES[@]}"; do + # fields: fs_spec mountpoint fstype options [dump] [pass] + fs_spec=$(awk '{print $1}' <<<"$line") + mnt_point=$(awk '{print $2}' <<<"$line") + fstype=$(awk '{print tolower($3)}' <<<"$line") + options=$(awk '{print $4}' <<<"$line") + dumpv=$(awk 'NF>=5{print $5}' <<<"$line") + passv=$(awk 'NF>=6{print $6}' <<<"$line") + + # Skip noauto entries + if grep -qi '(^|,)noauto(,|$)' <<<",$options,"; then + vdbg "Skipping noauto CIFS entry: $mnt_point ($fs_spec)" + continue + fi + + # Parse server from //server/share + if [[ "$fs_spec" =~ ^//([^/]+)/.+$ ]]; then + server="${BASH_REMATCH[1]}" + else + log "WARN" "Could not parse server from fs_spec: $fs_spec (skipping)" + continue + fi + + log "INFO" "Checking CIFS mount: $mnt_point (server: $server)" + + if is_cifs_mounted "$mnt_point"; then + if mount_healthy "$mnt_point"; then + vdbg "Healthy: $mnt_point" + continue + else + log "WARN" "Mounted but unhealthy: $mnt_point" + fi + else + log "WARN" "Not mounted: $mnt_point" + fi + + # --- Per-server backoff gate --- + if [[ "${BACKOFF_MINUTES:-0}" -gt 0 ]] && backoff_active "$server"; then + log "INFO" "Backoff active for $server — skipping $mnt_point (will retry after ${BACKOFF_MINUTES}m since last failure)" + overall_rc=1 + continue + fi + + # Probe reachability now (outside backoff or after it expired) + if host_reachable "$server"; then + log "INFO" "Server reachable: $server — attempting repair for $mnt_point" + else + log "ERROR" "Server NOT reachable: $server — skipping $mnt_point for now" + # Mark/refresh backoff timestamp + if [[ "${BACKOFF_MINUTES:-0}" -gt 0 ]]; then + set_backoff_epoch "$server" + if backoff_active "$server"; then + # Compute remaining (best-effort) for log readability + last="$(get_backoff_epoch "$server")" + now="$(now_epoch)" + rem=$(( BACKOFF_MINUTES*60 - (now - last) )) + (( rem < 0 )) && rem=0 + log "INFO" "Backoff started/extended for $server — next retry window in ~$(( 
rem/60 ))m" + fi + fi + overall_rc=1 + continue + fi + + if ! repair_mount "$mnt_point"; then + log "ERROR" "Repair failed: $mnt_point" + overall_rc=1 + fi +done + +exit "$overall_rc" diff --git a/cifs-watch.sh b/cifs-watch.sh deleted file mode 100644 index 1e177ac..0000000 --- a/cifs-watch.sh +++ /dev/null @@ -1,229 +0,0 @@ -#!/usr/bin/env bash -# Monitor CIFS mounts from /etc/fstab and (re)mount if needed. -# Designed for cron/systemd timer. Requires root. - -set -Eeuo pipefail -IFS=$'\n\t' - -# --------------------------- -# Config (edit if desired) -# --------------------------- -LOGFILE="/var/log/cifs-remount.log" # Set empty ("") to disable file logging. -PING_COUNT=1 -PING_TIMEOUT=1 # seconds -TCP_TIMEOUT=2 # seconds for port 445 check -PROBE_TIMEOUT=2 # seconds to test mount health (listing) -REMOUNT_RETRY=2 # attempts -SLEEP_BETWEEN=1 # seconds between retries -SYSLOG_TAG="cifs-watch" - -DRY_RUN=0 -VERBOSE=0 - -# --------------------------- -# Helpers -# --------------------------- -log() { - local level="$1"; shift - local msg="$*" - local ts - ts="$(date '+%Y-%m-%d %H:%M:%S')" - if [[ "$VERBOSE" -eq 1 || "$level" != "DEBUG" ]]; then - echo "[$ts] [$level] $msg" - fi - logger -t "$SYSLOG_TAG[$$]" -p "user.$(tr '[:upper:]' '[:lower:]' <<<"$level")" -- "$msg" || true - if [[ -n "$LOGFILE" ]]; then - ( umask 0077; echo "[$ts] [$level] $msg" >> "$LOGFILE" ) || true - fi -} - -vdbg() { [[ "$VERBOSE" -eq 1 ]] && log "DEBUG" "$*"; } -fail() { log "ERROR" "$*"; exit 1; } -have_cmd() { command -v "$1" >/dev/null 2>&1; } - -tcp_open_445() { - local host="$1" - if have_cmd nc; then - nc -z -w "$TCP_TIMEOUT" "$host" 445 >/dev/null 2>&1 - return $? - else - timeout "$TCP_TIMEOUT" bash -c "cat < /dev/null > /dev/tcp/$host/445" 2>/dev/null - return $? - fi -} - -host_reachable() { - local host="$1" - if ! 
getent ahosts "$host" >/dev/null 2>&1; then - vdbg "DNS resolution failed for $host" - return 1 - fi - if have_cmd ping; then - ping -c "$PING_COUNT" -W "$PING_TIMEOUT" "$host" >/dev/null 2>&1 || vdbg "Ping to $host failed" - fi - if tcp_open_445 "$host"; then - return 0 - else - vdbg "TCP/445 closed on $host" - return 1 - fi -} - -is_cifs_mounted() { - local mnt="$1" - if findmnt -no FSTYPE -T "$mnt" 2>/dev/null | grep -qi '^cifs$'; then - return 0 - fi - return 1 -} - -mount_healthy() { - local mnt="$1" - timeout "$PROBE_TIMEOUT" bash -c 'ls -1A -- "$0" >/dev/null 2>&1' "$mnt" -} - -repair_mount() { - local mnt="$1" - local attempt=1 - while (( attempt <= REMOUNT_RETRY )); do - vdbg "Attempt $attempt: remounting $mnt" - if (( DRY_RUN )); then - log "INFO" "DRY-RUN: would remount $mnt" - return 0 - fi - if mount -o remount "$mnt" >/dev/null 2>&1; then - if mount_healthy "$mnt"; then - log "INFO" "Remounted healthy: $mnt" - return 0 - fi - vdbg "Remount completed but health probe failed: $mnt" - fi - sleep "$SLEEP_BETWEEN" - (( attempt++ )) - done - - log "WARN" "Remount failed/unhealthy for $mnt; trying forced unmount + clean mount" - if (( DRY_RUN )); then - log "INFO" "DRY-RUN: would umount -f $mnt && mount $mnt" - return 0 - fi - - if umount -f "$mnt" >/dev/null 2>&1 || umount -l "$mnt" >/dev/null 2>&1; then - : - else - log "WARN" "Unable to unmount $mnt; will still attempt a mount" - fi - - if mount "$mnt" >/dev/null 2>&1; then - if mount_healthy "$mnt"; then - log "INFO" "Mounted healthy: $mnt" - return 0 - else - log "WARN" "Mounted but health probe failed: $mnt" - return 1 - fi - else - log "ERROR" "Mount failed for $mnt" - return 1 - fi -} - -usage() { - cat <<'USAGE' -cifs-watch.sh [-n|--dry-run] [-v|--verbose] [--logfile PATH] - -Monitors CIFS entries in /etc/fstab, checks server reachability, and (re)mounts as needed. -- Processes uncommented lines with type "cifs". -- Accepts fstab lines with 4–6 fields. -- Skips entries containing "noauto". 
- -Options: - -n, --dry-run Show actions without changing anything - -v, --verbose More detailed output - --logfile P Override logfile path (empty to disable file logging) -USAGE -} - -# --------------------------- -# Parse args -# --------------------------- -while [[ $# -gt 0 ]]; do - case "$1" in - -n|--dry-run) DRY_RUN=1; shift ;; - -v|--verbose) VERBOSE=1; shift ;; - --logfile) LOGFILE="${2:-}"; shift 2 ;; - -h|--help) usage; exit 0 ;; - *) echo "Unknown arg: $1"; usage; exit 2 ;; - esac -done - -# Ensure tools present -for bin in findmnt mount umount awk grep sed timeout; do - have_cmd "$bin" || fail "Required command not found: $bin" -done - -# --------------------------- -# Main: parse /etc/fstab -# --------------------------- -mapfile -t CIFS_LINES < <(awk ' - $0 !~ /^[[:space:]]*#/ && NF>=4 && tolower($3)=="cifs" { print } -' /etc/fstab) - -if [[ ${#CIFS_LINES[@]} -eq 0 ]]; then - log "INFO" "No CIFS entries found in /etc/fstab. Nothing to do." - exit 0 -fi - -overall_rc=0 - -for line in "${CIFS_LINES[@]}"; do - # fields: fs_spec mountpoint fstype options [dump] [pass] - fs_spec=$(awk '{print $1}' <<<"$line") - mnt_point=$(awk '{print $2}' <<<"$line") - fstype=$(awk '{print tolower($3)}' <<<"$line") - options=$(awk '{print $4}' <<<"$line") - dumpv=$(awk 'NF>=5{print $5}' <<<"$line") - passv=$(awk 'NF>=6{print $6}' <<<"$line") - - # Skip noauto entries - if grep -qi '(^|,)noauto(,|$)' <<<",$options,"; then - vdbg "Skipping noauto CIFS entry: $mnt_point ($fs_spec)" - continue - fi - - # Parse server from //server/share - if [[ "$fs_spec" =~ ^//([^/]+)/.+$ ]]; then - server="${BASH_REMATCH[1]}" - else - log "WARN" "Could not parse server from fs_spec: $fs_spec (skipping)" - continue - fi - - log "INFO" "Checking CIFS mount: $mnt_point (server: $server)" - - if is_cifs_mounted "$mnt_point"; then - if mount_healthy "$mnt_point"; then - vdbg "Healthy: $mnt_point" - continue - else - log "WARN" "Mounted but unhealthy: $mnt_point" - fi - else - log "WARN" "Not 
mounted: $mnt_point" - fi - - if host_reachable "$server"; then - log "INFO" "Server reachable: $server — attempting repair for $mnt_point" - else - log "ERROR" "Server NOT reachable: $server — skipping $mnt_point for now" - overall_rc=1 - continue - fi - - if ! repair_mount "$mnt_point"; then - log "ERROR" "Repair failed: $mnt_point" - overall_rc=1 - fi -done - -exit "$overall_rc" diff --git a/cifs-watch.service.sh b/systemd/cifs-watch.service similarity index 74% rename from cifs-watch.service.sh rename to systemd/cifs-watch.service index 44b7666..7512aa3 100644 --- a/cifs-watch.service.sh +++ b/systemd/cifs-watch.service @@ -1,4 +1,3 @@ -# /etc/systemd/system/cifs-watch.service [Unit] Description=Monitor and repair CIFS mounts diff --git a/cifs-watch.timer.txt b/systemd/cifs-watch.timer similarity index 80% rename from cifs-watch.timer.txt rename to systemd/cifs-watch.timer index 1c75757..30ae65e 100644 --- a/cifs-watch.timer.txt +++ b/systemd/cifs-watch.timer @@ -1,4 +1,3 @@ -# /etc/systemd/system/cifs-watch.timer [Unit] Description=Run cifs-watch periodically