Wednesday, December 3, 2025

How to generate an NMI from the ILOM (when the system is hung)

1) Log in to the ILOM via SSH. 2) Run "start /SP/console" to attach to the node's console. 3) On the ILOM, run "set /HOST/generate_host_nmi=true". 4) Run "start /SP/console" for the node again and watch for it to start dumping core. Leave the console at this point while the node finishes dumping core and reboots (this may take 30 minutes or more). NOTE: Do not wait on this step. Move on to step 5. 5) Verify that the node

Tuesday, December 2, 2025

ipmitool commands to list all serial numbers

# Option 1: list every FRU field that mentions a serial number, prefixed with
# the "FRU Device Description" line of the FRU it belongs to.
ipmitool fru print | awk '
  /^FRU Device Description/ { fru = $0 }
  /[Ss]erial/               { print fru " -> " $0 }
'

# Option 2: separate command -- must not share a line with the awk pipeline
# above, otherwise its path and options are read by awk as input file names.
/opt/oracle.SupportTools/CheckHWnFWProfile -action list -mode serial_numbers

Sunday, November 30, 2025

Command to find the largest files under the /u01 mount point

# Show the 20 largest regular files bigger than 10 MiB on the /u01 filesystem
# (-mount keeps find from crossing into other mounted filesystems).
#
# Output columns: size in MB, modification date (ddMonYYYY), owner, path.
#
# find's own -printf is used instead of running `ls -l` per file and splitting
# its output on whitespace: paths containing spaces are kept intact and no
# extra process is forked for each file. Fields are tab-separated so that awk
# only splits on the delimiters emitted here. Requires GNU find.
nice find /u01 -mount -type f -size +10240k \
    -printf '%s\t%Td%Tb%TY\t%u\t%p\n' \
  | awk -F'\t' '{ printf("%9.1fMB %s %12s %s\n", $1/1024/1024, $2, $3, $4) }' \
  | sort -nr \
  | head -20

Tuesday, November 18, 2025

How to collect logs on Linux

1) sos report --batch --all-logs 2) To write the archive to a specific directory instead of the default location: sos report --batch --all-logs --tmp-dir <directory>

Thursday, October 30, 2025

How to clear the pinned cache in Exadata

1. Check whether any device has pinned (preserved) cache by running: /opt/MegaRAID/MegaCli/MegaCli64 -GetPreservedCacheList -a0 2. Discard the cache: # /opt/MegaRAID/MegaCli/MegaCli64 -DiscardPreservedCache -L11 -force -a0 (the -L flag specifies the Virtual Disk / Target ID; 11 in this example) 3. Validate that the cache was discarded: /opt/MegaRAID/MegaCli/MegaCli64 -GetPreservedCacheList -a0 a) The following is reported if the cache has been discarded successfully: Exit Code: 0x00

Sunday, October 26, 2025

How to create 50 files in a folder

# Create 50 files (file_1 .. file_50) under /mnt/weka2, each filled with
# 50 blocks of 1 MiB of random data.
for i in {1..50}; do
  echo "Creating file $i..."
  sudo dd if=/dev/urandom bs=1M count=50 of="/mnt/weka2/file_$i"
done

Friday, October 17, 2025

Exadata DB Xen health_check

#!/bin/bash
# Script: exadata_DB_XM_health_check.sh
# Purpose: Collect system diagnostics and Exadata hardware health summary
# Usage: bash exadata_DB_XM_health_check.sh > exadata_health_$(hostname).log
# Written by Gurudatta N.R
#
# The script only reads state and prints it. There is no `set -e`, so when a
# tool is missing or a command fails, the error appears in the output and the
# remaining sections still run.

SEPARATOR="---------------"

#######################################
# Print a section header: a blank line, the title, then the separator rule.
# Globals:   SEPARATOR (read)
# Arguments: $1 - section title
# Outputs:   three lines on stdout
#######################################
print_section() {
  echo ""
  echo "$1"
  echo "$SEPARATOR"
}

# Host & System Basics
print_section "Hostname"
hostname

print_section "Uptime"
uptime

print_section "Imageinfo"
imageinfo

print_section "Dmidecode"
dmidecode -t1

# Infiniband
print_section "ibstat"
ibstat

# Virtualization
print_section "xm List"
xm list

# Alerts & Logs
print_section "Alerthistory (Network Only)"
dbmcli -e "list alerthistory" | grep -v AIDE | grep -i Network

print_section "OS Messages (MCE)"
grep -i mce /var/log/messages | tail -20

# Memory Info
print_section "MemTotal"
grep MemTotal /proc/meminfo

# Disks
print_section "Physical Disks"
dbmcli -e 'list physicaldisk'

print_section "BBU"
if [ -x /opt/MegaRAID/storcli/storcli64 ]; then
  /opt/MegaRAID/storcli/storcli64 -adpbbucmd -aALL
else
  echo "BBU check tool not found: /opt/MegaRAID/storcli/storcli64"
fi

# IPMI Fault Management
print_section "Show Faulty"
ipmitool sunoem cli 'show faulty'

print_section "Open Problems"
ipmitool sunoem cli 'show /system/open_problems'

print_section "Fmadm Faulty"
ipmitool sunoem cli 'start /SP/faultmgmt/shell' 'y' 'fmadm faulty'

# Link Errors
print_section "Dmesg Output (Link-related)"
dmesg -T | grep -i link | tail -20

# Network Interfaces
print_section "Interface Details (eth1 & eth2)"
ip a s | grep -E 'eth1|eth2'

print_section "Bond Details Interface Details "
cat /proc/net/bonding/bondeth0

# grep -E replaces the deprecated egrep alias.
print_section "Eth1 Link Speed"
ethtool eth1 | grep -E 'Settings for|Link detected|Speed|Duplex'

print_section "Eth2 Link Speed"
ethtool eth2 | grep -E 'Settings for|Link detected|Speed|Duplex'

# Same header layout as every other section (output is unchanged).
print_section "EFI Boot Manager Output"
/usr/sbin/efibootmgr -v

# Extra DBMCLI
# NOTE(review): the title below says "Disk Details" but the command lists the
# dbserver attributes; title kept as found.
print_section "DBMCLI Disk Details"
dbmcli -e "list dbserver detail"

print_section "dbserver validate"
dbmcli -e "alter dbserver validate configuration"

Exadata DB node KVM health check

#!/bin/bash
# Script: exadata_DB_KVM_health_check.sh
# Purpose: Collect system diagnostics and Exadata hardware health summary
# Usage: bash exadata_DB_KVM_health_check.sh > exadata_health_$(hostname).log
# Written by Gurudatta N.R
#
# The script only reads state and prints it. There is no `set -e`, so when a
# tool is missing or a command fails, the remaining sections still run.

# Colors: only emitted when stdout is a terminal, so that a report redirected
# to a log file (see Usage) does not contain raw escape sequences.
if [[ -t 1 ]]; then
  RED='\033[1;31m'
  NC='\033[0m' # No Color
else
  RED=''
  NC=''
fi

SEPARATOR="---------------"

#######################################
# Print a section header: a blank line, the (optionally coloured) title, then
# the separator rule.
# Globals:   RED, NC, SEPARATOR (read)
# Arguments: $1 - section title
# Outputs:   three lines on stdout
#######################################
print_section() {
  echo ""
  echo -e "${RED}$1${NC}"
  echo "$SEPARATOR"
}

#######################################
# Run one DBMCLI command, or print a notice when dbmcli is not installed.
# Arguments: $@ - the command text passed to `dbmcli -e`
# Outputs:   dbmcli output, or "dbmcli not found in PATH", on stdout
#######################################
run_dbmcli() {
  if command -v dbmcli &> /dev/null; then
    dbmcli -e "$@"
  else
    echo "dbmcli not found in PATH"
  fi
}

# Host & System Basics
print_section "Hostname"
hostname

print_section "Uptime"
uptime

print_section "Timestamp"
date

print_section "Kernel Version"
uname -r

print_section "OS Release"
cat /etc/*release | grep -Ei 'name|version'

print_section "Imageinfo"
imageinfo

print_section "Dmidecode"
dmidecode -t1

# Infiniband
print_section "ibstat"
ibstat

# Virtualization
print_section "Virsh List"
if command -v virsh &> /dev/null; then
  virsh list --all
else
  echo "virsh not available"
fi

# Alerts & Logs
# Not routed through run_dbmcli: the grep filters below would swallow the
# "not found" notice.
print_section "Alerthistory (Network Only)"
if command -v dbmcli &> /dev/null; then
  dbmcli -e "list alerthistory" | grep -v AIDE | grep -i Network
else
  echo "dbmcli not found in PATH"
fi

print_section "OS Messages (MCE)"
grep -i mce /var/log/messages | tail -20

# Memory Info
print_section "MemTotal"
grep MemTotal /proc/meminfo

# CPU Info
print_section "CPU Info"
lscpu | grep -E 'Model name|Socket|CPU\(s\)'

# Disk Usage
print_section "Disk Usage"
df -hT | grep -v tmpfs

# Disks
print_section "Physical Disks"
run_dbmcli 'list physicaldisk'

print_section "BBU"
if [ -x /opt/MegaRAID/storcli/storcli64 ]; then
  /opt/MegaRAID/storcli/storcli64 -adpbbucmd -aALL
else
  echo "BBU check tool not found: /opt/MegaRAID/storcli/storcli64"
fi

# IPMI Fault Management
print_section "Show Faulty"
ipmitool sunoem cli 'show faulty' 2>/dev/null || echo "IPMI command failed"

print_section "Open Problems"
ipmitool sunoem cli 'show /system/open_problems' 2>/dev/null || echo "IPMI command failed"

print_section "Fmadm Faulty"
ipmitool sunoem cli 'start /SP/faultmgmt/shell' 'y' 'fmadm faulty' 2>/dev/null || echo "IPMI command failed"

# Link Errors
print_section "Dmesg Output (Link-related)"
dmesg -T | grep -i link | tail -20

# Network Interfaces
print_section "Active Interfaces and IPs"
ip -o -4 addr show up | awk '{print $2, $4}'

print_section "Interface Details (eth1 & eth2)"
ip a s | grep -E 'eth1|eth2'

print_section "Bond Details Interface Details"
cat /proc/net/bonding/bondeth0 2>/dev/null || echo "bondeth0 not found"

# stderr is silenced on ethtool itself (the command that complains when the
# interface is missing). With no ethtool output grep matches nothing and
# returns non-zero, which triggers the fallback message.
print_section "Eth1 Link Speed"
ethtool eth1 2>/dev/null | grep -E 'Settings for|Link detected|Speed|Duplex' || echo "eth1 not found"

print_section "Eth2 Link Speed"
ethtool eth2 2>/dev/null | grep -E 'Settings for|Link detected|Speed|Duplex' || echo "eth2 not found"

print_section "EFI Boot Manager Output"
/usr/sbin/efibootmgr -v

# Extra DBMCLI
# NOTE(review): the title below says "Disk Details" but the command lists the
# dbserver attributes; title kept as found.
print_section "DBMCLI Disk Details"
run_dbmcli "list dbserver detail"

print_section "dbserver validate"
run_dbmcli "alter dbserver validate configuration"

Exadata Cell Health check script

#!/usr/bin/bash
################################################################################################
################################################################################################
##                                                                                            ##
##  Name    : exadata_cell_health_check.sh                                                    ##
##  Usage   : bash exadata_cell_health_check.sh                                               ##
##  Version : 1.0 Latest                                                                      ##
##  Author  : Gurudatta N.R                                                                   ##
##  MODIFIED (MM/DD/YY) : 10/10/2024                                                          ##
##                                                                                            ##
################################################################################################
################################################################################################
#
# Prints a health summary of an Exadata storage cell. Everything written to
# stdout or stderr is shown on screen and also appended to a timestamped log
# file in the current directory. There is no `set -e`, so a failing check does
# not stop the remaining sections.

LOGFILE="exadata_health_report_$(date +%F_%H%M%S).log"

# From here on, duplicate stdout and stderr into the log file via tee.
exec > >(tee -a "$LOGFILE") 2>&1

#######################################
# Print a section header: a blank line, the title, then a row of asterisks.
# Arguments: $1 - section title
# Outputs:   three lines on stdout
#######################################
print_section() {
  echo ""
  echo "$1"
  echo "***************"
}

print_section "Hostname"
hostname

print_section "Uptime"
uptime

print_section "Imageinfo"
imageinfo

# grep -E replaces the deprecated egrep alias.
print_section "Cell Services"
cellcli -e "list cell detail" | grep -E 'cellsrvStatus|msStatus|rsStatus'

print_section "Dmidecode"
dmidecode -t1

print_section "Dmesg Output (Link-related)"
dmesg -T | grep -i link | tail -20

print_section "Alerthistory"
cellcli -e "list alerthistory detail" | tail -30

# CellCLI commands are passed as one quoted string so the shell cannot strip
# quotes or otherwise reinterpret characters that belong to CellCLI.
print_section "Physicaldisk Status"
cellcli -e "list physicaldisk"

print_section "Flashdisk Status"
cellcli -e "list physicaldisk where diskType='FlashDisk'"

print_section "Failed Disk"
cellcli -e "list physicaldisk where status !=normal detail"

print_section "Griddisk Status"
cellcli -e "list griddisk attributes name, status,asmModeStatus,asmdeactivationoutcome,size"

print_section "Diskmap"
cellcli -e "list diskmap"

print_section "FlashCacheMode"
cellcli -e "list cell attributes name,flashCacheMode"

print_section "FlashCache Detail"
cellcli -e "list flashcache detail"

print_section "FlashLog Detail"
cellcli -e "list flashlog detail"

print_section "Griddisk Cache"
cellcli -e "list griddisk attributes name,cachedby"

print_section "Hardware fault status"
ipmitool sunoem cli "show -l all /SYS fault_state==Faulted"

# Only grid disks whose attribute line contains SYNCING are shown here.
print_section "griddisk attributes name, status, asmDiskgroupName, asmDiskName, asmModeStatus, availableTo,size, asmDeactivationOutcome "
cellcli -e "list griddisk attributes name, status, asmDiskgroupName, asmDiskName, asmModeStatus, availableTo,size, asmDeactivationOutcome" \
  | grep -i SYNCING

print_section "Interface Stats"
ip -br a

# IPMI Fault Management
print_section "Show Faulty"
ipmitool sunoem cli 'show faulty'

print_section "Open Problems"
ipmitool sunoem cli 'show /system/open_problems'

print_section "Fmadm Faulty"
ipmitool sunoem cli 'start /SP/faultmgmt/shell' 'y' 'fmadm faulty'

#echo -e "\nIPMI - PCI Add-on Devices\n---------------"
#ipmitool sunoem cli "show -l all -d properties /System/PCI_Devices/Add-on"
#echo -e "\nIPMI - Current Host Console\n---------------"
#ipmitool sunoem cli "show /HOST/console"
#echo -e "\nIPMI - Host Console History\n---------------"
#ipmitool sunoem cli "show /HOST/console/history"

print_section "Switch Stats"
/opt/oracle.SupportTools/ibdiagtools/utils/lldp_cap.py re0pf
/opt/oracle.SupportTools/ibdiagtools/utils/lldp_cap.py re1pf

# Extra CellCLI
print_section "cellcli Details"
cellcli -e "list cell detail"

print_section "cell validate"
cellcli -e "alter cell validate configuration"

echo ""
echo "Health report saved to $LOGFILE"