Solaris and Linux
Wednesday, December 3, 2025
How to generate an NMI from the ILOM (when the system is hung)
1) Log in to the ILOM via ssh.
2) start /SP/console on the Node
3) set /HOST/generate_host_nmi=true on the ILOM
4) In the /SP/console session for the node, watch for it to start dumping core. Leave the console session open while the node finishes dumping core and reboots (this may take 30 minutes or more). NOTE: You do not need to wait for this step to complete before continuing. Move on to step 5.
5) Verify that the node
Tuesday, December 2, 2025
ipmitool commands to list all serial numbers
# Print every FRU line that mentions a serial number, prefixed with the
# "FRU Device Description" line it was found under.
ipmitool fru print | awk '
  # Remember the most recent FRU header line.
  /^FRU Device Description/ { current_fru = $0 }
  # Emit "<FRU header> -> <serial line>" for each line mentioning a serial.
  /[Ss]erial/ { printf "%s -> %s\n", current_fru, $0 }
'
# Alternative: list serial numbers with CheckHWnFWProfile.
/opt/oracle.SupportTools/CheckHWnFWProfile -action list -mode serial_numbers
.
Sunday, November 30, 2025
Command to find the 20 largest files (over 10 MB) in the /u01 mount point
# List the 20 largest regular files bigger than 10240k under /u01, staying on
# the /u01 filesystem (-mount). Output columns: size in MB, modification
# date fields as printed by ls, owner, path.
# "{} +" hands many files to each ls invocation instead of forking one ls per
# file.
nice find /u01 -mount -type f -size +10240k -exec ls -l {} + |
  awk '{
    # ls -l puts the file name in field 9 onward; rejoin the pieces so a name
    # that contains spaces is not cut off at the first space.
    name = $9
    for (i = 10; i <= NF; i++) name = name " " $i
    printf("%9.1fMB %s %12s %s\n", $5/1024/1024, $7$6$8, $3, name)
  }' |
  sort -nr | head -20
Tuesday, November 18, 2025
How to collect logs on Linux
1) sos report --batch --all-logs
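2) sos report --batch --all-logs --tmp-dir <directory>   (use this form to write the report under <directory>; --tmp-dir requires a directory argument)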
Thursday, October 30, 2025
How to clear the pinned cache in Exadata
1. Validate if there is a cache pinned for any device, running command:
/opt/MegaRAID/MegaCli/MegaCli64 -GetPreservedCacheList -a0
2. Discard the cache:
#/opt/MegaRAID/MegaCli/MegaCli64 -DiscardPreservedCache -L11 -force -a0
The -L flag specifies the Virtual Disk / Target ID (11 in the example above).
3. Validate that cache was discarded:
/opt/MegaRAID/MegaCli/MegaCli64 -GetPreservedCacheList -a0
a) The following is reported if cache has been discarded successfully:
Exit Code: 0x00
Sunday, October 26, 2025
How to create 50 files in a folder.
# Create /mnt/weka2/file_1 .. file_50; each dd call writes 50 blocks of 1M
# read from /dev/urandom.
for i in {1..50}; do
  printf 'Creating file %s...\n' "$i"
  sudo dd if=/dev/urandom bs=1M count=50 of="/mnt/weka2/file_${i}"
done
Friday, October 17, 2025
Exadata DB Xen health_check
exadata_DB_XM_health_check.sh
#!/bin/bash
# Script: exadata_DB_XM_health_check.sh
# Purpose: Collect system diagnostics and Exadata hardware health summary
# Usage: bash exadata_DB_XM_health_check.sh > exadata_health_$(hostname).log
# Written by Gurudatta N.R
# Rule printed under every section heading.
SEPARATOR="---------------"

#######################################
# Print a section heading: a blank line, the title, then the separator rule.
# Globals:   SEPARATOR (read)
# Arguments: $1 - heading text
# Outputs:   three lines on stdout
#######################################
print_section() {
  # printf instead of echo so a title such as "-n" or "-e" is printed
  # literally rather than being consumed as an echo option.
  printf '\n%s\n%s\n' "$1" "$SEPARATOR"
}
# Host & System Basics
# Each check below prints a heading via print_section and then the raw output
# of one command; nothing is parsed or compared, the report is read by a human.
print_section "Hostname"
hostname
print_section "Uptime"
uptime
print_section "Imageinfo"
imageinfo
print_section "Dmidecode"
dmidecode -t1
# Infiniband
print_section "ibstat"
ibstat
# Virtualization
print_section "xm List"
xm list
# Alerts & Logs
print_section "Alerthistory (Network Only)"
# Drop alert-history lines containing "AIDE", then keep only lines that mention
# "Network" (case-insensitive).
dbmcli -e list alerthistory | grep -v AIDE | grep -i Network
print_section "OS Messages (MCE)"
# Last 20 lines of /var/log/messages that mention "mce" (case-insensitive).
grep -i mce /var/log/messages | tail -20
# Memory Info
print_section "MemTotal"
grep MemTotal /proc/meminfo
# Disks
print_section "Physical Disks"
dbmcli -e 'list physicaldisk'
print_section "BBU"
# Only run storcli64 when it exists and is executable; otherwise report the
# missing tool in the output instead of failing with "command not found".
if [ -x /opt/MegaRAID/storcli/storcli64 ]; then
/opt/MegaRAID/storcli/storcli64 -adpbbucmd -aALL
else
echo "BBU check tool not found: /opt/MegaRAID/storcli/storcli64"
fi
# IPMI Fault Management
# These three checks pass command strings to `ipmitool sunoem cli`.
print_section "Show Faulty"
ipmitool sunoem cli 'show faulty'
print_section "Open Problems"
ipmitool sunoem cli 'show /system/open_problems'
print_section "Fmadm Faulty"
# Three strings are passed in order: start the fault-management shell, "y",
# then "fmadm faulty". NOTE(review): the "y" presumably answers a confirmation
# prompt from the start command -- confirm.
ipmitool sunoem cli 'start /SP/faultmgmt/shell' 'y' 'fmadm faulty'
# Link Errors
print_section "Dmesg Output (Link-related)"
# Last 20 kernel-log lines that mention "link" (case-insensitive); -T is passed
# to dmesg for its timestamp format.
dmesg -T | grep -i link | tail -20
# Network Interfaces
# The script header documents running this with only stdout redirected to the
# log, so the bond/ethtool checks below print a fallback line on stdout when
# the interface is missing instead of leaving an empty section in the log.
print_section "Interface Details (eth1 & eth2)"
ip a s | grep -E 'eth1|eth2'

print_section "Bond Details Interface Details "
cat /proc/net/bonding/bondeth0 2>/dev/null || echo "bondeth0 not found"

# grep -E replaces egrep, which is obsolescent. ethtool's own stderr is
# silenced; when it prints nothing, grep matches nothing and the fallback
# message is emitted.
print_section "Eth1 Link Speed"
ethtool eth1 2>/dev/null \
  | grep -E 'Settings for|Link detected|Speed|Duplex' \
  || echo "eth1 not found"

print_section "Eth2 Link Speed"
ethtool eth2 2>/dev/null \
  | grep -E 'Settings for|Link detected|Speed|Duplex' \
  || echo "eth2 not found"

# Same heading text and rule as before, now produced by the shared helper
# instead of a hand-written echo -e.
print_section "EFI Boot Manager Output"
/usr/sbin/efibootmgr -v

# Extra DBMCLI
# NOTE(review): the heading says "Disk Details" but the command lists the
# dbserver detail; heading text left unchanged.
print_section "DBMCLI Disk Details"
dbmcli -e list dbserver detail

print_section "dbserver validate"
dbmcli -e alter dbserver validate configuration
Exadata DB node KVM health check
exadata_DB_KVM_health_check.sh
#!/bin/bash
# Script: exadata_DB_KVM_health_check.sh
# Purpose: Collect system diagnostics and Exadata hardware health summary
# Usage: bash exadata_DB_KVM_health_check.sh > exadata_health_$(hostname).log
# Written by Gurudatta N.R
# Colors (backslash escapes; expanded by printf %b in print_section)
RED='\033[1;31m'
NC='\033[0m' # No Color
# Rule printed under every section heading.
SEPARATOR="---------------"

#######################################
# Print a section heading: a blank line, the title wrapped in the RED/NC
# color sequences, then the separator rule.
# Globals:   RED, NC, SEPARATOR (read)
# Arguments: $1 - heading text
# Outputs:   three lines on stdout
#######################################
print_section() {
  # Only the color variables go through %b (escape expansion); the title is
  # printed with %s so backslashes in it are shown literally rather than being
  # interpreted as escapes.
  printf '\n%b%s%b\n%s\n' "$RED" "$1" "$NC" "$SEPARATOR"
}
# Host & System Basics
# Each check prints a heading via print_section followed by the raw output of
# one command.
print_section "Hostname"
hostname
print_section "Uptime"
uptime
print_section "Timestamp"
date
print_section "Kernel Version"
uname -r
print_section "OS Release"
# Concatenate every /etc/*release file and keep lines containing "name" or
# "version" (case-insensitive).
cat /etc/*release | grep -Ei 'name|version'
print_section "Imageinfo"
imageinfo
print_section "Dmidecode"
dmidecode -t1
# Infiniband
print_section "ibstat"
ibstat
# Virtualization
# The heading is printed either way; the body depends on whether virsh is
# available on PATH.
print_section "Virsh List"
if command -v virsh > /dev/null 2>&1; then
  virsh list --all
else
  echo "virsh not available"
fi

# Alerts & Logs
# Same pattern for dbmcli: drop alert-history lines containing "AIDE", then
# keep only lines that mention "Network" (case-insensitive).
print_section "Alerthistory (Network Only)"
if command -v dbmcli > /dev/null 2>&1; then
  dbmcli -e list alerthistory | grep -v AIDE | grep -i Network
else
  echo "dbmcli not found in PATH"
fi
print_section "OS Messages (MCE)"
# Last 20 lines of /var/log/messages that mention "mce" (case-insensitive).
grep -i mce /var/log/messages | tail -20

# Memory Info
print_section "MemTotal"
grep MemTotal /proc/meminfo

# CPU Info
print_section "CPU Info"
lscpu | grep -E 'Model name|Socket|CPU\(s\)'

# Disk Usage
print_section "Disk Usage"
# Filesystem usage with types, minus any line containing "tmpfs".
df -hT | grep -v tmpfs

# Disks
print_section "Physical Disks"
# Guarded like the alert-history check in this script, so a host without
# dbmcli gets a readable line in the report instead of a shell error.
if command -v dbmcli &> /dev/null; then
  dbmcli -e 'list physicaldisk'
else
  echo "dbmcli not found in PATH"
fi

print_section "BBU"
# Only run storcli64 when it exists and is executable.
if [ -x /opt/MegaRAID/storcli/storcli64 ]; then
  /opt/MegaRAID/storcli/storcli64 -adpbbucmd -aALL
else
  echo "BBU check tool not found: /opt/MegaRAID/storcli/storcli64"
fi

# IPMI Fault Management
# ipmitool's stderr is discarded; a non-zero exit prints a fallback line.
print_section "Show Faulty"
ipmitool sunoem cli 'show faulty' 2>/dev/null || echo "IPMI command failed"

print_section "Open Problems"
ipmitool sunoem cli 'show /system/open_problems' 2>/dev/null || echo "IPMI command failed"

print_section "Fmadm Faulty"
# Three strings are passed in order: start the fault-management shell, "y",
# then "fmadm faulty". NOTE(review): the "y" presumably answers a confirmation
# prompt -- confirm.
ipmitool sunoem cli 'start /SP/faultmgmt/shell' 'y' 'fmadm faulty' 2>/dev/null || echo "IPMI command failed"
# Link Errors
print_section "Dmesg Output (Link-related)"
# Last 20 kernel-log lines that mention "link" (case-insensitive).
dmesg -T | grep -i link | tail -20

# Network Interfaces
print_section "Active Interfaces and IPs"
# One line per interface that is up: fields 2 and 4 of `ip -o -4 addr show`.
ip -o -4 addr show up | awk '{print $2, $4}'

print_section "Interface Details (eth1 & eth2)"
ip a s | grep -E 'eth1|eth2'

print_section "Bond Details Interface Details"
cat /proc/net/bonding/bondeth0 2>/dev/null || echo "bondeth0 not found"

# The stderr redirect belongs on ethtool (the command that complains when the
# interface is missing), not on the filter after the pipe. When ethtool prints
# nothing, grep matches nothing and the fallback message is emitted.
# grep -E replaces the obsolescent egrep.
print_section "Eth1 Link Speed"
ethtool eth1 2>/dev/null \
  | grep -E 'Settings for|Link detected|Speed|Duplex' \
  || echo "eth1 not found"

print_section "Eth2 Link Speed"
ethtool eth2 2>/dev/null \
  | grep -E 'Settings for|Link detected|Speed|Duplex' \
  || echo "eth2 not found"

# Use the shared helper so this heading is formatted like every other section
# (previously a hand-written echo -e without the heading color).
print_section "EFI Boot Manager Output"
/usr/sbin/efibootmgr -v

# Extra DBMCLI
# Guarded like the alert-history check in this script, so a host without
# dbmcli gets a readable line in the report instead of a shell error.
# NOTE(review): the first heading says "Disk Details" but the command lists
# the dbserver detail; heading text left unchanged.
print_section "DBMCLI Disk Details"
if command -v dbmcli &> /dev/null; then
  dbmcli -e list dbserver detail
else
  echo "dbmcli not found in PATH"
fi

print_section "dbserver validate"
if command -v dbmcli &> /dev/null; then
  dbmcli -e alter dbserver validate configuration
else
  echo "dbmcli not found in PATH"
fi
Exadata Cell Health check script
exadata_cell_health_check.sh
#!/usr/bin/bash
################################################################################################
################################################################################################
## ##
## Name : exadata_cell_health_check.sh
## Usage : ##
## Version: 1.0 Latest ##
## Author : Gurudatta N.R ##
## MODIFIED (MM/DD/YY) : 10/10/2024 ##
## ##
## ##
#################################################################################################
#################################################################################################
# Timestamped report file name (date format %F_%H%M%S); the path is relative,
# so the file is created in the current working directory.
LOGFILE="exadata_health_report_$(date +%F_%H%M%S).log"
# From here on, stdout and stderr of the script both go through tee, which
# passes them on to the original stdout and appends them to $LOGFILE.
exec > >(tee -a "$LOGFILE") 2>&1
#######################################
# Print a section heading: a blank line, the title, then a row of asterisks.
# Arguments: $1 - heading text
# Outputs:   three lines on stdout
#######################################
print_section() {
  # printf instead of echo so a title such as "-n" or "-e" is printed
  # literally rather than being consumed as an echo option.
  printf '\n%s\n%s\n' "$1" "***************"
}
# Each check prints a heading via print_section followed by the raw output of
# one command.
print_section "Hostname"
hostname
print_section "Uptime"
uptime
print_section "Imageinfo"
imageinfo

print_section "Cell Services"
# Keep only the three service-status attributes from the cell detail listing.
# grep -E replaces egrep: egrep is obsolescent, and since stderr is merged
# into the report, any deprecation warning it prints would end up in the log.
cellcli -e list cell detail | grep -E 'cellsrvStatus|msStatus|rsStatus'

print_section "Dmidecode"
dmidecode -t1

print_section "Dmesg Output (Link-related)"
# Last 20 kernel-log lines that mention "link" (case-insensitive).
dmesg -T | grep -i link | tail -20

print_section "Alerthistory"
# Only the last 30 lines of the detailed alert history.
cellcli -e "list alerthistory detail" | tail -30

print_section "Physicaldisk Status"
cellcli -e list physicaldisk
print_section "Flashdisk Status"
# The shell removes the single quotes, so cellcli receives diskType=FlashDisk.
cellcli -e list physicaldisk where diskType='FlashDisk'
print_section "Failed Disk"
cellcli -e list physicaldisk where status !=normal detail

print_section "Griddisk Status"
cellcli -e list griddisk attributes name, status,asmModeStatus,asmdeactivationoutcome,size
print_section "Diskmap"
cellcli -e list diskmap

print_section "FlashCacheMode"
cellcli -e list cell attributes name,flashCacheMode
print_section "FlashCache Detail"
cellcli -e list flashcache detail
print_section "FlashLog Detail"
cellcli -e list flashlog detail
print_section "Griddisk Cache"
cellcli -e list griddisk attributes name,cachedby

print_section "Hardware fault status"
ipmitool sunoem cli "show -l all /SYS fault_state==Faulted"

print_section "griddisk attributes name, status, asmDiskgroupName, asmDiskName, asmModeStatus, availableTo,size, asmDeactivationOutcome "
# Only lines containing "SYNCING" (case-insensitive) are shown, so this
# section is empty when nothing matches.
cellcli -e list griddisk attributes name, status, asmDiskgroupName, asmDiskName, asmModeStatus, availableTo,size, asmDeactivationOutcome |grep -i SYNCING

print_section "Interface Stats"
ip -br a
# IPMI Fault Management
# These three checks pass command strings to `ipmitool sunoem cli`.
print_section "Show Faulty"
ipmitool sunoem cli 'show faulty'
print_section "Open Problems"
ipmitool sunoem cli 'show /system/open_problems'
print_section "Fmadm Faulty"
# Three strings are passed in order: start the fault-management shell, "y",
# then "fmadm faulty". NOTE(review): the "y" presumably answers a confirmation
# prompt from the start command -- confirm.
ipmitool sunoem cli 'start /SP/faultmgmt/shell' 'y' 'fmadm faulty'
# The following optional checks are disabled (commented out): PCI add-on
# devices, current host console, and host console history.
#echo -e "\nIPMI - PCI Add-on Devices\n---------------"
#ipmitool sunoem cli "show -l all -d properties /System/PCI_Devices/Add-on"
#echo -e "\nIPMI - Current Host Console\n---------------"
#ipmitool sunoem cli "show /HOST/console"
#echo -e "\nIPMI - Host Console History\n---------------"
#ipmitool sunoem cli "show /HOST/console/history"
print_section "Switch Stats"
# lldp_cap.py is run once per argument, re0pf and re1pf.
# NOTE(review): these look like interface names -- confirm they match this cell.
/opt/oracle.SupportTools/ibdiagtools/utils/lldp_cap.py re0pf
/opt/oracle.SupportTools/ibdiagtools/utils/lldp_cap.py re1pf
# Extra CellCLI
print_section "cellcli Details"
cellcli -e list cell detail
print_section "cell validate"
cellcli -e alter cell validate configuration;
# Closing message; it names the report file set in $LOGFILE.
echo ""
echo "Health report saved to $LOGFILE"
Subscribe to:
Comments (Atom)