热门关键字:  ubuntu  分区  函数  linux系统进程  Fedora

HA下的oracle监控

来源: 作者: 时间:2008-07-03 Tag: 点击:
#!/bin/bash
#
# Copyright 2003-2004, 2006 Red Hat, Inc.
#
# Author(s):
# Hardy Merrill <hmerrill at redhat.com>
# Lon Hohberger <lhh at redhat.com>
# Michael Moon <Michael dot Moon at oracle.com>
#
# This program is Open Source software. You may modify and/or redistribute
# it persuant to the terms of the Open Software License version 2.1, which
# is available from the following URL and is included herein by reference:
#
# http://opensource.org/licenses/osl-2.1.php
#
# chkconfig: 345 99 01
# description: Service script for starting/stopping \
# Oracle(R) Application Server software on: \
# Red Hat Enterprise Linux 2.1 AS \
# Red Hat Enterprise Linux 3 AS \
# Red Hat Enterprise Linux 3 ES \
# Also can start Oracle(R) Database 10g on \
# Red Hat Enterprise Linux 4
#
# NOTES:
#
# (1) You can comment out the LOCKFILE declaration below. This will prevent
# the need for this script to access anything outside of the ORACLE_HOME
# path.
#
# (2) You MUST customize ORACLE_USER, ORACLE_HOME, ORACLE_SID, and
# ORACLE_HOSTNAME to match your installation if not running from within
# rgmanager.
#
# (3) Do NOT place this script in shared storage; place it in ORACLE_USER's
# home directory in non-clustered environments and /usr/share/cluster
# in RHCS4 environments.
#
# Oracle is a registered trademark of Oracle Corporation.
# Oracle9i is a trademark of Oracle Corporation.
# Oracle10g is a trademark of Oracle Corporation.
# All other trademarks are property of their respective owners.
#
# Pull in the distribution's init-script helper library.  The helpers
# echo_success, echo_failure, status and action called further down are not
# defined in this file -- presumably they come from here (verify on the
# target system).
. /etc/init.d/functions
#
# Source stuff from /etc/sysconfig, but this may be overridden if
# this is being called as a cluster resource agent instead.
# . /etc/sysconfig/oracledb
# Name and directory of this script as it was invoked.  "$0" is quoted (and
# protected with "--") so that a path containing whitespace or a leading
# dash is handed to basename/dirname as a single operand.
declare SCRIPT="$(basename -- "$0")"
declare SCRIPTDIR="$(dirname -- "$0")"

# When OCF_RESKEY_* variables are present in the environment, each non-empty
# one overrides the corresponding Oracle setting.
[ -n "$OCF_RESKEY_user" ] && ORACLE_USER=$OCF_RESKEY_user
[ -n "$OCF_RESKEY_home" ] && ORACLE_HOME=$OCF_RESKEY_home
[ -n "$OCF_RESKEY_name" ] && ORACLE_SID=$OCF_RESKEY_name
[ -n "$OCF_RESKEY_lockfile" ] && LOCKFILE=$OCF_RESKEY_lockfile
[ -n "$OCF_RESKEY_type" ] && ORACLE_TYPE=$OCF_RESKEY_type
[ -n "$OCF_RESKEY_vhost" ] && ORACLE_HOSTNAME=$OCF_RESKEY_vhost
######################################################
# Customize these to match your Oracle installation. #
######################################################
#
# 1. Oracle user. Must be the same across all cluster members. In the event
# that this script is run by the super-user, it will automatically switch
# to the Oracle user and restart. Oracle needs to run as the Oracle
# user, not as root.
#
#[ -n "$ORACLE_USER" ] || ORACLE_USER=oracle
#
# 2. Oracle home. This is set up during the installation phase of Oracle.
# From the perspective of the cluster, this is generally the mount point
# you intend to use as the mount point for your Oracle Infrastructure
# service.
#
#[ -n "$ORACLE_HOME" ] || ORACLE_HOME=/mnt/oracle/home
#
# 3. This is your SID. This is set up during oracle installation as well.
#
#[ -n "$ORACLE_SID" ] || ORACLE_SID=orcl
#
# 4. The oracle user probably doesn't have the permission to write to
# /var/lock/subsys, so use the user's home directory.
#
# Fall back to a lock file under /home/$ORACLE_USER only when LOCKFILE is
# still unset or empty at this point.
: "${LOCKFILE:=/home/$ORACLE_USER/.oracle-ias.lock}"
#[ -n "$LOCKFILE" ] || LOCKFILE="$ORACLE_HOME/.oracle-ias.lock"
#[ -n "$LOCKFILE" ] || LOCKFILE="/var/lock/subsys/oracle-ias" # Watch privileges
#
# 5. Type of Oracle Database. Currently supported: 10g 10g-iAS(untested!)
#
#[ -n "$ORACLE_TYPE" ] || ORACLE_TYPE=10g
#
# 6. Oracle virtual hostname. This is the hostname you gave Oracle during
# installation.
#
#[ -n "$ORACLE_HOSTNAME" ] || ORACLE_HOSTNAME=svc0.foo.test.com

###########################################################################
# Normalise the installation type to lower case.  The value is quoted so it
# reaches tr verbatim: no word splitting and no filename expansion (an
# unquoted "*" would otherwise be replaced by the names of files in the
# current directory).
ORACLE_TYPE=$(printf '%s\n' "$ORACLE_TYPE" | tr 'A-Z' 'a-z')

# Make the configuration visible to child processes (sqlplus, lsnrctl, ...).
export ORACLE_USER ORACLE_HOME ORACLE_SID LOCKFILE ORACLE_TYPE
export ORACLE_HOSTNAME

##########################
# Search paths for Oracle libraries and executables, all rooted at
# $ORACLE_HOME.  Not every directory exists in every type of Oracle
# installation.
#
LD_LIBRARY_PATH="$ORACLE_HOME/lib:$ORACLE_HOME/opmn/lib"
PATH="$ORACLE_HOME/bin:$ORACLE_HOME/opmn/bin:$ORACLE_HOME/dcm/bin:$PATH"
export LD_LIBRARY_PATH PATH
# Number of restart attempts made for a failed component before giving up.
declare -i RESTART_RETRIES=3
# Database background process name(s) to check; get_db_status expands each
# one to ora_<name>_$ORACLE_SID.
declare -r DB_PROCNAMES="pmon"
#declare -r DB_PROCNAMES="pmonXX" # testing
#declare -r DB_PROCNAMES="pmon smon dbw0 lgwr"
# Process name of the Oracle listener, checked by get_lsnr_status.
declare -r LSNR_PROCNAME="tnslsnr"
#declare -r LSNR_PROCNAME="tnslsnrXX" # testing
#
# The oracle user probably doesn't have the permission to write to
# /var/lock/subsys, so use the user's home directory.
#
# Keep a LOCKFILE that was already chosen (e.g. via OCF_RESKEY_lockfile)
# instead of overwriting it; only fall back to the default when it is empty.
# Either way the value is frozen from here on.
[ -n "$LOCKFILE" ] || LOCKFILE="/home/$ORACLE_USER/.oracle-ias.lock"
declare -r LOCKFILE
#declare -r LOCKFILE="$ORACLE_HOME/.oracle-ias.lock"
#declare -r LOCKFILE="/var/lock/subsys/oracle-ias" # Watch privileges
##########################################################
# (Hopefully) No user-serviceable parts below this line. #
##########################################################
# Print the resource-agent metadata (an XML document describing the
# parameters and actions of the "oracledb" agent) on standard output.
# The here-document delimiter is quoted, so the text is emitted literally.
meta_data() {
    cat <<'EOT'
<?xml version="1.0" ?>
<resource-agent name="oracledb" version="rgmanager 2.0">
<version>1.0</version>
<longdesc lang="en">
Oracle 10g Failover Instance
</longdesc>
<shortdesc lang="en">
Oracle 10g Failover Instance
</shortdesc>
<parameters>
<parameter name="name" primary="1">
<longdesc lang="en">
Instance name (SID) of oracle instance
</longdesc>
<shortdesc lang="en">
Oracle SID
</shortdesc>
<content type="string"/>
</parameter>
<parameter name="user" unique="1" required="1">
<longdesc lang="en">
Oracle user name. This is the user name of the Oracle
user which the Oracle AS instance runs as.
</longdesc>
<shortdesc lang="en">
Oracle User Name
</shortdesc>
<content type="string"/>
</parameter>
<parameter name="home" unique="1" required="1">
<longdesc lang="en">
This is the Oracle (application, not user) home directory.
This is configured when you install Oracle.
</longdesc>
<shortdesc lang="en">
Oracle Home Directory
</shortdesc>
<content type="string"/>
</parameter>
<parameter name="type" required="1">
<longdesc lang="en">
This is the Oracle installation type.
Only "10g" and "10g-ias" are supported, and 10g-ias is
untested.
</longdesc>
<shortdesc lang="en">
Oracle Installation Type
</shortdesc>
<content type="string"/>
</parameter>
<parameter name="vhost" required="0" unique="1">
<longdesc lang="en">
Virtual Hostname matching the installation hostname of
Oracle 10g. Note that during the start/stop of an oracledb
resource, your hostname will temporarily be changed to
this hostname. As such, it is recommended that oracledb
resources be instanced as part of an exclusive service only.
</longdesc>
<shortdesc lang="en">
Virtual Hostname
</shortdesc>
<content type="string"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="900"/>
<action name="stop" timeout="90"/>
<action name="recover" timeout="990"/>
<!-- Checks to see if it's mounted in the right place -->
<action name="status" timeout="10"/>
<action name="monitor" timeout="10"/>
<!-- Checks to see if we can read from the mountpoint -->
<action name="status" depth="10" timeout="30" interval="5m"/>
<action name="monitor" depth="10" timeout="30" interval="5m"/>
<!-- Checks to see if we can write to the mountpoint (if !ROFS) -->
<action name="status" depth="20" timeout="90" interval="10m"/>
<action name="monitor" depth="20" timeout="90" interval="10m"/>
<action name="meta-data" timeout="5"/>
<action name="verify-all" timeout="5"/>
</actions>
<special tag="rgmanager">
<attributes maxinstances="1"/>
</special>
</resource-agent>
EOT
}

#
# "action"-like macro supporting functions
#
# usage: faction <label> <command> [args...]
#
# Prints <label> without a trailing newline, runs <command> with its
# arguments, then calls echo_success or echo_failure depending on the
# command's exit status and finishes the line.
# Returns 0 when the command succeeded, 1 otherwise.
#
faction()
{
    echo -n "$1"
    shift

    # "$@" hands every remaining argument to the command unchanged; the
    # previous $* re-split arguments containing whitespace and expanded
    # glob characters.
    if "$@"; then
        echo_success
        echo
        return 0
    fi

    echo_failure
    echo
    return 1
}

#
# Start Oracle9i (database portion)
#
# Feeds "startup" and "quit" to sqlplus (connected "/ as sysdba") and
# captures everything sqlplus prints in /tmp/$SCRIPT-start.log.
#
# Returns 0 on success; 1 if sqlplus exits non-zero or if the word
# "failure" appears in the captured output.  In both failure cases a hint
# and the log file location are printed on stdout.
#
start_db()
{
    declare logfile
    declare -i rv

    logfile=/tmp/$SCRIPT-start.log

    #
    # The sqlplus commands are supplied through a here-document rather
    # than a scratch file with a predictable name in /tmp, so nothing is
    # left behind on any exit path.  Output is captured in the hope that
    # it's useful when something doesn't work properly.
    #
    sqlplus "/ as sysdba" &> "$logfile" <<'EOF'
startup
quit
EOF
    rv=$?

    # Hand the captured output to initlog (the original note says this
    # dumps the log file to /var/log/messages).
    initlog -q -c "cat $logfile"

    if [ $rv -ne 0 ]; then
        echo "ORACLE_HOME Incorrectly set?"
        echo "See $logfile for more information."
        return 1
    fi

    #
    # If we see:
    # ORA-.....: failure, we failed
    #
    if grep -q "failure" "$logfile"; then
        echo "ORACLE_SID Incorrectly set?"
        echo "See $logfile for more information."
        return 1
    fi

    return 0
}

#
# Stop Oracle9i (database portion)
#
# Feeds "shutdown abort" and "quit" to sqlplus (connected "/ as sysdba")
# and captures everything sqlplus prints in /tmp/$SCRIPT-stop.log.
#
# Returns 0 on success; 1 if sqlplus exits non-zero or if the word
# "failure" appears in the captured output.
#
stop_db()
{
    declare logfile
    declare -i rv

    logfile=/tmp/$SCRIPT-stop.log

    # The sqlplus commands come from a here-document instead of a scratch
    # file with a predictable name in /tmp, so nothing is left behind on
    # any exit path.
    sqlplus "/ as sysdba" &> "$logfile" <<'EOF'
shutdown abort
quit
EOF
    rv=$?

    # Hand the captured output to initlog (the original note says this
    # dumps the log file to /var/log/messages).
    initlog -q -c "cat $logfile"

    if [ $rv -ne 0 ]; then
        echo "ORACLE_HOME Incorrectly set?"
        echo "See $logfile for more information."
        return 1
    fi

    #
    # If we see 'failure' in the log, we're done.
    #
    if grep -q failure "$logfile"; then
        echo_failure
        echo
        echo "Possible reason: ORACLE_SID Incorrectly set."
        echo "See $logfile for more information."
        return 1
    fi

    return 0
}

#
# Destroy any remaining processes with refs to $ORACLE_HOME
#
# Every process whose "ps ax" line contains $ORACLE_HOME (as a literal
# string) is sent SIGKILL; each successful kill is logged through initlog.
# Always returns 0.
#
force_cleanup()
{
    declare pids=""
    declare pid

    # With an empty ORACLE_HOME there is nothing to match on; a quoted
    # empty pattern would select every process on the system.
    if [ -n "$ORACLE_HOME" ]; then
        # -F: treat the path as a fixed string, so characters such as "."
        # in ORACLE_HOME cannot match unrelated processes.
        pids=$(ps ax | grep -F -- "$ORACLE_HOME" | grep -v grep | awk '{print $1}')
    fi
    initlog -n "$SCRIPT" -s "<err> Not all Oracle processes exited cleanly, killing"

    for pid in $pids; do
        # Never kill the shell that is running this cleanup.
        [ "$pid" = "$$" ] && continue
        if kill -9 "$pid"; then
            initlog -n "$SCRIPT" -s "Killed $pid"
        fi
    done
    return 0
}

#
# Wait for oracle processes to exit.  Polls once per second while any
# "ps ax" line still contains $ORACLE_HOME; after 90 polls it gives up
# waiting and calls force_cleanup.  Always returns 0.
#
exit_idle()
{
    declare -i n=0

    # Nothing to wait for when ORACLE_HOME is empty (a quoted empty
    # pattern would match every process).
    [ -n "$ORACLE_HOME" ] || return 0

    # -F: match the path as a fixed string, not as a regular expression.
    while ps ax | grep -F -- "$ORACLE_HOME" | grep -q -v grep; do
        if [ $n -ge 90 ]; then
            force_cleanup
            return 0
        fi
        sleep 1
        ((n++))
    done
    return 0
}

#
# Get database background process status. Restart it if it failed and
# we have seen the lock file.
#
# usage: get_db_status <subsys_lock>
#
# For every name in $DB_PROCNAMES the process ora_<name>_$ORACLE_SID is
# checked with "status".  If it is not running:
#   - <subsys_lock> non-zero: return 3 without attempting a restart
#     (presumably the caller passes non-zero when the lock file is absent
#     -- verify against the caller);
#   - otherwise stop_db/start_db are tried up to $RESTART_RETRIES times.
#
# Returns 0 if all processes are running (possibly after a restart),
# 1 if stop_db failed or every restart attempt failed, 3 as described above.
#
get_db_status()
{
    declare -i subsys_lock=$1
    declare -i i=0
    # Loop variable kept local so it does not leak into the caller's scope.
    declare procname
    declare ora_procname

    for procname in $DB_PROCNAMES ; do
        ora_procname="ora_${procname}_${ORACLE_SID}"

        if status "$ora_procname"; then
            # This one's okay; go to the next one.
            continue
        fi

        #
        # We're not supposed to be running, and we are,
        # in fact, not running...
        # XXX only works when monitoring one db process; consider
        # extending in future.
        #
        if [ $subsys_lock -ne 0 ]; then
            return 3
        fi

        for (( i=RESTART_RETRIES ; i; i-- )) ; do
            # this db process is down - stop and
            # (re)start all ora_XXXX_$ORACLE_SID processes
            initlog -q -n "$SCRIPT" -s "Restarting Oracle Database..."

            # stop failed - return 1
            stop_db || return 1

            if start_db; then
                # ora_XXXX_$ORACLE_SID processes started
                # successfully, so leave the stop/start loop
                break
            fi
        done

        if [ $i -eq 0 ]; then
            # stop/start's failed - return 1 (failure)
            return 1
        fi
    done
    return 0
}

#
# Get the status of the Oracle listener process
#
# usage: get_lsnr_status <subsys_lock>
#
# Checks $LSNR_PROCNAME with "status".  If it is not running:
#   - <subsys_lock> non-zero: return 3 without attempting a restart;
#   - otherwise "lsnrctl start" is tried up to $RESTART_RETRIES times,
#     each attempt being verified with "lsnrctl status".
#
# Returns 0 if the listener is running (possibly after a restart),
# 1 if it could not be restarted, 3 as described above.
#
get_lsnr_status()
{
    declare -i subsys_lock=$1
    # Retry counter kept local so it does not leak into the caller's scope.
    declare -i i=0

    if status "$LSNR_PROCNAME"; then
        return 0 # Listener is running fine
    fi

    #
    # We're not supposed to be running, and we are,
    # in fact, not running. Return 3
    #
    if [ $subsys_lock -ne 0 ]; then
        return 3
    fi

    #
    # Listener is NOT running (but should be) - try to restart
    #
    for (( i=RESTART_RETRIES ; i; i-- )) ; do
        action "Restarting Oracle listener:" lsnrctl start
        if lsnrctl status >& /dev/null; then
            break # Listener was (re)started and is running fine
        fi
    done

    if [ $i -eq 0 ]; then
        # stop/start's failed - return 1 (failure)
        return 1
    fi

    # Final confirmation through the process table.
    if ! status "$LSNR_PROCNAME"; then
        return 1 # Problem restarting the Listener
    fi
    return 0 # Success restarting the Listener
}


#
# usage: get_opmn_proc_status <ias-component> [process-type]
#
# Get the status of a specific OPMN-managed process. If process-type
# is not specified, assume the process-type is the same as the ias-component.
# Whenever the process is not reported as "Alive" or "Init", a start is
# requested through "opmnctl startproc" and the status is checked again,
# up to $RESTART_RETRIES times.  This function does not itself look at
# the lock file.
#
# Prints one status line per check on stdout.
# Returns 0 if the process is Alive/Init, 1 if no component was given or
# the process is still not running after all attempts.
#
get_opmn_proc_status()
{
    declare comp=$1
    declare opmntype=$2
    declare type_pretty
    declare _pid _status
    # Retry counter kept local so it does not leak into the caller's scope.
    declare -i i=0

    [ -n "$comp" ] || return 1
    if [ -z "$opmntype" ]; then
        opmntype=$comp
    else
        type_pretty=" [$opmntype]"
    fi

    for (( i=RESTART_RETRIES ; i; i-- )) ; do

        # Reduce the matching "opmnctl status" row to "<pid> <status>":
        # keep columns 3 and 4, drop the padding, turn "|" into a space.
        _status=$(opmnctl status | grep "^$comp " | grep " $opmntype " | cut -d '|' -f3,4 | sed -e 's/ //g' -e 's/|/ /g')

        # Intentionally unquoted: several matching rows are folded onto
        # one line and only the first pid/status pair is used.
        _pid=$(echo $_status | cut -f1 -d' ')
        _status=$(echo $_status | cut -f2 -d' ')

        if [ "${_status}" == "Alive" ] || [ "${_status}" == "Init" ]; then
            if [ $i -lt $RESTART_RETRIES ] ; then
                # Not the first pass, so a start was requested earlier.
                echo " $comp$type_pretty restarted"
            fi
            echo " $comp$type_pretty (pid $_pid) is running..."
            break
        else
            echo " $comp$type_pretty is stopped"

            #
            # Try to restart it, but don't worry if we fail. OPMN
            # is supposed to handle restarting these anyway.
            #
            # If it's running and you tell OPMN to "start" it,
            # you will get an error.
            #
            # If it's NOT running and you tell OPMN to "restart"
            # it, you will also get an error.
            #
            opmnctl startproc process-type=$opmntype &> /dev/null
        fi
    done

    if [ $i -eq 0 ]; then
        # restarts failed - return 1 (failure)
        return 1
    fi

    return 0
}

上一篇:RMAN的备份\恢复过程
下一篇:没有了
最新评论共有 0 位网友发表了评论
发表评论
评论内容:不能超过250字,需审核,请自觉遵守互联网相关政策法规。
用户名: 密码:
匿名?
注册