"""
WebSphere Application Server Performance Mustgather Data Collection Script
A comprehensive tool for collecting performance data from WebSphere Application Server instances, including javacores, heap dumps, system cores, OS statistics, and trace data. It does not work while the JVM is starting or stopping, and it only works with local servers. Contact WebSphere Support for alternative methods.

Maintainer: IBM WebSphere Support, WASSDK team, Piotr Zalewski

Usage:
<WAS_ROOT or PROFILE_ROOT>/bin/wsadmin.(sh|bat) -f wasperf.py server_name1 server_name2

By default, it produces 9 javacores (8 iterations plus final javacore) spaced 30 seconds apart over 4 minutes.
It then gathers the logs(SystemOut.log, native_stderr.log, verboseGC, etc), javacores and server.xml for the given server name(s) and/or PID(s).

For more usage instructions, run the script without arguments.
../wsadmin.(sh|bat) -f wasperf.py

NOTE: 
- You may have to manually gather logs and/or dumps if using non-default filenames.
- The --cpu-threshold=<value> option is not supported on AIX.

For maintainer:
- Jython 2.1/WAS 8.5 does not support @property, @staticmethod and @classmethod (introduced in Python 2.2).
"""
########################################################################
# The following values can be modified.
# They are the defaults copied into Config when it is constructed.
########################################################################
ITERATIONS = 8                  # Number of javacores to produce, including the final one. Default is 8.
DELAY = 30                       # Delay in seconds between javacores. Default is 30.
CPU_THRESHOLD = None            # Trigger data collection when CPU threshold is reached. Default is None.
CHECK_CPU_INTERVAL = 30         # How often the script should check a server CPU usage. Default is 30 seconds.
COLLECTING_OS_DATA = 0          # Default is 0 as false. To turn it on, change to 1.
DISABLE_COLLECTING_HOSTNAME = 0 # Default is 0.
CHECK_HEAP_INTERVAL = 30         # How often (in seconds) the script should check the heap usage for a server. NOTE(review): the value is 30 but the usage text documents a default of 60 - confirm which is intended.
PRINT_HEAP_USAGE = 0            # Prints a heap usage for given server name.
PRINT_CPU_USAGE = 0             # Prints a CPU usage for given server name.
MONITOR_ONLY = 0                # Monitor CPU or heap usages only using --cpu-threshold= or --heap-threshold= options without data collection. Default is 0.
######## For both AIX and Linux
VMSTAT_INTERVAL=5
TPROF_SPAN=60                   # How long top/tprof should collect data. Default is 60.

######## For Linux
TOP_INTERVAL=60                 # How often top data should be taken. Default is 60.
TOP_DASH_H_INTERVAL=5           # How often top dash H data should be taken. Default is 5.
##################### END MODIFICATION #################################


########################################################################
# USAGE
def usage():
 """
 Return the command-line usage instructions for the wasperf script.

 The text is only returned, never printed here; displaying it is left to
 the caller.

 NOTE(review): the text documents a 60 second default for
 --check-heap-interval while CHECK_HEAP_INTERVAL is set to 30 at the top of
 this file - confirm which one is intended.

 Returns:
   String containing the complete usage documentation.
 """
 return '''
 Usage:

 wsadmin -f <path to>/wasperf.py server_name1 server_name2

 To include node(s), quote the node name with a colon and separate server names with a space.
 To add another node, simply add a space between the quoted node names.

 wsadmin -f <path to>/wasperf.py "node_name:server_name1 server_name2" "node_name2:server_name3"

 PIDs can be used instead of server names, or mixed together:
 wsadmin -f <path to>/wasperf.py <PID> <PID> server_name
 
 Options:
 --iterations=<value> specifies how many javacores to generate, including the final one. Default is 8.
 --delay=<value> specifies the delay in seconds between javacores or other data(OS stats, etc). Default is 30. 

 --collect-os enables OS data collection. Cannot be combined with --diagnostic-plan=<value> .
 --cpu-threshold=<value> initiates data collection when CPU usage for server PID exceeds the specified threshold percentage. AIX is not supported.
 --check-cpu-interval=<value> specifies how often to check CPU usage in seconds. Default is 30.
 --print-cpu-usage prints the JVM's CPU usage.

 --heap-threshold=<value> initiates data collection when heap usage exceeds the specified threshold. (e.g., 512mb, 2gb).
 --check-heap-interval=<value> specifies how often to check heap usage in seconds. Default is 60. 
 --print-heap-usage prints the server's heap usage.
 
 --match-trace=<value> initiates data collection when matching text is printed in the logs. Cannot be combined with --diagnostic-plan=<value>.

 --runtime-trace=<value> sets a runtime trace string.

 --dump-on-trigger=<heapdump|core> generates a dump when triggered by either --heap-threshold=<value>(mb|gb), --cpu-threshold=<value> or --match-trace=<value>

 --collect_servers collects the remaining additional servers for comparative analysis beyond the triggered server when using conditional options (e.g, --cpu-threshold)
 
 --diagnostic-plan=<value> to use a diagnostic plan. See this link for how to use action scripts: https://www.ibm.com/docs/en/was/9.0.5?topic=plans-diagnostic-plan-action-scripts

 --output-dir=<value> redirects the wasperf_results.<date>.<timestamp> directory/zip to a specified directory. Default is the current working directory. 
 '''

########################################################################
# Import stuff
########################################################################
import sys
import time
import os
import zipfile
import shutil
import java.lang.Runtime as runtime
import java.io.BufferedReader as bufferedReader
import java.io.InputStreamReader as inputStreamReader
import re
import threading

from com.ibm.ws.scripting import ScriptingException
from javax.management import InstanceNotFoundException
from javax.management import JMRuntimeException

########################################################################
# Constants
########################################################################
SCRIPT_VERSION = '2026.01.21'
SECTION = '-' * 80              # An 80-character line of dashes.
START_SCRIPT_TIME = time.time() # Captured once at import; DumpCollector.locate_dump() formats it to look for dumps generated after the script started.
COLLECTING_CMD_STATS = 1 # To collect an elapsed time for Windows OS stats command, writing to cmd_stats.txt.

# Setting a default script output directory: wasperf_results.<YYYYmmdd>.<HHMMSS>
# under the current working directory. Config copies both values at construction.
SCRIPT_OUTPUT_NAME = 'wasperf_results.%s' % time.strftime('%Y%m%d.%H%M%S')
OUTPUT_DIR = os.getcwd()
SCRIPT_OUTPUT_DIR = os.path.join(OUTPUT_DIR, SCRIPT_OUTPUT_NAME)
DISABLE_EXIT = 0                # Read by exit_script(): when 1, exit_script() prints a notice instead of ending the process.
############
# Global non-constant variables
############

def exit_script():
  """
  Terminate the wsadmin script unless exiting has been disabled.

  When the module-level DISABLE_EXIT flag equals 1 only a notice is printed
  and control returns to the caller; otherwise the process is ended
  immediately through os._exit(1).
  """
  try:
    if DISABLE_EXIT != 1:
      os._exit(1)
    else:
      print('Calling os._exit(0) is disabled in exit_script().')
  except SystemExit:
    print('Script exited.')

############################
# Classes
############################
class RequiresServices:
  '''
  Mixin that checks the module-level config and the server_manager's
  output_manager before they are used.
  '''
  def _check_requirements(self,require_config=1, require_output_manager=1):
    '''
    Raise RuntimeError when a requested global service is missing.
    Parameters:
    - require_config: when true, the global config must not be None
    - require_output_manager: when true, server_manager.output_manager must not be None
    '''
    if require_config:
      if config is None:
        raise RuntimeError("Config is required")
    if require_output_manager:
      if server_manager.output_manager is None:
        raise RuntimeError("OutputManager is required")

class RequiresServerManager:
  '''
  Mixin that confirms the global server_manager, and the output_manager it
  carries, have both been initialized before they are used.
  '''
  def _check_server_manager_ready(self):
    '''Raise RuntimeError if server_manager or its output_manager is None.'''
    if server_manager is not None:
      if server_manager.output_manager is None:
        raise RuntimeError('server_manager exists but has no OutputManager')
    else:
      raise RuntimeError('server_manager has not been created.')

class WASHelper:
  '''
  Utility functions for WebSphere administrative operations.
  Provides stateless methods using AdminControl, AdminTask, and AdminConfig.
  '''
  _output_manager = None
  def set_output_manager(self,output_manager):
    '''Initialize output manager for error logging.'''
    self._output_manager = output_manager

  def query_mbean(self,query_string):
    '''Query MBean by name. Returns ObjectName string or None.'''
    try:
      result = AdminControl.queryNames(query_string)
      if result:
        return str(result)
      return None
    except java.lang.Exception, e:
      if self._output_manager:
        self._output_manager.log('Error: Failed to query MBean: %s' % str(e))
      return None
      
  def get_complete_object_name(self,object_name):
    """
    Get an object name of the given server.
    Args:
      server: server dicionary with cell, node, name
    Returns:
      Object name if successful, None if server appears unreachable
    """
    if not object_name:
      if self._output_manager:
        self._output_manager.log('Error in get_complete_object_name(): object_name is required')
      return None
    try:
      return AdminControl.completeObjectName(object_name) or None
    except java.lang.Exception, e:
      if self._output_manager:
        self._output_manager.log('Error in get_complete_object_name(): Failed to get a complete object name: %s' % str(e))
      return None

  def set_attribute(self,object_name, attribute_name, attribute_value):
    '''
    Custom wrapper for AdminControl.setAttribute() with error handling.
    Parameters:
    - object_name: Required - Mbean object name
    - attribute_name: Required - Attribute name to set
    - attribute_value: Rquired - new attribute value
    '''
    if not object_name or not attribute_name or not attribute_value:
      if self._output_manager:
        self._output_manager.log('Error in set_attribute(): object name, attribute name and attribute value are required.')
      return None
    try:
      return AdminControl.setAttribute(object_name, attribute_name, attribute_value)
    except java.lang.Exception, e:
      if self._output_manager:
        self._output_manager.log('Error in set_attribute(): Failed to set attribute %s with %s for %s: %s' % (attribute_name, attribute_value, object_name, str(e)))
      return None

  def get_attribute_value(self,mbean_name,attribute_name):
    '''
    Get an attribute value from an MBean.
    Args:
      mbean_name: The object name of the MBean of interest.
      attribute_name: The name of the attribute to query.
    Returns:
      The value of the specified attribute or None if the attribute cannot be retrieved.
    '''
    if not mbean_name:
      if self._output_manager:
        self._output_manager.log('Error in get_attribute_value(): Mbean name is required')
      return None
    if not attribute_name:
      if self._output_manager:
        self._output_manager.log('Error in get_attribute_value(): Attribute name is required')
      return None

    try:
      return AdminControl.getAttribute(mbean_name,attribute_name) or None
    except java.lang.Exception, e:
      if self._output_manager:
        self._output_manager.log('Error in get_attribute_value(): Failed to get attribute "%s" from MBean "%s": %s' % (attribute_name, mbean_name, str(e)))
      return None

  # 20260115 Added
  def get_attributes(self,obj_name):
    if not obj_name:
      if self._output_manager:
        self._output_manager.log('Error in get_attributes(): Object name is required')
      return None
    try:
      return AdminControl.getAttributes(obj_name) or None
    except java.lang.Exception, e:
      if self._output_manager:
        self._output_manager.log('Error in get_attributes(): Failed to get attributes for "%s": %s' % (obj_name, str(e)))
      return None

  def get_generic_JVM_arg_value(self,admin_config_id,JVM_arg_name):
    """
    Retrieve a specific JVM argument value from WebSphere configuration.
    Args:
      admin_config_id: WebSphere admin configuration ID
      jvm_arg_name: Name of the JVM argument to retrieve
    Returns:
      String value of the JVM argument, or None if not found
    Eample:
      value = was_helper.get_generic_JVM_arg_value(config_id, '-Xdump:directory')
    """
    # 20260113
    if not admin_config_id:
      if self._output_manager:
        self._output_manager.log('Error in get_generic_JVM_arg_value(): Admin config id is required')
      return None
    if not JVM_arg_name:
      if self._output_manager:
        self._output_manager.log('Error in get_generic_JVM_arg_value(): JVM argument name is required')
      return None

    try:
      conf_list = AdminConfig.list('JavaVirtualMachine', admin_config_id)
    except java.lang.Exception, e:
      if self._output_manager:
        self._output_manager.log('Error: Failed to list "JavaVirtualMachine" from "%s": %s' % ( admin_config_id, str(e)))
      return None

    try:
      args = AdminConfig.showAttribute(conf_list, 'genericJvmArguments')
    except java.lang.Exception, e:
      if self._output_manager:
        self._output_manager.log('Error: Failed to show attribute "%s" from "genericJvmArguments": %s' % (conf_list, str(e)))
      return None

    if args:
      for arg in args.split(' '):
        arg_arr = arg.split('=')
        if JVM_arg_name == arg_arr[0]:
          return arg_arr[1]
    return None

  def get_JVM_env_var_value(self,admin_config_id,env_var_name):
    """
    Retrieve a JVM environment variable value from WebSphere configuration.
    Args:
      admin_config_id: WebSphere admin configuration ID
      env_var_name: Name of the environment variable to retrieve
    Returns:
      String value of the environment variable, or None if not found
    Example
      ibm_javacoredir = was_helper.get_JVM_env_var_value(config_id, "IBM_JAVACOREDIR")
    """
    if not admin_config_id:
      if self._output_manager:
        self._output_manager.log('Error in get_other_conf_value(): Admin config id is required')
      return None
    if not env_var_name:
      if self._output_manager:
        self._output_manager.log('Error in get_other_conf_value(): Environment variable name is required')
      return None

    try:
      java_process_def = was_helper.get_id(admin_config_id+'JavaProcessDef:/')
    except java.lang.Exception, e:
      if self._output_manager:
        self._output_manager.log('Error: Failed to get id "%s": %s' % (admin_config_id+'JavaProcessDef:/', str(e)))
      return None

    try:
      env_entries = AdminConfig.showAttribute(java_process_def, 'environment')[1:-1].split()
    except java.lang.Exception, e:
      if self._output_manager:
        self._output_manager.log('Error: Failed to show attribute "%s" from "environment": %s' % (java_process_def, str(e)))
      return None
    
    for env_entry in env_entries:
      name = AdminConfig.showAttribute(env_entry, 'name')
      if name == env_var_name:
        return AdminConfig.showAttribute(env_entry, 'value')
    return None

  def get_other_conf_value(self,admin_config_id,name):
    """
    Retrieve other configuration values by symbolic name.
    Args:
      admin_config_id: WebSphere admin configuration ID
      name: Symbolic name of the configuration variable
    Returns:
      String value of the configuration variable, or None if not found
    Example:
      user_install_root = was_helper.get_other_conf_value(admin_config_id, 'USER_INSTALL_ROOT')
    """
    if not admin_config_id:
      if self._output_manager:
        self._output_manager.log('Error in get_other_conf_value(): Admin config id is required')
      return None
    if not name:
      if self._output_manager:
        self._output_manager.log('Error in get_other_conf_value(): Configuration name is required')
      return None

    try:
      variables = self.get_id(admin_config_id)
      if variables:
        variables = variables.splitlines()
      else:
        return None
    except java.lang.Exception, e:
      if self._output_manager:
        self._output_manager.log('Error in get_other_conf_value(): Failed to get id "%s": %s' % (admin_config_id, str(e)))
      return None

    for var in variables :
      try:
        local_name = AdminConfig.showAttribute(var, 'symbolicName')
      except java.lang.Exception, e:
        if self._output_manager:
          self._output_manager.log('Error in get_other_conf_value(): Failed to show attribute "%s": %s' % (var, str(e)))
        return None
      if ( re.match(local_name, name) ) :
        return AdminConfig.showAttribute(var, 'value')
    return None
  def invoke(self,object_name, operation, arguments=None):
    '''
    Custom invoke wrapper for AdminControl.invoke()
    Parameters:
    - object_name: Required - MBean object name
    - operation: Required - opreation to invoke
    - *args: Optional - variable number of arguments for the eopration
    '''
    try:
      if arguments:
        return AdminControl.invoke(object_name, operation, arguments)
      else:
        return AdminControl.invoke(object_name, operation)
    except java.lang.Exception, e:
      if self._output_manager:
        if arguments is not None:
          self._output_manager.log('Error in invoke(): Failed to invoke %s for "%s" with arguments %s: %s' % (operation, object_name, arguments, str(e)))
        else:
          self._output_manager.log('Error in invoke(): Failed to invoke operation %s for "%s": %s' % (operation, object_name, str(e)))
      return None

  def get_id(self,server_id):
    '''
    Parameter:
    - server_id: The configuration ID of the server of interest
    '''
    if not server_id:
      if self._output_manager:
        self._output_manager.log('Error in get_id(): Configuration ID is required')
      return None
    try:
      return AdminConfig.getid(server_id) or None
    except java.lang.Exception, e:
      if self._output_manager:
        self._output_manager.log('Error in get_id(): Failed to get ID for %s' % (server_id, str(e)))
      return None

# Shared module-level WASHelper instance; other classes in this file (for
# example WASCollector) call it through the global name was_helper.
was_helper = WASHelper()

class Config:
  '''
  Runtime settings for one wasperf run.

  Starts from the module-level defaults and is expected to be adjusted by
  option parsing before validate() is called.
  '''
  def __init__(self):
    # Javacore cadence and output locations
    self.iterations = ITERATIONS
    self.delay = DELAY
    self.output_dir = OUTPUT_DIR
    self.script_output_dir = SCRIPT_OUTPUT_DIR

    # OS statistics and CPU monitoring
    self.collecting_os_data = COLLECTING_OS_DATA
    self.cpu_threshold = CPU_THRESHOLD
    self.check_cpu_interval = CHECK_CPU_INTERVAL

    # Diagnostic plan settings
    self.diagnostic_plan_parameters = ''
    self.print_diagnostic_plan_status = 0
    self.contains_javacore = 0

    # Dump type to produce once a threshold fires
    # (--heap-threshold=<value>mb|gb or --cpu-threshold=<value>)
    self.trigger_dump_type= None

    # Heap monitoring and usage printing
    self.heap_threshold = ''
    self.check_heap_interval = CHECK_HEAP_INTERVAL
    self.print_heap_usage = PRINT_HEAP_USAGE
    self.print_cpu_usage = PRINT_CPU_USAGE

    # WebSphere trace settings
    self.runtime_trace = ''
    self.match_trace = ''
    self.trace_file_size = '20'
    self.trace_max_files = '5'

    # Miscellaneous switches
    self.monitor_only = MONITOR_ONLY
    self.disable_collecting_hostname = DISABLE_COLLECTING_HOSTNAME
    self.current_os = self.get_current_os()
    self.disable_exit = 1

    self.collect_servers = None

  def validate(self):
    '''
    Normalize numeric options and reject option combinations that conflict.

    Non-positive delay/iterations are raised to 1. Each conflicting pair
    prints an explanation and calls exit_script().
    '''
    if self.delay <= 0:
      self.delay = 1
    if self.iterations <= 0:
      self.iterations = 1

    if self.cpu_threshold and self.match_trace:
      print('The use of both --cpu-threshold=<value> and --match-trace=<value> are not accepted. Exiting the script.')
      self.cpu_threshold = None
      self.match_trace = ''
      exit_script()

    # The plan text is not modified below, so it is safe to read it once.
    plan = self.diagnostic_plan_parameters
    if self.collecting_os_data and plan:
      print('The use of both --collect-os (or COLLECTING_OS_DATA = 1) and --diagnostic-plan=<value> are not accepted. Exiting the script.')
      exit_script()
    if re.search("SET_TRACESPEC", plan) and self.runtime_trace:
      print('The use of both --diagnostic-plan=<value> containing "SET_TRACESPEC" and --runtime-trace=<value> are not accepted. Exiting the script.')
      exit_script()
    if re.search("MATCH=TRACE", plan) and self.match_trace:
      print('The use of both --diagnostic-plan=<value> containing "MATCH=TRACE" and --match-trace=<value> are not accepted. Exiting the script.')
      exit_script()

    if self.print_heap_usage:
      if not self.heap_threshold:
        print("--print-heap-usage is being ignored as no --heap-threshold=<value>mb/gb is used.")

  def get_current_os(self):
    '''
    Map the JVM "os.name" property to 'Windows', 'Linux' or 'AIX'.
    Any other value is reported and the process ends through os._exit(0).
    '''
    os_name = java.lang.System.getProperty("os.name")
    lowered = os_name.lower()
    for needle, label in (("windows", 'Windows'), ("linux", 'Linux'), ("aix", "AIX")):
      if re.search(needle, lowered):
        return label
    print("Unrecognized OS: %s" % os_name)
    os._exit(0)

class OutputManager:
  '''
  Manages output directory, file operations, logging, and cleanup.
  Handles creating output directories, writing files, logging messages, locating files, copying files with size limits and final packaing.
  '''
  # NOTE: Limitations
  # Jython 2.1 (WAS 8.5) lacks ZIP64 support - cannot zip output data larger than 2GB.

  LIMIT_COPY_FILE_SIZE = 52428800 # 50mb  #104857600 = 100MB
  def __init__(self,config):
    self.show_class_name = 0
    self.config = config
    self.manually_locate_files = []

    # WAS 8.5/Jython 2.1 does not have inspect module - use this instead.
    if self.show_class_name == 1:
      try:
        import inspect
        self.inspect = inspect
      except ImportError:
        self.show_class_name = 0
  
  def create_output_dir(self):
    try:
      os.makedirs(self.config.script_output_dir)
    except OSError , e:
      print('Unable writing to %s Reason:\n%s' % (self.config.script_output_dir,e))
      exit_script()

  def write_to_file(self,sub_dir,file_name,output):
    # Split output into an array of lines and remove any empty lines.
    output_lines = output.splitlines()
    output_lines = [line for line in output_lines if line.strip()]
    # Join the remaining lines back into a string
    clean_output = '\n'.join(output_lines)
    outfile = None

    #print os.path.join(self.config.script_output_dir,sub_dir,file_name)
    #return 

    try:
      #outfile = open(self.config.script_output_dir +'/'+ file_name, 'a')
      full_path = os.path.join(self.config.script_output_dir,sub_dir,file_name)
      dir_path = os.path.join(self.config.script_output_dir,sub_dir)
      if sub_dir and not os.path.exists(dir_path):
        os.makedirs(dir_path)

      outfile = open(full_path, 'a')

      outfile.write(clean_output.strip())
      outfile.write('\n')
      outfile.close()
    except IOError, e:
      print('Unable to write %s in %s. Reason: \n%s' % (file_name, self.config.script_output_dir, e))
      exit_script()
    if outfile:
      outfile.close()

  def log(self,msg):
    """Print timestamped message to screen.out"""
    formatted_class_name = ''
    if self.show_class_name:
      frame = self.inspect.currentframe().f_back
      class_name = self._get_caller_class_name(frame)
      if class_name:
        formatted_class_name =  self._format_class_name(class_name) + ': '
    # 20250908 Fixed alignment by padding class names to consitent width
    timestamp = time.strftime('%Y-%m-%d %H:%M:%S')
    if formatted_class_name:
      log = '%s\t%-20s\t%s' %(timestamp, formatted_class_name, msg)
    else:
      log = '%s\t%s' %(timestamp, msg)
    print(log)
    self.write_to_file('','screen.out',log)

  def _format_class_name(self, class_name):
    """ Convert ClassName to Class Name """
    formatted = re.sub(r'([a-z])([A-Z])',r'\1 \2', class_name)
    return re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1 \2', formatted)
  
  def _get_caller_class_name(self, frame):
    locals_dict = frame.f_locals
    if 'self' in locals_dict:
      return locals_dict['self'].__class__.__name__
    if 'cls' in locals_dict:
      return locals_dict['cls'].__name__

    # 20250908 Handle functions like main()
    function_name = frame.f_code.co_name
    if function_name == 'main':
      return 'Main'
    elif function_name != '<module>':
      return 'Custom'
    else:
      print(frame.f_code.co_name)
    return None

  # NOTE: Jython 2.1 lacks os.path.relpath() (available in Jython 2.7/WAS 9) - using this workaround.
  def add_all_to_zip_v3(self,zip_file, path, base_path_len=None):
    if base_path_len is None:
      base_path_len = len(path) + 1
    for item in os.listdir(path):
      full_path = os.path.join(path,item)
      archive_name = full_path[base_path_len:]      
      if os.path.isfile(full_path):
        zip_file.write(full_path, archive_name)
      elif os.path.isdir(full_path):
        self.add_all_to_zip_v3(zip_file,full_path, base_path_len)
  
  def zip_and_remove_output_dir(self):
    """
    Zip and remove SCRIPT_OUTPUT_DIR.
    """
    self.log('Preparing for packaging and cleanup...')
    zip_file = None
    try:
      # NOTE: allowZip64=True works in Jython 2.7 but not supported in Jython 2.1.
      zip_file = zipfile.ZipFile(self.config.script_output_dir + '.zip', 'w', zipfile.ZIP_DEFLATED)
      self.add_all_to_zip_v3(zip_file,self.config.script_output_dir)
    finally:
      if zip_file:
        zip_file.close()
    try:
      shutil.rmtree(self.config.script_output_dir)
    except OSError, e:
      self.log('Error removing the script output directory: %s' % e)

  def locate_files(self, directory, prefixes):
    """
    Returns an array of the expected default file paths. If it is partial expected logs, return partial data.
    """
    try:
      all_files = os.listdir(directory)
      matching_files = []
      for file_name in all_files:
        for prefix in prefixes:
          if file_name.startswith(prefix):
            full_path = os.path.join(directory,file_name)
            matching_files.append(full_path)
            break 
      if matching_files:
        return matching_files
      else:
        self.log(" - Unable to locate the default log file paths.")
        return []
    # Directory not found or inaccessible
    except OSError:
      return []

  def check_file_size_within_limit(self, file_paths):
    """Check and filter out files that exceed the copy limit.
    Args: file_paths: List of file paths to check.
    Returns: List of eligible paths (may be partial if some files exceed limit)
    """
    eligible_paths = []
    for file_path in file_paths:
      try:
        file_size = os.path.getsize(file_path)
        if file_size >= self.LIMIT_COPY_FILE_SIZE:
          limit_size_mb = self.LIMIT_COPY_FILE_SIZE / (1024 * 1024) 
          self.log(' - %s skipped (size >= %sMB)' % (os.path.basename(file_path),str(limit_size_mb)))
          self.manually_locate_files.append(file_path)
        else:
          eligible_paths.append(file_path)
      except OSError,e :
        self.log(' - File %s not found. %s' % (file_path,e))
    return eligible_paths
      
  def copy_files_to_output_dir(self,file_paths,sub_dir=''):
    PRINT_COPIED_LOG_PATH = 0
    # for f in file_paths:
    #   print f
    target_dir = os.path.join(SCRIPT_OUTPUT_DIR,sub_dir)
    if not os.path.exists(target_dir): # and os.path.isdir(target_dir)
      os.makedirs(target_dir)
    
    file_paths = self.check_file_size_within_limit(file_paths)
    if file_paths:
      for file_path in file_paths:
        if os.path.isfile(file_path):
          file_name = os.path.basename(file_path)
          target_path = os.path.join(target_dir, file_name)
          if PRINT_COPIED_LOG_PATH:
            self.log(' - '+target_path)
          shutil.copy2(file_path, target_path)
    else:
      self.log('Error: Not able to gather files. file_paths is empty.')

  def print_end_message(self):
    msg = 'Script finished.'
    stamp = time.strftime('%Y-%m-%d %H:%M:%S') + '\t'
    print('%s%s' % (stamp,msg))
    files = []
    script_output_path = self.config.script_output_dir+'.zip'

    files.append(script_output_path)
    files.extend(self.manually_locate_files)
    list_data = ''.join(['\n\t* ' + f for f in files])
    print('\n\tTo share with IBM support, zip and upload the following data:%s\n'%(list_data))

class WASCollector:
  '''Collects WebSphere Application Server configuration and log files.
  Gathers server.xml and the default log files for diagnostic analysis.'''
  def __init__(self,config,output_manager):
    self.config = config
    self.output_manager = output_manager

  def gather_logs(self,server):
    '''
    Copy the default log files of a server into <cell>_<node>_<server>/logs.
    Parameter:
    - server: dictionary with the keys 'cell', 'node' and 'name'
    Files are selected by name prefix below <USER_INSTALL_ROOT>/logs/<server name>.
    '''
    expected_files = ['SystemOut','native_stderr','verbosegc','native_stdout','startServer','SystemErr',]
    if self.config.match_trace or self.config.diagnostic_plan_parameters:
      expected_files.append('diagPlanSummary.log')
    if self.config.runtime_trace:
      expected_files.append('trace')

    cell,node,server_name = server['cell'], server['node'], server['name']
    user_install_root_path = was_helper.get_other_conf_value('/Node:'+node+'/VariableMap:/VariableSubstitutionEntry:/', 'USER_INSTALL_ROOT')
    # get_other_conf_value() returns None when the variable cannot be resolved;
    # os.path.join() must not be called with it.
    if not user_install_root_path:
      self.output_manager.log(" - Can't find the default logs path for %s." % server_manager.format_server(server))
      return

    server_logs_path = os.path.join(user_install_root_path,'logs',server_name)

    file_paths = self.output_manager.locate_files(server_logs_path, expected_files)
    if file_paths:
      self.output_manager.log(' - logs(SystemOut.log, native_stderr.log, etc)')
      self.output_manager.copy_files_to_output_dir(file_paths,os.path.join(cell+'_'+node+'_'+server_name,'logs'))
    else:
      self.output_manager.log(" - Can't find the default logs path for %s." % server_manager.format_server(server))

  def gather_server_xml_file(self, server):
    '''
    Copy server.xml of a server into <cell>_<node>_<server>/config_files.
    Parameter:
    - server: dictionary with the keys 'cell', 'node' and 'name'
    '''
    cell_name,node_name,server_name = server['cell'],server['node'],server['name']
    server_name_dir = cell_name+'_'+node_name+'_'+server_name
    user_install_root_path = was_helper.get_other_conf_value('/Cell:'+cell_name+'/Node:'+node_name+'/VariableMap:/VariableSubstitutionEntry:/', 'USER_INSTALL_ROOT')
    if user_install_root_path:
      self.output_manager.log(' - server.xml')
      server_xml_path =  os.path.join(user_install_root_path,'config','cells',cell_name,'nodes',node_name,'servers',server_name,'server.xml')
      self.output_manager.copy_files_to_output_dir([server_xml_path],os.path.join(server_name_dir,'config_files'))
    else:
      self.output_manager.log(' - Unable to locate server.xml')

class DumpCollector:
  '''Collects thread dumps (javacores), heap dumps and system dumps.

  Invokes dump generation on a server JVM, locates the dump files written
  since the script started, and gathers them for diagnostic analysis.
  '''
  # Operation names passed to AdminControl.invoke() on the server JVM MBean.
  THREADS = 'dumpThreads'
  HEAP = 'generateHeapDump'
  SYSTEM = 'generateSystemDump'
  VALID_DUMP_TYPES = [THREADS,HEAP,SYSTEM]

  # Default file name prefix and extension for each locate_dump() dump type.
  DUMP_FILE_NAMES = {
    'javacore': ('javacore', 'txt'),
    'heapdump': ('heapdump', 'phd'),
    'core': ('core', 'dmp'),
  }
  # JVM environment entry consulted by gather_dumps() for each dump type.
  DUMP_DIR_ENV_VARS = {
    'javacore': 'IBM_JAVACOREDIR',
    'heapdump': 'IBM_HEAPDUMPDIR',
    'core': 'IBM_COREDIR',
  }

  def __init__(self,config,output_manager):
    '''Store the script configuration and the output manager.'''
    self.config = config
    self.output_manager = output_manager

  ########################################################################
  #   Locate and gather dump files (javacores, heap dumps, system cores).
  ########################################################################
  def locate_dump(self,directory,dump_type,pid):
    '''Locate dump files generated after script start time.

    Only detects WebSphere default dump filename patterns, for example:
      javacore.20250908.180330.2840.0036.txt
      heapdump.20250827.150455.9148.0050.phd
      core.20250929.122806.7804.0032.dmp

    Arg:
      directory: Directory to search for dump files (not searched recursively).
      dump_type: Type of dump (javacore, heapdump, core).
      pid: Process ID to match in filename.

    Returns:
      List of dump file paths, or None if no matches found or dump_type is
      not recognized.
    '''
    file_names = self.DUMP_FILE_NAMES.get(dump_type)
    if file_names is None:
      self.output_manager.log('Unrecognized dumptype: %s' % dump_type)
      return None
    prefix, extension = file_names

    # The PID is escaped and must be followed by a literal dot, otherwise
    # PID 123 would also match the dumps written by PID 1234 or 12345.
    file_pattern = re.compile(prefix + r'\.(\d{8})\.(\d{6})\.' + re.escape(str(pid)) + r'\..*\.' + extension)
    start_time = time.strftime('%Y%m%d.%H%M%S', time.localtime(START_SCRIPT_TIME))

    matching_dumps = []
    if os.path.isdir(directory):
      for file_name in os.listdir(directory):
        file_path = os.path.join(directory, file_name)
        if not os.path.isfile(file_path):
          continue
        match = file_pattern.match(file_name)
        if not match:
          continue
        # Date and time are taken from the file name; the fixed-width
        # YYYYMMDD.HHMMSS format makes a plain string comparison chronological.
        file_stamp = match.group(1) + '.' + match.group(2)
        if start_time <= file_stamp:
          matching_dumps.append(file_path)
    if matching_dumps:
      return matching_dumps
    return None

  def gather_dumps(self, dump_type, server):
    '''Locate the dumps of a server and hand them to the output manager.

    Directories are searched in this order until dumps are found:
    USER_INSTALL_ROOT, the -Xdump:directory generic JVM argument, then the
    IBM_JAVACOREDIR / IBM_HEAPDUMPDIR / IBM_COREDIR JVM environment entry.
    Javacores are copied to <cell>_<node>_<server>/dumps; heap dumps and
    system cores are only recorded in output_manager.manually_locate_files.

    Arg:
      dump_type: Type of dump (javacore, heapdump, core).
      server: Server dictionary; 'cell', 'node', 'name' and 'pid' are read.

    Returns:
      None in all cases.
    '''
    # Make sure the configuration service answers before querying it.
    try:
      AdminConfig.list('Cell')
    except java.lang.Exception:
      self.output_manager.log(' - Unable to get a configuration: %s' % sys.exc_info()[1])
      self.output_manager.log(' - Skipping locating dumps.')
      return None
    cell_name, node_name, server_name, pid = server['cell'],server['node'],server['name'],server['pid']
    server_id = '/Cell:'+cell_name+'/Node:'+node_name+'/Server:'+server_name+'/'
    dumps_location_arr = None

    # Default dump directory
    default_dump_dir = was_helper.get_other_conf_value('/Cell:'+cell_name+'/Node:'+node_name+'/VariableMap:/VariableSubstitutionEntry:/', 'USER_INSTALL_ROOT')
    if default_dump_dir:
      dumps_location_arr = self.locate_dump(default_dump_dir,dump_type, pid)

    # -Xdump directory
    adm_conf = was_helper.get_id(server_id)
    xdump_dir = was_helper.get_generic_JVM_arg_value(adm_conf, '-Xdump:directory')
    if xdump_dir and not dumps_location_arr:
      dumps_location_arr = self.locate_dump(os.path.normpath(xdump_dir),dump_type, pid)

    # IBM_JAVACOREDIR, IBM_HEAPDUMPDIR and IBM_COREDIR locations
    if not dumps_location_arr:
      env_var_name = self.DUMP_DIR_ENV_VARS.get(dump_type)
      ibm_dumpdir_value = None
      if env_var_name:
        ibm_dumpdir_value = was_helper.get_JVM_env_var_value(server_id, env_var_name)
      if ibm_dumpdir_value:
        dumps_location_arr = self.locate_dump(os.path.normpath(ibm_dumpdir_value),dump_type, pid)

    if dumps_location_arr and dump_type in ('heapdump','core'):
      # Heap dumps and system cores are not copied, only reported.
      self.output_manager.manually_locate_files.extend(dumps_location_arr)
    elif dumps_location_arr:
      self.output_manager.copy_files_to_output_dir(dumps_location_arr,os.path.join(cell_name+'_'+node_name+'_'+server_name,'dumps'))
      self.output_manager.log(' - %ss' % dump_type)
    else:
      self.output_manager.log(' - Unable to locate %ss' % dump_type)
    return None

  def _describe_exception(self, exc_value):
    '''Return the root cause of exc_value when it has one, else exc_value.'''
    root_cause = None
    if hasattr(exc_value, 'getCause'):
      try:
        root_cause = exc_value.getCause()
      except:
        root_cause = None
    if root_cause:
      return root_cause
    return exc_value

  ##################################################
  # Invoke dump_type to produce a dump for a server.
  ##################################################
  def invoke_dump(self,dump_type,server):
    '''Invoke a dump operation on the JVM MBean of a server.

    Nothing is done for a server already marked unreachable. When the JVM
    MBean cannot be found the server is marked unreachable. A failure of
    the operation itself is logged and swallowed.

    Arg:
      dump_type: One of VALID_DUMP_TYPES (THREADS, HEAP or SYSTEM).
      server: Server dictionary; 'cell', 'node', 'name' and 'status' are read.

    Returns:
      None in all cases.
    '''
    if server.get('status') == ServerManager.STATUS.UNREACHABLE:
      return
    if dump_type not in self.VALID_DUMP_TYPES:
      self.output_manager.log('Error: Unrecognized %s passed to invoke_dump(dump_type,server)' % dump_type)
      return

    self.output_manager.log('Invoking %s: %s' % (dump_type,server_manager.format_server(server)))
    cell,node,server_name = server['cell'], server['node'],server['name']
    jvm = was_helper.get_complete_object_name('type=JVM,cell=%s,node=%s,process=%s,*' % (cell,node,server_name))
    if not jvm:
      self.output_manager.log('Unable to reach %s' % (server_manager.format_server(server)))
      server_manager.set_status(server,ServerManager.STATUS.UNREACHABLE)
      return

    try:
      AdminControl.invoke(jvm, dump_type)
      self.output_manager.log('Invoked %s: %s' % (dump_type,server_manager.format_server(server)))
    # NOTE:
    # Tried 'java.lang.Exception, e' but does not include exception message - using bare except instead.
    # Example: com.ibm.websphere.management.exception.HeapDumpOnDiskLimitReachedException: There are already 10 heap dumps on disk
    except:
      # Always log something: the root cause when available, otherwise the
      # exception itself (previously exceptions without a cause were dropped silently).
      self.output_manager.log('Exception: %s' % self._describe_exception(sys.exc_info()[1]))
    return None

  def threading_generate_javacore(self,server):
    '''Produce config.iterations + 1 javacores for a server.

    The time spent invoking the dump is subtracted from config.delay, with
    a minimum pause of 1 second; there is no pause after the final
    javacore. Stops early once the server is marked unreachable.

    Arg:
      server: Server dictionary passed on to invoke_dump().
    '''
    for i in range(0,self.config.iterations+1):
      if server.get('status') == ServerManager.STATUS.UNREACHABLE:
        break
      start_time = time.time()
      self.invoke_dump(self.THREADS,server)
      elapsed = time.time() - start_time
      if i < self.config.iterations:
        adjusted_delay = max(1, self.config.delay - elapsed)
        time.sleep(adjusted_delay)

class DiagnosticPlanManager(RequiresServerManager):
  def __init__(self,config,output_manager):
    #self.config = config
    self.print_diagnostic_plan_status = config.print_diagnostic_plan_status
    self.output_manager = output_manager
    self._check_server_manager_ready()

    if config.match_trace is None:
      self.match_trace = ''
    else:
      self.match_trace = config.match_trace

    if config.diagnostic_plan_parameters is None:
      self.diagnostic_plan_parameters = ''
    else:
      self.diagnostic_plan_parameters = config.diagnostic_plan_parameters

    self.match_trace_count = 0
    self.first_match_trace_only = 0
    self.action_count = 0

    self.contains_javacore = self.diagnostic_plan_parameters.find('JAVACORE') != -1
    self.contains_heapdump = self.diagnostic_plan_parameters.find('HEAPDUMP') != -1
    self.contains_systemcore = self.diagnostic_plan_parameters.find('SYSTEMCORE') != -1

    #print('contains_javacore: %i\ncontains_heapdump: %i\ncontains_system: %i'%(self.contains_javacore,self.contains_heapdump,self.contains_systemcore))

    if self.match_trace and self.diagnostic_plan_parameters:
      raise ValueError('match_trace requires diagnostic_plan_parameters to be empty.')
    if self.match_trace and not self.diagnostic_plan_parameters:
      self.diagnostic_plan_parameters = 'MATCH=TRACE:' + self.match_trace
      self.first_match_trace_only = 1
    
    if self.diagnostic_plan_parameters:
      self.match_trace_count = len(re.findall(r'MATCH=TRACE:([^,:]*)(?:,|$)',self.diagnostic_plan_parameters))
      if self.match_trace_count == 1 and self.diagnostic_plan_parameters.startswith('MATCH=TRACE:'):
        self.first_match_trace_only = 1
        self.match_trace = self.get_match_trace_from_diagnostic_plan_parameters()
      self.action_count = self.diagnostic_plan_parameters.count(',') + 1


  ##########################################################################################################
  # Known bug: Diagnostic plan may cancel after first javacore, returning TRAS1102I completion message.
  # Fixed in APAR PH60295. Some clients experienced this even without -Xdump:directory option.
  # NOTE: Can be ignored if only one javacore is needed.
  ##########################################################################################################

  def send_diagnostic_plan(self):
    if not server_manager.servers:
      self.output_manager.log('Error: server_manager.servers is empty in send_diagnostic_plan().')
      return
    
    exception_occured = 0
    self.output_manager.log('Sending the diagnostic plan actions to the following servers:')
    for server in server_manager.servers:
      self.output_manager.log('- %s' % server_manager.format_server(server))
    for server in server_manager.servers[:]:
      diag_plan_name = was_helper.query_mbean('WebSphere:type=DiagPlanManager,cell='+server['cell']+',node='+server['node']+',process='+server['name']+',*')
      if diag_plan_name:
        diag_plan_obj = AdminControl.makeObjectName(diag_plan_name)
        try:
          AdminControl.invoke_jmx(diag_plan_obj, 'setDiagPlan', [self.diagnostic_plan_parameters], ['java.lang.String'])
          server['diag_plan_obj'] = diag_plan_obj
        except java.lang.Exception, e:
          exception_occured = 1
          # NOTE: java.lang.Exception does not provide exception details, e.g.:
          # DiagPlanParserException: TRAS1150E: An unknown action SET_TRACESPEC:* is defined at position 1: ">SET_TRACESPEC:*=info...".
          # Using bare except to capture full message.
          exec_type, exc_value, exc_traceback = sys.exc_info()
          if hasattr(exc_value, 'getCause') and exc_value.getCause():
            try:
              root_cause = exc_value.getCause() 
              self.output_manager.log('- %s encountered %s' % (server_manager.format_server(server),root_cause))
            except:
              self.output_manager.log('Exception: %s' % exc_value)
      else:
        if was_helper.get_complete_object_name('type=Server,cell='+server['cell']+',node='+server['node']+',process='+server['name']+',*'):
          self.output_manager.log(''+ server_manager.format_server(server) + ' is still running but diagnostic plan states otherwise.') 
        else:
          self.output_manager.log('%s is unreachable.' % server_manager.format_server(server))
          server_manager.set_status(server,ServerManager.STATUS.UNREACHABLE)

    if not exception_occured:
      if server_manager.servers:
        self.output_manager.log('Sent actions to servers, now waiting for diagnostic plan event.')
      if self.contains_systemcore or self.contains_heapdump:
        self.output_manager.log('NOTE: Dump generation time varies from seconds to minutes.')
    

  def clear_diagnostic_plan_v2(self,exclude_server=None):
    if len(server_manager.servers) > 0:
      self.output_manager.log('Clearing actions on remaining servers.')
      for server in server_manager.servers[:]:
        if server.get('diag_plan_obj'):
          if exclude_server is not None and server is exclude_server:
            continue
          AdminControl.invoke_jmx(server['diag_plan_obj'], 'clearDiagPlan', [], [])
      self.output_manager.log('Actions cleared.')

  def get_match_trace_from_diagnostic_plan_parameters(self):
    return re.search(r'MATCH=TRACE:([^,:]*)(?:,|$)', self.diagnostic_plan_parameters).group(1)

  def extract_current_action_and_index(self,msg):
    '''
    Extract the current action and its index from diagnostic plan message.
    Return: current_action and current_index
    '''
    lines = msg.split('\n')
    actions = []
    in_block = 0
    current_index = -1
    current_action = None

    for line in lines:
      line = line.strip()

      # Skip empty lines and informational header lines
      if not line or line.startswith('The '):
        continue
      # Handle MATCH block start and end
      if '[' in line:
        if line.startswith('*'):
          current_index = len(actions)
          current_action = line[1:].split(':')[0]
          return current_action, current_index
        actions.append(line)
        in_block = 1
        continue
      if ']' in line:
        in_block = 0
        continue
      # Skip lines inside blocks of MATCH action
      if in_block:
        continue
      actions.append(line)
      if line.startswith('*'):
        current_index = len(actions)
        current_action = line[1:].split(':')[0]
        return current_action, current_index
    return None, None

  def check_diagnostic_plan_status(self):
    if not server_manager.servers:
      self.output_manager.log("Error: server_manager.servers is empty in check_diagnostic_plan_status()")
      return None


    need_to_break = 0
    count_done = 0
    count = 0
    while not need_to_break:
      filtered_servers = filter(lambda s: s.get('diag_plan_obj'), server_manager.servers)
      if not filtered_servers:
        self.output_manager.log("Error: server_manager.servers not found with 'diag_plan_obj' key in check_diagnostic_plan_status().")
        break

      for server in filter(lambda s: s.get('diag_plan_obj'), server_manager.servers[:]):
        try:
          msg = AdminControl.invoke_jmx(server['diag_plan_obj'],'getDiagPlan',[],[])
        except java.lang.Exception, e:
          self.output_manager.log('Unable to reach %s: %s' % (server_manager.format_server(server), e))
          del server['diag_plan_obj']
          server_manager.set_status(server,ServerManager.STATUS.UNREACHABLE)
          if len(server_manager.servers) > 0:
            self.output_manager.log('Continaully checking the running diagnostic plan on the remaining servers.')
          if len(server_manager.servers) == 0:
            count_done += 1
            need_to_break = 1
            break
          continue
        # TRAS1102I: No diagnostic plan is set or running.
        if re.search('TRAS1102I', msg):
          del server['diag_plan_obj']
                    
          if self.first_match_trace_only and self.action_count == 1:
            if len(server_manager.servers) <= 1:
              self.output_manager.log('"%s" detected on %s.' % (self.match_trace, server_manager.format_server(server)))
            else:
              self.output_manager.log('"%s" detected on %s. Clearing diagnostic plan on remaining servers.' % ((self.match_trace),server_manager.format_server(server) ))
              self.clear_diagnostic_plan_v2(server)
            server_manager.set_status(server,ServerManager.STATUS.TRIGGERED)
            need_to_break = 1
            break
          # Any actions including more than one match trace action.
          else:
            # NOTE: Prevents script from long running and waiting on servers that still have running diagnostic plan waiting to be triggered - clear them when one server completes its diagnostic plan.
            self.output_manager.log('Completed on %s.' % server_manager.format_server(server))
            if self.match_trace_count > 0 and len(server_manager.servers) > 1:
              server_manager.remove_servers_except_matched(server)
              self.clear_diagnostic_plan_v2(server)
              need_to_break = 1
              break
          # Preventing loop forever
          count_done += 1
          if count_done >= len(server_manager.servers):
            need_to_break = 1
            break
        # The diagnostic plan is still running.
        else:
          # Tracking server's diag plan actions
          current_action, current_index = self.extract_current_action_and_index(msg)
          if self.first_match_trace_only and current_index > 0 and not server.get('match_found'):
            self.output_manager.log('"%s" detected on %s, running the remaining actions.' % (self.match_trace, server_manager.format_server(server)))
            server['match_found'] = 1

          if self.print_diagnostic_plan_status == 1:
            self.output_manager.log("%s's current action: %s" % (server_manager.format_server(server),current_action))
            self.output_manager.log("%s's current action index: %s" % (server_manager.format_server(server),current_index))
      time.sleep(5)

  #####################

class OSCollector(RequiresServerManager):
  '''
  Collect OS stats if --collect-os or --cpu-threshold=<value> is used, or 
  COLLECTING_OS_DATA is true. 
  --monitor-only can be used if no data collection is needed.
  '''
  OS_DATA_DIR = 'OS_data'
  def __init__(self, config,output_manager):
    '''Store the configuration and output manager, then run the
    _check_server_manager_ready() check (defined outside this block).'''
    self.config, self.output_manager = config, output_manager
    self._check_server_manager_ready()
  
  ######## From previous winperf.py ########
  def run_cmd(self,cmd):
    '''Run an OS command and return what it wrote to standard output.

    Arg:
      cmd: Command handed to Runtime.exec(); callers pass either a command
        string or a list of arguments.

    Returns:
      The standard output of the command, each line terminated by a
      newline character. Standard error is not read.
    '''
    process = runtime.getRuntime().exec(cmd)
    # The command gets no input: close its stdin so that tools waiting for
    # end-of-input cannot hang (same as windows_CPU_trigger()).
    process.getOutputStream().close()
    reader = bufferedReader(inputStreamReader(process.getInputStream()))
    lines = []
    try:
      line = reader.readLine()
      while line is not None:
        lines.append(line + '\n')
        line = reader.readLine()
    finally:
      # Release the pipe even when reading fails.
      reader.close()
    return ''.join(lines)

  def run_task_list_cmd(self):
    '''Append a timestamped "tasklist /v" listing to tasklist.out.txt.

    When COLLECTING_CMD_STATS is set, the time taken is also appended to
    cmd_stats.txt. Both files are written under OS_DATA_DIR.
    '''
    task_list_cmd = 'tasklist /v'
    listing_file = 'tasklist.out.txt'

    began_at = time.time()
    self.output_manager.log('Shell script: tasklist /v')
    listing = self.run_cmd(task_list_cmd)
    taken_at = time.strftime('%Y-%m-%d %H:%M:%S')

    # Timestamp first, then the listing, then the section separator.
    for chunk in (taken_at, listing, SECTION):
      self.output_manager.write_to_file(self.OS_DATA_DIR, listing_file, chunk)

    # collect stat for cmd
    if COLLECTING_CMD_STATS:
      duration = time.time() - began_at
      self.output_manager.write_to_file(self.OS_DATA_DIR,'cmd_stats.txt','%s:%s' % (task_list_cmd,duration))

  # Windows commands run on every iteration of collect_windows_data(); their
  # output is appended to perf.out.txt. Keys are the labels used in the log
  # messages and in cmd_stats.txt. Values are passed unchanged to run_cmd():
  # either a command string or a list of arguments (the PowerShell entry).
  WINDOWS_OS_COMMANDS = {
    "memory usage":"wmic os get totalvisiblememorysize,totalvirtualmemorysize,freephysicalmemory,freevirtualmemory",
    "CPU usage":["powershell.exe", "Write-Host \"CPU usage:\" (Get-Counter '\\Processor(_Total)\\% Processor Time').CounterSamples[0].CookedValue'%'"],
    "disk usage":"wmic logicaldisk get size,freespace,caption",
    "paging usage":"wmic PATH Win32_PerfFormattedData_PerfOS_Memory get PageFaultsperSec,PagesPerSec,PageReadsPerSec",
    "netstat -oan": "netstat -oan",
    "netstat -s":"netstat -s"
  }

  # Monitor PID CPU, triggering data collection when the CPU threshold for given PID is reached.
  def windows_powershell_cpu_trigger_script(self):
    '''Return the source of the PowerShell script that monitors process CPU.

    The script takes PIDs plus --CPUThreshold=<value>,
    --CheckCPUInterval=<seconds> and --PrintCPUUsage as arguments. It
    prints "PID <pid> (<name>) exceeded the CPU threshold, ..." and ends
    once a monitored process reaches the threshold; it exits with code 1
    on invalid arguments, an unknown PID, or a failed counter read.

    In GetCPUUsageForPIDUsingProcessName the error is reported with
    Write-Host: Write-Output would make the error text the return value of
    the function, so the caller's $null check would never detect the failure.
    '''
    return """
    $version="2025.12"
    $CheckCPUInterval = 30
    $ContinueMonitoringCPU = $true
    $CPUThreshold = $null
    $PrintCPUUsage = $false

    if ($args.count -eq 0){
      Write-Output "args is empty. Exiting the script"
      exit 1
    }

    $ProvidedPIDs = @()
    function LogStamp ($InputString){
      #$dateString = Get-Date -Format "yyy-MM-d HH:mm:ss"
      #Write-Output "$dateString`t$InputString"
      Write-Output "$InputString"
    }

    foreach ($arg in $args){
      $parts = $arg -split '='
      if ($parts[0].StartsWith('--')){
      switch ($parts[0]) {
        '--CPUThreshold' { $CPUThreshold = $parts[1] }
        '--CheckCPUInterval' { $CheckCPUInterval = $parts[1]}
        '--PrintCPUUsage' { $PrintCPUUsage = $true }
      }
      }else{
      $ProvidedPIDs += $parts[0]
      }
    }
    if ($CPUThreshold -eq $null){
      Write-Host "CPUThreshold is null. Exiting the script."
      exit 1
    }
    LogStamp "Script version: $version"
    LogStamp "PowerShell version: $($PSVersionTable.PSVersion.ToString())"
    LogStamp "CPU threshold: $CPUThreshold%"
    LogStamp "Check CPU interval: $CheckCPUInterval seconds"
    LogStamp "Monitoring PIDs: $ProvidedPIDs"

    # Returns true if provided ID exists. Otherwise false.
    function VerifyProvidedRunningPID {
      param (
      [string] $ProcessID
      )
      if (Get-Process -Id $ProcessID -ErrorAction SilentlyContinue) {
      return $true
      } else {
      return $false
      }
    }

    function GetProcessNameUsingPID {
      param (
      [string] $ProcessID
      )
      $processName = Get-CimInstance -ClassName Win32_PerfFormattedData_PerfProc_Process | Where-Object { $_.IDProcess -eq $ProcessID } | Select-Object -ExpandProperty Name
      return $processName
    }

    function GetCPUUsageForPIDUsingProcessName($name) {
      Try{
      $SamplesCookedValue = (Get-Counter "\Process($name)\% Processor Time" -ErrorAction stop).CounterSamples.CookedValue
      }Catch{
      Write-Host "An error occured: $_"
      return
      }
      $CPUCores = (Get-WMIObject Win32_ComputerSystem).NumberOfLogicalProcessors
      return [Decimal]::Round(($SamplesCookedValue / $CPUCores), 2)
    }

    $ProcessIDsAndNames = @()
    foreach($ProvidedPID in $ProvidedPIDs){
      if (VerifyProvidedRunningPID $ProvidedPID){
      $ProcessIDsAndNames += @{ PID = $ProvidedPID}
      }else{
      LogStamp "PID $ProvidedPID does not exist. Exiting the script."
      exit 1
      }
    }

    # Getting the process name for each provided running PID.
    foreach($item in $ProcessIDsAndNames){
      $ProcessName = GetProcessNameUsingPID $item.PID
      $item.name = $ProcessName
    }

    LogStamp "Waiting for a process excceding the CPU threshold of $CPUThreshold%."
    while($ContinueMonitoringCPU){
      foreach($item in $ProcessIDsAndNames){
      $CPUUsage = GetCPUUsageForPIDUsingProcessName $item.name
      if ($CPUUsage -eq $null){
        # Exits the script if process name does not exist. 
        LogStamp "Exiting the script."
        exit 1
      }
      if ($PrintCPUUsage){
        LogStamp "PID $($item.PID)'s CPU usage: $CPUUsage%"
      }
      if ($CPUUsage -ge $CPUThreshold){
        LogStamp "PID $($item.PID) ($($item.name)) exceeded the CPU threshold, currently at $CPUUsage%."
        $ContinueMonitoringCPU = $false
        break
      }
      }
      if ($ContinueMonitoringCPU){
      Start-Sleep -Seconds $CheckCPUInterval
      }
    }
    """

  # Collect Windows performance data.
  def collect_windows_data(self):
    '''Collect Windows performance data for config.iterations rounds.

    Each round appends to files under OS_DATA_DIR:
      - tasklist.out.txt: "tasklist /v" output (see run_task_list_cmd()).
      - perf.out.txt: output of every WINDOWS_OS_COMMANDS entry.
      - <pid>.threads.txt: win32_thread data for each server PID.
      - cmd_stats.txt: command durations, only when COLLECTING_CMD_STATS is set.
    Rounds are spaced config.delay seconds apart, minus the time the
    collection itself took. Collection stops after the last round or once
    iterations * delay seconds have passed since the script started.
    '''
    self.output_manager.log('Collecting OS stats...')
    threads_cmd = 'wmic PATH win32_thread WHERE processhandle=%s GET handle,elapsedtime,usermodetime,kernelmodetime'
    for i in range(self.config.iterations):
      start_collecting_OS_data_time = time.time()
      self.run_task_list_cmd()
      stamp = time.strftime('%Y-%m-%d %H:%M:%S') + '\t'
      self.output_manager.write_to_file(self.OS_DATA_DIR,'perf.out.txt', stamp)
      for key_cmd_name, command in self.WINDOWS_OS_COMMANDS.items():
        start_cmd_time = time.time()
        self.output_manager.log('Shell script: %s' % (key_cmd_name))

        command_output = self.run_cmd(command)
        self.output_manager.write_to_file(self.OS_DATA_DIR,'perf.out.txt', command_output)
        self.output_manager.write_to_file(self.OS_DATA_DIR,'perf.out.txt','\n')

        # collect stat for cmd
        if COLLECTING_CMD_STATS:
          elapsed_cmd_time = time.time() - start_cmd_time
          # Same cmd_stats.txt under OS_DATA_DIR as the tasklist and
          # win32_thread stats, so that all durations end up in one file.
          self.output_manager.write_to_file(self.OS_DATA_DIR,'cmd_stats.txt','%s:%s' % (key_cmd_name,elapsed_cmd_time))

      self.output_manager.write_to_file(self.OS_DATA_DIR,'perf.out.txt',SECTION)

      for pid in [server['pid'] for server in server_manager.servers]:
        start_cmd_time = time.time()
        self.output_manager.log('Shell script: win32_thread for PID %s' % (pid))
        command_output = self.run_cmd(threads_cmd % (pid))

        threads_file = str(pid)+'.threads.txt'
        stamp = time.strftime('%Y-%m-%d %H:%M:%S')
        self.output_manager.write_to_file(self.OS_DATA_DIR,threads_file, stamp)
        self.output_manager.write_to_file(self.OS_DATA_DIR,threads_file, command_output)
        self.output_manager.write_to_file(self.OS_DATA_DIR,threads_file, SECTION)

        # collect stat for cmd
        if COLLECTING_CMD_STATS:
          elapsed_cmd_time = time.time() - start_cmd_time
          self.output_manager.write_to_file(self.OS_DATA_DIR,'cmd_stats.txt','%s:%s' % ('wmic PATH win32_thread',elapsed_cmd_time))

      OS_data_collection_time = time.time() - start_collecting_OS_data_time

      # Stop when the planned script duration is used up or the last iteration has been reached.
      elapsed_time = time.time() - START_SCRIPT_TIME
      total_time_script_duration = self.config.iterations * self.config.delay
      remaining_time = total_time_script_duration - elapsed_time
      if remaining_time <= 0 or i == self.config.iterations-1:
        break

      # Skip the DELAY to continue the OS data collection if OS_data_collection_time exceeds the delay.
      pause = self.config.delay - OS_data_collection_time
      if pause <= 0:
        self.output_manager.log('Continuing OS collection...')
      else:
        self.output_manager.log('Pausing OS collection for %.0f seconds.' % pause)
        time.sleep(pause)

  def windows_CPU_trigger(self):
    '''Run the PowerShell CPU monitor until a server exceeds the threshold.

    The script from windows_powershell_cpu_trigger_script() is written to a
    temporary file and started with the PIDs of server_manager.servers.
    Its output is logged line by line; when a "PID <pid> ... exceeded ..."
    line shows up the matching server is marked TRIGGERED. The temporary
    file is removed afterwards, also when reading the output fails.
    exit_script() is called when PowerShell ends with a non-zero exit code.
    '''
    PIDs_string = ' '.join([str(server['pid']) for server in server_manager.servers])
    self.output_manager.log('Writing a temporary PowerShell script file to monitor CPU.')
    # NOTE(review): assumes write_to_file('', ...) writes into config.script_output_dir - confirm in OutputManager.
    temp_powershell_script_path = self.config.script_output_dir+'/temp_powershell_CPU_trigger.ps1'
    self.output_manager.write_to_file('','temp_powershell_CPU_trigger.ps1', self.windows_powershell_cpu_trigger_script())
    self.output_manager.log('======== PowerShell script starting up ========')

    additional_args = ''
    if self.config.print_cpu_usage:
      additional_args += '--PrintCPUUsage'
    cmd = 'powershell.exe -ExecutionPolicy Bypass -Command %s %s --CPUThreshold=%s --CheckCPUInterval=%s %s' % (temp_powershell_script_path, PIDs_string, self.config.cpu_threshold, self.config.check_cpu_interval, additional_args)

    stout = None
    try:
      try:
        powershell_process = runtime.getRuntime().exec(cmd)
        # PowerShell gets no input; close its stdin.
        powershell_process.getOutputStream().close()
        stout = bufferedReader(inputStreamReader(powershell_process.getInputStream()))
        line = stout.readLine()

        while line is not None:
          self.output_manager.log('Shell script: '+line)
          if self.config.cpu_threshold and line.startswith("PID ") and re.search('exceeded', line):
            high_cpu_pid = line.split()[1] # Extract PID number from the "PID <pid> exceeded the CPU threshold, currently at <usage>%".
            if high_cpu_pid.isdigit():
              server = server_manager.find_server_by_pid(high_cpu_pid)
              server_manager.set_status(server,ServerManager.STATUS.TRIGGERED)
              break
          line = stout.readLine()
      finally:
        # Never leave the temporary script behind, even on an error.
        if os.path.exists(temp_powershell_script_path):
          os.remove(temp_powershell_script_path)
      exit_code = powershell_process.waitFor()
    finally:
      if stout is not None:
        stout.close()

    if exit_code != 0:
      self.output_manager.log('PowerShell script returns with %s' % exit_code)
      self.output_manager.log('======== PowerShell script unexpectedly exited ========')
      print('')
      exit_script()
    else:
      self.output_manager.log('======== PowerShell script completed ========')
  ######## End previous winperf.py ########

  ########################################################################
  #
  #   Collect Unix data (AIX or Linux) 
  #   Shell type: /bin/ksh for AIX and /bin/sh for Linux
  #
  ########################################################################
  def collect_unix_data(self):
    '''Run the embedded AIX or Linux shell script and log its output.

    Without config.cpu_threshold the script collects OS data for
    iterations * delay seconds. With config.cpu_threshold it is started
    with --monitor-only; when it prints a "PID <pid> ..." line with a
    numeric PID, the matching server is marked TRIGGERED and reading stops.
    Nothing is run when there are no servers or when the OS is not AIX
    or Linux.
    '''
    if not server_manager.servers:
      self.output_manager.log('Error: server_manager.servers is empty. OS collection will not proceed.')
      return

    current_os = self.config.current_os
    if current_os == 'AIX':
      shell_type = '/bin/ksh'
      shell_script = self.AIX_SHELL_SCRIPT
    elif current_os == 'Linux':
      shell_type = '/bin/sh'
      shell_script = self.LINUX_SHELL_SCRIPT
    elif current_os == 'Windows':
      self.output_manager.log("Error: Calling collect_unix_data() for collecting Windows stats is incorrect.")
      return
    else:
      # No shell script exists for any other OS; without this guard
      # shell_type would be unbound further down.
      self.output_manager.log("Error: Unsupported OS '%s' in collect_unix_data()." % current_os)
      return

    self.output_manager.log("Starting up unix script.")

    PIDs_string_arg = ' '.join([str(server['pid']) for server in server_manager.servers])
    java_runtime = java.lang.Runtime.getRuntime()
    if self.config.cpu_threshold:
      # Monitoring only: the shell script watches the CPU usage of the PIDs.
      args = '--cpu-threshold=%i --check-cpu-interval=%i' % (self.config.cpu_threshold,self.config.check_cpu_interval) + ' ' + PIDs_string_arg + ' ' + '--output-dir='+OUTPUT_DIR + ' --dir-name='+SCRIPT_OUTPUT_NAME+'/'+self.OS_DATA_DIR + ' --monitor-only' + ' --hide-stamp'
      if self.config.print_cpu_usage:
        args += ' --print-cpu-usage'
      command = [shell_type, '-c', shell_script, '_'] + args.split()
    else:
      shell_span=self.config.iterations*self.config.delay
      # Keep the tprof span within the script span: a quarter of it, at least 5.
      tprof_span = TPROF_SPAN
      if tprof_span > shell_span:
        if shell_span/4 >= 5:
          tprof_span = shell_span/4
        else:
          tprof_span = 5
      flags = '-j %i -s %i -t %i -v %i' % (self.config.delay, shell_span, tprof_span, VMSTAT_INTERVAL)
      args = flags + ' ' + PIDs_string_arg
      command = [shell_type, '-c', shell_script, '_'] + args.split() + ['--disable-collecting-hostname','--disable-kill', '--output-dir='+OUTPUT_DIR, '--dir-name='+SCRIPT_OUTPUT_NAME+'/'+self.OS_DATA_DIR, '--hide-stamp']

    process = java_runtime.exec(command)
    reader = java.io.BufferedReader(java.io.InputStreamReader(process.getInputStream()))
    try:
      line = reader.readLine()
      while line is not None:
        self.output_manager.log('Shell script: '+line)
        if self.config.cpu_threshold and line.startswith("PID "):
          high_cpu_pid = line.split()[1] # Extract PID number from the "PID <pid> exceeded the CPU threshold, currently at <usage>%" output line.
          if high_cpu_pid.isdigit():
            server = server_manager.find_server_by_pid(high_cpu_pid)
            server_manager.set_status(server,ServerManager.STATUS.TRIGGERED)
            break
        line = reader.readLine()
      exit_code = process.waitFor()
    finally:
      # Release the pipe to the shell script in every case.
      reader.close()

    if exit_code != 0:
      self.output_manager.log('Shell script returns with %s' % exit_code)
      self.output_manager.log('Unix script unexpectedly exited')
    else:
      self.output_manager.log('Unix script completed')

  # To change the LINUX_SHELL_SCRIPT content, make and test the changes in linperf.sh first, then update it here.
  LINUX_SHELL_SCRIPT = r"""
    #!/bin/sh
    SCRIPT_SPAN=240         # How long the whole script should take . Default=240
    JAVACORE_INTERVAL=30    # How often javacores should be taken including the final one. Default=30
    TOP_INTERVAL=60         # How often top data should be taken. Default=60
    TOP_DASH_H_INTERVAL=5   # How often top dash H data should be taken. Default=5
    VMSTAT_INTERVAL=5       # How often vmstat data should be taken. Default=5
    CHECK_CPU_INTERVAL=30   # How often the script should check a process's CPU usage. Default=30.
                            # It only works with -c <value> or --cpu-threshold=<value>.
    MONITOR_ONLY=0          # Monitoring trigering events only with no data created.
                            # It only works with -c <value> or --cpu-threshold=<value>. Default=0
    KEEP_QUIET=0            # Disable the end print message, change to 1. Default=0
    ALLOW_STATS=0           # Collect OS data without provided PIDs, change to 1. Default=0
    ROOT_ACCESS_REQUIRED=1        # Default=1 to require root for running the script.
    DISABLE_COLLECTING_IFCONFIG=0 # Default=0 to collect ifconfig info
    DISABLE_COLLECTING_HOSTNAME=0 # Default=0 to collect hostname info
    #####################################################################################################
    # * All the 'INTERVAL' values should divide into the 'SCRIPT_SPAN' by a whole
    #   integer to obtain expected results.
    # * Setting any 'INTERVAL' too low (especially JAVACORE) can result in data
    #   that may not be useful towards resolving the issue.  This becomes a problem
    #   when the process of collecting data obscures the real issue.
    ###############################################################################
    SCRIPT_VERSION=2025.12.08
    START_DAY="$(date +%Y%m%d)"
    START_TIME="$(date +%H%M%S)"

    provided_pids=""

    while [ $# -gt 0 ]; do
      case "$1" in
        -j ) JAVACORE_INTERVAL="$2"; shift 2;;  
        -q ) KEEP_QUIET=1; shift 1;;
        -s ) SCRIPT_SPAN="$2"; shift 2;;
        -t ) TOP_INTERVAL="$2"; shift 2;;
        -v ) VMSTAT_INTERVAL="$2"; shift 2;;
        -z ) ALLOW_STATS=1; shift 1;;
        # Triggering event
        -c ) CPU_THRESHOLD="$2"; shift 2;;

        # Disable issuing a kill -3.
        --disable-kill ) disable_kill=1; shift 1;;
        # Disable writing to screen.out file. Only used by wsadmin script.
        --disable-screen-output ) DISABLE_SCREEN_OUTPUT=1; shift 1;;

        # Rename the output dir name. Only used by a wsadmin script.
        --dir-name=* ) DIR_NAME="${1#*=}"; shift 1;;
        # Redirect linperf_RESULTS data to a specified directory.
        --output-dir=* ) OUTPUT_DIR="${1#*=}"; shift 1;;

        # Monitoring a triggering event only without data created.
        --monitor-only ) MONITOR_ONLY=1; shift 1;;

        # Triggering event.
        --cpu-threshold=* ) CPU_THRESHOLD="${1#*=}"; shift 1;;
        --check-cpu-interval=* ) CHECK_CPU_INTERVAL="${1#*=}"; shift 1;;
        --print-cpu-usage ) PRINT_CPU_USAGE=1; shift 1;;

        # Disable irix mode to cap at 100% CPU in this script.
        --disable-irix-mode ) irix_mode=0; shift 1;;

        # Only used by a wsadmin scirpt.
        --hide-stamp ) HIDE_STAMP=1; shift 1;;

        # Disable collecting hostname and ifconfig
        --disable-collecting-ifconfig ) DISABLE_COLLECTING_IFCONFIG=1; shift 1;;
        --disable-collecting-hostname ) DISABLE_COLLECTING_HOSTNAME=1; shift 1;;

        # Disable root requirement
        --ignore-root ) ROOT_ACCESS_REQUIRED=0; shift 1;;

        # Clean up javacores after tarring. 
        --clean-up-javacores ) CLEAN_UP_JAVACORES=1; shift 1;;

        [0-9]* ) provided_pids="$provided_pids $1"; shift 1;;

        * ) echo "Unknown option: $1"; exit 1;;
      esac
    done
    # echo "$monitor_log_file $match_trace $provided_pids"
    # exit 1
    #############################################
    # If PIDs are not provided, the script exits.
    #############################################
    if [ -z "$provided_pids" ] && [ $ALLOW_STATS -eq 0 ]; then
      echo "Unable to find required PID argument. Please rerun the script as follows:"
      echo "./linperf.sh [PID(s)_of_the_problematic_JVM(s)_separated_by_spaces]"
      exit 1
    fi

    ######################
    # Verify option inputs
    ######################
    if [ $MONITOR_ONLY -eq 1 ]; then
      if [ -z "${CPU_THRESHOLD}" ]; then
        echo "To run with --monitor-only, include --cpu-threshold=<value>. Exiting script."
        exit 1
      fi
    fi

    if [ -n "$irix_mode" ] && [ -n "$CPU_THRESHOLD" ]; then
      if [ $irix_mode -eq 0 ] && [ $CPU_THRESHOLD -gt 100 ]; then
        echo "With --disable-irix-mode, --cpu-threshold=<value> cannot exceed 100. Exiting script"
        exit 1
      fi
    fi

    ################################################
    # Verify running PIDs
    # Remove duplicate PID and non-exist PID.
    # The script exits if PIDs do not exist.
    ################################################
    pids=""
    if [ -n "$provided_pids" ]; then
      for pid in `echo $provided_pids | xargs -n1 | sort -u | xargs`
      do
        if test -d /proc/"$pid"/; then
          pids="$pid $pids"
        else
          echo "PID $pid does not exist. Exiting script."
          exit 1
        fi
      done
    fi

    ####################################################
    # Exit the script if running as a non-root user and
    # the user does not match all PID owners
    ####################################################
    current_id="$(id -u)"
    current_group="$(id -g)"
    if [ "${current_id}" -ne "0" ] && [ "${current_group}" -ne "0" ] && [ $ROOT_ACCESS_REQUIRED -eq 1 ]; then
      for pid in $pids; do
        if [ "${current_id}" -ne "$(stat -c "%u" /proc/${pid})" ] && [ "${current_group}" -ne "$(stat -c "%g" /proc/${pid})" ]; then
          echo "PID ${pid} is owned by user '$(stat -c "%U" /proc/${pid})' and group '$(stat -c "%G" /proc/${pid})' but you are $(id). Either switch users or run as root/sudo. Use --ignore-root to bypass."
          exit 1
        fi
      done
    elif [ $ROOT_ACCESS_REQUIRED -eq 0 ]; then
      echo $(date '+%Y-%m-%d %H:%M:%S') "\tWarning: Root access is disabled. Data may be incomplete."
    fi

    ################################
    # Assign OUTPUT_DIR and DIR_NAME 
    ################################
    if [ -n "$OUTPUT_DIR" ] && [ -n "$DIR_NAME" ]; then
      OUTPUT_DIR="$OUTPUT_DIR/$DIR_NAME"
    elif [ -n "$OUTPUT_DIR" ] && [ -z "$DIR_NAME" ]; then
      readonly DIR_NAME="linperf_RESULTS.$START_DAY.$START_TIME"
      OUTPUT_DIR="$OUTPUT_DIR/$DIR_NAME"
    elif [ -z "$OUTPUT_DIR" ] && [ -n "$DIR_NAME" ]; then
      OUTPUT_DIR="$DIR_NAME"
    else
      readonly DIR_NAME="linperf_RESULTS.$START_DAY.$START_TIME"
      OUTPUT_DIR="$DIR_NAME"
    fi
    case "$OUTPUT_DIR" in
      *" "* ) echo "\"$OUTPUT_DIR\" contains a space. Exiting script."
      exit 1
    esac

    #####################################
    # Create a temporary output directory
    #####################################
    if [ $MONITOR_ONLY -ne 1 ]; then
      mkdir -p $OUTPUT_DIR
      if [ $? -ne 0 ]; then
        echo "Failed to create $OUTPUT_DIR."
        exit 1
      fi
      # Go into the created output directory and
      # get the full path to let the user know the current directory.
      cd $OUTPUT_DIR
      if [ $? -ne 0 ]; then
        echo "Failed to go into $OUTPUT_DIR."
        exit 1
      fi
      readonly OUTPUT_DIR="$(pwd)"
    fi

    #################################
    # Write a message to screen.out
    # Empty $1 does not write out
    #################################
    log()
    {
      # $1 - message
      if [ -z "${DISABLE_SCREEN_OUTPUT}" ] && [ -n "$1" ] && [ $MONITOR_ONLY -ne 1 ] && [ -z "${HIDE_STAMP}" ]; then
        printf "%s\t%s\n" "$(date '+%Y-%m-%d %H:%M:%S')" "$1" | tee -a screen.out
      elif [ -n "${DISABLE_SCREEN_OUTPUT}" ] && [ -n "${HIDE_STAMP}" ] && [ -n "$1" ]; then
        printf "%s\n" "$1"
      elif [ -n "${HIDE_STAMP}" ] && [ -n "$1" ]; then
        printf "%s\n" "$1" | tee -a screen.out 
      elif [ -n "$1" ]; then
        printf "%s\t%s\n" "$(date '+%Y-%m-%d %H:%M:%S')" "$1"
      fi
    }
    #######################################################################
    # Remove PID either calling from cpu_trigger() or when kill -3 PID fail
    # and return remaining $pids.
    #######################################################################
    no_longer_running_pids=""
    remove_pid()
    {
      pid_to_remove=$1
      pids=$2
      # Remove PID from PIDs, replace multiple spaces with single, remove trailing space
      new_pids=$(echo "$pids" | sed "s/\b$pid_to_remove\b//g" | sed 's/  / /g' | sed 's/ *$//')
      echo "$new_pids"
    }

    #############################
    # Calculate CPU usage for PID
    #############################
    # Get the total CPU time (user time + system time) for given PID.
    get_total_time_for_pid(){
      stat_line=$(cat /proc/$1/stat)
      utime=$(echo $stat_line | awk '{print $14}')
      stime=$(echo $stat_line | awk '{print $15}')
      echo $((utime + stime))
    }

    get_initial_cpu_times(){
      for pid in $pids; do
        get_total_time_for_pid $pid
      done
    }

    #########################################################
    # Loop until CPU usage threshold for given PID is reached
    #########################################################
    cpu_trigger()
    {
      if [ -n "${CPU_THRESHOLD}" ] && [ -n "${pids}" ]; then
        # Assign irix_mode if it is null and toprc file exists.
        # Otherwise, set irix_mode to 1 if no toprc file exists.
        if [ -z "$irix_mode" ] && [ -f "$HOME/.config/procps/toprc" ]; then
          irix_mode=$(grep -oP 'Mode_irixps=\K[0-9]' "$HOME/.config/procps/toprc")
        elif [ -z "$irix_mode" ] && [ -f "$HOME/.toprc" ]; then
          irix_mode=$(grep -oP 'Mode_irixps=\K[0-9]' "$HOME/.toprc")
        elif [ -z "$irix_mode" ]; then
          irix_mode=1
        fi

        num_of_cpus=$(nproc)
        log "CHECK_CPU_INTERVAL = $CHECK_CPU_INTERVAL"
        log "Irix mode = $irix_mode"
        log "Number of CPUs: $num_of_cpus"
        log "Monitoring PIDs: $pids"
        log "Waiting for a process to exceed $CPU_THRESHOLD% CPU usage."

        num_of_cpus=$(nproc)
        while :
        do
          # An array of total times spent by provided PIDs at the first point in time.
          initial_times=$(get_initial_cpu_times)

          sleep $CHECK_CPU_INTERVAL
          i=1
          for pid in $pids; do

            if [ -e /proc/$pid ]; then
              # Get the value of the initial time spent by the process from the array.
              cpu_time_before=$(echo "$initial_times" | sed -n "${i}p")
              # The second point: The process has been up.
              cpu_time_after=$(get_total_time_for_pid $pid)

              # Calculate the difference in total time between two points (time_after - time_before)
              # Calculate CPU usage percentage over the CHECK_CPU_INTERVAL(sleep_time)
              # If irix mode is off, divide the CPU usage by number of CPUs.
              pid_cpu_usage=$(awk -v time_before=$cpu_time_before -v time_after=$cpu_time_after -v sleep_time=$CHECK_CPU_INTERVAL 'BEGIN {printf "%0.f", ((time_after - time_before ) / (sleep_time  )) }')

              if [ "$irix_mode" -eq 0 ]; then
                pid_cpu_usage=$((pid_cpu_usage/num_of_cpus))
              fi
              if [ $PRINT_CPU_USAGE -eq 1 ]; then
                log "PID $pid's CPU usage: $pid_cpu_usage%"
              fi

              if [ $pid_cpu_usage -ge $CPU_THRESHOLD ]; then
                log "PID $pid exceeded the CPU usage threshold, currently at $pid_cpu_usage%."
                break 2
              fi
            else
              log "PID $pid is no longer running."
              no_longer_running_pids="$no_longer_running_pids $pid"
            fi
            i=$((i + 1))
          done

          for pid in $no_longer_running_pids; do
            pids=$(remove_pid $pid "$pids")
          done

          if [ -z "$pids" ]; then
            log "No more running PIDs provided. Exiting script."
            exit 1
          elif [ -n "$no_longer_running_pids" ]; then
            log "Monitoring remaining PIDs: $pids"
            # Reset no_longer_running_pids for future use if $pid becomes inaccessible.
            no_longer_running_pids=""
          fi
        done
      fi
      if [ $MONITOR_ONLY -eq 1 ]; then
        exit 0
      fi
    }

    #############################################################################
    # If MONITOR_ONLY is true, no output data is created after this if statement.
    #############################################################################
    if [ $MONITOR_ONLY -eq 1 ]; then
      log "linperf version:  $SCRIPT_VERSION."
      if [ $ROOT_ACCESS_REQUIRED -eq 0 ]; then
        log "ROOT_ACCESS_REQUIRED = 0."
      fi
      cpu_trigger
      # Future plan:
      # FileDescriptorTrigger?
      # SwapTrigger?
      exit 0
    fi

    #############################################
    # Let user know path to a temporary directory
    # and the inputs and beings are displayed.
    #############################################
    if [ $DISABLE_COLLECTING_HOSTNAME -eq 0 ]; then
      current_hostname="on $(hostname)"
    fi
    log "Output to $OUTPUT_DIR $current_hostname."
    log "linperf version: $SCRIPT_VERSION."
    if [ $ROOT_ACCESS_REQUIRED -eq 0 ]; then
      log "ROOT_ACCESS_REQUIRED = 0."
    fi
    for pid in $pids
    do
      log "Provided PID: $pid"
    done
    log "SCRIPT_SPAN = $SCRIPT_SPAN"
    if [ -z $disable_kill ]; then
      log "JAVACORE_INTERVAL = $JAVACORE_INTERVAL"
    else
      log "Disable kill -3: true"
    fi
    log "TOP_INTERVAL = $TOP_INTERVAL"
    log "TOP_DASH_H_INTERVAL = $TOP_DASH_H_INTERVAL"
    log "VMSTAT_INTERVAL = $VMSTAT_INTERVAL"
    log "Timezone: $(date +%Z)"
    # Call cpu_trigger() to check if CPU_THRESHOLD is set.
    # If set, it will check CPU usage and collect relevant data.
    cpu_trigger

    # Collect the user currently executing the script.
    date > whoami.out
    whoami >> whoami.out 2>&1
    log "Collection of user authority data complete."

    ######################
    # Start collection of:
    #  * netstat x2
    #  * ps -elf
    #  * uptime
    #  * top
    #  * top dash H
    #  * vmstat
    #  * javacores
    #  * ifconfig -a if DISABLE_COLLECTING_IFCONFIG is 0. 
    ######################
    # Collect the first netstat: date at the top, data, and then a blank line.
    date >> netstat.out
    netstat -pan >> netstat.out 2>&1
    echo >> netstat.out
    log "First netstat snapshot complete."

    # Collect the ps -elf: date at the top, data, and then a blank line.
    log "Collecting a ps -elf snapshot."
    date >> ps.out
    ps -elf >> ps.out 2>&1
    ps aux >> ps.aux.out 2>&1
    echo >> ps.out


    # Collect the uptime
    log "Collecting uptime."
    uptime >> uptime.out

    # Start the collection of top data.
    # It runs in the background so that other tasks can be completed while this runs.
    date >> top.out
    echo >> top.out
    top -bc -d $TOP_INTERVAL -n `expr $SCRIPT_SPAN / $TOP_INTERVAL + 1` >> top.out 2>&1 &
    # Assign TOP's PID to top_pids. It will be used for terminating TOP processes
    # when the script is unexpectedly finished early (e.g. PIDs are inaccessible).
    top_pids=$!
    log "Collection of top data started."

    # Start the collection of top dash H data.
    # It runs in the background so that other tasks can be completed while this runs.
    for pid in $pids
    do
      log "Collecting against PID $pid." >> topdashH.$pid.out
      echo >> topdashH.$pid.out
      top -bH -d $TOP_DASH_H_INTERVAL -n `expr $SCRIPT_SPAN / $TOP_DASH_H_INTERVAL + 1` -p $pid >> topdashH.$pid.out 2>&1 &
      top_pids="$top_pids $!"
      log "Collection of top dash H data started for PID $pid."
    done

    # Start the collection of vmstat data.
    # It runs in the background so that other tasks can be completed while this runs.
    date >> vmstat.out
    vmstat $VMSTAT_INTERVAL `expr $SCRIPT_SPAN / $VMSTAT_INTERVAL + 1` >> vmstat.out 2>&1 &
    log "Collection of vmstat data started."

    if [ $DISABLE_COLLECTING_IFCONFIG -eq 0 ]; then
      log "Collecting ifconfig -a"
      ifconfig -a >> ifconfig.out
      ifconfig_file="ifconfig.out"
    fi

    #############################################################################
    # Start collection of javacores and ps -eLf.
    # Loop the appropriate number of times, pausing for the given amount of time,
    # and iterate through each supplied PID.
    # A kill -3 command will not be executed if disable_kill is true.
    #############################################################################
    n=1
    m=`expr $SCRIPT_SPAN / $JAVACORE_INTERVAL`

    if [ -n "$pids" ] && [ -z "$disable_kill" ]; then
      log "Issuing a kill -3 command for the provided PID(s) to create a javacore or a thread dump."
    fi
    while [ $n -le $m ]
    do
      # Collect a ps snapshot: date at the top, data, and then a blank line.
      log "Collecting a ps -eLf snapshot."
      date >> ps.threads.out
      ps -eLf >> ps.threads.out 2>&1
      echo >> ps.threads.out
      
      # Produce a javacore/thread dump
      if [ -z "$disable_kill" ]; then
        for pid in $pids
        do
          kill_output=$(kill -3 $pid 2>&1)
          if [ $? -ne 0 ]; then
            log "PID $pid inaccessible. $kill_output"
            pids=$(remove_pid $pid "$pids")
            no_longer_running_pids="$no_longer_running_pids $pid"
            if [ -z "$pids" ]; then
              log "No PIDs remaining to process, finishing the script."
              # Kill -3 $PID is no longer needed due to above.
              # Support team guides for locating javacores/thread dumps if they were produced before PIDs are inaccessible/no longer.
              disable_kill=1
              break 3
            fi
          else
            log "Issued a kill -3 for PID $pid."
          fi
        done
      fi

      # Pause for JAVACORE_INTERVAL seconds.
      log "Continuing to collect data for $JAVACORE_INTERVAL seconds..."
      sleep $JAVACORE_INTERVAL
      # Increment counter
      n=`expr $n + 1`
    done

    # Collect a final javacore and ps -eLf snapshot.
    log "Collecting the final ps -eLf snapshot."
    date >> ps.threads.out
    ps -eLf >> ps.threads.out 2>&1
    echo >> ps.threads.out

    # Produce the final javacore/thread dump.
    if [ -z "$disable_kill" ]; then
      for pid in $pids
      do
        log "Issuing the final kill -3 for PID $pid."
        log $(kill -3 $pid 2>&1)
      done
    fi

    # Collect a final netstat.
    date >> netstat.out
    netstat -pan >> netstat.out 2>&1
    log "Final netstat snapshot complete."

    #########################
    # Other data collection #
    #########################
    log "Collecting other data."
    dmesg="dmesg.out"
    if ! dmesg > /dev/null 2>&1; then
      dmesg=""
      log "dmesg data unavailable due to access restrictions (normal in containers/non-root)."
    else
      dmesg > dmesg.out 2>&1
    fi
    df -hk > df-hk.out 2>&1

    # Terminate TOP processes created by this script.
    trap 'kill $top_pids 2>/dev/null' EXIT
    """

  # To make changes to AIX_SHELL_SCRIPT content, change in aixperf.sh and test first before updating here.
  # The text is a ksh script kept in a raw string so that its backslashes reach the shell untouched.
  # The CPU threshold trigger is not supported on AIX (see the notes inside the script).
  # This embedded copy carries the following fixes, which should be mirrored in aixperf.sh:
  #  - The --monitor-only validation message and the header comments name the options the parser
  #    really accepts (--monitor-only / --cpu-threshold) instead of --monitorOnly / --CPUThreshold.
  #  - remove_pid() compares whole words in a loop instead of using the sed `\b` escape, which
  #    POSIX sed does not define.
  #  - cpu_trigger() removes an inaccessible PID through remove_pid(); the previous substring
  #    substitution also damaged longer PIDs containing the same digits (123 inside 1234).
  #  - The output of the final `kill -3` is quoted so log() receives the whole message instead of
  #    only its first word.
  AIX_SHELL_SCRIPT = r"""
    #!/bin/ksh
    SCRIPT_SPAN=240         # How long the whole script should take. Default=240
    JAVACORE_INTERVAL=30    # How often javacores should be taken. Default=30
    TPROF_SPAN=60           # How long tprof should collect data. Default=60
    VMSTAT_INTERVAL=5       # How often vmstat data should be taken. Default=5
    CPU_THRESHOLD=          # Triggers data collection when the CPU usage is reached. Default empty.
                            # WARNING: ps -p $PID -o pcpu prints the process's lifetime CPU usage. Not supported but being explored.
    CHECK_CPU_INTERVAL=30   # How often the script should check a process's CPU usage. Default=30.
                            # It only works with CPU_THRESHOLD=<value>, -c <value> or --cpu-threshold=<value>.
    MONITOR_ONLY=0          # Monitoring trigering events only with no data created.
                            # It only works with -c <value> or --cpu-threshold=<value>. Default=0
    KEEP_QUIET=0            # To disable the end print message, change to 1. Default=0
    ALLOW_STATS=0           # To collect OS data without provided PIDs, change to 1. Default=0
    ROOT_ACCESS_REQUIRED=1        # Default=1 to require root for running the script.
    DISABLE_COLLECTING_IFCONFIG=0 # Default=0 to collect ifconfig info.
    DISABLE_COLLECTING_HOSTNAME=0 # Default=0 to collect hostname info.
    #####################################################################################################
    # * All the 'INTERVAL' values should divide into the 'SCRIPT_SPAN' by a whole
    #   integer to obtain expected results.
    # * Setting any 'INTERVAL' too low (especially JAVACORE) can result in data
    #   that may not be useful towards resolving the issue.  This becomes a problem
    #   when the process of collecting data obscures the real issue.
    ###############################################################################
    SCRIPT_VERSION=2025.12.08
    START_DAY="$(date +%Y%m%d)"
    START_TIME="$(date +%H%M%S)"

    provided_pids=""

    while [[ $# -gt 0 ]]; do
      case "$1" in
        -j ) JAVACORE_INTERVAL="$2"; shift 2;;
        -q ) KEEP_QUIET=1; shift 1;;
        -s ) SCRIPT_SPAN="$2"; shift 2;;
        -t ) TPROF_SPAN="$2"; shift 2;;
        -v ) VMSTAT_INTERVAL="$2"; shift 2;;
        -z ) ALLOW_STATS=1; shift 1;;
        # Triggering event
        -c ) CPU_THRESHOLD="$2"; shift 2;; # Not Supported but being explored.

        # Disable issuing a kill -3
        --disable-kill ) disable_kill=1; shift 1;;
        # Disable writing to screen.out file. Only used by wsadmin script.
        --disable-screen-output ) DISABLE_SCREEN_OUTPUT=1; shift 1;;

        # Rename aixperf_RESULTS. Only used by a wsadmin script.
        --dir-name=* ) DIR_NAME="${1#*=}"; shift 1;;
        # Redirect aixperf_RESULTS data to a specified directory.
        --output-dir=* ) OUTPUT_DIR="${1#*=}"; shift 1;;

        # Monitoring a triggering event only without data created.
        --monitor-only ) MONITOR_ONLY=1; shift 1;;

        # Triggering event. UDPATE: Not supported but being explored.
        --cpu-threshold=* ) CPU_THRESHOLD="${1#*=}"; shift 1;;
        --check-cpu-interval=* ) CHECK_CPU_INTERVAL="${1#*=}"; shift 1;;

        # Only used by a wsadmin scirpt.
        --hide-stamp ) HIDE_STAMP=1; shift 1;;

        # Disable collecting hostname and ifconfig
        --disable-collecting-ifconfig ) DISABLE_COLLECTING_IFCONFIG=1; shift 1;;
        --disable-collecting-hostname ) DISABLE_COLLECTING_HOSTNAME=1; shift 1;;

        # Disable root requirement
        --ignore-root ) ROOT_ACCESS_REQUIRED=0; shift 1;;

        # Clean up javacores after tarring.
        --clean-up-javacores ) CLEAN_UP_JAVACORES=1; shift 1;;

        [0-9]* ) provided_pids="$provided_pids $1"; shift 1;;

        * ) print "Unknown option: $1"; exit 1;;

      esac
    done
    ############################################
    # Root access is required for complete data.
    ############################################
    if [ "$(id -u)" -ne 0 ] && [ $ROOT_ACCESS_REQUIRED -eq 1 ]; then
      print "Root access is required. Use --ignore-root to bypass."
      exit 1
    elif [ $ROOT_ACCESS_REQUIRED -eq 0 ]; then
      print $(date '+%Y-%m-%d %H:%M:%S') "\tWarning: Root access is disabled. Data may be incomplete."
    fi

    #############################################
    # If PIDs are not provided, the script exits.
    #############################################
    if [ -z "$provided_pids" ] && [ $ALLOW_STATS -eq 0 ]; then
      print "Unable to find required PID argument. Please rerun the script as follows:"
      print "./aixperf.sh [PID(s)_of_the_problematic_JVM(s)_separated_by_spaces]"
      exit 1
    fi

    ######################
    # Verify option inputs
    ######################
    if [ $MONITOR_ONLY -eq 1 ]; then
      if [ -z "${CPU_THRESHOLD}" ]; then
        echo "To run with --monitor-only, include --cpu-threshold=<value>. Exiting the script."
        exit 1
      fi
    fi

    ################################################
    # Verify running PIDs
    # Remove duplicate PID and non-exist PID.
    # The script exits if PIDs do not exist.
    ################################################
    pids=""
    if [ -n "$provided_pids" ]; then
      for pid in `echo $provided_pids | xargs -n1 | sort -u | xargs`
      do
        if test -d /proc/"$pid"/; then
          pids="$pid $pids"
        else
          print "pid $pid does not exist. Exiting the script."
          exit 1
        fi
      done
    fi

    ################################
    # Assign OUTPUT_DIR and DIR_NAME
    ################################
    if [ -n "$OUTPUT_DIR" ] && [ -n "$DIR_NAME" ]; then
      OUTPUT_DIR="$OUTPUT_DIR/$DIR_NAME"
    elif [ -n "$OUTPUT_DIR" ] && [ -z "$DIR_NAME" ]; then
      readonly DIR_NAME="aixperf_RESULTS.$START_DAY.$START_TIME"
      OUTPUT_DIR="$OUTPUT_DIR/$DIR_NAME"
    elif [ -z "$OUTPUT_DIR" ] && [ -n "$DIR_NAME" ]; then
      OUTPUT_DIR="$DIR_NAME"
    else
      readonly DIR_NAME="aixperf_RESULTS.$START_DAY.$START_TIME"
      OUTPUT_DIR="$DIR_NAME"
    fi
    case "$OUTPUT_DIR" in
      *" "* ) print "\"$OUTPUT_DIR\" contains a space. Exiting the script."
      exit 1
    esac

    #####################################
    # Create a temporary output directory
    #####################################
    if [ $MONITOR_ONLY -ne 1 ]; then
      mkdir -p $OUTPUT_DIR
      if [ $? -ne 0 ]; then
        print "Failed to create $OUTPUT_DIR."
        exit 1
      fi
      # Go into the created output directory and
      # get the full path to let the user know the current directory.
      cd $OUTPUT_DIR
      if [ $? -ne 0 ]; then
        print "Failed to go into $OUTPUT_DIR."
        exit 1
      fi
      readonly OUTPUT_DIR="$(pwd)"
    fi

    ###############################
    # Write a message to screen.out
    # Empty $1 does not write out
    ###############################
    log()
    {
      # $1 - message
      if [ -z "${DISABLE_SCREEN_OUTPUT}" ] && [ -n "$1" ] && [ $MONITOR_ONLY -ne 1 ] && [ -z "${HIDE_STAMP}" ]; then
        print $(date '+%Y-%m-%d %H:%M:%S')  "\t$1" | tee -a screen.out
      elif [ -n "${HIDE_STAMP}" ] && [ -n "$1" ]; then
        print "$1"
      elif [ -n "$1" ]; then
        print $(date '+%Y-%m-%d %H:%M:%S')  "\t$1"
      fi
    }
    #######################################################################
    # Remove PID either calling from cpu_trigger() or when kill -3 PID fail
    # and return remaining $pids.
    # $1 - PID to remove, $2 - space separated list of PIDs.
    # Whole words are compared, so removing 123 leaves 1234 untouched, and
    # no sed word-boundary escape is needed.
    #######################################################################
    no_longer_running_pids=""
    remove_pid()
    {
      pid_to_remove=$1
      new_pids=""
      for remaining_pid in $2
      do
        if [ "$remaining_pid" != "$pid_to_remove" ]; then
          new_pids="$new_pids $remaining_pid"
        fi
      done
      # Unquoted on purpose: drops the leading space and collapses repeated spaces.
      echo $new_pids
    }

    ###########################################
    # Loop until CPU usage threshold is reached
    # TODO: ps -p $pid -o pcpu does not print the current CPU usage. Workaround is being explored.
    ###########################################
    cpu_trigger()
    {
      if [ -n "${CPU_THRESHOLD}" ] && [ -n "${pids}" ]; then
        log "CHECK_CPU_INTERVAL = $CHECK_CPU_INTERVAL"
        log "Monitoring PIDs: $pids"
        log "Waiting for a process to exceed $CPU_THRESHOLD% CPU usage."
        count_pids=$(echo $pids | awk '{print NF}')
        non_exist_pids=""
        while :
        do
          for pid in $pids
          do
            pid_CPU=$(ps -p $pid -o pcpu | tail -1 | sed 's/^[[:blank:]]*//;s/[[:blank:]]*$//')
            if [ "$pid_CPU" = "%CPU" ]; then
              log "PID $pid inaccessible."
              # Remove PID from PIDs (whole-word match) and add it to non_exist_pids
              pids=$(remove_pid $pid "$pids")
              temp_non_exist_pid="$non_exist_pids $pid"
              non_exist_pids="$temp_non_exist_pid"

              count_non_pids=$(echo $non_exist_pids | awk '{print NF}')
              if [ $count_non_pids -eq $count_pids ]; then
                log "PIDs $non_exist_pids inaccessible. Exiting the script."
                exit 1
              else
                log "Monitoring remaining PIDs: $pids"
              fi

              # As PID is inaccessible, assign 0 to pid_CPU rather having it as '%CPU'
              # to avoid error for the next if statement below.
              pid_CPU=0
            fi

            if [ $pid_CPU -ge $CPU_THRESHOLD ] ; then
              log "PID $pid exceeded the CPU usage threshold, currently at $pid_CPU%"
              break 2
            fi
            pid_CPU=0
          done
          sleep $CHECK_CPU_INTERVAL
        done
      fi
      if [ $MONITOR_ONLY -eq 1 ]; then
        exit 0
      fi
    }

    #############################################################################
    # If MONITOR_ONLY is true, no output data is created after this if statement.
    #############################################################################
    if [ $MONITOR_ONLY -eq 1 ]; then
      log "aixperf version:  $SCRIPT_VERSION."
      # cpu_trigger # Not supported.
      # Future plan:
      # FileDescriptorTrigger?
      # SwapTrigger?
      exit 0
    fi

    #############################################
    # Let user know path to a temporary directory
    # and the inputs and beings are displayed.
    #############################################
    if [ $DISABLE_COLLECTING_HOSTNAME -eq 0 ]; then
      current_hostname="on $(hostname)"
    fi
    log "Output to $OUTPUT_DIR $current_hostname."
    log "aixperf version:  $SCRIPT_VERSION."
    if [ $ROOT_ACCESS_REQUIRED -eq 0 ]; then
      log "ROOT_ACCESS_REQUIRED = 0."
    fi
    for pid in $pids
    do
      log "Provided PID:  $pid"
    done
    log "SCRIPT_SPAN = $SCRIPT_SPAN"
    if [ -z "$disable_kill" ]; then
      log "JAVACORE_INTERVAL = $JAVACORE_INTERVAL"
    else
      log "Disable kill -3 : true"
    fi
    log "TPROF_SPAN = $TPROF_SPAN"
    log "VMSTAT_INTERVAL = $VMSTAT_INTERVAL"
    log "Timezone: $(date +%Z)"
    # Call cpu_trigger() to check if CPU_THRESHOLD is set.
    # If set, it will check CPU usage and collect relevant data.
    cpu_trigger

    # Collect the user currently executing the script
    date > whoami.out
    whoami >> whoami.out 2>&1
    log "Collection of user authority data complete."

    ################################################################################
    # Start collection of:
    #  * netstat x2
    #  * ps
    #  * uptime
    #  * vmstat
    #  * tprof
    #  * javacores
    #  * ifconfig -a if DISABLE_COLLECTING_IFCONFIG is 0.
    #########################
    wparname=`uname -W`
    wparspec=""
    if ((wparname != 0))
      then wparspec="-@ $wparname"
    fi

    # Collect the first netstat: date at the top, data, and then a blank line
    date >> netstat.out
    netstat -an >> netstat.out 2>&1
    print >> netstat.out
    log "First netstat snapshot complete."

    # Collect the ps: date at the top, data, and then a blank line
    log "Collecting a ps snapshot."
    date >> ps.out
    ps $wparspec avwwwg >> ps.out 2>&1
    print >> ps.out

    # Collect the uptime
    log "Collecting uptime."
    uptime >> uptime.out

    # Start the collection of tprof data.
    # It runs in the background so that other tasks can be completed while this runs.
    log "Starting collection of tprof data..."
    date >> tprof.out
    # Calculate TPROF_ARGS
    # First, check if environment is SMT capable.  If so, proceed to next check; if not, set args without -R option
    # Next, check if OS is 32 bit.  If so, set args without -R option (-R is not supported on 32 bit); if not, set args with -R option.
    if  /usr/sbin/smtctl >/dev/null 2>&1; then
      KERTYPE=`getconf KERNEL_BITMODE`
      if [ "$KERTYPE" = 32 ]; then
        log "TPROF_ARGS set to -skex; SMT capable but 32 bit environment."
        TPROF_ARGS="-skex"
      else
        log "TPROF_ARGS set to -Rskex; SMT capable and NOT a 32 bit environment."
        TPROF_ARGS="-Rskex"
      fi
    else
      log "TPROF_ARGS set to -skex; Not SMT capable."
      TPROF_ARGS="-skex"
    fi
    LDR_CNTRL=MAXDATA=0x80000000 tprof $wparspec $TPROF_ARGS sleep $TPROF_SPAN >> tprof.out 2>&1 &
    log "Collection of tprof data started."

    # Start the colletion of vmstat data.
    # It runs in the background so that other tasks can be completed while this runs.
    date >> vmstat.out
    vmstat $wparspec $VMSTAT_INTERVAL `expr $SCRIPT_SPAN / $VMSTAT_INTERVAL + 1` >> vmstat.out 2>&1 &
    log "Collection of vmstat data started."

    if [ $DISABLE_COLLECTING_IFCONFIG -eq 0 ]; then
      log "Collecting ifconfig -a"
      ifconfig -a >> ifconfig.out
      ifconfig_file="ifconfig.out"
    fi

    ###############################################################################
    # Start collection of javacores.
    # Loop the appropriate number of times, pausing for the given amount of time,
    # and iterate through each supplied PID.
    # A kill -3 command will not be executed if disable_kill is true.
    ###############################################################################
    n=1
    m=`expr $SCRIPT_SPAN / $JAVACORE_INTERVAL`

    if [ -n "$pids" ] && [ -z "$disable_kill" ]; then
      log "Issuing a kill -3 command for the provided PID(s) to create a javacore or a thread dump."
    fi
    while [ $n -le $m ]
    do
      # Produce a javacore/thread dump
      if [ -z "$disable_kill" ];then
        for pid in $pids
        do
          kill_output=$(kill -3 $pid 2>&1)
          if [ $? -ne 0 ]; then
            log "PID $pid inaccessible. $kill_output"
            pids=$(remove_pid $pid "$pids")
            no_longer_running_pids="$no_longer_running_pids $pid"
            if [ -z "$pids" ]; then
              log "No PIDs remaining to process, finishing the script."
              # Kill -3 $PID is no longer needed due to above.
              # Support team guides for locating javacores/thread dumps if they were produced before PIDs are inaccessible/no longer.
              disable_kill=1
              break 3
            fi
          else
            log "Issued a kill -3 for PID $pid."
          fi
        done
      fi
      # Pause for JAVACORE_INTERVAL seconds.
      log "Continuing to collect data for $JAVACORE_INTERVAL seconds..."
      sleep $JAVACORE_INTERVAL
      # Increment counter
      n=`expr $n + 1`
    done

    # Produce the final javacore/thread dump.
    if [ -z "$disable_kill" ];then
      for pid in $pids
      do
        # Quoted so that a multi-word kill error reaches log() as one message.
        log "$(kill -3 $pid 2>&1)"
        log "Issued the final kill -3 for PID $pid."
      done
    fi

    # Collect a final netstat
    date >> netstat.out
    netstat -an >> netstat.out 2>&1
    log "Final netstat snapshot complete."

    #########################
    # Other data collection #
    #########################
    log "Collecting other data. This may take a few moments..."
    /usr/sbin/emgr -lv3 > emgr-lv3.out 2>&1
    oslevel -s > oslevel-s.out 2>&1
    lslpp -la > lslpp-la.out 2>&1
    instfix -i > instfix-i.out 2>&1
    prtconf > prtconf.out 2>&1
    errpt -a > errpt.out 2>&1
    lparstat -i > lparstat-i.out 2>&1
    lsattr -El sys0 > lsattr.out 2>&1
    df > df.out 2>&1

    print >> ps.out
    date >> ps.out
    ps $wparspec avwwwg >> ps.out 2>&1
    """

class HeapMonitor(RequiresServerManager):
  '''
  Monitors heap usage and triggers data collection when threshold is exceeded.
  This only runs when --heap-threshold=<value> is used.
  --monitor-only can be used if no data collection is needed.
  '''
  def __init__(self,config,output_manager,dump_collector):
    self.config = config
    self.output_manager = output_manager
    self.dump_collector = dump_collector
    self._check_server_manager_ready()

  def get_JVM_current_heap_usage(self, jvm):
    '''
    Return the used heap of a JVM MBean in bytes (heapSize - freeMemory).

    Args:
      jvm: complete object name of the JVM MBean
    Returns:
      Used heap in bytes, or None if either attribute could not be read or
      is not a number.
    '''
    freeMemory = was_helper.get_attribute_value(jvm,'freeMemory')# bytes
    heapSize = was_helper.get_attribute_value(jvm,'heapSize')# bytes
    # Check for None before converting: long(None) raises TypeError.
    if freeMemory is None or heapSize is None:
      return None
    try:
      # freeMemory may legitimately be 0 (heap completely used), so the
      # result must not depend on the truthiness of the values.
      return long(heapSize) - long(freeMemory)
    except (ValueError, TypeError):
      return None

  def _to_threshold_unit(self, heap_bytes, heap_unit):
    '''Convert bytes to heap_unit ('mb' or 'gb'); any other unit leaves the value in bytes.'''
    if heap_unit == 'mb':
      return round(float(heap_bytes)/1024/1024, 2)
    elif heap_unit == 'gb':
      return round(float(heap_bytes)/1024/1024/1024, 2)
    return heap_bytes

  def wait_for_sustained_heap_threshold_breach(self):
    '''
    Monitor servers and wait for sustained heap usage above threshold.
    Triggers when a server's heap usage is at or above the configured threshold at both the previous measurement (point A) and the current measurement (point B).
    This ensures detection of sustained high memory usage, not transient spikes.

    Blocks until:
    - A server shows a sustained heap breach (its status is set to TRIGGERED), or
    - All servers are unreachable

    Args: None (Uses self.config.heap_threshold, self.config.check_heap_interval and self.config.print_heap_usage)
    Returns: None (returns when one of the conditions above is met)
    '''
    self.output_manager.log('Waiting for heap usage to exceed %s.' % self.config.heap_threshold)
    heap_unit = self.config.heap_threshold[-2:].lower() # Gets the last 2 characters (mb or gb)
    heap_threshold_value = float(self.config.heap_threshold[:-2])

    # Previous measurement per server, keyed by the formatted server string.
    tracking_servers_heap = {}
    while 1:
      unreachable_servers_count = 0
      for server in server_manager.servers:
        if server.get('status') == ServerManager.STATUS.UNREACHABLE:
          unreachable_servers_count += 1
          continue
        formatted_server = server_manager.format_server(server)
        # NOTE: The dmgr does not have a perf bean to get the stats object (getStatsObject) so use this instead.
        jvm = was_helper.get_complete_object_name('type=JVM,cell=%s,node=%s,process=%s,*' % (server['cell'],server['node'],server['name']))
        if not jvm:
          self.output_manager.log('%s is unreachable.' % formatted_server )
          server_manager.set_status(server, ServerManager.STATUS.UNREACHABLE)
          unreachable_servers_count += 1
          continue

        used_bytes = self.get_JVM_current_heap_usage(jvm)
        if used_bytes is None:
          # The MBean exists but its attributes could not be read; skip this
          # measurement instead of failing on float(None).
          self.output_manager.log('Unable to read the heap usage for %s.' % formatted_server)
          continue
        used_memory = self._to_threshold_unit(used_bytes, heap_unit)
        previous_memory = tracking_servers_heap.get(formatted_server)

        if self.config.print_heap_usage:
          if previous_memory is None:
            self.output_manager.log('%s\'s current heap usage is %s %s.' % (formatted_server,used_memory,heap_unit))
          else:
            self.output_manager.log('%s\'s heap usage at interval start was %s %s, currently %s %s.' %
              (formatted_server, previous_memory, heap_unit, used_memory, heap_unit))

        if previous_memory is not None and previous_memory >= heap_threshold_value and used_memory >= heap_threshold_value:
          self.output_manager.log('%s exceeded the heap threshold of %s at both the start and end of the %i-second interval, currently at %s%s' % (formatted_server,self.config.heap_threshold,self.config.check_heap_interval,used_memory,heap_unit))
          server_manager.set_status(server, ServerManager.STATUS.TRIGGERED)
          return
        tracking_servers_heap[formatted_server] = used_memory

      # Evaluated after the pass so that a server that just became unreachable
      # is counted immediately, and an empty server list cannot loop forever.
      if unreachable_servers_count >= len(server_manager.servers):
        self.output_manager.log('No reachable servers remain; stopping heap monitoring.')
        return
      time.sleep(self.config.check_heap_interval)

class WASTraceManager:
  '''
  Enables, disables and reads the runtime trace specification of a server
  through its TraceService MBean.
  '''
  def __init__(self,config,output_manager):
    self.config = config
    self.output_manager = output_manager

  def _get_trace_service(self,server):
    '''Return the TraceService MBean object name for the server dictionary (falsy when not found).'''
    return was_helper.get_complete_object_name('type=TraceService,cell='+server['cell']+',node='+server['node']+',process='+server['name']+',*')

  def _get_current_setting(self,ts,pattern,default):
    '''
    Return the first group of pattern found in the TraceService attribute dump,
    or default when the attributes are unavailable or the pattern is absent.
    '''
    attributes = was_helper.get_attributes(ts)
    if attributes:
      match = re.search(pattern, attributes)
      # re.search returns None when nothing matches; calling .group() on it
      # would raise AttributeError, so fall back to the default instead.
      if match and match.group(1):
        return match.group(1)
    return default

  ########################################################################
  # WebSphere Configuration Setting Functions
  ########################################################################
  def enable_trace(self,server):
    """
    Enable trace for the specified WebSphere server.
    When --trace-file-size or --trace-max-files was given, the trace output
    file is reconfigured first; the value that was not given is taken from the
    current TraceService attributes (20 / 5 when it cannot be determined).
    Args:
      server: server dictionary with cell, node, name
    Returns:
      Result of setting the traceSpecification attribute (empty string if
      successful), None if server appears unreachable
    """
    ts = self._get_trace_service(server)
    if not ts:
      return None
    if self.config.trace_file_size or self.config.trace_max_files:
      current_file = was_helper.get_attribute_value(ts, 'traceFileName')
      rollover_size = self.config.trace_file_size or self._get_current_setting(ts, r'rolloverSize="(\d+)"', '20')
      max_backups = self.config.trace_max_files or self._get_current_setting(ts, r'maxBackups="(\d+)"', '5')

      was_helper.invoke(ts, 'setTraceOutputToFile', [current_file, str(rollover_size), str(max_backups), 'basic'])
    return was_helper.set_attribute(ts, 'traceSpecification', self.config.runtime_trace+'=enabled')

  def disable_trace(self,server):
    """
    Disable trace for the specified WebSphere server.
    Args:
      server: server dictionary with cell, node, name
    Returns:
      Result of setting the traceSpecification attribute (empty string if
      successful), None if server appears unreachable
    """
    ts = self._get_trace_service(server)
    if ts:
      return was_helper.set_attribute(ts, 'traceSpecification', self.config.runtime_trace+'=disabled')
    return None

  ########################################################################
  # WebSphere configuration retrieval functions
  ########################################################################
  def get_current_trace_value(self,server):
    """
    Get the current trace specification for a WebSphere server.
    Args:
      server: Server dictionary containing cell, node, and name
    Returns:
      String containing current trace specification, or None if server unreachable.
    NOTE:
      Not currently used in main script. For testing only.
    """
    ts = self._get_trace_service(server)
    if ts:
      return was_helper.get_attribute_value(ts,'traceSpecification')
    return None



class ServerManager:
  '''
  Manages WebSphere server validation and tracking.
  Verifies servers are running and maintains their state throughout execution.

  Each tracked server is a dictionary with the keys 'cell', 'node', 'name'
  and 'pid'; set_status() adds a 'status' key holding one of the STATUS values.
  '''
  class STATUS:
    TRIGGERED='triggered'
    UNREACHABLE='unreachable'
    TERMINATED='terminated'

  def __init__(self):
    self.output_manager = None
    self.servers = []

  def set_output_manager(self,output_manager):
    '''Set the OutputManager used for logging. Required before parse_and_process_servers().'''
    self.output_manager = output_manager

  def parse_and_process_servers(self,server_args):
    """
    Parses server specifications in formats:
    - PID (all digits)
    - server_name
    - node_name:server_name1 server_name2
    - cell_name:node_name:server_name1 server_name2
    And then verifies that all provided servers are running, adding each of
    them once to self.servers.

    Args:
      server_args: list of strings as described above
    Returns:
      None. self.servers is left empty when a server cannot be resolved.
    Raises:
      RuntimeError: if no OutputManager has been set.
    """
    if self.output_manager is None:
      raise RuntimeError("OutputManager must be set before calling parse_and_process_servers.")

    # Split the arguments into PIDs (all digits) and server names.
    provided_pids = []
    remaining_args = []
    for s in server_args:
      if s.isdigit():
        # A PID repeated on the command line must only be looked up once,
        # otherwise the second copy is reported as unmatched.
        if s not in provided_pids:
          provided_pids.append(s)
      else:
        remaining_args.append(s)
    server_args = remaining_args

    if provided_pids:
      # Matched PIDs are removed from provided_pids by the helper.
      self._add_servers_by_pid(provided_pids)

    # Exit if no servers match the provided PID(s)
    if provided_pids:
      plural = ''
      if len(provided_pids) > 1:
        plural = 's'
      pid_list = ' '.join(map(str,provided_pids))
      self.output_manager.log('No matching server%s found for PID%s: %s' % (plural,plural, pid_list))
      self.output_manager.log('Exiting the script')
      self.servers[:] = []
      exit_script()
      # Only reached if exit_script() returns: do not process server names.
      return None

    if not server_args:
      # No server names were given, only PIDs.
      return None

    plural = ''
    if len(server_args) > 1:
      plural = 's'
    self.output_manager.log('Verifying provided server name%s:' % plural)
    server_identifiers = self._build_server_identifiers(server_args)

    # Exit the script for the following reasons:
    # - Server is not running.
    # - There are nodes with the same server name. Provide a specified node name.
    # Duplicate one will not be added to servers list to prevent from creating duplicate data.
    for server_identifier in server_identifiers:
      result = was_helper.query_mbean('WebSphere:type=Server,%s,*' % server_identifier )
      if not result:
        self.output_manager.log('Unable to get MBean for %s. Either it is not running or not found.' % server_identifier)
        self.servers[:] = []
        return None

      # Ignore blank lines so a trailing newline is not mistaken for a second match.
      output_lines = [line.strip() for line in result.split('\n') if line.strip()]
      if len(output_lines) > 1:
        self.output_manager.log('Multiple nodes found with the same server name:' )
        for line in output_lines:
          self.output_manager.log(line)
        print('Please specify the node/server name. Exiting the script.')
        print(usage())
        exit_script()
        # Only reached if exit_script() returns.
        self.servers[:] = []
        continue
      if not output_lines:
        self.output_manager.log('%s is not running (or not found). Exiting the script.' % server_identifier)
        exit_script()
        # Only reached if exit_script() returns.
        self.servers[:] = []
        continue

      obj_name = AdminControl.makeObjectName(output_lines[0])
      cell, node, name = obj_name.getKeyProperty('cell'),obj_name.getKeyProperty('node'),obj_name.getKeyProperty('name')

      found_duplicate = 0
      for server in self.servers:
        if cell == server['cell'] and node == server['node'] and name == server['name']:
          found_duplicate = 1
          break

      if not found_duplicate:
        pid = was_helper.get_attribute_value(output_lines[0], 'pid')
        if not pid:
          exit_script()
        self.servers.append({'cell':cell,'node':node,'name':name,'pid':pid})
    return None

  def _add_servers_by_pid(self,provided_pids):
    '''
    Add every running server whose PID is in provided_pids to self.servers.
    Matched PIDs are removed from provided_pids, so the caller can tell which
    PIDs were not found.
    '''
    s_char = ''
    if len(provided_pids) > 1:
      s_char = 's'
    pid_list = ' '.join(map(str,provided_pids))
    self.output_manager.log('Searching for server%s with PID%s: %s' % (s_char,s_char, pid_list))

    query_result = was_helper.query_mbean('WebSphere:type=Server,*')
    server_mbeans = []
    if query_result:
      server_mbeans = query_result.split()
    for s in server_mbeans:
      pid = was_helper.get_attribute_value(s,'pid')
      # Add the server to the servers array if PID is matched.
      if pid in provided_pids:
        obj_name = AdminControl.makeObjectName(s)
        cell,node,name = obj_name.getKeyProperty('cell'),obj_name.getKeyProperty('node'),obj_name.getKeyProperty('name')
        self.servers.append({'cell':cell,'node':node,'name':name,'pid':pid})
        provided_pids.remove(pid)

    if self.servers:
      ss_char = ''
      if len(self.servers) > 1:
        ss_char = 's'
      self.output_manager.log('Found the matching server%s:' % (ss_char))
      for s in self.servers:
        self.output_manager.log('- %s' % (self.format_server(s)))

  def _build_server_identifiers(self,server_args):
    '''
    Log each server argument and translate it into the key properties used to
    query its Server MBean (process=..., optionally node=... and cell=...).
    '''
    server_identifiers = []
    for item in server_args:
      self.output_manager.log('- %s ' % item)
      # Checking for ':' in the given cell_name:node_name:server_name.
      # E.g.: 'node01:server1 server2 server3' 'server4'
      if ':' in item:
        parts = item.split(':')
        if len(parts) == 2:
          node_name, server_names = parts
          for server_name in server_names.split():
            server_identifiers.append('node=' + node_name + ',process='+server_name)
        elif len(parts) == 3:
          cell_name, node_name, server_names = parts
          for server_name in server_names.split():
            server_identifiers.append('cell='+cell_name+',node=' + node_name + ',process='+server_name)
        else:
          self.output_manager.log('Unrecognized %s. Exiting the script.' % item)
          exit_script()
      else:
        server_identifiers.append('process='+str(item))
    return server_identifiers

  def remove_servers_except_matched(self,matched_server):
    '''Remove all servers from the list except the matched one (compared by identity).'''
    self.servers[:] = [s for s in self.servers if s is matched_server ]

  def find_server_by_pid(self,pid):
    '''Find and return the server dictionary that matches the given PID, or None.'''
    for server in self.servers:
      if server.get('pid') == pid:
        return server
    return None

  def format_server(self,server):
    '''Return formatted server string: cell:node:name:pid.'''
    return '%s:%s:%s:%s' % (server['cell'],server['node'],server['name'],server['pid'])

  def set_status(self,server,event):
    '''
    Set the 'status' of the tracked server that formats identically to server.
    event must be one of the STATUS values; anything else is logged and ignored.
    '''
    if event not in (self.STATUS.TRIGGERED, self.STATUS.UNREACHABLE, self.STATUS.TERMINATED):
      self.output_manager.log("Error: Unrecognized '%s' event passed to ServerManager.set_status()" % event)
      return None
    target = self.format_server(server)
    for s in self.servers:
      if self.format_server(s) == target:
        s['status'] = event
        break
    return None
server_manager = ServerManager()


########################
# Sanitize and validate provided inputs
########################
def parse_sys_argv():
  """
  Return the entries of sys.argv as a flat list of tokens.
  Each entry is stripped and split on single spaces; empty tokens are dropped.
  """
  tokens = []
  for raw in sys.argv:
    # An entry without spaces splits into just itself, so one loop covers
    # both the single-token and the multi-token case.
    for piece in raw.strip().split(' '):
      if piece:
        tokens.append(piece)
  return tokens

# Option prefixes recognized by this script.
# parse_and_validate_args() accepts a '--' argument when its lower-cased form
# starts with one of these entries, so an entry ending in '=' requires the '='
# to be present, while an entry without it also matches any longer argument
# that shares the prefix.
# NOTE(review): '--match-trace' and '--check-heap-interval' are listed without
# '=' although process_args_values() only acts on them in the --name=value
# form -- confirm whether the '=' was left out on purpose.
ACCEPTED_OPTIONS = (
  '--collect-os',
  '--check-cpu-interval=',
  '--cpu-threshold=',
  '--print-cpu-usage',
  '--output-dir=',
  '--iterations=',
  '--delay=',
  '--runtime-trace=',
  '--trace-file-size=',
  '--trace-max-files=',
  '--disable-exit',
  '--diagnostic-plan=',
  '--print-diagnostic-plan-status',
  '--match-trace',
  '--disable-collecting-hostname',
  '--heap-threshold=',
  '--dump-on-trigger=',
  '--check-heap-interval',
  '--print-heap-usage',
  '--monitor-only',
  '--collect-servers',
  '--collect-servers=')

def parse_and_validate_args(args):
  """
  Split the command-line tokens into option arguments and server arguments.

  A token starting with '--' must begin (case-insensitively) with one of
  ACCEPTED_OPTIONS; a token starting with a single '-' is rejected; everything
  else is treated as a server specification (PID, server, node:server or
  cell:node:server). Every part of a ':'-separated specification must be
  non-empty. On any violation a message is printed and exit_script() is called.

  Args:
    args: list of tokens, as returned by parse_sys_argv()
  Returns:
    Tuple (config_args, server_args)
  """
  config_args = []
  server_args = []
  for arg in args:
    if arg.startswith('--'):
      lowered = arg.lower()
      valid_arg = 0
      for prefix in ACCEPTED_OPTIONS:
        if lowered.startswith(prefix):
          valid_arg = 1
          break
      if valid_arg:
        config_args.append(arg)
      else:
        print('')
        print('Unrecognized %s.' % arg)
        print('Accepted the following arguments: %s' % ', '.join(ACCEPTED_OPTIONS))
        print('Exiting the script.')
        sys.argv = []
        config_args[:] = []
        server_args[:] = []
        exit_script()
    elif arg.startswith('-'):
      print('')
      print('An option with a single dash is not accepted for this script.')
      sys.argv = []
      config_args[:] = []
      server_args[:] = []
      exit_script()
    else:
      # Assuming it is a server name
      server_args.append(arg)

  if len(server_args) > 0 and server_args[0]:
    for node_server in server_args:
      if ':' not in node_server:
        continue
      parts = node_server.split(':')
      # Label each position so that an empty part is reported under its real
      # name; the server part of cell:node:server is checked as well.
      if len(parts) == 2:
        labels = ('node', 'server')
      elif len(parts) == 3:
        labels = ('cell', 'node', 'server')
      else:
        # Any other number of parts is reported later by
        # ServerManager.parse_and_process_servers().
        continue
      for index in range(len(parts)):
        if not parts[index]:
          print('No %s name provided in "%s". Exiting the script.' % (labels[index], node_server))
          print(usage())
          exit_script()
          break
  else:
    print('Node and server names were not provided. Exiting the script.')
    print(usage())
    exit_script()

  return config_args, server_args

#########################################
# Parse command line arguments from user.
#########################################

def _exit_invalid_argument(arg):
  """Report an argument that matches no supported option form and stop the script."""
  print('Invalid argument: "%s" does not match any accepted option.' % arg)
  exit_script()

def _exit_missing_value(option_name):
  """Report an option written as --name= with nothing after the '=' and stop the script."""
  print('Missing value after %s. Exiting the script.' % option_name)
  exit_script()

def _parse_int_option(option_name, raw_value):
  """Return raw_value as an int; report it, call exit_script() and return None when it is not a whole number."""
  try:
    return int(raw_value)
  except ValueError:
    print('Invalid value "%s" for %s: a whole number is required. Exiting the script.' % (raw_value, option_name))
    exit_script()
    return None

def _is_valid_heap_threshold(threshold):
  """Return true when threshold is a number immediately followed by 'mb' or 'gb'."""
  if not (threshold.endswith('mb') or threshold.endswith('gb')):
    return 0
  try:
    float(threshold[:-2])
  except ValueError:
    return 0
  return 1

def _apply_heap_threshold(config, value):
  """Apply --heap-threshold=<number><mb|gb>[,<heapdump|core>] to config."""
  dump_parts = value.lower().strip().split(',')
  config.heap_threshold = dump_parts[0].strip()
  # HeapMonitor reads the last two characters as the unit and the rest as a
  # float, so the unit must be a suffix and the remainder must be numeric.
  if not _is_valid_heap_threshold(config.heap_threshold):
    print('Usage: --heap-threshold=<value> with a text of mb or gb')
    print('Example: --heap-threshold=1024mb or --heap-threshold=1gb')
    config.heap_threshold = ''
    exit_script()
  # Handle dump type if provided
  if len(dump_parts) > 1:
    dump_type = dump_parts[1].strip()
    if dump_type not in ('heapdump','core'):
      print('Unrecognized dump type: %s in the --heap-threshold option' % dump_type)
      print('Valid dump types are: heapdump, core')
      dump_type = None
      exit_script()
    config.trigger_dump_type = dump_type

def _apply_value_option(config, name, value):
  """Apply one --name=value option (name lower-cased, value non-empty) to config."""
  int_attributes = {
    '--iterations': 'iterations',
    '--delay': 'delay',
    '--cpu-threshold': 'cpu_threshold',
    '--check-cpu-interval': 'check_cpu_interval',
    '--check-heap-interval': 'check_heap_interval'}
  text_attributes = {
    '--runtime-trace': 'runtime_trace',
    '--trace-max-files': 'trace_max_files',
    '--trace-file-size': 'trace_file_size',
    '--match-trace': 'match_trace',
    '--collect-servers': 'collect_servers'}

  int_attribute = int_attributes.get(name)
  text_attribute = text_attributes.get(name)
  if int_attribute is not None:
    number = _parse_int_option(name, value)
    if number is not None:
      setattr(config, int_attribute, number)
      if name == '--cpu-threshold':
        # Monitoring CPU usage also turns on OS data collection.
        config.collecting_os_data = 1
  elif text_attribute is not None:
    setattr(config, text_attribute, value)
  elif name == '--output-dir':
    config.output_dir = value
    # NOTE(review): this is built from the module-level OUTPUT_DIR rather than
    # from the directory just supplied -- confirm that this is intended.
    config.script_output_dir = OUTPUT_DIR + '/' + SCRIPT_OUTPUT_NAME #+ '/'
  elif name == '--diagnostic-plan':
    config.diagnostic_plan_parameters = value
    if re.search('JAVACORE',config.diagnostic_plan_parameters):
      config.contains_javacore = 1
  elif name == '--heap-threshold':
    _apply_heap_threshold(config, value)
  elif name == '--dump-on-trigger':
    dump_type = value.lower().strip()
    if dump_type not in ('heapdump','core'):
      print('Usage: --dump-on-trigger=<heapdump|core>')
      print('Example: --dump-on-trigger=heapdump')
      dump_type = None
      exit_script()
    config.trigger_dump_type = dump_type

def _apply_flag_option(config, name, arg):
  """Apply one value-less --name flag (name lower-cased) to config."""
  flag_attributes = {
    '--collect-os': 'collecting_os_data',
    '--disable-exit': 'disable_exit',
    '--print-diagnostic-plan-status': 'print_diagnostic_plan_status',
    '--disable-collecting-hostname': 'disable_collecting_hostname',
    '--print-heap-usage': 'print_heap_usage',
    '--print-cpu-usage': 'print_cpu_usage',
    '--monitor-only': 'monitor_only'}
  if name == '--collect-servers':
    config.collect_servers = 'all'
    return
  flag_attribute = flag_attributes.get(name)
  if flag_attribute is None:
    _exit_invalid_argument(arg)
  else:
    setattr(config, flag_attribute, 1)

def process_args_values(config, config_args):
  """
  Apply the command-line options to config.

  Option names are case-insensitive. Options of the form --name=value need a
  non-empty value (a whole number for --iterations, --delay, --cpu-threshold,
  --check-cpu-interval and --check-heap-interval); the remaining options are
  plain flags. An unsupported form, a missing value or a malformed value is
  reported and followed by exit_script(). Arguments that do not start with
  '--' are ignored.

  Args:
    config: object whose attributes receive the option values
    config_args: list of option strings, normally the first element returned
      by parse_and_validate_args()
  Returns: None
  """
  value_options = (
    '--output-dir', '--iterations', '--delay',
    '--cpu-threshold', '--check-cpu-interval',
    '--runtime-trace', '--trace-max-files', '--trace-file-size',
    '--match-trace', '--diagnostic-plan',
    '--heap-threshold', '--dump-on-trigger', '--check-heap-interval',
    '--collect-servers')
  for arg in config_args:
    if not arg.startswith('--'):
      continue
    # Split on the first '=' only: a value may itself contain '=' (for example
    # a trace specification such as com.ibm.ws.*=all).
    parts = arg.split('=',1)
    name = parts[0].lower()
    if len(parts) == 2:
      if name not in value_options:
        _exit_invalid_argument(arg)
      elif len(parts[1]) == 0:
        _exit_missing_value(name)
      else:
        _apply_value_option(config, name, parts[1])
    else:
      _apply_flag_option(config, name, arg)

def main():
  """
  Script entry point: parse the command line, resolve the requested servers,
  optionally wait for a CPU/heap/trace trigger, produce javacores or run a
  diagnostic plan, then copy the collected files and zip the output directory.
  """
  args = parse_sys_argv()
  config_args, server_args = parse_and_validate_args(args)

  # parse_and_validate_args() already reports a missing server list and calls
  # exit_script(); presumably this guard is only reached if that call returns.
  if len(server_args) == 0:
    print('Error: server_args is empty.')
    return

  config = Config()
  process_args_values(config, config_args)
  config.validate()

  output_manager = OutputManager(config)
  output_manager.create_output_dir()

  # The module-level helpers need the output manager before they can log.
  was_helper.set_output_manager(output_manager)
  server_manager.set_output_manager(output_manager)
  
  was_collector = WASCollector(config,output_manager)
  dump_collector = DumpCollector(config, output_manager)
  diag_plan_mgr = DiagnosticPlanManager(config,output_manager)
  heap_monitor = HeapMonitor(config,output_manager,dump_collector)
  os_collector = OSCollector(config,output_manager)
  trace_manager = WASTraceManager(config,output_manager)

  # The host name is only looked up and logged when that has not been disabled.
  hostname = ''
  if not config.disable_collecting_hostname:
    hostname = ' on ' + str(java.net.InetAddress.getLocalHost().getHostName())
  output_manager.log('%s created%s.' % (config.script_output_dir,hostname) )
  output_manager.log('wasperf version: %s' % SCRIPT_VERSION)

  # Log the effective settings for this run.
  if not len(config.diagnostic_plan_parameters) > 0:
    output_manager.log('Iterations: %s' % config.iterations)
    output_manager.log('Delay: %s' % config.delay)
    if config.runtime_trace:
      output_manager.log('Runtime trace: %s' % config.runtime_trace)
      output_manager.log('Trace file size: %s' % config.trace_file_size)
      # NOTE(review): the label says "max size" but the value logged is
      # trace_max_files -- confirm the intended wording.
      output_manager.log('Trace max size: %s' % config.trace_max_files)
    if config.match_trace:
      output_manager.log('Match trace: %s' % config.match_trace)
  else:
    output_manager.log('Diagnostic plan actions: %s' % config.diagnostic_plan_parameters)
  if config.collecting_os_data:
    output_manager.log('Collecting OS data: true')
  if config.cpu_threshold:
    output_manager.log('CPU threshold: %s' % config.cpu_threshold)
    output_manager.log('Check CPU interval: %s' % config.check_cpu_interval)
  if config.heap_threshold:
    output_manager.log('Heap threshold: %s' % config.heap_threshold)
    output_manager.log('Check heap interval: %i seconds' % config.check_heap_interval)
  if config.trigger_dump_type:
    output_manager.log('Dump type when server is triggered: %s' % config.trigger_dump_type)

  # Fills server_manager.servers; the list stays empty when a server could not be resolved.
  server_manager.parse_and_process_servers(server_args)


  if server_manager.servers:
    # Used by both the "Enabling trace" and the "Disabling trace" messages.
    plural = ''
    if len(server_manager.servers) > 1:
      plural = 's'
    if config.runtime_trace:
      # Remember where trace was enabled so that only those servers are reset later.
      servers_with_trace_enabled = []
      output_manager.log('Enabling trace on the following server%s:' % plural)
      for s in server_manager.servers:
        result = trace_manager.enable_trace(s)
        # An empty string is treated as success; anything else marks the server unreachable.
        if result == "":
          output_manager.log('- %s' % (server_manager.format_server(s)))
          servers_with_trace_enabled.append(s)
        else:
          output_manager.log('- %s appears to be unreachable.' % (server_manager.format_server(s)))
          server_manager.set_status(s,ServerManager.STATUS.UNREACHABLE)

    # CPU threshold
    # NOTE(review): presumably these calls block until the CPU threshold is reached -- confirm in OSCollector.
    if config.cpu_threshold:
      if config.current_os == 'Windows':
        os_collector.windows_CPU_trigger()
      elif config.current_os == 'Linux' or config.current_os == 'AIX':
        os_collector.collect_unix_data()

    # Heap threshold
    # Returns once a server is marked TRIGGERED or all servers are unreachable.
    if config.heap_threshold:
      heap_monitor.wait_for_sustained_heap_threshold_breach()

    if config.match_trace:
      diag_plan_mgr.send_diagnostic_plan()
      diag_plan_mgr.check_diagnostic_plan_status()

    # Data collection without the diagnostic plan option.
    if not config.diagnostic_plan_parameters and not config.monitor_only:
      if config.trigger_dump_type and (config.match_trace or config.cpu_threshold or config.heap_threshold):
        for server in server_manager.servers:
          # Without --collect-servers only servers marked TRIGGERED get a dump.
          if not server.get("status") == ServerManager.STATUS.TRIGGERED and not config.collect_servers:
            continue
          if config.trigger_dump_type == 'heapdump':
            dump_collector.invoke_dump(DumpCollector.HEAP, server)
          elif config.trigger_dump_type == 'core':
            dump_collector.invoke_dump(DumpCollector.SYSTEM, server)
          elif config.trigger_dump_type != None:
            output_manager.log('Unrecognized dump type: %s' % config.trigger_dump_type)
        # The trigger settings are cleared once the dumps have been requested.
        config.cpu_threshold = None
        config.heap_threshold = None
        config.match_trace = ''

      # Without --collect-servers, continue with the first triggered server only.
      # The loop iterates over a copy because the list is modified in place.
      if not config.collect_servers and len(server_manager.servers) > 1:
        for s in server_manager.servers[:]:
          if s.get('status') == ServerManager.STATUS.TRIGGERED:
            server_manager.remove_servers_except_matched(s)
            break

      # Produce javacores
      if server_manager.servers:
        # One thread per server; all are created before any of them is started.
        generating_javacores_threads = []
        for server in server_manager.servers:
          thread = threading.Thread(target=dump_collector.threading_generate_javacore,args=(server,))
          generating_javacores_threads.append(thread)
        for thread in generating_javacores_threads:
          thread.start()

        # OS collection
        # The OS thread is joined right after it is started, so main() waits
        # for it here while the javacore threads keep running.
        if config.collecting_os_data and config.current_os == 'Windows':
          threading_os_collection = threading.Thread(target=os_collector.collect_windows_data)
          threading_os_collection.start()
          threading_os_collection.join()
        elif config.collecting_os_data and (config.current_os == 'Linux' or  config.current_os == 'AIX'):
          threading_os_collection = threading.Thread(target=os_collector.collect_unix_data)
          threading_os_collection.start()
          threading_os_collection.join()
        # Wait until all threads are done. 
        for thread in generating_javacores_threads:
          thread.join()

    # The diagnostic plan option is used. 
    elif config.diagnostic_plan_parameters:
      diag_plan_mgr.send_diagnostic_plan()
      diag_plan_mgr.check_diagnostic_plan_status()

    if config.runtime_trace:
      output_manager.log('Disabling trace on the following server%s:' % plural)
      # Only servers on which enable_trace() succeeded are reset.
      for s in servers_with_trace_enabled:
        result = trace_manager.disable_trace(s)
        if result == "":
          output_manager.log('- %s' % (server_manager.format_server(s)))
        else:
          output_manager.log('- %s appears to be unreachable.' % (server_manager.format_server(s)))


    if not config.monitor_only:
      # Copy the configuration, logs and dumps of every remaining server into the output directory.
      for server in server_manager.servers:
        output_manager.log('Copying files for %s:' % server_manager.format_server(server))
        was_collector.gather_server_xml_file(server)
        was_collector.gather_logs(server)
        if not config.diagnostic_plan_parameters:
          dump_collector.gather_dumps('javacore',server)
          if config.trigger_dump_type:
            dump_collector.gather_dumps(config.trigger_dump_type,server)

        # Gather dumps produced by Diagnostic Plan 
        # NOTE(review): these flags are read from diag_plan_mgr, while
        # process_args_values() sets contains_javacore on config -- confirm
        # that DiagnosticPlanManager provides all three attributes.
        elif config.diagnostic_plan_parameters and (diag_plan_mgr.contains_javacore or diag_plan_mgr.contains_heapdump or diag_plan_mgr.contains_systemcore):
          if diag_plan_mgr.contains_javacore:
            dump_collector.gather_dumps('javacore',server)
          if diag_plan_mgr.contains_heapdump:
            dump_collector.gather_dumps('heapdump',server)
          if diag_plan_mgr.contains_systemcore:
            dump_collector.gather_dumps('core',server)

        output_manager.log('Copy completed for %s.' % server_manager.format_server(server))
      
      output_manager.zip_and_remove_output_dir()
      output_manager.print_end_message()

    elif config.monitor_only:
      output_manager.log("The script has completed its work with no data collection.")
  else:
    output_manager.log('Error: server_manager.servers is empty.')

# Run the collection when the file is executed as a script.
if __name__ == "__main__":
    main()