#!/usr/bin/env bash

# This program is part of Percona Toolkit: http://www.percona.com/software/
# See "COPYRIGHT, LICENSE, AND WARRANTY" at the end of this file for legal
# notices and disclaimers.

# Print an optional error followed by the usage synopsis, then exit.
# Globals:   OPT_ERR (read) - error text to show first; may be unset or empty
# Outputs:   all messages go to stderr
# Exits:     always, with status 1
usage() {
   # ${OPT_ERR:-} keeps this safe under "set -u" when no error text was set.
   if [ -n "${OPT_ERR:-}" ]; then
      echo "Error: $OPT_ERR" >&2
   fi
   echo "Usage: pt-sift FILE|PREFIX|DIRECTORY" >&2
   echo "For more information, 'man pt-sift' or 'perldoc $0'." >&2
   exit 1
}

# ###########################################################################
# tmpdir package
# This package is a copy without comments from the original.  The original
# with comments and its test file can be found in the Bazaar repository at,
#   lib/bash/tmpdir.sh
#   t/lib/bash/tmpdir.sh
# See https://launchpad.net/percona-toolkit for more information.
# ###########################################################################


# Expanding an unset variable is an error from here on.
set -o nounset

# Path of the scratch directory: set by mk_tmpdir, cleared by rm_tmpdir.
PT_TMPDIR=''

# Create the scratch directory and record its path in PT_TMPDIR.
# Globals:   PT_TMPDIR (written)
# Arguments: $1 - optional directory to use; it is created when it does not
#                 exist yet.  When omitted or empty, a private directory is
#                 made with mktemp.
# Outputs:   an error message on stderr when the directory cannot be made
# Exits:     1 when the directory cannot be created
mk_tmpdir() {
   local dir="${1:-""}"

   if [ -n "$dir" ]; then
      if [ ! -d "$dir" ]; then
         # Fail explicitly: no "die" helper is defined in this file, so
         # relying on one would let the caller continue without a tmpdir.
         if ! mkdir "$dir"; then
            echo "Cannot make tmpdir $dir" >&2
            exit 1
         fi
      fi
      PT_TMPDIR="$dir"
   else
      local tool="${0##*/}"
      local pid="$$"
      if ! PT_TMPDIR="$(mktemp -d -t "${tool}.${pid}.XXXXXX")"; then
         echo "Cannot make secure tmpdir" >&2
         exit 1
      fi
   fi
}

# Delete the scratch directory named by PT_TMPDIR, if it is set and exists,
# and always reset PT_TMPDIR to the empty string afterwards.
rm_tmpdir() {
   [ -n "$PT_TMPDIR" ] && [ -d "$PT_TMPDIR" ] && rm -rf "$PT_TMPDIR"
   PT_TMPDIR=""
}

# ###########################################################################
# End tmpdir package
# ###########################################################################

# ###########################################################################
# Global variables
# ###########################################################################

# Name of this tool; compared against $0 at the bottom of the file.
TOOL="pt-sift"

# Directory holding the sample files: /var/lib/pt-stalk when it exists,
# otherwise the current working directory.
BASEDIR="$PWD"
if [ -d "/var/lib/pt-stalk" ]; then
   BASEDIR="/var/lib/pt-stalk"
fi

# Timestamp prefix of the selected sample; empty until one is chosen.
PREFIX=""

# ###########################################################################
# Subroutines
# ###########################################################################

# Signal handler: announce the signal on stderr, remove the scratch
# directory via rm_tmpdir, and leave with status 0.
sigtrap() {
   printf '%s\n' "Caught signal, exiting" 1>&2
   rm_tmpdir
   exit 0
}

# Show current help and settings
print_help() {
   cat <<-HELP
   You can control this program with key presses.
                  ---  COMMANDS  ---
      1  Default action: summarize files
      0  Minimal action: list files
      *  View all the files in less
      d  Invoke 'diskstats' on the disk performance data
      i  View the first INNODB STATUS sample in 'less'
      m  Invoke 'pt-mext' to show the SHOW STATUS counters side by side
      n  Summarize the 'netstat -antp' status data
                  --- NAVIGATION ---
      j  Select the next timestamp
      k  Select the previous timestamp
      q  Quit the program
	HELP
}

# ###########################################################################
# Main program loop, called below if the tool is run from the command line.
# ###########################################################################

# Interactive browser for one directory of pt-stalk sample files.
#
# Globals:
#   BASEDIR    (read/written) directory that holds the sample files
#   PREFIX     (read/written) timestamp prefix of the selected sample
#   PT_TMPDIR  (read) scratch directory created by mk_tmpdir
#   OPT_ERR    (written) error text handed to usage
#   PR_diskstats, PR_pmp, PR_mext, PR_align (written) helper tool paths
#   KEY, ACTION, PAGE, HOST, DEFAULT, ARG, mdev, wa_col (written) loop state
# Arguments:
#   $1 - optional FILE, PREFIX or DIRECTORY.  An empty string is treated
#        like a missing argument.
# Outputs:
#   reports on stdout; diagnostics on stderr
# Exits:
#   1 on more than one argument, an unusable argument, a missing helper
#   tool, or when no sample files are found.  The scratch directory is
#   removed on every exit path after it has been created.
main() {
   trap sigtrap SIGHUP SIGINT SIGTERM

   # If there's a command-line arg, figure out if it's a file, directory, or
   # prefix.  The outcome of this block of code should be that BASEDIR is the
   # directory where the files live, without a trailing slash; and PREFIX is
   # either empty or a timestamp, such as "2011_02_08_16_58_07".
   if [ $# -gt 1 ]; then
      OPT_ERR="Specify only one PREFIX or DIR"
      usage
   fi

   # An empty argument counts as "no argument", so that `main ""` falls back
   # to the default BASEDIR instead of failing with "is not a directory".
   if [ $# -eq 1 ] && [ -n "$1" ]; then
      if [ -d "$1" ]; then
         BASEDIR="$1"
         PREFIX=""
      elif [ -f "$1" ] || [ -f "$1-df" ] || [ -f "$1df" ]; then
         BASEDIR="$(dirname "$1")"
         # Extract the timestamp from the file name only; digits or
         # underscores in a leading directory name must not be picked up.
         PREFIX="$(printf '%s\n' "${1##*/}" | perl -ne '$_ =~ m/([\d_]+)/; print $1;')"
      else
         echo "Error: $1 is not a directory, and there are no pt-stalk files in the curent working directory ($BASEDIR) with a $1 prefix." >&2
         echo "For more information, 'man pt-sift' or 'perldoc $0'." >&2
         exit 1
      fi
   fi

   # If the programs we need don't exist, try to get them.
   # Percona Toolkit tools:
   local prog prog_base prog_path
   for prog in pt-diskstats pt-pmp pt-mext pt-align; do
      # A var can't be named "PR_pt-pmp" so we chop off "pt-" to get
      # the program's basename, resulting in "PR_pmp".
      prog_base=${prog#"pt-"}
      if prog_path="$(command -v "$prog" 2>/dev/null)"; then
         : # found on PATH; prog_path already holds the location
      elif [ -f "$prog" ] && [ -x "$prog" ]; then
         prog_path="./$prog"
      elif [ -f "${BASEDIR}/$prog" ] && [ -x "${BASEDIR}/$prog" ]; then
         prog_path="${BASEDIR}/$prog"
      elif command -v curl >/dev/null 2>&1; then
         echo "Fetching $prog" >&2
         # NOTE(review): this downloads a program over plain HTTP and later
         # executes it; consider HTTPS and/or checksum verification.
         if curl "http://www.percona.com/get/$prog" > "$prog" \
            && chmod +x "$prog"; then
            prog_path="./$prog"
         else
            # Do not register a program that could not be downloaded.
            echo "Cannot find or fetch required program: $prog" >&2
            exit 1
         fi
      else
         echo "Cannot find or fetch required program: $prog" >&2
         exit 1
      fi
      # The value is expanded by eval itself, inside quotes, so paths that
      # contain spaces or shell metacharacters are assigned verbatim.
      eval "PR_${prog_base}=\"\$prog_path\""
   done

   # Make a secure tmpdir.
   mk_tmpdir

   # We need to generate a list of timestamps, and ask the user to choose one if
   # there is no PREFIX yet.  NOTE: we rely on the "-df" files here.
   # If the cd fails nothing is listed, so the "no files" error below fires
   # instead of silently listing some other directory.
   (
      cd "$BASEDIR" && ls *-df 2>/dev/null | cut -d- -f1 | sort
   ) > "$PT_TMPDIR/pt-sift.prefixes"
   if [ ! -s "$PT_TMPDIR/pt-sift.prefixes" ]; then
      echo "Error: There are no pt-stalk files in $BASEDIR" >&2
      echo "For more information, 'man pt-sift' or 'perldoc $0'." >&2
      rm_tmpdir
      exit 1
   fi
   if [ -z "${PREFIX}" ]; then
      if [ "$(grep -c . "$PT_TMPDIR/pt-sift.prefixes")" = "1" ]; then
         # If there is only one sample, we use it as the prefix.
         PREFIX="$(cat "$PT_TMPDIR/pt-sift.prefixes")"
      fi
   fi
   if [ -z "${PREFIX}" ]; then
      echo
      # Print the prefixes three to a line.
      local i=0 line=""
      while read -r line; do
         i=$(($i + 1))
         echo -n "  $line"
         if [ $i -eq 3 ]; then
            echo
            i=0
         fi
      done < "$PT_TMPDIR/pt-sift.prefixes"
      # We might have ended mid-line or we might have printed a newline; print a
      # newline if required to end the list of timestamp prefixes.
      awk 'BEGIN { i = 0 } { i++ } END { if ( i % 3 != 0 ) { print "" } }' "$PT_TMPDIR/pt-sift.prefixes"
      echo
      # Keep asking until the answer matches exactly one line of the list.
      while [ -z "${PREFIX}" ] \
         || [ "$(grep -c "${PREFIX}" "$PT_TMPDIR/pt-sift.prefixes")" -ne 1 ]; do
         DEFAULT="$(tail -n 1 "$PT_TMPDIR/pt-sift.prefixes")"
         read -r -e -p "Select a timestamp from the list [${DEFAULT}] " ARG
         ARG="${ARG:-${DEFAULT}}"
         if [ "$(grep -c "${ARG}" "$PT_TMPDIR/pt-sift.prefixes")" -eq 1 ]; then
            PREFIX="$(grep "${ARG}" "$PT_TMPDIR/pt-sift.prefixes")"
         fi
      done
   fi

   KEY=""
   ACTION="DEFAULT"
   while [ "${KEY}" != "q" ]; do

      if [ "${ACTION}" != "INVALID" ]; then
         # Print the current host, timestamp and action.  Figure out if we're at
         # the first or last sample, to make it easy to navigate.
         PAGE="$(awk "/./{i++} /${PREFIX}/{c=i} END{print c, \"of\", i}" "$PT_TMPDIR/pt-sift.prefixes")"
         HOST="$(cat "${BASEDIR}/${PREFIX}-hostname" 2>/dev/null)"
         echo -e "======== ${HOST:-unknown} at \033[34m${PREFIX} \033[31m${ACTION}\033[0m (${PAGE}) ========"
      fi

      # Take an action based on the current $ACTION
      case "${ACTION}" in

         # Format a brief report: busiest device's disk stats, CPU stats
         DEFAULT)
            echo "--diskstats--"
            if [ -f "${BASEDIR}/${PREFIX}-diskstats" ]; then

               # Print the header line and the line with the highest
               # $3 + $9 total.
               "$PR_diskstats" --group-by disk "${BASEDIR}/${PREFIX}-diskstats" \
                  | awk '
                     /ts/ { header = $0 }
                     /[0-9]/ {
                        io  = $3 + $9;
                        if ( io >= mio ) {
                           mio   = io;
                           mseen = $0;
                        }
                     }
                     END {
                        print header;
                        print mseen;
                     }'

               # Find out which device was the busiest.
               mdev="$("$PR_diskstats" --group-by disk "${BASEDIR}/${PREFIX}-diskstats" \
                  | awk '
                     /[0-9]/ {
                        io  = $3 + $9;
                        if ( io >= mio ) {
                           mio   = io;
                           mdev  = $2;
                        }
                     }
                     END {
                        print mdev;
                     }')"

               # Print the busy% for that device, rounded to the nearest N%, with
               # "." as a marker for a repeated value.
               # NOTE(review): the awk pattern below is an assignment
               # ($1 = ...), not a comparison, so it is true for every input
               # line.  It looks like a device filter was intended; confirm
               # against pt-diskstats' --group-by sample output before
               # changing it.
               "$PR_diskstats" --group-by sample "${BASEDIR}/${PREFIX}-diskstats" \
                  | awk "
                     BEGIN {
                        fuzz = 5;
                        printf \" ${mdev} \"
                     }
                     \$1 = \"${mdev}\" {
                        busy_rounded = fuzz * sprintf(\"%d\", substr(\$15, 1, length(\$15) - 1) / fuzz);
                        if ( printed == 1 && prev == busy_rounded ) {
                           printf \" .\";
                        }
                        else {
                           printf \" %d%%\", busy_rounded;
                           prev    = busy_rounded;
                           printed = 1;
                        }
                     }"
               echo
            else
               echo "    No diskstats file exists"
            fi

            echo "--vmstat--"
            if [ -f "${BASEDIR}/${PREFIX}-vmstat" ]; then
               tail -n 3 "${BASEDIR}/${PREFIX}-vmstat-overall" | "$PR_align"

               # Figure out which column is 'wa' and print this, similar to the
               # busy% for disks above.
               wa_col="$(awk '/swpd/{for(i=1;i<=NF;++i){if($i=="wa"){print i; exit}}}' "${BASEDIR}/${PREFIX}-vmstat")"
               awk "
                  BEGIN {
                     fuzz = 5;
                     printf \"wa\"
                  }
                  /[0-9]/ {
                     wa_rounded = fuzz * sprintf(\"%d\", \$${wa_col} / fuzz);
                     if ( printed == 1 && prev == wa_rounded ) {
                        printf \" .\";
                     }
                     else {
                        printf \" %d%%\", wa_rounded;
                        prev    = wa_rounded;
                        printed = 1;
                     }
                  }" "${BASEDIR}/${PREFIX}-vmstat"
               echo
            else
               echo "    No vmstat file exists"
            fi

            echo "--innodb--"
            # For "Log sequence number" and "Last checkpoint at" the value is
            # either one number or two 32-bit words; the two-word form has
            # five fields on the line, hence the NF == 5 tests.
            awk '
               /queries inside/ {
                  inside = $0;
               }
               /Main thread/ {
                  main_state = substr($0, index($0, ":") + 2);
               }
               /Pending normal/ {
                  pending_reads += substr($5, 1, length($5) - 1);
                  pending_reads += substr($NF, 1, length($NF) - 1);
               }
               /ibuf aio reads/ {
                  pending_reads += substr($4, 1, length($4) - 1);
                  pending_reads += substr($7, 1, length($7) - 1);
                  pending_reads += $NF;
               }
               /Pending flushes/ {
                  pending_flushes = substr($5, 1, length($5) - 1) + $NF;
               }
               /pending preads/ {
                  pending_reads += $1;
                  pending_writes += $4;
               }
               /pending log writes/ {
                  pending_writes += $1 + $5;
               }
               /Pending reads/ {
                  pending_reads += $NF;
               }
               /Pending writes/ {
                  pending_writes += substr($4, 1, length($4) - 1);
                  pending_writes += substr($7, 1, length($7) - 1);
                  pending_writes += $NF;
               }
               /Log sequence number/ {
                  if ( NF == 5 ) {
                     lsn = ($4 * (2^32)) + $5;
                  }
                  else {
                     lsn = $4;
                  }
               }
               /Last checkpoint at/ {
                  if ( NF == 5 ) {
                     chkp = ($4 * (2^32)) + $5;
                  }
                  else {
                     chkp = $4;
                  }
               }
               /END OF INNODB/ {
                  complete = 1;
               }
               /^TRANSACTIONS$/ {
                  tseen = 1;
               }
               /^---TRANSACTION/ {
                  if ( tseen == 1 ) {
                     if ( $2 ~ /,/ ) {
                        status = $3;
                        time   = $4;
                     }
                     else {
                        status = $4;
                        time   = $5;
                     }
                     txns[status]++;
                     if ( time > txntime[status] ) {
                        txntime[status] = time;
                     }
                  }
               }
               /LOCK WAIT/ {
                  if ( tseen == 1 ) {
                     txns["LOCK WAIT"]++;
                     if ( $3 > txntime["LOCK WAIT"] ) {
                        txntime["LOCK WAIT"] = $3;
                     }
                  }
               }
               END {
                  if ( complete != 1 ) {
                     print "    (innodb status is incomplete)";
                  }
                  printf "    txns:";
                  for ( i in txns ) {
                     printf " %dx%s (%ds)", txns[i], i, txntime[i];
                  }
                  print "";
                  if ( inside ) {
                     print "    " inside;
                  }
                  printf "    Main thread: %s, pending reads %d, writes %d, flush %d\n", main_state, pending_reads, pending_writes, pending_flushes;
                  printf "    Log: lsn = %d, chkp = %d, chkp age = %d\n", lsn, chkp, lsn - chkp;
               }
            ' "${BASEDIR}/${PREFIX}-innodbstatus1"
            echo "    Threads are waiting at:"
            awk '/has waited at/ { print $6, $7, $8 }' \
              "${BASEDIR}/${PREFIX}-innodbstatus1" | sort | uniq -c | sort -rn
            echo "    Threads are waiting on:"
            awk '/^[XS]-lock on.*latch/ { print }' \
              "${BASEDIR}/${PREFIX}-innodbstatus1" | sort | uniq -c | sort -rn

            # This section checks for processlist or processlist1 for backwards
            # compatibility with the obsolete pt-collect tool.
            echo "--processlist--"
            local PROCESSLIST_FILE="${BASEDIR}/${PREFIX}-processlist"
            if [ -e "${BASEDIR}/${PREFIX}-processlist1" ]; then
               PROCESSLIST_FILE="${BASEDIR}/${PREFIX}-processlist1"
            fi
            for word in State Command; do
               echo "    $word"
               awk -F: -v column="$word" '
               BEGIN {
                  regex = "^ *" column
               }
               {
                  if ( $1 ~ regex ) {
                     print $2;
                  }
                  # Newer versions of pt-stalk gather several samples. We will
                  # analyze only the first sample.
                  if ( $0 ~ /^TS/ ) {
                     ts++;
                     if (ts > 1) {
                        exit
                     }
                  }
               }' "${PROCESSLIST_FILE}" \
                  | sort | uniq -c | sort -rn | head -n 5
            done

            echo "--stack traces--"
            if [ -e "${BASEDIR}/${PREFIX}-stacktrace" ]; then
               "$PR_pmp" -l 5 "${BASEDIR}/${PREFIX}-stacktrace" | head -n 5
            else
               echo "    No stack trace file exists"
            fi

            echo "--oprofile--"
            if [ -e "${BASEDIR}/${PREFIX}-opreport" ]; then
               # Print from the "samples" header line onwards, 7 lines at most.
               awk '
                  {
                     if ( $1 == "samples" ) {
                        go = 1;
                     }
                     if ( go == 1 ) {
                        print "    " $0;
                        if ( printed++ == 6 ) {
                           exit;
                        }
                     }
                  } ' "${BASEDIR}/${PREFIX}-opreport"
            else
               echo "    No opreport file exists"
            fi
            ;;

         LIST)
            # Only the trailing -* is left unquoted so that it still globs.
            ls -lh "${BASEDIR}/${PREFIX}"-*
            ;;

         VIEW)
            echo "Viewing all files"
            less -i "${BASEDIR}/${PREFIX}"-*
            echo "Press a key to continue or choose a different action"
            ;;

         DISKSTATS)
            echo "Starting $PR_diskstats"
            "$PR_diskstats" "${BASEDIR}/${PREFIX}-diskstats"
            echo "Press a key to continue or choose a different action"
            ;;

         INNODB)
            echo "Viewing InnoDB files"
            less -i "${BASEDIR}/${PREFIX}-innodbstatus1"
            echo "Press a key to continue or choose a different action"
            ;;

         MEXT)
            echo "Displaying the first 4 samples of SHOW STATUS counters"
            # Grab the first 4 samples by looking for blank lines.
            # I'll rewrite pt-mext and this will be simpler in future.
            # TODO: upgrade, if pt-mext is fixed :)
            awk '/---/{if(i++>12){exit}}{print}' "${BASEDIR}/${PREFIX}-mysqladmin" | "$PR_mext" -r -- cat - | less -S
            echo "Press a key to continue or choose a different action"
            ;;

         NETWORK)
            # In both reports the second line containing "TS" ends the scan,
            # so only the first sample is summarized.  substr() starts at 1
            # because awk strings are 1-based; a start of 0 drops the last
            # character of the address in POSIX-conforming awks.
            echo "Source of connections to port 3306"
            awk '
               /:3306/ {
                  print substr($5, 1, index($5, ":") - 1);
               }
               /TS/ {
                  if ( ++i > 1 ) {
                     # Stop after the first sample
                     exit;
                  }
               }' "${BASEDIR}/${PREFIX}-netstat" | sort | uniq -c | sort -rn
            echo "Status of connections to port 3306"
            awk '
               /:3306/ {
                  print $6;
               }
               /TS/ {
                  if ( ++i > 1 ) {
                     # Stop after the first sample
                     exit;
                  }
               }' "${BASEDIR}/${PREFIX}-netstat" | sort | uniq -c | sort -rn
            echo "Press a key to continue or choose a different action"
            ;;

         INVALID)
            ;;

      esac

      # Capture and handle the interactive key-strokes.
      tput sgr0
      KEY=""
      if ! read -r -n 1 -s KEY 2>/dev/null; then
         echo "Error while trying to read interactive keystroke command. Exiting."
         # Clean up before leaving.  Status 0 matches what the bare "exit"
         # used here previously returned.
         rm_tmpdir
         exit 0
      fi
      case "${KEY:-}" in
         j|k)
            # Walk the prefix list: "j" prints the entry after the current
            # PREFIX, "k" the one before it; at either end the current
            # PREFIX is printed unchanged.
            PREFIX="$(awk "
               BEGIN {
                  printed = 0;
               }
               {
                  prev=curr;
                  curr=\$1;
                  if ( \"j\" == \"${KEY}\" && prev == \"${PREFIX}\" && curr ~ /./ ) {
                     print curr;
                     printed = 1;
                     exit;
                  }
                  if ( \"k\" == \"${KEY}\" && curr == \"${PREFIX}\" && prev ~ /./ ) {
                     print prev;
                     printed = 1;
                     exit;
                  }
               }
               END {
                  if ( printed == 0 ) {
                     print \"${PREFIX}\";
                  }
               }" "$PT_TMPDIR/pt-sift.prefixes")"
            ;;
         1)
            ACTION="DEFAULT"
            ;;
         0)
            ACTION="LIST"
            ;;
         '*')
            ACTION="VIEW"
            ;;
         d)
            ACTION="DISKSTATS"
            ;;
         i)
            ACTION="INNODB"
            ;;
         m)
            ACTION="MEXT"
            ;;
         n)
            ACTION="NETWORK"
            ;;
         q)
            ;;
         '?')
            print_help
            echo "Press any key to continue"
            read -r -n 1 -s
            ;;
         *)
            echo "Unknown key '${KEY}'; press ? for help"
            ACTION="INVALID"
            ;;
      esac
   done

   rm_tmpdir
}

# Execute the program if it was not included from another file.  This makes it
# possible to include without executing, and thus test.
# Run main only when this file is executed as the tool itself.
# NOTE(review): when the second test runs, $_ presumably holds the last
# argument of the preceding `[` command rather than the script path, so that
# branch may never be taken - confirm before relying on it.
if    [ "${0##*/}" = "$TOOL" ] \
   || [ "${0##*/}" = "bash" -a "${_:-""}" = "$0" ]; then
   # Forward the arguments unchanged.  "${@:-""}" would turn "no arguments"
   # into one empty argument, which main would then treat as a bad
   # FILE|PREFIX|DIRECTORY.  The explicit branch also avoids expanding "$@"
   # with no arguments, which older Bash versions reject under "set -u".
   if [ $# -gt 0 ]; then
      main "$@"
   else
      main
   fi
fi

# ############################################################################
# Documentation
# ############################################################################
:<<'DOCUMENTATION'
=pod

=head1 NAME

pt-sift - Browses files created by pt-stalk.

=head1 SYNOPSIS

Usage: pt-sift FILE|PREFIX|DIRECTORY

pt-sift browses files created by L<pt-stalk>.  If no argument is given,
the tool browses all pt-stalk files in C</var/lib/pt-stalk> if that directory
exists, else the current working directory is used.  If a FILE is given,
the tool browses files with the same prefix in the given file's directory.
If a PREFIX is given, the tool browses files in C</var/lib/pt-stalk>
(or the current working directory) with the same prefix.  If a DIRECTORY
is given, the tool browses all pt-stalk files in it.

=head1 RISKS

Percona Toolkit is mature, proven in the real world, and well tested,
but all database tools can pose a risk to the system and the database
server.  Before using this tool, please:

=over

=item * Read the tool's documentation

=item * Review the tool's known L<"BUGS">

=item * Test the tool on a non-production server

=item * Backup your production server and verify the backups

=back

=head1 DESCRIPTION

pt-sift downloads other tools that it might need, such as L<pt-diskstats>,
and then makes a list of the unique timestamp prefixes of all the files in
the directory, as written by the L<pt-stalk> tool.  If the user specified
a timestamp on the command line, then it begins with that sample of data;
otherwise it begins by showing a list of the timestamps and prompting for
a selection.  Thereafter, it displays a summary of the selected sample, and
the user can navigate and inspect with keystrokes.  The keystroke commands
you can use are as follows:

=over

=item d

Sets the action to start the L<pt-diskstats> tool on the sample's disk
performance statistics.

=item i

Sets the action to view the first INNODB STATUS sample in less.

=item m

Displays the first 4 samples of SHOW STATUS counters side by side with the
L<pt-mext> tool.

=item n

Summarizes the first sample of netstat data in two ways: by originating host,
and by connection state.

=item j

Select the next timestamp as the active sample.

=item k

Select the previous timestamp as the active sample.

=item q

Quit the program.

=item 1

Sets the action for each sample to the default, which is to view a summary
of the sample.

=item 0

Sets the action to just list the files in the sample.

=item *

Sets the action to view all of the sample's files in the less program.

=back

=head1 OPTIONS

This tool does not have any command-line options.

=head1 ENVIRONMENT

This tool does not use any environment variables.

=head1 SYSTEM REQUIREMENTS

This tool requires Bash v3 and the following programs: pt-diskstats, pt-pmp,
pt-mext, and pt-align.  If these programs are not in your PATH, the current
working directory, or the directory being browsed, they will be fetched from
the Internet if curl is available.

=head1 BUGS

For a list of known bugs, see L<http://www.percona.com/bugs/pt-sift>.

Please report bugs at L<https://bugs.launchpad.net/percona-toolkit>.
Include the following information in your bug report:

=over

=item * Complete command-line used to run the tool

=item * Tool L<"--version">

=item * MySQL version of all servers involved

=item * Output from the tool including STDERR

=item * Input files (log/dump/config files, etc.)

=back

If possible, include debugging output by running the tool with C<PTDEBUG>;
see L<"ENVIRONMENT">.

=head1 DOWNLOADING

Visit L<http://www.percona.com/software/percona-toolkit/> to download the
latest release of Percona Toolkit.  Or, get the latest release from the
command line:

   wget percona.com/get/percona-toolkit.tar.gz

   wget percona.com/get/percona-toolkit.rpm

   wget percona.com/get/percona-toolkit.deb

You can also get individual tools from the latest release:

   wget percona.com/get/TOOL

Replace C<TOOL> with the name of any tool.

=head1 AUTHORS

Baron Schwartz

=head1 ABOUT PERCONA TOOLKIT

This tool is part of Percona Toolkit, a collection of advanced command-line
tools for MySQL developed by Percona.  Percona Toolkit was forked from two
projects in June, 2011: Maatkit and Aspersa.  Those projects were created by
Baron Schwartz and primarily developed by him and Daniel Nichter.  Visit
L<http://www.percona.com/software/> to learn about other free, open-source
software from Percona.

=head1 COPYRIGHT, LICENSE, AND WARRANTY

This program is copyright 2011-2013 Percona Ireland Ltd,
2010-2011 Baron Schwartz.

THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
systems, you can issue `man perlgpl' or `man perlartistic' to read these
licenses.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA  02111-1307  USA.

=head1 VERSION

pt-sift 2.2.1

=cut

DOCUMENTATION
