224 lines
6.4 KiB
Python
Executable File
224 lines
6.4 KiB
Python
Executable File
#! /usr/bin/python3
|
|
#
|
|
# cachestat Count cache kernel function calls.
|
|
# For Linux, uses BCC, eBPF. See .c file.
|
|
#
|
|
# USAGE: cachestat
|
|
# Taken from funccount by Brendan Gregg
|
|
# This is a rewrite of cachestat from perf to bcc
|
|
# https://github.com/brendangregg/perf-tools/blob/master/fs/cachestat
|
|
#
|
|
# Copyright (c) 2016 Allan McAleavy.
|
|
# Copyright (c) 2015 Brendan Gregg.
|
|
# Licensed under the Apache License, Version 2.0 (the "License")
|
|
#
|
|
# 09-Sep-2015 Brendan Gregg Created this.
|
|
# 06-Nov-2015 Allan McAleavy
|
|
# 13-Jan-2016 Allan McAleavy run pep8 against program
|
|
# 02-Feb-2019 Brendan Gregg Column shuffle, bring back %ratio
|
|
# 15-Feb-2023 Rong Tao Add writeback_dirty_{folio,page} tracepoints
|
|
|
|
from __future__ import print_function
|
|
from bcc import BPF
|
|
from time import sleep, strftime
|
|
import argparse
|
|
import signal
|
|
import re
|
|
from sys import argv
|
|
|
|
# signal handler
|
|
def signal_ignore(signal, frame):
|
|
print()
|
|
|
|
# Function to gather data from /proc/meminfo
|
|
# return dictionary for quicker lookup of both values
|
|
def get_meminfo():
|
|
result = dict()
|
|
|
|
for line in open('/proc/meminfo'):
|
|
k = line.split(':', 3)
|
|
v = k[1].split()
|
|
result[k[0]] = int(v[0])
|
|
return result
|
|
|
|
# set global variables
|
|
mpa = 0
|
|
mbd = 0
|
|
apcl = 0
|
|
apd = 0
|
|
total = 0
|
|
misses = 0
|
|
hits = 0
|
|
debug = 0
|
|
|
|
# arguments
|
|
parser = argparse.ArgumentParser(
|
|
description="Count cache kernel function calls",
|
|
formatter_class=argparse.RawDescriptionHelpFormatter)
|
|
parser.add_argument("-T", "--timestamp", action="store_true",
|
|
help="include timestamp on output")
|
|
parser.add_argument("interval", nargs="?", default=1,
|
|
help="output interval, in seconds")
|
|
parser.add_argument("count", nargs="?", default=-1,
|
|
help="number of outputs")
|
|
parser.add_argument("--ebpf", action="store_true",
|
|
help=argparse.SUPPRESS)
|
|
args = parser.parse_args()
|
|
count = int(args.count)
|
|
tstamp = args.timestamp
|
|
interval = int(args.interval)
|
|
|
|
# define BPF program
|
|
bpf_text = """
|
|
#include <uapi/linux/ptrace.h>
|
|
struct key_t {
|
|
// NF_{APCL,MPA,MBD,APD}
|
|
u32 nf;
|
|
};
|
|
|
|
enum {
|
|
NF_APCL,
|
|
NF_MPA,
|
|
NF_MBD,
|
|
NF_APD,
|
|
};
|
|
|
|
BPF_HASH(counts, struct key_t);
|
|
|
|
static int __do_count(void *ctx, u32 nf) {
|
|
struct key_t key = {};
|
|
u64 ip;
|
|
|
|
key.nf = nf;
|
|
counts.atomic_increment(key); // update counter
|
|
return 0;
|
|
}
|
|
|
|
int do_count_apcl(struct pt_regs *ctx) {
|
|
return __do_count(ctx, NF_APCL);
|
|
}
|
|
int do_count_mpa(struct pt_regs *ctx) {
|
|
return __do_count(ctx, NF_MPA);
|
|
}
|
|
int do_count_mbd(struct pt_regs *ctx) {
|
|
return __do_count(ctx, NF_MBD);
|
|
}
|
|
int do_count_apd(struct pt_regs *ctx) {
|
|
return __do_count(ctx, NF_APD);
|
|
}
|
|
int do_count_apd_tp(void *ctx) {
|
|
return __do_count(ctx, NF_APD);
|
|
}
|
|
"""
|
|
|
|
if debug or args.ebpf:
|
|
print(bpf_text)
|
|
if args.ebpf:
|
|
exit()
|
|
|
|
# load BPF program
|
|
b = BPF(text=bpf_text)
|
|
b.attach_kprobe(event="add_to_page_cache_lru", fn_name="do_count_apcl")
|
|
b.attach_kprobe(event="mark_page_accessed", fn_name="do_count_mpa")
|
|
|
|
# Function account_page_dirtied() is changed to folio_account_dirtied() in 5.15.
|
|
# Both folio_account_dirtied() and account_page_dirtied() are
|
|
# static functions and they may be gone during compilation and this may
|
|
# introduce some inaccuracy, use tracepoint writeback_dirty_{page,folio},
|
|
# instead when attaching kprobe fails, and report the running
|
|
# error in time.
|
|
if BPF.get_kprobe_functions(b'folio_account_dirtied'):
|
|
b.attach_kprobe(event="folio_account_dirtied", fn_name="do_count_apd")
|
|
elif BPF.get_kprobe_functions(b'account_page_dirtied'):
|
|
b.attach_kprobe(event="account_page_dirtied", fn_name="do_count_apd")
|
|
elif BPF.tracepoint_exists("writeback", "writeback_dirty_folio"):
|
|
b.attach_tracepoint(tp="writeback:writeback_dirty_folio", fn_name="do_count_apd_tp")
|
|
elif BPF.tracepoint_exists("writeback", "writeback_dirty_page"):
|
|
b.attach_tracepoint(tp="writeback:writeback_dirty_page", fn_name="do_count_apd_tp")
|
|
else:
|
|
raise Exception("Failed to attach kprobe %s or %s or any tracepoint" %
|
|
("folio_account_dirtied", "account_page_dirtied"))
|
|
b.attach_kprobe(event="mark_buffer_dirty", fn_name="do_count_mbd")
|
|
|
|
# check whether hash table batch ops is supported
|
|
htab_batch_ops = True if BPF.kernel_struct_has_field(b'bpf_map_ops',
|
|
b'map_lookup_and_delete_batch') == 1 else False
|
|
|
|
# header
|
|
if tstamp:
|
|
print("%-8s " % "TIME", end="")
|
|
print("%8s %8s %8s %8s %12s %10s" %
|
|
("HITS", "MISSES", "DIRTIES", "HITRATIO", "BUFFERS_MB", "CACHED_MB"))
|
|
|
|
loop = 0
|
|
exiting = 0
|
|
while 1:
|
|
if count > 0:
|
|
loop += 1
|
|
if loop > count:
|
|
exit()
|
|
|
|
try:
|
|
sleep(interval)
|
|
except KeyboardInterrupt:
|
|
exiting = 1
|
|
# as cleanup can take many seconds, trap Ctrl-C:
|
|
signal.signal(signal.SIGINT, signal_ignore)
|
|
|
|
counts = b["counts"]
|
|
for k, v in sorted(counts.items_lookup_and_delete_batch()
|
|
if htab_batch_ops else counts.items(),
|
|
key=lambda counts: counts[1].value):
|
|
# partial string matches in case of .isra (necessary?)
|
|
if k.nf == 0: # NF_APCL
|
|
apcl = max(0, v.value)
|
|
if k.nf == 1: # NF_MPA
|
|
mpa = max(0, v.value)
|
|
if k.nf == 2: # NF_MBD
|
|
mbd = max(0, v.value)
|
|
if k.nf == 3: # NF_APD
|
|
apd = max(0, v.value)
|
|
|
|
# total = total cache accesses without counting dirties
|
|
# misses = total of add to lru because of read misses
|
|
total = mpa - mbd
|
|
misses = apcl - apd
|
|
if misses < 0:
|
|
misses = 0
|
|
if total < 0:
|
|
total = 0
|
|
hits = total - misses
|
|
|
|
# If hits are < 0, then its possible misses are overestimated
|
|
# due to possibly page cache read ahead adding more pages than
|
|
# needed. In this case just assume misses as total and reset hits.
|
|
if hits < 0:
|
|
misses = total
|
|
hits = 0
|
|
ratio = 0
|
|
if total > 0:
|
|
ratio = float(hits) / total
|
|
|
|
if debug:
|
|
print("%d %d %d %d %d %d %d\n" %
|
|
(mpa, mbd, apcl, apd, total, misses, hits))
|
|
|
|
if not htab_batch_ops:
|
|
counts.clear()
|
|
|
|
# Get memory info
|
|
mem = get_meminfo()
|
|
cached = int(mem["Cached"]) / 1024
|
|
buff = int(mem["Buffers"]) / 1024
|
|
|
|
if tstamp:
|
|
print("%-8s " % strftime("%H:%M:%S"), end="")
|
|
print("%8d %8d %8d %7.2f%% %12.0f %10.0f" %
|
|
(hits, misses, mbd, 100 * ratio, buff, cached))
|
|
|
|
mpa = mbd = apcl = apd = total = misses = hits = cached = buff = 0
|
|
|
|
if exiting:
|
|
print("Detaching...")
|
|
exit()
|