-
Notifications
You must be signed in to change notification settings - Fork 166
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
set sampling ratio of dropping for single syscall individually #1309
base: master
Are you sure you want to change the base?
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
5.0.0 | ||
5.1.0 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -394,6 +394,7 @@ static void check_remove_consumer(struct ppm_consumer_t *consumer, int remove_fr | |
static int ppm_open(struct inode *inode, struct file *filp) | ||
{ | ||
int ret; | ||
int i; | ||
int in_list = false; | ||
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 20) | ||
int ring_no = iminor(filp->f_path.dentry->d_inode); | ||
|
@@ -531,9 +532,12 @@ static int ppm_open(struct inode *inode, struct file *filp) | |
*/ | ||
consumer->dropping_mode = 0; | ||
consumer->snaplen = SNAPLEN; | ||
consumer->sampling_ratio = 1; | ||
consumer->sampling_interval = 0; | ||
consumer->is_dropping = 0; | ||
for (i = 0; i < SYSCALL_TABLE_SIZE; i++) | ||
{ | ||
consumer->sampling_ratio[i] = 1; | ||
consumer->sampling_interval[i] = 0; | ||
consumer->is_dropping[i] = false; | ||
} | ||
consumer->do_dynamic_snaplen = false; | ||
consumer->drop_failed = false; | ||
consumer->need_to_insert_drop_e = 0; | ||
|
@@ -956,17 +960,23 @@ static long ppm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |
switch (cmd) { | ||
case PPM_IOCTL_DISABLE_DROPPING_MODE: | ||
{ | ||
int i; | ||
vpr_info("PPM_IOCTL_DISABLE_DROPPING_MODE, consumer %p\n", consumer_id); | ||
|
||
consumer->dropping_mode = 0; | ||
consumer->sampling_interval = 1000000000; | ||
consumer->sampling_ratio = 1; | ||
for (i = 0; i < SYSCALL_TABLE_SIZE; i++) | ||
{ | ||
consumer->sampling_interval[i] = 1000000000; | ||
consumer->sampling_ratio[i] = 1; | ||
consumer->is_dropping[i] = false; | ||
} | ||
|
||
ret = 0; | ||
goto cleanup_ioctl; | ||
} | ||
case PPM_IOCTL_ENABLE_DROPPING_MODE: | ||
{ | ||
int i; | ||
u32 new_sampling_ratio; | ||
|
||
consumer->dropping_mode = 1; | ||
|
@@ -987,10 +997,13 @@ static long ppm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |
goto cleanup_ioctl; | ||
} | ||
|
||
consumer->sampling_interval = 1000000000 / new_sampling_ratio; | ||
consumer->sampling_ratio = new_sampling_ratio; | ||
for (i = 0; i < SYSCALL_TABLE_SIZE; i++) | ||
{ | ||
consumer->sampling_interval[i] = 1000000000 / new_sampling_ratio; | ||
consumer->sampling_ratio[i] = new_sampling_ratio; | ||
} | ||
|
||
vpr_info("new sampling ratio: %d\n", new_sampling_ratio); | ||
vpr_info("new default sampling ratio: %d\n", new_sampling_ratio); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This isn't really setting just the default, it's overwriting any previous per-syscall sampling ratios configured |
||
|
||
ret = 0; | ||
goto cleanup_ioctl; | ||
|
@@ -1186,6 +1199,43 @@ static long ppm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |
ret = 0; | ||
goto cleanup_ioctl; | ||
} | ||
case PPM_IOCTL_SET_DROPPING_RATIO: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Bikeshedding names is always fun, but I'd rather keep the Also, please consider (not forcing this in any way, just please consider :)) passing a (two-u32) struct by pointer instead of unpacking the arguments from a single (by-value) u64. |
||
{ | ||
u32 syscall_to_set = (arg >> 32) - SYSCALL_TABLE_ID0; | ||
u32 new_sampling_ratio = (u32)arg; | ||
|
||
vpr_info("PPM_IOCTL_SET_DROPPING_RATIO, syscall(%u), ratio(%u), consumer %p\n", syscall_to_set, new_sampling_ratio, consumer_id); | ||
|
||
if (syscall_to_set >= SYSCALL_TABLE_SIZE) { | ||
pr_err("invalid syscall %u\n", syscall_to_set); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This will appear in the kernel log without any extra context, so maybe add a few words here (like that we're in this particular ioctl for example). I'm sure there's precedent for cryptic log messages but let's try to make things better one line at a time :) |
||
ret = -EINVAL; | ||
goto cleanup_ioctl; | ||
} | ||
|
||
if ((g_syscall_table[syscall_to_set].flags & (UF_NEVER_DROP | UF_ALWAYS_DROP))) { | ||
ret = -EPERM; | ||
goto cleanup_ioctl; | ||
} | ||
|
||
if (new_sampling_ratio != 1 && | ||
new_sampling_ratio != 2 && | ||
new_sampling_ratio != 4 && | ||
new_sampling_ratio != 8 && | ||
new_sampling_ratio != 16 && | ||
new_sampling_ratio != 32 && | ||
new_sampling_ratio != 64 && | ||
new_sampling_ratio != 128) { | ||
pr_err("invalid sampling ratio %u\n", new_sampling_ratio); | ||
ret = -EINVAL; | ||
goto cleanup_ioctl; | ||
} | ||
|
||
consumer->sampling_interval[syscall_to_set] = 1000000000 / new_sampling_ratio; | ||
consumer->sampling_ratio[syscall_to_set] = new_sampling_ratio; | ||
pr_info("new sampling ratio %u, %u\n", syscall_to_set, new_sampling_ratio); | ||
ret = 0; | ||
goto cleanup_ioctl; | ||
} | ||
default: | ||
ret = -ENOTTY; | ||
goto cleanup_ioctl; | ||
|
@@ -1660,6 +1710,7 @@ static inline int drop_nostate_event(ppm_event_code event_type, | |
static inline int drop_event(struct ppm_consumer_t *consumer, | ||
ppm_event_code event_type, | ||
enum syscall_flags drop_flags, | ||
long table_index, | ||
nanoseconds ns, | ||
struct pt_regs *regs) | ||
{ | ||
|
@@ -1682,21 +1733,22 @@ static inline int drop_event(struct ppm_consumer_t *consumer, | |
ASSERT((drop_flags & UF_NEVER_DROP) == 0); | ||
return 1; | ||
} | ||
if (table_index != -1) { | ||
if (consumer->sampling_interval[table_index] < SECOND_IN_NS && | ||
/* do_div replaces ns2 with the quotient and returns the remainder */ | ||
do_div(ns2, SECOND_IN_NS) >= consumer->sampling_interval[table_index]) { | ||
if (!consumer->is_dropping[table_index]) { | ||
consumer->is_dropping[table_index] = true; | ||
record_drop_e(consumer, ns, drop_flags); | ||
} | ||
|
||
if (consumer->sampling_interval < SECOND_IN_NS && | ||
/* do_div replaces ns2 with the quotient and returns the remainder */ | ||
do_div(ns2, SECOND_IN_NS) >= consumer->sampling_interval) { | ||
if (consumer->is_dropping == 0) { | ||
consumer->is_dropping = 1; | ||
record_drop_e(consumer, ns, drop_flags); | ||
return 1; | ||
} | ||
|
||
return 1; | ||
} | ||
|
||
if (consumer->is_dropping == 1) { | ||
consumer->is_dropping = 0; | ||
record_drop_x(consumer, ns, drop_flags); | ||
if (consumer->is_dropping[table_index]) { | ||
consumer->is_dropping[table_index] = false; | ||
record_drop_x(consumer, ns, drop_flags); | ||
} | ||
} | ||
} | ||
|
||
|
@@ -1742,7 +1794,7 @@ static int record_event_consumer(struct ppm_consumer_t *consumer, | |
int drop = 1; | ||
int32_t cbres = PPM_SUCCESS; | ||
int cpu; | ||
long table_index; | ||
long table_index = -1; | ||
int64_t retval; | ||
|
||
if (tp_type < INTERNAL_EVENTS && !(consumer->tracepoints_attached & (1 << tp_type))) | ||
|
@@ -1807,6 +1859,7 @@ static int record_event_consumer(struct ppm_consumer_t *consumer, | |
if (drop_event(consumer, | ||
event_type, | ||
drop_flags, | ||
table_index, | ||
ns, | ||
event_datap->event_info.syscall_data.regs)) | ||
return res; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,7 +22,7 @@ int BPF_PROG(t1_drop_e) | |
|
||
/*=============================== COLLECT PARAMETERS ===========================*/ | ||
|
||
ringbuf__store_u32(&ringbuf, maps__get_sampling_ratio()); | ||
ringbuf__store_u32(&ringbuf, 0); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as for the other engines, we can't work without a reliable sampling ratio report (as seen by the driver) :< |
||
|
||
/*=============================== COLLECT PARAMETERS ===========================*/ | ||
|
||
|
@@ -47,7 +47,7 @@ int BPF_PROG(t1_drop_x) | |
|
||
/*=============================== COLLECT PARAMETERS ===========================*/ | ||
|
||
ringbuf__store_u32(&ringbuf, maps__get_sampling_ratio()); | ||
ringbuf__store_u32(&ringbuf, 0); | ||
|
||
/*=============================== COLLECT PARAMETERS ===========================*/ | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4525,7 +4525,7 @@ int f_sched_drop(struct event_filler_arguments *args) | |
/* | ||
* ratio | ||
*/ | ||
res = val_to_ring(args, args->consumer->sampling_ratio, 0, false, 0); | ||
res = val_to_ring(args, 0, 0, false, 0); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same. We need it :( |
||
if (unlikely(res != PPM_SUCCESS)) | ||
return res; | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If I'm correct, this is going to cause a lot of pain for us :( we actually do rely on getting the sampling ratio in drop events) and would require a major redesign once there's no single sampling ratio.
It feels like a way forward would be to keep the notion of a single sampling ratio and disable it only when a consumer uses the per-syscall ioctl (and we'd never do that then).
something like:
and in the ioctl that sets per syscall sampling ratios, just set
settings->sampling_ratio = PER_SYSCALL_SAMPLING_RATIO
tooThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If there are both global and per-system call ratios, which one should be reported here? Global? Or the one being sampled? In the komd driver, it seems difficult to obtain the system call being sampled based on its mechanism that may delay the insertion of drop events.