Go to the documentation of this file.
32 #if HAVE_PTHREAD_SETAFFINITY_NP
48 #include "checkasm/checkasm.h"
57 #if HAVE_PTHREAD_SETAFFINITY_NP
60 #include <pthread_np.h>
70 #include <sys/prctl.h>
145 return sqrt(
exp(lvar) - 1.0);
186 json_var(json,
"regressionSlope", unit,
231 printf(
"name%csuffix%c%ss%cstddev%cnanoseconds\n", sep, sep,
238 printf(
"<!doctype html>\n"
241 " <meta charset=\"utf-8\"/>\n"
242 " <title>checkasm report</title>\n"
243 " <script type=\"module\">\n"
250 " <script type=\"application/json\" id=\"report-data\">\n",
287 char perf_scale_unit[32];
301 "time (nanoseconds)");
305 printf(
" nop:%*.1f +/- %-7.1f %11.1f ns +/- %-6.1f\n",
325 printf(
" - average timing error: %.3f%% across %d benchmarks "
326 "(maximum %.3f%%)\n",
327 100.0 * err_rel,
current.num_benched, 100.0 * err_max);
339 " <meta name=\"viewport\" content=\"width=device-width, "
340 "initial-scale=1\">\n"
366 int json_func_pushed = 0;
382 if (!json_func_pushed) {
388 json_func_pushed = 1;
393 json_var(json,
"rawTime",
"nsec", raw_time);
395 json_var(json,
"adjustedTime",
"nsec", time);
396 if (v !=
ref &&
ref->cycles.nb_measurements)
407 const int pad = 12 +
state.max_function_name_length
414 if (v !=
ref &&
ref->cycles.nb_measurements) {
429 }
while ((v = v->
next));
431 if (json_func_pushed) {
482 checkasm_simd_warmup();
508 const char *wild = strchr(pattern,
'*');
510 const size_t len = wild - pattern;
511 if (strncmp(str, pattern,
len))
513 while (*++wild ==
'*')
522 return strcmp(str, pattern);
541 if (
cpu &&
current.cpu_flags == prev_cpu_flags)
594 if (!
current.cpu_name_printed) {
603 #if HAVE_PTHREAD_SETAFFINITY_NP && defined(CPU_SET)
605 if (pthread_getaffinity_np(pthread_self(),
sizeof(
mask), &
mask))
609 for (
int c = 0;
c < CPU_SETSIZE;
c++) {
610 if (CPU_ISSET(
c, &
mask)) {
614 if (pthread_setaffinity_np(pthread_self(),
sizeof(
set), &
set)) {
621 pthread_setaffinity_np(pthread_self(),
sizeof(
mask), &
mask);
633 HANDLE
process = GetCurrentProcess();
634 #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
635 BOOL(WINAPI * spdcs)(HANDLE,
const ULONG *, ULONG) = (
void *) GetProcAddress(
636 GetModuleHandleW(
L"kernel32.dll"),
"SetProcessDefaultCpuSets");
638 affinity_err = !spdcs(
process, (ULONG[]) { (ULONG) affinity + 256 }, 1);
642 if (affinity <
sizeof(DWORD_PTR) * 8)
643 affinity_err = !SetProcessAffinityMask(
process, (DWORD_PTR) 1 << affinity);
647 #elif HAVE_PTHREAD_SETAFFINITY_NP && defined(CPU_SET)
650 CPU_SET(affinity, &
set);
651 affinity_err = pthread_setaffinity_np(pthread_self(),
sizeof(
set), &
set);
655 fprintf(stderr,
"checkasm: --affinity is not supported on your system\n");
660 fprintf(stderr,
"checkasm: invalid cpu affinity (%u)\n", affinity);
663 fprintf(stderr,
"checkasm: running on cpu %u\n", affinity);
693 while ((v = v->
next))
704 state.skip_tests = 1;
721 fprintf(
f,
" - CPU: ");
722 vfprintf(
f, fmt, ap);
739 " - Timing resolution: %.4f +/- %.3f ns/%s (%.0f +/- %.1f "
740 "MHz) (provisional)\n",
745 " - No-op overhead: %.2f +/- %.3f %ss per call (provisional)\n",
749 fprintf(stderr,
" - Bench duration: %d µs per function (%" PRIu64
" %ss)\n",
752 fprintf(stderr,
" - Random seed: %u\n",
cfg.
seed);
758 const int num_checked_asm =
current.num_checked -
current.num_funcs;
760 fprintf(stderr,
"checkasm: %d of %d tests failed\n",
current.num_failed,
762 }
else if (num_checked_asm) {
763 fprintf(stderr,
"checkasm: all %d tests passed\n", num_checked_asm);
765 fprintf(stderr,
"checkasm: no tests to perform\n");
777 fprintf(stderr,
"checkasm: interrupted\n");
787 fprintf(stderr,
"checkasm: built without HTML support\n");
797 #if HAVE_PRCTL && defined(PR_SET_UNALIGN)
798 prctl(PR_SET_UNALIGN, PR_UNALIGN_SIGBUS);
824 if (low_estimate <= 0.0) {
826 "checkasm: cycle counter seems to be non-functional "
827 "(invalid timer scale: %.4f %ss/nsec)\n",
843 fprintf(stderr,
" - Random seed: %u\n",
cfg.
seed);
877 if (!
version || name_length <= 0 || (
size_t) name_length >=
sizeof(name_buf)
899 fp->report_idx =
current.report_idx;
915 }
while ((v = v->
next));
923 name_length += (int) strlen(v->
suffix) + 1;
925 name_length +=
current.cpu_suffix_length;
928 if (name_length >
state.max_function_name_length)
929 state.max_function_name_length = name_length;
937 if (
state.skip_tests)
942 if (
f->report_idx <
current.report_idx) {
955 checkasm_simd_warmup();
971 va_start(
arg, id_fmt);
987 vfprintf(stderr, msg,
arg);
988 fputs(
")\n", stderr);
1021 #if CHECKASM_HAVE_LONGJMP
1034 char report_name[256];
1037 int length = (int) strlen(
current.test_name);
1045 if (length >
state.max_report_name_length)
1046 state.max_report_name_length = length;
1048 const int new_checked =
current.num_checked -
current.prev_checked;
1050 int pad_length = (int)
state.max_report_name_length + 3;
1051 assert(!
state.skip_tests);
1055 current.num_failed =
current.prev_failed + (new_checked - fails);
1065 pad_length -= fprintf(stderr,
" - %s.%s",
current.test_name, report_name);
1067 pad_length -= fprintf(stderr,
" - %s",
current.test_name);
1069 fprintf(stderr,
"%*c",
imax(pad_length, 0) + 2,
'[');
1073 current.should_fail ?
"EXPECTED" :
"OK");
1074 }
else if (!
current.should_fail)
1078 fprintf(stderr,
"]\n");
1101 "Usage: %s [options...] <random seed>\n"
1102 " <random seed> Use fixed value to seed the PRNG\n"
1104 " --affinity=<cpu> Run the process on CPU <cpu>\n"
1105 " --bench -b Benchmark the tested functions\n"
1106 " --csv, --tsv, --json, Choose output format for benchmarks\n"
1108 " --function=<pattern> -f Test only the functions matching "
1110 " --help -h Print this usage info\n"
1111 " --list-cpu-flags List available cpu flags\n"
1112 " --list-functions List available functions\n"
1113 " --list-tests List available tests\n"
1114 " --duration=<μs> Benchmark duration (per function) in "
1116 " --repeat[=<N>] Repeat tests N times, on successive seeds\n"
1117 " --test=<pattern> -t Test only <pattern>\n"
1118 " --verbose -v Print verbose timing info and failure "
1128 val = strtoul(str, &end,
base);
1129 if (errno || end == str || *end)
1131 #if !defined(__SIZEOF_LONG__) || !defined(__SIZEOF_INT__) || __SIZEOF_LONG__ > __SIZEOF_INT__
1135 if (
val > (
unsigned) -1)
1145 if (!strncmp(argv[1],
"--help", 6) || !strcmp(argv[1],
"-h")) {
1148 }
else if (!strcmp(argv[1],
"--list-cpu-flags")
1149 || !strcmp(argv[1],
"--list-cpuflags")) {
1152 }
else if (!strcmp(argv[1],
"--list-tests")) {
1155 }
else if (!strcmp(argv[1],
"--list-functions")) {
1158 }
else if (!strcmp(argv[1],
"--bench") || !strcmp(argv[1],
"-b")) {
1160 }
else if (!strncmp(argv[1],
"--bench=", 8)) {
1162 config->function_pattern = argv[1] + 8;
1163 }
else if (!strcmp(argv[1],
"--csv")) {
1165 }
else if (!strcmp(argv[1],
"--tsv")) {
1167 }
else if (!strcmp(argv[1],
"--json")) {
1169 }
else if (!strcmp(argv[1],
"--html")) {
1173 fprintf(stderr,
"checkasm: built without HTML support\n");
1176 }
else if (!strncmp(argv[1],
"--duration=", 11)) {
1177 const char *
const s = argv[1] + 11;
1179 fprintf(stderr,
"checkasm: invalid duration (%s)\n",
s);
1183 }
else if (!strncmp(argv[1],
"--test=", 7)) {
1184 config->test_pattern = argv[1] + 7;
1185 }
else if (!strcmp(argv[1],
"-t")) {
1186 config->test_pattern = argc > 1 ? argv[2] :
"";
1189 }
else if (!strncmp(argv[1],
"--function=", 11)) {
1190 config->function_pattern = argv[1] + 11;
1191 }
else if (!strcmp(argv[1],
"-f")) {
1192 config->function_pattern = argc > 1 ? argv[2] :
"";
1195 }
else if (!strcmp(argv[1],
"--verbose") || !strcmp(argv[1],
"-v")) {
1197 }
else if (!strncmp(argv[1],
"--affinity=", 11)) {
1198 const char *
const s = argv[1] + 11;
1199 config->cpu_affinity_set = 1;
1201 fprintf(stderr,
"checkasm: invalid cpu affinity (%s)\n",
s);
1205 }
else if (!strncmp(argv[1],
"--repeat=", 9)) {
1206 const char *
const s = argv[1] + 9;
1208 fprintf(stderr,
"checkasm: invalid number of repetitions (%s)\n",
s);
1212 }
else if (!strcmp(argv[1],
"--repeat")) {
1213 config->repeat = UINT_MAX;
1217 fprintf(stderr,
"checkasm: unknown option (%s)\n", argv[1]);
static void process(NormalizeContext *s, AVFrame *in, AVFrame *out)
volatile sig_atomic_t checkasm_interrupted
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf default minimum maximum flags name is the option name
__device__ int printf(const char *,...)
unsigned cpu_affinity
CPU core ID for process pinning.
COLD void checkasm_cpu_info(void(*info_cb)(void *priv, const char *fmt,...), void *priv, const CheckasmConfig *config)
static const char * ver_suffix(const CheckasmFuncVersion *ver)
CheckasmSample samples[CHECKASM_STATS_SAMPLES]
checkasm_jmp_buf checkasm_context
static const char checkasm_js[]
CheckasmKey checkasm_check_key(const CheckasmKey version, const char *const name,...)
int checkasm_perf_init(void)
#define checkasm_save_context(ctx)
int bench
Enable benchmarking.
void checkasm_measure_nop_cycles(CheckasmMeasurement *meas, uint64_t target_cycles)
void checkasm_list_cpu_flags(const CheckasmConfig *cfg)
Print available CPU flags to stdout.
static void cpu_info_json(void *priv, const char *fmt,...)
void checkasm_list_tests(const CheckasmConfig *config)
Print available tests.
const char * test_pattern
Pattern for filtering which tests to run.
static int fail_internal(const char *const msg, va_list arg)
#define CHECKASM_STATS_SAMPLES
static const char checkasm_chart_js[]
void checkasm_set_func_variant(const char *id_fmt,...)
CheckasmPerf checkasm_perf
static void handle_interrupt(void)
void checkasm_fprintf(FILE *const f, const int color, const char *const fmt,...) CHECKASM_PRINTF(3
CheckasmFormat
Output format for benchmark results.
@ CHECKASM_FORMAT_JSON
JSON structured output with all measurement data.
#define checkasm_load_context(ctx)
static void print_bench_iter(const CheckasmFunc *const f, struct IterState *const iter)
int checkasm_run_on_all_cores(void(*func)(void))
const CheckasmCpuInfo * cpu_flags
List of CPU flags understood by the implementation.
void checkasm_func_tree_uninit(CheckasmFuncTree *tree)
const CheckasmTest * tests
Array of test functions to execute.
static int imax(const int a, const int b)
const char * name
Name of the timing mechanism (e.g., "clock_gettime")
static double val(void *priv, double ch)
int max_function_name_length
static void check_cpu_flag(const CheckasmCpuInfo *cpu)
Describes a CPU feature flag/capability.
static CheckasmVar checkasm_measurement_result(const CheckasmMeasurement measurement)
static double checkasm_mode(const CheckasmVar x)
const char * function_pattern
Pattern for filtering which functions within tests to run.
int checkasm_should_fail(CheckasmCpu cpu_flags)
Mark a block of tests as expected to fail.
void checkasm_json(CheckasmJson *json, const char *key, const char *fmt,...) CHECKASM_PRINTF(3
static void set(uint8_t *a[], int ch, int index, int ch_count, enum AVSampleFormat f, double v)
static void * checkasm_mallocz(const size_t size)
static int parseu(unsigned *const dst, const char *const str, const int base)
void checkasm_list_functions(const CheckasmConfig *config)
Print available functions within tests.
void checkasm_set_signal_handlers(void)
void checkasm_bench_update(const int iterations, const uint64_t cycles)
Update benchmark statistics with timing results.
static double checkasm_mean(const CheckasmVar x)
static CheckasmVar checkasm_var_const(double x)
struct CheckasmFunc * prev
unsigned seed
Random number generator seed.
static int set_cpu_affinity(const unsigned affinity)
static void print_bench_footer(struct IterState *const iter)
and forward the result(frame or status change) to the corresponding input. If nothing is possible
unsigned bench_usec
Target benchmark duration in microseconds.
New swscale design to change SwsGraph is what coordinates multiple passes These can include cascaded scaling error diffusion and so on Or we could have separate passes for the vertical and horizontal scaling In between each SwsPass lies a fully allocated image buffer Graph passes may have different levels of e g we can have a single threaded error diffusion pass following a multi threaded scaling pass SwsGraph is internally recreated whenever the image format
Describes a single test function.
void checkasm_measure_perf_scale(CheckasmMeasurement *meas)
@ CHECKASM_FORMAT_PRETTY
Pretty-printed (colored) text output (default)
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
@ CHECKASM_FORMAT_TSV
Tab-separated values with optional header.
CheckasmVar checkasm_var_mul(CheckasmVar a, CheckasmVar b)
static void print_benchmarks(void)
static CheckasmConfig cfg
const char * suffix
Short suffix for function names (e.g., "sse2", "avx2")
CheckasmFuncVersion * func_ver
CheckasmVar checkasm_stats_estimate(const CheckasmStats *const stats)
static const char checkasm_html_body[]
static void checkasm_measurement_init(CheckasmMeasurement *measurement)
static int print_summary(void)
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
#define i(width, name, range_min, range_max)
static char * checkasm_strdup(const char *str)
Configuration structure for the checkasm test suite.
Test writing API for checkasm.
COLD void checkasm_init_cpu(void)
static double relative_error(double lvar)
const CheckasmCpuInfo * cpu
static CheckasmStats stats
void checkasm_fail_abort(const char *const msg,...)
@ CHECKASM_FORMAT_CSV
Comma-separated values with optional header.
@ CHECKASM_FORMAT_HTML
Interactive HTML report for web viewing.
CheckasmCpu checkasm_get_cpu_flags(void)
Get the current active set of CPU flags.
static void print_functions(const CheckasmFunc *const f)
CheckasmVar checkasm_var_sub(CheckasmVar a, CheckasmVar b)
void checkasm_setup_fprintf(void)
static void cpu_fprintf(void *priv, const char *fmt,...)
int checkasm_bench_func(void)
Check if current function should be benchmarked.
static void print_cpu_name(void)
CheckasmVar checkasm_var_div(CheckasmVar a, CheckasmVar b)
CheckasmMeasurement perf_scale
unsigned checkasm_seed(void)
static int wildstrcmp(const char *str, const char *pattern)
int checkasm_run(const CheckasmConfig *config)
Run all tests and benchmarks matching the specified patterns.
struct CheckasmFuncVersion * next
static void json_measurement(CheckasmJson *json, const char *key, const char *unit, const CheckasmMeasurement measurement)
int max_report_name_length
CheckasmCpu flag
Bitmask flag value for this CPU feature.
const CheckasmCpuInfo * cpu
void checkasm_srand(unsigned seed)
static double checkasm_median(const CheckasmVar x)
void checkasm_json_pop(CheckasmJson *json, char type)
static COLD void print_info(void)
static void checkasm_stats_count_grow(CheckasmStats *const stats, uint64_t cycles, uint64_t target_cycles)
const char * unit
Unit of measurement (e.g., "ns", "cycles")
int checkasm_main(CheckasmConfig *config, int argc, const char *argv[])
Main entry point for checkasm test programs.
static double checkasm_stddev(const CheckasmVar x)
void void checkasm_json_str(CheckasmJson *json, const char *key, const char *str)
int cpu_affinity_set
Enable process pinning via cpu_affinity.
static const char checkasm_css[]
const CheckasmCpuInfo * checkasm_get_cpu_info(void)
Get the CPU flag currently being tested.
int checkasm_bench_runs(void)
Get number of iterations for current benchmark run.
void checkasm_json_push(CheckasmJson *json, const char *const key, char type)
uintptr_t CheckasmKey
Opaque type used to identify function implementations.
double fmax(double, double)
const char * checkasm_get_last_signal_desc(void)
int seed_set
Enable using the seed value.
unsigned repeat
Number of times to repeat tests.
static char separator(CheckasmFormat format)
static const char * cpu_suffix(const CheckasmCpuInfo *cpu)
static void checkasm_stats_reset(CheckasmStats *const stats)
static int ref[MAX_W *MAX_W]
static void print_bench_header(struct IterState *const iter)
void checkasm_report(const char *const name,...)
char * checkasm_vasprintf(const char *fmt, va_list arg)
void checkasm_bench_finish(void)
Finalize and store benchmark results.
CheckasmCpu cpu
Detected CPU flags for the current system.
static void checkasm_measurement_update(CheckasmMeasurement *measurement, const CheckasmStats stats)
CheckasmFunc * checkasm_func_get(CheckasmFuncTree *tree, const char *const name)
static double checkasm_sample(const CheckasmVar x, const double q)
CheckasmFormat format
Output format for benchmark results.
int checkasm_fail_func(const char *const msg,...)
static void json_var(CheckasmJson *json, const char *key, const char *unit, const CheckasmVar var)
static struct @582 current
uint64_t CheckasmCpu
Opaque type representing a set of CPU feature flags.
static void print_usage(const char *const progname)
CheckasmMeasurement cycles
static void checkasm_stats_add(CheckasmStats *const stats, const CheckasmSample s)
void(* set_cpu_flags)(CheckasmCpu new_flags)
Callback invoked when active CPU flags change.
int verbose
Enable verbose output.
static void BS_FUNC() skip(BSCTX *bc, unsigned int n)
Skip n bits in the buffer.
CheckasmMeasurement nop_cycles