1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __KERNEL_PRINTK__ #define __KERNEL_PRINTK__ #include <stdarg.h> #include <linux/init.h> #include <linux/kern_levels.h> #include <linux/linkage.h> #include <linux/cache.h> #include <linux/ratelimit_types.h> extern const char linux_banner[]; extern const char linux_proc_banner[]; extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */ #define PRINTK_MAX_SINGLE_HEADER_LEN 2 static inline int printk_get_level(const char *buffer) { if (buffer[0] == KERN_SOH_ASCII && buffer[1]) { switch (buffer[1]) { case '0' ... '7': case 'c': /* KERN_CONT */ return buffer[1]; } } return 0; } static inline const char *printk_skip_level(const char *buffer) { if (printk_get_level(buffer)) return buffer + 2; return buffer; } static inline const char *printk_skip_headers(const char *buffer) { while (printk_get_level(buffer)) buffer = printk_skip_level(buffer); return buffer; } #define CONSOLE_EXT_LOG_MAX 8192 /* printk's without a loglevel use this.. */ #define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT /* We show everything that is MORE important than this.. */ #define CONSOLE_LOGLEVEL_SILENT 0 /* Mum's the word */ #define CONSOLE_LOGLEVEL_MIN 1 /* Minimum loglevel we let people use */ #define CONSOLE_LOGLEVEL_DEBUG 10 /* issue debug messages */ #define CONSOLE_LOGLEVEL_MOTORMOUTH 15 /* You can't shut this one up */ /* * Default used to be hard-coded at 7, quiet used to be hardcoded at 4, * we're now allowing both to be set from kernel config. */ #define CONSOLE_LOGLEVEL_DEFAULT CONFIG_CONSOLE_LOGLEVEL_DEFAULT #define CONSOLE_LOGLEVEL_QUIET CONFIG_CONSOLE_LOGLEVEL_QUIET extern int console_printk[]; #define console_loglevel (console_printk[0]) #define default_message_loglevel (console_printk[1]) #define minimum_console_loglevel (console_printk[2]) #define default_console_loglevel (console_printk[3]) static inline void console_silent(void) { console_loglevel = CONSOLE_LOGLEVEL_SILENT; } static inline void console_verbose(void) { if (console_loglevel) console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH; } /* strlen("ratelimit") + 1 */ #define DEVKMSG_STR_MAX_SIZE 10 extern char devkmsg_log_str[]; struct ctl_table; extern int suppress_printk; struct va_format { const char *fmt; va_list *va; }; /* * FW_BUG * Add this to a message where you are sure the firmware is buggy or behaves * really stupid or out of spec. Be aware that the responsible BIOS developer * should be able to fix this issue or at least get a concrete idea of the * problem by reading your message without the need of looking at the kernel * code. * * Use it for definite and high priority BIOS bugs. * * FW_WARN * Use it for not that clear (e.g. could the kernel messed up things already?) * and medium priority BIOS bugs. * * FW_INFO * Use this one if you want to tell the user or vendor about something * suspicious, but generally harmless related to the firmware. * * Use it for information or very low priority BIOS bugs. */ #define FW_BUG "[Firmware Bug]: " #define FW_WARN "[Firmware Warn]: " #define FW_INFO "[Firmware Info]: " /* * HW_ERR * Add this to a message for hardware errors, so that user can report * it to hardware vendor instead of LKML or software vendor. */ #define HW_ERR "[Hardware Error]: " /* * DEPRECATED * Add this to a message whenever you want to warn user space about the use * of a deprecated aspect of an API so they can stop using it */ #define DEPRECATED "[Deprecated]: " /* * Dummy printk for disabled debugging statements to use whilst maintaining * gcc's format checking. */ #define no_printk(fmt, ...) \ ({ \ if (0) \ printk(fmt, ##__VA_ARGS__); \ 0; \ }) #ifdef CONFIG_EARLY_PRINTK extern asmlinkage __printf(1, 2) void early_printk(const char *fmt, ...); #else static inline __printf(1, 2) __cold void early_printk(const char *s, ...) { } #endif #ifdef CONFIG_PRINTK_NMI extern void printk_nmi_enter(void); extern void printk_nmi_exit(void); extern void printk_nmi_direct_enter(void); extern void printk_nmi_direct_exit(void); #else static inline void printk_nmi_enter(void) { } static inline void printk_nmi_exit(void) { } static inline void printk_nmi_direct_enter(void) { } static inline void printk_nmi_direct_exit(void) { } #endif /* PRINTK_NMI */ struct dev_printk_info; #ifdef CONFIG_PRINTK asmlinkage __printf(4, 0) int vprintk_emit(int facility, int level, const struct dev_printk_info *dev_info, const char *fmt, va_list args); asmlinkage __printf(1, 0) int vprintk(const char *fmt, va_list args); asmlinkage __printf(1, 2) __cold int printk(const char *fmt, ...); /* * Special printk facility for scheduler/timekeeping use only, _DO_NOT_USE_ ! */ __printf(1, 2) __cold int printk_deferred(const char *fmt, ...); /* * Please don't use printk_ratelimit(), because it shares ratelimiting state * with all other unrelated printk_ratelimit() callsites. Instead use * printk_ratelimited() or plain old __ratelimit(). */ extern int __printk_ratelimit(const char *func); #define printk_ratelimit() __printk_ratelimit(__func__) extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec); extern int printk_delay_msec; extern int dmesg_restrict; extern int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, void *buf, size_t *lenp, loff_t *ppos); extern void wake_up_klogd(void); char *log_buf_addr_get(void); u32 log_buf_len_get(void); void log_buf_vmcoreinfo_setup(void); void __init setup_log_buf(int early); __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...); void dump_stack_print_info(const char *log_lvl); void show_regs_print_info(const char *log_lvl); extern asmlinkage void dump_stack(void) __cold; extern void printk_safe_flush(void); extern void printk_safe_flush_on_panic(void); #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) { return 0; } static inline __printf(1, 2) __cold int printk(const char *s, ...) { return 0; } static inline __printf(1, 2) __cold int printk_deferred(const char *s, ...) { return 0; } static inline int printk_ratelimit(void) { return 0; } static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec) { return false; } static inline void wake_up_klogd(void) { } static inline char *log_buf_addr_get(void) { return NULL; } static inline u32 log_buf_len_get(void) { return 0; } static inline void log_buf_vmcoreinfo_setup(void) { } static inline void setup_log_buf(int early) { } static inline __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...) { } static inline void dump_stack_print_info(const char *log_lvl) { } static inline void show_regs_print_info(const char *log_lvl) { } static inline void dump_stack(void) { } static inline void printk_safe_flush(void) { } static inline void printk_safe_flush_on_panic(void) { } #endif extern int kptr_restrict; /** * pr_fmt - used by the pr_*() macros to generate the printk format string * @fmt: format string passed from a pr_*() macro * * This macro can be used to generate a unified format string for pr_*() * macros. A common use is to prefix all pr_*() messages in a file with a common * string. For example, defining this at the top of a source file: * * #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt * * would prefix all pr_info, pr_emerg... messages in the file with the module * name. */ #ifndef pr_fmt #define pr_fmt(fmt) fmt #endif /** * pr_emerg - Print an emergency-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_EMERG loglevel. It uses pr_fmt() to * generate the format string. */ #define pr_emerg(fmt, ...) \ printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) /** * pr_alert - Print an alert-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_ALERT loglevel. It uses pr_fmt() to * generate the format string. */ #define pr_alert(fmt, ...) \ printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) /** * pr_crit - Print a critical-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_CRIT loglevel. It uses pr_fmt() to * generate the format string. */ #define pr_crit(fmt, ...) \ printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) /** * pr_err - Print an error-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_ERR loglevel. It uses pr_fmt() to * generate the format string. */ #define pr_err(fmt, ...) \ printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) /** * pr_warn - Print a warning-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_WARNING loglevel. It uses pr_fmt() * to generate the format string. */ #define pr_warn(fmt, ...) \ printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) /** * pr_notice - Print a notice-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_NOTICE loglevel. It uses pr_fmt() to * generate the format string. */ #define pr_notice(fmt, ...) \ printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) /** * pr_info - Print an info-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_INFO loglevel. It uses pr_fmt() to * generate the format string. */ #define pr_info(fmt, ...) \ printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) /** * pr_cont - Continues a previous log message in the same line. * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_CONT loglevel. It should only be * used when continuing a log message with no newline ('\n') enclosed. Otherwise * it defaults back to KERN_DEFAULT loglevel. */ #define pr_cont(fmt, ...) \ printk(KERN_CONT fmt, ##__VA_ARGS__) /** * pr_devel - Print a debug-level message conditionally * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_DEBUG loglevel if DEBUG is * defined. Otherwise it does nothing. * * It uses pr_fmt() to generate the format string. */ #ifdef DEBUG #define pr_devel(fmt, ...) \ printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_devel(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif /* If you are writing a driver, please use dev_dbg instead */ #if defined(CONFIG_DYNAMIC_DEBUG) || \ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) #include <linux/dynamic_debug.h> /** * pr_debug - Print a debug-level message conditionally * @fmt: format string * @...: arguments for the format string * * This macro expands to dynamic_pr_debug() if CONFIG_DYNAMIC_DEBUG is * set. Otherwise, if DEBUG is defined, it's equivalent to a printk with * KERN_DEBUG loglevel. If DEBUG is not defined it does nothing. * * It uses pr_fmt() to generate the format string (dynamic_pr_debug() uses * pr_fmt() internally). */ #define pr_debug(fmt, ...) \ dynamic_pr_debug(fmt, ##__VA_ARGS__) #elif defined(DEBUG) #define pr_debug(fmt, ...) \ printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_debug(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif /* * Print a one-time message (analogous to WARN_ONCE() et al): */ #ifdef CONFIG_PRINTK #define printk_once(fmt, ...) \ ({ \ static bool __section(".data.once") __print_once; \ bool __ret_print_once = !__print_once; \ \ if (!__print_once) { \ __print_once = true; \ printk(fmt, ##__VA_ARGS__); \ } \ unlikely(__ret_print_once); \ }) #define printk_deferred_once(fmt, ...) \ ({ \ static bool __section(".data.once") __print_once; \ bool __ret_print_once = !__print_once; \ \ if (!__print_once) { \ __print_once = true; \ printk_deferred(fmt, ##__VA_ARGS__); \ } \ unlikely(__ret_print_once); \ }) #else #define printk_once(fmt, ...) \ no_printk(fmt, ##__VA_ARGS__) #define printk_deferred_once(fmt, ...) \ no_printk(fmt, ##__VA_ARGS__) #endif #define pr_emerg_once(fmt, ...) \ printk_once(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) #define pr_alert_once(fmt, ...) \ printk_once(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) #define pr_crit_once(fmt, ...) \ printk_once(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) #define pr_err_once(fmt, ...) \ printk_once(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) #define pr_warn_once(fmt, ...) \ printk_once(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) #define pr_notice_once(fmt, ...) \ printk_once(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) #define pr_info_once(fmt, ...) \ printk_once(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) /* no pr_cont_once, don't do that... */ #if defined(DEBUG) #define pr_devel_once(fmt, ...) \ printk_once(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_devel_once(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif /* If you are writing a driver, please use dev_dbg instead */ #if defined(DEBUG) #define pr_debug_once(fmt, ...) \ printk_once(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_debug_once(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif /* * ratelimited messages with local ratelimit_state, * no local ratelimit_state used in the !PRINTK case */ #ifdef CONFIG_PRINTK #define printk_ratelimited(fmt, ...) \ ({ \ static DEFINE_RATELIMIT_STATE(_rs, \ DEFAULT_RATELIMIT_INTERVAL, \ DEFAULT_RATELIMIT_BURST); \ \ if (__ratelimit(&_rs)) \ printk(fmt, ##__VA_ARGS__); \ }) #else #define printk_ratelimited(fmt, ...) \ no_printk(fmt, ##__VA_ARGS__) #endif #define pr_emerg_ratelimited(fmt, ...) \ printk_ratelimited(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) #define pr_alert_ratelimited(fmt, ...) \ printk_ratelimited(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) #define pr_crit_ratelimited(fmt, ...) \ printk_ratelimited(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) #define pr_err_ratelimited(fmt, ...) \ printk_ratelimited(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) #define pr_warn_ratelimited(fmt, ...) \ printk_ratelimited(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) #define pr_notice_ratelimited(fmt, ...) \ printk_ratelimited(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) #define pr_info_ratelimited(fmt, ...) \ printk_ratelimited(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) /* no pr_cont_ratelimited, don't do that... */ #if defined(DEBUG) #define pr_devel_ratelimited(fmt, ...) \ printk_ratelimited(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_devel_ratelimited(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif /* If you are writing a driver, please use dev_dbg instead */ #if defined(CONFIG_DYNAMIC_DEBUG) || \ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) /* descriptor check is first to prevent flooding with "callbacks suppressed" */ #define pr_debug_ratelimited(fmt, ...) \ do { \ static DEFINE_RATELIMIT_STATE(_rs, \ DEFAULT_RATELIMIT_INTERVAL, \ DEFAULT_RATELIMIT_BURST); \ DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, pr_fmt(fmt)); \ if (DYNAMIC_DEBUG_BRANCH(descriptor) && \ __ratelimit(&_rs)) \ __dynamic_pr_debug(&descriptor, pr_fmt(fmt), ##__VA_ARGS__); \ } while (0) #elif defined(DEBUG) #define pr_debug_ratelimited(fmt, ...) \ printk_ratelimited(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_debug_ratelimited(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif extern const struct file_operations kmsg_fops; enum { DUMP_PREFIX_NONE, DUMP_PREFIX_ADDRESS, DUMP_PREFIX_OFFSET }; extern int hex_dump_to_buffer(const void *buf, size_t len, int rowsize, int groupsize, char *linebuf, size_t linebuflen, bool ascii); #ifdef CONFIG_PRINTK extern void print_hex_dump(const char *level, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii); #else static inline void print_hex_dump(const char *level, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii) { } static inline void print_hex_dump_bytes(const char *prefix_str, int prefix_type, const void *buf, size_t len) { } #endif #if defined(CONFIG_DYNAMIC_DEBUG) || \ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) #define print_hex_dump_debug(prefix_str, prefix_type, rowsize, \ groupsize, buf, len, ascii) \ dynamic_hex_dump(prefix_str, prefix_type, rowsize, \ groupsize, buf, len, ascii) #elif defined(DEBUG) #define print_hex_dump_debug(prefix_str, prefix_type, rowsize, \ groupsize, buf, len, ascii) \ print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, rowsize, \ groupsize, buf, len, ascii) #else static inline void print_hex_dump_debug(const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii) { } #endif /** * print_hex_dump_bytes - shorthand form of print_hex_dump() with default params * @prefix_str: string to prefix each line with; * caller supplies trailing spaces for alignment if desired * @prefix_type: controls whether prefix of an offset, address, or none * is printed (%DUMP_PREFIX_OFFSET, %DUMP_PREFIX_ADDRESS, %DUMP_PREFIX_NONE) * @buf: data blob to dump * @len: number of bytes in the @buf * * Calls print_hex_dump(), with log level of KERN_DEBUG, * rowsize of 16, groupsize of 1, and ASCII output included. */ #define print_hex_dump_bytes(prefix_str, prefix_type, buf, len) \ print_hex_dump_debug(prefix_str, prefix_type, 16, 1, buf, len, true) #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _X86_IRQFLAGS_H_ #define _X86_IRQFLAGS_H_ #include <asm/processor-flags.h> #ifndef __ASSEMBLY__ #include <asm/nospec-branch.h> /* Provide __cpuidle; we can't safely include <linux/cpu.h> */ #define __cpuidle __section(".cpuidle.text") /* * Interrupt control: */ /* Declaration required for gcc < 4.9 to prevent -Werror=missing-prototypes */ extern inline unsigned long native_save_fl(void); extern __always_inline unsigned long native_save_fl(void) { unsigned long flags; /* * "=rm" is safe here, because "pop" adjusts the stack before * it evaluates its effective address -- this is part of the * documented behavior of the "pop" instruction. */ asm volatile("# __raw_save_flags\n\t" "pushf ; pop %0" : "=rm" (flags) : /* no input */ : "memory"); return flags; } extern inline void native_restore_fl(unsigned long flags); extern inline void native_restore_fl(unsigned long flags) { asm volatile("push %0 ; popf" : /* no output */ :"g" (flags) :"memory", "cc"); } static __always_inline void native_irq_disable(void) { asm volatile("cli": : :"memory"); } static __always_inline void native_irq_enable(void) { asm volatile("sti": : :"memory"); } static inline __cpuidle void native_safe_halt(void) { mds_idle_clear_cpu_buffers(); asm volatile("sti; hlt": : :"memory"); } static inline __cpuidle void native_halt(void) { mds_idle_clear_cpu_buffers(); asm volatile("hlt": : :"memory"); } #endif #ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else #ifndef __ASSEMBLY__ #include <linux/types.h> static __always_inline unsigned long arch_local_save_flags(void) { return native_save_fl(); } static __always_inline void arch_local_irq_restore(unsigned long flags) { native_restore_fl(flags); } static __always_inline void arch_local_irq_disable(void) { native_irq_disable(); } static __always_inline void arch_local_irq_enable(void) { native_irq_enable(); } /* * Used in the idle loop; sti takes one instruction cycle * to complete: */ static inline __cpuidle void arch_safe_halt(void) { native_safe_halt(); } /* * Used when interrupts are already enabled or to * shutdown the processor: */ static inline __cpuidle void halt(void) { native_halt(); } /* * For spinlocks, etc: */ static __always_inline unsigned long arch_local_irq_save(void) { unsigned long flags = arch_local_save_flags(); arch_local_irq_disable(); return flags; } #else #define ENABLE_INTERRUPTS(x) sti #define DISABLE_INTERRUPTS(x) cli #ifdef CONFIG_X86_64 #ifdef CONFIG_DEBUG_ENTRY #define SAVE_FLAGS(x) pushfq; popq %rax #endif #define INTERRUPT_RETURN jmp native_iret #define USERGS_SYSRET64 \ swapgs; \ sysretq; #define USERGS_SYSRET32 \ swapgs; \ sysretl #else #define INTERRUPT_RETURN iret #endif #endif /* __ASSEMBLY__ */ #endif /* CONFIG_PARAVIRT_XXL */ #ifndef __ASSEMBLY__ static __always_inline int arch_irqs_disabled_flags(unsigned long flags) { return !(flags & X86_EFLAGS_IF); } static __always_inline int arch_irqs_disabled(void) { unsigned long flags = arch_local_save_flags(); return arch_irqs_disabled_flags(flags); } #else #ifdef CONFIG_X86_64 #ifdef CONFIG_XEN_PV #define SWAPGS ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV #else #define SWAPGS swapgs #endif #endif #endif /* !__ASSEMBLY__ */ #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 /* SPDX-License-Identifier: GPL-2.0 */ /* * linux/include/linux/relay.h * * Copyright (C) 2002, 2003 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp * Copyright (C) 1999, 2000, 2001, 2002 - Karim Yaghmour (karim@opersys.com) * * CONFIG_RELAY definitions and declarations */ #ifndef _LINUX_RELAY_H #define _LINUX_RELAY_H #include <linux/types.h> #include <linux/sched.h> #include <linux/timer.h> #include <linux/wait.h> #include <linux/list.h> #include <linux/irq_work.h> #include <linux/bug.h> #include <linux/fs.h> #include <linux/poll.h> #include <linux/kref.h> #include <linux/percpu.h> /* * Tracks changes to rchan/rchan_buf structs */ #define RELAYFS_CHANNEL_VERSION 7 /* * Per-cpu relay channel buffer */ struct rchan_buf { void *start; /* start of channel buffer */ void *data; /* start of current sub-buffer */ size_t offset; /* current offset into sub-buffer */ size_t subbufs_produced; /* count of sub-buffers produced */ size_t subbufs_consumed; /* count of sub-buffers consumed */ struct rchan *chan; /* associated channel */ wait_queue_head_t read_wait; /* reader wait queue */ struct irq_work wakeup_work; /* reader wakeup */ struct dentry *dentry; /* channel file dentry */ struct kref kref; /* channel buffer refcount */ struct page **page_array; /* array of current buffer pages */ unsigned int page_count; /* number of current buffer pages */ unsigned int finalized; /* buffer has been finalized */ size_t *padding; /* padding counts per sub-buffer */ size_t prev_padding; /* temporary variable */ size_t bytes_consumed; /* bytes consumed in cur read subbuf */ size_t early_bytes; /* bytes consumed before VFS inited */ unsigned int cpu; /* this buf's cpu */ } ____cacheline_aligned; /* * Relay channel data structure */ struct rchan { u32 version; /* the version of this struct */ size_t subbuf_size; /* sub-buffer size */ size_t n_subbufs; /* number of sub-buffers per buffer */ size_t alloc_size; /* total buffer size allocated */ struct rchan_callbacks *cb; /* client callbacks */ struct kref kref; /* channel refcount */ void *private_data; /* for user-defined data */ size_t last_toobig; /* tried to log event > subbuf size */ struct rchan_buf * __percpu *buf; /* per-cpu channel buffers */ int is_global; /* One global buffer ? */ struct list_head list; /* for channel list */ struct dentry *parent; /* parent dentry passed to open */ int has_base_filename; /* has a filename associated? */ char base_filename[NAME_MAX]; /* saved base filename */ }; /* * Relay channel client callbacks */ struct rchan_callbacks { /* * subbuf_start - called on buffer-switch to a new sub-buffer * @buf: the channel buffer containing the new sub-buffer * @subbuf: the start of the new sub-buffer * @prev_subbuf: the start of the previous sub-buffer * @prev_padding: unused space at the end of previous sub-buffer * * The client should return 1 to continue logging, 0 to stop * logging. * * NOTE: subbuf_start will also be invoked when the buffer is * created, so that the first sub-buffer can be initialized * if necessary. In this case, prev_subbuf will be NULL. * * NOTE: the client can reserve bytes at the beginning of the new * sub-buffer by calling subbuf_start_reserve() in this callback. */ int (*subbuf_start) (struct rchan_buf *buf, void *subbuf, void *prev_subbuf, size_t prev_padding); /* * buf_mapped - relay buffer mmap notification * @buf: the channel buffer * @filp: relay file pointer * * Called when a relay file is successfully mmapped */ void (*buf_mapped)(struct rchan_buf *buf, struct file *filp); /* * buf_unmapped - relay buffer unmap notification * @buf: the channel buffer * @filp: relay file pointer * * Called when a relay file is successfully unmapped */ void (*buf_unmapped)(struct rchan_buf *buf, struct file *filp); /* * create_buf_file - create file to represent a relay channel buffer * @filename: the name of the file to create * @parent: the parent of the file to create * @mode: the mode of the file to create * @buf: the channel buffer * @is_global: outparam - set non-zero if the buffer should be global * * Called during relay_open(), once for each per-cpu buffer, * to allow the client to create a file to be used to * represent the corresponding channel buffer. If the file is * created outside of relay, the parent must also exist in * that filesystem. * * The callback should return the dentry of the file created * to represent the relay buffer. * * Setting the is_global outparam to a non-zero value will * cause relay_open() to create a single global buffer rather * than the default set of per-cpu buffers. * * See Documentation/filesystems/relay.rst for more info. */ struct dentry *(*create_buf_file)(const char *filename, struct dentry *parent, umode_t mode, struct rchan_buf *buf, int *is_global); /* * remove_buf_file - remove file representing a relay channel buffer * @dentry: the dentry of the file to remove * * Called during relay_close(), once for each per-cpu buffer, * to allow the client to remove a file used to represent a * channel buffer. * * The callback should return 0 if successful, negative if not. */ int (*remove_buf_file)(struct dentry *dentry); }; /* * CONFIG_RELAY kernel API, kernel/relay.c */ struct rchan *relay_open(const char *base_filename, struct dentry *parent, size_t subbuf_size, size_t n_subbufs, struct rchan_callbacks *cb, void *private_data); extern int relay_late_setup_files(struct rchan *chan, const char *base_filename, struct dentry *parent); extern void relay_close(struct rchan *chan); extern void relay_flush(struct rchan *chan); extern void relay_subbufs_consumed(struct rchan *chan, unsigned int cpu, size_t consumed); extern void relay_reset(struct rchan *chan); extern int relay_buf_full(struct rchan_buf *buf); extern size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length); /** * relay_write - write data into the channel * @chan: relay channel * @data: data to be written * @length: number of bytes to write * * Writes data into the current cpu's channel buffer. * * Protects the buffer by disabling interrupts. Use this * if you might be logging from interrupt context. Try * __relay_write() if you know you won't be logging from * interrupt context. */ static inline void relay_write(struct rchan *chan, const void *data, size_t length) { unsigned long flags; struct rchan_buf *buf; local_irq_save(flags); buf = *this_cpu_ptr(chan->buf); if (unlikely(buf->offset + length > chan->subbuf_size)) length = relay_switch_subbuf(buf, length); memcpy(buf->data + buf->offset, data, length); buf->offset += length; local_irq_restore(flags); } /** * __relay_write - write data into the channel * @chan: relay channel * @data: data to be written * @length: number of bytes to write * * Writes data into the current cpu's channel buffer. * * Protects the buffer by disabling preemption. Use * relay_write() if you might be logging from interrupt * context. */ static inline void __relay_write(struct rchan *chan, const void *data, size_t length) { struct rchan_buf *buf; buf = *get_cpu_ptr(chan->buf); if (unlikely(buf->offset + length > buf->chan->subbuf_size)) length = relay_switch_subbuf(buf, length); memcpy(buf->data + buf->offset, data, length); buf->offset += length; put_cpu_ptr(chan->buf); } /** * relay_reserve - reserve slot in channel buffer * @chan: relay channel * @length: number of bytes to reserve * * Returns pointer to reserved slot, NULL if full. * * Reserves a slot in the current cpu's channel buffer. * Does not protect the buffer at all - caller must provide * appropriate synchronization. */ static inline void *relay_reserve(struct rchan *chan, size_t length) { void *reserved = NULL; struct rchan_buf *buf = *get_cpu_ptr(chan->buf); if (unlikely(buf->offset + length > buf->chan->subbuf_size)) { length = relay_switch_subbuf(buf, length); if (!length) goto end; } reserved = buf->data + buf->offset; buf->offset += length; end: put_cpu_ptr(chan->buf); return reserved; } /** * subbuf_start_reserve - reserve bytes at the start of a sub-buffer * @buf: relay channel buffer * @length: number of bytes to reserve * * Helper function used to reserve bytes at the beginning of * a sub-buffer in the subbuf_start() callback. */ static inline void subbuf_start_reserve(struct rchan_buf *buf, size_t length) { BUG_ON(length >= buf->chan->subbuf_size - 1); buf->offset = length; } /* * exported relay file operations, kernel/relay.c */ extern const struct file_operations relay_file_operations; #ifdef CONFIG_RELAY int relay_prepare_cpu(unsigned int cpu); #else #define relay_prepare_cpu NULL #endif #endif /* _LINUX_RELAY_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_STRINGHASH_H #define __LINUX_STRINGHASH_H #include <linux/compiler.h> /* For __pure */ #include <linux/types.h> /* For u32, u64 */ #include <linux/hash.h> /* * Routines for hashing strings of bytes to a 32-bit hash value. * * These hash functions are NOT GUARANTEED STABLE between kernel * versions, architectures, or even repeated boots of the same kernel. * (E.g. they may depend on boot-time hardware detection or be * deliberately randomized.) * * They are also not intended to be secure against collisions caused by * malicious inputs; much slower hash functions are required for that. * * They are optimized for pathname components, meaning short strings. * Even if a majority of files have longer names, the dynamic profile of * pathname components skews short due to short directory names. * (E.g. /usr/lib/libsesquipedalianism.so.3.141.) */ /* * Version 1: one byte at a time. Example of use: * * unsigned long hash = init_name_hash; * while (*p) * hash = partial_name_hash(tolower(*p++), hash); * hash = end_name_hash(hash); * * Although this is designed for bytes, fs/hfsplus/unicode.c * abuses it to hash 16-bit values. */ /* Hash courtesy of the R5 hash in reiserfs modulo sign bits */ #define init_name_hash(salt) (unsigned long)(salt) /* partial hash update function. Assume roughly 4 bits per character */ static inline unsigned long partial_name_hash(unsigned long c, unsigned long prevhash) { return (prevhash + (c << 4) + (c >> 4)) * 11; } /* * Finally: cut down the number of bits to a int value (and try to avoid * losing bits). This also has the property (wanted by the dcache) * that the msbits make a good hash table index. */ static inline unsigned int end_name_hash(unsigned long hash) { return hash_long(hash, 32); } /* * Version 2: One word (32 or 64 bits) at a time. * If CONFIG_DCACHE_WORD_ACCESS is defined (meaning <asm/word-at-a-time.h> * exists, which describes major Linux platforms like x86 and ARM), then * this computes a different hash function much faster. * * If not set, this falls back to a wrapper around the preceding. */ extern unsigned int __pure full_name_hash(const void *salt, const char *, unsigned int); /* * A hash_len is a u64 with the hash of a string in the low * half and the length in the high half. */ #define hashlen_hash(hashlen) ((u32)(hashlen)) #define hashlen_len(hashlen) ((u32)((hashlen) >> 32)) #define hashlen_create(hash, len) ((u64)(len)<<32 | (u32)(hash)) /* Return the "hash_len" (hash and length) of a null-terminated string */ extern u64 __pure hashlen_string(const void *salt, const char *name); #endif /* __LINUX_STRINGHASH_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_BITMAP_H #define __LINUX_BITMAP_H #ifndef __ASSEMBLY__ #include <linux/bitops.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/types.h> struct device; /* * bitmaps provide bit arrays that consume one or more unsigned * longs. The bitmap interface and available operations are listed * here, in bitmap.h * * Function implementations generic to all architectures are in * lib/bitmap.c. Functions implementations that are architecture * specific are in various include/asm-<arch>/bitops.h headers * and other arch/<arch> specific files. * * See lib/bitmap.c for more details. */ /** * DOC: bitmap overview * * The available bitmap operations and their rough meaning in the * case that the bitmap is a single unsigned long are thus: * * The generated code is more efficient when nbits is known at * compile-time and at most BITS_PER_LONG. * * :: * * bitmap_zero(dst, nbits) *dst = 0UL * bitmap_fill(dst, nbits) *dst = ~0UL * bitmap_copy(dst, src, nbits) *dst = *src * bitmap_and(dst, src1, src2, nbits) *dst = *src1 & *src2 * bitmap_or(dst, src1, src2, nbits) *dst = *src1 | *src2 * bitmap_xor(dst, src1, src2, nbits) *dst = *src1 ^ *src2 * bitmap_andnot(dst, src1, src2, nbits) *dst = *src1 & ~(*src2) * bitmap_complement(dst, src, nbits) *dst = ~(*src) * bitmap_equal(src1, src2, nbits) Are *src1 and *src2 equal? * bitmap_intersects(src1, src2, nbits) Do *src1 and *src2 overlap? * bitmap_subset(src1, src2, nbits) Is *src1 a subset of *src2? * bitmap_empty(src, nbits) Are all bits zero in *src? * bitmap_full(src, nbits) Are all bits set in *src? * bitmap_weight(src, nbits) Hamming Weight: number set bits * bitmap_set(dst, pos, nbits) Set specified bit area * bitmap_clear(dst, pos, nbits) Clear specified bit area * bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area * bitmap_find_next_zero_area_off(buf, len, pos, n, mask, mask_off) as above * bitmap_next_clear_region(map, &start, &end, nbits) Find next clear region * bitmap_next_set_region(map, &start, &end, nbits) Find next set region * bitmap_for_each_clear_region(map, rs, re, start, end) * Iterate over all clear regions * bitmap_for_each_set_region(map, rs, re, start, end) * Iterate over all set regions * bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n * bitmap_shift_left(dst, src, n, nbits) *dst = *src << n * bitmap_cut(dst, src, first, n, nbits) Cut n bits from first, copy rest * bitmap_replace(dst, old, new, mask, nbits) *dst = (*old & ~(*mask)) | (*new & *mask) * bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src) * bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit) * bitmap_onto(dst, orig, relmap, nbits) *dst = orig relative to relmap * bitmap_fold(dst, orig, sz, nbits) dst bits = orig bits mod sz * bitmap_parse(buf, buflen, dst, nbits) Parse bitmap dst from kernel buf * bitmap_parse_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf * bitmap_parselist(buf, dst, nbits) Parse bitmap dst from kernel buf * bitmap_parselist_user(buf, dst, nbits) Parse bitmap dst from user buf * bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region * bitmap_release_region(bitmap, pos, order) Free specified bit region * bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region * bitmap_from_arr32(dst, buf, nbits) Copy nbits from u32[] buf to dst * bitmap_to_arr32(buf, src, nbits) Copy nbits from buf to u32[] dst * bitmap_get_value8(map, start) Get 8bit value from map at start * bitmap_set_value8(map, value, start) Set 8bit value to map at start * * Note, bitmap_zero() and bitmap_fill() operate over the region of * unsigned longs, that is, bits behind bitmap till the unsigned long * boundary will be zeroed or filled as well. Consider to use * bitmap_clear() or bitmap_set() to make explicit zeroing or filling * respectively. */ /** * DOC: bitmap bitops * * Also the following operations in asm/bitops.h apply to bitmaps.:: * * set_bit(bit, addr) *addr |= bit * clear_bit(bit, addr) *addr &= ~bit * change_bit(bit, addr) *addr ^= bit * test_bit(bit, addr) Is bit set in *addr? * test_and_set_bit(bit, addr) Set bit and return old value * test_and_clear_bit(bit, addr) Clear bit and return old value * test_and_change_bit(bit, addr) Change bit and return old value * find_first_zero_bit(addr, nbits) Position first zero bit in *addr * find_first_bit(addr, nbits) Position first set bit in *addr * find_next_zero_bit(addr, nbits, bit) * Position next zero bit in *addr >= bit * find_next_bit(addr, nbits, bit) Position next set bit in *addr >= bit * find_next_and_bit(addr1, addr2, nbits, bit) * Same as find_next_bit, but in * (*addr1 & *addr2) * */ /** * DOC: declare bitmap * The DECLARE_BITMAP(name,bits) macro, in linux/types.h, can be used * to declare an array named 'name' of just enough unsigned longs to * contain all bit positions from 0 to 'bits' - 1. */ /* * Allocation and deallocation of bitmap. * Provided in lib/bitmap.c to avoid circular dependency. */ extern unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags); extern unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags); extern void bitmap_free(const unsigned long *bitmap); /* Managed variants of the above. */ unsigned long *devm_bitmap_alloc(struct device *dev, unsigned int nbits, gfp_t flags); unsigned long *devm_bitmap_zalloc(struct device *dev, unsigned int nbits, gfp_t flags); /* * lib/bitmap.c provides these functions: */ extern int __bitmap_empty(const unsigned long *bitmap, unsigned int nbits); extern int __bitmap_full(const unsigned long *bitmap, unsigned int nbits); extern int __bitmap_equal(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern bool __pure __bitmap_or_equal(const unsigned long *src1, const unsigned long *src2, const unsigned long *src3, unsigned int nbits); extern void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int nbits); extern void __bitmap_shift_right(unsigned long *dst, const unsigned long *src, unsigned int shift, unsigned int nbits); extern void __bitmap_shift_left(unsigned long *dst, const unsigned long *src, unsigned int shift, unsigned int nbits); extern void bitmap_cut(unsigned long *dst, const unsigned long *src, unsigned int first, unsigned int cut, unsigned int nbits); extern int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern void __bitmap_replace(unsigned long *dst, const unsigned long *old, const unsigned long *new, const unsigned long *mask, unsigned int nbits); extern int __bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_subset(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits); extern void __bitmap_set(unsigned long *map, unsigned int start, int len); extern void __bitmap_clear(unsigned long *map, unsigned int start, int len); extern unsigned long bitmap_find_next_zero_area_off(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, unsigned long align_mask, unsigned long align_offset); /** * bitmap_find_next_zero_area - find a contiguous aligned zero area * @map: The address to base the search on * @size: The bitmap size in bits * @start: The bitnumber to start searching at * @nr: The number of zeroed bits we're looking for * @align_mask: Alignment mask for zero area * * The @align_mask should be one less than a power of 2; the effect is that * the bit offset of all zero areas this function finds is multiples of that * power of 2. A @align_mask of 0 means no alignment is required. */ static inline unsigned long bitmap_find_next_zero_area(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, unsigned long align_mask) { return bitmap_find_next_zero_area_off(map, size, start, nr, align_mask, 0); } extern int bitmap_parse(const char *buf, unsigned int buflen, unsigned long *dst, int nbits); extern int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, unsigned long *dst, int nbits); extern int bitmap_parselist(const char *buf, unsigned long *maskp, int nmaskbits); extern int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen, unsigned long *dst, int nbits); extern void bitmap_remap(unsigned long *dst, const unsigned long *src, const unsigned long *old, const unsigned long *new, unsigned int nbits); extern int bitmap_bitremap(int oldbit, const unsigned long *old, const unsigned long *new, int bits); extern void bitmap_onto(unsigned long *dst, const unsigned long *orig, const unsigned long *relmap, unsigned int bits); extern void bitmap_fold(unsigned long *dst, const unsigned long *orig, unsigned int sz, unsigned int nbits); extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order); extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order); extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order); #ifdef __BIG_ENDIAN extern void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits); #else #define bitmap_copy_le bitmap_copy #endif extern unsigned int bitmap_ord_to_pos(const unsigned long *bitmap, unsigned int ord, unsigned int nbits); extern int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int nmaskbits); #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) /* * The static inlines below do not handle constant nbits==0 correctly, * so make such users (should any ever turn up) call the out-of-line * versions. */ #define small_const_nbits(nbits) \ (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG && (nbits) > 0) static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) { unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); memset(dst, 0, len); } static inline void bitmap_fill(unsigned long *dst, unsigned int nbits) { unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); memset(dst, 0xff, len); } static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, unsigned int nbits) { unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); memcpy(dst, src, len); } /* * Copy bitmap and clear tail bits in last word. */ static inline void bitmap_copy_clear_tail(unsigned long *dst, const unsigned long *src, unsigned int nbits) { bitmap_copy(dst, src, nbits); if (nbits % BITS_PER_LONG) dst[nbits / BITS_PER_LONG] &= BITMAP_LAST_WORD_MASK(nbits); } /* * On 32-bit systems bitmaps are represented as u32 arrays internally, and * therefore conversion is not needed when copying data from/to arrays of u32. */ #if BITS_PER_LONG == 64 extern void bitmap_from_arr32(unsigned long *bitmap, const u32 *buf, unsigned int nbits); extern void bitmap_to_arr32(u32 *buf, const unsigned long *bitmap, unsigned int nbits); #else #define bitmap_from_arr32(bitmap, buf, nbits) \ bitmap_copy_clear_tail((unsigned long *) (bitmap), \ (const unsigned long *) (buf), (nbits)) #define bitmap_to_arr32(buf, bitmap, nbits) \ bitmap_copy_clear_tail((unsigned long *) (buf), \ (const unsigned long *) (bitmap), (nbits)) #endif static inline int bitmap_and(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0; return __bitmap_and(dst, src1, src2, nbits); } static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = *src1 | *src2; else __bitmap_or(dst, src1, src2, nbits); } static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = *src1 ^ *src2; else __bitmap_xor(dst, src1, src2, nbits); } static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return (*dst = *src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; return __bitmap_andnot(dst, src1, src2, nbits); } static inline void bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = ~(*src); else __bitmap_complement(dst, src, nbits); } #ifdef __LITTLE_ENDIAN #define BITMAP_MEM_ALIGNMENT 8 #else #define BITMAP_MEM_ALIGNMENT (8 * sizeof(unsigned long)) #endif #define BITMAP_MEM_MASK (BITMAP_MEM_ALIGNMENT - 1) static inline int bitmap_equal(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return !((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits)); if (__builtin_constant_p(nbits & BITMAP_MEM_MASK) && IS_ALIGNED(nbits, BITMAP_MEM_ALIGNMENT)) return !memcmp(src1, src2, nbits / 8); return __bitmap_equal(src1, src2, nbits); } /** * bitmap_or_equal - Check whether the or of two bitmaps is equal to a third * @src1: Pointer to bitmap 1 * @src2: Pointer to bitmap 2 will be or'ed with bitmap 1 * @src3: Pointer to bitmap 3. Compare to the result of *@src1 | *@src2 * @nbits: number of bits in each of these bitmaps * * Returns: True if (*@src1 | *@src2) == *@src3, false otherwise */ static inline bool bitmap_or_equal(const unsigned long *src1, const unsigned long *src2, const unsigned long *src3, unsigned int nbits) { if (!small_const_nbits(nbits)) return __bitmap_or_equal(src1, src2, src3, nbits); return !(((*src1 | *src2) ^ *src3) & BITMAP_LAST_WORD_MASK(nbits)); } static inline int bitmap_intersects(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; else return __bitmap_intersects(src1, src2, nbits); } static inline int bitmap_subset(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits)); else return __bitmap_subset(src1, src2, nbits); } static inline int bitmap_empty(const unsigned long *src, unsigned nbits) { if (small_const_nbits(nbits)) return ! (*src & BITMAP_LAST_WORD_MASK(nbits)); return find_first_bit(src, nbits) == nbits; } static inline int bitmap_full(const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits)); return find_first_zero_bit(src, nbits) == nbits; } static __always_inline int bitmap_weight(const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits)); return __bitmap_weight(src, nbits); } static __always_inline void bitmap_set(unsigned long *map, unsigned int start, unsigned int nbits) { if (__builtin_constant_p(nbits) && nbits == 1) __set_bit(start, map); else if (__builtin_constant_p(start & BITMAP_MEM_MASK) && IS_ALIGNED(start, BITMAP_MEM_ALIGNMENT) && __builtin_constant_p(nbits & BITMAP_MEM_MASK) && IS_ALIGNED(nbits, BITMAP_MEM_ALIGNMENT)) memset((char *)map + start / 8, 0xff, nbits / 8); else __bitmap_set(map, start, nbits); } static __always_inline void bitmap_clear(unsigned long *map, unsigned int start, unsigned int nbits) { if (__builtin_constant_p(nbits) && nbits == 1) __clear_bit(start, map); else if (__builtin_constant_p(start & BITMAP_MEM_MASK) && IS_ALIGNED(start, BITMAP_MEM_ALIGNMENT) && __builtin_constant_p(nbits & BITMAP_MEM_MASK) && IS_ALIGNED(nbits, BITMAP_MEM_ALIGNMENT)) memset((char *)map + start / 8, 0, nbits / 8); else __bitmap_clear(map, start, nbits); } static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src, unsigned int shift, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> shift; else __bitmap_shift_right(dst, src, shift, nbits); } static inline void bitmap_shift_left(unsigned long *dst, const unsigned long *src, unsigned int shift, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = (*src << shift) & BITMAP_LAST_WORD_MASK(nbits); else __bitmap_shift_left(dst, src, shift, nbits); } static inline void bitmap_replace(unsigned long *dst, const unsigned long *old, const unsigned long *new, const unsigned long *mask, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = (*old & ~(*mask)) | (*new & *mask); else __bitmap_replace(dst, old, new, mask, nbits); } static inline void bitmap_next_clear_region(unsigned long *bitmap, unsigned int *rs, unsigned int *re, unsigned int end) { *rs = find_next_zero_bit(bitmap, end, *rs); *re = find_next_bit(bitmap, end, *rs + 1); } static inline void bitmap_next_set_region(unsigned long *bitmap, unsigned int *rs, unsigned int *re, unsigned int end) { *rs = find_next_bit(bitmap, end, *rs); *re = find_next_zero_bit(bitmap, end, *rs + 1); } /* * Bitmap region iterators. Iterates over the bitmap between [@start, @end). * @rs and @re should be integer variables and will be set to start and end * index of the current clear or set region. */ #define bitmap_for_each_clear_region(bitmap, rs, re, start, end) \ for ((rs) = (start), \ bitmap_next_clear_region((bitmap), &(rs), &(re), (end)); \ (rs) < (re); \ (rs) = (re) + 1, \ bitmap_next_clear_region((bitmap), &(rs), &(re), (end))) #define bitmap_for_each_set_region(bitmap, rs, re, start, end) \ for ((rs) = (start), \ bitmap_next_set_region((bitmap), &(rs), &(re), (end)); \ (rs) < (re); \ (rs) = (re) + 1, \ bitmap_next_set_region((bitmap), &(rs), &(re), (end))) /** * BITMAP_FROM_U64() - Represent u64 value in the format suitable for bitmap. * @n: u64 value * * Linux bitmaps are internally arrays of unsigned longs, i.e. 32-bit * integers in 32-bit environment, and 64-bit integers in 64-bit one. * * There are four combinations of endianness and length of the word in linux * ABIs: LE64, BE64, LE32 and BE32. * * On 64-bit kernels 64-bit LE and BE numbers are naturally ordered in * bitmaps and therefore don't require any special handling. * * On 32-bit kernels 32-bit LE ABI orders lo word of 64-bit number in memory * prior to hi, and 32-bit BE orders hi word prior to lo. The bitmap on the * other hand is represented as an array of 32-bit words and the position of * bit N may therefore be calculated as: word #(N/32) and bit #(N%32) in that * word. For example, bit #42 is located at 10th position of 2nd word. * It matches 32-bit LE ABI, and we can simply let the compiler store 64-bit * values in memory as it usually does. But for BE we need to swap hi and lo * words manually. * * With all that, the macro BITMAP_FROM_U64() does explicit reordering of hi and * lo parts of u64. For LE32 it does nothing, and for BE environment it swaps * hi and lo words, as is expected by bitmap. */ #if __BITS_PER_LONG == 64 #define BITMAP_FROM_U64(n) (n) #else #define BITMAP_FROM_U64(n) ((unsigned long) ((u64)(n) & ULONG_MAX)), \ ((unsigned long) ((u64)(n) >> 32)) #endif /** * bitmap_from_u64 - Check and swap words within u64. * @mask: source bitmap * @dst: destination bitmap * * In 32-bit Big Endian kernel, when using ``(u32 *)(&val)[*]`` * to read u64 mask, we will get the wrong word. * That is ``(u32 *)(&val)[0]`` gets the upper 32 bits, * but we expect the lower 32-bits of u64. */ static inline void bitmap_from_u64(unsigned long *dst, u64 mask) { dst[0] = mask & ULONG_MAX; if (sizeof(mask) > sizeof(unsigned long)) dst[1] = mask >> 32; } /** * bitmap_get_value8 - get an 8-bit value within a memory region * @map: address to the bitmap memory region * @start: bit offset of the 8-bit value; must be a multiple of 8 * * Returns the 8-bit value located at the @start bit offset within the @src * memory region. */ static inline unsigned long bitmap_get_value8(const unsigned long *map, unsigned long start) { const size_t index = BIT_WORD(start); const unsigned long offset = start % BITS_PER_LONG; return (map[index] >> offset) & 0xFF; } /** * bitmap_set_value8 - set an 8-bit value within a memory region * @map: address to the bitmap memory region * @value: the 8-bit value; values wider than 8 bits may clobber bitmap * @start: bit offset of the 8-bit value; must be a multiple of 8 */ static inline void bitmap_set_value8(unsigned long *map, unsigned long value, unsigned long start) { const size_t index = BIT_WORD(start); const unsigned long offset = start % BITS_PER_LONG; map[index] &= ~(0xFFUL << offset); map[index] |= value << offset; } #endif /* __ASSEMBLY__ */ #endif /* __LINUX_BITMAP_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 /* SPDX-License-Identifier: GPL-2.0-only */ /* * async.h: Asynchronous function calls for boot performance * * (C) Copyright 2009 Intel Corporation * Author: Arjan van de Ven <arjan@linux.intel.com> */ #ifndef __ASYNC_H__ #define __ASYNC_H__ #include <linux/types.h> #include <linux/list.h> #include <linux/numa.h> #include <linux/device.h> typedef u64 async_cookie_t; typedef void (*async_func_t) (void *data, async_cookie_t cookie); struct async_domain { struct list_head pending; unsigned registered:1; }; /* * domain participates in global async_synchronize_full */ #define ASYNC_DOMAIN(_name) \ struct async_domain _name = { .pending = LIST_HEAD_INIT(_name.pending), \ .registered = 1 } /* * domain is free to go out of scope as soon as all pending work is * complete, this domain does not participate in async_synchronize_full */ #define ASYNC_DOMAIN_EXCLUSIVE(_name) \ struct async_domain _name = { .pending = LIST_HEAD_INIT(_name.pending), \ .registered = 0 } async_cookie_t async_schedule_node(async_func_t func, void *data, int node); async_cookie_t async_schedule_node_domain(async_func_t func, void *data, int node, struct async_domain *domain); /** * async_schedule - schedule a function for asynchronous execution * @func: function to execute asynchronously * @data: data pointer to pass to the function * * Returns an async_cookie_t that may be used for checkpointing later. * Note: This function may be called from atomic or non-atomic contexts. */ static inline async_cookie_t async_schedule(async_func_t func, void *data) { return async_schedule_node(func, data, NUMA_NO_NODE); } /** * async_schedule_domain - schedule a function for asynchronous execution within a certain domain * @func: function to execute asynchronously * @data: data pointer to pass to the function * @domain: the domain * * Returns an async_cookie_t that may be used for checkpointing later. * @domain may be used in the async_synchronize_*_domain() functions to * wait within a certain synchronization domain rather than globally. * Note: This function may be called from atomic or non-atomic contexts. */ static inline async_cookie_t async_schedule_domain(async_func_t func, void *data, struct async_domain *domain) { return async_schedule_node_domain(func, data, NUMA_NO_NODE, domain); } /** * async_schedule_dev - A device specific version of async_schedule * @func: function to execute asynchronously * @dev: device argument to be passed to function * * Returns an async_cookie_t that may be used for checkpointing later. * @dev is used as both the argument for the function and to provide NUMA * context for where to run the function. By doing this we can try to * provide for the best possible outcome by operating on the device on the * CPUs closest to the device. * Note: This function may be called from atomic or non-atomic contexts. */ static inline async_cookie_t async_schedule_dev(async_func_t func, struct device *dev) { return async_schedule_node(func, dev, dev_to_node(dev)); } /** * async_schedule_dev_domain - A device specific version of async_schedule_domain * @func: function to execute asynchronously * @dev: device argument to be passed to function * @domain: the domain * * Returns an async_cookie_t that may be used for checkpointing later. * @dev is used as both the argument for the function and to provide NUMA * context for where to run the function. By doing this we can try to * provide for the best possible outcome by operating on the device on the * CPUs closest to the device. * @domain may be used in the async_synchronize_*_domain() functions to * wait within a certain synchronization domain rather than globally. * Note: This function may be called from atomic or non-atomic contexts. */ static inline async_cookie_t async_schedule_dev_domain(async_func_t func, struct device *dev, struct async_domain *domain) { return async_schedule_node_domain(func, dev, dev_to_node(dev), domain); } void async_unregister_domain(struct async_domain *domain); extern void async_synchronize_full(void); extern void async_synchronize_full_domain(struct async_domain *domain); extern void async_synchronize_cookie(async_cookie_t cookie); extern void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain *domain); extern bool current_is_async(void); #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 /* SPDX-License-Identifier: GPL-2.0-only */ /* * pm_runtime.h - Device run-time power management helper functions. * * Copyright (C) 2009 Rafael J. Wysocki <rjw@sisk.pl> */ #ifndef _LINUX_PM_RUNTIME_H #define _LINUX_PM_RUNTIME_H #include <linux/device.h> #include <linux/notifier.h> #include <linux/pm.h> #include <linux/jiffies.h> /* Runtime PM flag argument bits */ #define RPM_ASYNC 0x01 /* Request is asynchronous */ #define RPM_NOWAIT 0x02 /* Don't wait for concurrent state change */ #define RPM_GET_PUT 0x04 /* Increment/decrement the usage_count */ #define RPM_AUTO 0x08 /* Use autosuspend_delay */ #ifdef CONFIG_PM extern struct workqueue_struct *pm_wq; static inline bool queue_pm_work(struct work_struct *work) { return queue_work(pm_wq, work); } extern int pm_generic_runtime_suspend(struct device *dev); extern int pm_generic_runtime_resume(struct device *dev); extern int pm_runtime_force_suspend(struct device *dev); extern int pm_runtime_force_resume(struct device *dev); extern int __pm_runtime_idle(struct device *dev, int rpmflags); extern int __pm_runtime_suspend(struct device *dev, int rpmflags); extern int __pm_runtime_resume(struct device *dev, int rpmflags); extern int pm_runtime_get_if_active(struct device *dev, bool ign_usage_count); extern int pm_schedule_suspend(struct device *dev, unsigned int delay); extern int __pm_runtime_set_status(struct device *dev, unsigned int status); extern int pm_runtime_barrier(struct device *dev); extern void pm_runtime_enable(struct device *dev); extern void __pm_runtime_disable(struct device *dev, bool check_resume); extern void pm_runtime_allow(struct device *dev); extern void pm_runtime_forbid(struct device *dev); extern void pm_runtime_no_callbacks(struct device *dev); extern void pm_runtime_irq_safe(struct device *dev); extern void __pm_runtime_use_autosuspend(struct device *dev, bool use); extern void pm_runtime_set_autosuspend_delay(struct device *dev, int delay); extern u64 pm_runtime_autosuspend_expiration(struct device *dev); extern void pm_runtime_update_max_time_suspended(struct device *dev, s64 delta_ns); extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable); extern void pm_runtime_get_suppliers(struct device *dev); extern void pm_runtime_put_suppliers(struct device *dev); extern void pm_runtime_new_link(struct device *dev); extern void pm_runtime_drop_link(struct device_link *link); extern void pm_runtime_release_supplier(struct device_link *link); /** * pm_runtime_get_if_in_use - Conditionally bump up runtime PM usage counter. * @dev: Target device. * * Increment the runtime PM usage counter of @dev if its runtime PM status is * %RPM_ACTIVE and its runtime PM usage counter is greater than 0. */ static inline int pm_runtime_get_if_in_use(struct device *dev) { return pm_runtime_get_if_active(dev, false); } /** * pm_suspend_ignore_children - Set runtime PM behavior regarding children. * @dev: Target device. * @enable: Whether or not to ignore possible dependencies on children. * * The dependencies of @dev on its children will not be taken into account by * the runtime PM framework going forward if @enable is %true, or they will * be taken into account otherwise. */ static inline void pm_suspend_ignore_children(struct device *dev, bool enable) { dev->power.ignore_children = enable; } /** * pm_runtime_get_noresume - Bump up runtime PM usage counter of a device. * @dev: Target device. */ static inline void pm_runtime_get_noresume(struct device *dev) { atomic_inc(&dev->power.usage_count); } /** * pm_runtime_put_noidle - Drop runtime PM usage counter of a device. * @dev: Target device. * * Decrement the runtime PM usage counter of @dev unless it is 0 already. */ static inline void pm_runtime_put_noidle(struct device *dev) { atomic_add_unless(&dev->power.usage_count, -1, 0); } /** * pm_runtime_suspended - Check whether or not a device is runtime-suspended. * @dev: Target device. * * Return %true if runtime PM is enabled for @dev and its runtime PM status is * %RPM_SUSPENDED, or %false otherwise. * * Note that the return value of this function can only be trusted if it is * called under the runtime PM lock of @dev or under conditions in which * runtime PM cannot be either disabled or enabled for @dev and its runtime PM * status cannot change. */ static inline bool pm_runtime_suspended(struct device *dev) { return dev->power.runtime_status == RPM_SUSPENDED && !dev->power.disable_depth; } /** * pm_runtime_active - Check whether or not a device is runtime-active. * @dev: Target device. * * Return %true if runtime PM is disabled for @dev or its runtime PM status is * %RPM_ACTIVE, or %false otherwise. * * Note that the return value of this function can only be trusted if it is * called under the runtime PM lock of @dev or under conditions in which * runtime PM cannot be either disabled or enabled for @dev and its runtime PM * status cannot change. */ static inline bool pm_runtime_active(struct device *dev) { return dev->power.runtime_status == RPM_ACTIVE || dev->power.disable_depth; } /** * pm_runtime_status_suspended - Check if runtime PM status is "suspended". * @dev: Target device. * * Return %true if the runtime PM status of @dev is %RPM_SUSPENDED, or %false * otherwise, regardless of whether or not runtime PM has been enabled for @dev. * * Note that the return value of this function can only be trusted if it is * called under the runtime PM lock of @dev or under conditions in which the * runtime PM status of @dev cannot change. */ static inline bool pm_runtime_status_suspended(struct device *dev) { return dev->power.runtime_status == RPM_SUSPENDED; } /** * pm_runtime_enabled - Check if runtime PM is enabled. * @dev: Target device. * * Return %true if runtime PM is enabled for @dev or %false otherwise. * * Note that the return value of this function can only be trusted if it is * called under the runtime PM lock of @dev or under conditions in which * runtime PM cannot be either disabled or enabled for @dev. */ static inline bool pm_runtime_enabled(struct device *dev) { return !dev->power.disable_depth; } /** * pm_runtime_has_no_callbacks - Check if runtime PM callbacks may be present. * @dev: Target device. * * Return %true if @dev is a special device without runtime PM callbacks or * %false otherwise. */ static inline bool pm_runtime_has_no_callbacks(struct device *dev) { return dev->power.no_callbacks; } /** * pm_runtime_mark_last_busy - Update the last access time of a device. * @dev: Target device. * * Update the last access time of @dev used by the runtime PM autosuspend * mechanism to the current time as returned by ktime_get_mono_fast_ns(). */ static inline void pm_runtime_mark_last_busy(struct device *dev) { WRITE_ONCE(dev->power.last_busy, ktime_get_mono_fast_ns()); } /** * pm_runtime_is_irq_safe - Check if runtime PM can work in interrupt context. * @dev: Target device. * * Return %true if @dev has been marked as an "IRQ-safe" device (with respect * to runtime PM), in which case its runtime PM callabcks can be expected to * work correctly when invoked from interrupt handlers. */ static inline bool pm_runtime_is_irq_safe(struct device *dev) { return dev->power.irq_safe; } extern u64 pm_runtime_suspended_time(struct device *dev); #else /* !CONFIG_PM */ static inline bool queue_pm_work(struct work_struct *work) { return false; } static inline int pm_generic_runtime_suspend(struct device *dev) { return 0; } static inline int pm_generic_runtime_resume(struct device *dev) { return 0; } static inline int pm_runtime_force_suspend(struct device *dev) { return 0; } static inline int pm_runtime_force_resume(struct device *dev) { return 0; } static inline int __pm_runtime_idle(struct device *dev, int rpmflags) { return -ENOSYS; } static inline int __pm_runtime_suspend(struct device *dev, int rpmflags) { return -ENOSYS; } static inline int __pm_runtime_resume(struct device *dev, int rpmflags) { return 1; } static inline int pm_schedule_suspend(struct device *dev, unsigned int delay) { return -ENOSYS; } static inline int pm_runtime_get_if_in_use(struct device *dev) { return -EINVAL; } static inline int pm_runtime_get_if_active(struct device *dev, bool ign_usage_count) { return -EINVAL; } static inline int __pm_runtime_set_status(struct device *dev, unsigned int status) { return 0; } static inline int pm_runtime_barrier(struct device *dev) { return 0; } static inline void pm_runtime_enable(struct device *dev) {} static inline void __pm_runtime_disable(struct device *dev, bool c) {} static inline void pm_runtime_allow(struct device *dev) {} static inline void pm_runtime_forbid(struct device *dev) {} static inline void pm_suspend_ignore_children(struct device *dev, bool enable) {} static inline void pm_runtime_get_noresume(struct device *dev) {} static inline void pm_runtime_put_noidle(struct device *dev) {} static inline bool pm_runtime_suspended(struct device *dev) { return false; } static inline bool pm_runtime_active(struct device *dev) { return true; } static inline bool pm_runtime_status_suspended(struct device *dev) { return false; } static inline bool pm_runtime_enabled(struct device *dev) { return false; } static inline void pm_runtime_no_callbacks(struct device *dev) {} static inline void pm_runtime_irq_safe(struct device *dev) {} static inline bool pm_runtime_is_irq_safe(struct device *dev) { return false; } static inline bool pm_runtime_has_no_callbacks(struct device *dev) { return false; } static inline void pm_runtime_mark_last_busy(struct device *dev) {} static inline void __pm_runtime_use_autosuspend(struct device *dev, bool use) {} static inline void pm_runtime_set_autosuspend_delay(struct device *dev, int delay) {} static inline u64 pm_runtime_autosuspend_expiration( struct device *dev) { return 0; } static inline void pm_runtime_set_memalloc_noio(struct device *dev, bool enable){} static inline void pm_runtime_get_suppliers(struct device *dev) {} static inline void pm_runtime_put_suppliers(struct device *dev) {} static inline void pm_runtime_new_link(struct device *dev) {} static inline void pm_runtime_drop_link(struct device_link *link) {} static inline void pm_runtime_release_supplier(struct device_link *link) {} #endif /* !CONFIG_PM */ /** * pm_runtime_idle - Conditionally set up autosuspend of a device or suspend it. * @dev: Target device. * * Invoke the "idle check" callback of @dev and, depending on its return value, * set up autosuspend of @dev or suspend it (depending on whether or not * autosuspend has been enabled for it). */ static inline int pm_runtime_idle(struct device *dev) { return __pm_runtime_idle(dev, 0); } /** * pm_runtime_suspend - Suspend a device synchronously. * @dev: Target device. */ static inline int pm_runtime_suspend(struct device *dev) { return __pm_runtime_suspend(dev, 0); } /** * pm_runtime_autosuspend - Set up autosuspend of a device or suspend it. * @dev: Target device. * * Set up autosuspend of @dev or suspend it (depending on whether or not * autosuspend is enabled for it) without engaging its "idle check" callback. */ static inline int pm_runtime_autosuspend(struct device *dev) { return __pm_runtime_suspend(dev, RPM_AUTO); } /** * pm_runtime_resume - Resume a device synchronously. * @dev: Target device. */ static inline int pm_runtime_resume(struct device *dev) { return __pm_runtime_resume(dev, 0); } /** * pm_request_idle - Queue up "idle check" execution for a device. * @dev: Target device. * * Queue up a work item to run an equivalent of pm_runtime_idle() for @dev * asynchronously. */ static inline int pm_request_idle(struct device *dev) { return __pm_runtime_idle(dev, RPM_ASYNC); } /** * pm_request_resume - Queue up runtime-resume of a device. * @dev: Target device. */ static inline int pm_request_resume(struct device *dev) { return __pm_runtime_resume(dev, RPM_ASYNC); } /** * pm_request_autosuspend - Queue up autosuspend of a device. * @dev: Target device. * * Queue up a work item to run an equivalent pm_runtime_autosuspend() for @dev * asynchronously. */ static inline int pm_request_autosuspend(struct device *dev) { return __pm_runtime_suspend(dev, RPM_ASYNC | RPM_AUTO); } /** * pm_runtime_get - Bump up usage counter and queue up resume of a device. * @dev: Target device. * * Bump up the runtime PM usage counter of @dev and queue up a work item to * carry out runtime-resume of it. */ static inline int pm_runtime_get(struct device *dev) { return __pm_runtime_resume(dev, RPM_GET_PUT | RPM_ASYNC); } /** * pm_runtime_get_sync - Bump up usage counter of a device and resume it. * @dev: Target device. * * Bump up the runtime PM usage counter of @dev and carry out runtime-resume of * it synchronously. * * The possible return values of this function are the same as for * pm_runtime_resume() and the runtime PM usage counter of @dev remains * incremented in all cases, even if it returns an error code. */ static inline int pm_runtime_get_sync(struct device *dev) { return __pm_runtime_resume(dev, RPM_GET_PUT); } /** * pm_runtime_resume_and_get - Bump up usage counter of a device and resume it. * @dev: Target device. * * Resume @dev synchronously and if that is successful, increment its runtime * PM usage counter. Return 0 if the runtime PM usage counter of @dev has been * incremented or a negative error code otherwise. */ static inline int pm_runtime_resume_and_get(struct device *dev) { int ret; ret = __pm_runtime_resume(dev, RPM_GET_PUT); if (ret < 0) { pm_runtime_put_noidle(dev); return ret; } return 0; } /** * pm_runtime_put - Drop device usage counter and queue up "idle check" if 0. * @dev: Target device. * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, queue up a work item for @dev like in pm_request_idle(). */ static inline int pm_runtime_put(struct device *dev) { return __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC); } /** * pm_runtime_put_autosuspend - Drop device usage counter and queue autosuspend if 0. * @dev: Target device. * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, queue up a work item for @dev like in pm_request_autosuspend(). */ static inline int pm_runtime_put_autosuspend(struct device *dev) { return __pm_runtime_suspend(dev, RPM_GET_PUT | RPM_ASYNC | RPM_AUTO); } /** * pm_runtime_put_sync - Drop device usage counter and run "idle check" if 0. * @dev: Target device. * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, invoke the "idle check" callback of @dev and, depending on its * return value, set up autosuspend of @dev or suspend it (depending on whether * or not autosuspend has been enabled for it). * * The possible return values of this function are the same as for * pm_runtime_idle() and the runtime PM usage counter of @dev remains * decremented in all cases, even if it returns an error code. */ static inline int pm_runtime_put_sync(struct device *dev) { return __pm_runtime_idle(dev, RPM_GET_PUT); } /** * pm_runtime_put_sync_suspend - Drop device usage counter and suspend if 0. * @dev: Target device. * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, carry out runtime-suspend of @dev synchronously. * * The possible return values of this function are the same as for * pm_runtime_suspend() and the runtime PM usage counter of @dev remains * decremented in all cases, even if it returns an error code. */ static inline int pm_runtime_put_sync_suspend(struct device *dev) { return __pm_runtime_suspend(dev, RPM_GET_PUT); } /** * pm_runtime_put_sync_autosuspend - Drop device usage counter and autosuspend if 0. * @dev: Target device. * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, set up autosuspend of @dev or suspend it synchronously (depending * on whether or not autosuspend has been enabled for it). * * The possible return values of this function are the same as for * pm_runtime_autosuspend() and the runtime PM usage counter of @dev remains * decremented in all cases, even if it returns an error code. */ static inline int pm_runtime_put_sync_autosuspend(struct device *dev) { return __pm_runtime_suspend(dev, RPM_GET_PUT | RPM_AUTO); } /** * pm_runtime_set_active - Set runtime PM status to "active". * @dev: Target device. * * Set the runtime PM status of @dev to %RPM_ACTIVE and ensure that dependencies * of it will be taken into account. * * It is not valid to call this function for devices with runtime PM enabled. */ static inline int pm_runtime_set_active(struct device *dev) { return __pm_runtime_set_status(dev, RPM_ACTIVE); } /** * pm_runtime_set_suspended - Set runtime PM status to "suspended". * @dev: Target device. * * Set the runtime PM status of @dev to %RPM_SUSPENDED and ensure that * dependencies of it will be taken into account. * * It is not valid to call this function for devices with runtime PM enabled. */ static inline int pm_runtime_set_suspended(struct device *dev) { return __pm_runtime_set_status(dev, RPM_SUSPENDED); } /** * pm_runtime_disable - Disable runtime PM for a device. * @dev: Target device. * * Prevent the runtime PM framework from working with @dev (by incrementing its * "blocking" counter). * * For each invocation of this function for @dev there must be a matching * pm_runtime_enable() call in order for runtime PM to be enabled for it. */ static inline void pm_runtime_disable(struct device *dev) { __pm_runtime_disable(dev, true); } /** * pm_runtime_use_autosuspend - Allow autosuspend to be used for a device. * @dev: Target device. * * Allow the runtime PM autosuspend mechanism to be used for @dev whenever * requested (or "autosuspend" will be handled as direct runtime-suspend for * it). */ static inline void pm_runtime_use_autosuspend(struct device *dev) { __pm_runtime_use_autosuspend(dev, true); } /** * pm_runtime_dont_use_autosuspend - Prevent autosuspend from being used. * @dev: Target device. * * Prevent the runtime PM autosuspend mechanism from being used for @dev which * means that "autosuspend" will be handled as direct runtime-suspend for it * going forward. */ static inline void pm_runtime_dont_use_autosuspend(struct device *dev) { __pm_runtime_use_autosuspend(dev, false); } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 // SPDX-License-Identifier: GPL-2.0 #ifndef _LINUX_KERNEL_TRACE_H #define _LINUX_KERNEL_TRACE_H #include <linux/fs.h> #include <linux/atomic.h> #include <linux/sched.h> #include <linux/clocksource.h> #include <linux/ring_buffer.h> #include <linux/mmiotrace.h> #include <linux/tracepoint.h> #include <linux/ftrace.h> #include <linux/trace.h> #include <linux/hw_breakpoint.h> #include <linux/trace_seq.h> #include <linux/trace_events.h> #include <linux/compiler.h> #include <linux/glob.h> #include <linux/irq_work.h> #include <linux/workqueue.h> #include <linux/ctype.h> #ifdef CONFIG_FTRACE_SYSCALLS #include <asm/unistd.h> /* For NR_SYSCALLS */ #include <asm/syscall.h> /* some archs define it here */ #endif enum trace_type { __TRACE_FIRST_TYPE = 0, TRACE_FN, TRACE_CTX, TRACE_WAKE, TRACE_STACK, TRACE_PRINT, TRACE_BPRINT, TRACE_MMIO_RW, TRACE_MMIO_MAP, TRACE_BRANCH, TRACE_GRAPH_RET, TRACE_GRAPH_ENT, TRACE_USER_STACK, TRACE_BLK, TRACE_BPUTS, TRACE_HWLAT, TRACE_RAW_DATA, __TRACE_LAST_TYPE, }; #undef __field #define __field(type, item) type item; #undef __field_fn #define __field_fn(type, item) type item; #undef __field_struct #define __field_struct(type, item) __field(type, item) #undef __field_desc #define __field_desc(type, container, item) #undef __field_packed #define __field_packed(type, container, item) #undef __array #define __array(type, item, size) type item[size]; #undef __array_desc #define __array_desc(type, container, item, size) #undef __dynamic_array #define __dynamic_array(type, item) type item[]; #undef F_STRUCT #define F_STRUCT(args...) args #undef FTRACE_ENTRY #define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ struct struct_name { \ struct trace_entry ent; \ tstruct \ } #undef FTRACE_ENTRY_DUP #define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk) #undef FTRACE_ENTRY_REG #define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, regfn) \ FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) #undef FTRACE_ENTRY_PACKED #define FTRACE_ENTRY_PACKED(name, struct_name, id, tstruct, print) \ FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) __packed #include "trace_entries.h" /* Use this for memory failure errors */ #define MEM_FAIL(condition, fmt, ...) ({ \ static bool __section(".data.once") __warned; \ int __ret_warn_once = !!(condition); \ \ if (unlikely(__ret_warn_once && !__warned)) { \ __warned = true; \ pr_err("ERROR: " fmt, ##__VA_ARGS__); \ } \ unlikely(__ret_warn_once); \ }) /* * syscalls are special, and need special handling, this is why * they are not included in trace_entries.h */ struct syscall_trace_enter { struct trace_entry ent; int nr; unsigned long args[]; }; struct syscall_trace_exit { struct trace_entry ent; int nr; long ret; }; struct kprobe_trace_entry_head { struct trace_entry ent; unsigned long ip; }; struct kretprobe_trace_entry_head { struct trace_entry ent; unsigned long func; unsigned long ret_ip; }; /* * trace_flag_type is an enumeration that holds different * states when a trace occurs. These are: * IRQS_OFF - interrupts were disabled * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags * NEED_RESCHED - reschedule is requested * HARDIRQ - inside an interrupt handler * SOFTIRQ - inside a softirq handler */ enum trace_flag_type { TRACE_FLAG_IRQS_OFF = 0x01, TRACE_FLAG_IRQS_NOSUPPORT = 0x02, TRACE_FLAG_NEED_RESCHED = 0x04, TRACE_FLAG_HARDIRQ = 0x08, TRACE_FLAG_SOFTIRQ = 0x10, TRACE_FLAG_PREEMPT_RESCHED = 0x20, TRACE_FLAG_NMI = 0x40, }; #define TRACE_BUF_SIZE 1024 struct trace_array; /* * The CPU trace array - it consists of thousands of trace entries * plus some other descriptor data: (for example which task started * the trace, etc.) */ struct trace_array_cpu { atomic_t disabled; void *buffer_page; /* ring buffer spare */ unsigned long entries; unsigned long saved_latency; unsigned long critical_start; unsigned long critical_end; unsigned long critical_sequence; unsigned long nice; unsigned long policy; unsigned long rt_priority; unsigned long skipped_entries; u64 preempt_timestamp; pid_t pid; kuid_t uid; char comm[TASK_COMM_LEN]; #ifdef CONFIG_FUNCTION_TRACER int ftrace_ignore_pid; #endif bool ignore_pid; }; struct tracer; struct trace_option_dentry; struct array_buffer { struct trace_array *tr; struct trace_buffer *buffer; struct trace_array_cpu __percpu *data; u64 time_start; int cpu; }; #define TRACE_FLAGS_MAX_SIZE 32 struct trace_options { struct tracer *tracer; struct trace_option_dentry *topts; }; struct trace_pid_list { int pid_max; unsigned long *pids; }; enum { TRACE_PIDS = BIT(0), TRACE_NO_PIDS = BIT(1), }; static inline bool pid_type_enabled(int type, struct trace_pid_list *pid_list, struct trace_pid_list *no_pid_list) { /* Return true if the pid list in type has pids */ return ((type & TRACE_PIDS) && pid_list) || ((type & TRACE_NO_PIDS) && no_pid_list); } static inline bool still_need_pid_events(int type, struct trace_pid_list *pid_list, struct trace_pid_list *no_pid_list) { /* * Turning off what is in @type, return true if the "other" * pid list, still has pids in it. */ return (!(type & TRACE_PIDS) && pid_list) || (!(type & TRACE_NO_PIDS) && no_pid_list); } typedef bool (*cond_update_fn_t)(struct trace_array *tr, void *cond_data); /** * struct cond_snapshot - conditional snapshot data and callback * * The cond_snapshot structure encapsulates a callback function and * data associated with the snapshot for a given tracing instance. * * When a snapshot is taken conditionally, by invoking * tracing_snapshot_cond(tr, cond_data), the cond_data passed in is * passed in turn to the cond_snapshot.update() function. That data * can be compared by the update() implementation with the cond_data * contained within the struct cond_snapshot instance associated with * the trace_array. Because the tr->max_lock is held throughout the * update() call, the update() function can directly retrieve the * cond_snapshot and cond_data associated with the per-instance * snapshot associated with the trace_array. * * The cond_snapshot.update() implementation can save data to be * associated with the snapshot if it decides to, and returns 'true' * in that case, or it returns 'false' if the conditional snapshot * shouldn't be taken. * * The cond_snapshot instance is created and associated with the * user-defined cond_data by tracing_cond_snapshot_enable(). * Likewise, the cond_snapshot instance is destroyed and is no longer * associated with the trace instance by * tracing_cond_snapshot_disable(). * * The method below is required. * * @update: When a conditional snapshot is invoked, the update() * callback function is invoked with the tr->max_lock held. The * update() implementation signals whether or not to actually * take the snapshot, by returning 'true' if so, 'false' if no * snapshot should be taken. Because the max_lock is held for * the duration of update(), the implementation is safe to * directly retrieved and save any implementation data it needs * to in association with the snapshot. */ struct cond_snapshot { void *cond_data; cond_update_fn_t update; }; /* * The trace array - an array of per-CPU trace arrays. This is the * highest level data structure that individual tracers deal with. * They have on/off state as well: */ struct trace_array { struct list_head list; char *name; struct array_buffer array_buffer; #ifdef CONFIG_TRACER_MAX_TRACE /* * The max_buffer is used to snapshot the trace when a maximum * latency is reached, or when the user initiates a snapshot. * Some tracers will use this to store a maximum trace while * it continues examining live traces. * * The buffers for the max_buffer are set up the same as the array_buffer * When a snapshot is taken, the buffer of the max_buffer is swapped * with the buffer of the array_buffer and the buffers are reset for * the array_buffer so the tracing can continue. */ struct array_buffer max_buffer; bool allocated_snapshot; #endif #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) unsigned long max_latency; #ifdef CONFIG_FSNOTIFY struct dentry *d_max_latency; struct work_struct fsnotify_work; struct irq_work fsnotify_irqwork; #endif #endif struct trace_pid_list __rcu *filtered_pids; struct trace_pid_list __rcu *filtered_no_pids; /* * max_lock is used to protect the swapping of buffers * when taking a max snapshot. The buffers themselves are * protected by per_cpu spinlocks. But the action of the swap * needs its own lock. * * This is defined as a arch_spinlock_t in order to help * with performance when lockdep debugging is enabled. * * It is also used in other places outside the update_max_tr * so it needs to be defined outside of the * CONFIG_TRACER_MAX_TRACE. */ arch_spinlock_t max_lock; int buffer_disabled; #ifdef CONFIG_FTRACE_SYSCALLS int sys_refcount_enter; int sys_refcount_exit; struct trace_event_file __rcu *enter_syscall_files[NR_syscalls]; struct trace_event_file __rcu *exit_syscall_files[NR_syscalls]; #endif int stop_count; int clock_id; int nr_topts; bool clear_trace; int buffer_percent; unsigned int n_err_log_entries; struct tracer *current_trace; unsigned int trace_flags; unsigned char trace_flags_index[TRACE_FLAGS_MAX_SIZE]; unsigned int flags; raw_spinlock_t start_lock; struct list_head err_log; struct dentry *dir; struct dentry *options; struct dentry *percpu_dir; struct dentry *event_dir; struct trace_options *topts; struct list_head systems; struct list_head events; struct trace_event_file *trace_marker_file; cpumask_var_t tracing_cpumask; /* only trace on set CPUs */ int ref; int trace_ref; #ifdef CONFIG_FUNCTION_TRACER struct ftrace_ops *ops; struct trace_pid_list __rcu *function_pids; struct trace_pid_list __rcu *function_no_pids; #ifdef CONFIG_DYNAMIC_FTRACE /* All of these are protected by the ftrace_lock */ struct list_head func_probes; struct list_head mod_trace; struct list_head mod_notrace; #endif /* function tracing enabled */ int function_enabled; #endif int time_stamp_abs_ref; struct list_head hist_vars; #ifdef CONFIG_TRACER_SNAPSHOT struct cond_snapshot *cond_snapshot; #endif }; enum { TRACE_ARRAY_FL_GLOBAL = (1 << 0) }; extern struct list_head ftrace_trace_arrays; extern struct mutex trace_types_lock; extern int trace_array_get(struct trace_array *tr); extern int tracing_check_open_get_tr(struct trace_array *tr); extern struct trace_array *trace_array_find(const char *instance); extern struct trace_array *trace_array_find_get(const char *instance); extern int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs); extern int tracing_set_clock(struct trace_array *tr, const char *clockstr); extern bool trace_clock_in_ns(struct trace_array *tr); /* * The global tracer (top) should be the first trace array added, * but we check the flag anyway. */ static inline struct trace_array *top_trace_array(void) { struct trace_array *tr; if (list_empty(&ftrace_trace_arrays)) return NULL; tr = list_entry(ftrace_trace_arrays.prev, typeof(*tr), list); WARN_ON(!(tr->flags & TRACE_ARRAY_FL_GLOBAL)); return tr; } #define FTRACE_CMP_TYPE(var, type) \ __builtin_types_compatible_p(typeof(var), type *) #undef IF_ASSIGN #define IF_ASSIGN(var, entry, etype, id) \ if (FTRACE_CMP_TYPE(var, etype)) { \ var = (typeof(var))(entry); \ WARN_ON(id != 0 && (entry)->type != id); \ break; \ } /* Will cause compile errors if type is not found. */ extern void __ftrace_bad_type(void); /* * The trace_assign_type is a verifier that the entry type is * the same as the type being assigned. To add new types simply * add a line with the following format: * * IF_ASSIGN(var, ent, type, id); * * Where "type" is the trace type that includes the trace_entry * as the "ent" item. And "id" is the trace identifier that is * used in the trace_type enum. * * If the type can have more than one id, then use zero. */ #define trace_assign_type(var, ent) \ do { \ IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN); \ IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \ IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \ IF_ASSIGN(var, ent, struct bputs_entry, TRACE_BPUTS); \ IF_ASSIGN(var, ent, struct hwlat_entry, TRACE_HWLAT); \ IF_ASSIGN(var, ent, struct raw_data_entry, TRACE_RAW_DATA);\ IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ TRACE_MMIO_RW); \ IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \ TRACE_MMIO_MAP); \ IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \ IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \ TRACE_GRAPH_ENT); \ IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ TRACE_GRAPH_RET); \ __ftrace_bad_type(); \ } while (0) /* * An option specific to a tracer. This is a boolean value. * The bit is the bit index that sets its value on the * flags value in struct tracer_flags. */ struct tracer_opt { const char *name; /* Will appear on the trace_options file */ u32 bit; /* Mask assigned in val field in tracer_flags */ }; /* * The set of specific options for a tracer. Your tracer * have to set the initial value of the flags val. */ struct tracer_flags { u32 val; struct tracer_opt *opts; struct tracer *trace; }; /* Makes more easy to define a tracer opt */ #define TRACER_OPT(s, b) .name = #s, .bit = b struct trace_option_dentry { struct tracer_opt *opt; struct tracer_flags *flags; struct trace_array *tr; struct dentry *entry; }; /** * struct tracer - a specific tracer and its callbacks to interact with tracefs * @name: the name chosen to select it on the available_tracers file * @init: called when one switches to this tracer (echo name > current_tracer) * @reset: called when one switches to another tracer * @start: called when tracing is unpaused (echo 1 > tracing_on) * @stop: called when tracing is paused (echo 0 > tracing_on) * @update_thresh: called when tracing_thresh is updated * @open: called when the trace file is opened * @pipe_open: called when the trace_pipe file is opened * @close: called when the trace file is released * @pipe_close: called when the trace_pipe file is released * @read: override the default read callback on trace_pipe * @splice_read: override the default splice_read callback on trace_pipe * @selftest: selftest to run on boot (see trace_selftest.c) * @print_headers: override the first lines that describe your columns * @print_line: callback that prints a trace * @set_flag: signals one of your private flags changed (trace_options file) * @flags: your private flags */ struct tracer { const char *name; int (*init)(struct trace_array *tr); void (*reset)(struct trace_array *tr); void (*start)(struct trace_array *tr); void (*stop)(struct trace_array *tr); int (*update_thresh)(struct trace_array *tr); void (*open)(struct trace_iterator *iter); void (*pipe_open)(struct trace_iterator *iter); void (*close)(struct trace_iterator *iter); void (*pipe_close)(struct trace_iterator *iter); ssize_t (*read)(struct trace_iterator *iter, struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos); ssize_t (*splice_read)(struct trace_iterator *iter, struct file *filp, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); #ifdef CONFIG_FTRACE_STARTUP_TEST int (*selftest)(struct tracer *trace, struct trace_array *tr); #endif void (*print_header)(struct seq_file *m); enum print_line_t (*print_line)(struct trace_iterator *iter); /* If you handled the flag setting, return 0 */ int (*set_flag)(struct trace_array *tr, u32 old_flags, u32 bit, int set); /* Return 0 if OK with change, else return non-zero */ int (*flag_changed)(struct trace_array *tr, u32 mask, int set); struct tracer *next; struct tracer_flags *flags; int enabled; bool print_max; bool allow_instances; #ifdef CONFIG_TRACER_MAX_TRACE bool use_max_tr; #endif /* True if tracer cannot be enabled in kernel param */ bool noboot; }; /* Only current can touch trace_recursion */ /* * For function tracing recursion: * The order of these bits are important. * * When function tracing occurs, the following steps are made: * If arch does not support a ftrace feature: * call internal function (uses INTERNAL bits) which calls... * If callback is registered to the "global" list, the list * function is called and recursion checks the GLOBAL bits. * then this function calls... * The function callback, which can use the FTRACE bits to * check for recursion. */ enum { /* Function recursion bits */ TRACE_FTRACE_BIT, TRACE_FTRACE_NMI_BIT, TRACE_FTRACE_IRQ_BIT, TRACE_FTRACE_SIRQ_BIT, TRACE_FTRACE_TRANSITION_BIT, /* Internal use recursion bits */ TRACE_INTERNAL_BIT, TRACE_INTERNAL_NMI_BIT, TRACE_INTERNAL_IRQ_BIT, TRACE_INTERNAL_SIRQ_BIT, TRACE_INTERNAL_TRANSITION_BIT, TRACE_BRANCH_BIT, /* * Abuse of the trace_recursion. * As we need a way to maintain state if we are tracing the function * graph in irq because we want to trace a particular function that * was called in irq context but we have irq tracing off. Since this * can only be modified by current, we can reuse trace_recursion. */ TRACE_IRQ_BIT, /* Set if the function is in the set_graph_function file */ TRACE_GRAPH_BIT, /* * In the very unlikely case that an interrupt came in * at a start of graph tracing, and we want to trace * the function in that interrupt, the depth can be greater * than zero, because of the preempted start of a previous * trace. In an even more unlikely case, depth could be 2 * if a softirq interrupted the start of graph tracing, * followed by an interrupt preempting a start of graph * tracing in the softirq, and depth can even be 3 * if an NMI came in at the start of an interrupt function * that preempted a softirq start of a function that * preempted normal context!!!! Luckily, it can't be * greater than 3, so the next two bits are a mask * of what the depth is when we set TRACE_GRAPH_BIT */ TRACE_GRAPH_DEPTH_START_BIT, TRACE_GRAPH_DEPTH_END_BIT, /* * To implement set_graph_notrace, if this bit is set, we ignore * function graph tracing of called functions, until the return * function is called to clear it. */ TRACE_GRAPH_NOTRACE_BIT, }; #define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0) #define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(1<<(bit)); } while (0) #define trace_recursion_test(bit) ((current)->trace_recursion & (1<<(bit))) #define trace_recursion_depth() \ (((current)->trace_recursion >> TRACE_GRAPH_DEPTH_START_BIT) & 3) #define trace_recursion_set_depth(depth) \ do { \ current->trace_recursion &= \ ~(3 << TRACE_GRAPH_DEPTH_START_BIT); \ current->trace_recursion |= \ ((depth) & 3) << TRACE_GRAPH_DEPTH_START_BIT; \ } while (0) #define TRACE_CONTEXT_BITS 4 #define TRACE_FTRACE_START TRACE_FTRACE_BIT #define TRACE_LIST_START TRACE_INTERNAL_BIT #define TRACE_CONTEXT_MASK ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1) enum { TRACE_CTX_NMI, TRACE_CTX_IRQ, TRACE_CTX_SOFTIRQ, TRACE_CTX_NORMAL, TRACE_CTX_TRANSITION, }; static __always_inline int trace_get_context_bit(void) { int bit; if (in_interrupt()) { if (in_nmi()) bit = TRACE_CTX_NMI; else if (in_irq()) bit = TRACE_CTX_IRQ; else bit = TRACE_CTX_SOFTIRQ; } else bit = TRACE_CTX_NORMAL; return bit; } static __always_inline int trace_test_and_set_recursion(int start) { unsigned int val = current->trace_recursion; int bit; bit = trace_get_context_bit() + start; if (unlikely(val & (1 << bit))) { /* * It could be that preempt_count has not been updated during * a switch between contexts. Allow for a single recursion. */ bit = start + TRACE_CTX_TRANSITION; if (trace_recursion_test(bit)) return -1; trace_recursion_set(bit); barrier(); return bit; } val |= 1 << bit; current->trace_recursion = val; barrier(); return bit; } static __always_inline void trace_clear_recursion(int bit) { unsigned int val = current->trace_recursion; bit = 1 << bit; val &= ~bit; barrier(); current->trace_recursion = val; } static inline struct ring_buffer_iter * trace_buffer_iter(struct trace_iterator *iter, int cpu) { return iter->buffer_iter ? iter->buffer_iter[cpu] : NULL; } int tracer_init(struct tracer *t, struct trace_array *tr); int tracing_is_enabled(void); void tracing_reset_online_cpus(struct array_buffer *buf); void tracing_reset_current(int cpu); void tracing_reset_all_online_cpus(void); int tracing_open_generic(struct inode *inode, struct file *filp); int tracing_open_generic_tr(struct inode *inode, struct file *filp); bool tracing_is_disabled(void); bool tracer_tracing_is_on(struct trace_array *tr); void tracer_tracing_on(struct trace_array *tr); void tracer_tracing_off(struct trace_array *tr); struct dentry *trace_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops); int tracing_init_dentry(void); struct ring_buffer_event; struct ring_buffer_event * trace_buffer_lock_reserve(struct trace_buffer *buffer, int type, unsigned long len, unsigned long flags, int pc); struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data); struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts); void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, struct ring_buffer_event *event); int trace_empty(struct trace_iterator *iter); void *trace_find_next_entry_inc(struct trace_iterator *iter); void trace_init_global_iter(struct trace_iterator *iter); void tracing_iter_reset(struct trace_iterator *iter, int cpu); unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu); unsigned long trace_total_entries(struct trace_array *tr); void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc); void trace_graph_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc); void trace_latency_header(struct seq_file *m); void trace_default_header(struct seq_file *m); void print_trace_header(struct seq_file *m, struct trace_iterator *iter); int trace_empty(struct trace_iterator *iter); void trace_graph_return(struct ftrace_graph_ret *trace); int trace_graph_entry(struct ftrace_graph_ent *trace); void set_graph_array(struct trace_array *tr); void tracing_start_cmdline_record(void); void tracing_stop_cmdline_record(void); void tracing_start_tgid_record(void); void tracing_stop_tgid_record(void); int register_tracer(struct tracer *type); int is_tracing_stopped(void); loff_t tracing_lseek(struct file *file, loff_t offset, int whence); extern cpumask_var_t __read_mostly tracing_buffer_mask; #define for_each_tracing_cpu(cpu) \ for_each_cpu(cpu, tracing_buffer_mask) extern unsigned long nsecs_to_usecs(unsigned long nsecs); extern unsigned long tracing_thresh; /* PID filtering */ extern int pid_max; bool trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid); bool trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct trace_pid_list *filtered_no_pids, struct task_struct *task); void trace_filter_add_remove_task(struct trace_pid_list *pid_list, struct task_struct *self, struct task_struct *task); void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos); void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos); int trace_pid_show(struct seq_file *m, void *v); void trace_free_pid_list(struct trace_pid_list *pid_list); int trace_pid_write(struct trace_pid_list *filtered_pids, struct trace_pid_list **new_pid_list, const char __user *ubuf, size_t cnt); #ifdef CONFIG_TRACER_MAX_TRACE void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, void *cond_data); void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu); #endif /* CONFIG_TRACER_MAX_TRACE */ #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \ defined(CONFIG_FSNOTIFY) void latency_fsnotify(struct trace_array *tr); #else static inline void latency_fsnotify(struct trace_array *tr) { } #endif #ifdef CONFIG_STACKTRACE void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, int pc); #else static inline void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, int pc) { } #endif /* CONFIG_STACKTRACE */ extern u64 ftrace_now(int cpu); extern void trace_find_cmdline(int pid, char comm[]); extern int trace_find_tgid(int pid); extern void trace_event_follow_fork(struct trace_array *tr, bool enable); #ifdef CONFIG_DYNAMIC_FTRACE extern unsigned long ftrace_update_tot_cnt; extern unsigned long ftrace_number_of_pages; extern unsigned long ftrace_number_of_groups; void ftrace_init_trace_array(struct trace_array *tr); #else static inline void ftrace_init_trace_array(struct trace_array *tr) { } #endif #define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func extern int DYN_FTRACE_TEST_NAME(void); #define DYN_FTRACE_TEST_NAME2 trace_selftest_dynamic_test_func2 extern int DYN_FTRACE_TEST_NAME2(void); extern bool ring_buffer_expanded; extern bool tracing_selftest_disabled; #ifdef CONFIG_FTRACE_STARTUP_TEST extern void __init disable_tracing_selftest(const char *reason); extern int trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_function_graph(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr); /* * Tracer data references selftest functions that only occur * on boot up. These can be __init functions. Thus, when selftests * are enabled, then the tracers need to reference __init functions. */ #define __tracer_data __refdata #else static inline void __init disable_tracing_selftest(const char *reason) { } /* Tracers are seldom changed. Optimize when selftests are disabled. */ #define __tracer_data __read_mostly #endif /* CONFIG_FTRACE_STARTUP_TEST */ extern void *head_page(struct trace_array_cpu *data); extern unsigned long long ns2usecs(u64 nsec); extern int trace_vbprintk(unsigned long ip, const char *fmt, va_list args); extern int trace_vprintk(unsigned long ip, const char *fmt, va_list args); extern int trace_array_vprintk(struct trace_array *tr, unsigned long ip, const char *fmt, va_list args); int trace_array_printk_buf(struct trace_buffer *buffer, unsigned long ip, const char *fmt, ...); void trace_printk_seq(struct trace_seq *s); enum print_line_t print_trace_line(struct trace_iterator *iter); extern char trace_find_mark(unsigned long long duration); struct ftrace_hash; struct ftrace_mod_load { struct list_head list; char *func; char *module; int enable; }; enum { FTRACE_HASH_FL_MOD = (1 << 0), }; struct ftrace_hash { unsigned long size_bits; struct hlist_head *buckets; unsigned long count; unsigned long flags; struct rcu_head rcu; }; struct ftrace_func_entry * ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip); static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash) { return !hash || !(hash->count || (hash->flags & FTRACE_HASH_FL_MOD)); } /* Standard output formatting function used for function return traces */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* Flag options */ #define TRACE_GRAPH_PRINT_OVERRUN 0x1 #define TRACE_GRAPH_PRINT_CPU 0x2 #define TRACE_GRAPH_PRINT_OVERHEAD 0x4 #define TRACE_GRAPH_PRINT_PROC 0x8 #define TRACE_GRAPH_PRINT_DURATION 0x10 #define TRACE_GRAPH_PRINT_ABS_TIME 0x20 #define TRACE_GRAPH_PRINT_REL_TIME 0x40 #define TRACE_GRAPH_PRINT_IRQS 0x80 #define TRACE_GRAPH_PRINT_TAIL 0x100 #define TRACE_GRAPH_SLEEP_TIME 0x200 #define TRACE_GRAPH_GRAPH_TIME 0x400 #define TRACE_GRAPH_PRINT_FILL_SHIFT 28 #define TRACE_GRAPH_PRINT_FILL_MASK (0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT) extern void ftrace_graph_sleep_time_control(bool enable); #ifdef CONFIG_FUNCTION_PROFILER extern void ftrace_graph_graph_time_control(bool enable); #else static inline void ftrace_graph_graph_time_control(bool enable) { } #endif extern enum print_line_t print_graph_function_flags(struct trace_iterator *iter, u32 flags); extern void print_graph_headers_flags(struct seq_file *s, u32 flags); extern void trace_print_graph_duration(unsigned long long duration, struct trace_seq *s); extern void graph_trace_open(struct trace_iterator *iter); extern void graph_trace_close(struct trace_iterator *iter); extern int __trace_graph_entry(struct trace_array *tr, struct ftrace_graph_ent *trace, unsigned long flags, int pc); extern void __trace_graph_return(struct trace_array *tr, struct ftrace_graph_ret *trace, unsigned long flags, int pc); #ifdef CONFIG_DYNAMIC_FTRACE extern struct ftrace_hash __rcu *ftrace_graph_hash; extern struct ftrace_hash __rcu *ftrace_graph_notrace_hash; static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) { unsigned long addr = trace->func; int ret = 0; struct ftrace_hash *hash; preempt_disable_notrace(); /* * Have to open code "rcu_dereference_sched()" because the * function graph tracer can be called when RCU is not * "watching". * Protected with schedule_on_each_cpu(ftrace_sync) */ hash = rcu_dereference_protected(ftrace_graph_hash, !preemptible()); if (ftrace_hash_empty(hash)) { ret = 1; goto out; } if (ftrace_lookup_ip(hash, addr)) { /* * This needs to be cleared on the return functions * when the depth is zero. */ trace_recursion_set(TRACE_GRAPH_BIT); trace_recursion_set_depth(trace->depth); /* * If no irqs are to be traced, but a set_graph_function * is set, and called by an interrupt handler, we still * want to trace it. */ if (in_irq()) trace_recursion_set(TRACE_IRQ_BIT); else trace_recursion_clear(TRACE_IRQ_BIT); ret = 1; } out: preempt_enable_notrace(); return ret; } static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace) { if (trace_recursion_test(TRACE_GRAPH_BIT) && trace->depth == trace_recursion_depth()) trace_recursion_clear(TRACE_GRAPH_BIT); } static inline int ftrace_graph_notrace_addr(unsigned long addr) { int ret = 0; struct ftrace_hash *notrace_hash; preempt_disable_notrace(); /* * Have to open code "rcu_dereference_sched()" because the * function graph tracer can be called when RCU is not * "watching". * Protected with schedule_on_each_cpu(ftrace_sync) */ notrace_hash = rcu_dereference_protected(ftrace_graph_notrace_hash, !preemptible()); if (ftrace_lookup_ip(notrace_hash, addr)) ret = 1; preempt_enable_notrace(); return ret; } #else static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) { return 1; } static inline int ftrace_graph_notrace_addr(unsigned long addr) { return 0; } static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace) { } #endif /* CONFIG_DYNAMIC_FTRACE */ extern unsigned int fgraph_max_depth; static inline bool ftrace_graph_ignore_func(struct ftrace_graph_ent *trace) { /* trace it when it is-nested-in or is a function enabled. */ return !(trace_recursion_test(TRACE_GRAPH_BIT) || ftrace_graph_addr(trace)) || (trace->depth < 0) || (fgraph_max_depth && trace->depth >= fgraph_max_depth); } #else /* CONFIG_FUNCTION_GRAPH_TRACER */ static inline enum print_line_t print_graph_function_flags(struct trace_iterator *iter, u32 flags) { return TRACE_TYPE_UNHANDLED; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ extern struct list_head ftrace_pids; #ifdef CONFIG_FUNCTION_TRACER #define FTRACE_PID_IGNORE -1 #define FTRACE_PID_TRACE -2 struct ftrace_func_command { struct list_head list; char *name; int (*func)(struct trace_array *tr, struct ftrace_hash *hash, char *func, char *cmd, char *params, int enable); }; extern bool ftrace_filter_param __initdata; static inline int ftrace_trace_task(struct trace_array *tr) { return this_cpu_read(tr->array_buffer.data->ftrace_ignore_pid) != FTRACE_PID_IGNORE; } extern int ftrace_is_dead(void); int ftrace_create_function_files(struct trace_array *tr, struct dentry *parent); void ftrace_destroy_function_files(struct trace_array *tr); int ftrace_allocate_ftrace_ops(struct trace_array *tr); void ftrace_free_ftrace_ops(struct trace_array *tr); void ftrace_init_global_array_ops(struct trace_array *tr); void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func); void ftrace_reset_array_ops(struct trace_array *tr); void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d_tracer); void ftrace_init_tracefs_toplevel(struct trace_array *tr, struct dentry *d_tracer); void ftrace_clear_pids(struct trace_array *tr); int init_function_trace(void); void ftrace_pid_follow_fork(struct trace_array *tr, bool enable); #else static inline int ftrace_trace_task(struct trace_array *tr) { return 1; } static inline int ftrace_is_dead(void) { return 0; } static inline int ftrace_create_function_files(struct trace_array *tr, struct dentry *parent) { return 0; } static inline int ftrace_allocate_ftrace_ops(struct trace_array *tr) { return 0; } static inline void ftrace_free_ftrace_ops(struct trace_array *tr) { } static inline void ftrace_destroy_function_files(struct trace_array *tr) { } static inline __init void ftrace_init_global_array_ops(struct trace_array *tr) { } static inline void ftrace_reset_array_ops(struct trace_array *tr) { } static inline void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d) { } static inline void ftrace_init_tracefs_toplevel(struct trace_array *tr, struct dentry *d) { } static inline void ftrace_clear_pids(struct trace_array *tr) { } static inline int init_function_trace(void) { return 0; } static inline void ftrace_pid_follow_fork(struct trace_array *tr, bool enable) { } /* ftace_func_t type is not defined, use macro instead of static inline */ #define ftrace_init_array_ops(tr, func) do { } while (0) #endif /* CONFIG_FUNCTION_TRACER */ #if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE) struct ftrace_probe_ops { void (*func)(unsigned long ip, unsigned long parent_ip, struct trace_array *tr, struct ftrace_probe_ops *ops, void *data); int (*init)(struct ftrace_probe_ops *ops, struct trace_array *tr, unsigned long ip, void *init_data, void **data); void (*free)(struct ftrace_probe_ops *ops, struct trace_array *tr, unsigned long ip, void *data); int (*print)(struct seq_file *m, unsigned long ip, struct ftrace_probe_ops *ops, void *data); }; struct ftrace_func_mapper; typedef int (*ftrace_mapper_func)(void *data); struct ftrace_func_mapper *allocate_ftrace_func_mapper(void); void **ftrace_func_mapper_find_ip(struct ftrace_func_mapper *mapper, unsigned long ip); int ftrace_func_mapper_add_ip(struct ftrace_func_mapper *mapper, unsigned long ip, void *data); void *ftrace_func_mapper_remove_ip(struct ftrace_func_mapper *mapper, unsigned long ip); void free_ftrace_func_mapper(struct ftrace_func_mapper *mapper, ftrace_mapper_func free_func); extern int register_ftrace_function_probe(char *glob, struct trace_array *tr, struct ftrace_probe_ops *ops, void *data); extern int unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr, struct ftrace_probe_ops *ops); extern void clear_ftrace_function_probes(struct trace_array *tr); int register_ftrace_command(struct ftrace_func_command *cmd); int unregister_ftrace_command(struct ftrace_func_command *cmd); void ftrace_create_filter_files(struct ftrace_ops *ops, struct dentry *parent); void ftrace_destroy_filter_files(struct ftrace_ops *ops); extern int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, int len, int reset); extern int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, int len, int reset); #else struct ftrace_func_command; static inline __init int register_ftrace_command(struct ftrace_func_command *cmd) { return -EINVAL; } static inline __init int unregister_ftrace_command(char *cmd_name) { return -EINVAL; } static inline void clear_ftrace_function_probes(struct trace_array *tr) { } /* * The ops parameter passed in is usually undefined. * This must be a macro. */ #define ftrace_create_filter_files(ops, parent) do { } while (0) #define ftrace_destroy_filter_files(ops) do { } while (0) #endif /* CONFIG_FUNCTION_TRACER && CONFIG_DYNAMIC_FTRACE */ bool ftrace_event_is_function(struct trace_event_call *call); /* * struct trace_parser - servers for reading the user input separated by spaces * @cont: set if the input is not complete - no final space char was found * @buffer: holds the parsed user input * @idx: user input length * @size: buffer size */ struct trace_parser { bool cont; char *buffer; unsigned idx; unsigned size; }; static inline bool trace_parser_loaded(struct trace_parser *parser) { return (parser->idx != 0); } static inline bool trace_parser_cont(struct trace_parser *parser) { return parser->cont; } static inline void trace_parser_clear(struct trace_parser *parser) { parser->cont = false; parser->idx = 0; } extern int trace_parser_get_init(struct trace_parser *parser, int size); extern void trace_parser_put(struct trace_parser *parser); extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf, size_t cnt, loff_t *ppos); /* * Only create function graph options if function graph is configured. */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER # define FGRAPH_FLAGS \ C(DISPLAY_GRAPH, "display-graph"), #else # define FGRAPH_FLAGS #endif #ifdef CONFIG_BRANCH_TRACER # define BRANCH_FLAGS \ C(BRANCH, "branch"), #else # define BRANCH_FLAGS #endif #ifdef CONFIG_FUNCTION_TRACER # define FUNCTION_FLAGS \ C(FUNCTION, "function-trace"), \ C(FUNC_FORK, "function-fork"), # define FUNCTION_DEFAULT_FLAGS TRACE_ITER_FUNCTION #else # define FUNCTION_FLAGS # define FUNCTION_DEFAULT_FLAGS 0UL # define TRACE_ITER_FUNC_FORK 0UL #endif #ifdef CONFIG_STACKTRACE # define STACK_FLAGS \ C(STACKTRACE, "stacktrace"), #else # define STACK_FLAGS #endif /* * trace_iterator_flags is an enumeration that defines bit * positions into trace_flags that controls the output. * * NOTE: These bits must match the trace_options array in * trace.c (this macro guarantees it). */ #define TRACE_FLAGS \ C(PRINT_PARENT, "print-parent"), \ C(SYM_OFFSET, "sym-offset"), \ C(SYM_ADDR, "sym-addr"), \ C(VERBOSE, "verbose"), \ C(RAW, "raw"), \ C(HEX, "hex"), \ C(BIN, "bin"), \ C(BLOCK, "block"), \ C(PRINTK, "trace_printk"), \ C(ANNOTATE, "annotate"), \ C(USERSTACKTRACE, "userstacktrace"), \ C(SYM_USEROBJ, "sym-userobj"), \ C(PRINTK_MSGONLY, "printk-msg-only"), \ C(CONTEXT_INFO, "context-info"), /* Print pid/cpu/time */ \ C(LATENCY_FMT, "latency-format"), \ C(RECORD_CMD, "record-cmd"), \ C(RECORD_TGID, "record-tgid"), \ C(OVERWRITE, "overwrite"), \ C(STOP_ON_FREE, "disable_on_free"), \ C(IRQ_INFO, "irq-info"), \ C(MARKERS, "markers"), \ C(EVENT_FORK, "event-fork"), \ C(PAUSE_ON_TRACE, "pause-on-trace"), \ FUNCTION_FLAGS \ FGRAPH_FLAGS \ STACK_FLAGS \ BRANCH_FLAGS /* * By defining C, we can make TRACE_FLAGS a list of bit names * that will define the bits for the flag masks. */ #undef C #define C(a, b) TRACE_ITER_##a##_BIT enum trace_iterator_bits { TRACE_FLAGS /* Make sure we don't go more than we have bits for */ TRACE_ITER_LAST_BIT }; /* * By redefining C, we can make TRACE_FLAGS a list of masks that * use the bits as defined above. */ #undef C #define C(a, b) TRACE_ITER_##a = (1 << TRACE_ITER_##a##_BIT) enum trace_iterator_flags { TRACE_FLAGS }; /* * TRACE_ITER_SYM_MASK masks the options in trace_flags that * control the output of kernel symbols. */ #define TRACE_ITER_SYM_MASK \ (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR) extern struct tracer nop_trace; #ifdef CONFIG_BRANCH_TRACER extern int enable_branch_tracing(struct trace_array *tr); extern void disable_branch_tracing(void); static inline int trace_branch_enable(struct trace_array *tr) { if (tr->trace_flags & TRACE_ITER_BRANCH) return enable_branch_tracing(tr); return 0; } static inline void trace_branch_disable(void) { /* due to races, always disable */ disable_branch_tracing(); } #else static inline int trace_branch_enable(struct trace_array *tr) { return 0; } static inline void trace_branch_disable(void) { } #endif /* CONFIG_BRANCH_TRACER */ /* set ring buffers to default size if not already done so */ int tracing_update_buffers(void); struct ftrace_event_field { struct list_head link; const char *name; const char *type; int filter_type; int offset; int size; int is_signed; }; struct prog_entry; struct event_filter { struct prog_entry __rcu *prog; char *filter_string; }; struct event_subsystem { struct list_head list; const char *name; struct event_filter *filter; int ref_count; }; struct trace_subsystem_dir { struct list_head list; struct event_subsystem *subsystem; struct trace_array *tr; struct dentry *entry; int ref_count; int nr_events; }; extern int call_filter_check_discard(struct trace_event_call *call, void *rec, struct trace_buffer *buffer, struct ring_buffer_event *event); void trace_buffer_unlock_commit_regs(struct trace_array *tr, struct trace_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc, struct pt_regs *regs); static inline void trace_buffer_unlock_commit(struct trace_array *tr, struct trace_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc) { trace_buffer_unlock_commit_regs(tr, buffer, event, flags, pc, NULL); } DECLARE_PER_CPU(struct ring_buffer_event *, trace_buffered_event); DECLARE_PER_CPU(int, trace_buffered_event_cnt); void trace_buffered_event_disable(void); void trace_buffered_event_enable(void); static inline void __trace_event_discard_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) { if (this_cpu_read(trace_buffered_event) == event) { /* Simply release the temp buffer */ this_cpu_dec(trace_buffered_event_cnt); return; } ring_buffer_discard_commit(buffer, event); } /* * Helper function for event_trigger_unlock_commit{_regs}(). * If there are event triggers attached to this event that requires * filtering against its fields, then they will be called as the * entry already holds the field information of the current event. * * It also checks if the event should be discarded or not. * It is to be discarded if the event is soft disabled and the * event was only recorded to process triggers, or if the event * filter is active and this event did not match the filters. * * Returns true if the event is discarded, false otherwise. */ static inline bool __event_trigger_test_discard(struct trace_event_file *file, struct trace_buffer *buffer, struct ring_buffer_event *event, void *entry, enum event_trigger_type *tt) { unsigned long eflags = file->flags; if (eflags & EVENT_FILE_FL_TRIGGER_COND) *tt = event_triggers_call(file, entry, event); if (likely(!(file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED | EVENT_FILE_FL_PID_FILTER)))) return false; if (file->flags & EVENT_FILE_FL_SOFT_DISABLED) goto discard; if (file->flags & EVENT_FILE_FL_FILTERED && !filter_match_preds(file->filter, entry)) goto discard; if ((file->flags & EVENT_FILE_FL_PID_FILTER) && trace_event_ignore_this_pid(file)) goto discard; return false; discard: __trace_event_discard_commit(buffer, event); return true; } /** * event_trigger_unlock_commit - handle triggers and finish event commit * @file: The file pointer assoctiated to the event * @buffer: The ring buffer that the event is being written to * @event: The event meta data in the ring buffer * @entry: The event itself * @irq_flags: The state of the interrupts at the start of the event * @pc: The state of the preempt count at the start of the event. * * This is a helper function to handle triggers that require data * from the event itself. It also tests the event against filters and * if the event is soft disabled and should be discarded. */ static inline void event_trigger_unlock_commit(struct trace_event_file *file, struct trace_buffer *buffer, struct ring_buffer_event *event, void *entry, unsigned long irq_flags, int pc) { enum event_trigger_type tt = ETT_NONE; if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc); if (tt) event_triggers_post_call(file, tt); } /** * event_trigger_unlock_commit_regs - handle triggers and finish event commit * @file: The file pointer assoctiated to the event * @buffer: The ring buffer that the event is being written to * @event: The event meta data in the ring buffer * @entry: The event itself * @irq_flags: The state of the interrupts at the start of the event * @pc: The state of the preempt count at the start of the event. * * This is a helper function to handle triggers that require data * from the event itself. It also tests the event against filters and * if the event is soft disabled and should be discarded. * * Same as event_trigger_unlock_commit() but calls * trace_buffer_unlock_commit_regs() instead of trace_buffer_unlock_commit(). */ static inline void event_trigger_unlock_commit_regs(struct trace_event_file *file, struct trace_buffer *buffer, struct ring_buffer_event *event, void *entry, unsigned long irq_flags, int pc, struct pt_regs *regs) { enum event_trigger_type tt = ETT_NONE; if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) trace_buffer_unlock_commit_regs(file->tr, buffer, event, irq_flags, pc, regs); if (tt) event_triggers_post_call(file, tt); } #define FILTER_PRED_INVALID ((unsigned short)-1) #define FILTER_PRED_IS_RIGHT (1 << 15) #define FILTER_PRED_FOLD (1 << 15) /* * The max preds is the size of unsigned short with * two flags at the MSBs. One bit is used for both the IS_RIGHT * and FOLD flags. The other is reserved. * * 2^14 preds is way more than enough. */ #define MAX_FILTER_PRED 16384 struct filter_pred; struct regex; typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event); typedef int (*regex_match_func)(char *str, struct regex *r, int len); enum regex_type { MATCH_FULL = 0, MATCH_FRONT_ONLY, MATCH_MIDDLE_ONLY, MATCH_END_ONLY, MATCH_GLOB, MATCH_INDEX, }; struct regex { char pattern[MAX_FILTER_STR_VAL]; int len; int field_len; regex_match_func match; }; struct filter_pred { filter_pred_fn_t fn; u64 val; struct regex regex; unsigned short *ops; struct ftrace_event_field *field; int offset; int not; int op; }; static inline bool is_string_field(struct ftrace_event_field *field) { return field->filter_type == FILTER_DYN_STRING || field->filter_type == FILTER_STATIC_STRING || field->filter_type == FILTER_PTR_STRING || field->filter_type == FILTER_COMM; } static inline bool is_function_field(struct ftrace_event_field *field) { return field->filter_type == FILTER_TRACE_FN; } extern enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not); extern void print_event_filter(struct trace_event_file *file, struct trace_seq *s); extern int apply_event_filter(struct trace_event_file *file, char *filter_string); extern int apply_subsystem_event_filter(struct trace_subsystem_dir *dir, char *filter_string); extern void print_subsystem_event_filter(struct event_subsystem *system, struct trace_seq *s); extern int filter_assign_type(const char *type); extern int create_event_filter(struct trace_array *tr, struct trace_event_call *call, char *filter_str, bool set_str, struct event_filter **filterp); extern void free_event_filter(struct event_filter *filter); struct ftrace_event_field * trace_find_event_field(struct trace_event_call *call, char *name); extern void trace_event_enable_cmd_record(bool enable); extern void trace_event_enable_tgid_record(bool enable); extern int event_trace_init(void); extern int init_events(void); extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr); extern int event_trace_del_tracer(struct trace_array *tr); extern void __trace_early_add_events(struct trace_array *tr); extern struct trace_event_file *__find_event_file(struct trace_array *tr, const char *system, const char *event); extern struct trace_event_file *find_event_file(struct trace_array *tr, const char *system, const char *event); static inline void *event_file_data(struct file *filp) { return READ_ONCE(file_inode(filp)->i_private); } extern struct mutex event_mutex; extern struct list_head ftrace_events; extern const struct file_operations event_trigger_fops; extern const struct file_operations event_hist_fops; extern const struct file_operations event_hist_debug_fops; extern const struct file_operations event_inject_fops; #ifdef CONFIG_HIST_TRIGGERS extern int register_trigger_hist_cmd(void); extern int register_trigger_hist_enable_disable_cmds(void); #else static inline int register_trigger_hist_cmd(void) { return 0; } static inline int register_trigger_hist_enable_disable_cmds(void) { return 0; } #endif extern int register_trigger_cmds(void); extern void clear_event_triggers(struct trace_array *tr); struct event_trigger_data { unsigned long count; int ref; struct event_trigger_ops *ops; struct event_command *cmd_ops; struct event_filter __rcu *filter; char *filter_str; void *private_data; bool paused; bool paused_tmp; struct list_head list; char *name; struct list_head named_list; struct event_trigger_data *named_data; }; /* Avoid typos */ #define ENABLE_EVENT_STR "enable_event" #define DISABLE_EVENT_STR "disable_event" #define ENABLE_HIST_STR "enable_hist" #define DISABLE_HIST_STR "disable_hist" struct enable_trigger_data { struct trace_event_file *file; bool enable; bool hist; }; extern int event_enable_trigger_print(struct seq_file *m, struct event_trigger_ops *ops, struct event_trigger_data *data); extern void event_enable_trigger_free(struct event_trigger_ops *ops, struct event_trigger_data *data); extern int event_enable_trigger_func(struct event_command *cmd_ops, struct trace_event_file *file, char *glob, char *cmd, char *param); extern int event_enable_register_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file); extern void event_enable_unregister_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *test, struct trace_event_file *file); extern void trigger_data_free(struct event_trigger_data *data); extern int event_trigger_init(struct event_trigger_ops *ops, struct event_trigger_data *data); extern int trace_event_trigger_enable_disable(struct trace_event_file *file, int trigger_enable); extern void update_cond_flag(struct trace_event_file *file); extern int set_trigger_filter(char *filter_str, struct event_trigger_data *trigger_data, struct trace_event_file *file); extern struct event_trigger_data *find_named_trigger(const char *name); extern bool is_named_trigger(struct event_trigger_data *test); extern int save_named_trigger(const char *name, struct event_trigger_data *data); extern void del_named_trigger(struct event_trigger_data *data); extern void pause_named_trigger(struct event_trigger_data *data); extern void unpause_named_trigger(struct event_trigger_data *data); extern void set_named_trigger_data(struct event_trigger_data *data, struct event_trigger_data *named_data); extern struct event_trigger_data * get_named_trigger_data(struct event_trigger_data *data); extern int register_event_command(struct event_command *cmd); extern int unregister_event_command(struct event_command *cmd); extern int register_trigger_hist_enable_disable_cmds(void); /** * struct event_trigger_ops - callbacks for trace event triggers * * The methods in this structure provide per-event trigger hooks for * various trigger operations. * * All the methods below, except for @init() and @free(), must be * implemented. * * @func: The trigger 'probe' function called when the triggering * event occurs. The data passed into this callback is the data * that was supplied to the event_command @reg() function that * registered the trigger (see struct event_command) along with * the trace record, rec. * * @init: An optional initialization function called for the trigger * when the trigger is registered (via the event_command reg() * function). This can be used to perform per-trigger * initialization such as incrementing a per-trigger reference * count, for instance. This is usually implemented by the * generic utility function @event_trigger_init() (see * trace_event_triggers.c). * * @free: An optional de-initialization function called for the * trigger when the trigger is unregistered (via the * event_command @reg() function). This can be used to perform * per-trigger de-initialization such as decrementing a * per-trigger reference count and freeing corresponding trigger * data, for instance. This is usually implemented by the * generic utility function @event_trigger_free() (see * trace_event_triggers.c). * * @print: The callback function invoked to have the trigger print * itself. This is usually implemented by a wrapper function * that calls the generic utility function @event_trigger_print() * (see trace_event_triggers.c). */ struct event_trigger_ops { void (*func)(struct event_trigger_data *data, void *rec, struct ring_buffer_event *rbe); int (*init)(struct event_trigger_ops *ops, struct event_trigger_data *data); void (*free)(struct event_trigger_ops *ops, struct event_trigger_data *data); int (*print)(struct seq_file *m, struct event_trigger_ops *ops, struct event_trigger_data *data); }; /** * struct event_command - callbacks and data members for event commands * * Event commands are invoked by users by writing the command name * into the 'trigger' file associated with a trace event. The * parameters associated with a specific invocation of an event * command are used to create an event trigger instance, which is * added to the list of trigger instances associated with that trace * event. When the event is hit, the set of triggers associated with * that event is invoked. * * The data members in this structure provide per-event command data * for various event commands. * * All the data members below, except for @post_trigger, must be set * for each event command. * * @name: The unique name that identifies the event command. This is * the name used when setting triggers via trigger files. * * @trigger_type: A unique id that identifies the event command * 'type'. This value has two purposes, the first to ensure that * only one trigger of the same type can be set at a given time * for a particular event e.g. it doesn't make sense to have both * a traceon and traceoff trigger attached to a single event at * the same time, so traceon and traceoff have the same type * though they have different names. The @trigger_type value is * also used as a bit value for deferring the actual trigger * action until after the current event is finished. Some * commands need to do this if they themselves log to the trace * buffer (see the @post_trigger() member below). @trigger_type * values are defined by adding new values to the trigger_type * enum in include/linux/trace_events.h. * * @flags: See the enum event_command_flags below. * * All the methods below, except for @set_filter() and @unreg_all(), * must be implemented. * * @func: The callback function responsible for parsing and * registering the trigger written to the 'trigger' file by the * user. It allocates the trigger instance and registers it with * the appropriate trace event. It makes use of the other * event_command callback functions to orchestrate this, and is * usually implemented by the generic utility function * @event_trigger_callback() (see trace_event_triggers.c). * * @reg: Adds the trigger to the list of triggers associated with the * event, and enables the event trigger itself, after * initializing it (via the event_trigger_ops @init() function). * This is also where commands can use the @trigger_type value to * make the decision as to whether or not multiple instances of * the trigger should be allowed. This is usually implemented by * the generic utility function @register_trigger() (see * trace_event_triggers.c). * * @unreg: Removes the trigger from the list of triggers associated * with the event, and disables the event trigger itself, after * initializing it (via the event_trigger_ops @free() function). * This is usually implemented by the generic utility function * @unregister_trigger() (see trace_event_triggers.c). * * @unreg_all: An optional function called to remove all the triggers * from the list of triggers associated with the event. Called * when a trigger file is opened in truncate mode. * * @set_filter: An optional function called to parse and set a filter * for the trigger. If no @set_filter() method is set for the * event command, filters set by the user for the command will be * ignored. This is usually implemented by the generic utility * function @set_trigger_filter() (see trace_event_triggers.c). * * @get_trigger_ops: The callback function invoked to retrieve the * event_trigger_ops implementation associated with the command. */ struct event_command { struct list_head list; char *name; enum event_trigger_type trigger_type; int flags; int (*func)(struct event_command *cmd_ops, struct trace_event_file *file, char *glob, char *cmd, char *params); int (*reg)(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file); void (*unreg)(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file); void (*unreg_all)(struct trace_event_file *file); int (*set_filter)(char *filter_str, struct event_trigger_data *data, struct trace_event_file *file); struct event_trigger_ops *(*get_trigger_ops)(char *cmd, char *param); }; /** * enum event_command_flags - flags for struct event_command * * @POST_TRIGGER: A flag that says whether or not this command needs * to have its action delayed until after the current event has * been closed. Some triggers need to avoid being invoked while * an event is currently in the process of being logged, since * the trigger may itself log data into the trace buffer. Thus * we make sure the current event is committed before invoking * those triggers. To do that, the trigger invocation is split * in two - the first part checks the filter using the current * trace record; if a command has the @post_trigger flag set, it * sets a bit for itself in the return value, otherwise it * directly invokes the trigger. Once all commands have been * either invoked or set their return flag, the current record is * either committed or discarded. At that point, if any commands * have deferred their triggers, those commands are finally * invoked following the close of the current event. In other * words, if the event_trigger_ops @func() probe implementation * itself logs to the trace buffer, this flag should be set, * otherwise it can be left unspecified. * * @NEEDS_REC: A flag that says whether or not this command needs * access to the trace record in order to perform its function, * regardless of whether or not it has a filter associated with * it (filters make a trigger require access to the trace record * but are not always present). */ enum event_command_flags { EVENT_CMD_FL_POST_TRIGGER = 1, EVENT_CMD_FL_NEEDS_REC = 2, }; static inline bool event_command_post_trigger(struct event_command *cmd_ops) { return cmd_ops->flags & EVENT_CMD_FL_POST_TRIGGER; } static inline bool event_command_needs_rec(struct event_command *cmd_ops) { return cmd_ops->flags & EVENT_CMD_FL_NEEDS_REC; } extern int trace_event_enable_disable(struct trace_event_file *file, int enable, int soft_disable); extern int tracing_alloc_snapshot(void); extern void tracing_snapshot_cond(struct trace_array *tr, void *cond_data); extern int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update); extern int tracing_snapshot_cond_disable(struct trace_array *tr); extern void *tracing_cond_snapshot_data(struct trace_array *tr); extern const char *__start___trace_bprintk_fmt[]; extern const char *__stop___trace_bprintk_fmt[]; extern const char *__start___tracepoint_str[]; extern const char *__stop___tracepoint_str[]; void trace_printk_control(bool enabled); void trace_printk_start_comm(void); int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set); int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled); /* Used from boot time tracer */ extern int trace_set_options(struct trace_array *tr, char *option); extern int tracing_set_tracer(struct trace_array *tr, const char *buf); extern ssize_t tracing_resize_ring_buffer(struct trace_array *tr, unsigned long size, int cpu_id); extern int tracing_set_cpumask(struct trace_array *tr, cpumask_var_t tracing_cpumask_new); #define MAX_EVENT_NAME_LEN 64 extern int trace_run_command(const char *buf, int (*createfn)(int, char**)); extern ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, size_t count, loff_t *ppos, int (*createfn)(int, char**)); extern unsigned int err_pos(char *cmd, const char *str); extern void tracing_log_err(struct trace_array *tr, const char *loc, const char *cmd, const char **errs, u8 type, u8 pos); /* * Normal trace_printk() and friends allocates special buffers * to do the manipulation, as well as saves the print formats * into sections to display. But the trace infrastructure wants * to use these without the added overhead at the price of being * a bit slower (used mainly for warnings, where we don't care * about performance). The internal_trace_puts() is for such * a purpose. */ #define internal_trace_puts(str) __trace_puts(_THIS_IP_, str, strlen(str)) #undef FTRACE_ENTRY #define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \ extern struct trace_event_call \ __aligned(4) event_##call; #undef FTRACE_ENTRY_DUP #define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \ FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) #undef FTRACE_ENTRY_PACKED #define FTRACE_ENTRY_PACKED(call, struct_name, id, tstruct, print) \ FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) #include "trace_entries.h" #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_FUNCTION_TRACER) int perf_ftrace_event_register(struct trace_event_call *call, enum trace_reg type, void *data); #else #define perf_ftrace_event_register NULL #endif #ifdef CONFIG_FTRACE_SYSCALLS void init_ftrace_syscalls(void); const char *get_syscall_name(int syscall); #else static inline void init_ftrace_syscalls(void) { } static inline const char *get_syscall_name(int syscall) { return NULL; } #endif #ifdef CONFIG_EVENT_TRACING void trace_event_init(void); void trace_event_eval_update(struct trace_eval_map **map, int len); /* Used from boot time tracer */ extern int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set); extern int trigger_process_regex(struct trace_event_file *file, char *buff); #else static inline void __init trace_event_init(void) { } static inline void trace_event_eval_update(struct trace_eval_map **map, int len) { } #endif #ifdef CONFIG_TRACER_SNAPSHOT void tracing_snapshot_instance(struct trace_array *tr); int tracing_alloc_snapshot_instance(struct trace_array *tr); #else static inline void tracing_snapshot_instance(struct trace_array *tr) { } static inline int tracing_alloc_snapshot_instance(struct trace_array *tr) { return 0; } #endif #ifdef CONFIG_PREEMPT_TRACER void tracer_preempt_on(unsigned long a0, unsigned long a1); void tracer_preempt_off(unsigned long a0, unsigned long a1); #else static inline void tracer_preempt_on(unsigned long a0, unsigned long a1) { } static inline void tracer_preempt_off(unsigned long a0, unsigned long a1) { } #endif #ifdef CONFIG_IRQSOFF_TRACER void tracer_hardirqs_on(unsigned long a0, unsigned long a1); void tracer_hardirqs_off(unsigned long a0, unsigned long a1); #else static inline void tracer_hardirqs_on(unsigned long a0, unsigned long a1) { } static inline void tracer_hardirqs_off(unsigned long a0, unsigned long a1) { } #endif extern struct trace_iterator *tracepoint_print_iter; /* * Reset the state of the trace_iterator so that it can read consumed data. * Normally, the trace_iterator is used for reading the data when it is not * consumed, and must retain state. */ static __always_inline void trace_iterator_reset(struct trace_iterator *iter) { const size_t offset = offsetof(struct trace_iterator, seq); /* * Keep gcc from complaining about overwriting more than just one * member in the structure. */ memset((char *)iter + offset, 0, sizeof(struct trace_iterator) - offset); iter->pos = -1; } /* Check the name is good for event/group/fields */ static inline bool is_good_name(const char *name) { if (!isalpha(*name) && *name != '_') return false; while (*++name != '\0') { if (!isalpha(*name) && !isdigit(*name) && *name != '_') return false; } return true; } #endif /* _LINUX_KERNEL_TRACE_H */
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 // SPDX-License-Identifier: GPL-2.0-only /* * Netlink message type permission tables, for user generated messages. * * Author: James Morris <jmorris@redhat.com> * * Copyright (C) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> */ #include <linux/types.h> #include <linux/kernel.h> #include <linux/netlink.h> #include <linux/rtnetlink.h> #include <linux/if.h> #include <linux/inet_diag.h> #include <linux/xfrm.h> #include <linux/audit.h> #include <linux/sock_diag.h> #include "flask.h" #include "av_permissions.h" #include "security.h" struct nlmsg_perm { u16 nlmsg_type; u32 perm; }; static const struct nlmsg_perm nlmsg_route_perms[] = { { RTM_NEWLINK, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELLINK, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETLINK, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_SETLINK, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_NEWADDR, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELADDR, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETADDR, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWROUTE, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELROUTE, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETROUTE, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWNEIGH, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELNEIGH, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETNEIGH, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWRULE, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELRULE, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETRULE, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWQDISC, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELQDISC, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETQDISC, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWTCLASS, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELTCLASS, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETTCLASS, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWTFILTER, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELTFILTER, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETTFILTER, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWACTION, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELACTION, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETACTION, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWPREFIX, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETMULTICAST, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_GETANYCAST, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_GETNEIGHTBL, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_SETNEIGHTBL, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_NEWADDRLABEL, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELADDRLABEL, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETADDRLABEL, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_GETDCB, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_SETDCB, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_NEWNETCONF, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELNETCONF, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETNETCONF, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWMDB, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELMDB, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETMDB, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWNSID, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELNSID, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_GETNSID, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWSTATS, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_GETSTATS, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWCACHEREPORT, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWCHAIN, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELCHAIN, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETCHAIN, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWNEXTHOP, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELNEXTHOP, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETNEXTHOP, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWLINKPROP, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELLINKPROP, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_NEWVLAN, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELVLAN, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETVLAN, NETLINK_ROUTE_SOCKET__NLMSG_READ }, }; static const struct nlmsg_perm nlmsg_tcpdiag_perms[] = { { TCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, { DCCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, { SOCK_DIAG_BY_FAMILY, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, { SOCK_DESTROY, NETLINK_TCPDIAG_SOCKET__NLMSG_WRITE }, }; static const struct nlmsg_perm nlmsg_xfrm_perms[] = { { XFRM_MSG_NEWSA, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_DELSA, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_GETSA, NETLINK_XFRM_SOCKET__NLMSG_READ }, { XFRM_MSG_NEWPOLICY, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_DELPOLICY, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_GETPOLICY, NETLINK_XFRM_SOCKET__NLMSG_READ }, { XFRM_MSG_ALLOCSPI, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_ACQUIRE, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_EXPIRE, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_UPDPOLICY, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_UPDSA, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_POLEXPIRE, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_FLUSHSA, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_FLUSHPOLICY, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_NEWAE, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_GETAE, NETLINK_XFRM_SOCKET__NLMSG_READ }, { XFRM_MSG_REPORT, NETLINK_XFRM_SOCKET__NLMSG_READ }, { XFRM_MSG_MIGRATE, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_NEWSADINFO, NETLINK_XFRM_SOCKET__NLMSG_READ }, { XFRM_MSG_GETSADINFO, NETLINK_XFRM_SOCKET__NLMSG_READ }, { XFRM_MSG_NEWSPDINFO, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_GETSPDINFO, NETLINK_XFRM_SOCKET__NLMSG_READ }, { XFRM_MSG_MAPPING, NETLINK_XFRM_SOCKET__NLMSG_READ }, { XFRM_MSG_SETDEFAULT, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_GETDEFAULT, NETLINK_XFRM_SOCKET__NLMSG_READ }, }; static const struct nlmsg_perm nlmsg_audit_perms[] = { { AUDIT_GET, NETLINK_AUDIT_SOCKET__NLMSG_READ }, { AUDIT_SET, NETLINK_AUDIT_SOCKET__NLMSG_WRITE }, { AUDIT_LIST, NETLINK_AUDIT_SOCKET__NLMSG_READPRIV }, { AUDIT_ADD, NETLINK_AUDIT_SOCKET__NLMSG_WRITE }, { AUDIT_DEL, NETLINK_AUDIT_SOCKET__NLMSG_WRITE }, { AUDIT_LIST_RULES, NETLINK_AUDIT_SOCKET__NLMSG_READPRIV }, { AUDIT_ADD_RULE, NETLINK_AUDIT_SOCKET__NLMSG_WRITE }, { AUDIT_DEL_RULE, NETLINK_AUDIT_SOCKET__NLMSG_WRITE }, { AUDIT_USER, NETLINK_AUDIT_SOCKET__NLMSG_RELAY }, { AUDIT_SIGNAL_INFO, NETLINK_AUDIT_SOCKET__NLMSG_READ }, { AUDIT_TRIM, NETLINK_AUDIT_SOCKET__NLMSG_WRITE }, { AUDIT_MAKE_EQUIV, NETLINK_AUDIT_SOCKET__NLMSG_WRITE }, { AUDIT_TTY_GET, NETLINK_AUDIT_SOCKET__NLMSG_READ }, { AUDIT_TTY_SET, NETLINK_AUDIT_SOCKET__NLMSG_TTY_AUDIT }, { AUDIT_GET_FEATURE, NETLINK_AUDIT_SOCKET__NLMSG_READ }, { AUDIT_SET_FEATURE, NETLINK_AUDIT_SOCKET__NLMSG_WRITE }, }; static int nlmsg_perm(u16 nlmsg_type, u32 *perm, const struct nlmsg_perm *tab, size_t tabsize) { int i, err = -EINVAL; for (i = 0; i < tabsize/sizeof(struct nlmsg_perm); i++) if (nlmsg_type == tab[i].nlmsg_type) { *perm = tab[i].perm; err = 0; break; } return err; } int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm) { int err = 0; switch (sclass) { case SECCLASS_NETLINK_ROUTE_SOCKET: /* RTM_MAX always points to RTM_SETxxxx, ie RTM_NEWxxx + 3. * If the BUILD_BUG_ON() below fails you must update the * structures at the top of this file with the new mappings * before updating the BUILD_BUG_ON() macro! */ BUILD_BUG_ON(RTM_MAX != (RTM_NEWVLAN + 3)); err = nlmsg_perm(nlmsg_type, perm, nlmsg_route_perms, sizeof(nlmsg_route_perms)); break; case SECCLASS_NETLINK_TCPDIAG_SOCKET: err = nlmsg_perm(nlmsg_type, perm, nlmsg_tcpdiag_perms, sizeof(nlmsg_tcpdiag_perms)); break; case SECCLASS_NETLINK_XFRM_SOCKET: /* If the BUILD_BUG_ON() below fails you must update the * structures at the top of this file with the new mappings * before updating the BUILD_BUG_ON() macro! */ BUILD_BUG_ON(XFRM_MSG_MAX != XFRM_MSG_GETDEFAULT); err = nlmsg_perm(nlmsg_type, perm, nlmsg_xfrm_perms, sizeof(nlmsg_xfrm_perms)); break; case SECCLASS_NETLINK_AUDIT_SOCKET: if ((nlmsg_type >= AUDIT_FIRST_USER_MSG && nlmsg_type <= AUDIT_LAST_USER_MSG) || (nlmsg_type >= AUDIT_FIRST_USER_MSG2 && nlmsg_type <= AUDIT_LAST_USER_MSG2)) { *perm = NETLINK_AUDIT_SOCKET__NLMSG_RELAY; } else { err = nlmsg_perm(nlmsg_type, perm, nlmsg_audit_perms, sizeof(nlmsg_audit_perms)); } break; /* No messaging from userspace, or class unknown/unhandled */ default: err = -ENOENT; break; } return err; }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_BITOPS_H #define _LINUX_BITOPS_H #include <asm/types.h> #include <linux/bits.h> /* Set bits in the first 'n' bytes when loaded from memory */ #ifdef __LITTLE_ENDIAN # define aligned_byte_mask(n) ((1UL << 8*(n))-1) #else # define aligned_byte_mask(n) (~0xffUL << (BITS_PER_LONG - 8 - 8*(n))) #endif #define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(long)) #define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(u64)) #define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(u32)) #define BITS_TO_BYTES(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(char)) extern unsigned int __sw_hweight8(unsigned int w); extern unsigned int __sw_hweight16(unsigned int w); extern unsigned int __sw_hweight32(unsigned int w); extern unsigned long __sw_hweight64(__u64 w); /* * Include this here because some architectures need generic_ffs/fls in * scope */ #include <asm/bitops.h> #define for_each_set_bit(bit, addr, size) \ for ((bit) = find_first_bit((addr), (size)); \ (bit) < (size); \ (bit) = find_next_bit((addr), (size), (bit) + 1)) /* same as for_each_set_bit() but use bit as value to start with */ #define for_each_set_bit_from(bit, addr, size) \ for ((bit) = find_next_bit((addr), (size), (bit)); \ (bit) < (size); \ (bit) = find_next_bit((addr), (size), (bit) + 1)) #define for_each_clear_bit(bit, addr, size) \ for ((bit) = find_first_zero_bit((addr), (size)); \ (bit) < (size); \ (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) /* same as for_each_clear_bit() but use bit as value to start with */ #define for_each_clear_bit_from(bit, addr, size) \ for ((bit) = find_next_zero_bit((addr), (size), (bit)); \ (bit) < (size); \ (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) /** * for_each_set_clump8 - iterate over bitmap for each 8-bit clump with set bits * @start: bit offset to start search and to store the current iteration offset * @clump: location to store copy of current 8-bit clump * @bits: bitmap address to base the search on * @size: bitmap size in number of bits */ #define for_each_set_clump8(start, clump, bits, size) \ for ((start) = find_first_clump8(&(clump), (bits), (size)); \ (start) < (size); \ (start) = find_next_clump8(&(clump), (bits), (size), (start) + 8)) static inline int get_bitmask_order(unsigned int count) { int order; order = fls(count); return order; /* We could be slightly more clever with -1 here... */ } static __always_inline unsigned long hweight_long(unsigned long w) { return sizeof(w) == 4 ? hweight32(w) : hweight64((__u64)w); } /** * rol64 - rotate a 64-bit value left * @word: value to rotate * @shift: bits to roll */ static inline __u64 rol64(__u64 word, unsigned int shift) { return (word << (shift & 63)) | (word >> ((-shift) & 63)); } /** * ror64 - rotate a 64-bit value right * @word: value to rotate * @shift: bits to roll */ static inline __u64 ror64(__u64 word, unsigned int shift) { return (word >> (shift & 63)) | (word << ((-shift) & 63)); } /** * rol32 - rotate a 32-bit value left * @word: value to rotate * @shift: bits to roll */ static inline __u32 rol32(__u32 word, unsigned int shift) { return (word << (shift & 31)) | (word >> ((-shift) & 31)); } /** * ror32 - rotate a 32-bit value right * @word: value to rotate * @shift: bits to roll */ static inline __u32 ror32(__u32 word, unsigned int shift) { return (word >> (shift & 31)) | (word << ((-shift) & 31)); } /** * rol16 - rotate a 16-bit value left * @word: value to rotate * @shift: bits to roll */ static inline __u16 rol16(__u16 word, unsigned int shift) { return (word << (shift & 15)) | (word >> ((-shift) & 15)); } /** * ror16 - rotate a 16-bit value right * @word: value to rotate * @shift: bits to roll */ static inline __u16 ror16(__u16 word, unsigned int shift) { return (word >> (shift & 15)) | (word << ((-shift) & 15)); } /** * rol8 - rotate an 8-bit value left * @word: value to rotate * @shift: bits to roll */ static inline __u8 rol8(__u8 word, unsigned int shift) { return (word << (shift & 7)) | (word >> ((-shift) & 7)); } /** * ror8 - rotate an 8-bit value right * @word: value to rotate * @shift: bits to roll */ static inline __u8 ror8(__u8 word, unsigned int shift) { return (word >> (shift & 7)) | (word << ((-shift) & 7)); } /** * sign_extend32 - sign extend a 32-bit value using specified bit as sign-bit * @value: value to sign extend * @index: 0 based bit index (0<=index<32) to sign bit * * This is safe to use for 16- and 8-bit types as well. */ static __always_inline __s32 sign_extend32(__u32 value, int index) { __u8 shift = 31 - index; return (__s32)(value << shift) >> shift; } /** * sign_extend64 - sign extend a 64-bit value using specified bit as sign-bit * @value: value to sign extend * @index: 0 based bit index (0<=index<64) to sign bit */ static __always_inline __s64 sign_extend64(__u64 value, int index) { __u8 shift = 63 - index; return (__s64)(value << shift) >> shift; } static inline unsigned fls_long(unsigned long l) { if (sizeof(l) == 4) return fls(l); return fls64(l); } static inline int get_count_order(unsigned int count) { if (count == 0) return -1; return fls(--count); } /** * get_count_order_long - get order after rounding @l up to power of 2 * @l: parameter * * it is same as get_count_order() but with long type parameter */ static inline int get_count_order_long(unsigned long l) { if (l == 0UL) return -1; return (int)fls_long(--l); } /** * __ffs64 - find first set bit in a 64 bit word * @word: The 64 bit word * * On 64 bit arches this is a synomyn for __ffs * The result is not defined if no bits are set, so check that @word * is non-zero before calling this. */ static inline unsigned long __ffs64(u64 word) { #if BITS_PER_LONG == 32 if (((u32)word) == 0UL) return __ffs((u32)(word >> 32)) + 32; #elif BITS_PER_LONG != 64 #error BITS_PER_LONG not 32 or 64 #endif return __ffs((unsigned long)word); } /** * assign_bit - Assign value to a bit in memory * @nr: the bit to set * @addr: the address to start counting from * @value: the value to assign */ static __always_inline void assign_bit(long nr, volatile unsigned long *addr, bool value) { if (value) set_bit(nr, addr); else clear_bit(nr, addr); } static __always_inline void __assign_bit(long nr, volatile unsigned long *addr, bool value) { if (value) __set_bit(nr, addr); else __clear_bit(nr, addr); } #ifdef __KERNEL__ #ifndef set_mask_bits #define set_mask_bits(ptr, mask, bits) \ ({ \ const typeof(*(ptr)) mask__ = (mask), bits__ = (bits); \ typeof(*(ptr)) old__, new__; \ \ do { \ old__ = READ_ONCE(*(ptr)); \ new__ = (old__ & ~mask__) | bits__; \ } while (cmpxchg(ptr, old__, new__) != old__); \ \ old__; \ }) #endif #ifndef bit_clear_unless #define bit_clear_unless(ptr, clear, test) \ ({ \ const typeof(*(ptr)) clear__ = (clear), test__ = (test);\ typeof(*(ptr)) old__, new__; \ \ do { \ old__ = READ_ONCE(*(ptr)); \ new__ = old__ & ~clear__; \ } while (!(old__ & test__) && \ cmpxchg(ptr, old__, new__) != old__); \ \ !(old__ & test__); \ }) #endif #ifndef find_last_bit /** * find_last_bit - find the last set bit in a memory region * @addr: The address to start the search at * @size: The number of bits to search * * Returns the bit number of the last set bit, or size. */ extern unsigned long find_last_bit(const unsigned long *addr, unsigned long size); #endif #endif /* __KERNEL__ */ #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ /* * Copyright 1997 Transmeta Corporation - All Rights Reserved * Copyright 1999-2000 Jeremy Fitzhardinge <jeremy@goop.org> * Copyright 2005-2006,2013,2017-2018 Ian Kent <raven@themaw.net> * * This file is part of the Linux kernel and is made available under * the terms of the GNU General Public License, version 2, or at your * option, any later version, incorporated herein by reference. * * ----------------------------------------------------------------------- */ #ifndef _UAPI_LINUX_AUTO_FS_H #define _UAPI_LINUX_AUTO_FS_H #include <linux/types.h> #include <linux/limits.h> #ifndef __KERNEL__ #include <sys/ioctl.h> #endif /* __KERNEL__ */ #define AUTOFS_PROTO_VERSION 5 #define AUTOFS_MIN_PROTO_VERSION 3 #define AUTOFS_MAX_PROTO_VERSION 5 #define AUTOFS_PROTO_SUBVERSION 5 /* * The wait_queue_token (autofs_wqt_t) is part of a structure which is passed * back to the kernel via ioctl from userspace. On architectures where 32- and * 64-bit userspace binaries can be executed it's important that the size of * autofs_wqt_t stays constant between 32- and 64-bit Linux kernels so that we * do not break the binary ABI interface by changing the structure size. */ #if defined(__ia64__) || defined(__alpha__) /* pure 64bit architectures */ typedef unsigned long autofs_wqt_t; #else typedef unsigned int autofs_wqt_t; #endif /* Packet types */ #define autofs_ptype_missing 0 /* Missing entry (mount request) */ #define autofs_ptype_expire 1 /* Expire entry (umount request) */ struct autofs_packet_hdr { int proto_version; /* Protocol version */ int type; /* Type of packet */ }; struct autofs_packet_missing { struct autofs_packet_hdr hdr; autofs_wqt_t wait_queue_token; int len; char name[NAME_MAX+1]; }; /* v3 expire (via ioctl) */ struct autofs_packet_expire { struct autofs_packet_hdr hdr; int len; char name[NAME_MAX+1]; }; #define AUTOFS_IOCTL 0x93 enum { AUTOFS_IOC_READY_CMD = 0x60, AUTOFS_IOC_FAIL_CMD, AUTOFS_IOC_CATATONIC_CMD, AUTOFS_IOC_PROTOVER_CMD, AUTOFS_IOC_SETTIMEOUT_CMD, AUTOFS_IOC_EXPIRE_CMD, }; #define AUTOFS_IOC_READY _IO(AUTOFS_IOCTL, AUTOFS_IOC_READY_CMD) #define AUTOFS_IOC_FAIL _IO(AUTOFS_IOCTL, AUTOFS_IOC_FAIL_CMD) #define AUTOFS_IOC_CATATONIC _IO(AUTOFS_IOCTL, AUTOFS_IOC_CATATONIC_CMD) #define AUTOFS_IOC_PROTOVER _IOR(AUTOFS_IOCTL, \ AUTOFS_IOC_PROTOVER_CMD, int) #define AUTOFS_IOC_SETTIMEOUT32 _IOWR(AUTOFS_IOCTL, \ AUTOFS_IOC_SETTIMEOUT_CMD, \ compat_ulong_t) #define AUTOFS_IOC_SETTIMEOUT _IOWR(AUTOFS_IOCTL, \ AUTOFS_IOC_SETTIMEOUT_CMD, \ unsigned long) #define AUTOFS_IOC_EXPIRE _IOR(AUTOFS_IOCTL, \ AUTOFS_IOC_EXPIRE_CMD, \ struct autofs_packet_expire) /* autofs version 4 and later definitions */ /* Mask for expire behaviour */ #define AUTOFS_EXP_NORMAL 0x00 #define AUTOFS_EXP_IMMEDIATE 0x01 #define AUTOFS_EXP_LEAVES 0x02 #define AUTOFS_EXP_FORCED 0x04 #define AUTOFS_TYPE_ANY 0U #define AUTOFS_TYPE_INDIRECT 1U #define AUTOFS_TYPE_DIRECT 2U #define AUTOFS_TYPE_OFFSET 4U static inline void set_autofs_type_indirect(unsigned int *type) { *type = AUTOFS_TYPE_INDIRECT; } static inline unsigned int autofs_type_indirect(unsigned int type) { return (type == AUTOFS_TYPE_INDIRECT); } static inline void set_autofs_type_direct(unsigned int *type) { *type = AUTOFS_TYPE_DIRECT; } static inline unsigned int autofs_type_direct(unsigned int type) { return (type == AUTOFS_TYPE_DIRECT); } static inline void set_autofs_type_offset(unsigned int *type) { *type = AUTOFS_TYPE_OFFSET; } static inline unsigned int autofs_type_offset(unsigned int type) { return (type == AUTOFS_TYPE_OFFSET); } static inline unsigned int autofs_type_trigger(unsigned int type) { return (type == AUTOFS_TYPE_DIRECT || type == AUTOFS_TYPE_OFFSET); } /* * This isn't really a type as we use it to say "no type set" to * indicate we want to search for "any" mount in the * autofs_dev_ioctl_ismountpoint() device ioctl function. */ static inline void set_autofs_type_any(unsigned int *type) { *type = AUTOFS_TYPE_ANY; } static inline unsigned int autofs_type_any(unsigned int type) { return (type == AUTOFS_TYPE_ANY); } /* Daemon notification packet types */ enum autofs_notify { NFY_NONE, NFY_MOUNT, NFY_EXPIRE }; /* Kernel protocol version 4 packet types */ /* Expire entry (umount request) */ #define autofs_ptype_expire_multi 2 /* Kernel protocol version 5 packet types */ /* Indirect mount missing and expire requests. */ #define autofs_ptype_missing_indirect 3 #define autofs_ptype_expire_indirect 4 /* Direct mount missing and expire requests */ #define autofs_ptype_missing_direct 5 #define autofs_ptype_expire_direct 6 /* v4 multi expire (via pipe) */ struct autofs_packet_expire_multi { struct autofs_packet_hdr hdr; autofs_wqt_t wait_queue_token; int len; char name[NAME_MAX+1]; }; union autofs_packet_union { struct autofs_packet_hdr hdr; struct autofs_packet_missing missing; struct autofs_packet_expire expire; struct autofs_packet_expire_multi expire_multi; }; /* autofs v5 common packet struct */ struct autofs_v5_packet { struct autofs_packet_hdr hdr; autofs_wqt_t wait_queue_token; __u32 dev; __u64 ino; __u32 uid; __u32 gid; __u32 pid; __u32 tgid; __u32 len; char name[NAME_MAX+1]; }; typedef struct autofs_v5_packet autofs_packet_missing_indirect_t; typedef struct autofs_v5_packet autofs_packet_expire_indirect_t; typedef struct autofs_v5_packet autofs_packet_missing_direct_t; typedef struct autofs_v5_packet autofs_packet_expire_direct_t; union autofs_v5_packet_union { struct autofs_packet_hdr hdr; struct autofs_v5_packet v5_packet; autofs_packet_missing_indirect_t missing_indirect; autofs_packet_expire_indirect_t expire_indirect; autofs_packet_missing_direct_t missing_direct; autofs_packet_expire_direct_t expire_direct; }; enum { AUTOFS_IOC_EXPIRE_MULTI_CMD = 0x66, /* AUTOFS_IOC_EXPIRE_CMD + 1 */ AUTOFS_IOC_PROTOSUBVER_CMD, AUTOFS_IOC_ASKUMOUNT_CMD = 0x70, /* AUTOFS_DEV_IOCTL_VERSION_CMD - 1 */ }; #define AUTOFS_IOC_EXPIRE_MULTI _IOW(AUTOFS_IOCTL, \ AUTOFS_IOC_EXPIRE_MULTI_CMD, int) #define AUTOFS_IOC_PROTOSUBVER _IOR(AUTOFS_IOCTL, \ AUTOFS_IOC_PROTOSUBVER_CMD, int) #define AUTOFS_IOC_ASKUMOUNT _IOR(AUTOFS_IOCTL, \ AUTOFS_IOC_ASKUMOUNT_CMD, int) #endif /* _UAPI_LINUX_AUTO_FS_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 /* SPDX-License-Identifier: GPL-2.0 */ /* * Statically sized hash table implementation * (C) 2012 Sasha Levin <levinsasha928@gmail.com> */ #ifndef _LINUX_HASHTABLE_H #define _LINUX_HASHTABLE_H #include <linux/list.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/hash.h> #include <linux/rculist.h> #define DEFINE_HASHTABLE(name, bits) \ struct hlist_head name[1 << (bits)] = \ { [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT } #define DEFINE_READ_MOSTLY_HASHTABLE(name, bits) \ struct hlist_head name[1 << (bits)] __read_mostly = \ { [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT } #define DECLARE_HASHTABLE(name, bits) \ struct hlist_head name[1 << (bits)] #define HASH_SIZE(name) (ARRAY_SIZE(name)) #define HASH_BITS(name) ilog2(HASH_SIZE(name)) /* Use hash_32 when possible to allow for fast 32bit hashing in 64bit kernels. */ #define hash_min(val, bits) \ (sizeof(val) <= 4 ? hash_32(val, bits) : hash_long(val, bits)) static inline void __hash_init(struct hlist_head *ht, unsigned int sz) { unsigned int i; for (i = 0; i < sz; i++) INIT_HLIST_HEAD(&ht[i]); } /** * hash_init - initialize a hash table * @hashtable: hashtable to be initialized * * Calculates the size of the hashtable from the given parameter, otherwise * same as hash_init_size. * * This has to be a macro since HASH_BITS() will not work on pointers since * it calculates the size during preprocessing. */ #define hash_init(hashtable) __hash_init(hashtable, HASH_SIZE(hashtable)) /** * hash_add - add an object to a hashtable * @hashtable: hashtable to add to * @node: the &struct hlist_node of the object to be added * @key: the key of the object to be added */ #define hash_add(hashtable, node, key) \ hlist_add_head(node, &hashtable[hash_min(key, HASH_BITS(hashtable))]) /** * hash_add_rcu - add an object to a rcu enabled hashtable * @hashtable: hashtable to add to * @node: the &struct hlist_node of the object to be added * @key: the key of the object to be added */ #define hash_add_rcu(hashtable, node, key) \ hlist_add_head_rcu(node, &hashtable[hash_min(key, HASH_BITS(hashtable))]) /** * hash_hashed - check whether an object is in any hashtable * @node: the &struct hlist_node of the object to be checked */ static inline bool hash_hashed(struct hlist_node *node) { return !hlist_unhashed(node); } static inline bool __hash_empty(struct hlist_head *ht, unsigned int sz) { unsigned int i; for (i = 0; i < sz; i++) if (!hlist_empty(&ht[i])) return false; return true; } /** * hash_empty - check whether a hashtable is empty * @hashtable: hashtable to check * * This has to be a macro since HASH_BITS() will not work on pointers since * it calculates the size during preprocessing. */ #define hash_empty(hashtable) __hash_empty(hashtable, HASH_SIZE(hashtable)) /** * hash_del - remove an object from a hashtable * @node: &struct hlist_node of the object to remove */ static inline void hash_del(struct hlist_node *node) { hlist_del_init(node); } /** * hash_del_rcu - remove an object from a rcu enabled hashtable * @node: &struct hlist_node of the object to remove */ static inline void hash_del_rcu(struct hlist_node *node) { hlist_del_init_rcu(node); } /** * hash_for_each - iterate over a hashtable * @name: hashtable to iterate * @bkt: integer to use as bucket loop cursor * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct */ #define hash_for_each(name, bkt, obj, member) \ for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ (bkt)++)\ hlist_for_each_entry(obj, &name[bkt], member) /** * hash_for_each_rcu - iterate over a rcu enabled hashtable * @name: hashtable to iterate * @bkt: integer to use as bucket loop cursor * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct */ #define hash_for_each_rcu(name, bkt, obj, member) \ for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ (bkt)++)\ hlist_for_each_entry_rcu(obj, &name[bkt], member) /** * hash_for_each_safe - iterate over a hashtable safe against removal of * hash entry * @name: hashtable to iterate * @bkt: integer to use as bucket loop cursor * @tmp: a &struct hlist_node used for temporary storage * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct */ #define hash_for_each_safe(name, bkt, tmp, obj, member) \ for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ (bkt)++)\ hlist_for_each_entry_safe(obj, tmp, &name[bkt], member) /** * hash_for_each_possible - iterate over all possible objects hashing to the * same bucket * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over */ #define hash_for_each_possible(name, obj, member, key) \ hlist_for_each_entry(obj, &name[hash_min(key, HASH_BITS(name))], member) /** * hash_for_each_possible_rcu - iterate over all possible objects hashing to the * same bucket in an rcu enabled hashtable * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over */ #define hash_for_each_possible_rcu(name, obj, member, key, cond...) \ hlist_for_each_entry_rcu(obj, &name[hash_min(key, HASH_BITS(name))],\ member, ## cond) /** * hash_for_each_possible_rcu_notrace - iterate over all possible objects hashing * to the same bucket in an rcu enabled hashtable in a rcu enabled hashtable * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over * * This is the same as hash_for_each_possible_rcu() except that it does * not do any RCU debugging or tracing. */ #define hash_for_each_possible_rcu_notrace(name, obj, member, key) \ hlist_for_each_entry_rcu_notrace(obj, \ &name[hash_min(key, HASH_BITS(name))], member) /** * hash_for_each_possible_safe - iterate over all possible objects hashing to the * same bucket safe against removals * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry * @tmp: a &struct hlist_node used for temporary storage * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over */ #define hash_for_each_possible_safe(name, obj, tmp, member, key) \ hlist_for_each_entry_safe(obj, tmp,\ &name[hash_min(key, HASH_BITS(name))], member) #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 /* SPDX-License-Identifier: GPL-2.0+ */ #undef TRACE_SYSTEM #define TRACE_SYSTEM rseq #if !defined(_TRACE_RSEQ_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_RSEQ_H #include <linux/tracepoint.h> #include <linux/types.h> TRACE_EVENT(rseq_update, TP_PROTO(struct task_struct *t), TP_ARGS(t), TP_STRUCT__entry( __field(s32, cpu_id) ), TP_fast_assign( __entry->cpu_id = raw_smp_processor_id(); ), TP_printk("cpu_id=%d", __entry->cpu_id) ); TRACE_EVENT(rseq_ip_fixup, TP_PROTO(unsigned long regs_ip, unsigned long start_ip, unsigned long post_commit_offset, unsigned long abort_ip), TP_ARGS(regs_ip, start_ip, post_commit_offset, abort_ip), TP_STRUCT__entry( __field(unsigned long, regs_ip) __field(unsigned long, start_ip) __field(unsigned long, post_commit_offset) __field(unsigned long, abort_ip) ), TP_fast_assign( __entry->regs_ip = regs_ip; __entry->start_ip = start_ip; __entry->post_commit_offset = post_commit_offset; __entry->abort_ip = abort_ip; ), TP_printk("regs_ip=0x%lx start_ip=0x%lx post_commit_offset=%lu abort_ip=0x%lx", __entry->regs_ip, __entry->start_ip, __entry->post_commit_offset, __entry->abort_ip) ); #endif /* _TRACE_SOCK_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _IPV6_H #define _IPV6_H #include <uapi/linux/ipv6.h> #define ipv6_optlen(p) (((p)->hdrlen+1) << 3) #define ipv6_authlen(p) (((p)->hdrlen+2) << 2) /* * This structure contains configuration options per IPv6 link. */ struct ipv6_devconf { __s32 forwarding; __s32 hop_limit; __s32 mtu6; __s32 accept_ra; __s32 accept_redirects; __s32 autoconf; __s32 dad_transmits; __s32 rtr_solicits; __s32 rtr_solicit_interval; __s32 rtr_solicit_max_interval; __s32 rtr_solicit_delay; __s32 force_mld_version; __s32 mldv1_unsolicited_report_interval; __s32 mldv2_unsolicited_report_interval; __s32 use_tempaddr; __s32 temp_valid_lft; __s32 temp_prefered_lft; __s32 regen_max_retry; __s32 max_desync_factor; __s32 max_addresses; __s32 accept_ra_defrtr; __s32 accept_ra_min_hop_limit; __s32 accept_ra_pinfo; __s32 ignore_routes_with_linkdown; #ifdef CONFIG_IPV6_ROUTER_PREF __s32 accept_ra_rtr_pref; __s32 rtr_probe_interval; #ifdef CONFIG_IPV6_ROUTE_INFO __s32 accept_ra_rt_info_min_plen; __s32 accept_ra_rt_info_max_plen; #endif #endif __s32 proxy_ndp; __s32 accept_source_route; __s32 accept_ra_from_local; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD __s32 optimistic_dad; __s32 use_optimistic; #endif #ifdef CONFIG_IPV6_MROUTE atomic_t mc_forwarding; #endif __s32 disable_ipv6; __s32 drop_unicast_in_l2_multicast; __s32 accept_dad; __s32 force_tllao; __s32 ndisc_notify; __s32 suppress_frag_ndisc; __s32 accept_ra_mtu; __s32 drop_unsolicited_na; struct ipv6_stable_secret { bool initialized; struct in6_addr secret; } stable_secret; __s32 use_oif_addrs_only; __s32 keep_addr_on_down; __s32 seg6_enabled; #ifdef CONFIG_IPV6_SEG6_HMAC __s32 seg6_require_hmac; #endif __u32 enhanced_dad; __u32 addr_gen_mode; __s32 disable_policy; __s32 ndisc_tclass; __s32 rpl_seg_enabled; struct ctl_table_header *sysctl_header; }; struct ipv6_params { __s32 disable_ipv6; __s32 autoconf; }; extern struct ipv6_params ipv6_defaults; #include <linux/tcp.h> #include <linux/udp.h> #include <net/inet_sock.h> static inline struct ipv6hdr *ipv6_hdr(const struct sk_buff *skb) { return (struct ipv6hdr *)skb_network_header(skb); } static inline struct ipv6hdr *inner_ipv6_hdr(const struct sk_buff *skb) { return (struct ipv6hdr *)skb_inner_network_header(skb); } static inline struct ipv6hdr *ipipv6_hdr(const struct sk_buff *skb) { return (struct ipv6hdr *)skb_transport_header(skb); } static inline unsigned int ipv6_transport_len(const struct sk_buff *skb) { return ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr) - skb_network_header_len(skb); } /* This structure contains results of exthdrs parsing as offsets from skb->nh. */ struct inet6_skb_parm { int iif; __be16 ra; __u16 dst0; __u16 srcrt; __u16 dst1; __u16 lastopt; __u16 nhoff; __u16 flags; #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) __u16 dsthao; #endif __u16 frag_max_size; #define IP6SKB_XFRM_TRANSFORMED 1 #define IP6SKB_FORWARDED 2 #define IP6SKB_REROUTED 4 #define IP6SKB_ROUTERALERT 8 #define IP6SKB_FRAGMENTED 16 #define IP6SKB_HOPBYHOP 32 #define IP6SKB_L3SLAVE 64 #define IP6SKB_JUMBOGRAM 128 }; #if defined(CONFIG_NET_L3_MASTER_DEV) static inline bool ipv6_l3mdev_skb(__u16 flags) { return flags & IP6SKB_L3SLAVE; } #else static inline bool ipv6_l3mdev_skb(__u16 flags) { return false; } #endif #define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) #define IP6CBMTU(skb) ((struct ip6_mtuinfo *)((skb)->cb)) static inline int inet6_iif(const struct sk_buff *skb) { bool l3_slave = ipv6_l3mdev_skb(IP6CB(skb)->flags); return l3_slave ? skb->skb_iif : IP6CB(skb)->iif; } static inline bool inet6_is_jumbogram(const struct sk_buff *skb) { return !!(IP6CB(skb)->flags & IP6SKB_JUMBOGRAM); } /* can not be used in TCP layer after tcp_v6_fill_cb */ static inline int inet6_sdif(const struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) if (skb && ipv6_l3mdev_skb(IP6CB(skb)->flags)) return IP6CB(skb)->iif; #endif return 0; } struct tcp6_request_sock { struct tcp_request_sock tcp6rsk_tcp; }; struct ipv6_mc_socklist; struct ipv6_ac_socklist; struct ipv6_fl_socklist; struct inet6_cork { struct ipv6_txoptions *opt; u8 hop_limit; u8 tclass; }; /** * struct ipv6_pinfo - ipv6 private area * * In the struct sock hierarchy (tcp6_sock, upd6_sock, etc) * this _must_ be the last member, so that inet6_sk_generic * is able to calculate its offset from the base struct sock * by using the struct proto->slab_obj_size member. -acme */ struct ipv6_pinfo { struct in6_addr saddr; struct in6_pktinfo sticky_pktinfo; const struct in6_addr *daddr_cache; #ifdef CONFIG_IPV6_SUBTREES const struct in6_addr *saddr_cache; #endif __be32 flow_label; __u32 frag_size; /* * Packed in 16bits. * Omit one shift by putting the signed field at MSB. */ #if defined(__BIG_ENDIAN_BITFIELD) __s16 hop_limit:9; __u16 __unused_1:7; #else __u16 __unused_1:7; __s16 hop_limit:9; #endif #if defined(__BIG_ENDIAN_BITFIELD) /* Packed in 16bits. */ __s16 mcast_hops:9; __u16 __unused_2:6, mc_loop:1; #else __u16 mc_loop:1, __unused_2:6; __s16 mcast_hops:9; #endif int ucast_oif; int mcast_oif; /* pktoption flags */ union { struct { __u16 srcrt:1, osrcrt:1, rxinfo:1, rxoinfo:1, rxhlim:1, rxohlim:1, hopopts:1, ohopopts:1, dstopts:1, odstopts:1, rxflow:1, rxtclass:1, rxpmtu:1, rxorigdstaddr:1, recvfragsize:1; /* 1 bits hole */ } bits; __u16 all; } rxopt; /* sockopt flags */ __u16 recverr:1, sndflow:1, repflow:1, pmtudisc:3, padding:1, /* 1 bit hole */ srcprefs:3, /* 001: prefer temporary address * 010: prefer public address * 100: prefer care-of address */ dontfrag:1, autoflowlabel:1, autoflowlabel_set:1, mc_all:1, recverr_rfc4884:1, rtalert_isolate:1; __u8 min_hopcount; __u8 tclass; __be32 rcv_flowinfo; __u32 dst_cookie; __u32 rx_dst_cookie; struct ipv6_mc_socklist __rcu *ipv6_mc_list; struct ipv6_ac_socklist *ipv6_ac_list; struct ipv6_fl_socklist __rcu *ipv6_fl_list; struct ipv6_txoptions __rcu *opt; struct sk_buff *pktoptions; struct sk_buff *rxpmtu; struct inet6_cork cork; }; /* WARNING: don't change the layout of the members in {raw,udp,tcp}6_sock! */ struct raw6_sock { /* inet_sock has to be the first member of raw6_sock */ struct inet_sock inet; __u32 checksum; /* perform checksum */ __u32 offset; /* checksum offset */ struct icmp6_filter filter; __u32 ip6mr_table; /* ipv6_pinfo has to be the last member of raw6_sock, see inet6_sk_generic */ struct ipv6_pinfo inet6; }; struct udp6_sock { struct udp_sock udp; /* ipv6_pinfo has to be the last member of udp6_sock, see inet6_sk_generic */ struct ipv6_pinfo inet6; }; struct tcp6_sock { struct tcp_sock tcp; /* ipv6_pinfo has to be the last member of tcp6_sock, see inet6_sk_generic */ struct ipv6_pinfo inet6; }; extern int inet6_sk_rebuild_header(struct sock *sk); struct tcp6_timewait_sock { struct tcp_timewait_sock tcp6tw_tcp; }; #if IS_ENABLED(CONFIG_IPV6) bool ipv6_mod_enabled(void); static inline struct ipv6_pinfo *inet6_sk(const struct sock *__sk) { return sk_fullsock(__sk) ? inet_sk(__sk)->pinet6 : NULL; } static inline struct raw6_sock *raw6_sk(const struct sock *sk) { return (struct raw6_sock *)sk; } #define __ipv6_only_sock(sk) (sk->sk_ipv6only) #define ipv6_only_sock(sk) (__ipv6_only_sock(sk)) #define ipv6_sk_rxinfo(sk) ((sk)->sk_family == PF_INET6 && \ inet6_sk(sk)->rxopt.bits.rxinfo) static inline const struct in6_addr *inet6_rcv_saddr(const struct sock *sk) { if (sk->sk_family == AF_INET6) return &sk->sk_v6_rcv_saddr; return NULL; } static inline int inet_v6_ipv6only(const struct sock *sk) { /* ipv6only field is at same position for timewait and other sockets */ return ipv6_only_sock(sk); } #else #define __ipv6_only_sock(sk) 0 #define ipv6_only_sock(sk) 0 #define ipv6_sk_rxinfo(sk) 0 static inline bool ipv6_mod_enabled(void) { return false; } static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk) { return NULL; } static inline struct inet6_request_sock * inet6_rsk(const struct request_sock *rsk) { return NULL; } static inline struct raw6_sock *raw6_sk(const struct sock *sk) { return NULL; } #define inet6_rcv_saddr(__sk) NULL #define tcp_twsk_ipv6only(__sk) 0 #define inet_v6_ipv6only(__sk) 0 #endif /* IS_ENABLED(CONFIG_IPV6) */ #endif /* _IPV6_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 /* SPDX-License-Identifier: GPL-2.0 */ /* * A hash table (hashtab) maintains associations between * key values and datum values. The type of the key values * and the type of the datum values is arbitrary. The * functions for hash computation and key comparison are * provided by the creator of the table. * * Author : Stephen Smalley, <sds@tycho.nsa.gov> */ #ifndef _SS_HASHTAB_H_ #define _SS_HASHTAB_H_ #include <linux/types.h> #include <linux/errno.h> #include <linux/sched.h> #define HASHTAB_MAX_NODES U32_MAX struct hashtab_key_params { u32 (*hash)(const void *key); /* hash function */ int (*cmp)(const void *key1, const void *key2); /* key comparison function */ }; struct hashtab_node { void *key; void *datum; struct hashtab_node *next; }; struct hashtab { struct hashtab_node **htable; /* hash table */ u32 size; /* number of slots in hash table */ u32 nel; /* number of elements in hash table */ }; struct hashtab_info { u32 slots_used; u32 max_chain_len; }; /* * Initializes a new hash table with the specified characteristics. * * Returns -ENOMEM if insufficient space is available or 0 otherwise. */ int hashtab_init(struct hashtab *h, u32 nel_hint); int __hashtab_insert(struct hashtab *h, struct hashtab_node **dst, void *key, void *datum); /* * Inserts the specified (key, datum) pair into the specified hash table. * * Returns -ENOMEM on memory allocation error, * -EEXIST if there is already an entry with the same key, * -EINVAL for general errors or 0 otherwise. */ static inline int hashtab_insert(struct hashtab *h, void *key, void *datum, struct hashtab_key_params key_params) { u32 hvalue; struct hashtab_node *prev, *cur; cond_resched(); if (!h->size || h->nel == HASHTAB_MAX_NODES) return -EINVAL; hvalue = key_params.hash(key) & (h->size - 1); prev = NULL; cur = h->htable[hvalue]; while (cur) { int cmp = key_params.cmp(key, cur->key); if (cmp == 0) return -EEXIST; if (cmp < 0) break; prev = cur; cur = cur->next; } return __hashtab_insert(h, prev ? &prev->next : &h->htable[hvalue], key, datum); } /* * Searches for the entry with the specified key in the hash table. * * Returns NULL if no entry has the specified key or * the datum of the entry otherwise. */ static inline void *hashtab_search(struct hashtab *h, const void *key, struct hashtab_key_params key_params) { u32 hvalue; struct hashtab_node *cur; if (!h->size) return NULL; hvalue = key_params.hash(key) & (h->size - 1); cur = h->htable[hvalue]; while (cur) { int cmp = key_params.cmp(key, cur->key); if (cmp == 0) return cur->datum; if (cmp < 0) break; cur = cur->next; } return NULL; } /* * Destroys the specified hash table. */ void hashtab_destroy(struct hashtab *h); /* * Applies the specified apply function to (key,datum,args) * for each entry in the specified hash table. * * The order in which the function is applied to the entries * is dependent upon the internal structure of the hash table. * * If apply returns a non-zero status, then hashtab_map will cease * iterating through the hash table and will propagate the error * return to its caller. */ int hashtab_map(struct hashtab *h, int (*apply)(void *k, void *d, void *args), void *args); int hashtab_duplicate(struct hashtab *new, struct hashtab *orig, int (*copy)(struct hashtab_node *new, struct hashtab_node *orig, void *args), int (*destroy)(void *k, void *d, void *args), void *args); /* Fill info with some hash table statistics */ void hashtab_stat(struct hashtab *h, struct hashtab_info *info); #endif /* _SS_HASHTAB_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* include/asm-generic/tlb.h * * Generic TLB shootdown code * * Copyright 2001 Red Hat, Inc. * Based on code from mm/memory.c Copyright Linus Torvalds and others. * * Copyright 2011 Red Hat, Inc., Peter Zijlstra */ #ifndef _ASM_GENERIC__TLB_H #define _ASM_GENERIC__TLB_H #include <linux/mmu_notifier.h> #include <linux/swap.h> #include <linux/hugetlb_inline.h> #include <asm/tlbflush.h> #include <asm/cacheflush.h> /* * Blindly accessing user memory from NMI context can be dangerous * if we're in the middle of switching the current user task or switching * the loaded mm. */ #ifndef nmi_uaccess_okay # define nmi_uaccess_okay() true #endif #ifdef CONFIG_MMU /* * Generic MMU-gather implementation. * * The mmu_gather data structure is used by the mm code to implement the * correct and efficient ordering of freeing pages and TLB invalidations. * * This correct ordering is: * * 1) unhook page * 2) TLB invalidate page * 3) free page * * That is, we must never free a page before we have ensured there are no live * translations left to it. Otherwise it might be possible to observe (or * worse, change) the page content after it has been reused. * * The mmu_gather API consists of: * * - tlb_gather_mmu() / tlb_finish_mmu(); start and finish a mmu_gather * * Finish in particular will issue a (final) TLB invalidate and free * all (remaining) queued pages. * * - tlb_start_vma() / tlb_end_vma(); marks the start / end of a VMA * * Defaults to flushing at tlb_end_vma() to reset the range; helps when * there's large holes between the VMAs. * * - tlb_remove_table() * * tlb_remove_table() is the basic primitive to free page-table directories * (__p*_free_tlb()). In it's most primitive form it is an alias for * tlb_remove_page() below, for when page directories are pages and have no * additional constraints. * * See also MMU_GATHER_TABLE_FREE and MMU_GATHER_RCU_TABLE_FREE. * * - tlb_remove_page() / __tlb_remove_page() * - tlb_remove_page_size() / __tlb_remove_page_size() * * __tlb_remove_page_size() is the basic primitive that queues a page for * freeing. __tlb_remove_page() assumes PAGE_SIZE. Both will return a * boolean indicating if the queue is (now) full and a call to * tlb_flush_mmu() is required. * * tlb_remove_page() and tlb_remove_page_size() imply the call to * tlb_flush_mmu() when required and has no return value. * * - tlb_change_page_size() * * call before __tlb_remove_page*() to set the current page-size; implies a * possible tlb_flush_mmu() call. * * - tlb_flush_mmu() / tlb_flush_mmu_tlbonly() * * tlb_flush_mmu_tlbonly() - does the TLB invalidate (and resets * related state, like the range) * * tlb_flush_mmu() - in addition to the above TLB invalidate, also frees * whatever pages are still batched. * * - mmu_gather::fullmm * * A flag set by tlb_gather_mmu() to indicate we're going to free * the entire mm; this allows a number of optimizations. * * - We can ignore tlb_{start,end}_vma(); because we don't * care about ranges. Everything will be shot down. * * - (RISC) architectures that use ASIDs can cycle to a new ASID * and delay the invalidation until ASID space runs out. * * - mmu_gather::need_flush_all * * A flag that can be set by the arch code if it wants to force * flush the entire TLB irrespective of the range. For instance * x86-PAE needs this when changing top-level entries. * * And allows the architecture to provide and implement tlb_flush(): * * tlb_flush() may, in addition to the above mentioned mmu_gather fields, make * use of: * * - mmu_gather::start / mmu_gather::end * * which provides the range that needs to be flushed to cover the pages to * be freed. * * - mmu_gather::freed_tables * * set when we freed page table pages * * - tlb_get_unmap_shift() / tlb_get_unmap_size() * * returns the smallest TLB entry size unmapped in this range. * * If an architecture does not provide tlb_flush() a default implementation * based on flush_tlb_range() will be used, unless MMU_GATHER_NO_RANGE is * specified, in which case we'll default to flush_tlb_mm(). * * Additionally there are a few opt-in features: * * MMU_GATHER_PAGE_SIZE * * This ensures we call tlb_flush() every time tlb_change_page_size() actually * changes the size and provides mmu_gather::page_size to tlb_flush(). * * This might be useful if your architecture has size specific TLB * invalidation instructions. * * MMU_GATHER_TABLE_FREE * * This provides tlb_remove_table(), to be used instead of tlb_remove_page() * for page directores (__p*_free_tlb()). * * Useful if your architecture has non-page page directories. * * When used, an architecture is expected to provide __tlb_remove_table() * which does the actual freeing of these pages. * * MMU_GATHER_RCU_TABLE_FREE * * Like MMU_GATHER_TABLE_FREE, and adds semi-RCU semantics to the free (see * comment below). * * Useful if your architecture doesn't use IPIs for remote TLB invalidates * and therefore doesn't naturally serialize with software page-table walkers. * * MMU_GATHER_NO_RANGE * * Use this if your architecture lacks an efficient flush_tlb_range(). * * MMU_GATHER_NO_GATHER * * If the option is set the mmu_gather will not track individual pages for * delayed page free anymore. A platform that enables the option needs to * provide its own implementation of the __tlb_remove_page_size() function to * free pages. * * This is useful if your architecture already flushes TLB entries in the * various ptep_get_and_clear() functions. */ #ifdef CONFIG_MMU_GATHER_TABLE_FREE struct mmu_table_batch { #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE struct rcu_head rcu; #endif unsigned int nr; void *tables[0]; }; #define MAX_TABLE_BATCH \ ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *)) extern void tlb_remove_table(struct mmu_gather *tlb, void *table); #else /* !CONFIG_MMU_GATHER_HAVE_TABLE_FREE */ /* * Without MMU_GATHER_TABLE_FREE the architecture is assumed to have page based * page directories and we can use the normal page batching to free them. */ #define tlb_remove_table(tlb, page) tlb_remove_page((tlb), (page)) #endif /* CONFIG_MMU_GATHER_TABLE_FREE */ #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE /* * This allows an architecture that does not use the linux page-tables for * hardware to skip the TLBI when freeing page tables. */ #ifndef tlb_needs_table_invalidate #define tlb_needs_table_invalidate() (true) #endif void tlb_remove_table_sync_one(void); #else #ifdef tlb_needs_table_invalidate #error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE #endif static inline void tlb_remove_table_sync_one(void) { } #endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */ #ifndef CONFIG_MMU_GATHER_NO_GATHER /* * If we can't allocate a page to make a big batch of page pointers * to work on, then just handle a few from the on-stack structure. */ #define MMU_GATHER_BUNDLE 8 struct mmu_gather_batch { struct mmu_gather_batch *next; unsigned int nr; unsigned int max; struct page *pages[0]; }; #define MAX_GATHER_BATCH \ ((PAGE_SIZE - sizeof(struct mmu_gather_batch)) / sizeof(void *)) /* * Limit the maximum number of mmu_gather batches to reduce a risk of soft * lockups for non-preemptible kernels on huge machines when a lot of memory * is zapped during unmapping. * 10K pages freed at once should be safe even without a preemption point. */ #define MAX_GATHER_BATCH_COUNT (10000UL/MAX_GATHER_BATCH) extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size); #endif /* * struct mmu_gather is an opaque type used by the mm code for passing around * any data needed by arch specific code for tlb_remove_page. */ struct mmu_gather { struct mm_struct *mm; #ifdef CONFIG_MMU_GATHER_TABLE_FREE struct mmu_table_batch *batch; #endif unsigned long start; unsigned long end; /* * we are in the middle of an operation to clear * a full mm and can make some optimizations */ unsigned int fullmm : 1; /* * we have performed an operation which * requires a complete flush of the tlb */ unsigned int need_flush_all : 1; /* * we have removed page directories */ unsigned int freed_tables : 1; /* * at which levels have we cleared entries? */ unsigned int cleared_ptes : 1; unsigned int cleared_pmds : 1; unsigned int cleared_puds : 1; unsigned int cleared_p4ds : 1; /* * tracks VM_EXEC | VM_HUGETLB in tlb_start_vma */ unsigned int vma_exec : 1; unsigned int vma_huge : 1; unsigned int batch_count; #ifndef CONFIG_MMU_GATHER_NO_GATHER struct mmu_gather_batch *active; struct mmu_gather_batch local; struct page *__pages[MMU_GATHER_BUNDLE]; #ifdef CONFIG_MMU_GATHER_PAGE_SIZE unsigned int page_size; #endif #endif }; void tlb_flush_mmu(struct mmu_gather *tlb); static inline void __tlb_adjust_range(struct mmu_gather *tlb, unsigned long address, unsigned int range_size) { tlb->start = min(tlb->start, address); tlb->end = max(tlb->end, address + range_size); } static inline void __tlb_reset_range(struct mmu_gather *tlb) { if (tlb->fullmm) { tlb->start = tlb->end = ~0; } else { tlb->start = TASK_SIZE; tlb->end = 0; } tlb->freed_tables = 0; tlb->cleared_ptes = 0; tlb->cleared_pmds = 0; tlb->cleared_puds = 0; tlb->cleared_p4ds = 0; /* * Do not reset mmu_gather::vma_* fields here, we do not * call into tlb_start_vma() again to set them if there is an * intermediate flush. */ } #ifdef CONFIG_MMU_GATHER_NO_RANGE #if defined(tlb_flush) || defined(tlb_start_vma) || defined(tlb_end_vma) #error MMU_GATHER_NO_RANGE relies on default tlb_flush(), tlb_start_vma() and tlb_end_vma() #endif /* * When an architecture does not have efficient means of range flushing TLBs * there is no point in doing intermediate flushes on tlb_end_vma() to keep the * range small. We equally don't have to worry about page granularity or other * things. * * All we need to do is issue a full flush for any !0 range. */ static inline void tlb_flush(struct mmu_gather *tlb) { if (tlb->end) flush_tlb_mm(tlb->mm); } static inline void tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { } #define tlb_end_vma tlb_end_vma static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { } #else /* CONFIG_MMU_GATHER_NO_RANGE */ #ifndef tlb_flush #if defined(tlb_start_vma) || defined(tlb_end_vma) #error Default tlb_flush() relies on default tlb_start_vma() and tlb_end_vma() #endif /* * When an architecture does not provide its own tlb_flush() implementation * but does have a reasonably efficient flush_vma_range() implementation * use that. */ static inline void tlb_flush(struct mmu_gather *tlb) { if (tlb->fullmm || tlb->need_flush_all) { flush_tlb_mm(tlb->mm); } else if (tlb->end) { struct vm_area_struct vma = { .vm_mm = tlb->mm, .vm_flags = (tlb->vma_exec ? VM_EXEC : 0) | (tlb->vma_huge ? VM_HUGETLB : 0), }; flush_tlb_range(&vma, tlb->start, tlb->end); } } static inline void tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { /* * flush_tlb_range() implementations that look at VM_HUGETLB (tile, * mips-4k) flush only large pages. * * flush_tlb_range() implementations that flush I-TLB also flush D-TLB * (tile, xtensa, arm), so it's ok to just add VM_EXEC to an existing * range. * * We rely on tlb_end_vma() to issue a flush, such that when we reset * these values the batch is empty. */ tlb->vma_huge = is_vm_hugetlb_page(vma); tlb->vma_exec = !!(vma->vm_flags & VM_EXEC); } #else static inline void tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { } #endif #endif /* CONFIG_MMU_GATHER_NO_RANGE */ static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) { /* * Anything calling __tlb_adjust_range() also sets at least one of * these bits. */ if (!(tlb->freed_tables || tlb->cleared_ptes || tlb->cleared_pmds || tlb->cleared_puds || tlb->cleared_p4ds)) return; tlb_flush(tlb); mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end); __tlb_reset_range(tlb); } static inline void tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size) { if (__tlb_remove_page_size(tlb, page, page_size)) tlb_flush_mmu(tlb); } static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page) { return __tlb_remove_page_size(tlb, page, PAGE_SIZE); } /* tlb_remove_page * Similar to __tlb_remove_page but will call tlb_flush_mmu() itself when * required. */ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) { return tlb_remove_page_size(tlb, page, PAGE_SIZE); } static inline void tlb_change_page_size(struct mmu_gather *tlb, unsigned int page_size) { #ifdef CONFIG_MMU_GATHER_PAGE_SIZE if (tlb->page_size && tlb->page_size != page_size) { if (!tlb->fullmm && !tlb->need_flush_all) tlb_flush_mmu(tlb); } tlb->page_size = page_size; #endif } static inline unsigned long tlb_get_unmap_shift(struct mmu_gather *tlb) { if (tlb->cleared_ptes) return PAGE_SHIFT; if (tlb->cleared_pmds) return PMD_SHIFT; if (tlb->cleared_puds) return PUD_SHIFT; if (tlb->cleared_p4ds) return P4D_SHIFT; return PAGE_SHIFT; } static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb) { return 1UL << tlb_get_unmap_shift(tlb); } /* * In the case of tlb vma handling, we can optimise these away in the * case where we're doing a full MM flush. When we're doing a munmap, * the vmas are adjusted to only cover the region to be torn down. */ #ifndef tlb_start_vma static inline void tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { if (tlb->fullmm) return; tlb_update_vma_flags(tlb, vma); flush_cache_range(vma, vma->vm_start, vma->vm_end); } #endif #ifndef tlb_end_vma static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { if (tlb->fullmm) return; /* * Do a TLB flush and reset the range at VMA boundaries; this avoids * the ranges growing with the unused space between consecutive VMAs, * but also the mmu_gather::vma_* flags from tlb_start_vma() rely on * this. */ tlb_flush_mmu_tlbonly(tlb); } #endif /* * tlb_flush_{pte|pmd|pud|p4d}_range() adjust the tlb->start and tlb->end, * and set corresponding cleared_*. */ static inline void tlb_flush_pte_range(struct mmu_gather *tlb, unsigned long address, unsigned long size) { __tlb_adjust_range(tlb, address, size); tlb->cleared_ptes = 1; } static inline void tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address, unsigned long size) { __tlb_adjust_range(tlb, address, size); tlb->cleared_pmds = 1; } static inline void tlb_flush_pud_range(struct mmu_gather *tlb, unsigned long address, unsigned long size) { __tlb_adjust_range(tlb, address, size); tlb->cleared_puds = 1; } static inline void tlb_flush_p4d_range(struct mmu_gather *tlb, unsigned long address, unsigned long size) { __tlb_adjust_range(tlb, address, size); tlb->cleared_p4ds = 1; } #ifndef __tlb_remove_tlb_entry #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) #endif /** * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation. * * Record the fact that pte's were really unmapped by updating the range, * so we can later optimise away the tlb invalidate. This helps when * userspace is unmapping already-unmapped pages, which happens quite a lot. */ #define tlb_remove_tlb_entry(tlb, ptep, address) \ do { \ tlb_flush_pte_range(tlb, address, PAGE_SIZE); \ __tlb_remove_tlb_entry(tlb, ptep, address); \ } while (0) #define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \ do { \ unsigned long _sz = huge_page_size(h); \ if (_sz >= P4D_SIZE) \ tlb_flush_p4d_range(tlb, address, _sz); \ else if (_sz >= PUD_SIZE) \ tlb_flush_pud_range(tlb, address, _sz); \ else if (_sz >= PMD_SIZE) \ tlb_flush_pmd_range(tlb, address, _sz); \ else \ tlb_flush_pte_range(tlb, address, _sz); \ __tlb_remove_tlb_entry(tlb, ptep, address); \ } while (0) /** * tlb_remove_pmd_tlb_entry - remember a pmd mapping for later tlb invalidation * This is a nop so far, because only x86 needs it. */ #ifndef __tlb_remove_pmd_tlb_entry #define __tlb_remove_pmd_tlb_entry(tlb, pmdp, address) do {} while (0) #endif #define tlb_remove_pmd_tlb_entry(tlb, pmdp, address) \ do { \ tlb_flush_pmd_range(tlb, address, HPAGE_PMD_SIZE); \ __tlb_remove_pmd_tlb_entry(tlb, pmdp, address); \ } while (0) /** * tlb_remove_pud_tlb_entry - remember a pud mapping for later tlb * invalidation. This is a nop so far, because only x86 needs it. */ #ifndef __tlb_remove_pud_tlb_entry #define __tlb_remove_pud_tlb_entry(tlb, pudp, address) do {} while (0) #endif #define tlb_remove_pud_tlb_entry(tlb, pudp, address) \ do { \ tlb_flush_pud_range(tlb, address, HPAGE_PUD_SIZE); \ __tlb_remove_pud_tlb_entry(tlb, pudp, address); \ } while (0) /* * For things like page tables caches (ie caching addresses "inside" the * page tables, like x86 does), for legacy reasons, flushing an * individual page had better flush the page table caches behind it. This * is definitely how x86 works, for example. And if you have an * architected non-legacy page table cache (which I'm not aware of * anybody actually doing), you're going to have some architecturally * explicit flushing for that, likely *separate* from a regular TLB entry * flush, and thus you'd need more than just some range expansion.. * * So if we ever find an architecture * that would want something that odd, I think it is up to that * architecture to do its own odd thing, not cause pain for others * http://lkml.kernel.org/r/CA+55aFzBggoXtNXQeng5d_mRoDnaMBE5Y+URs+PHR67nUpMtaw@mail.gmail.com * * For now w.r.t page table cache, mark the range_size as PAGE_SIZE */ #ifndef pte_free_tlb #define pte_free_tlb(tlb, ptep, address) \ do { \ tlb_flush_pmd_range(tlb, address, PAGE_SIZE); \ tlb->freed_tables = 1; \ __pte_free_tlb(tlb, ptep, address); \ } while (0) #endif #ifndef pmd_free_tlb #define pmd_free_tlb(tlb, pmdp, address) \ do { \ tlb_flush_pud_range(tlb, address, PAGE_SIZE); \ tlb->freed_tables = 1; \ __pmd_free_tlb(tlb, pmdp, address); \ } while (0) #endif #ifndef pud_free_tlb #define pud_free_tlb(tlb, pudp, address) \ do { \ tlb_flush_p4d_range(tlb, address, PAGE_SIZE); \ tlb->freed_tables = 1; \ __pud_free_tlb(tlb, pudp, address); \ } while (0) #endif #ifndef p4d_free_tlb #define p4d_free_tlb(tlb, pudp, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ tlb->freed_tables = 1; \ __p4d_free_tlb(tlb, pudp, address); \ } while (0) #endif #endif /* CONFIG_MMU */ #endif /* _ASM_GENERIC__TLB_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 /* SPDX-License-Identifier: GPL-2.0-only */ /* * V9FS definitions. * * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> */ #ifndef FS_9P_V9FS_H #define FS_9P_V9FS_H #include <linux/backing-dev.h> /** * enum p9_session_flags - option flags for each 9P session * @V9FS_PROTO_2000U: whether or not to use 9P2000.u extensions * @V9FS_PROTO_2000L: whether or not to use 9P2000.l extensions * @V9FS_ACCESS_SINGLE: only the mounting user can access the hierarchy * @V9FS_ACCESS_USER: a new attach will be issued for every user (default) * @V9FS_ACCESS_CLIENT: Just like user, but access check is performed on client. * @V9FS_ACCESS_ANY: use a single attach for all users * @V9FS_ACCESS_MASK: bit mask of different ACCESS options * @V9FS_POSIX_ACL: POSIX ACLs are enforced * * Session flags reflect options selected by users at mount time */ #define V9FS_ACCESS_ANY (V9FS_ACCESS_SINGLE | \ V9FS_ACCESS_USER | \ V9FS_ACCESS_CLIENT) #define V9FS_ACCESS_MASK V9FS_ACCESS_ANY #define V9FS_ACL_MASK V9FS_POSIX_ACL enum p9_session_flags { V9FS_PROTO_2000U = 0x01, V9FS_PROTO_2000L = 0x02, V9FS_ACCESS_SINGLE = 0x04, V9FS_ACCESS_USER = 0x08, V9FS_ACCESS_CLIENT = 0x10, V9FS_POSIX_ACL = 0x20 }; /* possible values of ->cache */ /** * enum p9_cache_modes - user specified cache preferences * @CACHE_NONE: do not cache data, dentries, or directory contents (default) * @CACHE_LOOSE: cache data, dentries, and directory contents w/no consistency * * eventually support loose, tight, time, session, default always none */ enum p9_cache_modes { CACHE_NONE, CACHE_MMAP, CACHE_LOOSE, CACHE_FSCACHE, nr__p9_cache_modes }; /** * struct v9fs_session_info - per-instance session information * @flags: session options of type &p9_session_flags * @nodev: set to 1 to disable device mapping * @debug: debug level * @afid: authentication handle * @cache: cache mode of type &p9_cache_modes * @cachetag: the tag of the cache associated with this session * @fscache: session cookie associated with FS-Cache * @uname: string user name to mount hierarchy as * @aname: mount specifier for remote hierarchy * @maxdata: maximum data to be sent/recvd per protocol message * @dfltuid: default numeric userid to mount hierarchy as * @dfltgid: default numeric groupid to mount hierarchy as * @uid: if %V9FS_ACCESS_SINGLE, the numeric uid which mounted the hierarchy * @clnt: reference to 9P network client instantiated for this session * @slist: reference to list of registered 9p sessions * * This structure holds state for each session instance established during * a sys_mount() . * * Bugs: there seems to be a lot of state which could be condensed and/or * removed. */ struct v9fs_session_info { /* options */ unsigned char flags; unsigned char nodev; unsigned short debug; unsigned int afid; unsigned int cache; #ifdef CONFIG_9P_FSCACHE char *cachetag; struct fscache_cookie *fscache; #endif char *uname; /* user name to mount as */ char *aname; /* name of remote hierarchy being mounted */ unsigned int maxdata; /* max data for client interface */ kuid_t dfltuid; /* default uid/muid for legacy support */ kgid_t dfltgid; /* default gid for legacy support */ kuid_t uid; /* if ACCESS_SINGLE, the uid that has access */ struct p9_client *clnt; /* 9p client */ struct list_head slist; /* list of sessions registered with v9fs */ struct rw_semaphore rename_sem; long session_lock_timeout; /* retry interval for blocking locks */ }; /* cache_validity flags */ #define V9FS_INO_INVALID_ATTR 0x01 struct v9fs_inode { #ifdef CONFIG_9P_FSCACHE struct mutex fscache_lock; struct fscache_cookie *fscache; #endif struct p9_qid qid; unsigned int cache_validity; struct p9_fid *writeback_fid; struct mutex v_mutex; struct inode vfs_inode; }; static inline struct v9fs_inode *V9FS_I(const struct inode *inode) { return container_of(inode, struct v9fs_inode, vfs_inode); } extern int v9fs_show_options(struct seq_file *m, struct dentry *root); struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *, char *); extern void v9fs_session_close(struct v9fs_session_info *v9ses); extern void v9fs_session_cancel(struct v9fs_session_info *v9ses); extern void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses); extern struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); extern int v9fs_vfs_unlink(struct inode *i, struct dentry *d); extern int v9fs_vfs_rmdir(struct inode *i, struct dentry *d); extern int v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags); extern struct inode *v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, struct super_block *sb, int new); extern const struct inode_operations v9fs_dir_inode_operations_dotl; extern const struct inode_operations v9fs_file_inode_operations_dotl; extern const struct inode_operations v9fs_symlink_inode_operations_dotl; extern struct inode *v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid, struct super_block *sb, int new); /* other default globals */ #define V9FS_PORT 564 #define V9FS_DEFUSER "nobody" #define V9FS_DEFANAME "" #define V9FS_DEFUID KUIDT_INIT(-2) #define V9FS_DEFGID KGIDT_INIT(-2) static inline struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode) { return (inode->i_sb->s_fs_info); } static inline struct v9fs_session_info *v9fs_dentry2v9ses(struct dentry *dentry) { return dentry->d_sb->s_fs_info; } static inline int v9fs_proto_dotu(struct v9fs_session_info *v9ses) { return v9ses->flags & V9FS_PROTO_2000U; } static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses) { return v9ses->flags & V9FS_PROTO_2000L; } /** * v9fs_get_inode_from_fid - Helper routine to populate an inode by * issuing a attribute request * @v9ses: session information * @fid: fid to issue attribute request for * @sb: superblock on which to create inode * */ static inline struct inode * v9fs_get_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, struct super_block *sb) { if (v9fs_proto_dotl(v9ses)) return v9fs_inode_from_fid_dotl(v9ses, fid, sb, 0); else return v9fs_inode_from_fid(v9ses, fid, sb, 0); } /** * v9fs_get_new_inode_from_fid - Helper routine to populate an inode by * issuing a attribute request * @v9ses: session information * @fid: fid to issue attribute request for * @sb: superblock on which to create inode * */ static inline struct inode * v9fs_get_new_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, struct super_block *sb) { if (v9fs_proto_dotl(v9ses)) return v9fs_inode_from_fid_dotl(v9ses, fid, sb, 1); else return v9fs_inode_from_fid(v9ses, fid, sb, 1); } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM x86_fpu #if !defined(_TRACE_FPU_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_FPU_H #include <linux/tracepoint.h> DECLARE_EVENT_CLASS(x86_fpu, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu), TP_STRUCT__entry( __field(struct fpu *, fpu) __field(bool, load_fpu) __field(u64, xfeatures) __field(u64, xcomp_bv) ), TP_fast_assign( __entry->fpu = fpu; __entry->load_fpu = test_thread_flag(TIF_NEED_FPU_LOAD); if (boot_cpu_has(X86_FEATURE_OSXSAVE)) { __entry->xfeatures = fpu->state.xsave.header.xfeatures; __entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv; } ), TP_printk("x86/fpu: %p load: %d xfeatures: %llx xcomp_bv: %llx", __entry->fpu, __entry->load_fpu, __entry->xfeatures, __entry->xcomp_bv ) ); DEFINE_EVENT(x86_fpu, x86_fpu_before_save, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) ); DEFINE_EVENT(x86_fpu, x86_fpu_after_save, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) ); DEFINE_EVENT(x86_fpu, x86_fpu_before_restore, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) ); DEFINE_EVENT(x86_fpu, x86_fpu_after_restore, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) ); DEFINE_EVENT(x86_fpu, x86_fpu_regs_activated, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) ); DEFINE_EVENT(x86_fpu, x86_fpu_regs_deactivated, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) ); DEFINE_EVENT(x86_fpu, x86_fpu_init_state, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) ); DEFINE_EVENT(x86_fpu, x86_fpu_dropped, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) ); DEFINE_EVENT(x86_fpu, x86_fpu_copy_src, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) ); DEFINE_EVENT(x86_fpu, x86_fpu_copy_dst, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) ); DEFINE_EVENT(x86_fpu, x86_fpu_xstate_check_failed, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) ); #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH asm/trace/ #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE fpu #endif /* _TRACE_FPU_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_NETFILTER_H #define __LINUX_NETFILTER_H #include <linux/init.h> #include <linux/skbuff.h> #include <linux/net.h> #include <linux/if.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/wait.h> #include <linux/list.h> #include <linux/static_key.h> #include <linux/netfilter_defs.h> #include <linux/netdevice.h> #include <linux/sockptr.h> #include <net/net_namespace.h> static inline int NF_DROP_GETERR(int verdict) { return -(verdict >> NF_VERDICT_QBITS); } static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1, const union nf_inet_addr *a2) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 const unsigned long *ul1 = (const unsigned long *)a1; const unsigned long *ul2 = (const unsigned long *)a2; return ((ul1[0] ^ ul2[0]) | (ul1[1] ^ ul2[1])) == 0UL; #else return a1->all[0] == a2->all[0] && a1->all[1] == a2->all[1] && a1->all[2] == a2->all[2] && a1->all[3] == a2->all[3]; #endif } static inline void nf_inet_addr_mask(const union nf_inet_addr *a1, union nf_inet_addr *result, const union nf_inet_addr *mask) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 const unsigned long *ua = (const unsigned long *)a1; unsigned long *ur = (unsigned long *)result; const unsigned long *um = (const unsigned long *)mask; ur[0] = ua[0] & um[0]; ur[1] = ua[1] & um[1]; #else result->all[0] = a1->all[0] & mask->all[0]; result->all[1] = a1->all[1] & mask->all[1]; result->all[2] = a1->all[2] & mask->all[2]; result->all[3] = a1->all[3] & mask->all[3]; #endif } int netfilter_init(void); struct sk_buff; struct nf_hook_ops; struct sock; struct nf_hook_state { unsigned int hook; u_int8_t pf; struct net_device *in; struct net_device *out; struct sock *sk; struct net *net; int (*okfn)(struct net *, struct sock *, struct sk_buff *); }; typedef unsigned int nf_hookfn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); struct nf_hook_ops { /* User fills in from here down. */ nf_hookfn *hook; struct net_device *dev; void *priv; u_int8_t pf; unsigned int hooknum; /* Hooks are ordered in ascending priority. */ int priority; }; struct nf_hook_entry { nf_hookfn *hook; void *priv; }; struct nf_hook_entries_rcu_head { struct rcu_head head; void *allocation; }; struct nf_hook_entries { u16 num_hook_entries; /* padding */ struct nf_hook_entry hooks[]; /* trailer: pointers to original orig_ops of each hook, * followed by rcu_head and scratch space used for freeing * the structure via call_rcu. * * This is not part of struct nf_hook_entry since its only * needed in slow path (hook register/unregister): * const struct nf_hook_ops *orig_ops[] * * For the same reason, we store this at end -- its * only needed when a hook is deleted, not during * packet path processing: * struct nf_hook_entries_rcu_head head */ }; #ifdef CONFIG_NETFILTER static inline struct nf_hook_ops **nf_hook_entries_get_hook_ops(const struct nf_hook_entries *e) { unsigned int n = e->num_hook_entries; const void *hook_end; hook_end = &e->hooks[n]; /* this is *past* ->hooks[]! */ return (struct nf_hook_ops **)hook_end; } static inline int nf_hook_entry_hookfn(const struct nf_hook_entry *entry, struct sk_buff *skb, struct nf_hook_state *state) { return entry->hook(entry->priv, skb, state); } static inline void nf_hook_state_init(struct nf_hook_state *p, unsigned int hook, u_int8_t pf, struct net_device *indev, struct net_device *outdev, struct sock *sk, struct net *net, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { p->hook = hook; p->pf = pf; p->in = indev; p->out = outdev; p->sk = sk; p->net = net; p->okfn = okfn; } struct nf_sockopt_ops { struct list_head list; u_int8_t pf; /* Non-inclusive ranges: use 0/0/NULL to never get called. */ int set_optmin; int set_optmax; int (*set)(struct sock *sk, int optval, sockptr_t arg, unsigned int len); int get_optmin; int get_optmax; int (*get)(struct sock *sk, int optval, void __user *user, int *len); /* Use the module struct to lock set/get code in place */ struct module *owner; }; /* Function to register/unregister hook points. */ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *ops); void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *ops); int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg, unsigned int n); void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg, unsigned int n); /* Functions to register get/setsockopt ranges (non-inclusive). You need to check permissions yourself! */ int nf_register_sockopt(struct nf_sockopt_ops *reg); void nf_unregister_sockopt(struct nf_sockopt_ops *reg); #ifdef CONFIG_JUMP_LABEL extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; #endif int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, const struct nf_hook_entries *e, unsigned int i); void nf_hook_slow_list(struct list_head *head, struct nf_hook_state *state, const struct nf_hook_entries *e); /** * nf_hook - call a netfilter hook * * Returns 1 if the hook has allowed the packet to pass. The function * okfn must be invoked by the caller in this case. Any other return * value indicates the packet has been consumed by the hook. */ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { struct nf_hook_entries *hook_head = NULL; int ret = 1; #ifdef CONFIG_JUMP_LABEL if (__builtin_constant_p(pf) && __builtin_constant_p(hook) && !static_key_false(&nf_hooks_needed[pf][hook])) return 1; #endif rcu_read_lock(); switch (pf) { case NFPROTO_IPV4: hook_head = rcu_dereference(net->nf.hooks_ipv4[hook]); break; case NFPROTO_IPV6: hook_head = rcu_dereference(net->nf.hooks_ipv6[hook]); break; case NFPROTO_ARP: #ifdef CONFIG_NETFILTER_FAMILY_ARP if (WARN_ON_ONCE(hook >= ARRAY_SIZE(net->nf.hooks_arp))) break; hook_head = rcu_dereference(net->nf.hooks_arp[hook]); #endif break; case NFPROTO_BRIDGE: #ifdef CONFIG_NETFILTER_FAMILY_BRIDGE hook_head = rcu_dereference(net->nf.hooks_bridge[hook]); #endif break; #if IS_ENABLED(CONFIG_DECNET) case NFPROTO_DECNET: hook_head = rcu_dereference(net->nf.hooks_decnet[hook]); break; #endif default: WARN_ON_ONCE(1); break; } if (hook_head) { struct nf_hook_state state; nf_hook_state_init(&state, hook, pf, indev, outdev, sk, net, okfn); ret = nf_hook_slow(skb, &state, hook_head, 0); } rcu_read_unlock(); return ret; } /* Activate hook; either okfn or kfree_skb called, unless a hook returns NF_STOLEN (in which case, it's up to the hook to deal with the consequences). Returns -ERRNO if packet dropped. Zero means queued, stolen or accepted. */ /* RR: > I don't want nf_hook to return anything because people might forget > about async and trust the return value to mean "packet was ok". AK: Just document it clearly, then you can expect some sense from kernel coders :) */ static inline int NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *), bool cond) { int ret; if (!cond || ((ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn)) == 1)) ret = okfn(net, sk, skb); return ret; } static inline int NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { int ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn); if (ret == 1) ret = okfn(net, sk, skb); return ret; } static inline void NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct list_head *head, struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { struct nf_hook_entries *hook_head = NULL; #ifdef CONFIG_JUMP_LABEL if (__builtin_constant_p(pf) && __builtin_constant_p(hook) && !static_key_false(&nf_hooks_needed[pf][hook])) return; #endif rcu_read_lock(); switch (pf) { case NFPROTO_IPV4: hook_head = rcu_dereference(net->nf.hooks_ipv4[hook]); break; case NFPROTO_IPV6: hook_head = rcu_dereference(net->nf.hooks_ipv6[hook]); break; default: WARN_ON_ONCE(1); break; } if (hook_head) { struct nf_hook_state state; nf_hook_state_init(&state, hook, pf, in, out, sk, net, okfn); nf_hook_slow_list(head, &state, hook_head); } rcu_read_unlock(); } /* Call setsockopt() */ int nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, sockptr_t opt, unsigned int len); int nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int *len); struct flowi; struct nf_queue_entry; __sum16 nf_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol, unsigned short family); __sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, unsigned int len, u_int8_t protocol, unsigned short family); int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict, unsigned short family); int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry); #include <net/flow.h> struct nf_conn; enum nf_nat_manip_type; struct nlattr; enum ip_conntrack_dir; struct nf_nat_hook { int (*parse_nat_setup)(struct nf_conn *ct, enum nf_nat_manip_type manip, const struct nlattr *attr); void (*decode_session)(struct sk_buff *skb, struct flowi *fl); unsigned int (*manip_pkt)(struct sk_buff *skb, struct nf_conn *ct, enum nf_nat_manip_type mtype, enum ip_conntrack_dir dir); }; extern struct nf_nat_hook __rcu *nf_nat_hook; static inline void nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) { #if IS_ENABLED(CONFIG_NF_NAT) struct nf_nat_hook *nat_hook; rcu_read_lock(); nat_hook = rcu_dereference(nf_nat_hook); if (nat_hook && nat_hook->decode_session) nat_hook->decode_session(skb, fl); rcu_read_unlock(); #endif } #else /* !CONFIG_NETFILTER */ static inline int NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *), bool cond) { return okfn(net, sk, skb); } static inline int NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { return okfn(net, sk, skb); } static inline void NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct list_head *head, struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { /* nothing to do */ } static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { return 1; } struct flowi; static inline void nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) { } #endif /*CONFIG_NETFILTER*/ #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <linux/netfilter/nf_conntrack_zones_common.h> extern void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) __rcu; void nf_ct_attach(struct sk_buff *, const struct sk_buff *); struct nf_conntrack_tuple; bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, const struct sk_buff *skb); #else static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} struct nf_conntrack_tuple; static inline bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, const struct sk_buff *skb) { return false; } #endif struct nf_conn; enum ip_conntrack_info; struct nf_ct_hook { int (*update)(struct net *net, struct sk_buff *skb); void (*destroy)(struct nf_conntrack *); bool (*get_tuple_skb)(struct nf_conntrack_tuple *, const struct sk_buff *); }; extern struct nf_ct_hook __rcu *nf_ct_hook; struct nlattr; struct nfnl_ct_hook { struct nf_conn *(*get_ct)(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo); size_t (*build_size)(const struct nf_conn *ct); int (*build)(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, u_int16_t ct_attr, u_int16_t ct_info_attr); int (*parse)(const struct nlattr *attr, struct nf_conn *ct); int (*attach_expect)(const struct nlattr *attr, struct nf_conn *ct, u32 portid, u32 report); void (*seq_adjust)(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, s32 off); }; extern struct nfnl_ct_hook __rcu *nfnl_ct_hook; /** * nf_skb_duplicated - TEE target has sent a packet * * When a xtables target sends a packet, the OUTPUT and POSTROUTING * hooks are traversed again, i.e. nft and xtables are invoked recursively. * * This is used by xtables TEE target to prevent the duplicated skb from * being duplicated again. */ DECLARE_PER_CPU(bool, nf_skb_duplicated); #endif /*__LINUX_NETFILTER_H*/
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 /* SPDX-License-Identifier: GPL-2.0 */ /* * include/linux/prandom.h * * Include file for the fast pseudo-random 32-bit * generation. */ #ifndef _LINUX_PRANDOM_H #define _LINUX_PRANDOM_H #include <linux/types.h> #include <linux/percpu.h> #include <linux/siphash.h> u32 prandom_u32(void); void prandom_bytes(void *buf, size_t nbytes); void prandom_seed(u32 seed); void prandom_reseed_late(void); DECLARE_PER_CPU(unsigned long, net_rand_noise); #define PRANDOM_ADD_NOISE(a, b, c, d) \ prandom_u32_add_noise((unsigned long)(a), (unsigned long)(b), \ (unsigned long)(c), (unsigned long)(d)) #if BITS_PER_LONG == 64 /* * The core SipHash round function. Each line can be executed in * parallel given enough CPU resources. */ #define PRND_SIPROUND(v0, v1, v2, v3) SIPHASH_PERMUTATION(v0, v1, v2, v3) #define PRND_K0 (SIPHASH_CONST_0 ^ SIPHASH_CONST_2) #define PRND_K1 (SIPHASH_CONST_1 ^ SIPHASH_CONST_3) #elif BITS_PER_LONG == 32 /* * On 32-bit machines, we use HSipHash, a reduced-width version of SipHash. * This is weaker, but 32-bit machines are not used for high-traffic * applications, so there is less output for an attacker to analyze. */ #define PRND_SIPROUND(v0, v1, v2, v3) HSIPHASH_PERMUTATION(v0, v1, v2, v3) #define PRND_K0 (HSIPHASH_CONST_0 ^ HSIPHASH_CONST_2) #define PRND_K1 (HSIPHASH_CONST_1 ^ HSIPHASH_CONST_3) #else #error Unsupported BITS_PER_LONG #endif static inline void prandom_u32_add_noise(unsigned long a, unsigned long b, unsigned long c, unsigned long d) { /* * This is not used cryptographically; it's just * a convenient 4-word hash function. (3 xor, 2 add, 2 rol) */ a ^= raw_cpu_read(net_rand_noise); PRND_SIPROUND(a, b, c, d); raw_cpu_write(net_rand_noise, d); } struct rnd_state { __u32 s1, s2, s3, s4; }; u32 prandom_u32_state(struct rnd_state *state); void prandom_bytes_state(struct rnd_state *state, void *buf, size_t nbytes); void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state); #define prandom_init_once(pcpu_state) \ DO_ONCE(prandom_seed_full_state, (pcpu_state)) /** * prandom_u32_max - returns a pseudo-random number in interval [0, ep_ro) * @ep_ro: right open interval endpoint * * Returns a pseudo-random number that is in interval [0, ep_ro). Note * that the result depends on PRNG being well distributed in [0, ~0U] * u32 space. Here we use maximally equidistributed combined Tausworthe * generator, that is, prandom_u32(). This is useful when requesting a * random index of an array containing ep_ro elements, for example. * * Returns: pseudo-random number in interval [0, ep_ro) */ static inline u32 prandom_u32_max(u32 ep_ro) { return (u32)(((u64) prandom_u32() * ep_ro) >> 32); } /* * Handle minimum values for seeds */ static inline u32 __seed(u32 x, u32 m) { return (x < m) ? x + m : x; } /** * prandom_seed_state - set seed for prandom_u32_state(). * @state: pointer to state structure to receive the seed. * @seed: arbitrary 64-bit value to use as a seed. */ static inline void prandom_seed_state(struct rnd_state *state, u64 seed) { u32 i = ((seed >> 32) ^ (seed << 10) ^ seed) & 0xffffffffUL; state->s1 = __seed(i, 2U); state->s2 = __seed(i, 8U); state->s3 = __seed(i, 16U); state->s4 = __seed(i, 128U); PRANDOM_ADD_NOISE(state, i, 0, 0); } /* Pseudo random number generator from numerical recipes. */ static inline u32 next_pseudo_random32(u32 seed) { return seed * 1664525 + 1013904223; } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* Internal procfs definitions * * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/proc_fs.h> #include <linux/proc_ns.h> #include <linux/refcount.h> #include <linux/spinlock.h> #include <linux/atomic.h> #include <linux/binfmts.h> #include <linux/sched/coredump.h> #include <linux/sched/task.h> struct ctl_table_header; struct mempolicy; /* * This is not completely implemented yet. The idea is to * create an in-memory tree (like the actual /proc filesystem * tree) of these proc_dir_entries, so that we can dynamically * add new files to /proc. * * parent/subdir are used for the directory structure (every /proc file has a * parent, but "subdir" is empty for all non-directory entries). * subdir_node is used to build the rb tree "subdir" of the parent. */ struct proc_dir_entry { /* * number of callers into module in progress; * negative -> it's going away RSN */ atomic_t in_use; refcount_t refcnt; struct list_head pde_openers; /* who did ->open, but not ->release */ /* protects ->pde_openers and all struct pde_opener instances */ spinlock_t pde_unload_lock; struct completion *pde_unload_completion; const struct inode_operations *proc_iops; union { const struct proc_ops *proc_ops; const struct file_operations *proc_dir_ops; }; const struct dentry_operations *proc_dops; union { const struct seq_operations *seq_ops; int (*single_show)(struct seq_file *, void *); }; proc_write_t write; void *data; unsigned int state_size; unsigned int low_ino; nlink_t nlink; kuid_t uid; kgid_t gid; loff_t size; struct proc_dir_entry *parent; struct rb_root subdir; struct rb_node subdir_node; char *name; umode_t mode; u8 flags; u8 namelen; char inline_name[]; } __randomize_layout; #define SIZEOF_PDE ( \ sizeof(struct proc_dir_entry) < 128 ? 128 : \ sizeof(struct proc_dir_entry) < 192 ? 192 : \ sizeof(struct proc_dir_entry) < 256 ? 256 : \ sizeof(struct proc_dir_entry) < 512 ? 512 : \ 0) #define SIZEOF_PDE_INLINE_NAME (SIZEOF_PDE - sizeof(struct proc_dir_entry)) static inline bool pde_is_permanent(const struct proc_dir_entry *pde) { return pde->flags & PROC_ENTRY_PERMANENT; } extern struct kmem_cache *proc_dir_entry_cache; void pde_free(struct proc_dir_entry *pde); union proc_op { int (*proc_get_link)(struct dentry *, struct path *); int (*proc_show)(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task); const char *lsm; }; struct proc_inode { struct pid *pid; unsigned int fd; union proc_op op; struct proc_dir_entry *pde; struct ctl_table_header *sysctl; struct ctl_table *sysctl_entry; struct hlist_node sibling_inodes; const struct proc_ns_operations *ns_ops; struct inode vfs_inode; } __randomize_layout; /* * General functions */ static inline struct proc_inode *PROC_I(const struct inode *inode) { return container_of(inode, struct proc_inode, vfs_inode); } static inline struct proc_dir_entry *PDE(const struct inode *inode) { return PROC_I(inode)->pde; } static inline void *__PDE_DATA(const struct inode *inode) { return PDE(inode)->data; } static inline struct pid *proc_pid(const struct inode *inode) { return PROC_I(inode)->pid; } static inline struct task_struct *get_proc_task(const struct inode *inode) { return get_pid_task(proc_pid(inode), PIDTYPE_PID); } void task_dump_owner(struct task_struct *task, umode_t mode, kuid_t *ruid, kgid_t *rgid); unsigned name_to_int(const struct qstr *qstr); /* * Offset of the first process in the /proc root directory.. */ #define FIRST_PROCESS_ENTRY 256 /* Worst case buffer size needed for holding an integer. */ #define PROC_NUMBUF 13 /* * array.c */ extern const struct file_operations proc_tid_children_operations; extern void proc_task_name(struct seq_file *m, struct task_struct *p, bool escape); extern int proc_tid_stat(struct seq_file *, struct pid_namespace *, struct pid *, struct task_struct *); extern int proc_tgid_stat(struct seq_file *, struct pid_namespace *, struct pid *, struct task_struct *); extern int proc_pid_status(struct seq_file *, struct pid_namespace *, struct pid *, struct task_struct *); extern int proc_pid_statm(struct seq_file *, struct pid_namespace *, struct pid *, struct task_struct *); /* * base.c */ extern const struct dentry_operations pid_dentry_operations; extern int pid_getattr(const struct path *, struct kstat *, u32, unsigned int); extern int proc_setattr(struct dentry *, struct iattr *); extern void proc_pid_evict_inode(struct proc_inode *); extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *, umode_t); extern void pid_update_inode(struct task_struct *, struct inode *); extern int pid_delete_dentry(const struct dentry *); extern int proc_pid_readdir(struct file *, struct dir_context *); struct dentry *proc_pid_lookup(struct dentry *, unsigned int); extern loff_t mem_lseek(struct file *, loff_t, int); /* Lookups */ typedef struct dentry *instantiate_t(struct dentry *, struct task_struct *, const void *); bool proc_fill_cache(struct file *, struct dir_context *, const char *, unsigned int, instantiate_t, struct task_struct *, const void *); /* * generic.c */ struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode, struct proc_dir_entry **parent, void *data); struct proc_dir_entry *proc_register(struct proc_dir_entry *dir, struct proc_dir_entry *dp); extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int); struct dentry *proc_lookup_de(struct inode *, struct dentry *, struct proc_dir_entry *); extern int proc_readdir(struct file *, struct dir_context *); int proc_readdir_de(struct file *, struct dir_context *, struct proc_dir_entry *); static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) { refcount_inc(&pde->refcnt); return pde; } extern void pde_put(struct proc_dir_entry *); static inline bool is_empty_pde(const struct proc_dir_entry *pde) { return S_ISDIR(pde->mode) && !pde->proc_iops; } extern ssize_t proc_simple_write(struct file *, const char __user *, size_t, loff_t *); /* * inode.c */ struct pde_opener { struct list_head lh; struct file *file; bool closing; struct completion *c; } __randomize_layout; extern const struct inode_operations proc_link_inode_operations; extern const struct inode_operations proc_pid_link_inode_operations; extern const struct super_operations proc_sops; void proc_init_kmemcache(void); void proc_invalidate_siblings_dcache(struct hlist_head *inodes, spinlock_t *lock); void set_proc_pid_nlink(void); extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); extern void proc_entry_rundown(struct proc_dir_entry *); /* * proc_namespaces.c */ extern const struct inode_operations proc_ns_dir_inode_operations; extern const struct file_operations proc_ns_dir_operations; /* * proc_net.c */ extern const struct file_operations proc_net_operations; extern const struct inode_operations proc_net_inode_operations; #ifdef CONFIG_NET extern int proc_net_init(void); #else static inline int proc_net_init(void) { return 0; } #endif /* * proc_self.c */ extern int proc_setup_self(struct super_block *); /* * proc_thread_self.c */ extern int proc_setup_thread_self(struct super_block *); extern void proc_thread_self_init(void); /* * proc_sysctl.c */ #ifdef CONFIG_PROC_SYSCTL extern int proc_sys_init(void); extern void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head); #else static inline void proc_sys_init(void) { } static inline void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head) { } #endif /* * proc_tty.c */ #ifdef CONFIG_TTY extern void proc_tty_init(void); #else static inline void proc_tty_init(void) {} #endif /* * root.c */ extern struct proc_dir_entry proc_root; extern void proc_self_init(void); /* * task_[no]mmu.c */ struct mem_size_stats; struct proc_maps_private { struct inode *inode; struct task_struct *task; struct mm_struct *mm; #ifdef CONFIG_MMU struct vm_area_struct *tail_vma; #endif #ifdef CONFIG_NUMA struct mempolicy *task_mempolicy; #endif } __randomize_layout; struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode); extern const struct file_operations proc_pid_maps_operations; extern const struct file_operations proc_pid_numa_maps_operations; extern const struct file_operations proc_pid_smaps_operations; extern const struct file_operations proc_pid_smaps_rollup_operations; extern const struct file_operations proc_clear_refs_operations; extern const struct file_operations proc_pagemap_operations; extern unsigned long task_vsize(struct mm_struct *); extern unsigned long task_statm(struct mm_struct *, unsigned long *, unsigned long *, unsigned long *, unsigned long *); extern void task_mem(struct seq_file *, struct mm_struct *); extern const struct dentry_operations proc_net_dentry_ops; static inline void pde_force_lookup(struct proc_dir_entry *pde) { /* /proc/net/ entries can be changed under us by setns(CLONE_NEWNET) */ pde->proc_dops = &proc_net_dentry_ops; }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_JUMP_LABEL_H #define _LINUX_JUMP_LABEL_H /* * Jump label support * * Copyright (C) 2009-2012 Jason Baron <jbaron@redhat.com> * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra * * DEPRECATED API: * * The use of 'struct static_key' directly, is now DEPRECATED. In addition * static_key_{true,false}() is also DEPRECATED. IE DO NOT use the following: * * struct static_key false = STATIC_KEY_INIT_FALSE; * struct static_key true = STATIC_KEY_INIT_TRUE; * static_key_true() * static_key_false() * * The updated API replacements are: * * DEFINE_STATIC_KEY_TRUE(key); * DEFINE_STATIC_KEY_FALSE(key); * DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count); * DEFINE_STATIC_KEY_ARRAY_FALSE(keys, count); * static_branch_likely() * static_branch_unlikely() * * Jump labels provide an interface to generate dynamic branches using * self-modifying code. Assuming toolchain and architecture support, if we * define a "key" that is initially false via "DEFINE_STATIC_KEY_FALSE(key)", * an "if (static_branch_unlikely(&key))" statement is an unconditional branch * (which defaults to false - and the true block is placed out of line). * Similarly, we can define an initially true key via * "DEFINE_STATIC_KEY_TRUE(key)", and use it in the same * "if (static_branch_unlikely(&key))", in which case we will generate an * unconditional branch to the out-of-line true branch. Keys that are * initially true or false can be using in both static_branch_unlikely() * and static_branch_likely() statements. * * At runtime we can change the branch target by setting the key * to true via a call to static_branch_enable(), or false using * static_branch_disable(). If the direction of the branch is switched by * these calls then we run-time modify the branch target via a * no-op -> jump or jump -> no-op conversion. For example, for an * initially false key that is used in an "if (static_branch_unlikely(&key))" * statement, setting the key to true requires us to patch in a jump * to the out-of-line of true branch. * * In addition to static_branch_{enable,disable}, we can also reference count * the key or branch direction via static_branch_{inc,dec}. Thus, * static_branch_inc() can be thought of as a 'make more true' and * static_branch_dec() as a 'make more false'. * * Since this relies on modifying code, the branch modifying functions * must be considered absolute slow paths (machine wide synchronization etc.). * OTOH, since the affected branches are unconditional, their runtime overhead * will be absolutely minimal, esp. in the default (off) case where the total * effect is a single NOP of appropriate size. The on case will patch in a jump * to the out-of-line block. * * When the control is directly exposed to userspace, it is prudent to delay the * decrement to avoid high frequency code modifications which can (and do) * cause significant performance degradation. Struct static_key_deferred and * static_key_slow_dec_deferred() provide for this. * * Lacking toolchain and or architecture support, static keys fall back to a * simple conditional branch. * * Additional babbling in: Documentation/staging/static-keys.rst */ #ifndef __ASSEMBLY__ #include <linux/types.h> #include <linux/compiler.h> extern bool static_key_initialized; #define STATIC_KEY_CHECK_USE(key) WARN(!static_key_initialized, \ "%s(): static key '%pS' used before call to jump_label_init()", \ __func__, (key)) #ifdef CONFIG_JUMP_LABEL struct static_key { atomic_t enabled; /* * Note: * To make anonymous unions work with old compilers, the static * initialization of them requires brackets. This creates a dependency * on the order of the struct with the initializers. If any fields * are added, STATIC_KEY_INIT_TRUE and STATIC_KEY_INIT_FALSE may need * to be modified. * * bit 0 => 1 if key is initially true * 0 if initially false * bit 1 => 1 if points to struct static_key_mod * 0 if points to struct jump_entry */ union { unsigned long type; struct jump_entry *entries; struct static_key_mod *next; }; }; #else struct static_key { atomic_t enabled; }; #endif /* CONFIG_JUMP_LABEL */ #endif /* __ASSEMBLY__ */ #ifdef CONFIG_JUMP_LABEL #include <asm/jump_label.h> #ifndef __ASSEMBLY__ #ifdef CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE struct jump_entry { s32 code; s32 target; long key; // key may be far away from the core kernel under KASLR }; static inline unsigned long jump_entry_code(const struct jump_entry *entry) { return (unsigned long)&entry->code + entry->code; } static inline unsigned long jump_entry_target(const struct jump_entry *entry) { return (unsigned long)&entry->target + entry->target; } static inline struct static_key *jump_entry_key(const struct jump_entry *entry) { long offset = entry->key & ~3L; return (struct static_key *)((unsigned long)&entry->key + offset); } #else static inline unsigned long jump_entry_code(const struct jump_entry *entry) { return entry->code; } static inline unsigned long jump_entry_target(const struct jump_entry *entry) { return entry->target; } static inline struct static_key *jump_entry_key(const struct jump_entry *entry) { return (struct static_key *)((unsigned long)entry->key & ~3UL); } #endif static inline bool jump_entry_is_branch(const struct jump_entry *entry) { return (unsigned long)entry->key & 1UL; } static inline bool jump_entry_is_init(const struct jump_entry *entry) { return (unsigned long)entry->key & 2UL; } static inline void jump_entry_set_init(struct jump_entry *entry) { entry->key |= 2; } #endif #endif #ifndef __ASSEMBLY__ enum jump_label_type { JUMP_LABEL_NOP = 0, JUMP_LABEL_JMP, }; struct module; #ifdef CONFIG_JUMP_LABEL #define JUMP_TYPE_FALSE 0UL #define JUMP_TYPE_TRUE 1UL #define JUMP_TYPE_LINKED 2UL #define JUMP_TYPE_MASK 3UL static __always_inline bool static_key_false(struct static_key *key) { return arch_static_branch(key, false); } static __always_inline bool static_key_true(struct static_key *key) { return !arch_static_branch(key, true); } extern struct jump_entry __start___jump_table[]; extern struct jump_entry __stop___jump_table[]; extern void jump_label_init(void); extern void jump_label_lock(void); extern void jump_label_unlock(void); extern void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type); extern void arch_jump_label_transform_static(struct jump_entry *entry, enum jump_label_type type); extern bool arch_jump_label_transform_queue(struct jump_entry *entry, enum jump_label_type type); extern void arch_jump_label_transform_apply(void); extern int jump_label_text_reserved(void *start, void *end); extern void static_key_slow_inc(struct static_key *key); extern void static_key_slow_dec(struct static_key *key); extern void static_key_slow_inc_cpuslocked(struct static_key *key); extern void static_key_slow_dec_cpuslocked(struct static_key *key); extern void jump_label_apply_nops(struct module *mod); extern int static_key_count(struct static_key *key); extern void static_key_enable(struct static_key *key); extern void static_key_disable(struct static_key *key); extern void static_key_enable_cpuslocked(struct static_key *key); extern void static_key_disable_cpuslocked(struct static_key *key); /* * We should be using ATOMIC_INIT() for initializing .enabled, but * the inclusion of atomic.h is problematic for inclusion of jump_label.h * in 'low-level' headers. Thus, we are initializing .enabled with a * raw value, but have added a BUILD_BUG_ON() to catch any issues in * jump_label_init() see: kernel/jump_label.c. */ #define STATIC_KEY_INIT_TRUE \ { .enabled = { 1 }, \ { .entries = (void *)JUMP_TYPE_TRUE } } #define STATIC_KEY_INIT_FALSE \ { .enabled = { 0 }, \ { .entries = (void *)JUMP_TYPE_FALSE } } #else /* !CONFIG_JUMP_LABEL */ #include <linux/atomic.h> #include <linux/bug.h> static __always_inline int static_key_count(struct static_key *key) { return arch_atomic_read(&key->enabled); } static __always_inline void jump_label_init(void) { static_key_initialized = true; } static __always_inline bool static_key_false(struct static_key *key) { if (unlikely(static_key_count(key) > 0)) return true; return false; } static __always_inline bool static_key_true(struct static_key *key) { if (likely(static_key_count(key) > 0)) return true; return false; } static inline void static_key_slow_inc(struct static_key *key) { STATIC_KEY_CHECK_USE(key); atomic_inc(&key->enabled); } static inline void static_key_slow_dec(struct static_key *key) { STATIC_KEY_CHECK_USE(key); atomic_dec(&key->enabled); } #define static_key_slow_inc_cpuslocked(key) static_key_slow_inc(key) #define static_key_slow_dec_cpuslocked(key) static_key_slow_dec(key) static inline int jump_label_text_reserved(void *start, void *end) { return 0; } static inline void jump_label_lock(void) {} static inline void jump_label_unlock(void) {} static inline int jump_label_apply_nops(struct module *mod) { return 0; } static inline void static_key_enable(struct static_key *key) { STATIC_KEY_CHECK_USE(key); if (atomic_read(&key->enabled) != 0) { WARN_ON_ONCE(atomic_read(&key->enabled) != 1); return; } atomic_set(&key->enabled, 1); } static inline void static_key_disable(struct static_key *key) { STATIC_KEY_CHECK_USE(key); if (atomic_read(&key->enabled) != 1) { WARN_ON_ONCE(atomic_read(&key->enabled) != 0); return; } atomic_set(&key->enabled, 0); } #define static_key_enable_cpuslocked(k) static_key_enable((k)) #define static_key_disable_cpuslocked(k) static_key_disable((k)) #define STATIC_KEY_INIT_TRUE { .enabled = ATOMIC_INIT(1) } #define STATIC_KEY_INIT_FALSE { .enabled = ATOMIC_INIT(0) } #endif /* CONFIG_JUMP_LABEL */ #define STATIC_KEY_INIT STATIC_KEY_INIT_FALSE #define jump_label_enabled static_key_enabled /* -------------------------------------------------------------------------- */ /* * Two type wrappers around static_key, such that we can use compile time * type differentiation to emit the right code. * * All the below code is macros in order to play type games. */ struct static_key_true { struct static_key key; }; struct static_key_false { struct static_key key; }; #define STATIC_KEY_TRUE_INIT (struct static_key_true) { .key = STATIC_KEY_INIT_TRUE, } #define STATIC_KEY_FALSE_INIT (struct static_key_false){ .key = STATIC_KEY_INIT_FALSE, } #define DEFINE_STATIC_KEY_TRUE(name) \ struct static_key_true name = STATIC_KEY_TRUE_INIT #define DEFINE_STATIC_KEY_TRUE_RO(name) \ struct static_key_true name __ro_after_init = STATIC_KEY_TRUE_INIT #define DECLARE_STATIC_KEY_TRUE(name) \ extern struct static_key_true name #define DEFINE_STATIC_KEY_FALSE(name) \ struct static_key_false name = STATIC_KEY_FALSE_INIT #define DEFINE_STATIC_KEY_FALSE_RO(name) \ struct static_key_false name __ro_after_init = STATIC_KEY_FALSE_INIT #define DECLARE_STATIC_KEY_FALSE(name) \ extern struct static_key_false name #define DEFINE_STATIC_KEY_ARRAY_TRUE(name, count) \ struct static_key_true name[count] = { \ [0 ... (count) - 1] = STATIC_KEY_TRUE_INIT, \ } #define DEFINE_STATIC_KEY_ARRAY_FALSE(name, count) \ struct static_key_false name[count] = { \ [0 ... (count) - 1] = STATIC_KEY_FALSE_INIT, \ } extern bool ____wrong_branch_error(void); #define static_key_enabled(x) \ ({ \ if (!__builtin_types_compatible_p(typeof(*x), struct static_key) && \ !__builtin_types_compatible_p(typeof(*x), struct static_key_true) &&\ !__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \ ____wrong_branch_error(); \ static_key_count((struct static_key *)x) > 0; \ }) #ifdef CONFIG_JUMP_LABEL /* * Combine the right initial value (type) with the right branch order * to generate the desired result. * * * type\branch| likely (1) | unlikely (0) * -----------+-----------------------+------------------ * | | * true (1) | ... | ... * | NOP | JMP L * | <br-stmts> | 1: ... * | L: ... | * | | * | | L: <br-stmts> * | | jmp 1b * | | * -----------+-----------------------+------------------ * | | * false (0) | ... | ... * | JMP L | NOP * | <br-stmts> | 1: ... * | L: ... | * | | * | | L: <br-stmts> * | | jmp 1b * | | * -----------+-----------------------+------------------ * * The initial value is encoded in the LSB of static_key::entries, * type: 0 = false, 1 = true. * * The branch type is encoded in the LSB of jump_entry::key, * branch: 0 = unlikely, 1 = likely. * * This gives the following logic table: * * enabled type branch instuction * -----------------------------+----------- * 0 0 0 | NOP * 0 0 1 | JMP * 0 1 0 | NOP * 0 1 1 | JMP * * 1 0 0 | JMP * 1 0 1 | NOP * 1 1 0 | JMP * 1 1 1 | NOP * * Which gives the following functions: * * dynamic: instruction = enabled ^ branch * static: instruction = type ^ branch * * See jump_label_type() / jump_label_init_type(). */ #define static_branch_likely(x) \ ({ \ bool branch; \ if (__builtin_types_compatible_p(typeof(*x), struct static_key_true)) \ branch = !arch_static_branch(&(x)->key, true); \ else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \ branch = !arch_static_branch_jump(&(x)->key, true); \ else \ branch = ____wrong_branch_error(); \ likely(branch); \ }) #define static_branch_unlikely(x) \ ({ \ bool branch; \ if (__builtin_types_compatible_p(typeof(*x), struct static_key_true)) \ branch = arch_static_branch_jump(&(x)->key, false); \ else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \ branch = arch_static_branch(&(x)->key, false); \ else \ branch = ____wrong_branch_error(); \ unlikely(branch); \ }) #else /* !CONFIG_JUMP_LABEL */ #define static_branch_likely(x) likely(static_key_enabled(&(x)->key)) #define static_branch_unlikely(x) unlikely(static_key_enabled(&(x)->key)) #endif /* CONFIG_JUMP_LABEL */ /* * Advanced usage; refcount, branch is enabled when: count != 0 */ #define static_branch_inc(x) static_key_slow_inc(&(x)->key) #define static_branch_dec(x) static_key_slow_dec(&(x)->key) #define static_branch_inc_cpuslocked(x) static_key_slow_inc_cpuslocked(&(x)->key) #define static_branch_dec_cpuslocked(x) static_key_slow_dec_cpuslocked(&(x)->key) /* * Normal usage; boolean enable/disable. */ #define static_branch_enable(x) static_key_enable(&(x)->key) #define static_branch_disable(x) static_key_disable(&(x)->key) #define static_branch_enable_cpuslocked(x) static_key_enable_cpuslocked(&(x)->key) #define static_branch_disable_cpuslocked(x) static_key_disable_cpuslocked(&(x)->key) #endif /* __ASSEMBLY__ */ #endif /* _LINUX_JUMP_LABEL_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 /* SPDX-License-Identifier: GPL-2.0 */ /* * linux/ipc/util.h * Copyright (C) 1999 Christoph Rohland * * ipc helper functions (c) 1999 Manfred Spraul <manfred@colorfullife.com> * namespaces support. 2006 OpenVZ, SWsoft Inc. * Pavel Emelianov <xemul@openvz.org> */ #ifndef _IPC_UTIL_H #define _IPC_UTIL_H #include <linux/unistd.h> #include <linux/err.h> #include <linux/ipc_namespace.h> /* * The IPC ID contains 2 separate numbers - index and sequence number. * By default, * bits 0-14: index (32k, 15 bits) * bits 15-30: sequence number (64k, 16 bits) * * When IPCMNI extension mode is turned on, the composition changes: * bits 0-23: index (16M, 24 bits) * bits 24-30: sequence number (128, 7 bits) */ #define IPCMNI_SHIFT 15 #define IPCMNI_EXTEND_SHIFT 24 #define IPCMNI_EXTEND_MIN_CYCLE (RADIX_TREE_MAP_SIZE * RADIX_TREE_MAP_SIZE) #define IPCMNI (1 << IPCMNI_SHIFT) #define IPCMNI_EXTEND (1 << IPCMNI_EXTEND_SHIFT) #ifdef CONFIG_SYSVIPC_SYSCTL extern int ipc_mni; extern int ipc_mni_shift; extern int ipc_min_cycle; #define ipcmni_seq_shift() ipc_mni_shift #define IPCMNI_IDX_MASK ((1 << ipc_mni_shift) - 1) #else /* CONFIG_SYSVIPC_SYSCTL */ #define ipc_mni IPCMNI #define ipc_min_cycle ((int)RADIX_TREE_MAP_SIZE) #define ipcmni_seq_shift() IPCMNI_SHIFT #define IPCMNI_IDX_MASK ((1 << IPCMNI_SHIFT) - 1) #endif /* CONFIG_SYSVIPC_SYSCTL */ void sem_init(void); void msg_init(void); void shm_init(void); struct ipc_namespace; struct pid_namespace; #ifdef CONFIG_POSIX_MQUEUE extern void mq_clear_sbinfo(struct ipc_namespace *ns); extern void mq_put_mnt(struct ipc_namespace *ns); #else static inline void mq_clear_sbinfo(struct ipc_namespace *ns) { } static inline void mq_put_mnt(struct ipc_namespace *ns) { } #endif #ifdef CONFIG_SYSVIPC void sem_init_ns(struct ipc_namespace *ns); void msg_init_ns(struct ipc_namespace *ns); void shm_init_ns(struct ipc_namespace *ns); void sem_exit_ns(struct ipc_namespace *ns); void msg_exit_ns(struct ipc_namespace *ns); void shm_exit_ns(struct ipc_namespace *ns); #else static inline void sem_init_ns(struct ipc_namespace *ns) { } static inline void msg_init_ns(struct ipc_namespace *ns) { } static inline void shm_init_ns(struct ipc_namespace *ns) { } static inline void sem_exit_ns(struct ipc_namespace *ns) { } static inline void msg_exit_ns(struct ipc_namespace *ns) { } static inline void shm_exit_ns(struct ipc_namespace *ns) { } #endif /* * Structure that holds the parameters needed by the ipc operations * (see after) */ struct ipc_params { key_t key; int flg; union { size_t size; /* for shared memories */ int nsems; /* for semaphores */ } u; /* holds the getnew() specific param */ }; /* * Structure that holds some ipc operations. This structure is used to unify * the calls to sys_msgget(), sys_semget(), sys_shmget() * . routine to call to create a new ipc object. Can be one of newque, * newary, newseg * . routine to call to check permissions for a new ipc object. * Can be one of security_msg_associate, security_sem_associate, * security_shm_associate * . routine to call for an extra check if needed */ struct ipc_ops { int (*getnew)(struct ipc_namespace *, struct ipc_params *); int (*associate)(struct kern_ipc_perm *, int); int (*more_checks)(struct kern_ipc_perm *, struct ipc_params *); }; struct seq_file; struct ipc_ids; void ipc_init_ids(struct ipc_ids *ids); #ifdef CONFIG_PROC_FS void __init ipc_init_proc_interface(const char *path, const char *header, int ids, int (*show)(struct seq_file *, void *)); struct pid_namespace *ipc_seq_pid_ns(struct seq_file *); #else #define ipc_init_proc_interface(path, header, ids, show) do {} while (0) #endif #define IPC_SEM_IDS 0 #define IPC_MSG_IDS 1 #define IPC_SHM_IDS 2 #define ipcid_to_idx(id) ((id) & IPCMNI_IDX_MASK) #define ipcid_to_seqx(id) ((id) >> ipcmni_seq_shift()) #define ipcid_seq_max() (INT_MAX >> ipcmni_seq_shift()) /* must be called with ids->rwsem acquired for writing */ int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int); /* must be called with both locks acquired. */ void ipc_rmid(struct ipc_ids *, struct kern_ipc_perm *); /* must be called with both locks acquired. */ void ipc_set_key_private(struct ipc_ids *, struct kern_ipc_perm *); /* must be called with ipcp locked */ int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flg); /** * ipc_get_maxidx - get the highest assigned index * @ids: ipc identifier set * * Called with ipc_ids.rwsem held for reading. */ static inline int ipc_get_maxidx(struct ipc_ids *ids) { if (ids->in_use == 0) return -1; if (ids->in_use == ipc_mni) return ipc_mni - 1; return ids->max_idx; } /* * For allocation that need to be freed by RCU. * Objects are reference counted, they start with reference count 1. * getref increases the refcount, the putref call that reduces the recount * to 0 schedules the rcu destruction. Caller must guarantee locking. * * refcount is initialized by ipc_addid(), before that point call_rcu() * must be used. */ bool ipc_rcu_getref(struct kern_ipc_perm *ptr); void ipc_rcu_putref(struct kern_ipc_perm *ptr, void (*func)(struct rcu_head *head)); struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id); void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out); void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out); int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out); struct kern_ipc_perm *ipcctl_obtain_check(struct ipc_namespace *ns, struct ipc_ids *ids, int id, int cmd, struct ipc64_perm *perm, int extra_perm); static inline void ipc_update_pid(struct pid **pos, struct pid *pid) { struct pid *old = *pos; if (old != pid) { *pos = get_pid(pid); put_pid(old); } } #ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION int ipc_parse_version(int *cmd); #endif extern void free_msg(struct msg_msg *msg); extern struct msg_msg *load_msg(const void __user *src, size_t len); extern struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst); extern int store_msg(void __user *dest, struct msg_msg *msg, size_t len); static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int id) { return ipcid_to_seqx(id) != ipcp->seq; } static inline void ipc_lock_object(struct kern_ipc_perm *perm) { spin_lock(&perm->lock); } static inline void ipc_unlock_object(struct kern_ipc_perm *perm) { spin_unlock(&perm->lock); } static inline void ipc_assert_locked_object(struct kern_ipc_perm *perm) { assert_spin_locked(&perm->lock); } static inline void ipc_unlock(struct kern_ipc_perm *perm) { ipc_unlock_object(perm); rcu_read_unlock(); } /* * ipc_valid_object() - helper to sort out IPC_RMID races for codepaths * where the respective ipc_ids.rwsem is not being held down. * Checks whether the ipc object is still around or if it's gone already, as * ipc_rmid() may have already freed the ID while the ipc lock was spinning. * Needs to be called with kern_ipc_perm.lock held -- exception made for one * checkpoint case at sys_semtimedop() as noted in code commentary. */ static inline bool ipc_valid_object(struct kern_ipc_perm *perm) { return !perm->deleted; } struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id); int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids, const struct ipc_ops *ops, struct ipc_params *params); void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids, void (*free)(struct ipc_namespace *, struct kern_ipc_perm *)); static inline int sem_check_semmni(struct ipc_namespace *ns) { /* * Check semmni range [0, ipc_mni] * semmni is the last element of sem_ctls[4] array */ return ((ns->sem_ctls[3] < 0) || (ns->sem_ctls[3] > ipc_mni)) ? -ERANGE : 0; } #ifdef CONFIG_COMPAT #include <linux/compat.h> struct compat_ipc_perm { key_t key; __compat_uid_t uid; __compat_gid_t gid; __compat_uid_t cuid; __compat_gid_t cgid; compat_mode_t mode; unsigned short seq; }; void to_compat_ipc_perm(struct compat_ipc_perm *, struct ipc64_perm *); void to_compat_ipc64_perm(struct compat_ipc64_perm *, struct ipc64_perm *); int get_compat_ipc_perm(struct ipc64_perm *, struct compat_ipc_perm __user *); int get_compat_ipc64_perm(struct ipc64_perm *, struct compat_ipc64_perm __user *); static inline int compat_ipc_parse_version(int *cmd) { int version = *cmd & IPC_64; *cmd &= ~IPC_64; return version; } long compat_ksys_old_semctl(int semid, int semnum, int cmd, int arg); long compat_ksys_old_msgctl(int msqid, int cmd, void __user *uptr); long compat_ksys_msgrcv(int msqid, compat_uptr_t msgp, compat_ssize_t msgsz, compat_long_t msgtyp, int msgflg); long compat_ksys_msgsnd(int msqid, compat_uptr_t msgp, compat_ssize_t msgsz, int msgflg); long compat_ksys_old_shmctl(int shmid, int cmd, void __user *uptr); #endif #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 /* SPDX-License-Identifier: GPL-2.0 */ /* * NFS internal definitions */ #include "nfs4_fs.h" #include <linux/fs_context.h> #include <linux/security.h> #include <linux/crc32.h> #include <linux/sunrpc/addr.h> #include <linux/nfs_page.h> #include <linux/wait_bit.h> #define NFS_SB_MASK (SB_RDONLY|SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS) extern const struct export_operations nfs_export_ops; struct nfs_string; struct nfs_pageio_descriptor; static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct nfs_fattr *fattr) { if (!nfs_fsid_equal(&NFS_SB(parent)->fsid, &fattr->fsid)) fattr->valid |= NFS_ATTR_FATTR_MOUNTPOINT; } static inline int nfs_attr_use_mounted_on_fileid(struct nfs_fattr *fattr) { if (((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) == 0) || (((fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT) == 0) && ((fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) == 0))) return 0; return 1; } static inline bool nfs_lookup_is_soft_revalidate(const struct dentry *dentry) { if (!(NFS_SB(dentry->d_sb)->flags & NFS_MOUNT_SOFTREVAL)) return false; if (!d_is_positive(dentry) || !NFS_FH(d_inode(dentry))->size) return false; return true; } static inline fmode_t flags_to_mode(int flags) { fmode_t res = (__force fmode_t)flags & FMODE_EXEC; if ((flags & O_ACCMODE) != O_WRONLY) res |= FMODE_READ; if ((flags & O_ACCMODE) != O_RDONLY) res |= FMODE_WRITE; return res; } /* * Note: RFC 1813 doesn't limit the number of auth flavors that * a server can return, so make something up. */ #define NFS_MAX_SECFLAVORS (12) /* * Value used if the user did not specify a port value. */ #define NFS_UNSPEC_PORT (-1) #define NFS_UNSPEC_RETRANS (UINT_MAX) #define NFS_UNSPEC_TIMEO (UINT_MAX) /* * Maximum number of pages that readdir can use for creating * a vmapped array of pages. */ #define NFS_MAX_READDIR_PAGES 8 struct nfs_client_initdata { unsigned long init_flags; const char *hostname; /* Hostname of the server */ const struct sockaddr *addr; /* Address of the server */ const char *nodename; /* Hostname of the client */ const char *ip_addr; /* IP address of the client */ size_t addrlen; struct nfs_subversion *nfs_mod; int proto; u32 minorversion; unsigned int nconnect; struct net *net; const struct rpc_timeout *timeparms; const struct cred *cred; }; /* * In-kernel mount arguments */ struct nfs_fs_context { bool internal; bool skip_reconfig_option_check; bool need_mount; bool sloppy; unsigned int flags; /* NFS{,4}_MOUNT_* flags */ unsigned int rsize, wsize; unsigned int timeo, retrans; unsigned int acregmin, acregmax; unsigned int acdirmin, acdirmax; unsigned int namlen; unsigned int options; unsigned int bsize; struct nfs_auth_info auth_info; rpc_authflavor_t selected_flavor; char *client_address; unsigned int version; unsigned int minorversion; char *fscache_uniq; unsigned short protofamily; unsigned short mountfamily; struct { union { struct sockaddr address; struct sockaddr_storage _address; }; size_t addrlen; char *hostname; u32 version; int port; unsigned short protocol; } mount_server; struct { union { struct sockaddr address; struct sockaddr_storage _address; }; size_t addrlen; char *hostname; char *export_path; int port; unsigned short protocol; unsigned short nconnect; unsigned short export_path_len; } nfs_server; struct nfs_fh *mntfh; struct nfs_server *server; struct nfs_subversion *nfs_mod; /* Information for a cloned mount. */ struct nfs_clone_mount { struct super_block *sb; struct dentry *dentry; struct nfs_fattr *fattr; unsigned int inherited_bsize; } clone_data; }; #define nfs_errorf(fc, fmt, ...) ((fc)->log.log ? \ errorf(fc, fmt, ## __VA_ARGS__) : \ ({ dprintk(fmt "\n", ## __VA_ARGS__); })) #define nfs_ferrorf(fc, fac, fmt, ...) ((fc)->log.log ? \ errorf(fc, fmt, ## __VA_ARGS__) : \ ({ dfprintk(fac, fmt "\n", ## __VA_ARGS__); })) #define nfs_invalf(fc, fmt, ...) ((fc)->log.log ? \ invalf(fc, fmt, ## __VA_ARGS__) : \ ({ dprintk(fmt "\n", ## __VA_ARGS__); -EINVAL; })) #define nfs_finvalf(fc, fac, fmt, ...) ((fc)->log.log ? \ invalf(fc, fmt, ## __VA_ARGS__) : \ ({ dfprintk(fac, fmt "\n", ## __VA_ARGS__); -EINVAL; })) #define nfs_warnf(fc, fmt, ...) ((fc)->log.log ? \ warnf(fc, fmt, ## __VA_ARGS__) : \ ({ dprintk(fmt "\n", ## __VA_ARGS__); })) #define nfs_fwarnf(fc, fac, fmt, ...) ((fc)->log.log ? \ warnf(fc, fmt, ## __VA_ARGS__) : \ ({ dfprintk(fac, fmt "\n", ## __VA_ARGS__); })) static inline struct nfs_fs_context *nfs_fc2context(const struct fs_context *fc) { return fc->fs_private; } /* mount_clnt.c */ struct nfs_mount_request { struct sockaddr *sap; size_t salen; char *hostname; char *dirpath; u32 version; unsigned short protocol; struct nfs_fh *fh; int noresvport; unsigned int *auth_flav_len; rpc_authflavor_t *auth_flavs; struct net *net; }; extern int nfs_mount(struct nfs_mount_request *info); extern void nfs_umount(const struct nfs_mount_request *info); /* client.c */ extern const struct rpc_program nfs_program; extern void nfs_clients_init(struct net *net); extern void nfs_clients_exit(struct net *net); extern struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *); int nfs_create_rpc_client(struct nfs_client *, const struct nfs_client_initdata *, rpc_authflavor_t); struct nfs_client *nfs_get_client(const struct nfs_client_initdata *); int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *, struct nfs_fattr *); void nfs_server_insert_lists(struct nfs_server *); void nfs_server_remove_lists(struct nfs_server *); void nfs_init_timeout_values(struct rpc_timeout *to, int proto, int timeo, int retrans); int nfs_init_server_rpcclient(struct nfs_server *, const struct rpc_timeout *t, rpc_authflavor_t); struct nfs_server *nfs_alloc_server(void); void nfs_server_copy_userdata(struct nfs_server *, struct nfs_server *); extern void nfs_put_client(struct nfs_client *); extern void nfs_free_client(struct nfs_client *); extern struct nfs_client *nfs4_find_client_ident(struct net *, int); extern struct nfs_client * nfs4_find_client_sessionid(struct net *, const struct sockaddr *, struct nfs4_sessionid *, u32); extern struct nfs_server *nfs_create_server(struct fs_context *); extern struct nfs_server *nfs4_create_server(struct fs_context *); extern struct nfs_server *nfs4_create_referral_server(struct fs_context *); extern int nfs4_update_server(struct nfs_server *server, const char *hostname, struct sockaddr *sap, size_t salen, struct net *net); extern void nfs_free_server(struct nfs_server *server); extern struct nfs_server *nfs_clone_server(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, rpc_authflavor_t); extern bool nfs_client_init_is_complete(const struct nfs_client *clp); extern int nfs_client_init_status(const struct nfs_client *clp); extern int nfs_wait_client_init_complete(const struct nfs_client *clp); extern void nfs_mark_client_ready(struct nfs_client *clp, int state); extern struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, const struct sockaddr *ds_addr, int ds_addrlen, int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans, u32 minor_version); extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *, struct inode *); extern struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, const struct sockaddr *ds_addr, int ds_addrlen, int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans); #ifdef CONFIG_PROC_FS extern int __init nfs_fs_proc_init(void); extern void nfs_fs_proc_exit(void); extern int nfs_fs_proc_net_init(struct net *net); extern void nfs_fs_proc_net_exit(struct net *net); #else static inline int nfs_fs_proc_net_init(struct net *net) { return 0; } static inline void nfs_fs_proc_net_exit(struct net *net) { } static inline int nfs_fs_proc_init(void) { return 0; } static inline void nfs_fs_proc_exit(void) { } #endif /* callback_xdr.c */ extern const struct svc_version nfs4_callback_version1; extern const struct svc_version nfs4_callback_version4; /* fs_context.c */ extern struct file_system_type nfs_fs_type; /* pagelist.c */ extern int __init nfs_init_nfspagecache(void); extern void nfs_destroy_nfspagecache(void); extern int __init nfs_init_readpagecache(void); extern void nfs_destroy_readpagecache(void); extern int __init nfs_init_writepagecache(void); extern void nfs_destroy_writepagecache(void); extern int __init nfs_init_directcache(void); extern void nfs_destroy_directcache(void); extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr, void (*release)(struct nfs_pgio_header *hdr)); void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos); int nfs_iocounter_wait(struct nfs_lock_context *l_ctx); extern const struct nfs_pageio_ops nfs_pgio_rw_ops; struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *); void nfs_pgio_header_free(struct nfs_pgio_header *); int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, const struct cred *cred, const struct nfs_rpc_ops *rpc_ops, const struct rpc_call_ops *call_ops, int how, int flags); void nfs_free_request(struct nfs_page *req); struct nfs_pgio_mirror * nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc); static inline bool nfs_match_open_context(const struct nfs_open_context *ctx1, const struct nfs_open_context *ctx2) { return cred_fscmp(ctx1->cred, ctx2->cred) == 0 && ctx1->state == ctx2->state; } /* nfs2xdr.c */ extern const struct rpc_procinfo nfs_procedures[]; extern int nfs2_decode_dirent(struct xdr_stream *, struct nfs_entry *, bool); /* nfs3xdr.c */ extern const struct rpc_procinfo nfs3_procedures[]; extern int nfs3_decode_dirent(struct xdr_stream *, struct nfs_entry *, bool); /* nfs4xdr.c */ #if IS_ENABLED(CONFIG_NFS_V4) extern int nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, bool); #endif #ifdef CONFIG_NFS_V4_1 extern const u32 nfs41_maxread_overhead; extern const u32 nfs41_maxwrite_overhead; extern const u32 nfs41_maxgetdevinfo_overhead; #endif /* nfs4proc.c */ #if IS_ENABLED(CONFIG_NFS_V4) extern const struct rpc_procinfo nfs4_procedures[]; #endif #ifdef CONFIG_NFS_V4_SECURITY_LABEL extern struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags); static inline struct nfs4_label * nfs4_label_copy(struct nfs4_label *dst, struct nfs4_label *src) { if (!dst || !src) return NULL; if (src->len > NFS4_MAXLABELLEN) return NULL; dst->lfs = src->lfs; dst->pi = src->pi; dst->len = src->len; memcpy(dst->label, src->label, src->len); return dst; } static inline void nfs4_label_free(struct nfs4_label *label) { if (label) { kfree(label->label); kfree(label); } return; } static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi) { if (nfs_server_capable(&nfsi->vfs_inode, NFS_CAP_SECURITY_LABEL)) nfsi->cache_validity |= NFS_INO_INVALID_LABEL; } #else static inline struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) { return NULL; } static inline void nfs4_label_free(void *label) {} static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi) { } static inline struct nfs4_label * nfs4_label_copy(struct nfs4_label *dst, struct nfs4_label *src) { return NULL; } #endif /* CONFIG_NFS_V4_SECURITY_LABEL */ /* proc.c */ void nfs_close_context(struct nfs_open_context *ctx, int is_sync); extern struct nfs_client *nfs_init_client(struct nfs_client *clp, const struct nfs_client_initdata *); /* dir.c */ extern void nfs_advise_use_readdirplus(struct inode *dir); extern void nfs_force_use_readdirplus(struct inode *dir); extern unsigned long nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc); extern unsigned long nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc); struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int); int nfs_create(struct inode *, struct dentry *, umode_t, bool); int nfs_mkdir(struct inode *, struct dentry *, umode_t); int nfs_rmdir(struct inode *, struct dentry *); int nfs_unlink(struct inode *, struct dentry *); int nfs_symlink(struct inode *, struct dentry *, const char *); int nfs_link(struct dentry *, struct inode *, struct dentry *); int nfs_mknod(struct inode *, struct dentry *, umode_t, dev_t); int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); /* file.c */ int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync); loff_t nfs_file_llseek(struct file *, loff_t, int); ssize_t nfs_file_read(struct kiocb *, struct iov_iter *); int nfs_file_mmap(struct file *, struct vm_area_struct *); ssize_t nfs_file_write(struct kiocb *, struct iov_iter *); int nfs_file_release(struct inode *, struct file *); int nfs_lock(struct file *, int, struct file_lock *); int nfs_flock(struct file *, int, struct file_lock *); int nfs_check_flags(int); /* inode.c */ extern struct workqueue_struct *nfsiod_workqueue; extern struct inode *nfs_alloc_inode(struct super_block *sb); extern void nfs_free_inode(struct inode *); extern int nfs_write_inode(struct inode *, struct writeback_control *); extern int nfs_drop_inode(struct inode *); extern void nfs_clear_inode(struct inode *); extern void nfs_evict_inode(struct inode *); void nfs_zap_acl_cache(struct inode *inode); extern bool nfs_check_cache_invalid(struct inode *, unsigned long); extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode); extern int nfs_wait_atomic_killable(atomic_t *p, unsigned int mode); /* super.c */ extern const struct super_operations nfs_sops; bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t); int nfs_try_get_tree(struct fs_context *); int nfs_get_tree_common(struct fs_context *); void nfs_kill_super(struct super_block *); extern struct rpc_stat nfs_rpcstat; extern int __init register_nfs_fs(void); extern void __exit unregister_nfs_fs(void); extern bool nfs_sb_active(struct super_block *sb); extern void nfs_sb_deactive(struct super_block *sb); extern int nfs_client_for_each_server(struct nfs_client *clp, int (*fn)(struct nfs_server *, void *), void *data); /* io.c */ extern void nfs_start_io_read(struct inode *inode); extern void nfs_end_io_read(struct inode *inode); extern void nfs_start_io_write(struct inode *inode); extern void nfs_end_io_write(struct inode *inode); extern void nfs_start_io_direct(struct inode *inode); extern void nfs_end_io_direct(struct inode *inode); static inline bool nfs_file_io_is_buffered(struct nfs_inode *nfsi) { return test_bit(NFS_INO_ODIRECT, &nfsi->flags) == 0; } /* namespace.c */ #define NFS_PATH_CANONICAL 1 extern char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen, unsigned flags); extern struct vfsmount *nfs_d_automount(struct path *path); int nfs_submount(struct fs_context *, struct nfs_server *); int nfs_do_submount(struct fs_context *); /* getroot.c */ extern int nfs_get_root(struct super_block *s, struct fs_context *fc); #if IS_ENABLED(CONFIG_NFS_V4) extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool); #endif struct nfs_pgio_completion_ops; /* read.c */ extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops); extern void nfs_read_prepare(struct rpc_task *task, void *calldata); extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); /* super.c */ void nfs_umount_begin(struct super_block *); int nfs_statfs(struct dentry *, struct kstatfs *); int nfs_show_options(struct seq_file *, struct dentry *); int nfs_show_devname(struct seq_file *, struct dentry *); int nfs_show_path(struct seq_file *, struct dentry *); int nfs_show_stats(struct seq_file *, struct dentry *); int nfs_reconfigure(struct fs_context *); /* write.c */ extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops); extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_commit_free(struct nfs_commit_data *p); extern void nfs_write_prepare(struct rpc_task *task, void *calldata); extern void nfs_commit_prepare(struct rpc_task *task, void *calldata); extern int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, const struct nfs_rpc_ops *nfs_ops, const struct rpc_call_ops *call_ops, int how, int flags); extern void nfs_init_commit(struct nfs_commit_data *data, struct list_head *head, struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo); int nfs_scan_commit_list(struct list_head *src, struct list_head *dst, struct nfs_commit_info *cinfo, int max); unsigned long nfs_reqs_to_commit(struct nfs_commit_info *); int nfs_scan_commit(struct inode *inode, struct list_head *dst, struct nfs_commit_info *cinfo); void nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo, u32 ds_commit_idx); int nfs_write_need_commit(struct nfs_pgio_header *); void nfs_writeback_update_inode(struct nfs_pgio_header *hdr); int nfs_generic_commit_list(struct inode *inode, struct list_head *head, int how, struct nfs_commit_info *cinfo); void nfs_retry_commit(struct list_head *page_list, struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo, u32 ds_commit_idx); void nfs_commitdata_release(struct nfs_commit_data *data); void nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo); void nfs_request_add_commit_list_locked(struct nfs_page *req, struct list_head *dst, struct nfs_commit_info *cinfo); void nfs_request_remove_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo); void nfs_init_cinfo(struct nfs_commit_info *cinfo, struct inode *inode, struct nfs_direct_req *dreq); int nfs_key_timeout_notify(struct file *filp, struct inode *inode); bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx, struct inode *inode); void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio); int nfs_filemap_write_and_wait_range(struct address_space *mapping, loff_t lstart, loff_t lend); #ifdef CONFIG_NFS_V4_1 static inline void pnfs_bucket_clear_pnfs_ds_commit_verifiers(struct pnfs_commit_bucket *buckets, unsigned int nbuckets) { unsigned int i; for (i = 0; i < nbuckets; i++) buckets[i].direct_verf.committed = NFS_INVALID_STABLE_HOW; } static inline void nfs_clear_pnfs_ds_commit_verifiers(struct pnfs_ds_commit_info *cinfo) { struct pnfs_commit_array *array; rcu_read_lock(); list_for_each_entry_rcu(array, &cinfo->commits, cinfo_list) pnfs_bucket_clear_pnfs_ds_commit_verifiers(array->buckets, array->nbuckets); rcu_read_unlock(); } #else static inline void nfs_clear_pnfs_ds_commit_verifiers(struct pnfs_ds_commit_info *cinfo) { } #endif #ifdef CONFIG_MIGRATION extern int nfs_migrate_page(struct address_space *, struct page *, struct page *, enum migrate_mode); #endif static inline int nfs_write_verifier_cmp(const struct nfs_write_verifier *v1, const struct nfs_write_verifier *v2) { return memcmp(v1->data, v2->data, sizeof(v1->data)); } static inline bool nfs_write_match_verf(const struct nfs_writeverf *verf, struct nfs_page *req) { return verf->committed > NFS_UNSTABLE && !nfs_write_verifier_cmp(&req->wb_verf, &verf->verifier); } static inline gfp_t nfs_io_gfp_mask(void) { if (current->flags & PF_WQ_WORKER) return GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; return GFP_KERNEL; } /* unlink.c */ extern struct rpc_task * nfs_async_rename(struct inode *old_dir, struct inode *new_dir, struct dentry *old_dentry, struct dentry *new_dentry, void (*complete)(struct rpc_task *, struct nfs_renamedata *)); extern int nfs_sillyrename(struct inode *dir, struct dentry *dentry); /* direct.c */ void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, struct nfs_direct_req *dreq); extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); /* nfs4proc.c */ extern struct nfs_client *nfs4_init_client(struct nfs_client *clp, const struct nfs_client_initdata *); extern int nfs40_walk_client_list(struct nfs_client *clp, struct nfs_client **result, const struct cred *cred); extern int nfs41_walk_client_list(struct nfs_client *clp, struct nfs_client **result, const struct cred *cred); extern void nfs4_test_session_trunk(struct rpc_clnt *clnt, struct rpc_xprt *xprt, void *data); static inline struct inode *nfs_igrab_and_active(struct inode *inode) { struct super_block *sb = inode->i_sb; if (sb && nfs_sb_active(sb)) { if (igrab(inode)) return inode; nfs_sb_deactive(sb); } return NULL; } static inline void nfs_iput_and_deactive(struct inode *inode) { if (inode != NULL) { struct super_block *sb = inode->i_sb; iput(inode); nfs_sb_deactive(sb); } } /* * Determine the device name as a string */ static inline char *nfs_devname(struct dentry *dentry, char *buffer, ssize_t buflen) { char *dummy; return nfs_path(&dummy, dentry, buffer, buflen, NFS_PATH_CANONICAL); } /* * Determine the actual block size (and log2 thereof) */ static inline unsigned long nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp) { /* make sure blocksize is a power of two */ if ((bsize & (bsize - 1)) || nrbitsp) { unsigned char nrbits; for (nrbits = 31; nrbits && !(bsize & (1 << nrbits)); nrbits--) ; bsize = 1 << nrbits; if (nrbitsp) *nrbitsp = nrbits; } return bsize; } /* * Calculate the number of 512byte blocks used. */ static inline blkcnt_t nfs_calc_block_size(u64 tsize) { blkcnt_t used = (tsize + 511) >> 9; return (used > ULONG_MAX) ? ULONG_MAX : used; } /* * Compute and set NFS server blocksize */ static inline unsigned long nfs_block_size(unsigned long bsize, unsigned char *nrbitsp) { if (bsize < NFS_MIN_FILE_IO_SIZE) bsize = NFS_DEF_FILE_IO_SIZE; else if (bsize >= NFS_MAX_FILE_IO_SIZE) bsize = NFS_MAX_FILE_IO_SIZE; return nfs_block_bits(bsize, nrbitsp); } /* * Determine the maximum file size for a superblock */ static inline void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize) { sb->s_maxbytes = (loff_t)maxfilesize; if (sb->s_maxbytes > MAX_LFS_FILESIZE || sb->s_maxbytes <= 0) sb->s_maxbytes = MAX_LFS_FILESIZE; } /* * Record the page as unstable (an extra writeback period) and mark its * inode as dirty. */ static inline void nfs_mark_page_unstable(struct page *page, struct nfs_commit_info *cinfo) { if (!cinfo->dreq) { struct inode *inode = page_file_mapping(page)->host; /* This page is really still in write-back - just that the * writeback is happening on the server now. */ inc_node_page_state(page, NR_WRITEBACK); inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK); __mark_inode_dirty(inode, I_DIRTY_DATASYNC); } } /* * Determine the number of bytes of data the page contains */ static inline unsigned int nfs_page_length(struct page *page) { loff_t i_size = i_size_read(page_file_mapping(page)->host); if (i_size > 0) { pgoff_t index = page_index(page); pgoff_t end_index = (i_size - 1) >> PAGE_SHIFT; if (index < end_index) return PAGE_SIZE; if (index == end_index) return ((i_size - 1) & ~PAGE_MASK) + 1; } return 0; } /* * Convert a umode to a dirent->d_type */ static inline unsigned char nfs_umode_to_dtype(umode_t mode) { return (mode >> 12) & 15; } /* * Determine the number of pages in an array of length 'len' and * with a base offset of 'base' */ static inline unsigned int nfs_page_array_len(unsigned int base, size_t len) { return ((unsigned long)len + (unsigned long)base + PAGE_SIZE - 1) >> PAGE_SHIFT; } /* * Convert a struct timespec64 into a 64-bit change attribute * * This does approximately the same thing as timespec64_to_ns(), * but for calculation efficiency, we multiply the seconds by * 1024*1024*1024. */ static inline u64 nfs_timespec_to_change_attr(const struct timespec64 *ts) { return ((u64)ts->tv_sec << 30) + ts->tv_nsec; } #ifdef CONFIG_CRC32 /** * nfs_fhandle_hash - calculate the crc32 hash for the filehandle * @fh - pointer to filehandle * * returns a crc32 hash for the filehandle that is compatible with * the one displayed by "wireshark". */ static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) { return ~crc32_le(0xFFFFFFFF, &fh->data[0], fh->size); } static inline u32 nfs_stateid_hash(const nfs4_stateid *stateid) { return ~crc32_le(0xFFFFFFFF, &stateid->other[0], NFS4_STATEID_OTHER_SIZE); } #else static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) { return 0; } static inline u32 nfs_stateid_hash(nfs4_stateid *stateid) { return 0; } #endif static inline bool nfs_error_is_fatal(int err) { switch (err) { case -ERESTARTSYS: case -EINTR: case -EACCES: case -EDQUOT: case -EFBIG: case -EIO: case -ENOSPC: case -EROFS: case -ESTALE: case -E2BIG: case -ENOMEM: case -ETIMEDOUT: return true; default: return false; } } static inline bool nfs_error_is_fatal_on_server(int err) { switch (err) { case 0: case -ERESTARTSYS: case -EINTR: case -ENOMEM: return false; } return nfs_error_is_fatal(err); } /* * Select between a default port value and a user-specified port value. * If a zero value is set, then autobind will be used. */ static inline void nfs_set_port(struct sockaddr *sap, int *port, const unsigned short default_port) { if (*port == NFS_UNSPEC_PORT) *port = default_port; rpc_set_port(sap, *port); }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 /* SPDX-License-Identifier: GPL-2.0 */ /* * Common values for AES algorithms */ #ifndef _CRYPTO_AES_H #define _CRYPTO_AES_H #include <linux/types.h> #include <linux/crypto.h> #define AES_MIN_KEY_SIZE 16 #define AES_MAX_KEY_SIZE 32 #define AES_KEYSIZE_128 16 #define AES_KEYSIZE_192 24 #define AES_KEYSIZE_256 32 #define AES_BLOCK_SIZE 16 #define AES_MAX_KEYLENGTH (15 * 16) #define AES_MAX_KEYLENGTH_U32 (AES_MAX_KEYLENGTH / sizeof(u32)) /* * Please ensure that the first two fields are 16-byte aligned * relative to the start of the structure, i.e., don't move them! */ struct crypto_aes_ctx { u32 key_enc[AES_MAX_KEYLENGTH_U32]; u32 key_dec[AES_MAX_KEYLENGTH_U32]; u32 key_length; }; extern const u32 crypto_ft_tab[4][256] ____cacheline_aligned; extern const u32 crypto_it_tab[4][256] ____cacheline_aligned; /* * validate key length for AES algorithms */ static inline int aes_check_keylen(unsigned int keylen) { switch (keylen) { case AES_KEYSIZE_128: case AES_KEYSIZE_192: case AES_KEYSIZE_256: break; default: return -EINVAL; } return 0; } int crypto_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len); /** * aes_expandkey - Expands the AES key as described in FIPS-197 * @ctx: The location where the computed key will be stored. * @in_key: The supplied key. * @key_len: The length of the supplied key. * * Returns 0 on success. The function fails only if an invalid key size (or * pointer) is supplied. * The expanded key size is 240 bytes (max of 14 rounds with a unique 16 bytes * key schedule plus a 16 bytes key which is used before the first round). * The decryption key is prepared for the "Equivalent Inverse Cipher" as * described in FIPS-197. The first slot (16 bytes) of each key (enc or dec) is * for the initial combination, the second slot for the first round and so on. */ int aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, unsigned int key_len); /** * aes_encrypt - Encrypt a single AES block * @ctx: Context struct containing the key schedule * @out: Buffer to store the ciphertext * @in: Buffer containing the plaintext */ void aes_encrypt(const struct crypto_aes_ctx *ctx, u8 *out, const u8 *in); /** * aes_decrypt - Decrypt a single AES block * @ctx: Context struct containing the key schedule * @out: Buffer to store the plaintext * @in: Buffer containing the ciphertext */ void aes_decrypt(const struct crypto_aes_ctx *ctx, u8 *out, const u8 *in); extern const u8 crypto_aes_sbox[]; extern const u8 crypto_aes_inv_sbox[]; #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * AEAD: Authenticated Encryption with Associated Data * * Copyright (c) 2007-2015 Herbert Xu <herbert@gondor.apana.org.au> */ #ifndef _CRYPTO_AEAD_H #define _CRYPTO_AEAD_H #include <linux/crypto.h> #include <linux/kernel.h> #include <linux/slab.h> /** * DOC: Authenticated Encryption With Associated Data (AEAD) Cipher API * * The AEAD cipher API is used with the ciphers of type CRYPTO_ALG_TYPE_AEAD * (listed as type "aead" in /proc/crypto) * * The most prominent examples for this type of encryption is GCM and CCM. * However, the kernel supports other types of AEAD ciphers which are defined * with the following cipher string: * * authenc(keyed message digest, block cipher) * * For example: authenc(hmac(sha256), cbc(aes)) * * The example code provided for the symmetric key cipher operation * applies here as well. Naturally all *skcipher* symbols must be exchanged * the *aead* pendants discussed in the following. In addition, for the AEAD * operation, the aead_request_set_ad function must be used to set the * pointer to the associated data memory location before performing the * encryption or decryption operation. In case of an encryption, the associated * data memory is filled during the encryption operation. For decryption, the * associated data memory must contain data that is used to verify the integrity * of the decrypted data. Another deviation from the asynchronous block cipher * operation is that the caller should explicitly check for -EBADMSG of the * crypto_aead_decrypt. That error indicates an authentication error, i.e. * a breach in the integrity of the message. In essence, that -EBADMSG error * code is the key bonus an AEAD cipher has over "standard" block chaining * modes. * * Memory Structure: * * The source scatterlist must contain the concatenation of * associated data || plaintext or ciphertext. * * The destination scatterlist has the same layout, except that the plaintext * (resp. ciphertext) will grow (resp. shrink) by the authentication tag size * during encryption (resp. decryption). * * In-place encryption/decryption is enabled by using the same scatterlist * pointer for both the source and destination. * * Even in the out-of-place case, space must be reserved in the destination for * the associated data, even though it won't be written to. This makes the * in-place and out-of-place cases more consistent. It is permissible for the * "destination" associated data to alias the "source" associated data. * * As with the other scatterlist crypto APIs, zero-length scatterlist elements * are not allowed in the used part of the scatterlist. Thus, if there is no * associated data, the first element must point to the plaintext/ciphertext. * * To meet the needs of IPsec, a special quirk applies to rfc4106, rfc4309, * rfc4543, and rfc7539esp ciphers. For these ciphers, the final 'ivsize' bytes * of the associated data buffer must contain a second copy of the IV. This is * in addition to the copy passed to aead_request_set_crypt(). These two IV * copies must not differ; different implementations of the same algorithm may * behave differently in that case. Note that the algorithm might not actually * treat the IV as associated data; nevertheless the length passed to * aead_request_set_ad() must include it. */ struct crypto_aead; /** * struct aead_request - AEAD request * @base: Common attributes for async crypto requests * @assoclen: Length in bytes of associated data for authentication * @cryptlen: Length of data to be encrypted or decrypted * @iv: Initialisation vector * @src: Source data * @dst: Destination data * @__ctx: Start of private context data */ struct aead_request { struct crypto_async_request base; unsigned int assoclen; unsigned int cryptlen; u8 *iv; struct scatterlist *src; struct scatterlist *dst; void *__ctx[] CRYPTO_MINALIGN_ATTR; }; /** * struct aead_alg - AEAD cipher definition * @maxauthsize: Set the maximum authentication tag size supported by the * transformation. A transformation may support smaller tag sizes. * As the authentication tag is a message digest to ensure the * integrity of the encrypted data, a consumer typically wants the * largest authentication tag possible as defined by this * variable. * @setauthsize: Set authentication size for the AEAD transformation. This * function is used to specify the consumer requested size of the * authentication tag to be either generated by the transformation * during encryption or the size of the authentication tag to be * supplied during the decryption operation. This function is also * responsible for checking the authentication tag size for * validity. * @setkey: see struct skcipher_alg * @encrypt: see struct skcipher_alg * @decrypt: see struct skcipher_alg * @ivsize: see struct skcipher_alg * @chunksize: see struct skcipher_alg * @init: Initialize the cryptographic transformation object. This function * is used to initialize the cryptographic transformation object. * This function is called only once at the instantiation time, right * after the transformation context was allocated. In case the * cryptographic hardware has some special requirements which need to * be handled by software, this function shall check for the precise * requirement of the transformation and put any software fallbacks * in place. * @exit: Deinitialize the cryptographic transformation object. This is a * counterpart to @init, used to remove various changes set in * @init. * @base: Definition of a generic crypto cipher algorithm. * * All fields except @ivsize is mandatory and must be filled. */ struct aead_alg { int (*setkey)(struct crypto_aead *tfm, const u8 *key, unsigned int keylen); int (*setauthsize)(struct crypto_aead *tfm, unsigned int authsize); int (*encrypt)(struct aead_request *req); int (*decrypt)(struct aead_request *req); int (*init)(struct crypto_aead *tfm); void (*exit)(struct crypto_aead *tfm); unsigned int ivsize; unsigned int maxauthsize; unsigned int chunksize; struct crypto_alg base; }; struct crypto_aead { unsigned int authsize; unsigned int reqsize; struct crypto_tfm base; }; static inline struct crypto_aead *__crypto_aead_cast(struct crypto_tfm *tfm) { return container_of(tfm, struct crypto_aead, base); } /** * crypto_alloc_aead() - allocate AEAD cipher handle * @alg_name: is the cra_name / name or cra_driver_name / driver name of the * AEAD cipher * @type: specifies the type of the cipher * @mask: specifies the mask for the cipher * * Allocate a cipher handle for an AEAD. The returned struct * crypto_aead is the cipher handle that is required for any subsequent * API invocation for that AEAD. * * Return: allocated cipher handle in case of success; IS_ERR() is true in case * of an error, PTR_ERR() returns the error code. */ struct crypto_aead *crypto_alloc_aead(const char *alg_name, u32 type, u32 mask); static inline struct crypto_tfm *crypto_aead_tfm(struct crypto_aead *tfm) { return &tfm->base; } /** * crypto_free_aead() - zeroize and free aead handle * @tfm: cipher handle to be freed * * If @tfm is a NULL or error pointer, this function does nothing. */ static inline void crypto_free_aead(struct crypto_aead *tfm) { crypto_destroy_tfm(tfm, crypto_aead_tfm(tfm)); } static inline struct aead_alg *crypto_aead_alg(struct crypto_aead *tfm) { return container_of(crypto_aead_tfm(tfm)->__crt_alg, struct aead_alg, base); } static inline unsigned int crypto_aead_alg_ivsize(struct aead_alg *alg) { return alg->ivsize; } /** * crypto_aead_ivsize() - obtain IV size * @tfm: cipher handle * * The size of the IV for the aead referenced by the cipher handle is * returned. This IV size may be zero if the cipher does not need an IV. * * Return: IV size in bytes */ static inline unsigned int crypto_aead_ivsize(struct crypto_aead *tfm) { return crypto_aead_alg_ivsize(crypto_aead_alg(tfm)); } /** * crypto_aead_authsize() - obtain maximum authentication data size * @tfm: cipher handle * * The maximum size of the authentication data for the AEAD cipher referenced * by the AEAD cipher handle is returned. The authentication data size may be * zero if the cipher implements a hard-coded maximum. * * The authentication data may also be known as "tag value". * * Return: authentication data size / tag size in bytes */ static inline unsigned int crypto_aead_authsize(struct crypto_aead *tfm) { return tfm->authsize; } static inline unsigned int crypto_aead_alg_maxauthsize(struct aead_alg *alg) { return alg->maxauthsize; } static inline unsigned int crypto_aead_maxauthsize(struct crypto_aead *aead) { return crypto_aead_alg_maxauthsize(crypto_aead_alg(aead)); } /** * crypto_aead_blocksize() - obtain block size of cipher * @tfm: cipher handle * * The block size for the AEAD referenced with the cipher handle is returned. * The caller may use that information to allocate appropriate memory for the * data returned by the encryption or decryption operation * * Return: block size of cipher */ static inline unsigned int crypto_aead_blocksize(struct crypto_aead *tfm) { return crypto_tfm_alg_blocksize(crypto_aead_tfm(tfm)); } static inline unsigned int crypto_aead_alignmask(struct crypto_aead *tfm) { return crypto_tfm_alg_alignmask(crypto_aead_tfm(tfm)); } static inline u32 crypto_aead_get_flags(struct crypto_aead *tfm) { return crypto_tfm_get_flags(crypto_aead_tfm(tfm)); } static inline void crypto_aead_set_flags(struct crypto_aead *tfm, u32 flags) { crypto_tfm_set_flags(crypto_aead_tfm(tfm), flags); } static inline void crypto_aead_clear_flags(struct crypto_aead *tfm, u32 flags) { crypto_tfm_clear_flags(crypto_aead_tfm(tfm), flags); } /** * crypto_aead_setkey() - set key for cipher * @tfm: cipher handle * @key: buffer holding the key * @keylen: length of the key in bytes * * The caller provided key is set for the AEAD referenced by the cipher * handle. * * Note, the key length determines the cipher type. Many block ciphers implement * different cipher modes depending on the key size, such as AES-128 vs AES-192 * vs. AES-256. When providing a 16 byte key for an AES cipher handle, AES-128 * is performed. * * Return: 0 if the setting of the key was successful; < 0 if an error occurred */ int crypto_aead_setkey(struct crypto_aead *tfm, const u8 *key, unsigned int keylen); /** * crypto_aead_setauthsize() - set authentication data size * @tfm: cipher handle * @authsize: size of the authentication data / tag in bytes * * Set the authentication data size / tag size. AEAD requires an authentication * tag (or MAC) in addition to the associated data. * * Return: 0 if the setting of the key was successful; < 0 if an error occurred */ int crypto_aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize); static inline struct crypto_aead *crypto_aead_reqtfm(struct aead_request *req) { return __crypto_aead_cast(req->base.tfm); } /** * crypto_aead_encrypt() - encrypt plaintext * @req: reference to the aead_request handle that holds all information * needed to perform the cipher operation * * Encrypt plaintext data using the aead_request handle. That data structure * and how it is filled with data is discussed with the aead_request_* * functions. * * IMPORTANT NOTE The encryption operation creates the authentication data / * tag. That data is concatenated with the created ciphertext. * The ciphertext memory size is therefore the given number of * block cipher blocks + the size defined by the * crypto_aead_setauthsize invocation. The caller must ensure * that sufficient memory is available for the ciphertext and * the authentication tag. * * Return: 0 if the cipher operation was successful; < 0 if an error occurred */ int crypto_aead_encrypt(struct aead_request *req); /** * crypto_aead_decrypt() - decrypt ciphertext * @req: reference to the aead_request handle that holds all information * needed to perform the cipher operation * * Decrypt ciphertext data using the aead_request handle. That data structure * and how it is filled with data is discussed with the aead_request_* * functions. * * IMPORTANT NOTE The caller must concatenate the ciphertext followed by the * authentication data / tag. That authentication data / tag * must have the size defined by the crypto_aead_setauthsize * invocation. * * * Return: 0 if the cipher operation was successful; -EBADMSG: The AEAD * cipher operation performs the authentication of the data during the * decryption operation. Therefore, the function returns this error if * the authentication of the ciphertext was unsuccessful (i.e. the * integrity of the ciphertext or the associated data was violated); * < 0 if an error occurred. */ int crypto_aead_decrypt(struct aead_request *req); /** * DOC: Asynchronous AEAD Request Handle * * The aead_request data structure contains all pointers to data required for * the AEAD cipher operation. This includes the cipher handle (which can be * used by multiple aead_request instances), pointer to plaintext and * ciphertext, asynchronous callback function, etc. It acts as a handle to the * aead_request_* API calls in a similar way as AEAD handle to the * crypto_aead_* API calls. */ /** * crypto_aead_reqsize() - obtain size of the request data structure * @tfm: cipher handle * * Return: number of bytes */ static inline unsigned int crypto_aead_reqsize(struct crypto_aead *tfm) { return tfm->reqsize; } /** * aead_request_set_tfm() - update cipher handle reference in request * @req: request handle to be modified * @tfm: cipher handle that shall be added to the request handle * * Allow the caller to replace the existing aead handle in the request * data structure with a different one. */ static inline void aead_request_set_tfm(struct aead_request *req, struct crypto_aead *tfm) { req->base.tfm = crypto_aead_tfm(tfm); } /** * aead_request_alloc() - allocate request data structure * @tfm: cipher handle to be registered with the request * @gfp: memory allocation flag that is handed to kmalloc by the API call. * * Allocate the request data structure that must be used with the AEAD * encrypt and decrypt API calls. During the allocation, the provided aead * handle is registered in the request data structure. * * Return: allocated request handle in case of success, or NULL if out of memory */ static inline struct aead_request *aead_request_alloc(struct crypto_aead *tfm, gfp_t gfp) { struct aead_request *req; req = kmalloc(sizeof(*req) + crypto_aead_reqsize(tfm), gfp); if (likely(req)) aead_request_set_tfm(req, tfm); return req; } /** * aead_request_free() - zeroize and free request data structure * @req: request data structure cipher handle to be freed */ static inline void aead_request_free(struct aead_request *req) { kfree_sensitive(req); } /** * aead_request_set_callback() - set asynchronous callback function * @req: request handle * @flags: specify zero or an ORing of the flags * CRYPTO_TFM_REQ_MAY_BACKLOG the request queue may back log and * increase the wait queue beyond the initial maximum size; * CRYPTO_TFM_REQ_MAY_SLEEP the request processing may sleep * @compl: callback function pointer to be registered with the request handle * @data: The data pointer refers to memory that is not used by the kernel * crypto API, but provided to the callback function for it to use. Here, * the caller can provide a reference to memory the callback function can * operate on. As the callback function is invoked asynchronously to the * related functionality, it may need to access data structures of the * related functionality which can be referenced using this pointer. The * callback function can access the memory via the "data" field in the * crypto_async_request data structure provided to the callback function. * * Setting the callback function that is triggered once the cipher operation * completes * * The callback function is registered with the aead_request handle and * must comply with the following template:: * * void callback_function(struct crypto_async_request *req, int error) */ static inline void aead_request_set_callback(struct aead_request *req, u32 flags, crypto_completion_t compl, void *data) { req->base.complete = compl; req->base.data = data; req->base.flags = flags; } /** * aead_request_set_crypt - set data buffers * @req: request handle * @src: source scatter / gather list * @dst: destination scatter / gather list * @cryptlen: number of bytes to process from @src * @iv: IV for the cipher operation which must comply with the IV size defined * by crypto_aead_ivsize() * * Setting the source data and destination data scatter / gather lists which * hold the associated data concatenated with the plaintext or ciphertext. See * below for the authentication tag. * * For encryption, the source is treated as the plaintext and the * destination is the ciphertext. For a decryption operation, the use is * reversed - the source is the ciphertext and the destination is the plaintext. * * The memory structure for cipher operation has the following structure: * * - AEAD encryption input: assoc data || plaintext * - AEAD encryption output: assoc data || cipherntext || auth tag * - AEAD decryption input: assoc data || ciphertext || auth tag * - AEAD decryption output: assoc data || plaintext * * Albeit the kernel requires the presence of the AAD buffer, however, * the kernel does not fill the AAD buffer in the output case. If the * caller wants to have that data buffer filled, the caller must either * use an in-place cipher operation (i.e. same memory location for * input/output memory location). */ static inline void aead_request_set_crypt(struct aead_request *req, struct scatterlist *src, struct scatterlist *dst, unsigned int cryptlen, u8 *iv) { req->src = src; req->dst = dst; req->cryptlen = cryptlen; req->iv = iv; } /** * aead_request_set_ad - set associated data information * @req: request handle * @assoclen: number of bytes in associated data * * Setting the AD information. This function sets the length of * the associated data. */ static inline void aead_request_set_ad(struct aead_request *req, unsigned int assoclen) { req->assoclen = assoclen; } #endif /* _CRYPTO_AEAD_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 /* SPDX-License-Identifier: GPL-2.0 */ /* * Definitions of structures and functions for quota formats using trie */ #ifndef _LINUX_DQBLK_QTREE_H #define _LINUX_DQBLK_QTREE_H #include <linux/types.h> /* Numbers of blocks needed for updates - we count with the smallest * possible block size (1024) */ #define QTREE_INIT_ALLOC 4 #define QTREE_INIT_REWRITE 2 #define QTREE_DEL_ALLOC 0 #define QTREE_DEL_REWRITE 6 struct dquot; struct kqid; /* Operations */ struct qtree_fmt_operations { void (*mem2disk_dqblk)(void *disk, struct dquot *dquot); /* Convert given entry from in memory format to disk one */ void (*disk2mem_dqblk)(struct dquot *dquot, void *disk); /* Convert given entry from disk format to in memory one */ int (*is_id)(void *disk, struct dquot *dquot); /* Is this structure for given id? */ }; /* Inmemory copy of version specific information */ struct qtree_mem_dqinfo { struct super_block *dqi_sb; /* Sb quota is on */ int dqi_type; /* Quota type */ unsigned int dqi_blocks; /* # of blocks in quota file */ unsigned int dqi_free_blk; /* First block in list of free blocks */ unsigned int dqi_free_entry; /* First block with free entry */ unsigned int dqi_blocksize_bits; /* Block size of quota file */ unsigned int dqi_entry_size; /* Size of quota entry in quota file */ unsigned int dqi_usable_bs; /* Space usable in block for quota data */ unsigned int dqi_qtree_depth; /* Precomputed depth of quota tree */ const struct qtree_fmt_operations *dqi_ops; /* Operations for entry manipulation */ }; int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot); int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot); int qtree_delete_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot); int qtree_release_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot); int qtree_entry_unused(struct qtree_mem_dqinfo *info, char *disk); static inline int qtree_depth(struct qtree_mem_dqinfo *info) { unsigned int epb = info->dqi_usable_bs >> 2; unsigned long long entries = epb; int i; for (i = 1; entries < (1ULL << 32); i++) entries *= epb; return i; } int qtree_get_next_id(struct qtree_mem_dqinfo *info, struct kqid *qid); #endif /* _LINUX_DQBLK_QTREE_H */
1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 /* SPDX-License-Identifier: GPL-2.0 */ /* * This header provides generic wrappers for memory access instrumentation that * the compiler cannot emit for: KASAN, KCSAN. */ #ifndef _LINUX_INSTRUMENTED_H #define _LINUX_INSTRUMENTED_H #include <linux/compiler.h> #include <linux/kasan-checks.h> #include <linux/kcsan-checks.h> #include <linux/types.h> /** * instrument_read - instrument regular read access * * Instrument a regular read access. The instrumentation should be inserted * before the actual read happens. * * @ptr address of access * @size size of access */ static __always_inline void instrument_read(const volatile void *v, size_t size) { kasan_check_read(v, size); kcsan_check_read(v, size); } /** * instrument_write - instrument regular write access * * Instrument a regular write access. The instrumentation should be inserted * before the actual write happens. * * @ptr address of access * @size size of access */ static __always_inline void instrument_write(const volatile void *v, size_t size) { kasan_check_write(v, size); kcsan_check_write(v, size); } /** * instrument_read_write - instrument regular read-write access * * Instrument a regular write access. The instrumentation should be inserted * before the actual write happens. * * @ptr address of access * @size size of access */ static __always_inline void instrument_read_write(const volatile void *v, size_t size) { kasan_check_write(v, size); kcsan_check_read_write(v, size); } /** * instrument_atomic_read - instrument atomic read access * * Instrument an atomic read access. The instrumentation should be inserted * before the actual read happens. * * @ptr address of access * @size size of access */ static __always_inline void instrument_atomic_read(const volatile void *v, size_t size) { kasan_check_read(v, size); kcsan_check_atomic_read(v, size); } /** * instrument_atomic_write - instrument atomic write access * * Instrument an atomic write access. The instrumentation should be inserted * before the actual write happens. * * @ptr address of access * @size size of access */ static __always_inline void instrument_atomic_write(const volatile void *v, size_t size) { kasan_check_write(v, size); kcsan_check_atomic_write(v, size); } /** * instrument_atomic_read_write - instrument atomic read-write access * * Instrument an atomic read-write access. The instrumentation should be * inserted before the actual write happens. * * @ptr address of access * @size size of access */ static __always_inline void instrument_atomic_read_write(const volatile void *v, size_t size) { kasan_check_write(v, size); kcsan_check_atomic_read_write(v, size); } /** * instrument_copy_to_user - instrument reads of copy_to_user * * Instrument reads from kernel memory, that are due to copy_to_user (and * variants). The instrumentation must be inserted before the accesses. * * @to destination address * @from source address * @n number of bytes to copy */ static __always_inline void instrument_copy_to_user(void __user *to, const void *from, unsigned long n) { kasan_check_read(from, n); kcsan_check_read(from, n); } /** * instrument_copy_from_user - instrument writes of copy_from_user * * Instrument writes to kernel memory, that are due to copy_from_user (and * variants). The instrumentation should be inserted before the accesses. * * @to destination address * @from source address * @n number of bytes to copy */ static __always_inline void instrument_copy_from_user(const void *to, const void __user *from, unsigned long n) { kasan_check_write(to, n); kcsan_check_write(to, n); } #endif /* _LINUX_INSTRUMENTED_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* audit.h -- Auditing support * * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. * All Rights Reserved. * * Written by Rickard E. (Rik) Faith <faith@redhat.com> */ #ifndef _LINUX_AUDIT_H_ #define _LINUX_AUDIT_H_ #include <linux/sched.h> #include <linux/ptrace.h> #include <uapi/linux/audit.h> #include <uapi/linux/netfilter/nf_tables.h> #define AUDIT_INO_UNSET ((unsigned long)-1) #define AUDIT_DEV_UNSET ((dev_t)-1) struct audit_sig_info { uid_t uid; pid_t pid; char ctx[]; }; struct audit_buffer; struct audit_context; struct inode; struct netlink_skb_parms; struct path; struct linux_binprm; struct mq_attr; struct mqstat; struct audit_watch; struct audit_tree; struct sk_buff; struct audit_krule { u32 pflags; u32 flags; u32 listnr; u32 action; u32 mask[AUDIT_BITMASK_SIZE]; u32 buflen; /* for data alloc on list rules */ u32 field_count; char *filterkey; /* ties events to rules */ struct audit_field *fields; struct audit_field *arch_f; /* quick access to arch field */ struct audit_field *inode_f; /* quick access to an inode field */ struct audit_watch *watch; /* associated watch */ struct audit_tree *tree; /* associated watched tree */ struct audit_fsnotify_mark *exe; struct list_head rlist; /* entry in audit_{watch,tree}.rules list */ struct list_head list; /* for AUDIT_LIST* purposes only */ u64 prio; }; /* Flag to indicate legacy AUDIT_LOGINUID unset usage */ #define AUDIT_LOGINUID_LEGACY 0x1 struct audit_field { u32 type; union { u32 val; kuid_t uid; kgid_t gid; struct { char *lsm_str; void *lsm_rule; }; }; u32 op; }; enum audit_ntp_type { AUDIT_NTP_OFFSET, AUDIT_NTP_FREQ, AUDIT_NTP_STATUS, AUDIT_NTP_TAI, AUDIT_NTP_TICK, AUDIT_NTP_ADJUST, AUDIT_NTP_NVALS /* count */ }; #ifdef CONFIG_AUDITSYSCALL struct audit_ntp_val { long long oldval, newval; }; struct audit_ntp_data { struct audit_ntp_val vals[AUDIT_NTP_NVALS]; }; #else struct audit_ntp_data {}; #endif enum audit_nfcfgop { AUDIT_XT_OP_REGISTER, AUDIT_XT_OP_REPLACE, AUDIT_XT_OP_UNREGISTER, AUDIT_NFT_OP_TABLE_REGISTER, AUDIT_NFT_OP_TABLE_UNREGISTER, AUDIT_NFT_OP_CHAIN_REGISTER, AUDIT_NFT_OP_CHAIN_UNREGISTER, AUDIT_NFT_OP_RULE_REGISTER, AUDIT_NFT_OP_RULE_UNREGISTER, AUDIT_NFT_OP_SET_REGISTER, AUDIT_NFT_OP_SET_UNREGISTER, AUDIT_NFT_OP_SETELEM_REGISTER, AUDIT_NFT_OP_SETELEM_UNREGISTER, AUDIT_NFT_OP_GEN_REGISTER, AUDIT_NFT_OP_OBJ_REGISTER, AUDIT_NFT_OP_OBJ_UNREGISTER, AUDIT_NFT_OP_OBJ_RESET, AUDIT_NFT_OP_FLOWTABLE_REGISTER, AUDIT_NFT_OP_FLOWTABLE_UNREGISTER, AUDIT_NFT_OP_INVALID, }; extern int is_audit_feature_set(int which); extern int __init audit_register_class(int class, unsigned *list); extern int audit_classify_syscall(int abi, unsigned syscall); extern int audit_classify_arch(int arch); /* only for compat system calls */ extern unsigned compat_write_class[]; extern unsigned compat_read_class[]; extern unsigned compat_dir_class[]; extern unsigned compat_chattr_class[]; extern unsigned compat_signal_class[]; extern int audit_classify_compat_syscall(int abi, unsigned syscall); /* audit_names->type values */ #define AUDIT_TYPE_UNKNOWN 0 /* we don't know yet */ #define AUDIT_TYPE_NORMAL 1 /* a "normal" audit record */ #define AUDIT_TYPE_PARENT 2 /* a parent audit record */ #define AUDIT_TYPE_CHILD_DELETE 3 /* a child being deleted */ #define AUDIT_TYPE_CHILD_CREATE 4 /* a child being created */ /* maximized args number that audit_socketcall can process */ #define AUDITSC_ARGS 6 /* bit values for ->signal->audit_tty */ #define AUDIT_TTY_ENABLE BIT(0) #define AUDIT_TTY_LOG_PASSWD BIT(1) struct filename; #define AUDIT_OFF 0 #define AUDIT_ON 1 #define AUDIT_LOCKED 2 #ifdef CONFIG_AUDIT /* These are defined in audit.c */ /* Public API */ extern __printf(4, 5) void audit_log(struct audit_context *ctx, gfp_t gfp_mask, int type, const char *fmt, ...); extern struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, int type); extern __printf(2, 3) void audit_log_format(struct audit_buffer *ab, const char *fmt, ...); extern void audit_log_end(struct audit_buffer *ab); extern bool audit_string_contains_control(const char *string, size_t len); extern void audit_log_n_hex(struct audit_buffer *ab, const unsigned char *buf, size_t len); extern void audit_log_n_string(struct audit_buffer *ab, const char *buf, size_t n); extern void audit_log_n_untrustedstring(struct audit_buffer *ab, const char *string, size_t n); extern void audit_log_untrustedstring(struct audit_buffer *ab, const char *string); extern void audit_log_d_path(struct audit_buffer *ab, const char *prefix, const struct path *path); extern void audit_log_key(struct audit_buffer *ab, char *key); extern void audit_log_path_denied(int type, const char *operation); extern void audit_log_lost(const char *message); extern int audit_log_task_context(struct audit_buffer *ab); extern void audit_log_task_info(struct audit_buffer *ab); extern int audit_update_lsm_rules(void); /* Private API (for audit.c only) */ extern int audit_rule_change(int type, int seq, void *data, size_t datasz); extern int audit_list_rules_send(struct sk_buff *request_skb, int seq); extern int audit_set_loginuid(kuid_t loginuid); static inline kuid_t audit_get_loginuid(struct task_struct *tsk) { return tsk->loginuid; } static inline unsigned int audit_get_sessionid(struct task_struct *tsk) { return tsk->sessionid; } extern u32 audit_enabled; extern int audit_signal_info(int sig, struct task_struct *t); #else /* CONFIG_AUDIT */ static inline __printf(4, 5) void audit_log(struct audit_context *ctx, gfp_t gfp_mask, int type, const char *fmt, ...) { } static inline struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, int type) { return NULL; } static inline __printf(2, 3) void audit_log_format(struct audit_buffer *ab, const char *fmt, ...) { } static inline void audit_log_end(struct audit_buffer *ab) { } static inline void audit_log_n_hex(struct audit_buffer *ab, const unsigned char *buf, size_t len) { } static inline void audit_log_n_string(struct audit_buffer *ab, const char *buf, size_t n) { } static inline void audit_log_n_untrustedstring(struct audit_buffer *ab, const char *string, size_t n) { } static inline void audit_log_untrustedstring(struct audit_buffer *ab, const char *string) { } static inline void audit_log_d_path(struct audit_buffer *ab, const char *prefix, const struct path *path) { } static inline void audit_log_key(struct audit_buffer *ab, char *key) { } static inline void audit_log_path_denied(int type, const char *operation) { } static inline int audit_log_task_context(struct audit_buffer *ab) { return 0; } static inline void audit_log_task_info(struct audit_buffer *ab) { } static inline kuid_t audit_get_loginuid(struct task_struct *tsk) { return INVALID_UID; } static inline unsigned int audit_get_sessionid(struct task_struct *tsk) { return AUDIT_SID_UNSET; } #define audit_enabled AUDIT_OFF static inline int audit_signal_info(int sig, struct task_struct *t) { return 0; } #endif /* CONFIG_AUDIT */ #ifdef CONFIG_AUDIT_COMPAT_GENERIC #define audit_is_compat(arch) (!((arch) & __AUDIT_ARCH_64BIT)) #else #define audit_is_compat(arch) false #endif #define AUDIT_INODE_PARENT 1 /* dentry represents the parent */ #define AUDIT_INODE_HIDDEN 2 /* audit record should be hidden */ #define AUDIT_INODE_NOEVAL 4 /* audit record incomplete */ #ifdef CONFIG_AUDITSYSCALL #include <asm/syscall.h> /* for syscall_get_arch() */ /* These are defined in auditsc.c */ /* Public API */ extern int audit_alloc(struct task_struct *task); extern void __audit_free(struct task_struct *task); extern void __audit_syscall_entry(int major, unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3); extern void __audit_syscall_exit(int ret_success, long ret_value); extern struct filename *__audit_reusename(const __user char *uptr); extern void __audit_getname(struct filename *name); extern void __audit_getcwd(void); extern void __audit_inode(struct filename *name, const struct dentry *dentry, unsigned int flags); extern void __audit_file(const struct file *); extern void __audit_inode_child(struct inode *parent, const struct dentry *dentry, const unsigned char type); extern void audit_seccomp(unsigned long syscall, long signr, int code); extern void audit_seccomp_actions_logged(const char *names, const char *old_names, int res); extern void __audit_ptrace(struct task_struct *t); static inline void audit_set_context(struct task_struct *task, struct audit_context *ctx) { task->audit_context = ctx; } static inline struct audit_context *audit_context(void) { return current->audit_context; } static inline bool audit_dummy_context(void) { void *p = audit_context(); return !p || *(int *)p; } static inline void audit_free(struct task_struct *task) { if (unlikely(task->audit_context)) __audit_free(task); } static inline void audit_syscall_entry(int major, unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3) { if (unlikely(audit_context())) __audit_syscall_entry(major, a0, a1, a2, a3); } static inline void audit_syscall_exit(void *pt_regs) { if (unlikely(audit_context())) { int success = is_syscall_success(pt_regs); long return_code = regs_return_value(pt_regs); __audit_syscall_exit(success, return_code); } } static inline struct filename *audit_reusename(const __user char *name) { if (unlikely(!audit_dummy_context())) return __audit_reusename(name); return NULL; } static inline void audit_getname(struct filename *name) { if (unlikely(!audit_dummy_context())) __audit_getname(name); } static inline void audit_getcwd(void) { if (unlikely(audit_context())) __audit_getcwd(); } static inline void audit_inode(struct filename *name, const struct dentry *dentry, unsigned int aflags) { if (unlikely(!audit_dummy_context())) __audit_inode(name, dentry, aflags); } static inline void audit_file(struct file *file) { if (unlikely(!audit_dummy_context())) __audit_file(file); } static inline void audit_inode_parent_hidden(struct filename *name, const struct dentry *dentry) { if (unlikely(!audit_dummy_context())) __audit_inode(name, dentry, AUDIT_INODE_PARENT | AUDIT_INODE_HIDDEN); } static inline void audit_inode_child(struct inode *parent, const struct dentry *dentry, const unsigned char type) { if (unlikely(!audit_dummy_context())) __audit_inode_child(parent, dentry, type); } void audit_core_dumps(long signr); static inline void audit_ptrace(struct task_struct *t) { if (unlikely(!audit_dummy_context())) __audit_ptrace(t); } /* Private API (for audit.c only) */ extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp); extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode); extern void __audit_bprm(struct linux_binprm *bprm); extern int __audit_socketcall(int nargs, unsigned long *args); extern int __audit_sockaddr(int len, void *addr); extern void __audit_fd_pair(int fd1, int fd2); extern void __audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr); extern void __audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec64 *abs_timeout); extern void __audit_mq_notify(mqd_t mqdes, const struct sigevent *notification); extern void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat); extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm, const struct cred *new, const struct cred *old); extern void __audit_log_capset(const struct cred *new, const struct cred *old); extern void __audit_mmap_fd(int fd, int flags); extern void __audit_log_kern_module(char *name); extern void __audit_fanotify(unsigned int response); extern void __audit_tk_injoffset(struct timespec64 offset); extern void __audit_ntp_log(const struct audit_ntp_data *ad); extern void __audit_log_nfcfg(const char *name, u8 af, unsigned int nentries, enum audit_nfcfgop op, gfp_t gfp); static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp) { if (unlikely(!audit_dummy_context())) __audit_ipc_obj(ipcp); } static inline void audit_fd_pair(int fd1, int fd2) { if (unlikely(!audit_dummy_context())) __audit_fd_pair(fd1, fd2); } static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode) { if (unlikely(!audit_dummy_context())) __audit_ipc_set_perm(qbytes, uid, gid, mode); } static inline void audit_bprm(struct linux_binprm *bprm) { if (unlikely(!audit_dummy_context())) __audit_bprm(bprm); } static inline int audit_socketcall(int nargs, unsigned long *args) { if (unlikely(!audit_dummy_context())) return __audit_socketcall(nargs, args); return 0; } static inline int audit_socketcall_compat(int nargs, u32 *args) { unsigned long a[AUDITSC_ARGS]; int i; if (audit_dummy_context()) return 0; for (i = 0; i < nargs; i++) a[i] = (unsigned long)args[i]; return __audit_socketcall(nargs, a); } static inline int audit_sockaddr(int len, void *addr) { if (unlikely(!audit_dummy_context())) return __audit_sockaddr(len, addr); return 0; } static inline void audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr) { if (unlikely(!audit_dummy_context())) __audit_mq_open(oflag, mode, attr); } static inline void audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec64 *abs_timeout) { if (unlikely(!audit_dummy_context())) __audit_mq_sendrecv(mqdes, msg_len, msg_prio, abs_timeout); } static inline void audit_mq_notify(mqd_t mqdes, const struct sigevent *notification) { if (unlikely(!audit_dummy_context())) __audit_mq_notify(mqdes, notification); } static inline void audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) { if (unlikely(!audit_dummy_context())) __audit_mq_getsetattr(mqdes, mqstat); } static inline int audit_log_bprm_fcaps(struct linux_binprm *bprm, const struct cred *new, const struct cred *old) { if (unlikely(!audit_dummy_context())) return __audit_log_bprm_fcaps(bprm, new, old); return 0; } static inline void audit_log_capset(const struct cred *new, const struct cred *old) { if (unlikely(!audit_dummy_context())) __audit_log_capset(new, old); } static inline void audit_mmap_fd(int fd, int flags) { if (unlikely(!audit_dummy_context())) __audit_mmap_fd(fd, flags); } static inline void audit_log_kern_module(char *name) { if (!audit_dummy_context()) __audit_log_kern_module(name); } static inline void audit_fanotify(unsigned int response) { if (!audit_dummy_context()) __audit_fanotify(response); } static inline void audit_tk_injoffset(struct timespec64 offset) { /* ignore no-op events */ if (offset.tv_sec == 0 && offset.tv_nsec == 0) return; if (!audit_dummy_context()) __audit_tk_injoffset(offset); } static inline void audit_ntp_init(struct audit_ntp_data *ad) { memset(ad, 0, sizeof(*ad)); } static inline void audit_ntp_set_old(struct audit_ntp_data *ad, enum audit_ntp_type type, long long val) { ad->vals[type].oldval = val; } static inline void audit_ntp_set_new(struct audit_ntp_data *ad, enum audit_ntp_type type, long long val) { ad->vals[type].newval = val; } static inline void audit_ntp_log(const struct audit_ntp_data *ad) { if (!audit_dummy_context()) __audit_ntp_log(ad); } static inline void audit_log_nfcfg(const char *name, u8 af, unsigned int nentries, enum audit_nfcfgop op, gfp_t gfp) { if (audit_enabled) __audit_log_nfcfg(name, af, nentries, op, gfp); } extern int audit_n_rules; extern int audit_signals; #else /* CONFIG_AUDITSYSCALL */ static inline int audit_alloc(struct task_struct *task) { return 0; } static inline void audit_free(struct task_struct *task) { } static inline void audit_syscall_entry(int major, unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3) { } static inline void audit_syscall_exit(void *pt_regs) { } static inline bool audit_dummy_context(void) { return true; } static inline void audit_set_context(struct task_struct *task, struct audit_context *ctx) { } static inline struct audit_context *audit_context(void) { return NULL; } static inline struct filename *audit_reusename(const __user char *name) { return NULL; } static inline void audit_getname(struct filename *name) { } static inline void audit_getcwd(void) { } static inline void audit_inode(struct filename *name, const struct dentry *dentry, unsigned int aflags) { } static inline void audit_file(struct file *file) { } static inline void audit_inode_parent_hidden(struct filename *name, const struct dentry *dentry) { } static inline void audit_inode_child(struct inode *parent, const struct dentry *dentry, const unsigned char type) { } static inline void audit_core_dumps(long signr) { } static inline void audit_seccomp(unsigned long syscall, long signr, int code) { } static inline void audit_seccomp_actions_logged(const char *names, const char *old_names, int res) { } static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp) { } static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode) { } static inline void audit_bprm(struct linux_binprm *bprm) { } static inline int audit_socketcall(int nargs, unsigned long *args) { return 0; } static inline int audit_socketcall_compat(int nargs, u32 *args) { return 0; } static inline void audit_fd_pair(int fd1, int fd2) { } static inline int audit_sockaddr(int len, void *addr) { return 0; } static inline void audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr) { } static inline void audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec64 *abs_timeout) { } static inline void audit_mq_notify(mqd_t mqdes, const struct sigevent *notification) { } static inline void audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) { } static inline int audit_log_bprm_fcaps(struct linux_binprm *bprm, const struct cred *new, const struct cred *old) { return 0; } static inline void audit_log_capset(const struct cred *new, const struct cred *old) { } static inline void audit_mmap_fd(int fd, int flags) { } static inline void audit_log_kern_module(char *name) { } static inline void audit_fanotify(unsigned int response) { } static inline void audit_tk_injoffset(struct timespec64 offset) { } static inline void audit_ntp_init(struct audit_ntp_data *ad) { } static inline void audit_ntp_set_old(struct audit_ntp_data *ad, enum audit_ntp_type type, long long val) { } static inline void audit_ntp_set_new(struct audit_ntp_data *ad, enum audit_ntp_type type, long long val) { } static inline void audit_ntp_log(const struct audit_ntp_data *ad) { } static inline void audit_ptrace(struct task_struct *t) { } static inline void audit_log_nfcfg(const char *name, u8 af, unsigned int nentries, enum audit_nfcfgop op, gfp_t gfp) { } #define audit_n_rules 0 #define audit_signals 0 #endif /* CONFIG_AUDITSYSCALL */ static inline bool audit_loginuid_set(struct task_struct *tsk) { return uid_valid(audit_get_loginuid(tsk)); } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_LOCAL_LOCK_H # error "Do not include directly, include linux/local_lock.h" #endif #include <linux/percpu-defs.h> #include <linux/lockdep.h> typedef struct { #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; struct task_struct *owner; #endif } local_lock_t; #ifdef CONFIG_DEBUG_LOCK_ALLOC # define LOCAL_LOCK_DEBUG_INIT(lockname) \ .dep_map = { \ .name = #lockname, \ .wait_type_inner = LD_WAIT_CONFIG, \ .lock_type = LD_LOCK_PERCPU, \ }, \ .owner = NULL, static inline void local_lock_acquire(local_lock_t *l) { lock_map_acquire(&l->dep_map); DEBUG_LOCKS_WARN_ON(l->owner); l->owner = current; } static inline void local_lock_release(local_lock_t *l) { DEBUG_LOCKS_WARN_ON(l->owner != current); l->owner = NULL; lock_map_release(&l->dep_map); } static inline void local_lock_debug_init(local_lock_t *l) { l->owner = NULL; } #else /* CONFIG_DEBUG_LOCK_ALLOC */ # define LOCAL_LOCK_DEBUG_INIT(lockname) static inline void local_lock_acquire(local_lock_t *l) { } static inline void local_lock_release(local_lock_t *l) { } static inline void local_lock_debug_init(local_lock_t *l) { } #endif /* !CONFIG_DEBUG_LOCK_ALLOC */ #define INIT_LOCAL_LOCK(lockname) { LOCAL_LOCK_DEBUG_INIT(lockname) } #define __local_lock_init(lock) \ do { \ static struct lock_class_key __key; \ \ debug_check_no_locks_freed((void *)lock, sizeof(*lock));\ lockdep_init_map_type(&(lock)->dep_map, #lock, &__key, \ 0, LD_WAIT_CONFIG, LD_WAIT_INV, \ LD_LOCK_PERCPU); \ local_lock_debug_init(lock); \ } while (0) #define __local_lock(lock) \ do { \ preempt_disable(); \ local_lock_acquire(this_cpu_ptr(lock)); \ } while (0) #define __local_lock_irq(lock) \ do { \ local_irq_disable(); \ local_lock_acquire(this_cpu_ptr(lock)); \ } while (0) #define __local_lock_irqsave(lock, flags) \ do { \ local_irq_save(flags); \ local_lock_acquire(this_cpu_ptr(lock)); \ } while (0) #define __local_unlock(lock) \ do { \ local_lock_release(this_cpu_ptr(lock)); \ preempt_enable(); \ } while (0) #define __local_unlock_irq(lock) \ do { \ local_lock_release(this_cpu_ptr(lock)); \ local_irq_enable(); \ } while (0) #define __local_unlock_irqrestore(lock, flags) \ do { \ local_lock_release(this_cpu_ptr(lock)); \ local_irq_restore(flags); \ } while (0)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* md.h : kernel internal structure of the Linux MD driver Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman */ #ifndef _MD_MD_H #define _MD_MD_H #include <linux/blkdev.h> #include <linux/backing-dev.h> #include <linux/badblocks.h> #include <linux/kobject.h> #include <linux/list.h> #include <linux/mm.h> #include <linux/mutex.h> #include <linux/timer.h> #include <linux/wait.h> #include <linux/workqueue.h> #include "md-cluster.h" #define MaxSector (~(sector_t)0) /* * These flags should really be called "NO_RETRY" rather than * "FAILFAST" because they don't make any promise about time lapse, * only about the number of retries, which will be zero. * REQ_FAILFAST_DRIVER is not included because * Commit: 4a27446f3e39 ("[SCSI] modify scsi to handle new fail fast flags.") * seems to suggest that the errors it avoids retrying should usually * be retried. */ #define MD_FAILFAST (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT) /* * The struct embedded in rdev is used to serialize IO. */ struct serial_in_rdev { struct rb_root_cached serial_rb; spinlock_t serial_lock; wait_queue_head_t serial_io_wait; }; /* * MD's 'extended' device */ struct md_rdev { struct list_head same_set; /* RAID devices within the same set */ sector_t sectors; /* Device size (in 512bytes sectors) */ struct mddev *mddev; /* RAID array if running */ int last_events; /* IO event timestamp */ /* * If meta_bdev is non-NULL, it means that a separate device is * being used to store the metadata (superblock/bitmap) which * would otherwise be contained on the same device as the data (bdev). */ struct block_device *meta_bdev; struct block_device *bdev; /* block device handle */ struct page *sb_page, *bb_page; int sb_loaded; __u64 sb_events; sector_t data_offset; /* start of data in array */ sector_t new_data_offset;/* only relevant while reshaping */ sector_t sb_start; /* offset of the super block (in 512byte sectors) */ int sb_size; /* bytes in the superblock */ int preferred_minor; /* autorun support */ struct kobject kobj; /* A device can be in one of three states based on two flags: * Not working: faulty==1 in_sync==0 * Fully working: faulty==0 in_sync==1 * Working, but not * in sync with array * faulty==0 in_sync==0 * * It can never have faulty==1, in_sync==1 * This reduces the burden of testing multiple flags in many cases */ unsigned long flags; /* bit set of 'enum flag_bits' bits. */ wait_queue_head_t blocked_wait; int desc_nr; /* descriptor index in the superblock */ int raid_disk; /* role of device in array */ int new_raid_disk; /* role that the device will have in * the array after a level-change completes. */ int saved_raid_disk; /* role that device used to have in the * array and could again if we did a partial * resync from the bitmap */ union { sector_t recovery_offset;/* If this device has been partially * recovered, this is where we were * up to. */ sector_t journal_tail; /* If this device is a journal device, * this is the journal tail (journal * recovery start point) */ }; atomic_t nr_pending; /* number of pending requests. * only maintained for arrays that * support hot removal */ atomic_t read_errors; /* number of consecutive read errors that * we have tried to ignore. */ time64_t last_read_error; /* monotonic time since our * last read error */ atomic_t corrected_errors; /* number of corrected read errors, * for reporting to userspace and storing * in superblock. */ struct serial_in_rdev *serial; /* used for raid1 io serialization */ struct work_struct del_work; /* used for delayed sysfs removal */ struct kernfs_node *sysfs_state; /* handle for 'state' * sysfs entry */ /* handle for 'unacknowledged_bad_blocks' sysfs dentry */ struct kernfs_node *sysfs_unack_badblocks; /* handle for 'bad_blocks' sysfs dentry */ struct kernfs_node *sysfs_badblocks; struct badblocks badblocks; struct { short offset; /* Offset from superblock to start of PPL. * Not used by external metadata. */ unsigned int size; /* Size in sectors of the PPL space */ sector_t sector; /* First sector of the PPL space */ } ppl; }; enum flag_bits { Faulty, /* device is known to have a fault */ In_sync, /* device is in_sync with rest of array */ Bitmap_sync, /* ..actually, not quite In_sync. Need a * bitmap-based recovery to get fully in sync. * The bit is only meaningful before device * has been passed to pers->hot_add_disk. */ WriteMostly, /* Avoid reading if at all possible */ AutoDetected, /* added by auto-detect */ Blocked, /* An error occurred but has not yet * been acknowledged by the metadata * handler, so don't allow writes * until it is cleared */ WriteErrorSeen, /* A write error has been seen on this * device */ FaultRecorded, /* Intermediate state for clearing * Blocked. The Fault is/will-be * recorded in the metadata, but that * metadata hasn't been stored safely * on disk yet. */ BlockedBadBlocks, /* A writer is blocked because they * found an unacknowledged bad-block. * This can safely be cleared at any * time, and the writer will re-check. * It may be set at any time, and at * worst the writer will timeout and * re-check. So setting it as * accurately as possible is good, but * not absolutely critical. */ WantReplacement, /* This device is a candidate to be * hot-replaced, either because it has * reported some faults, or because * of explicit request. */ Replacement, /* This device is a replacement for * a want_replacement device with same * raid_disk number. */ Candidate, /* For clustered environments only: * This device is seen locally but not * by the whole cluster */ Journal, /* This device is used as journal for * raid-5/6. * Usually, this device should be faster * than other devices in the array */ ClusterRemove, RemoveSynchronized, /* synchronize_rcu() was called after * this device was known to be faulty, * so it is safe to remove without * another synchronize_rcu() call. */ ExternalBbl, /* External metadata provides bad * block management for a disk */ FailFast, /* Minimal retries should be attempted on * this device, so use REQ_FAILFAST_DEV. * Also don't try to repair failed reads. * It is expects that no bad block log * is present. */ LastDev, /* Seems to be the last working dev as * it didn't fail, so don't use FailFast * any more for metadata */ CollisionCheck, /* * check if there is collision between raid1 * serial bios. */ }; static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors, sector_t *first_bad, int *bad_sectors) { if (unlikely(rdev->badblocks.count)) { int rv = badblocks_check(&rdev->badblocks, rdev->data_offset + s, sectors, first_bad, bad_sectors); if (rv) *first_bad -= rdev->data_offset; return rv; } return 0; } extern int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors, int is_new); extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors, int is_new); struct md_cluster_info; /* change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added */ enum mddev_flags { MD_ARRAY_FIRST_USE, /* First use of array, needs initialization */ MD_CLOSING, /* If set, we are closing the array, do not open * it then */ MD_JOURNAL_CLEAN, /* A raid with journal is already clean */ MD_HAS_JOURNAL, /* The raid array has journal feature set */ MD_CLUSTER_RESYNC_LOCKED, /* cluster raid only, which means node * already took resync lock, need to * release the lock */ MD_FAILFAST_SUPPORTED, /* Using MD_FAILFAST on metadata writes is * supported as calls to md_error() will * never cause the array to become failed. */ MD_HAS_PPL, /* The raid array has PPL feature set */ MD_HAS_MULTIPLE_PPLS, /* The raid array has multiple PPLs feature set */ MD_ALLOW_SB_UPDATE, /* md_check_recovery is allowed to update * the metadata without taking reconfig_mutex. */ MD_UPDATING_SB, /* md_check_recovery is updating the metadata * without explicitly holding reconfig_mutex. */ MD_NOT_READY, /* do_md_run() is active, so 'array_state' * must not report that array is ready yet */ MD_BROKEN, /* This is used in RAID-0/LINEAR only, to stop * I/O in case an array member is gone/failed. */ }; enum mddev_sb_flags { MD_SB_CHANGE_DEVS, /* Some device status has changed */ MD_SB_CHANGE_CLEAN, /* transition to or from 'clean' */ MD_SB_CHANGE_PENDING, /* switch from 'clean' to 'active' in progress */ MD_SB_NEED_REWRITE, /* metadata write needs to be repeated */ }; #define NR_SERIAL_INFOS 8 /* record current range of serialize IOs */ struct serial_info { struct rb_node node; sector_t start; /* start sector of rb node */ sector_t last; /* end sector of rb node */ sector_t _subtree_last; /* highest sector in subtree of rb node */ }; struct mddev { void *private; struct md_personality *pers; dev_t unit; int md_minor; struct list_head disks; unsigned long flags; unsigned long sb_flags; int suspended; atomic_t active_io; int ro; int sysfs_active; /* set when sysfs deletes * are happening, so run/ * takeover/stop are not safe */ struct gendisk *gendisk; struct kobject kobj; int hold_active; #define UNTIL_IOCTL 1 #define UNTIL_STOP 2 /* Superblock information */ int major_version, minor_version, patch_version; int persistent; int external; /* metadata is * managed externally */ char metadata_type[17]; /* externally set*/ int chunk_sectors; time64_t ctime, utime; int level, layout; char clevel[16]; int raid_disks; int max_disks; sector_t dev_sectors; /* used size of * component devices */ sector_t array_sectors; /* exported array size */ int external_size; /* size managed * externally */ __u64 events; /* If the last 'event' was simply a clean->dirty transition, and * we didn't write it to the spares, then it is safe and simple * to just decrement the event count on a dirty->clean transition. * So we record that possibility here. */ int can_decrease_events; char uuid[16]; /* If the array is being reshaped, we need to record the * new shape and an indication of where we are up to. * This is written to the superblock. * If reshape_position is MaxSector, then no reshape is happening (yet). */ sector_t reshape_position; int delta_disks, new_level, new_layout; int new_chunk_sectors; int reshape_backwards; struct md_thread *thread; /* management thread */ struct md_thread *sync_thread; /* doing resync or reconstruct */ /* 'last_sync_action' is initialized to "none". It is set when a * sync operation (i.e "data-check", "requested-resync", "resync", * "recovery", or "reshape") is started. It holds this value even * when the sync thread is "frozen" (interrupted) or "idle" (stopped * or finished). It is overwritten when a new sync operation is begun. */ char *last_sync_action; sector_t curr_resync; /* last block scheduled */ /* As resync requests can complete out of order, we cannot easily track * how much resync has been completed. So we occasionally pause until * everything completes, then set curr_resync_completed to curr_resync. * As such it may be well behind the real resync mark, but it is a value * we are certain of. */ sector_t curr_resync_completed; unsigned long resync_mark; /* a recent timestamp */ sector_t resync_mark_cnt;/* blocks written at resync_mark */ sector_t curr_mark_cnt; /* blocks scheduled now */ sector_t resync_max_sectors; /* may be set by personality */ atomic64_t resync_mismatches; /* count of sectors where * parity/replica mismatch found */ /* allow user-space to request suspension of IO to regions of the array */ sector_t suspend_lo; sector_t suspend_hi; /* if zero, use the system-wide default */ int sync_speed_min; int sync_speed_max; /* resync even though the same disks are shared among md-devices */ int parallel_resync; int ok_start_degraded; unsigned long recovery; /* If a RAID personality determines that recovery (of a particular * device) will fail due to a read error on the source device, it * takes a copy of this number and does not attempt recovery again * until this number changes. */ int recovery_disabled; int in_sync; /* know to not need resync */ /* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so * that we are never stopping an array while it is open. * 'reconfig_mutex' protects all other reconfiguration. * These locks are separate due to conflicting interactions * with bdev->bd_mutex. * Lock ordering is: * reconfig_mutex -> bd_mutex * bd_mutex -> open_mutex: e.g. __blkdev_get -> md_open */ struct mutex open_mutex; struct mutex reconfig_mutex; atomic_t active; /* general refcount */ atomic_t openers; /* number of active opens */ int changed; /* True if we might need to * reread partition info */ int degraded; /* whether md should consider * adding a spare */ atomic_t recovery_active; /* blocks scheduled, but not written */ wait_queue_head_t recovery_wait; sector_t recovery_cp; sector_t resync_min; /* user requested sync * starts here */ sector_t resync_max; /* resync should pause * when it gets here */ struct kernfs_node *sysfs_state; /* handle for 'array_state' * file in sysfs. */ struct kernfs_node *sysfs_action; /* handle for 'sync_action' */ struct kernfs_node *sysfs_completed; /*handle for 'sync_completed' */ struct kernfs_node *sysfs_degraded; /*handle for 'degraded' */ struct kernfs_node *sysfs_level; /*handle for 'level' */ struct work_struct del_work; /* used for delayed sysfs removal */ /* "lock" protects: * flush_bio transition from NULL to !NULL * rdev superblocks, events * clearing MD_CHANGE_* * in_sync - and related safemode and MD_CHANGE changes * pers (also protected by reconfig_mutex and pending IO). * clearing ->bitmap * clearing ->bitmap_info.file * changing ->resync_{min,max} * setting MD_RECOVERY_RUNNING (which interacts with resync_{min,max}) */ spinlock_t lock; wait_queue_head_t sb_wait; /* for waiting on superblock updates */ atomic_t pending_writes; /* number of active superblock writes */ unsigned int safemode; /* if set, update "clean" superblock * when no writes pending. */ unsigned int safemode_delay; struct timer_list safemode_timer; struct percpu_ref writes_pending; int sync_checkers; /* # of threads checking writes_pending */ struct request_queue *queue; /* for plugging ... */ struct bitmap *bitmap; /* the bitmap for the device */ struct { struct file *file; /* the bitmap file */ loff_t offset; /* offset from superblock of * start of bitmap. May be * negative, but not '0' * For external metadata, offset * from start of device. */ unsigned long space; /* space available at this offset */ loff_t default_offset; /* this is the offset to use when * hot-adding a bitmap. It should * eventually be settable by sysfs. */ unsigned long default_space; /* space available at * default offset */ struct mutex mutex; unsigned long chunksize; unsigned long daemon_sleep; /* how many jiffies between updates? */ unsigned long max_write_behind; /* write-behind mode */ int external; int nodes; /* Maximum number of nodes in the cluster */ char cluster_name[64]; /* Name of the cluster */ } bitmap_info; atomic_t max_corr_read_errors; /* max read retries */ struct list_head all_mddevs; struct attribute_group *to_remove; struct bio_set bio_set; struct bio_set sync_set; /* for sync operations like * metadata and bitmap writes */ /* Generic flush handling. * The last to finish preflush schedules a worker to submit * the rest of the request (without the REQ_PREFLUSH flag). */ struct bio *flush_bio; atomic_t flush_pending; ktime_t start_flush, last_flush; /* last_flush is when the last completed * flush was started. */ struct work_struct flush_work; struct work_struct event_work; /* used by dm to report failure event */ mempool_t *serial_info_pool; void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev); struct md_cluster_info *cluster_info; unsigned int good_device_nr; /* good device num within cluster raid */ unsigned int noio_flag; /* for memalloc scope API */ bool has_superblocks:1; bool fail_last_dev:1; bool serialize_policy:1; }; enum recovery_flags { /* * If neither SYNC or RESHAPE are set, then it is a recovery. */ MD_RECOVERY_RUNNING, /* a thread is running, or about to be started */ MD_RECOVERY_SYNC, /* actually doing a resync, not a recovery */ MD_RECOVERY_RECOVER, /* doing recovery, or need to try it. */ MD_RECOVERY_INTR, /* resync needs to be aborted for some reason */ MD_RECOVERY_DONE, /* thread is done and is waiting to be reaped */ MD_RECOVERY_NEEDED, /* we might need to start a resync/recover */ MD_RECOVERY_REQUESTED, /* user-space has requested a sync (used with SYNC) */ MD_RECOVERY_CHECK, /* user-space request for check-only, no repair */ MD_RECOVERY_RESHAPE, /* A reshape is happening */ MD_RECOVERY_FROZEN, /* User request to abort, and not restart, any action */ MD_RECOVERY_ERROR, /* sync-action interrupted because io-error */ MD_RECOVERY_WAIT, /* waiting for pers->start() to finish */ MD_RESYNCING_REMOTE, /* remote node is running resync thread */ }; static inline int __must_check mddev_lock(struct mddev *mddev) { return mutex_lock_interruptible(&mddev->reconfig_mutex); } /* Sometimes we need to take the lock in a situation where * failure due to interrupts is not acceptable. */ static inline void mddev_lock_nointr(struct mddev *mddev) { mutex_lock(&mddev->reconfig_mutex); } static inline int mddev_trylock(struct mddev *mddev) { return mutex_trylock(&mddev->reconfig_mutex); } extern void mddev_unlock(struct mddev *mddev); static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors) { atomic_add(nr_sectors, &bdev->bd_disk->sync_io); } static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors) { atomic_add(nr_sectors, &bio->bi_disk->sync_io); } struct md_personality { char *name; int level; struct list_head list; struct module *owner; bool __must_check (*make_request)(struct mddev *mddev, struct bio *bio); /* * start up works that do NOT require md_thread. tasks that * requires md_thread should go into start() */ int (*run)(struct mddev *mddev); /* start up works that require md threads */ int (*start)(struct mddev *mddev); void (*free)(struct mddev *mddev, void *priv); void (*status)(struct seq_file *seq, struct mddev *mddev); /* error_handler must set ->faulty and clear ->in_sync * if appropriate, and should abort recovery if needed */ void (*error_handler)(struct mddev *mddev, struct md_rdev *rdev); int (*hot_add_disk) (struct mddev *mddev, struct md_rdev *rdev); int (*hot_remove_disk) (struct mddev *mddev, struct md_rdev *rdev); int (*spare_active) (struct mddev *mddev); sector_t (*sync_request)(struct mddev *mddev, sector_t sector_nr, int *skipped); int (*resize) (struct mddev *mddev, sector_t sectors); sector_t (*size) (struct mddev *mddev, sector_t sectors, int raid_disks); int (*check_reshape) (struct mddev *mddev); int (*start_reshape) (struct mddev *mddev); void (*finish_reshape) (struct mddev *mddev); void (*update_reshape_pos) (struct mddev *mddev); /* quiesce suspends or resumes internal processing. * 1 - stop new actions and wait for action io to complete * 0 - return to normal behaviour */ void (*quiesce) (struct mddev *mddev, int quiesce); /* takeover is used to transition an array from one * personality to another. The new personality must be able * to handle the data in the current layout. * e.g. 2drive raid1 -> 2drive raid5 * ndrive raid5 -> degraded n+1drive raid6 with special layout * If the takeover succeeds, a new 'private' structure is returned. * This needs to be installed and then ->run used to activate the * array. */ void *(*takeover) (struct mddev *mddev); /* Changes the consistency policy of an active array. */ int (*change_consistency_policy)(struct mddev *mddev, const char *buf); }; struct md_sysfs_entry { struct attribute attr; ssize_t (*show)(struct mddev *, char *); ssize_t (*store)(struct mddev *, const char *, size_t); }; extern struct attribute_group md_bitmap_group; static inline struct kernfs_node *sysfs_get_dirent_safe(struct kernfs_node *sd, char *name) { if (sd) return sysfs_get_dirent(sd, name); return sd; } static inline void sysfs_notify_dirent_safe(struct kernfs_node *sd) { if (sd) sysfs_notify_dirent(sd); } static inline char * mdname (struct mddev * mddev) { return mddev->gendisk ? mddev->gendisk->disk_name : "mdX"; } static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev) { char nm[20]; if (!test_bit(Replacement, &rdev->flags) && !test_bit(Journal, &rdev->flags) && mddev->kobj.sd) { sprintf(nm, "rd%d", rdev->raid_disk); return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm); } else return 0; } static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev) { char nm[20]; if (!test_bit(Replacement, &rdev->flags) && !test_bit(Journal, &rdev->flags) && mddev->kobj.sd) { sprintf(nm, "rd%d", rdev->raid_disk); sysfs_remove_link(&mddev->kobj, nm); } } /* * iterates through some rdev ringlist. It's safe to remove the * current 'rdev'. Dont touch 'tmp' though. */ #define rdev_for_each_list(rdev, tmp, head) \ list_for_each_entry_safe(rdev, tmp, head, same_set) /* * iterates through the 'same array disks' ringlist */ #define rdev_for_each(rdev, mddev) \ list_for_each_entry(rdev, &((mddev)->disks), same_set) #define rdev_for_each_safe(rdev, tmp, mddev) \ list_for_each_entry_safe(rdev, tmp, &((mddev)->disks), same_set) #define rdev_for_each_rcu(rdev, mddev) \ list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set) struct md_thread { void (*run) (struct md_thread *thread); struct mddev *mddev; wait_queue_head_t wqueue; unsigned long flags; struct task_struct *tsk; unsigned long timeout; void *private; }; #define THREAD_WAKEUP 0 static inline void safe_put_page(struct page *p) { if (p) put_page(p); } extern int register_md_personality(struct md_personality *p); extern int unregister_md_personality(struct md_personality *p); extern int register_md_cluster_operations(struct md_cluster_operations *ops, struct module *module); extern int unregister_md_cluster_operations(void); extern int md_setup_cluster(struct mddev *mddev, int nodes); extern void md_cluster_stop(struct mddev *mddev); extern struct md_thread *md_register_thread( void (*run)(struct md_thread *thread), struct mddev *mddev, const char *name); extern void md_unregister_thread(struct md_thread **threadp); extern void md_wakeup_thread(struct md_thread *thread); extern void md_check_recovery(struct mddev *mddev); extern void md_reap_sync_thread(struct mddev *mddev); extern int mddev_init_writes_pending(struct mddev *mddev); extern bool md_write_start(struct mddev *mddev, struct bio *bi); extern void md_write_inc(struct mddev *mddev, struct bio *bi); extern void md_write_end(struct mddev *mddev); extern void md_done_sync(struct mddev *mddev, int blocks, int ok); extern void md_error(struct mddev *mddev, struct md_rdev *rdev); extern void md_finish_reshape(struct mddev *mddev); extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio); extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev, sector_t sector, int size, struct page *page); extern int md_super_wait(struct mddev *mddev); extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size, struct page *page, int op, int op_flags, bool metadata_op); extern void md_do_sync(struct md_thread *thread); extern void md_new_event(struct mddev *mddev); extern void md_allow_write(struct mddev *mddev); extern void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev); extern void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors); extern int md_check_no_bitmap(struct mddev *mddev); extern int md_integrity_register(struct mddev *mddev); extern int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev); extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale); extern void mddev_init(struct mddev *mddev); extern int md_run(struct mddev *mddev); extern int md_start(struct mddev *mddev); extern void md_stop(struct mddev *mddev); extern void md_stop_writes(struct mddev *mddev); extern int md_rdev_init(struct md_rdev *rdev); extern void md_rdev_clear(struct md_rdev *rdev); extern void md_handle_request(struct mddev *mddev, struct bio *bio); extern void mddev_suspend(struct mddev *mddev); extern void mddev_resume(struct mddev *mddev); extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, struct mddev *mddev); extern void md_reload_sb(struct mddev *mddev, int raid_disk); extern void md_update_sb(struct mddev *mddev, int force); extern void md_kick_rdev_from_array(struct md_rdev * rdev); extern void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev, bool is_suspend); extern void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev, bool is_suspend); struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr); struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev); static inline bool is_mddev_broken(struct md_rdev *rdev, const char *md_type) { int flags = rdev->bdev->bd_disk->flags; if (!(flags & GENHD_FL_UP)) { if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags)) pr_warn("md: %s: %s array has a missing/failed member\n", mdname(rdev->mddev), md_type); return true; } return false; } static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev) { int faulty = test_bit(Faulty, &rdev->flags); if (atomic_dec_and_test(&rdev->nr_pending) && faulty) { set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); } } extern struct md_cluster_operations *md_cluster_ops; static inline int mddev_is_clustered(struct mddev *mddev) { return mddev->cluster_info && mddev->bitmap_info.nodes > 1; } /* clear unsupported mddev_flags */ static inline void mddev_clear_unsupported_flags(struct mddev *mddev, unsigned long unsupported_flags) { mddev->flags &= ~unsupported_flags; } static inline void mddev_check_writesame(struct mddev *mddev, struct bio *bio) { if (bio_op(bio) == REQ_OP_WRITE_SAME && !bio->bi_disk->queue->limits.max_write_same_sectors) mddev->queue->limits.max_write_same_sectors = 0; } static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio) { if (bio_op(bio) == REQ_OP_WRITE_ZEROES && !bio->bi_disk->queue->limits.max_write_zeroes_sectors) mddev->queue->limits.max_write_zeroes_sectors = 0; } struct mdu_array_info_s; struct mdu_disk_info_s; extern int mdp_major; void md_autostart_arrays(int part); int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info); int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info); int do_md_run(struct mddev *mddev); extern const struct block_device_operations md_fops; #endif /* _MD_MD_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 /* SPDX-License-Identifier: GPL-2.0 */ /* * Functions used by both the SCSI initiator code and the SCSI target code. */ #ifndef _SCSI_COMMON_H_ #define _SCSI_COMMON_H_ #include <linux/types.h> #include <scsi/scsi_proto.h> static inline unsigned scsi_varlen_cdb_length(const void *hdr) { return ((struct scsi_varlen_cdb_hdr *)hdr)->additional_cdb_length + 8; } extern const unsigned char scsi_command_size_tbl[8]; #define COMMAND_SIZE(opcode) scsi_command_size_tbl[((opcode) >> 5) & 7] static inline unsigned scsi_command_size(const unsigned char *cmnd) { return (cmnd[0] == VARIABLE_LENGTH_CMD) ? scsi_varlen_cdb_length(cmnd) : COMMAND_SIZE(cmnd[0]); } static inline unsigned char scsi_command_control(const unsigned char *cmnd) { return (cmnd[0] == VARIABLE_LENGTH_CMD) ? cmnd[1] : cmnd[COMMAND_SIZE(cmnd[0]) - 1]; } /* Returns a human-readable name for the device */ extern const char *scsi_device_type(unsigned type); extern void int_to_scsilun(u64, struct scsi_lun *); extern u64 scsilun_to_int(struct scsi_lun *); /* * This is a slightly modified SCSI sense "descriptor" format header. * The addition is to allow the 0x70 and 0x71 response codes. The idea * is to place the salient data from either "fixed" or "descriptor" sense * format into one structure to ease application processing. * * The original sense buffer should be kept around for those cases * in which more information is required (e.g. the LBA of a MEDIUM ERROR). */ struct scsi_sense_hdr { /* See SPC-3 section 4.5 */ u8 response_code; /* permit: 0x0, 0x70, 0x71, 0x72, 0x73 */ u8 sense_key; u8 asc; u8 ascq; u8 byte4; u8 byte5; u8 byte6; u8 additional_length; /* always 0 for fixed sense format */ }; static inline bool scsi_sense_valid(const struct scsi_sense_hdr *sshdr) { if (!sshdr) return false; return (sshdr->response_code & 0x70) == 0x70; } extern bool scsi_normalize_sense(const u8 *sense_buffer, int sb_len, struct scsi_sense_hdr *sshdr); extern void scsi_build_sense_buffer(int desc, u8 *buf, u8 key, u8 asc, u8 ascq); int scsi_set_sense_information(u8 *buf, int buf_len, u64 info); int scsi_set_sense_field_pointer(u8 *buf, int buf_len, u16 fp, u8 bp, bool cd); extern const u8 * scsi_sense_desc_find(const u8 * sense_buffer, int sb_len, int desc_type); #endif /* _SCSI_COMMON_H_ */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 /* SPDX-License-Identifier: GPL-2.0 */ /* File: linux/posix_acl_xattr.h Extended attribute system call representation of Access Control Lists. Copyright (C) 2000 by Andreas Gruenbacher <a.gruenbacher@computer.org> Copyright (C) 2002 SGI - Silicon Graphics, Inc <linux-xfs@oss.sgi.com> */ #ifndef _POSIX_ACL_XATTR_H #define _POSIX_ACL_XATTR_H #include <uapi/linux/xattr.h> #include <uapi/linux/posix_acl_xattr.h> #include <linux/posix_acl.h> static inline size_t posix_acl_xattr_size(int count) { return (sizeof(struct posix_acl_xattr_header) + (count * sizeof(struct posix_acl_xattr_entry))); } static inline int posix_acl_xattr_count(size_t size) { if (size < sizeof(struct posix_acl_xattr_header)) return -1; size -= sizeof(struct posix_acl_xattr_header); if (size % sizeof(struct posix_acl_xattr_entry)) return -1; return size / sizeof(struct posix_acl_xattr_entry); } #ifdef CONFIG_FS_POSIX_ACL void posix_acl_fix_xattr_from_user(void *value, size_t size); void posix_acl_fix_xattr_to_user(void *value, size_t size); #else static inline void posix_acl_fix_xattr_from_user(void *value, size_t size) { } static inline void posix_acl_fix_xattr_to_user(void *value, size_t size) { } #endif struct posix_acl *posix_acl_from_xattr(struct user_namespace *user_ns, const void *value, size_t size); int posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl, void *buffer, size_t size); extern const struct xattr_handler posix_acl_access_xattr_handler; extern const struct xattr_handler posix_acl_default_xattr_handler; #endif /* _POSIX_ACL_XATTR_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_IVERSION_H #define _LINUX_IVERSION_H #include <linux/fs.h> /* * The inode->i_version field: * --------------------------- * The change attribute (i_version) is mandated by NFSv4 and is mostly for * knfsd, but is also used for other purposes (e.g. IMA). The i_version must * appear different to observers if there was a change to the inode's data or * metadata since it was last queried. * * Observers see the i_version as a 64-bit number that never decreases. If it * remains the same since it was last checked, then nothing has changed in the * inode. If it's different then something has changed. Observers cannot infer * anything about the nature or magnitude of the changes from the value, only * that the inode has changed in some fashion. * * Not all filesystems properly implement the i_version counter. Subsystems that * want to use i_version field on an inode should first check whether the * filesystem sets the SB_I_VERSION flag (usually via the IS_I_VERSION macro). * * Those that set SB_I_VERSION will automatically have their i_version counter * incremented on writes to normal files. If the SB_I_VERSION is not set, then * the VFS will not touch it on writes, and the filesystem can use it how it * wishes. Note that the filesystem is always responsible for updating the * i_version on namespace changes in directories (mkdir, rmdir, unlink, etc.). * We consider these sorts of filesystems to have a kernel-managed i_version. * * It may be impractical for filesystems to keep i_version updates atomic with * respect to the changes that cause them. They should, however, guarantee * that i_version updates are never visible before the changes that caused * them. Also, i_version updates should never be delayed longer than it takes * the original change to reach disk. * * This implementation uses the low bit in the i_version field as a flag to * track when the value has been queried. If it has not been queried since it * was last incremented, we can skip the increment in most cases. * * In the event that we're updating the ctime, we will usually go ahead and * bump the i_version anyway. Since that has to go to stable storage in some * fashion, we might as well increment it as well. * * With this implementation, the value should always appear to observers to * increase over time if the file has changed. It's recommended to use * inode_eq_iversion() helper to compare values. * * Note that some filesystems (e.g. NFS and AFS) just use the field to store * a server-provided value (for the most part). For that reason, those * filesystems do not set SB_I_VERSION. These filesystems are considered to * have a self-managed i_version. * * Persistently storing the i_version * ---------------------------------- * Queries of the i_version field are not gated on them hitting the backing * store. It's always possible that the host could crash after allowing * a query of the value but before it has made it to disk. * * To mitigate this problem, filesystems should always use * inode_set_iversion_queried when loading an existing inode from disk. This * ensures that the next attempted inode increment will result in the value * changing. * * Storing the value to disk therefore does not count as a query, so those * filesystems should use inode_peek_iversion to grab the value to be stored. * There is no need to flag the value as having been queried in that case. */ /* * We borrow the lowest bit in the i_version to use as a flag to tell whether * it has been queried since we last incremented it. If it has, then we must * increment it on the next change. After that, we can clear the flag and * avoid incrementing it again until it has again been queried. */ #define I_VERSION_QUERIED_SHIFT (1) #define I_VERSION_QUERIED (1ULL << (I_VERSION_QUERIED_SHIFT - 1)) #define I_VERSION_INCREMENT (1ULL << I_VERSION_QUERIED_SHIFT) /** * inode_set_iversion_raw - set i_version to the specified raw value * @inode: inode to set * @val: new i_version value to set * * Set @inode's i_version field to @val. This function is for use by * filesystems that self-manage the i_version. * * For example, the NFS client stores its NFSv4 change attribute in this way, * and the AFS client stores the data_version from the server here. */ static inline void inode_set_iversion_raw(struct inode *inode, u64 val) { atomic64_set(&inode->i_version, val); } /** * inode_peek_iversion_raw - grab a "raw" iversion value * @inode: inode from which i_version should be read * * Grab a "raw" inode->i_version value and return it. The i_version is not * flagged or converted in any way. This is mostly used to access a self-managed * i_version. * * With those filesystems, we want to treat the i_version as an entirely * opaque value. */ static inline u64 inode_peek_iversion_raw(const struct inode *inode) { return atomic64_read(&inode->i_version); } /** * inode_set_max_iversion_raw - update i_version new value is larger * @inode: inode to set * @val: new i_version to set * * Some self-managed filesystems (e.g Ceph) will only update the i_version * value if the new value is larger than the one we already have. */ static inline void inode_set_max_iversion_raw(struct inode *inode, u64 val) { u64 cur, old; cur = inode_peek_iversion_raw(inode); for (;;) { if (cur > val) break; old = atomic64_cmpxchg(&inode->i_version, cur, val); if (likely(old == cur)) break; cur = old; } } /** * inode_set_iversion - set i_version to a particular value * @inode: inode to set * @val: new i_version value to set * * Set @inode's i_version field to @val. This function is for filesystems with * a kernel-managed i_version, for initializing a newly-created inode from * scratch. * * In this case, we do not set the QUERIED flag since we know that this value * has never been queried. */ static inline void inode_set_iversion(struct inode *inode, u64 val) { inode_set_iversion_raw(inode, val << I_VERSION_QUERIED_SHIFT); } /** * inode_set_iversion_queried - set i_version to a particular value as quereied * @inode: inode to set * @val: new i_version value to set * * Set @inode's i_version field to @val, and flag it for increment on the next * change. * * Filesystems that persistently store the i_version on disk should use this * when loading an existing inode from disk. * * When loading in an i_version value from a backing store, we can't be certain * that it wasn't previously viewed before being stored. Thus, we must assume * that it was, to ensure that we don't end up handing out the same value for * different versions of the same inode. */ static inline void inode_set_iversion_queried(struct inode *inode, u64 val) { inode_set_iversion_raw(inode, (val << I_VERSION_QUERIED_SHIFT) | I_VERSION_QUERIED); } /** * inode_maybe_inc_iversion - increments i_version * @inode: inode with the i_version that should be updated * @force: increment the counter even if it's not necessary? * * Every time the inode is modified, the i_version field must be seen to have * changed by any observer. * * If "force" is set or the QUERIED flag is set, then ensure that we increment * the value, and clear the queried flag. * * In the common case where neither is set, then we can return "false" without * updating i_version. * * If this function returns false, and no other metadata has changed, then we * can avoid logging the metadata. */ static inline bool inode_maybe_inc_iversion(struct inode *inode, bool force) { u64 cur, old, new; /* * The i_version field is not strictly ordered with any other inode * information, but the legacy inode_inc_iversion code used a spinlock * to serialize increments. * * Here, we add full memory barriers to ensure that any de-facto * ordering with other info is preserved. * * This barrier pairs with the barrier in inode_query_iversion() */ smp_mb(); cur = inode_peek_iversion_raw(inode); for (;;) { /* If flag is clear then we needn't do anything */ if (!force && !(cur & I_VERSION_QUERIED)) return false; /* Since lowest bit is flag, add 2 to avoid it */ new = (cur & ~I_VERSION_QUERIED) + I_VERSION_INCREMENT; old = atomic64_cmpxchg(&inode->i_version, cur, new); if (likely(old == cur)) break; cur = old; } return true; } /** * inode_inc_iversion - forcibly increment i_version * @inode: inode that needs to be updated * * Forcbily increment the i_version field. This always results in a change to * the observable value. */ static inline void inode_inc_iversion(struct inode *inode) { inode_maybe_inc_iversion(inode, true); } /** * inode_iversion_need_inc - is the i_version in need of being incremented? * @inode: inode to check * * Returns whether the inode->i_version counter needs incrementing on the next * change. Just fetch the value and check the QUERIED flag. */ static inline bool inode_iversion_need_inc(struct inode *inode) { return inode_peek_iversion_raw(inode) & I_VERSION_QUERIED; } /** * inode_inc_iversion_raw - forcibly increment raw i_version * @inode: inode that needs to be updated * * Forcbily increment the raw i_version field. This always results in a change * to the raw value. * * NFS will use the i_version field to store the value from the server. It * mostly treats it as opaque, but in the case where it holds a write * delegation, it must increment the value itself. This function does that. */ static inline void inode_inc_iversion_raw(struct inode *inode) { atomic64_inc(&inode->i_version); } /** * inode_peek_iversion - read i_version without flagging it to be incremented * @inode: inode from which i_version should be read * * Read the inode i_version counter for an inode without registering it as a * query. * * This is typically used by local filesystems that need to store an i_version * on disk. In that situation, it's not necessary to flag it as having been * viewed, as the result won't be used to gauge changes from that point. */ static inline u64 inode_peek_iversion(const struct inode *inode) { return inode_peek_iversion_raw(inode) >> I_VERSION_QUERIED_SHIFT; } /** * inode_query_iversion - read i_version for later use * @inode: inode from which i_version should be read * * Read the inode i_version counter. This should be used by callers that wish * to store the returned i_version for later comparison. This will guarantee * that a later query of the i_version will result in a different value if * anything has changed. * * In this implementation, we fetch the current value, set the QUERIED flag and * then try to swap it into place with a cmpxchg, if it wasn't already set. If * that fails, we try again with the newly fetched value from the cmpxchg. */ static inline u64 inode_query_iversion(struct inode *inode) { u64 cur, old, new; cur = inode_peek_iversion_raw(inode); for (;;) { /* If flag is already set, then no need to swap */ if (cur & I_VERSION_QUERIED) { /* * This barrier (and the implicit barrier in the * cmpxchg below) pairs with the barrier in * inode_maybe_inc_iversion(). */ smp_mb(); break; } new = cur | I_VERSION_QUERIED; old = atomic64_cmpxchg(&inode->i_version, cur, new); if (likely(old == cur)) break; cur = old; } return cur >> I_VERSION_QUERIED_SHIFT; } /** * inode_eq_iversion_raw - check whether the raw i_version counter has changed * @inode: inode to check * @old: old value to check against its i_version * * Compare the current raw i_version counter with a previous one. Returns true * if they are the same or false if they are different. */ static inline bool inode_eq_iversion_raw(const struct inode *inode, u64 old) { return inode_peek_iversion_raw(inode) == old; } /** * inode_eq_iversion - check whether the i_version counter has changed * @inode: inode to check * @old: old value to check against its i_version * * Compare an i_version counter with a previous one. Returns true if they are * the same, and false if they are different. * * Note that we don't need to set the QUERIED flag in this case, as the value * in the inode is not being recorded for later use. */ static inline bool inode_eq_iversion(const struct inode *inode, u64 old) { return inode_peek_iversion(inode) == old; } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 /* * include/linux/ktime.h * * ktime_t - nanosecond-resolution time format. * * Copyright(C) 2005, Thomas Gleixner <tglx@linutronix.de> * Copyright(C) 2005, Red Hat, Inc., Ingo Molnar * * data type definitions, declarations, prototypes and macros. * * Started by: Thomas Gleixner and Ingo Molnar * * Credits: * * Roman Zippel provided the ideas and primary code snippets of * the ktime_t union and further simplifications of the original * code. * * For licencing details see kernel-base/COPYING */ #ifndef _LINUX_KTIME_H #define _LINUX_KTIME_H #include <linux/time.h> #include <linux/jiffies.h> #include <asm/bug.h> /* Nanosecond scalar representation for kernel time values */ typedef s64 ktime_t; /** * ktime_set - Set a ktime_t variable from a seconds/nanoseconds value * @secs: seconds to set * @nsecs: nanoseconds to set * * Return: The ktime_t representation of the value. */ static inline ktime_t ktime_set(const s64 secs, const unsigned long nsecs) { if (unlikely(secs >= KTIME_SEC_MAX)) return KTIME_MAX; return secs * NSEC_PER_SEC + (s64)nsecs; } /* Subtract two ktime_t variables. rem = lhs -rhs: */ #define ktime_sub(lhs, rhs) ((lhs) - (rhs)) /* Add two ktime_t variables. res = lhs + rhs: */ #define ktime_add(lhs, rhs) ((lhs) + (rhs)) /* * Same as ktime_add(), but avoids undefined behaviour on overflow; however, * this means that you must check the result for overflow yourself. */ #define ktime_add_unsafe(lhs, rhs) ((u64) (lhs) + (rhs)) /* * Add a ktime_t variable and a scalar nanosecond value. * res = kt + nsval: */ #define ktime_add_ns(kt, nsval) ((kt) + (nsval)) /* * Subtract a scalar nanosecod from a ktime_t variable * res = kt - nsval: */ #define ktime_sub_ns(kt, nsval) ((kt) - (nsval)) /* convert a timespec64 to ktime_t format: */ static inline ktime_t timespec64_to_ktime(struct timespec64 ts) { return ktime_set(ts.tv_sec, ts.tv_nsec); } /* Map the ktime_t to timespec conversion to ns_to_timespec function */ #define ktime_to_timespec64(kt) ns_to_timespec64((kt)) /* Convert ktime_t to nanoseconds */ static inline s64 ktime_to_ns(const ktime_t kt) { return kt; } /** * ktime_compare - Compares two ktime_t variables for less, greater or equal * @cmp1: comparable1 * @cmp2: comparable2 * * Return: ... * cmp1 < cmp2: return <0 * cmp1 == cmp2: return 0 * cmp1 > cmp2: return >0 */ static inline int ktime_compare(const ktime_t cmp1, const ktime_t cmp2) { if (cmp1 < cmp2) return -1; if (cmp1 > cmp2) return 1; return 0; } /** * ktime_after - Compare if a ktime_t value is bigger than another one. * @cmp1: comparable1 * @cmp2: comparable2 * * Return: true if cmp1 happened after cmp2. */ static inline bool ktime_after(const ktime_t cmp1, const ktime_t cmp2) { return ktime_compare(cmp1, cmp2) > 0; } /** * ktime_before - Compare if a ktime_t value is smaller than another one. * @cmp1: comparable1 * @cmp2: comparable2 * * Return: true if cmp1 happened before cmp2. */ static inline bool ktime_before(const ktime_t cmp1, const ktime_t cmp2) { return ktime_compare(cmp1, cmp2) < 0; } #if BITS_PER_LONG < 64 extern s64 __ktime_divns(const ktime_t kt, s64 div); static inline s64 ktime_divns(const ktime_t kt, s64 div) { /* * Negative divisors could cause an inf loop, * so bug out here. */ BUG_ON(div < 0); if (__builtin_constant_p(div) && !(div >> 32)) { s64 ns = kt; u64 tmp = ns < 0 ? -ns : ns; do_div(tmp, div); return ns < 0 ? -tmp : tmp; } else { return __ktime_divns(kt, div); } } #else /* BITS_PER_LONG < 64 */ static inline s64 ktime_divns(const ktime_t kt, s64 div) { /* * 32-bit implementation cannot handle negative divisors, * so catch them on 64bit as well. */ WARN_ON(div < 0); return kt / div; } #endif static inline s64 ktime_to_us(const ktime_t kt) { return ktime_divns(kt, NSEC_PER_USEC); } static inline s64 ktime_to_ms(const ktime_t kt) { return ktime_divns(kt, NSEC_PER_MSEC); } static inline s64 ktime_us_delta(const ktime_t later, const ktime_t earlier) { return ktime_to_us(ktime_sub(later, earlier)); } static inline s64 ktime_ms_delta(const ktime_t later, const ktime_t earlier) { return ktime_to_ms(ktime_sub(later, earlier)); } static inline ktime_t ktime_add_us(const ktime_t kt, const u64 usec) { return ktime_add_ns(kt, usec * NSEC_PER_USEC); } static inline ktime_t ktime_add_ms(const ktime_t kt, const u64 msec) { return ktime_add_ns(kt, msec * NSEC_PER_MSEC); } static inline ktime_t ktime_sub_us(const ktime_t kt, const u64 usec) { return ktime_sub_ns(kt, usec * NSEC_PER_USEC); } static inline ktime_t ktime_sub_ms(const ktime_t kt, const u64 msec) { return ktime_sub_ns(kt, msec * NSEC_PER_MSEC); } extern ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs); /** * ktime_to_timespec64_cond - convert a ktime_t variable to timespec64 * format only if the variable contains data * @kt: the ktime_t variable to convert * @ts: the timespec variable to store the result in * * Return: %true if there was a successful conversion, %false if kt was 0. */ static inline __must_check bool ktime_to_timespec64_cond(const ktime_t kt, struct timespec64 *ts) { if (kt) { *ts = ktime_to_timespec64(kt); return true; } else { return false; } } #include <vdso/ktime.h> static inline ktime_t ns_to_ktime(u64 ns) { return ns; } static inline ktime_t ms_to_ktime(u64 ms) { return ms * NSEC_PER_MSEC; } # include <linux/timekeeping.h> # include <linux/timekeeping32.h> #endif
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_ERR_H #define _LINUX_ERR_H #include <linux/compiler.h> #include <linux/types.h> #include <asm/errno.h> /* * Kernel pointers have redundant information, so we can use a * scheme where we can return either an error code or a normal * pointer with the same return value. * * This should be a per-architecture thing, to allow different * error and pointer decisions. */ #define MAX_ERRNO 4095 #ifndef __ASSEMBLY__ #define IS_ERR_VALUE(x) unlikely((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO) static inline void * __must_check ERR_PTR(long error) { return (void *) error; } static inline long __must_check PTR_ERR(__force const void *ptr) { return (long) ptr; } static inline bool __must_check IS_ERR(__force const void *ptr) { return IS_ERR_VALUE((unsigned long)ptr); } static inline bool __must_check IS_ERR_OR_NULL(__force const void *ptr) { return unlikely(!ptr) || IS_ERR_VALUE((unsigned long)ptr); } /** * ERR_CAST - Explicitly cast an error-valued pointer to another pointer type * @ptr: The pointer to cast. * * Explicitly cast an error-valued pointer to another pointer type in such a * way as to make it clear that's what's going on. */ static inline void * __must_check ERR_CAST(__force const void *ptr) { /* cast away the const */ return (void *) ptr; } static inline int __must_check PTR_ERR_OR_ZERO(__force const void *ptr) { if (IS_ERR(ptr)) return PTR_ERR(ptr); else return 0; } #endif #endif /* _LINUX_ERR_H */
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 16