1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * linux/drivers/char/serial_core.h * * Copyright (C) 2000 Deep Blue Solutions Ltd. */ #ifndef LINUX_SERIAL_CORE_H #define LINUX_SERIAL_CORE_H #include <linux/bitops.h> #include <linux/compiler.h> #include <linux/console.h> #include <linux/interrupt.h> #include <linux/circ_buf.h> #include <linux/spinlock.h> #include <linux/sched.h> #include <linux/tty.h> #include <linux/mutex.h> #include <linux/sysrq.h> #include <uapi/linux/serial_core.h> #ifdef CONFIG_SERIAL_CORE_CONSOLE #define uart_console(port) \ ((port)->cons && (port)->cons->index == (port)->line) #else #define uart_console(port) ({ (void)port; 0; }) #endif struct uart_port; struct serial_struct; struct device; struct gpio_desc; /* * This structure describes all the operations that can be done on the * physical hardware. See Documentation/driver-api/serial/driver.rst for details. */ struct uart_ops { unsigned int (*tx_empty)(struct uart_port *); void (*set_mctrl)(struct uart_port *, unsigned int mctrl); unsigned int (*get_mctrl)(struct uart_port *); void (*stop_tx)(struct uart_port *); void (*start_tx)(struct uart_port *); void (*throttle)(struct uart_port *); void (*unthrottle)(struct uart_port *); void (*send_xchar)(struct uart_port *, char ch); void (*stop_rx)(struct uart_port *); void (*enable_ms)(struct uart_port *); void (*break_ctl)(struct uart_port *, int ctl); int (*startup)(struct uart_port *); void (*shutdown)(struct uart_port *); void (*flush_buffer)(struct uart_port *); void (*set_termios)(struct uart_port *, struct ktermios *new, struct ktermios *old); void (*set_ldisc)(struct uart_port *, struct ktermios *); void (*pm)(struct uart_port *, unsigned int state, unsigned int oldstate); /* * Return a string describing the type of the port */ const char *(*type)(struct uart_port *); /* * Release IO and memory resources used by the port. * This includes iounmap if necessary. */ void (*release_port)(struct uart_port *); /* * Request IO and memory resources used by the port. * This includes iomapping the port if necessary. */ int (*request_port)(struct uart_port *); void (*config_port)(struct uart_port *, int); int (*verify_port)(struct uart_port *, struct serial_struct *); int (*ioctl)(struct uart_port *, unsigned int, unsigned long); #ifdef CONFIG_CONSOLE_POLL int (*poll_init)(struct uart_port *); void (*poll_put_char)(struct uart_port *, unsigned char); int (*poll_get_char)(struct uart_port *); #endif }; #define NO_POLL_CHAR 0x00ff0000 #define UART_CONFIG_TYPE (1 << 0) #define UART_CONFIG_IRQ (1 << 1) struct uart_icount { __u32 cts; __u32 dsr; __u32 rng; __u32 dcd; __u32 rx; __u32 tx; __u32 frame; __u32 overrun; __u32 parity; __u32 brk; __u32 buf_overrun; }; typedef unsigned int __bitwise upf_t; typedef unsigned int __bitwise upstat_t; struct uart_port { spinlock_t lock; /* port lock */ unsigned long iobase; /* in/out[bwl] */ unsigned char __iomem *membase; /* read/write[bwl] */ unsigned int (*serial_in)(struct uart_port *, int); void (*serial_out)(struct uart_port *, int, int); void (*set_termios)(struct uart_port *, struct ktermios *new, struct ktermios *old); void (*set_ldisc)(struct uart_port *, struct ktermios *); unsigned int (*get_mctrl)(struct uart_port *); void (*set_mctrl)(struct uart_port *, unsigned int); unsigned int (*get_divisor)(struct uart_port *, unsigned int baud, unsigned int *frac); void (*set_divisor)(struct uart_port *, unsigned int baud, unsigned int quot, unsigned int quot_frac); int (*startup)(struct uart_port *port); void (*shutdown)(struct uart_port *port); void (*throttle)(struct uart_port *port); void (*unthrottle)(struct uart_port *port); int (*handle_irq)(struct uart_port *); void (*pm)(struct uart_port *, unsigned int state, unsigned int old); void (*handle_break)(struct uart_port *); int (*rs485_config)(struct uart_port *, struct serial_rs485 *rs485); int (*iso7816_config)(struct uart_port *, struct serial_iso7816 *iso7816); unsigned int irq; /* irq number */ unsigned long irqflags; /* irq flags */ unsigned int uartclk; /* base uart clock */ unsigned int fifosize; /* tx fifo size */ unsigned char x_char; /* xon/xoff char */ unsigned char regshift; /* reg offset shift */ unsigned char iotype; /* io access style */ unsigned char quirks; /* internal quirks */ #define UPIO_PORT (SERIAL_IO_PORT) /* 8b I/O port access */ #define UPIO_HUB6 (SERIAL_IO_HUB6) /* Hub6 ISA card */ #define UPIO_MEM (SERIAL_IO_MEM) /* driver-specific */ #define UPIO_MEM32 (SERIAL_IO_MEM32) /* 32b little endian */ #define UPIO_AU (SERIAL_IO_AU) /* Au1x00 and RT288x type IO */ #define UPIO_TSI (SERIAL_IO_TSI) /* Tsi108/109 type IO */ #define UPIO_MEM32BE (SERIAL_IO_MEM32BE) /* 32b big endian */ #define UPIO_MEM16 (SERIAL_IO_MEM16) /* 16b little endian */ /* quirks must be updated while holding port mutex */ #define UPQ_NO_TXEN_TEST BIT(0) unsigned int read_status_mask; /* driver specific */ unsigned int ignore_status_mask; /* driver specific */ struct uart_state *state; /* pointer to parent state */ struct uart_icount icount; /* statistics */ struct console *cons; /* struct console, if any */ /* flags must be updated while holding port mutex */ upf_t flags; /* * These flags must be equivalent to the flags defined in * include/uapi/linux/tty_flags.h which are the userspace definitions * assigned from the serial_struct flags in uart_set_info() * [for bit definitions in the UPF_CHANGE_MASK] * * Bits [0..UPF_LAST_USER] are userspace defined/visible/changeable * The remaining bits are serial-core specific and not modifiable by * userspace. */ #define UPF_FOURPORT ((__force upf_t) ASYNC_FOURPORT /* 1 */ ) #define UPF_SAK ((__force upf_t) ASYNC_SAK /* 2 */ ) #define UPF_SPD_HI ((__force upf_t) ASYNC_SPD_HI /* 4 */ ) #define UPF_SPD_VHI ((__force upf_t) ASYNC_SPD_VHI /* 5 */ ) #define UPF_SPD_CUST ((__force upf_t) ASYNC_SPD_CUST /* 0x0030 */ ) #define UPF_SPD_WARP ((__force upf_t) ASYNC_SPD_WARP /* 0x1010 */ ) #define UPF_SPD_MASK ((__force upf_t) ASYNC_SPD_MASK /* 0x1030 */ ) #define UPF_SKIP_TEST ((__force upf_t) ASYNC_SKIP_TEST /* 6 */ ) #define UPF_AUTO_IRQ ((__force upf_t) ASYNC_AUTO_IRQ /* 7 */ ) #define UPF_HARDPPS_CD ((__force upf_t) ASYNC_HARDPPS_CD /* 11 */ ) #define UPF_SPD_SHI ((__force upf_t) ASYNC_SPD_SHI /* 12 */ ) #define UPF_LOW_LATENCY ((__force upf_t) ASYNC_LOW_LATENCY /* 13 */ ) #define UPF_BUGGY_UART ((__force upf_t) ASYNC_BUGGY_UART /* 14 */ ) #define UPF_MAGIC_MULTIPLIER ((__force upf_t) ASYNC_MAGIC_MULTIPLIER /* 16 */ ) #define UPF_NO_THRE_TEST ((__force upf_t) (1 << 19)) /* Port has hardware-assisted h/w flow control */ #define UPF_AUTO_CTS ((__force upf_t) (1 << 20)) #define UPF_AUTO_RTS ((__force upf_t) (1 << 21)) #define UPF_HARD_FLOW ((__force upf_t) (UPF_AUTO_CTS | UPF_AUTO_RTS)) /* Port has hardware-assisted s/w flow control */ #define UPF_SOFT_FLOW ((__force upf_t) (1 << 22)) #define UPF_CONS_FLOW ((__force upf_t) (1 << 23)) #define UPF_SHARE_IRQ ((__force upf_t) (1 << 24)) #define UPF_EXAR_EFR ((__force upf_t) (1 << 25)) #define UPF_BUG_THRE ((__force upf_t) (1 << 26)) /* The exact UART type is known and should not be probed. */ #define UPF_FIXED_TYPE ((__force upf_t) (1 << 27)) #define UPF_BOOT_AUTOCONF ((__force upf_t) (1 << 28)) #define UPF_FIXED_PORT ((__force upf_t) (1 << 29)) #define UPF_DEAD ((__force upf_t) (1 << 30)) #define UPF_IOREMAP ((__force upf_t) (1 << 31)) #define __UPF_CHANGE_MASK 0x17fff #define UPF_CHANGE_MASK ((__force upf_t) __UPF_CHANGE_MASK) #define UPF_USR_MASK ((__force upf_t) (UPF_SPD_MASK|UPF_LOW_LATENCY)) #if __UPF_CHANGE_MASK > ASYNC_FLAGS #error Change mask not equivalent to userspace-visible bit defines #endif /* * Must hold termios_rwsem, port mutex and port lock to change; * can hold any one lock to read. */ upstat_t status; #define UPSTAT_CTS_ENABLE ((__force upstat_t) (1 << 0)) #define UPSTAT_DCD_ENABLE ((__force upstat_t) (1 << 1)) #define UPSTAT_AUTORTS ((__force upstat_t) (1 << 2)) #define UPSTAT_AUTOCTS ((__force upstat_t) (1 << 3)) #define UPSTAT_AUTOXOFF ((__force upstat_t) (1 << 4)) #define UPSTAT_SYNC_FIFO ((__force upstat_t) (1 << 5)) int hw_stopped; /* sw-assisted CTS flow state */ unsigned int mctrl; /* current modem ctrl settings */ unsigned int timeout; /* character-based timeout */ unsigned int type; /* port type */ const struct uart_ops *ops; unsigned int custom_divisor; unsigned int line; /* port index */ unsigned int minor; resource_size_t mapbase; /* for ioremap */ resource_size_t mapsize; struct device *dev; /* parent device */ unsigned long sysrq; /* sysrq timeout */ unsigned int sysrq_ch; /* char for sysrq */ unsigned char has_sysrq; unsigned char sysrq_seq; /* index in sysrq_toggle_seq */ unsigned char hub6; /* this should be in the 8250 driver */ unsigned char suspended; unsigned char console_reinit; const char *name; /* port name */ struct attribute_group *attr_group; /* port specific attributes */ const struct attribute_group **tty_groups; /* all attributes (serial core use only) */ struct serial_rs485 rs485; struct gpio_desc *rs485_term_gpio; /* enable RS485 bus termination */ struct serial_iso7816 iso7816; void *private_data; /* generic platform data pointer */ }; static inline int serial_port_in(struct uart_port *up, int offset) { return up->serial_in(up, offset); } static inline void serial_port_out(struct uart_port *up, int offset, int value) { up->serial_out(up, offset, value); } /** * enum uart_pm_state - power states for UARTs * @UART_PM_STATE_ON: UART is powered, up and operational * @UART_PM_STATE_OFF: UART is powered off * @UART_PM_STATE_UNDEFINED: sentinel */ enum uart_pm_state { UART_PM_STATE_ON = 0, UART_PM_STATE_OFF = 3, /* number taken from ACPI */ UART_PM_STATE_UNDEFINED, }; /* * This is the state information which is persistent across opens. */ struct uart_state { struct tty_port port; enum uart_pm_state pm_state; struct circ_buf xmit; atomic_t refcount; wait_queue_head_t remove_wait; struct uart_port *uart_port; }; #define UART_XMIT_SIZE PAGE_SIZE /* number of characters left in xmit buffer before we ask for more */ #define WAKEUP_CHARS 256 struct module; struct tty_driver; struct uart_driver { struct module *owner; const char *driver_name; const char *dev_name; int major; int minor; int nr; struct console *cons; /* * these are private; the low level driver should not * touch these; they should be initialised to NULL */ struct uart_state *state; struct tty_driver *tty_driver; }; void uart_write_wakeup(struct uart_port *port); /* * Baud rate helpers. */ void uart_update_timeout(struct uart_port *port, unsigned int cflag, unsigned int baud); unsigned int uart_get_baud_rate(struct uart_port *port, struct ktermios *termios, struct ktermios *old, unsigned int min, unsigned int max); unsigned int uart_get_divisor(struct uart_port *port, unsigned int baud); /* Base timer interval for polling */ static inline int uart_poll_timeout(struct uart_port *port) { int timeout = port->timeout; return timeout > 6 ? (timeout / 2 - 2) : 1; } /* * Console helpers. */ struct earlycon_device { struct console *con; struct uart_port port; char options[16]; /* e.g., 115200n8 */ unsigned int baud; }; struct earlycon_id { char name[15]; char name_term; /* In case compiler didn't '\0' term name */ char compatible[128]; int (*setup)(struct earlycon_device *, const char *options); }; extern const struct earlycon_id *__earlycon_table[]; extern const struct earlycon_id *__earlycon_table_end[]; #if defined(CONFIG_SERIAL_EARLYCON) && !defined(MODULE) #define EARLYCON_USED_OR_UNUSED __used #else #define EARLYCON_USED_OR_UNUSED __maybe_unused #endif #define _OF_EARLYCON_DECLARE(_name, compat, fn, unique_id) \ static const struct earlycon_id unique_id \ EARLYCON_USED_OR_UNUSED __initconst \ = { .name = __stringify(_name), \ .compatible = compat, \ .setup = fn }; \ static const struct earlycon_id EARLYCON_USED_OR_UNUSED \ __section("__earlycon_table") \ * const __PASTE(__p, unique_id) = &unique_id #define OF_EARLYCON_DECLARE(_name, compat, fn) \ _OF_EARLYCON_DECLARE(_name, compat, fn, \ __UNIQUE_ID(__earlycon_##_name)) #define EARLYCON_DECLARE(_name, fn) OF_EARLYCON_DECLARE(_name, "", fn) extern int of_setup_earlycon(const struct earlycon_id *match, unsigned long node, const char *options); #ifdef CONFIG_SERIAL_EARLYCON extern bool earlycon_acpi_spcr_enable __initdata; int setup_earlycon(char *buf); #else static const bool earlycon_acpi_spcr_enable EARLYCON_USED_OR_UNUSED; static inline int setup_earlycon(char *buf) { return 0; } #endif struct uart_port *uart_get_console(struct uart_port *ports, int nr, struct console *c); int uart_parse_earlycon(char *p, unsigned char *iotype, resource_size_t *addr, char **options); void uart_parse_options(const char *options, int *baud, int *parity, int *bits, int *flow); int uart_set_options(struct uart_port *port, struct console *co, int baud, int parity, int bits, int flow); struct tty_driver *uart_console_device(struct console *co, int *index); void uart_console_write(struct uart_port *port, const char *s, unsigned int count, void (*putchar)(struct uart_port *, int)); /* * Port/driver registration/removal */ int uart_register_driver(struct uart_driver *uart); void uart_unregister_driver(struct uart_driver *uart); int uart_add_one_port(struct uart_driver *reg, struct uart_port *port); int uart_remove_one_port(struct uart_driver *reg, struct uart_port *port); int uart_match_port(struct uart_port *port1, struct uart_port *port2); /* * Power Management */ int uart_suspend_port(struct uart_driver *reg, struct uart_port *port); int uart_resume_port(struct uart_driver *reg, struct uart_port *port); #define uart_circ_empty(circ) ((circ)->head == (circ)->tail) #define uart_circ_clear(circ) ((circ)->head = (circ)->tail = 0) #define uart_circ_chars_pending(circ) \ (CIRC_CNT((circ)->head, (circ)->tail, UART_XMIT_SIZE)) #define uart_circ_chars_free(circ) \ (CIRC_SPACE((circ)->head, (circ)->tail, UART_XMIT_SIZE)) static inline int uart_tx_stopped(struct uart_port *port) { struct tty_struct *tty = port->state->port.tty; if ((tty && tty->stopped) || port->hw_stopped) return 1; return 0; } static inline bool uart_cts_enabled(struct uart_port *uport) { return !!(uport->status & UPSTAT_CTS_ENABLE); } static inline bool uart_softcts_mode(struct uart_port *uport) { upstat_t mask = UPSTAT_CTS_ENABLE | UPSTAT_AUTOCTS; return ((uport->status & mask) == UPSTAT_CTS_ENABLE); } /* * The following are helper functions for the low level drivers. */ extern void uart_handle_dcd_change(struct uart_port *uport, unsigned int status); extern void uart_handle_cts_change(struct uart_port *uport, unsigned int status); extern void uart_insert_char(struct uart_port *port, unsigned int status, unsigned int overrun, unsigned int ch, unsigned int flag); #ifdef CONFIG_MAGIC_SYSRQ_SERIAL #define SYSRQ_TIMEOUT (HZ * 5) bool uart_try_toggle_sysrq(struct uart_port *port, unsigned int ch); static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) { if (!port->sysrq) return 0; if (ch && time_before(jiffies, port->sysrq)) { if (sysrq_mask()) { handle_sysrq(ch); port->sysrq = 0; return 1; } if (uart_try_toggle_sysrq(port, ch)) return 1; } port->sysrq = 0; return 0; } static inline int uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch) { if (!port->sysrq) return 0; if (ch && time_before(jiffies, port->sysrq)) { if (sysrq_mask()) { port->sysrq_ch = ch; port->sysrq = 0; return 1; } if (uart_try_toggle_sysrq(port, ch)) return 1; } port->sysrq = 0; return 0; } static inline void uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long irqflags) { int sysrq_ch; if (!port->has_sysrq) { spin_unlock_irqrestore(&port->lock, irqflags); return; } sysrq_ch = port->sysrq_ch; port->sysrq_ch = 0; spin_unlock_irqrestore(&port->lock, irqflags); if (sysrq_ch) handle_sysrq(sysrq_ch); } #else /* CONFIG_MAGIC_SYSRQ_SERIAL */ static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) { return 0; } static inline int uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch) { return 0; } static inline void uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long irqflags) { spin_unlock_irqrestore(&port->lock, irqflags); } #endif /* CONFIG_MAGIC_SYSRQ_SERIAL */ /* * We do the SysRQ and SAK checking like this... */ static inline int uart_handle_break(struct uart_port *port) { struct uart_state *state = port->state; if (port->handle_break) port->handle_break(port); #ifdef CONFIG_MAGIC_SYSRQ_SERIAL if (port->has_sysrq && uart_console(port)) { if (!port->sysrq) { port->sysrq = jiffies + SYSRQ_TIMEOUT; return 1; } port->sysrq = 0; } #endif if (port->flags & UPF_SAK) do_SAK(state->port.tty); return 0; } /* * UART_ENABLE_MS - determine if port should enable modem status irqs */ #define UART_ENABLE_MS(port,cflag) ((port)->flags & UPF_HARDPPS_CD || \ (cflag) & CRTSCTS || \ !((cflag) & CLOCAL)) int uart_get_rs485_mode(struct uart_port *port); #endif /* LINUX_SERIAL_CORE_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 /* SPDX-License-Identifier: GPL-2.0+ */ #ifndef _LINUX_XARRAY_H #define _LINUX_XARRAY_H /* * eXtensible Arrays * Copyright (c) 2017 Microsoft Corporation * Author: Matthew Wilcox <willy@infradead.org> * * See Documentation/core-api/xarray.rst for how to use the XArray. */ #include <linux/bug.h> #include <linux/compiler.h> #include <linux/gfp.h> #include <linux/kconfig.h> #include <linux/kernel.h> #include <linux/rcupdate.h> #include <linux/spinlock.h> #include <linux/types.h> /* * The bottom two bits of the entry determine how the XArray interprets * the contents: * * 00: Pointer entry * 10: Internal entry * x1: Value entry or tagged pointer * * Attempting to store internal entries in the XArray is a bug. * * Most internal entries are pointers to the next node in the tree. * The following internal entries have a special meaning: * * 0-62: Sibling entries * 256: Retry entry * 257: Zero entry * * Errors are also represented as internal entries, but use the negative * space (-4094 to -2). They're never stored in the slots array; only * returned by the normal API. */ #define BITS_PER_XA_VALUE (BITS_PER_LONG - 1) /** * xa_mk_value() - Create an XArray entry from an integer. * @v: Value to store in XArray. * * Context: Any context. * Return: An entry suitable for storing in the XArray. */ static inline void *xa_mk_value(unsigned long v) { WARN_ON((long)v < 0); return (void *)((v << 1) | 1); } /** * xa_to_value() - Get value stored in an XArray entry. * @entry: XArray entry. * * Context: Any context. * Return: The value stored in the XArray entry. */ static inline unsigned long xa_to_value(const void *entry) { return (unsigned long)entry >> 1; } /** * xa_is_value() - Determine if an entry is a value. * @entry: XArray entry. * * Context: Any context. * Return: True if the entry is a value, false if it is a pointer. */ static inline bool xa_is_value(const void *entry) { return (unsigned long)entry & 1; } /** * xa_tag_pointer() - Create an XArray entry for a tagged pointer. * @p: Plain pointer. * @tag: Tag value (0, 1 or 3). * * If the user of the XArray prefers, they can tag their pointers instead * of storing value entries. Three tags are available (0, 1 and 3). * These are distinct from the xa_mark_t as they are not replicated up * through the array and cannot be searched for. * * Context: Any context. * Return: An XArray entry. */ static inline void *xa_tag_pointer(void *p, unsigned long tag) { return (void *)((unsigned long)p | tag); } /** * xa_untag_pointer() - Turn an XArray entry into a plain pointer. * @entry: XArray entry. * * If you have stored a tagged pointer in the XArray, call this function * to get the untagged version of the pointer. * * Context: Any context. * Return: A pointer. */ static inline void *xa_untag_pointer(void *entry) { return (void *)((unsigned long)entry & ~3UL); } /** * xa_pointer_tag() - Get the tag stored in an XArray entry. * @entry: XArray entry. * * If you have stored a tagged pointer in the XArray, call this function * to get the tag of that pointer. * * Context: Any context. * Return: A tag. */ static inline unsigned int xa_pointer_tag(void *entry) { return (unsigned long)entry & 3UL; } /* * xa_mk_internal() - Create an internal entry. * @v: Value to turn into an internal entry. * * Internal entries are used for a number of purposes. Entries 0-255 are * used for sibling entries (only 0-62 are used by the current code). 256 * is used for the retry entry. 257 is used for the reserved / zero entry. * Negative internal entries are used to represent errnos. Node pointers * are also tagged as internal entries in some situations. * * Context: Any context. * Return: An XArray internal entry corresponding to this value. */ static inline void *xa_mk_internal(unsigned long v) { return (void *)((v << 2) | 2); } /* * xa_to_internal() - Extract the value from an internal entry. * @entry: XArray entry. * * Context: Any context. * Return: The value which was stored in the internal entry. */ static inline unsigned long xa_to_internal(const void *entry) { return (unsigned long)entry >> 2; } /* * xa_is_internal() - Is the entry an internal entry? * @entry: XArray entry. * * Context: Any context. * Return: %true if the entry is an internal entry. */ static inline bool xa_is_internal(const void *entry) { return ((unsigned long)entry & 3) == 2; } #define XA_ZERO_ENTRY xa_mk_internal(257) /** * xa_is_zero() - Is the entry a zero entry? * @entry: Entry retrieved from the XArray * * The normal API will return NULL as the contents of a slot containing * a zero entry. You can only see zero entries by using the advanced API. * * Return: %true if the entry is a zero entry. */ static inline bool xa_is_zero(const void *entry) { return unlikely(entry == XA_ZERO_ENTRY); } /** * xa_is_err() - Report whether an XArray operation returned an error * @entry: Result from calling an XArray function * * If an XArray operation cannot complete an operation, it will return * a special value indicating an error. This function tells you * whether an error occurred; xa_err() tells you which error occurred. * * Context: Any context. * Return: %true if the entry indicates an error. */ static inline bool xa_is_err(const void *entry) { return unlikely(xa_is_internal(entry) && entry >= xa_mk_internal(-MAX_ERRNO)); } /** * xa_err() - Turn an XArray result into an errno. * @entry: Result from calling an XArray function. * * If an XArray operation cannot complete an operation, it will return * a special pointer value which encodes an errno. This function extracts * the errno from the pointer value, or returns 0 if the pointer does not * represent an errno. * * Context: Any context. * Return: A negative errno or 0. */ static inline int xa_err(void *entry) { /* xa_to_internal() would not do sign extension. */ if (xa_is_err(entry)) return (long)entry >> 2; return 0; } /** * struct xa_limit - Represents a range of IDs. * @min: The lowest ID to allocate (inclusive). * @max: The maximum ID to allocate (inclusive). * * This structure is used either directly or via the XA_LIMIT() macro * to communicate the range of IDs that are valid for allocation. * Two common ranges are predefined for you: * * xa_limit_32b - [0 - UINT_MAX] * * xa_limit_31b - [0 - INT_MAX] */ struct xa_limit { u32 max; u32 min; }; #define XA_LIMIT(_min, _max) (struct xa_limit) { .min = _min, .max = _max } #define xa_limit_32b XA_LIMIT(0, UINT_MAX) #define xa_limit_31b XA_LIMIT(0, INT_MAX) typedef unsigned __bitwise xa_mark_t; #define XA_MARK_0 ((__force xa_mark_t)0U) #define XA_MARK_1 ((__force xa_mark_t)1U) #define XA_MARK_2 ((__force xa_mark_t)2U) #define XA_PRESENT ((__force xa_mark_t)8U) #define XA_MARK_MAX XA_MARK_2 #define XA_FREE_MARK XA_MARK_0 enum xa_lock_type { XA_LOCK_IRQ = 1, XA_LOCK_BH = 2, }; /* * Values for xa_flags. The radix tree stores its GFP flags in the xa_flags, * and we remain compatible with that. */ #define XA_FLAGS_LOCK_IRQ ((__force gfp_t)XA_LOCK_IRQ) #define XA_FLAGS_LOCK_BH ((__force gfp_t)XA_LOCK_BH) #define XA_FLAGS_TRACK_FREE ((__force gfp_t)4U) #define XA_FLAGS_ZERO_BUSY ((__force gfp_t)8U) #define XA_FLAGS_ALLOC_WRAPPED ((__force gfp_t)16U) #define XA_FLAGS_ACCOUNT ((__force gfp_t)32U) #define XA_FLAGS_MARK(mark) ((__force gfp_t)((1U << __GFP_BITS_SHIFT) << \ (__force unsigned)(mark))) /* ALLOC is for a normal 0-based alloc. ALLOC1 is for an 1-based alloc */ #define XA_FLAGS_ALLOC (XA_FLAGS_TRACK_FREE | XA_FLAGS_MARK(XA_FREE_MARK)) #define XA_FLAGS_ALLOC1 (XA_FLAGS_TRACK_FREE | XA_FLAGS_ZERO_BUSY) /** * struct xarray - The anchor of the XArray. * @xa_lock: Lock that protects the contents of the XArray. * * To use the xarray, define it statically or embed it in your data structure. * It is a very small data structure, so it does not usually make sense to * allocate it separately and keep a pointer to it in your data structure. * * You may use the xa_lock to protect your own data structures as well. */ /* * If all of the entries in the array are NULL, @xa_head is a NULL pointer. * If the only non-NULL entry in the array is at index 0, @xa_head is that * entry. If any other entry in the array is non-NULL, @xa_head points * to an @xa_node. */ struct xarray { spinlock_t xa_lock; /* private: The rest of the data structure is not to be used directly. */ gfp_t xa_flags; void __rcu * xa_head; }; #define XARRAY_INIT(name, flags) { \ .xa_lock = __SPIN_LOCK_UNLOCKED(name.xa_lock), \ .xa_flags = flags, \ .xa_head = NULL, \ } /** * DEFINE_XARRAY_FLAGS() - Define an XArray with custom flags. * @name: A string that names your XArray. * @flags: XA_FLAG values. * * This is intended for file scope definitions of XArrays. It declares * and initialises an empty XArray with the chosen name and flags. It is * equivalent to calling xa_init_flags() on the array, but it does the * initialisation at compiletime instead of runtime. */ #define DEFINE_XARRAY_FLAGS(name, flags) \ struct xarray name = XARRAY_INIT(name, flags) /** * DEFINE_XARRAY() - Define an XArray. * @name: A string that names your XArray. * * This is intended for file scope definitions of XArrays. It declares * and initialises an empty XArray with the chosen name. It is equivalent * to calling xa_init() on the array, but it does the initialisation at * compiletime instead of runtime. */ #define DEFINE_XARRAY(name) DEFINE_XARRAY_FLAGS(name, 0) /** * DEFINE_XARRAY_ALLOC() - Define an XArray which allocates IDs starting at 0. * @name: A string that names your XArray. * * This is intended for file scope definitions of allocating XArrays. * See also DEFINE_XARRAY(). */ #define DEFINE_XARRAY_ALLOC(name) DEFINE_XARRAY_FLAGS(name, XA_FLAGS_ALLOC) /** * DEFINE_XARRAY_ALLOC1() - Define an XArray which allocates IDs starting at 1. * @name: A string that names your XArray. * * This is intended for file scope definitions of allocating XArrays. * See also DEFINE_XARRAY(). */ #define DEFINE_XARRAY_ALLOC1(name) DEFINE_XARRAY_FLAGS(name, XA_FLAGS_ALLOC1) void *xa_load(struct xarray *, unsigned long index); void *xa_store(struct xarray *, unsigned long index, void *entry, gfp_t); void *xa_erase(struct xarray *, unsigned long index); void *xa_store_range(struct xarray *, unsigned long first, unsigned long last, void *entry, gfp_t); bool xa_get_mark(struct xarray *, unsigned long index, xa_mark_t); void xa_set_mark(struct xarray *, unsigned long index, xa_mark_t); void xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t); void *xa_find(struct xarray *xa, unsigned long *index, unsigned long max, xa_mark_t) __attribute__((nonnull(2))); void *xa_find_after(struct xarray *xa, unsigned long *index, unsigned long max, xa_mark_t) __attribute__((nonnull(2))); unsigned int xa_extract(struct xarray *, void **dst, unsigned long start, unsigned long max, unsigned int n, xa_mark_t); void xa_destroy(struct xarray *); /** * xa_init_flags() - Initialise an empty XArray with flags. * @xa: XArray. * @flags: XA_FLAG values. * * If you need to initialise an XArray with special flags (eg you need * to take the lock from interrupt context), use this function instead * of xa_init(). * * Context: Any context. */ static inline void xa_init_flags(struct xarray *xa, gfp_t flags) { spin_lock_init(&xa->xa_lock); xa->xa_flags = flags; xa->xa_head = NULL; } /** * xa_init() - Initialise an empty XArray. * @xa: XArray. * * An empty XArray is full of NULL entries. * * Context: Any context. */ static inline void xa_init(struct xarray *xa) { xa_init_flags(xa, 0); } /** * xa_empty() - Determine if an array has any present entries. * @xa: XArray. * * Context: Any context. * Return: %true if the array contains only NULL pointers. */ static inline bool xa_empty(const struct xarray *xa) { return xa->xa_head == NULL; } /** * xa_marked() - Inquire whether any entry in this array has a mark set * @xa: Array * @mark: Mark value * * Context: Any context. * Return: %true if any entry has this mark set. */ static inline bool xa_marked(const struct xarray *xa, xa_mark_t mark) { return xa->xa_flags & XA_FLAGS_MARK(mark); } /** * xa_for_each_range() - Iterate over a portion of an XArray. * @xa: XArray. * @index: Index of @entry. * @entry: Entry retrieved from array. * @start: First index to retrieve from array. * @last: Last index to retrieve from array. * * During the iteration, @entry will have the value of the entry stored * in @xa at @index. You may modify @index during the iteration if you * want to skip or reprocess indices. It is safe to modify the array * during the iteration. At the end of the iteration, @entry will be set * to NULL and @index will have a value less than or equal to max. * * xa_for_each_range() is O(n.log(n)) while xas_for_each() is O(n). You have * to handle your own locking with xas_for_each(), and if you have to unlock * after each iteration, it will also end up being O(n.log(n)). * xa_for_each_range() will spin if it hits a retry entry; if you intend to * see retry entries, you should use the xas_for_each() iterator instead. * The xas_for_each() iterator will expand into more inline code than * xa_for_each_range(). * * Context: Any context. Takes and releases the RCU lock. */ #define xa_for_each_range(xa, index, entry, start, last) \ for (index = start, \ entry = xa_find(xa, &index, last, XA_PRESENT); \ entry; \ entry = xa_find_after(xa, &index, last, XA_PRESENT)) /** * xa_for_each_start() - Iterate over a portion of an XArray. * @xa: XArray. * @index: Index of @entry. * @entry: Entry retrieved from array. * @start: First index to retrieve from array. * * During the iteration, @entry will have the value of the entry stored * in @xa at @index. You may modify @index during the iteration if you * want to skip or reprocess indices. It is safe to modify the array * during the iteration. At the end of the iteration, @entry will be set * to NULL and @index will have a value less than or equal to max. * * xa_for_each_start() is O(n.log(n)) while xas_for_each() is O(n). You have * to handle your own locking with xas_for_each(), and if you have to unlock * after each iteration, it will also end up being O(n.log(n)). * xa_for_each_start() will spin if it hits a retry entry; if you intend to * see retry entries, you should use the xas_for_each() iterator instead. * The xas_for_each() iterator will expand into more inline code than * xa_for_each_start(). * * Context: Any context. Takes and releases the RCU lock. */ #define xa_for_each_start(xa, index, entry, start) \ xa_for_each_range(xa, index, entry, start, ULONG_MAX) /** * xa_for_each() - Iterate over present entries in an XArray. * @xa: XArray. * @index: Index of @entry. * @entry: Entry retrieved from array. * * During the iteration, @entry will have the value of the entry stored * in @xa at @index. You may modify @index during the iteration if you want * to skip or reprocess indices. It is safe to modify the array during the * iteration. At the end of the iteration, @entry will be set to NULL and * @index will have a value less than or equal to max. * * xa_for_each() is O(n.log(n)) while xas_for_each() is O(n). You have * to handle your own locking with xas_for_each(), and if you have to unlock * after each iteration, it will also end up being O(n.log(n)). xa_for_each() * will spin if it hits a retry entry; if you intend to see retry entries, * you should use the xas_for_each() iterator instead. The xas_for_each() * iterator will expand into more inline code than xa_for_each(). * * Context: Any context. Takes and releases the RCU lock. */ #define xa_for_each(xa, index, entry) \ xa_for_each_start(xa, index, entry, 0) /** * xa_for_each_marked() - Iterate over marked entries in an XArray. * @xa: XArray. * @index: Index of @entry. * @entry: Entry retrieved from array. * @filter: Selection criterion. * * During the iteration, @entry will have the value of the entry stored * in @xa at @index. The iteration will skip all entries in the array * which do not match @filter. You may modify @index during the iteration * if you want to skip or reprocess indices. It is safe to modify the array * during the iteration. At the end of the iteration, @entry will be set to * NULL and @index will have a value less than or equal to max. * * xa_for_each_marked() is O(n.log(n)) while xas_for_each_marked() is O(n). * You have to handle your own locking with xas_for_each(), and if you have * to unlock after each iteration, it will also end up being O(n.log(n)). * xa_for_each_marked() will spin if it hits a retry entry; if you intend to * see retry entries, you should use the xas_for_each_marked() iterator * instead. The xas_for_each_marked() iterator will expand into more inline * code than xa_for_each_marked(). * * Context: Any context. Takes and releases the RCU lock. */ #define xa_for_each_marked(xa, index, entry, filter) \ for (index = 0, entry = xa_find(xa, &index, ULONG_MAX, filter); \ entry; entry = xa_find_after(xa, &index, ULONG_MAX, filter)) #define xa_trylock(xa) spin_trylock(&(xa)->xa_lock) #define xa_lock(xa) spin_lock(&(xa)->xa_lock) #define xa_unlock(xa) spin_unlock(&(xa)->xa_lock) #define xa_lock_bh(xa) spin_lock_bh(&(xa)->xa_lock) #define xa_unlock_bh(xa) spin_unlock_bh(&(xa)->xa_lock) #define xa_lock_irq(xa) spin_lock_irq(&(xa)->xa_lock) #define xa_unlock_irq(xa) spin_unlock_irq(&(xa)->xa_lock) #define xa_lock_irqsave(xa, flags) \ spin_lock_irqsave(&(xa)->xa_lock, flags) #define xa_unlock_irqrestore(xa, flags) \ spin_unlock_irqrestore(&(xa)->xa_lock, flags) #define xa_lock_nested(xa, subclass) \ spin_lock_nested(&(xa)->xa_lock, subclass) #define xa_lock_bh_nested(xa, subclass) \ spin_lock_bh_nested(&(xa)->xa_lock, subclass) #define xa_lock_irq_nested(xa, subclass) \ spin_lock_irq_nested(&(xa)->xa_lock, subclass) #define xa_lock_irqsave_nested(xa, flags, subclass) \ spin_lock_irqsave_nested(&(xa)->xa_lock, flags, subclass) /* * Versions of the normal API which require the caller to hold the * xa_lock. If the GFP flags allow it, they will drop the lock to * allocate memory, then reacquire it afterwards. These functions * may also re-enable interrupts if the XArray flags indicate the * locking should be interrupt safe. */ void *__xa_erase(struct xarray *, unsigned long index); void *__xa_store(struct xarray *, unsigned long index, void *entry, gfp_t); void *__xa_cmpxchg(struct xarray *, unsigned long index, void *old, void *entry, gfp_t); int __must_check __xa_insert(struct xarray *, unsigned long index, void *entry, gfp_t); int __must_check __xa_alloc(struct xarray *, u32 *id, void *entry, struct xa_limit, gfp_t); int __must_check __xa_alloc_cyclic(struct xarray *, u32 *id, void *entry, struct xa_limit, u32 *next, gfp_t); void __xa_set_mark(struct xarray *, unsigned long index, xa_mark_t); void __xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t); /** * xa_store_bh() - Store this entry in the XArray. * @xa: XArray. * @index: Index into array. * @entry: New entry. * @gfp: Memory allocation flags. * * This function is like calling xa_store() except it disables softirqs * while holding the array lock. * * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. * Return: The old entry at this index or xa_err() if an error happened. */ static inline void *xa_store_bh(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) { void *curr; xa_lock_bh(xa); curr = __xa_store(xa, index, entry, gfp); xa_unlock_bh(xa); return curr; } /** * xa_store_irq() - Store this entry in the XArray. * @xa: XArray. * @index: Index into array. * @entry: New entry. * @gfp: Memory allocation flags. * * This function is like calling xa_store() except it disables interrupts * while holding the array lock. * * Context: Process context. Takes and releases the xa_lock while * disabling interrupts. * Return: The old entry at this index or xa_err() if an error happened. */ static inline void *xa_store_irq(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) { void *curr; xa_lock_irq(xa); curr = __xa_store(xa, index, entry, gfp); xa_unlock_irq(xa); return curr; } /** * xa_erase_bh() - Erase this entry from the XArray. * @xa: XArray. * @index: Index of entry. * * After this function returns, loading from @index will return %NULL. * If the index is part of a multi-index entry, all indices will be erased * and none of the entries will be part of a multi-index entry. * * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. * Return: The entry which used to be at this index. */ static inline void *xa_erase_bh(struct xarray *xa, unsigned long index) { void *entry; xa_lock_bh(xa); entry = __xa_erase(xa, index); xa_unlock_bh(xa); return entry; } /** * xa_erase_irq() - Erase this entry from the XArray. * @xa: XArray. * @index: Index of entry. * * After this function returns, loading from @index will return %NULL. * If the index is part of a multi-index entry, all indices will be erased * and none of the entries will be part of a multi-index entry. * * Context: Process context. Takes and releases the xa_lock while * disabling interrupts. * Return: The entry which used to be at this index. */ static inline void *xa_erase_irq(struct xarray *xa, unsigned long index) { void *entry; xa_lock_irq(xa); entry = __xa_erase(xa, index); xa_unlock_irq(xa); return entry; } /** * xa_cmpxchg() - Conditionally replace an entry in the XArray. * @xa: XArray. * @index: Index into array. * @old: Old value to test against. * @entry: New value to place in array. * @gfp: Memory allocation flags. * * If the entry at @index is the same as @old, replace it with @entry. * If the return value is equal to @old, then the exchange was successful. * * Context: Any context. Takes and releases the xa_lock. May sleep * if the @gfp flags permit. * Return: The old value at this index or xa_err() if an error happened. */ static inline void *xa_cmpxchg(struct xarray *xa, unsigned long index, void *old, void *entry, gfp_t gfp) { void *curr; xa_lock(xa); curr = __xa_cmpxchg(xa, index, old, entry, gfp); xa_unlock(xa); return curr; } /** * xa_cmpxchg_bh() - Conditionally replace an entry in the XArray. * @xa: XArray. * @index: Index into array. * @old: Old value to test against. * @entry: New value to place in array. * @gfp: Memory allocation flags. * * This function is like calling xa_cmpxchg() except it disables softirqs * while holding the array lock. * * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. May sleep if the @gfp flags permit. * Return: The old value at this index or xa_err() if an error happened. */ static inline void *xa_cmpxchg_bh(struct xarray *xa, unsigned long index, void *old, void *entry, gfp_t gfp) { void *curr; xa_lock_bh(xa); curr = __xa_cmpxchg(xa, index, old, entry, gfp); xa_unlock_bh(xa); return curr; } /** * xa_cmpxchg_irq() - Conditionally replace an entry in the XArray. * @xa: XArray. * @index: Index into array. * @old: Old value to test against. * @entry: New value to place in array. * @gfp: Memory allocation flags. * * This function is like calling xa_cmpxchg() except it disables interrupts * while holding the array lock. * * Context: Process context. Takes and releases the xa_lock while * disabling interrupts. May sleep if the @gfp flags permit. * Return: The old value at this index or xa_err() if an error happened. */ static inline void *xa_cmpxchg_irq(struct xarray *xa, unsigned long index, void *old, void *entry, gfp_t gfp) { void *curr; xa_lock_irq(xa); curr = __xa_cmpxchg(xa, index, old, entry, gfp); xa_unlock_irq(xa); return curr; } /** * xa_insert() - Store this entry in the XArray unless another entry is * already present. * @xa: XArray. * @index: Index into array. * @entry: New entry. * @gfp: Memory allocation flags. * * Inserting a NULL entry will store a reserved entry (like xa_reserve()) * if no entry is present. Inserting will fail if a reserved entry is * present, even though loading from this index will return NULL. * * Context: Any context. Takes and releases the xa_lock. May sleep if * the @gfp flags permit. * Return: 0 if the store succeeded. -EBUSY if another entry was present. * -ENOMEM if memory could not be allocated. */ static inline int __must_check xa_insert(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) { int err; xa_lock(xa); err = __xa_insert(xa, index, entry, gfp); xa_unlock(xa); return err; } /** * xa_insert_bh() - Store this entry in the XArray unless another entry is * already present. * @xa: XArray. * @index: Index into array. * @entry: New entry. * @gfp: Memory allocation flags. * * Inserting a NULL entry will store a reserved entry (like xa_reserve()) * if no entry is present. Inserting will fail if a reserved entry is * present, even though loading from this index will return NULL. * * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. May sleep if the @gfp flags permit. * Return: 0 if the store succeeded. -EBUSY if another entry was present. * -ENOMEM if memory could not be allocated. */ static inline int __must_check xa_insert_bh(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) { int err; xa_lock_bh(xa); err = __xa_insert(xa, index, entry, gfp); xa_unlock_bh(xa); return err; } /** * xa_insert_irq() - Store this entry in the XArray unless another entry is * already present. * @xa: XArray. * @index: Index into array. * @entry: New entry. * @gfp: Memory allocation flags. * * Inserting a NULL entry will store a reserved entry (like xa_reserve()) * if no entry is present. Inserting will fail if a reserved entry is * present, even though loading from this index will return NULL. * * Context: Process context. Takes and releases the xa_lock while * disabling interrupts. May sleep if the @gfp flags permit. * Return: 0 if the store succeeded. -EBUSY if another entry was present. * -ENOMEM if memory could not be allocated. */ static inline int __must_check xa_insert_irq(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) { int err; xa_lock_irq(xa); err = __xa_insert(xa, index, entry, gfp); xa_unlock_irq(xa); return err; } /** * xa_alloc() - Find somewhere to store this entry in the XArray. * @xa: XArray. * @id: Pointer to ID. * @entry: New entry. * @limit: Range of ID to allocate. * @gfp: Memory allocation flags. * * Finds an empty entry in @xa between @limit.min and @limit.max, * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * * Context: Any context. Takes and releases the xa_lock. May sleep if * the @gfp flags permit. * Return: 0 on success, -ENOMEM if memory could not be allocated or * -EBUSY if there are no free entries in @limit. */ static inline __must_check int xa_alloc(struct xarray *xa, u32 *id, void *entry, struct xa_limit limit, gfp_t gfp) { int err; xa_lock(xa); err = __xa_alloc(xa, id, entry, limit, gfp); xa_unlock(xa); return err; } /** * xa_alloc_bh() - Find somewhere to store this entry in the XArray. * @xa: XArray. * @id: Pointer to ID. * @entry: New entry. * @limit: Range of ID to allocate. * @gfp: Memory allocation flags. * * Finds an empty entry in @xa between @limit.min and @limit.max, * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. May sleep if the @gfp flags permit. * Return: 0 on success, -ENOMEM if memory could not be allocated or * -EBUSY if there are no free entries in @limit. */ static inline int __must_check xa_alloc_bh(struct xarray *xa, u32 *id, void *entry, struct xa_limit limit, gfp_t gfp) { int err; xa_lock_bh(xa); err = __xa_alloc(xa, id, entry, limit, gfp); xa_unlock_bh(xa); return err; } /** * xa_alloc_irq() - Find somewhere to store this entry in the XArray. * @xa: XArray. * @id: Pointer to ID. * @entry: New entry. * @limit: Range of ID to allocate. * @gfp: Memory allocation flags. * * Finds an empty entry in @xa between @limit.min and @limit.max, * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * * Context: Process context. Takes and releases the xa_lock while * disabling interrupts. May sleep if the @gfp flags permit. * Return: 0 on success, -ENOMEM if memory could not be allocated or * -EBUSY if there are no free entries in @limit. */ static inline int __must_check xa_alloc_irq(struct xarray *xa, u32 *id, void *entry, struct xa_limit limit, gfp_t gfp) { int err; xa_lock_irq(xa); err = __xa_alloc(xa, id, entry, limit, gfp); xa_unlock_irq(xa); return err; } /** * xa_alloc_cyclic() - Find somewhere to store this entry in the XArray. * @xa: XArray. * @id: Pointer to ID. * @entry: New entry. * @limit: Range of allocated ID. * @next: Pointer to next ID to allocate. * @gfp: Memory allocation flags. * * Finds an empty entry in @xa between @limit.min and @limit.max, * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * The search for an empty entry will start at @next and will wrap * around if necessary. * * Context: Any context. Takes and releases the xa_lock. May sleep if * the @gfp flags permit. * Return: 0 if the allocation succeeded without wrapping. 1 if the * allocation succeeded after wrapping, -ENOMEM if memory could not be * allocated or -EBUSY if there are no free entries in @limit. */ static inline int xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry, struct xa_limit limit, u32 *next, gfp_t gfp) { int err; xa_lock(xa); err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp); xa_unlock(xa); return err; } /** * xa_alloc_cyclic_bh() - Find somewhere to store this entry in the XArray. * @xa: XArray. * @id: Pointer to ID. * @entry: New entry. * @limit: Range of allocated ID. * @next: Pointer to next ID to allocate. * @gfp: Memory allocation flags. * * Finds an empty entry in @xa between @limit.min and @limit.max, * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * The search for an empty entry will start at @next and will wrap * around if necessary. * * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. May sleep if the @gfp flags permit. * Return: 0 if the allocation succeeded without wrapping. 1 if the * allocation succeeded after wrapping, -ENOMEM if memory could not be * allocated or -EBUSY if there are no free entries in @limit. */ static inline int xa_alloc_cyclic_bh(struct xarray *xa, u32 *id, void *entry, struct xa_limit limit, u32 *next, gfp_t gfp) { int err; xa_lock_bh(xa); err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp); xa_unlock_bh(xa); return err; } /** * xa_alloc_cyclic_irq() - Find somewhere to store this entry in the XArray. * @xa: XArray. * @id: Pointer to ID. * @entry: New entry. * @limit: Range of allocated ID. * @next: Pointer to next ID to allocate. * @gfp: Memory allocation flags. * * Finds an empty entry in @xa between @limit.min and @limit.max, * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * The search for an empty entry will start at @next and will wrap * around if necessary. * * Context: Process context. Takes and releases the xa_lock while * disabling interrupts. May sleep if the @gfp flags permit. * Return: 0 if the allocation succeeded without wrapping. 1 if the * allocation succeeded after wrapping, -ENOMEM if memory could not be * allocated or -EBUSY if there are no free entries in @limit. */ static inline int xa_alloc_cyclic_irq(struct xarray *xa, u32 *id, void *entry, struct xa_limit limit, u32 *next, gfp_t gfp) { int err; xa_lock_irq(xa); err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp); xa_unlock_irq(xa); return err; } /** * xa_reserve() - Reserve this index in the XArray. * @xa: XArray. * @index: Index into array. * @gfp: Memory allocation flags. * * Ensures there is somewhere to store an entry at @index in the array. * If there is already something stored at @index, this function does * nothing. If there was nothing there, the entry is marked as reserved. * Loading from a reserved entry returns a %NULL pointer. * * If you do not use the entry that you have reserved, call xa_release() * or xa_erase() to free any unnecessary memory. * * Context: Any context. Takes and releases the xa_lock. * May sleep if the @gfp flags permit. * Return: 0 if the reservation succeeded or -ENOMEM if it failed. */ static inline __must_check int xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp) { return xa_err(xa_cmpxchg(xa, index, NULL, XA_ZERO_ENTRY, gfp)); } /** * xa_reserve_bh() - Reserve this index in the XArray. * @xa: XArray. * @index: Index into array. * @gfp: Memory allocation flags. * * A softirq-disabling version of xa_reserve(). * * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. * Return: 0 if the reservation succeeded or -ENOMEM if it failed. */ static inline __must_check int xa_reserve_bh(struct xarray *xa, unsigned long index, gfp_t gfp) { return xa_err(xa_cmpxchg_bh(xa, index, NULL, XA_ZERO_ENTRY, gfp)); } /** * xa_reserve_irq() - Reserve this index in the XArray. * @xa: XArray. * @index: Index into array. * @gfp: Memory allocation flags. * * An interrupt-disabling version of xa_reserve(). * * Context: Process context. Takes and releases the xa_lock while * disabling interrupts. * Return: 0 if the reservation succeeded or -ENOMEM if it failed. */ static inline __must_check int xa_reserve_irq(struct xarray *xa, unsigned long index, gfp_t gfp) { return xa_err(xa_cmpxchg_irq(xa, index, NULL, XA_ZERO_ENTRY, gfp)); } /** * xa_release() - Release a reserved entry. * @xa: XArray. * @index: Index of entry. * * After calling xa_reserve(), you can call this function to release the * reservation. If the entry at @index has been stored to, this function * will do nothing. */ static inline void xa_release(struct xarray *xa, unsigned long index) { xa_cmpxchg(xa, index, XA_ZERO_ENTRY, NULL, 0); } /* Everything below here is the Advanced API. Proceed with caution. */ /* * The xarray is constructed out of a set of 'chunks' of pointers. Choosing * the best chunk size requires some tradeoffs. A power of two recommends * itself so that we can walk the tree based purely on shifts and masks. * Generally, the larger the better; as the number of slots per level of the * tree increases, the less tall the tree needs to be. But that needs to be * balanced against the memory consumption of each node. On a 64-bit system, * xa_node is currently 576 bytes, and we get 7 of them per 4kB page. If we * doubled the number of slots per node, we'd get only 3 nodes per 4kB page. */ #ifndef XA_CHUNK_SHIFT #define XA_CHUNK_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) #endif #define XA_CHUNK_SIZE (1UL << XA_CHUNK_SHIFT) #define XA_CHUNK_MASK (XA_CHUNK_SIZE - 1) #define XA_MAX_MARKS 3 #define XA_MARK_LONGS DIV_ROUND_UP(XA_CHUNK_SIZE, BITS_PER_LONG) /* * @count is the count of every non-NULL element in the ->slots array * whether that is a value entry, a retry entry, a user pointer, * a sibling entry or a pointer to the next level of the tree. * @nr_values is the count of every element in ->slots which is * either a value entry or a sibling of a value entry. */ struct xa_node { unsigned char shift; /* Bits remaining in each slot */ unsigned char offset; /* Slot offset in parent */ unsigned char count; /* Total entry count */ unsigned char nr_values; /* Value entry count */ struct xa_node __rcu *parent; /* NULL at top of tree */ struct xarray *array; /* The array we belong to */ union { struct list_head private_list; /* For tree user */ struct rcu_head rcu_head; /* Used when freeing node */ }; void __rcu *slots[XA_CHUNK_SIZE]; union { unsigned long tags[XA_MAX_MARKS][XA_MARK_LONGS]; unsigned long marks[XA_MAX_MARKS][XA_MARK_LONGS]; }; }; void xa_dump(const struct xarray *); void xa_dump_node(const struct xa_node *); #ifdef XA_DEBUG #define XA_BUG_ON(xa, x) do { \ if (x) { \ xa_dump(xa); \ BUG(); \ } \ } while (0) #define XA_NODE_BUG_ON(node, x) do { \ if (x) { \ if (node) xa_dump_node(node); \ BUG(); \ } \ } while (0) #else #define XA_BUG_ON(xa, x) do { } while (0) #define XA_NODE_BUG_ON(node, x) do { } while (0) #endif /* Private */ static inline void *xa_head(const struct xarray *xa) { return rcu_dereference_check(xa->xa_head, lockdep_is_held(&xa->xa_lock)); } /* Private */ static inline void *xa_head_locked(const struct xarray *xa) { return rcu_dereference_protected(xa->xa_head, lockdep_is_held(&xa->xa_lock)); } /* Private */ static inline void *xa_entry(const struct xarray *xa, const struct xa_node *node, unsigned int offset) { XA_NODE_BUG_ON(node, offset >= XA_CHUNK_SIZE); return rcu_dereference_check(node->slots[offset], lockdep_is_held(&xa->xa_lock)); } /* Private */ static inline void *xa_entry_locked(const struct xarray *xa, const struct xa_node *node, unsigned int offset) { XA_NODE_BUG_ON(node, offset >= XA_CHUNK_SIZE); return rcu_dereference_protected(node->slots[offset], lockdep_is_held(&xa->xa_lock)); } /* Private */ static inline struct xa_node *xa_parent(const struct xarray *xa, const struct xa_node *node) { return rcu_dereference_check(node->parent, lockdep_is_held(&xa->xa_lock)); } /* Private */ static inline struct xa_node *xa_parent_locked(const struct xarray *xa, const struct xa_node *node) { return rcu_dereference_protected(node->parent, lockdep_is_held(&xa->xa_lock)); } /* Private */ static inline void *xa_mk_node(const struct xa_node *node) { return (void *)((unsigned long)node | 2); } /* Private */ static inline struct xa_node *xa_to_node(const void *entry) { return (struct xa_node *)((unsigned long)entry - 2); } /* Private */ static inline bool xa_is_node(const void *entry) { return xa_is_internal(entry) && (unsigned long)entry > 4096; } /* Private */ static inline void *xa_mk_sibling(unsigned int offset) { return xa_mk_internal(offset); } /* Private */ static inline unsigned long xa_to_sibling(const void *entry) { return xa_to_internal(entry); } /** * xa_is_sibling() - Is the entry a sibling entry? * @entry: Entry retrieved from the XArray * * Return: %true if the entry is a sibling entry. */ static inline bool xa_is_sibling(const void *entry) { return IS_ENABLED(CONFIG_XARRAY_MULTI) && xa_is_internal(entry) && (entry < xa_mk_sibling(XA_CHUNK_SIZE - 1)); } #define XA_RETRY_ENTRY xa_mk_internal(256) /** * xa_is_retry() - Is the entry a retry entry? * @entry: Entry retrieved from the XArray * * Return: %true if the entry is a retry entry. */ static inline bool xa_is_retry(const void *entry) { return unlikely(entry == XA_RETRY_ENTRY); } /** * xa_is_advanced() - Is the entry only permitted for the advanced API? * @entry: Entry to be stored in the XArray. * * Return: %true if the entry cannot be stored by the normal API. */ static inline bool xa_is_advanced(const void *entry) { return xa_is_internal(entry) && (entry <= XA_RETRY_ENTRY); } /** * typedef xa_update_node_t - A callback function from the XArray. * @node: The node which is being processed * * This function is called every time the XArray updates the count of * present and value entries in a node. It allows advanced users to * maintain the private_list in the node. * * Context: The xa_lock is held and interrupts may be disabled. * Implementations should not drop the xa_lock, nor re-enable * interrupts. */ typedef void (*xa_update_node_t)(struct xa_node *node); void xa_delete_node(struct xa_node *, xa_update_node_t); /* * The xa_state is opaque to its users. It contains various different pieces * of state involved in the current operation on the XArray. It should be * declared on the stack and passed between the various internal routines. * The various elements in it should not be accessed directly, but only * through the provided accessor functions. The below documentation is for * the benefit of those working on the code, not for users of the XArray. * * @xa_node usually points to the xa_node containing the slot we're operating * on (and @xa_offset is the offset in the slots array). If there is a * single entry in the array at index 0, there are no allocated xa_nodes to * point to, and so we store %NULL in @xa_node. @xa_node is set to * the value %XAS_RESTART if the xa_state is not walked to the correct * position in the tree of nodes for this operation. If an error occurs * during an operation, it is set to an %XAS_ERROR value. If we run off the * end of the allocated nodes, it is set to %XAS_BOUNDS. */ struct xa_state { struct xarray *xa; unsigned long xa_index; unsigned char xa_shift; unsigned char xa_sibs; unsigned char xa_offset; unsigned char xa_pad; /* Helps gcc generate better code */ struct xa_node *xa_node; struct xa_node *xa_alloc; xa_update_node_t xa_update; }; /* * We encode errnos in the xas->xa_node. If an error has happened, we need to * drop the lock to fix it, and once we've done so the xa_state is invalid. */ #define XA_ERROR(errno) ((struct xa_node *)(((unsigned long)errno << 2) | 2UL)) #define XAS_BOUNDS ((struct xa_node *)1UL) #define XAS_RESTART ((struct xa_node *)3UL) #define __XA_STATE(array, index, shift, sibs) { \ .xa = array, \ .xa_index = index, \ .xa_shift = shift, \ .xa_sibs = sibs, \ .xa_offset = 0, \ .xa_pad = 0, \ .xa_node = XAS_RESTART, \ .xa_alloc = NULL, \ .xa_update = NULL \ } /** * XA_STATE() - Declare an XArray operation state. * @name: Name of this operation state (usually xas). * @array: Array to operate on. * @index: Initial index of interest. * * Declare and initialise an xa_state on the stack. */ #define XA_STATE(name, array, index) \ struct xa_state name = __XA_STATE(array, index, 0, 0) /** * XA_STATE_ORDER() - Declare an XArray operation state. * @name: Name of this operation state (usually xas). * @array: Array to operate on. * @index: Initial index of interest. * @order: Order of entry. * * Declare and initialise an xa_state on the stack. This variant of * XA_STATE() allows you to specify the 'order' of the element you * want to operate on.` */ #define XA_STATE_ORDER(name, array, index, order) \ struct xa_state name = __XA_STATE(array, \ (index >> order) << order, \ order - (order % XA_CHUNK_SHIFT), \ (1U << (order % XA_CHUNK_SHIFT)) - 1) #define xas_marked(xas, mark) xa_marked((xas)->xa, (mark)) #define xas_trylock(xas) xa_trylock((xas)->xa) #define xas_lock(xas) xa_lock((xas)->xa) #define xas_unlock(xas) xa_unlock((xas)->xa) #define xas_lock_bh(xas) xa_lock_bh((xas)->xa) #define xas_unlock_bh(xas) xa_unlock_bh((xas)->xa) #define xas_lock_irq(xas) xa_lock_irq((xas)->xa) #define xas_unlock_irq(xas) xa_unlock_irq((xas)->xa) #define xas_lock_irqsave(xas, flags) \ xa_lock_irqsave((xas)->xa, flags) #define xas_unlock_irqrestore(xas, flags) \ xa_unlock_irqrestore((xas)->xa, flags) /** * xas_error() - Return an errno stored in the xa_state. * @xas: XArray operation state. * * Return: 0 if no error has been noted. A negative errno if one has. */ static inline int xas_error(const struct xa_state *xas) { return xa_err(xas->xa_node); } /** * xas_set_err() - Note an error in the xa_state. * @xas: XArray operation state. * @err: Negative error number. * * Only call this function with a negative @err; zero or positive errors * will probably not behave the way you think they should. If you want * to clear the error from an xa_state, use xas_reset(). */ static inline void xas_set_err(struct xa_state *xas, long err) { xas->xa_node = XA_ERROR(err); } /** * xas_invalid() - Is the xas in a retry or error state? * @xas: XArray operation state. * * Return: %true if the xas cannot be used for operations. */ static inline bool xas_invalid(const struct xa_state *xas) { return (unsigned long)xas->xa_node & 3; } /** * xas_valid() - Is the xas a valid cursor into the array? * @xas: XArray operation state. * * Return: %true if the xas can be used for operations. */ static inline bool xas_valid(const struct xa_state *xas) { return !xas_invalid(xas); } /** * xas_is_node() - Does the xas point to a node? * @xas: XArray operation state. * * Return: %true if the xas currently references a node. */ static inline bool xas_is_node(const struct xa_state *xas) { return xas_valid(xas) && xas->xa_node; } /* True if the pointer is something other than a node */ static inline bool xas_not_node(struct xa_node *node) { return ((unsigned long)node & 3) || !node; } /* True if the node represents RESTART or an error */ static inline bool xas_frozen(struct xa_node *node) { return (unsigned long)node & 2; } /* True if the node represents head-of-tree, RESTART or BOUNDS */ static inline bool xas_top(struct xa_node *node) { return node <= XAS_RESTART; } /** * xas_reset() - Reset an XArray operation state. * @xas: XArray operation state. * * Resets the error or walk state of the @xas so future walks of the * array will start from the root. Use this if you have dropped the * xarray lock and want to reuse the xa_state. * * Context: Any context. */ static inline void xas_reset(struct xa_state *xas) { xas->xa_node = XAS_RESTART; } /** * xas_retry() - Retry the operation if appropriate. * @xas: XArray operation state. * @entry: Entry from xarray. * * The advanced functions may sometimes return an internal entry, such as * a retry entry or a zero entry. This function sets up the @xas to restart * the walk from the head of the array if needed. * * Context: Any context. * Return: true if the operation needs to be retried. */ static inline bool xas_retry(struct xa_state *xas, const void *entry) { if (xa_is_zero(entry)) return true; if (!xa_is_retry(entry)) return false; xas_reset(xas); return true; } void *xas_load(struct xa_state *); void *xas_store(struct xa_state *, void *entry); void *xas_find(struct xa_state *, unsigned long max); void *xas_find_conflict(struct xa_state *); bool xas_get_mark(const struct xa_state *, xa_mark_t); void xas_set_mark(const struct xa_state *, xa_mark_t); void xas_clear_mark(const struct xa_state *, xa_mark_t); void *xas_find_marked(struct xa_state *, unsigned long max, xa_mark_t); void xas_init_marks(const struct xa_state *); bool xas_nomem(struct xa_state *, gfp_t); void xas_pause(struct xa_state *); void xas_create_range(struct xa_state *); #ifdef CONFIG_XARRAY_MULTI int xa_get_order(struct xarray *, unsigned long index); void xas_split(struct xa_state *, void *entry, unsigned int order); void xas_split_alloc(struct xa_state *, void *entry, unsigned int order, gfp_t); #else static inline int xa_get_order(struct xarray *xa, unsigned long index) { return 0; } static inline void xas_split(struct xa_state *xas, void *entry, unsigned int order) { xas_store(xas, entry); } static inline void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order, gfp_t gfp) { } #endif /** * xas_reload() - Refetch an entry from the xarray. * @xas: XArray operation state. * * Use this function to check that a previously loaded entry still has * the same value. This is useful for the lockless pagecache lookup where * we walk the array with only the RCU lock to protect us, lock the page, * then check that the page hasn't moved since we looked it up. * * The caller guarantees that @xas is still valid. If it may be in an * error or restart state, call xas_load() instead. * * Return: The entry at this location in the xarray. */ static inline void *xas_reload(struct xa_state *xas) { struct xa_node *node = xas->xa_node; void *entry; char offset; if (!node) return xa_head(xas->xa); if (IS_ENABLED(CONFIG_XARRAY_MULTI)) { offset = (xas->xa_index >> node->shift) & XA_CHUNK_MASK; entry = xa_entry(xas->xa, node, offset); if (!xa_is_sibling(entry)) return entry; offset = xa_to_sibling(entry); } else { offset = xas->xa_offset; } return xa_entry(xas->xa, node, offset); } /** * xas_set() - Set up XArray operation state for a different index. * @xas: XArray operation state. * @index: New index into the XArray. * * Move the operation state to refer to a different index. This will * have the effect of starting a walk from the top; see xas_next() * to move to an adjacent index. */ static inline void xas_set(struct xa_state *xas, unsigned long index) { xas->xa_index = index; xas->xa_node = XAS_RESTART; } /** * xas_set_order() - Set up XArray operation state for a multislot entry. * @xas: XArray operation state. * @index: Target of the operation. * @order: Entry occupies 2^@order indices. */ static inline void xas_set_order(struct xa_state *xas, unsigned long index, unsigned int order) { #ifdef CONFIG_XARRAY_MULTI xas->xa_index = order < BITS_PER_LONG ? (index >> order) << order : 0; xas->xa_shift = order - (order % XA_CHUNK_SHIFT); xas->xa_sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1; xas->xa_node = XAS_RESTART; #else BUG_ON(order > 0); xas_set(xas, index); #endif } /** * xas_set_update() - Set up XArray operation state for a callback. * @xas: XArray operation state. * @update: Function to call when updating a node. * * The XArray can notify a caller after it has updated an xa_node. * This is advanced functionality and is only needed by the page cache. */ static inline void xas_set_update(struct xa_state *xas, xa_update_node_t update) { xas->xa_update = update; } /** * xas_next_entry() - Advance iterator to next present entry. * @xas: XArray operation state. * @max: Highest index to return. * * xas_next_entry() is an inline function to optimise xarray traversal for * speed. It is equivalent to calling xas_find(), and will call xas_find() * for all the hard cases. * * Return: The next present entry after the one currently referred to by @xas. */ static inline void *xas_next_entry(struct xa_state *xas, unsigned long max) { struct xa_node *node = xas->xa_node; void *entry; if (unlikely(xas_not_node(node) || node->shift || xas->xa_offset != (xas->xa_index & XA_CHUNK_MASK))) return xas_find(xas, max); do { if (unlikely(xas->xa_index >= max)) return xas_find(xas, max); if (unlikely(xas->xa_offset == XA_CHUNK_MASK)) return xas_find(xas, max); entry = xa_entry(xas->xa, node, xas->xa_offset + 1); if (unlikely(xa_is_internal(entry))) return xas_find(xas, max); xas->xa_offset++; xas->xa_index++; } while (!entry); return entry; } /* Private */ static inline unsigned int xas_find_chunk(struct xa_state *xas, bool advance, xa_mark_t mark) { unsigned long *addr = xas->xa_node->marks[(__force unsigned)mark]; unsigned int offset = xas->xa_offset; if (advance) offset++; if (XA_CHUNK_SIZE == BITS_PER_LONG) { if (offset < XA_CHUNK_SIZE) { unsigned long data = *addr & (~0UL << offset); if (data) return __ffs(data); } return XA_CHUNK_SIZE; } return find_next_bit(addr, XA_CHUNK_SIZE, offset); } /** * xas_next_marked() - Advance iterator to next marked entry. * @xas: XArray operation state. * @max: Highest index to return. * @mark: Mark to search for. * * xas_next_marked() is an inline function to optimise xarray traversal for * speed. It is equivalent to calling xas_find_marked(), and will call * xas_find_marked() for all the hard cases. * * Return: The next marked entry after the one currently referred to by @xas. */ static inline void *xas_next_marked(struct xa_state *xas, unsigned long max, xa_mark_t mark) { struct xa_node *node = xas->xa_node; void *entry; unsigned int offset; if (unlikely(xas_not_node(node) || node->shift)) return xas_find_marked(xas, max, mark); offset = xas_find_chunk(xas, true, mark); xas->xa_offset = offset; xas->xa_index = (xas->xa_index & ~XA_CHUNK_MASK) + offset; if (xas->xa_index > max) return NULL; if (offset == XA_CHUNK_SIZE) return xas_find_marked(xas, max, mark); entry = xa_entry(xas->xa, node, offset); if (!entry) return xas_find_marked(xas, max, mark); return entry; } /* * If iterating while holding a lock, drop the lock and reschedule * every %XA_CHECK_SCHED loops. */ enum { XA_CHECK_SCHED = 4096, }; /** * xas_for_each() - Iterate over a range of an XArray. * @xas: XArray operation state. * @entry: Entry retrieved from the array. * @max: Maximum index to retrieve from array. * * The loop body will be executed for each entry present in the xarray * between the current xas position and @max. @entry will be set to * the entry retrieved from the xarray. It is safe to delete entries * from the array in the loop body. You should hold either the RCU lock * or the xa_lock while iterating. If you need to drop the lock, call * xas_pause() first. */ #define xas_for_each(xas, entry, max) \ for (entry = xas_find(xas, max); entry; \ entry = xas_next_entry(xas, max)) /** * xas_for_each_marked() - Iterate over a range of an XArray. * @xas: XArray operation state. * @entry: Entry retrieved from the array. * @max: Maximum index to retrieve from array. * @mark: Mark to search for. * * The loop body will be executed for each marked entry in the xarray * between the current xas position and @max. @entry will be set to * the entry retrieved from the xarray. It is safe to delete entries * from the array in the loop body. You should hold either the RCU lock * or the xa_lock while iterating. If you need to drop the lock, call * xas_pause() first. */ #define xas_for_each_marked(xas, entry, max, mark) \ for (entry = xas_find_marked(xas, max, mark); entry; \ entry = xas_next_marked(xas, max, mark)) /** * xas_for_each_conflict() - Iterate over a range of an XArray. * @xas: XArray operation state. * @entry: Entry retrieved from the array. * * The loop body will be executed for each entry in the XArray that * lies within the range specified by @xas. If the loop terminates * normally, @entry will be %NULL. The user may break out of the loop, * which will leave @entry set to the conflicting entry. The caller * may also call xa_set_err() to exit the loop while setting an error * to record the reason. */ #define xas_for_each_conflict(xas, entry) \ while ((entry = xas_find_conflict(xas))) void *__xas_next(struct xa_state *); void *__xas_prev(struct xa_state *); /** * xas_prev() - Move iterator to previous index. * @xas: XArray operation state. * * If the @xas was in an error state, it will remain in an error state * and this function will return %NULL. If the @xas has never been walked, * it will have the effect of calling xas_load(). Otherwise one will be * subtracted from the index and the state will be walked to the correct * location in the array for the next operation. * * If the iterator was referencing index 0, this function wraps * around to %ULONG_MAX. * * Return: The entry at the new index. This may be %NULL or an internal * entry. */ static inline void *xas_prev(struct xa_state *xas) { struct xa_node *node = xas->xa_node; if (unlikely(xas_not_node(node) || node->shift || xas->xa_offset == 0)) return __xas_prev(xas); xas->xa_index--; xas->xa_offset--; return xa_entry(xas->xa, node, xas->xa_offset); } /** * xas_next() - Move state to next index. * @xas: XArray operation state. * * If the @xas was in an error state, it will remain in an error state * and this function will return %NULL. If the @xas has never been walked, * it will have the effect of calling xas_load(). Otherwise one will be * added to the index and the state will be walked to the correct * location in the array for the next operation. * * If the iterator was referencing index %ULONG_MAX, this function wraps * around to 0. * * Return: The entry at the new index. This may be %NULL or an internal * entry. */ static inline void *xas_next(struct xa_state *xas) { struct xa_node *node = xas->xa_node; if (unlikely(xas_not_node(node) || node->shift || xas->xa_offset == XA_CHUNK_MASK)) return __xas_next(xas); xas->xa_index++; xas->xa_offset++; return xa_entry(xas->xa, node, xas->xa_offset); } #endif /* _LINUX_XARRAY_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 #undef TRACE_SYSTEM #define TRACE_SYSTEM qdisc #if !defined(_TRACE_QDISC_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_QDISC_H #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/tracepoint.h> #include <linux/ftrace.h> #include <linux/pkt_sched.h> #include <net/sch_generic.h> TRACE_EVENT(qdisc_dequeue, TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq, int packets, struct sk_buff *skb), TP_ARGS(qdisc, txq, packets, skb), TP_STRUCT__entry( __field( struct Qdisc *, qdisc ) __field(const struct netdev_queue *, txq ) __field( int, packets ) __field( void *, skbaddr ) __field( int, ifindex ) __field( u32, handle ) __field( u32, parent ) __field( unsigned long, txq_state) ), /* skb==NULL indicate packets dequeued was 0, even when packets==1 */ TP_fast_assign( __entry->qdisc = qdisc; __entry->txq = txq; __entry->packets = skb ? packets : 0; __entry->skbaddr = skb; __entry->ifindex = txq->dev ? txq->dev->ifindex : 0; __entry->handle = qdisc->handle; __entry->parent = qdisc->parent; __entry->txq_state = txq->state; ), TP_printk("dequeue ifindex=%d qdisc handle=0x%X parent=0x%X txq_state=0x%lX packets=%d skbaddr=%p", __entry->ifindex, __entry->handle, __entry->parent, __entry->txq_state, __entry->packets, __entry->skbaddr ) ); TRACE_EVENT(qdisc_reset, TP_PROTO(struct Qdisc *q), TP_ARGS(q), TP_STRUCT__entry( __string( dev, qdisc_dev(q) ) __string( kind, q->ops->id ) __field( u32, parent ) __field( u32, handle ) ), TP_fast_assign( __assign_str(dev, qdisc_dev(q)); __assign_str(kind, q->ops->id); __entry->parent = q->parent; __entry->handle = q->handle; ), TP_printk("dev=%s kind=%s parent=%x:%x handle=%x:%x", __get_str(dev), __get_str(kind), TC_H_MAJ(__entry->parent) >> 16, TC_H_MIN(__entry->parent), TC_H_MAJ(__entry->handle) >> 16, TC_H_MIN(__entry->handle)) ); TRACE_EVENT(qdisc_destroy, TP_PROTO(struct Qdisc *q), TP_ARGS(q), TP_STRUCT__entry( __string( dev, qdisc_dev(q) ) __string( kind, q->ops->id ) __field( u32, parent ) __field( u32, handle ) ), TP_fast_assign( __assign_str(dev, qdisc_dev(q)); __assign_str(kind, q->ops->id); __entry->parent = q->parent; __entry->handle = q->handle; ), TP_printk("dev=%s kind=%s parent=%x:%x handle=%x:%x", __get_str(dev), __get_str(kind), TC_H_MAJ(__entry->parent) >> 16, TC_H_MIN(__entry->parent), TC_H_MAJ(__entry->handle) >> 16, TC_H_MIN(__entry->handle)) ); TRACE_EVENT(qdisc_create, TP_PROTO(const struct Qdisc_ops *ops, struct net_device *dev, u32 parent), TP_ARGS(ops, dev, parent), TP_STRUCT__entry( __string( dev, dev->name ) __string( kind, ops->id ) __field( u32, parent ) ), TP_fast_assign( __assign_str(dev, dev->name); __assign_str(kind, ops->id); __entry->parent = parent; ), TP_printk("dev=%s kind=%s parent=%x:%x", __get_str(dev), __get_str(kind), TC_H_MAJ(__entry->parent) >> 16, TC_H_MIN(__entry->parent)) ); #endif /* _TRACE_QDISC_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_UDP_TUNNEL_H #define __NET_UDP_TUNNEL_H #include <net/ip_tunnels.h> #include <net/udp.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> #include <net/ipv6_stubs.h> #endif struct udp_port_cfg { u8 family; /* Used only for kernel-created sockets */ union { struct in_addr local_ip; #if IS_ENABLED(CONFIG_IPV6) struct in6_addr local_ip6; #endif }; union { struct in_addr peer_ip; #if IS_ENABLED(CONFIG_IPV6) struct in6_addr peer_ip6; #endif }; __be16 local_udp_port; __be16 peer_udp_port; int bind_ifindex; unsigned int use_udp_checksums:1, use_udp6_tx_checksums:1, use_udp6_rx_checksums:1, ipv6_v6only:1; }; int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp); #if IS_ENABLED(CONFIG_IPV6) int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp); #else static inline int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp) { return 0; } #endif static inline int udp_sock_create(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp) { if (cfg->family == AF_INET) return udp_sock_create4(net, cfg, sockp); if (cfg->family == AF_INET6) return udp_sock_create6(net, cfg, sockp); return -EPFNOSUPPORT; } typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb); typedef int (*udp_tunnel_encap_err_lookup_t)(struct sock *sk, struct sk_buff *skb); typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk); typedef struct sk_buff *(*udp_tunnel_gro_receive_t)(struct sock *sk, struct list_head *head, struct sk_buff *skb); typedef int (*udp_tunnel_gro_complete_t)(struct sock *sk, struct sk_buff *skb, int nhoff); struct udp_tunnel_sock_cfg { void *sk_user_data; /* user data used by encap_rcv call back */ /* Used for setting up udp_sock fields, see udp.h for details */ __u8 encap_type; udp_tunnel_encap_rcv_t encap_rcv; udp_tunnel_encap_err_lookup_t encap_err_lookup; udp_tunnel_encap_destroy_t encap_destroy; udp_tunnel_gro_receive_t gro_receive; udp_tunnel_gro_complete_t gro_complete; }; /* Setup the given (UDP) sock to receive UDP encapsulated packets */ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, struct udp_tunnel_sock_cfg *sock_cfg); /* -- List of parsable UDP tunnel types -- * * Adding to this list will result in serious debate. The main issue is * that this list is essentially a list of workarounds for either poorly * designed tunnels, or poorly designed device offloads. * * The parsing supported via these types should really be used for Rx * traffic only as the network stack will have already inserted offsets for * the location of the headers in the skb. In addition any ports that are * pushed should be kept within the namespace without leaking to other * devices such as VFs or other ports on the same device. * * It is strongly encouraged to use CHECKSUM_COMPLETE for Rx to avoid the * need to use this for Rx checksum offload. It should not be necessary to * call this function to perform Tx offloads on outgoing traffic. */ enum udp_parsable_tunnel_type { UDP_TUNNEL_TYPE_VXLAN = BIT(0), /* RFC 7348 */ UDP_TUNNEL_TYPE_GENEVE = BIT(1), /* draft-ietf-nvo3-geneve */ UDP_TUNNEL_TYPE_VXLAN_GPE = BIT(2), /* draft-ietf-nvo3-vxlan-gpe */ }; struct udp_tunnel_info { unsigned short type; sa_family_t sa_family; __be16 port; u8 hw_priv; }; /* Notify network devices of offloadable types */ void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock, unsigned short type); void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock, unsigned short type); void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type); void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type); static inline void udp_tunnel_get_rx_info(struct net_device *dev) { ASSERT_RTNL(); call_netdevice_notifiers(NETDEV_UDP_TUNNEL_PUSH_INFO, dev); } static inline void udp_tunnel_drop_rx_info(struct net_device *dev) { ASSERT_RTNL(); call_netdevice_notifiers(NETDEV_UDP_TUNNEL_DROP_INFO, dev); } /* Transmit the skb using UDP encapsulation. */ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, bool xnet, bool nocheck); int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, struct net_device *dev, struct in6_addr *saddr, struct in6_addr *daddr, __u8 prio, __u8 ttl, __be32 label, __be16 src_port, __be16 dst_port, bool nocheck); void udp_tunnel_sock_release(struct socket *sock); struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, __be16 flags, __be64 tunnel_id, int md_size); #ifdef CONFIG_INET static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum) { int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; return iptunnel_handle_offloads(skb, type); } #endif static inline void udp_tunnel_encap_enable(struct socket *sock) { struct udp_sock *up = udp_sk(sock->sk); if (up->encap_enabled) return; up->encap_enabled = 1; #if IS_ENABLED(CONFIG_IPV6) if (sock->sk->sk_family == PF_INET6) ipv6_stub->udpv6_encap_enable(); else #endif udp_encap_enable(); } #define UDP_TUNNEL_NIC_MAX_TABLES 4 enum udp_tunnel_nic_info_flags { /* Device callbacks may sleep */ UDP_TUNNEL_NIC_INFO_MAY_SLEEP = BIT(0), /* Device only supports offloads when it's open, all ports * will be removed before close and re-added after open. */ UDP_TUNNEL_NIC_INFO_OPEN_ONLY = BIT(1), /* Device supports only IPv4 tunnels */ UDP_TUNNEL_NIC_INFO_IPV4_ONLY = BIT(2), /* Device has hard-coded the IANA VXLAN port (4789) as VXLAN. * This port must not be counted towards n_entries of any table. * Driver will not receive any callback associated with port 4789. */ UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN = BIT(3), }; struct udp_tunnel_nic; #define UDP_TUNNEL_NIC_MAX_SHARING_DEVICES (U16_MAX / 2) struct udp_tunnel_nic_shared { struct udp_tunnel_nic *udp_tunnel_nic_info; struct list_head devices; }; struct udp_tunnel_nic_shared_node { struct net_device *dev; struct list_head list; }; /** * struct udp_tunnel_nic_info - driver UDP tunnel offload information * @set_port: callback for adding a new port * @unset_port: callback for removing a port * @sync_table: callback for syncing the entire port table at once * @shared: reference to device global state (optional) * @flags: device flags from enum udp_tunnel_nic_info_flags * @tables: UDP port tables this device has * @tables.n_entries: number of entries in this table * @tables.tunnel_types: types of tunnels this table accepts * * Drivers are expected to provide either @set_port and @unset_port callbacks * or the @sync_table callback. Callbacks are invoked with rtnl lock held. * * Devices which (misguidedly) share the UDP tunnel port table across multiple * netdevs should allocate an instance of struct udp_tunnel_nic_shared and * point @shared at it. * There must never be more than %UDP_TUNNEL_NIC_MAX_SHARING_DEVICES devices * sharing a table. * * Known limitations: * - UDP tunnel port notifications are fundamentally best-effort - * it is likely the driver will both see skbs which use a UDP tunnel port, * while not being a tunneled skb, and tunnel skbs from other ports - * drivers should only use these ports for non-critical RX-side offloads, * e.g. the checksum offload; * - none of the devices care about the socket family at present, so we don't * track it. Please extend this code if you care. */ struct udp_tunnel_nic_info { /* one-by-one */ int (*set_port)(struct net_device *dev, unsigned int table, unsigned int entry, struct udp_tunnel_info *ti); int (*unset_port)(struct net_device *dev, unsigned int table, unsigned int entry, struct udp_tunnel_info *ti); /* all at once */ int (*sync_table)(struct net_device *dev, unsigned int table); struct udp_tunnel_nic_shared *shared; unsigned int flags; struct udp_tunnel_nic_table_info { unsigned int n_entries; unsigned int tunnel_types; } tables[UDP_TUNNEL_NIC_MAX_TABLES]; }; /* UDP tunnel module dependencies * * Tunnel drivers are expected to have a hard dependency on the udp_tunnel * module. NIC drivers are not, they just attach their * struct udp_tunnel_nic_info to the netdev and wait for callbacks to come. * Loading a tunnel driver will cause the udp_tunnel module to be loaded * and only then will all the required state structures be allocated. * Since we want a weak dependency from the drivers and the core to udp_tunnel * we call things through the following stubs. */ struct udp_tunnel_nic_ops { void (*get_port)(struct net_device *dev, unsigned int table, unsigned int idx, struct udp_tunnel_info *ti); void (*set_port_priv)(struct net_device *dev, unsigned int table, unsigned int idx, u8 priv); void (*add_port)(struct net_device *dev, struct udp_tunnel_info *ti); void (*del_port)(struct net_device *dev, struct udp_tunnel_info *ti); void (*reset_ntf)(struct net_device *dev); size_t (*dump_size)(struct net_device *dev, unsigned int table); int (*dump_write)(struct net_device *dev, unsigned int table, struct sk_buff *skb); }; #ifdef CONFIG_INET extern const struct udp_tunnel_nic_ops *udp_tunnel_nic_ops; #else #define udp_tunnel_nic_ops ((struct udp_tunnel_nic_ops *)NULL) #endif static inline void udp_tunnel_nic_get_port(struct net_device *dev, unsigned int table, unsigned int idx, struct udp_tunnel_info *ti) { /* This helper is used from .sync_table, we indicate empty entries * by zero'ed @ti. Drivers which need to know the details of a port * when it gets deleted should use the .set_port / .unset_port * callbacks. * Zero out here, otherwise !CONFIG_INET causes uninitilized warnings. */ memset(ti, 0, sizeof(*ti)); if (udp_tunnel_nic_ops) udp_tunnel_nic_ops->get_port(dev, table, idx, ti); } static inline void udp_tunnel_nic_set_port_priv(struct net_device *dev, unsigned int table, unsigned int idx, u8 priv) { if (udp_tunnel_nic_ops) udp_tunnel_nic_ops->set_port_priv(dev, table, idx, priv); } static inline void udp_tunnel_nic_add_port(struct net_device *dev, struct udp_tunnel_info *ti) { if (udp_tunnel_nic_ops) udp_tunnel_nic_ops->add_port(dev, ti); } static inline void udp_tunnel_nic_del_port(struct net_device *dev, struct udp_tunnel_info *ti) { if (udp_tunnel_nic_ops) udp_tunnel_nic_ops->del_port(dev, ti); } /** * udp_tunnel_nic_reset_ntf() - device-originating reset notification * @dev: network interface device structure * * Called by the driver to inform the core that the entire UDP tunnel port * state has been lost, usually due to device reset. Core will assume device * forgot all the ports and issue .set_port and .sync_table callbacks as * necessary. * * This function must be called with rtnl lock held, and will issue all * the callbacks before returning. */ static inline void udp_tunnel_nic_reset_ntf(struct net_device *dev) { if (udp_tunnel_nic_ops) udp_tunnel_nic_ops->reset_ntf(dev); } static inline size_t udp_tunnel_nic_dump_size(struct net_device *dev, unsigned int table) { if (!udp_tunnel_nic_ops) return 0; return udp_tunnel_nic_ops->dump_size(dev, table); } static inline int udp_tunnel_nic_dump_write(struct net_device *dev, unsigned int table, struct sk_buff *skb) { if (!udp_tunnel_nic_ops) return 0; return udp_tunnel_nic_ops->dump_write(dev, table, skb); } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_NLS_H #define _LINUX_NLS_H #include <linux/init.h> /* Unicode has changed over the years. Unicode code points no longer * fit into 16 bits; as of Unicode 5 valid code points range from 0 * to 0x10ffff (17 planes, where each plane holds 65536 code points). * * The original decision to represent Unicode characters as 16-bit * wchar_t values is now outdated. But plane 0 still includes the * most commonly used characters, so we will retain it. The newer * 32-bit unicode_t type can be used when it is necessary to * represent the full Unicode character set. */ /* Plane-0 Unicode character */ typedef u16 wchar_t; #define MAX_WCHAR_T 0xffff /* Arbitrary Unicode character */ typedef u32 unicode_t; struct nls_table { const char *charset; const char *alias; int (*uni2char) (wchar_t uni, unsigned char *out, int boundlen); int (*char2uni) (const unsigned char *rawstring, int boundlen, wchar_t *uni); const unsigned char *charset2lower; const unsigned char *charset2upper; struct module *owner; struct nls_table *next; }; /* this value hold the maximum octet of charset */ #define NLS_MAX_CHARSET_SIZE 6 /* for UTF-8 */ /* Byte order for UTF-16 strings */ enum utf16_endian { UTF16_HOST_ENDIAN, UTF16_LITTLE_ENDIAN, UTF16_BIG_ENDIAN }; /* nls_base.c */ extern int __register_nls(struct nls_table *, struct module *); extern int unregister_nls(struct nls_table *); extern struct nls_table *load_nls(char *); extern void unload_nls(struct nls_table *); extern struct nls_table *load_nls_default(void); #define register_nls(nls) __register_nls((nls), THIS_MODULE) extern int utf8_to_utf32(const u8 *s, int len, unicode_t *pu); extern int utf32_to_utf8(unicode_t u, u8 *s, int maxlen); extern int utf8s_to_utf16s(const u8 *s, int len, enum utf16_endian endian, wchar_t *pwcs, int maxlen); extern int utf16s_to_utf8s(const wchar_t *pwcs, int len, enum utf16_endian endian, u8 *s, int maxlen); static inline unsigned char nls_tolower(struct nls_table *t, unsigned char c) { unsigned char nc = t->charset2lower[c]; return nc ? nc : c; } static inline unsigned char nls_toupper(struct nls_table *t, unsigned char c) { unsigned char nc = t->charset2upper[c]; return nc ? nc : c; } static inline int nls_strnicmp(struct nls_table *t, const unsigned char *s1, const unsigned char *s2, int len) { while (len--) { if (nls_tolower(t, *s1++) != nls_tolower(t, *s2++)) return 1; } return 0; } /* * nls_nullsize - return length of null character for codepage * @codepage - codepage for which to return length of NULL terminator * * Since we can't guarantee that the null terminator will be a particular * length, we have to check against the codepage. If there's a problem * determining it, assume a single-byte NULL terminator. */ static inline int nls_nullsize(const struct nls_table *codepage) { int charlen; char tmp[NLS_MAX_CHARSET_SIZE]; charlen = codepage->uni2char(0, tmp, NLS_MAX_CHARSET_SIZE); return charlen > 0 ? charlen : 1; } #define MODULE_ALIAS_NLS(name) MODULE_ALIAS("nls_" __stringify(name)) #endif /* _LINUX_NLS_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 /* * DRBG based on NIST SP800-90A * * Copyright Stephan Mueller <smueller@chronox.de>, 2014 * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, and the entire permission notice in its entirety, * including the disclaimer of warranties. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * ALTERNATIVELY, this product may be distributed under the terms of * the GNU General Public License, in which case the provisions of the GPL are * required INSTEAD OF the above restrictions. (This clause is * necessary due to a potential bad interaction between the GPL and * the restrictions contained in a BSD-style copyright.) * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. */ #ifndef _DRBG_H #define _DRBG_H #include <linux/random.h> #include <linux/scatterlist.h> #include <crypto/hash.h> #include <crypto/skcipher.h> #include <linux/module.h> #include <linux/crypto.h> #include <linux/slab.h> #include <crypto/internal/rng.h> #include <crypto/rng.h> #include <linux/fips.h> #include <linux/mutex.h> #include <linux/list.h> #include <linux/workqueue.h> /* * Concatenation Helper and string operation helper * * SP800-90A requires the concatenation of different data. To avoid copying * buffers around or allocate additional memory, the following data structure * is used to point to the original memory with its size. In addition, it * is used to build a linked list. The linked list defines the concatenation * of individual buffers. The order of memory block referenced in that * linked list determines the order of concatenation. */ struct drbg_string { const unsigned char *buf; size_t len; struct list_head list; }; static inline void drbg_string_fill(struct drbg_string *string, const unsigned char *buf, size_t len) { string->buf = buf; string->len = len; INIT_LIST_HEAD(&string->list); } struct drbg_state; typedef uint32_t drbg_flag_t; struct drbg_core { drbg_flag_t flags; /* flags for the cipher */ __u8 statelen; /* maximum state length */ __u8 blocklen_bytes; /* block size of output in bytes */ char cra_name[CRYPTO_MAX_ALG_NAME]; /* mapping to kernel crypto API */ /* kernel crypto API backend cipher name */ char backend_cra_name[CRYPTO_MAX_ALG_NAME]; }; struct drbg_state_ops { int (*update)(struct drbg_state *drbg, struct list_head *seed, int reseed); int (*generate)(struct drbg_state *drbg, unsigned char *buf, unsigned int buflen, struct list_head *addtl); int (*crypto_init)(struct drbg_state *drbg); int (*crypto_fini)(struct drbg_state *drbg); }; struct drbg_test_data { struct drbg_string *testentropy; /* TEST PARAMETER: test entropy */ }; struct drbg_state { struct mutex drbg_mutex; /* lock around DRBG */ unsigned char *V; /* internal state 10.1.1.1 1a) */ unsigned char *Vbuf; /* hash: static value 10.1.1.1 1b) hmac / ctr: key */ unsigned char *C; unsigned char *Cbuf; /* Number of RNG requests since last reseed -- 10.1.1.1 1c) */ size_t reseed_ctr; size_t reseed_threshold; /* some memory the DRBG can use for its operation */ unsigned char *scratchpad; unsigned char *scratchpadbuf; void *priv_data; /* Cipher handle */ struct crypto_skcipher *ctr_handle; /* CTR mode cipher handle */ struct skcipher_request *ctr_req; /* CTR mode request handle */ __u8 *outscratchpadbuf; /* CTR mode output scratchpad */ __u8 *outscratchpad; /* CTR mode aligned outbuf */ struct crypto_wait ctr_wait; /* CTR mode async wait obj */ struct scatterlist sg_in, sg_out; /* CTR mode SGLs */ bool seeded; /* DRBG fully seeded? */ bool pr; /* Prediction resistance enabled? */ bool fips_primed; /* Continuous test primed? */ unsigned char *prev; /* FIPS 140-2 continuous test value */ struct work_struct seed_work; /* asynchronous seeding support */ struct crypto_rng *jent; const struct drbg_state_ops *d_ops; const struct drbg_core *core; struct drbg_string test_data; struct random_ready_callback random_ready; }; static inline __u8 drbg_statelen(struct drbg_state *drbg) { if (drbg && drbg->core) return drbg->core->statelen; return 0; } static inline __u8 drbg_blocklen(struct drbg_state *drbg) { if (drbg && drbg->core) return drbg->core->blocklen_bytes; return 0; } static inline __u8 drbg_keylen(struct drbg_state *drbg) { if (drbg && drbg->core) return (drbg->core->statelen - drbg->core->blocklen_bytes); return 0; } static inline size_t drbg_max_request_bytes(struct drbg_state *drbg) { /* SP800-90A requires the limit 2**19 bits, but we return bytes */ return (1 << 16); } static inline size_t drbg_max_addtl(struct drbg_state *drbg) { /* SP800-90A requires 2**35 bytes additional info str / pers str */ #if (__BITS_PER_LONG == 32) /* * SP800-90A allows smaller maximum numbers to be returned -- we * return SIZE_MAX - 1 to allow the verification of the enforcement * of this value in drbg_healthcheck_sanity. */ return (SIZE_MAX - 1); #else return (1UL<<35); #endif } static inline size_t drbg_max_requests(struct drbg_state *drbg) { /* SP800-90A requires 2**48 maximum requests before reseeding */ return (1<<20); } /* * This is a wrapper to the kernel crypto API function of * crypto_rng_generate() to allow the caller to provide additional data. * * @drng DRBG handle -- see crypto_rng_get_bytes * @outbuf output buffer -- see crypto_rng_get_bytes * @outlen length of output buffer -- see crypto_rng_get_bytes * @addtl_input additional information string input buffer * @addtllen length of additional information string buffer * * return * see crypto_rng_get_bytes */ static inline int crypto_drbg_get_bytes_addtl(struct crypto_rng *drng, unsigned char *outbuf, unsigned int outlen, struct drbg_string *addtl) { return crypto_rng_generate(drng, addtl->buf, addtl->len, outbuf, outlen); } /* * TEST code * * This is a wrapper to the kernel crypto API function of * crypto_rng_generate() to allow the caller to provide additional data and * allow furnishing of test_data * * @drng DRBG handle -- see crypto_rng_get_bytes * @outbuf output buffer -- see crypto_rng_get_bytes * @outlen length of output buffer -- see crypto_rng_get_bytes * @addtl_input additional information string input buffer * @addtllen length of additional information string buffer * @test_data filled test data * * return * see crypto_rng_get_bytes */ static inline int crypto_drbg_get_bytes_addtl_test(struct crypto_rng *drng, unsigned char *outbuf, unsigned int outlen, struct drbg_string *addtl, struct drbg_test_data *test_data) { crypto_rng_set_entropy(drng, test_data->testentropy->buf, test_data->testentropy->len); return crypto_rng_generate(drng, addtl->buf, addtl->len, outbuf, outlen); } /* * TEST code * * This is a wrapper to the kernel crypto API function of * crypto_rng_reset() to allow the caller to provide test_data * * @drng DRBG handle -- see crypto_rng_reset * @pers personalization string input buffer * @perslen length of additional information string buffer * @test_data filled test data * * return * see crypto_rng_reset */ static inline int crypto_drbg_reset_test(struct crypto_rng *drng, struct drbg_string *pers, struct drbg_test_data *test_data) { crypto_rng_set_entropy(drng, test_data->testentropy->buf, test_data->testentropy->len); return crypto_rng_reset(drng, pers->buf, pers->len); } /* DRBG type flags */ #define DRBG_CTR ((drbg_flag_t)1<<0) #define DRBG_HMAC ((drbg_flag_t)1<<1) #define DRBG_HASH ((drbg_flag_t)1<<2) #define DRBG_TYPE_MASK (DRBG_CTR | DRBG_HMAC | DRBG_HASH) /* DRBG strength flags */ #define DRBG_STRENGTH128 ((drbg_flag_t)1<<3) #define DRBG_STRENGTH192 ((drbg_flag_t)1<<4) #define DRBG_STRENGTH256 ((drbg_flag_t)1<<5) #define DRBG_STRENGTH_MASK (DRBG_STRENGTH128 | DRBG_STRENGTH192 | \ DRBG_STRENGTH256) enum drbg_prefixes { DRBG_PREFIX0 = 0x00, DRBG_PREFIX1, DRBG_PREFIX2, DRBG_PREFIX3 }; #endif /* _DRBG_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_RATELIMIT_H #define _LINUX_RATELIMIT_H #include <linux/ratelimit_types.h> #include <linux/sched.h> #include <linux/spinlock.h> static inline void ratelimit_state_init(struct ratelimit_state *rs, int interval, int burst) { memset(rs, 0, sizeof(*rs)); raw_spin_lock_init(&rs->lock); rs->interval = interval; rs->burst = burst; } static inline void ratelimit_default_init(struct ratelimit_state *rs) { return ratelimit_state_init(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); } static inline void ratelimit_state_exit(struct ratelimit_state *rs) { if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) return; if (rs->missed) { pr_warn("%s: %d output lines suppressed due to ratelimiting\n", current->comm, rs->missed); rs->missed = 0; } } static inline void ratelimit_set_flags(struct ratelimit_state *rs, unsigned long flags) { rs->flags = flags; } extern struct ratelimit_state printk_ratelimit_state; #ifdef CONFIG_PRINTK #define WARN_ON_RATELIMIT(condition, state) ({ \ bool __rtn_cond = !!(condition); \ WARN_ON(__rtn_cond && __ratelimit(state)); \ __rtn_cond; \ }) #define WARN_RATELIMIT(condition, format, ...) \ ({ \ static DEFINE_RATELIMIT_STATE(_rs, \ DEFAULT_RATELIMIT_INTERVAL, \ DEFAULT_RATELIMIT_BURST); \ int rtn = !!(condition); \ \ if (unlikely(rtn && __ratelimit(&_rs))) \ WARN(rtn, format, ##__VA_ARGS__); \ \ rtn; \ }) #else #define WARN_ON_RATELIMIT(condition, state) \ WARN_ON(condition) #define WARN_RATELIMIT(condition, format, ...) \ ({ \ int rtn = WARN(condition, format, ##__VA_ARGS__); \ rtn; \ }) #endif #endif /* _LINUX_RATELIMIT_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * inet6 interface/address list definitions * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> */ #ifndef _NET_IF_INET6_H #define _NET_IF_INET6_H #include <net/snmp.h> #include <linux/ipv6.h> #include <linux/refcount.h> /* inet6_dev.if_flags */ #define IF_RA_OTHERCONF 0x80 #define IF_RA_MANAGED 0x40 #define IF_RA_RCVD 0x20 #define IF_RS_SENT 0x10 #define IF_READY 0x80000000 /* prefix flags */ #define IF_PREFIX_ONLINK 0x01 #define IF_PREFIX_AUTOCONF 0x02 enum { INET6_IFADDR_STATE_PREDAD, INET6_IFADDR_STATE_DAD, INET6_IFADDR_STATE_POSTDAD, INET6_IFADDR_STATE_ERRDAD, INET6_IFADDR_STATE_DEAD, }; struct inet6_ifaddr { struct in6_addr addr; __u32 prefix_len; __u32 rt_priority; /* In seconds, relative to tstamp. Expiry is at tstamp + HZ * lft. */ __u32 valid_lft; __u32 prefered_lft; refcount_t refcnt; spinlock_t lock; int state; __u32 flags; __u8 dad_probes; __u8 stable_privacy_retry; __u16 scope; __u64 dad_nonce; unsigned long cstamp; /* created timestamp */ unsigned long tstamp; /* updated timestamp */ struct delayed_work dad_work; struct inet6_dev *idev; struct fib6_info *rt; struct hlist_node addr_lst; struct list_head if_list; struct list_head tmp_list; struct inet6_ifaddr *ifpub; int regen_count; bool tokenized; struct rcu_head rcu; struct in6_addr peer_addr; }; struct ip6_sf_socklist { unsigned int sl_max; unsigned int sl_count; struct in6_addr sl_addr[]; }; #define IP6_SFLSIZE(count) (sizeof(struct ip6_sf_socklist) + \ (count) * sizeof(struct in6_addr)) #define IP6_SFBLOCK 10 /* allocate this many at once */ struct ipv6_mc_socklist { struct in6_addr addr; int ifindex; unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */ struct ipv6_mc_socklist __rcu *next; rwlock_t sflock; struct ip6_sf_socklist *sflist; struct rcu_head rcu; }; struct ip6_sf_list { struct ip6_sf_list *sf_next; struct in6_addr sf_addr; unsigned long sf_count[2]; /* include/exclude counts */ unsigned char sf_gsresp; /* include in g & s response? */ unsigned char sf_oldin; /* change state */ unsigned char sf_crcount; /* retrans. left to send */ }; #define MAF_TIMER_RUNNING 0x01 #define MAF_LAST_REPORTER 0x02 #define MAF_LOADED 0x04 #define MAF_NOREPORT 0x08 #define MAF_GSQUERY 0x10 struct ifmcaddr6 { struct in6_addr mca_addr; struct inet6_dev *idev; struct ifmcaddr6 *next; struct ip6_sf_list *mca_sources; struct ip6_sf_list *mca_tomb; unsigned int mca_sfmode; unsigned char mca_crcount; unsigned long mca_sfcount[2]; struct timer_list mca_timer; unsigned int mca_flags; int mca_users; refcount_t mca_refcnt; spinlock_t mca_lock; unsigned long mca_cstamp; unsigned long mca_tstamp; }; /* Anycast stuff */ struct ipv6_ac_socklist { struct in6_addr acl_addr; int acl_ifindex; struct ipv6_ac_socklist *acl_next; }; struct ifacaddr6 { struct in6_addr aca_addr; struct fib6_info *aca_rt; struct ifacaddr6 *aca_next; struct hlist_node aca_addr_lst; int aca_users; refcount_t aca_refcnt; unsigned long aca_cstamp; unsigned long aca_tstamp; struct rcu_head rcu; }; #define IFA_HOST IPV6_ADDR_LOOPBACK #define IFA_LINK IPV6_ADDR_LINKLOCAL #define IFA_SITE IPV6_ADDR_SITELOCAL struct ipv6_devstat { struct proc_dir_entry *proc_dir_entry; DEFINE_SNMP_STAT(struct ipstats_mib, ipv6); DEFINE_SNMP_STAT_ATOMIC(struct icmpv6_mib_device, icmpv6dev); DEFINE_SNMP_STAT_ATOMIC(struct icmpv6msg_mib_device, icmpv6msgdev); }; struct inet6_dev { struct net_device *dev; struct list_head addr_list; struct ifmcaddr6 *mc_list; struct ifmcaddr6 *mc_tomb; spinlock_t mc_lock; unsigned char mc_qrv; /* Query Robustness Variable */ unsigned char mc_gq_running; unsigned char mc_ifc_count; unsigned char mc_dad_count; unsigned long mc_v1_seen; /* Max time we stay in MLDv1 mode */ unsigned long mc_qi; /* Query Interval */ unsigned long mc_qri; /* Query Response Interval */ unsigned long mc_maxdelay; struct timer_list mc_gq_timer; /* general query timer */ struct timer_list mc_ifc_timer; /* interface change timer */ struct timer_list mc_dad_timer; /* dad complete mc timer */ struct ifacaddr6 *ac_list; rwlock_t lock; refcount_t refcnt; __u32 if_flags; int dead; u32 desync_factor; struct list_head tempaddr_list; struct in6_addr token; struct neigh_parms *nd_parms; struct ipv6_devconf cnf; struct ipv6_devstat stats; struct timer_list rs_timer; __s32 rs_interval; /* in jiffies */ __u8 rs_probes; unsigned long tstamp; /* ipv6InterfaceTable update timestamp */ struct rcu_head rcu; }; static inline void ipv6_eth_mc_map(const struct in6_addr *addr, char *buf) { /* * +-------+-------+-------+-------+-------+-------+ * | 33 | 33 | DST13 | DST14 | DST15 | DST16 | * +-------+-------+-------+-------+-------+-------+ */ buf[0]= 0x33; buf[1]= 0x33; memcpy(buf + 2, &addr->s6_addr32[3], sizeof(__u32)); } static inline void ipv6_arcnet_mc_map(const struct in6_addr *addr, char *buf) { buf[0] = 0x00; } static inline void ipv6_ib_mc_map(const struct in6_addr *addr, const unsigned char *broadcast, char *buf) { unsigned char scope = broadcast[5] & 0xF; buf[0] = 0; /* Reserved */ buf[1] = 0xff; /* Multicast QPN */ buf[2] = 0xff; buf[3] = 0xff; buf[4] = 0xff; buf[5] = 0x10 | scope; /* scope from broadcast address */ buf[6] = 0x60; /* IPv6 signature */ buf[7] = 0x1b; buf[8] = broadcast[8]; /* P_Key */ buf[9] = broadcast[9]; memcpy(buf + 10, addr->s6_addr + 6, 10); } static inline int ipv6_ipgre_mc_map(const struct in6_addr *addr, const unsigned char *broadcast, char *buf) { if ((broadcast[0] | broadcast[1] | broadcast[2] | broadcast[3]) != 0) { memcpy(buf, broadcast, 4); } else { /* v4mapped? */ if ((addr->s6_addr32[0] | addr->s6_addr32[1] | (addr->s6_addr32[2] ^ htonl(0x0000ffff))) != 0) return -EINVAL; memcpy(buf, &addr->s6_addr32[3], 4); } return 0; } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_BYTEORDER_GENERIC_H #define _LINUX_BYTEORDER_GENERIC_H /* * linux/byteorder/generic.h * Generic Byte-reordering support * * The "... p" macros, like le64_to_cpup, can be used with pointers * to unaligned data, but there will be a performance penalty on * some architectures. Use get_unaligned for unaligned data. * * Francois-Rene Rideau <fare@tunes.org> 19970707 * gathered all the good ideas from all asm-foo/byteorder.h into one file, * cleaned them up. * I hope it is compliant with non-GCC compilers. * I decided to put __BYTEORDER_HAS_U64__ in byteorder.h, * because I wasn't sure it would be ok to put it in types.h * Upgraded it to 2.1.43 * Francois-Rene Rideau <fare@tunes.org> 19971012 * Upgraded it to 2.1.57 * to please Linus T., replaced huge #ifdef's between little/big endian * by nestedly #include'd files. * Francois-Rene Rideau <fare@tunes.org> 19971205 * Made it to 2.1.71; now a facelift: * Put files under include/linux/byteorder/ * Split swab from generic support. * * TODO: * = Regular kernel maintainers could also replace all these manual * byteswap macros that remain, disseminated among drivers, * after some grep or the sources... * = Linus might want to rename all these macros and files to fit his taste, * to fit his personal naming scheme. * = it seems that a few drivers would also appreciate * nybble swapping support... * = every architecture could add their byteswap macro in asm/byteorder.h * see how some architectures already do (i386, alpha, ppc, etc) * = cpu_to_beXX and beXX_to_cpu might some day need to be well * distinguished throughout the kernel. This is not the case currently, * since little endian, big endian, and pdp endian machines needn't it. * But this might be the case for, say, a port of Linux to 20/21 bit * architectures (and F21 Linux addict around?). */ /* * The following macros are to be defined by <asm/byteorder.h>: * * Conversion of long and short int between network and host format * ntohl(__u32 x) * ntohs(__u16 x) * htonl(__u32 x) * htons(__u16 x) * It seems that some programs (which? where? or perhaps a standard? POSIX?) * might like the above to be functions, not macros (why?). * if that's true, then detect them, and take measures. * Anyway, the measure is: define only ___ntohl as a macro instead, * and in a separate file, have * unsigned long inline ntohl(x){return ___ntohl(x);} * * The same for constant arguments * __constant_ntohl(__u32 x) * __constant_ntohs(__u16 x) * __constant_htonl(__u32 x) * __constant_htons(__u16 x) * * Conversion of XX-bit integers (16- 32- or 64-) * between native CPU format and little/big endian format * 64-bit stuff only defined for proper architectures * cpu_to_[bl]eXX(__uXX x) * [bl]eXX_to_cpu(__uXX x) * * The same, but takes a pointer to the value to convert * cpu_to_[bl]eXXp(__uXX x) * [bl]eXX_to_cpup(__uXX x) * * The same, but change in situ * cpu_to_[bl]eXXs(__uXX x) * [bl]eXX_to_cpus(__uXX x) * * See asm-foo/byteorder.h for examples of how to provide * architecture-optimized versions * */ #define cpu_to_le64 __cpu_to_le64 #define le64_to_cpu __le64_to_cpu #define cpu_to_le32 __cpu_to_le32 #define le32_to_cpu __le32_to_cpu #define cpu_to_le16 __cpu_to_le16 #define le16_to_cpu __le16_to_cpu #define cpu_to_be64 __cpu_to_be64 #define be64_to_cpu __be64_to_cpu #define cpu_to_be32 __cpu_to_be32 #define be32_to_cpu __be32_to_cpu #define cpu_to_be16 __cpu_to_be16 #define be16_to_cpu __be16_to_cpu #define cpu_to_le64p __cpu_to_le64p #define le64_to_cpup __le64_to_cpup #define cpu_to_le32p __cpu_to_le32p #define le32_to_cpup __le32_to_cpup #define cpu_to_le16p __cpu_to_le16p #define le16_to_cpup __le16_to_cpup #define cpu_to_be64p __cpu_to_be64p #define be64_to_cpup __be64_to_cpup #define cpu_to_be32p __cpu_to_be32p #define be32_to_cpup __be32_to_cpup #define cpu_to_be16p __cpu_to_be16p #define be16_to_cpup __be16_to_cpup #define cpu_to_le64s __cpu_to_le64s #define le64_to_cpus __le64_to_cpus #define cpu_to_le32s __cpu_to_le32s #define le32_to_cpus __le32_to_cpus #define cpu_to_le16s __cpu_to_le16s #define le16_to_cpus __le16_to_cpus #define cpu_to_be64s __cpu_to_be64s #define be64_to_cpus __be64_to_cpus #define cpu_to_be32s __cpu_to_be32s #define be32_to_cpus __be32_to_cpus #define cpu_to_be16s __cpu_to_be16s #define be16_to_cpus __be16_to_cpus /* * They have to be macros in order to do the constant folding * correctly - if the argument passed into a inline function * it is no longer constant according to gcc.. */ #undef ntohl #undef ntohs #undef htonl #undef htons #define ___htonl(x) __cpu_to_be32(x) #define ___htons(x) __cpu_to_be16(x) #define ___ntohl(x) __be32_to_cpu(x) #define ___ntohs(x) __be16_to_cpu(x) #define htonl(x) ___htonl(x) #define ntohl(x) ___ntohl(x) #define htons(x) ___htons(x) #define ntohs(x) ___ntohs(x) static inline void le16_add_cpu(__le16 *var, u16 val) { *var = cpu_to_le16(le16_to_cpu(*var) + val); } static inline void le32_add_cpu(__le32 *var, u32 val) { *var = cpu_to_le32(le32_to_cpu(*var) + val); } static inline void le64_add_cpu(__le64 *var, u64 val) { *var = cpu_to_le64(le64_to_cpu(*var) + val); } /* XXX: this stuff can be optimized */ static inline void le32_to_cpu_array(u32 *buf, unsigned int words) { while (words--) { __le32_to_cpus(buf); buf++; } } static inline void cpu_to_le32_array(u32 *buf, unsigned int words) { while (words--) { __cpu_to_le32s(buf); buf++; } } static inline void be16_add_cpu(__be16 *var, u16 val) { *var = cpu_to_be16(be16_to_cpu(*var) + val); } static inline void be32_add_cpu(__be32 *var, u32 val) { *var = cpu_to_be32(be32_to_cpu(*var) + val); } static inline void be64_add_cpu(__be64 *var, u64 val) { *var = cpu_to_be64(be64_to_cpu(*var) + val); } static inline void cpu_to_be32_array(__be32 *dst, const u32 *src, size_t len) { int i; for (i = 0; i < len; i++) dst[i] = cpu_to_be32(src[i]); } static inline void be32_to_cpu_array(u32 *dst, const __be32 *src, size_t len) { int i; for (i = 0; i < len; i++) dst[i] = be32_to_cpu(src[i]); } #endif /* _LINUX_BYTEORDER_GENERIC_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 /* SPDX-License-Identifier: GPL-2.0 */ /* linux/include/linux/clockchips.h * * This file contains the structure definitions for clockchips. * * If you are not a clockchip, or the time of day code, you should * not be including this file! */ #ifndef _LINUX_CLOCKCHIPS_H #define _LINUX_CLOCKCHIPS_H #ifdef CONFIG_GENERIC_CLOCKEVENTS # include <linux/clocksource.h> # include <linux/cpumask.h> # include <linux/ktime.h> # include <linux/notifier.h> struct clock_event_device; struct module; /* * Possible states of a clock event device. * * DETACHED: Device is not used by clockevents core. Initial state or can be * reached from SHUTDOWN. * SHUTDOWN: Device is powered-off. Can be reached from PERIODIC or ONESHOT. * PERIODIC: Device is programmed to generate events periodically. Can be * reached from DETACHED or SHUTDOWN. * ONESHOT: Device is programmed to generate event only once. Can be reached * from DETACHED or SHUTDOWN. * ONESHOT_STOPPED: Device was programmed in ONESHOT mode and is temporarily * stopped. */ enum clock_event_state { CLOCK_EVT_STATE_DETACHED, CLOCK_EVT_STATE_SHUTDOWN, CLOCK_EVT_STATE_PERIODIC, CLOCK_EVT_STATE_ONESHOT, CLOCK_EVT_STATE_ONESHOT_STOPPED, }; /* * Clock event features */ # define CLOCK_EVT_FEAT_PERIODIC 0x000001 # define CLOCK_EVT_FEAT_ONESHOT 0x000002 # define CLOCK_EVT_FEAT_KTIME 0x000004 /* * x86(64) specific (mis)features: * * - Clockevent source stops in C3 State and needs broadcast support. * - Local APIC timer is used as a dummy device. */ # define CLOCK_EVT_FEAT_C3STOP 0x000008 # define CLOCK_EVT_FEAT_DUMMY 0x000010 /* * Core shall set the interrupt affinity dynamically in broadcast mode */ # define CLOCK_EVT_FEAT_DYNIRQ 0x000020 # define CLOCK_EVT_FEAT_PERCPU 0x000040 /* * Clockevent device is based on a hrtimer for broadcast */ # define CLOCK_EVT_FEAT_HRTIMER 0x000080 /** * struct clock_event_device - clock event device descriptor * @event_handler: Assigned by the framework to be called by the low * level handler of the event source * @set_next_event: set next event function using a clocksource delta * @set_next_ktime: set next event function using a direct ktime value * @next_event: local storage for the next event in oneshot mode * @max_delta_ns: maximum delta value in ns * @min_delta_ns: minimum delta value in ns * @mult: nanosecond to cycles multiplier * @shift: nanoseconds to cycles divisor (power of two) * @state_use_accessors:current state of the device, assigned by the core code * @features: features * @retries: number of forced programming retries * @set_state_periodic: switch state to periodic * @set_state_oneshot: switch state to oneshot * @set_state_oneshot_stopped: switch state to oneshot_stopped * @set_state_shutdown: switch state to shutdown * @tick_resume: resume clkevt device * @broadcast: function to broadcast events * @min_delta_ticks: minimum delta value in ticks stored for reconfiguration * @max_delta_ticks: maximum delta value in ticks stored for reconfiguration * @name: ptr to clock event name * @rating: variable to rate clock event devices * @irq: IRQ number (only for non CPU local devices) * @bound_on: Bound on CPU * @cpumask: cpumask to indicate for which CPUs this device works * @list: list head for the management code * @owner: module reference */ struct clock_event_device { void (*event_handler)(struct clock_event_device *); int (*set_next_event)(unsigned long evt, struct clock_event_device *); int (*set_next_ktime)(ktime_t expires, struct clock_event_device *); ktime_t next_event; u64 max_delta_ns; u64 min_delta_ns; u32 mult; u32 shift; enum clock_event_state state_use_accessors; unsigned int features; unsigned long retries; int (*set_state_periodic)(struct clock_event_device *); int (*set_state_oneshot)(struct clock_event_device *); int (*set_state_oneshot_stopped)(struct clock_event_device *); int (*set_state_shutdown)(struct clock_event_device *); int (*tick_resume)(struct clock_event_device *); void (*broadcast)(const struct cpumask *mask); void (*suspend)(struct clock_event_device *); void (*resume)(struct clock_event_device *); unsigned long min_delta_ticks; unsigned long max_delta_ticks; const char *name; int rating; int irq; int bound_on; const struct cpumask *cpumask; struct list_head list; struct module *owner; } ____cacheline_aligned; /* Helpers to verify state of a clockevent device */ static inline bool clockevent_state_detached(struct clock_event_device *dev) { return dev->state_use_accessors == CLOCK_EVT_STATE_DETACHED; } static inline bool clockevent_state_shutdown(struct clock_event_device *dev) { return dev->state_use_accessors == CLOCK_EVT_STATE_SHUTDOWN; } static inline bool clockevent_state_periodic(struct clock_event_device *dev) { return dev->state_use_accessors == CLOCK_EVT_STATE_PERIODIC; } static inline bool clockevent_state_oneshot(struct clock_event_device *dev) { return dev->state_use_accessors == CLOCK_EVT_STATE_ONESHOT; } static inline bool clockevent_state_oneshot_stopped(struct clock_event_device *dev) { return dev->state_use_accessors == CLOCK_EVT_STATE_ONESHOT_STOPPED; } /* * Calculate a multiplication factor for scaled math, which is used to convert * nanoseconds based values to clock ticks: * * clock_ticks = (nanoseconds * factor) >> shift. * * div_sc is the rearranged equation to calculate a factor from a given clock * ticks / nanoseconds ratio: * * factor = (clock_ticks << shift) / nanoseconds */ static inline unsigned long div_sc(unsigned long ticks, unsigned long nsec, int shift) { u64 tmp = ((u64)ticks) << shift; do_div(tmp, nsec); return (unsigned long) tmp; } /* Clock event layer functions */ extern u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt); extern void clockevents_register_device(struct clock_event_device *dev); extern int clockevents_unbind_device(struct clock_event_device *ced, int cpu); extern void clockevents_config_and_register(struct clock_event_device *dev, u32 freq, unsigned long min_delta, unsigned long max_delta); extern int clockevents_update_freq(struct clock_event_device *ce, u32 freq); static inline void clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 maxsec) { return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC, freq, maxsec); } extern void clockevents_suspend(void); extern void clockevents_resume(void); # ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST # ifdef CONFIG_ARCH_HAS_TICK_BROADCAST extern void tick_broadcast(const struct cpumask *mask); # else # define tick_broadcast NULL # endif extern int tick_receive_broadcast(void); # endif # if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) extern void tick_setup_hrtimer_broadcast(void); extern int tick_check_broadcast_expired(void); # else static inline int tick_check_broadcast_expired(void) { return 0; } static inline void tick_setup_hrtimer_broadcast(void) { } # endif #else /* !CONFIG_GENERIC_CLOCKEVENTS: */ static inline void clockevents_suspend(void) { } static inline void clockevents_resume(void) { } static inline int tick_check_broadcast_expired(void) { return 0; } static inline void tick_setup_hrtimer_broadcast(void) { } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ #endif /* _LINUX_CLOCKCHIPS_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PROCFS_FD_H__ #define __PROCFS_FD_H__ #include <linux/fs.h> extern const struct file_operations proc_fd_operations; extern const struct inode_operations proc_fd_inode_operations; extern const struct file_operations proc_fdinfo_operations; extern const struct inode_operations proc_fdinfo_inode_operations; extern int proc_fd_permission(struct inode *inode, int mask); static inline unsigned int proc_fd(struct inode *inode) { return PROC_I(inode)->fd; } #endif /* __PROCFS_FD_H__ */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _FIB_LOOKUP_H #define _FIB_LOOKUP_H #include <linux/types.h> #include <linux/list.h> #include <net/ip_fib.h> #include <net/nexthop.h> struct fib_alias { struct hlist_node fa_list; struct fib_info *fa_info; u8 fa_tos; u8 fa_type; u8 fa_state; u8 fa_slen; u32 tb_id; s16 fa_default; u8 offload:1, trap:1, unused:6; struct rcu_head rcu; }; #define FA_S_ACCESSED 0x01 /* Dont write on fa_state unless needed, to keep it shared on all cpus */ static inline void fib_alias_accessed(struct fib_alias *fa) { if (!(fa->fa_state & FA_S_ACCESSED)) fa->fa_state |= FA_S_ACCESSED; } /* Exported by fib_semantics.c */ void fib_release_info(struct fib_info *); struct fib_info *fib_create_info(struct fib_config *cfg, struct netlink_ext_ack *extack); int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, struct netlink_ext_ack *extack); bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi); int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, struct fib_rt_info *fri, unsigned int flags); void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, int dst_len, u32 tb_id, const struct nl_info *info, unsigned int nlm_flags); static inline void fib_result_assign(struct fib_result *res, struct fib_info *fi) { /* we used to play games with refcounts, but we now use RCU */ res->fi = fi; res->nhc = fib_info_nhc(fi, 0); } struct fib_prop { int error; u8 scope; }; extern const struct fib_prop fib_props[RTN_MAX + 1]; #endif /* _FIB_LOOKUP_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_KASAN_H #define _LINUX_KASAN_H #include <linux/types.h> struct kmem_cache; struct page; struct vm_struct; struct task_struct; #ifdef CONFIG_KASAN #include <linux/pgtable.h> #include <asm/kasan.h> /* kasan_data struct is used in KUnit tests for KASAN expected failures */ struct kunit_kasan_expectation { bool report_expected; bool report_found; }; extern unsigned char kasan_early_shadow_page[PAGE_SIZE]; extern pte_t kasan_early_shadow_pte[PTRS_PER_PTE]; extern pmd_t kasan_early_shadow_pmd[PTRS_PER_PMD]; extern pud_t kasan_early_shadow_pud[PTRS_PER_PUD]; extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D]; int kasan_populate_early_shadow(const void *shadow_start, const void *shadow_end); static inline void *kasan_mem_to_shadow(const void *addr) { return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET; } /* Enable reporting bugs after kasan_disable_current() */ extern void kasan_enable_current(void); /* Disable reporting bugs for current task */ extern void kasan_disable_current(void); void kasan_unpoison_shadow(const void *address, size_t size); void kasan_unpoison_task_stack(struct task_struct *task); void kasan_alloc_pages(struct page *page, unsigned int order); void kasan_free_pages(struct page *page, unsigned int order); void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, slab_flags_t *flags); void kasan_poison_slab(struct page *page); void kasan_unpoison_object_data(struct kmem_cache *cache, void *object); void kasan_poison_object_data(struct kmem_cache *cache, void *object); void * __must_check kasan_init_slab_obj(struct kmem_cache *cache, const void *object); void * __must_check kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags); void kasan_kfree_large(void *ptr, unsigned long ip); void kasan_poison_kfree(void *ptr, unsigned long ip); void * __must_check kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, gfp_t flags); void * __must_check kasan_krealloc(const void *object, size_t new_size, gfp_t flags); void * __must_check kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags); bool kasan_slab_free(struct kmem_cache *s, void *object, unsigned long ip); struct kasan_cache { int alloc_meta_offset; int free_meta_offset; }; /* * These functions provide a special case to support backing module * allocations with real shadow memory. With KASAN vmalloc, the special * case is unnecessary, as the work is handled in the generic case. */ #ifndef CONFIG_KASAN_VMALLOC int kasan_module_alloc(void *addr, size_t size); void kasan_free_shadow(const struct vm_struct *vm); #else static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } static inline void kasan_free_shadow(const struct vm_struct *vm) {} #endif int kasan_add_zero_shadow(void *start, unsigned long size); void kasan_remove_zero_shadow(void *start, unsigned long size); size_t __ksize(const void *); static inline void kasan_unpoison_slab(const void *ptr) { kasan_unpoison_shadow(ptr, __ksize(ptr)); } size_t kasan_metadata_size(struct kmem_cache *cache); bool kasan_save_enable_multi_shot(void); void kasan_restore_multi_shot(bool enabled); #else /* CONFIG_KASAN */ static inline void kasan_unpoison_shadow(const void *address, size_t size) {} static inline void kasan_unpoison_task_stack(struct task_struct *task) {} static inline void kasan_enable_current(void) {} static inline void kasan_disable_current(void) {} static inline void kasan_alloc_pages(struct page *page, unsigned int order) {} static inline void kasan_free_pages(struct page *page, unsigned int order) {} static inline void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, slab_flags_t *flags) {} static inline void kasan_poison_slab(struct page *page) {} static inline void kasan_unpoison_object_data(struct kmem_cache *cache, void *object) {} static inline void kasan_poison_object_data(struct kmem_cache *cache, void *object) {} static inline void *kasan_init_slab_obj(struct kmem_cache *cache, const void *object) { return (void *)object; } static inline void *kasan_kmalloc_large(void *ptr, size_t size, gfp_t flags) { return ptr; } static inline void kasan_kfree_large(void *ptr, unsigned long ip) {} static inline void kasan_poison_kfree(void *ptr, unsigned long ip) {} static inline void *kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, gfp_t flags) { return (void *)object; } static inline void *kasan_krealloc(const void *object, size_t new_size, gfp_t flags) { return (void *)object; } static inline void *kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags) { return object; } static inline bool kasan_slab_free(struct kmem_cache *s, void *object, unsigned long ip) { return false; } static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } static inline void kasan_free_shadow(const struct vm_struct *vm) {} static inline int kasan_add_zero_shadow(void *start, unsigned long size) { return 0; } static inline void kasan_remove_zero_shadow(void *start, unsigned long size) {} static inline void kasan_unpoison_slab(const void *ptr) { } static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; } #endif /* CONFIG_KASAN */ #ifdef CONFIG_KASAN_GENERIC #define KASAN_SHADOW_INIT 0 void kasan_cache_shrink(struct kmem_cache *cache); void kasan_cache_shutdown(struct kmem_cache *cache); void kasan_record_aux_stack(void *ptr); #else /* CONFIG_KASAN_GENERIC */ static inline void kasan_cache_shrink(struct kmem_cache *cache) {} static inline void kasan_cache_shutdown(struct kmem_cache *cache) {} static inline void kasan_record_aux_stack(void *ptr) {} #endif /* CONFIG_KASAN_GENERIC */ #ifdef CONFIG_KASAN_SW_TAGS #define KASAN_SHADOW_INIT 0xFF void kasan_init_tags(void); void *kasan_reset_tag(const void *addr); bool kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip); #else /* CONFIG_KASAN_SW_TAGS */ static inline void kasan_init_tags(void) { } static inline void *kasan_reset_tag(const void *addr) { return (void *)addr; } #endif /* CONFIG_KASAN_SW_TAGS */ #ifdef CONFIG_KASAN_VMALLOC int kasan_populate_vmalloc(unsigned long addr, unsigned long size); void kasan_poison_vmalloc(const void *start, unsigned long size); void kasan_unpoison_vmalloc(const void *start, unsigned long size); void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end); #else static inline int kasan_populate_vmalloc(unsigned long start, unsigned long size) { return 0; } static inline void kasan_poison_vmalloc(const void *start, unsigned long size) { } static inline void kasan_unpoison_vmalloc(const void *start, unsigned long size) { } static inline void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end) {} #endif #ifdef CONFIG_KASAN_INLINE void kasan_non_canonical_hook(unsigned long addr); #else /* CONFIG_KASAN_INLINE */ static inline void kasan_non_canonical_hook(unsigned long addr) { } #endif /* CONFIG_KASAN_INLINE */ #endif /* LINUX_KASAN_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* audit.h -- Auditing support * * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. * All Rights Reserved. * * Written by Rickard E. (Rik) Faith <faith@redhat.com> */ #ifndef _LINUX_AUDIT_H_ #define _LINUX_AUDIT_H_ #include <linux/sched.h> #include <linux/ptrace.h> #include <uapi/linux/audit.h> #include <uapi/linux/netfilter/nf_tables.h> #define AUDIT_INO_UNSET ((unsigned long)-1) #define AUDIT_DEV_UNSET ((dev_t)-1) struct audit_sig_info { uid_t uid; pid_t pid; char ctx[]; }; struct audit_buffer; struct audit_context; struct inode; struct netlink_skb_parms; struct path; struct linux_binprm; struct mq_attr; struct mqstat; struct audit_watch; struct audit_tree; struct sk_buff; struct audit_krule { u32 pflags; u32 flags; u32 listnr; u32 action; u32 mask[AUDIT_BITMASK_SIZE]; u32 buflen; /* for data alloc on list rules */ u32 field_count; char *filterkey; /* ties events to rules */ struct audit_field *fields; struct audit_field *arch_f; /* quick access to arch field */ struct audit_field *inode_f; /* quick access to an inode field */ struct audit_watch *watch; /* associated watch */ struct audit_tree *tree; /* associated watched tree */ struct audit_fsnotify_mark *exe; struct list_head rlist; /* entry in audit_{watch,tree}.rules list */ struct list_head list; /* for AUDIT_LIST* purposes only */ u64 prio; }; /* Flag to indicate legacy AUDIT_LOGINUID unset usage */ #define AUDIT_LOGINUID_LEGACY 0x1 struct audit_field { u32 type; union { u32 val; kuid_t uid; kgid_t gid; struct { char *lsm_str; void *lsm_rule; }; }; u32 op; }; enum audit_ntp_type { AUDIT_NTP_OFFSET, AUDIT_NTP_FREQ, AUDIT_NTP_STATUS, AUDIT_NTP_TAI, AUDIT_NTP_TICK, AUDIT_NTP_ADJUST, AUDIT_NTP_NVALS /* count */ }; #ifdef CONFIG_AUDITSYSCALL struct audit_ntp_val { long long oldval, newval; }; struct audit_ntp_data { struct audit_ntp_val vals[AUDIT_NTP_NVALS]; }; #else struct audit_ntp_data {}; #endif enum audit_nfcfgop { AUDIT_XT_OP_REGISTER, AUDIT_XT_OP_REPLACE, AUDIT_XT_OP_UNREGISTER, AUDIT_NFT_OP_TABLE_REGISTER, AUDIT_NFT_OP_TABLE_UNREGISTER, AUDIT_NFT_OP_CHAIN_REGISTER, AUDIT_NFT_OP_CHAIN_UNREGISTER, AUDIT_NFT_OP_RULE_REGISTER, AUDIT_NFT_OP_RULE_UNREGISTER, AUDIT_NFT_OP_SET_REGISTER, AUDIT_NFT_OP_SET_UNREGISTER, AUDIT_NFT_OP_SETELEM_REGISTER, AUDIT_NFT_OP_SETELEM_UNREGISTER, AUDIT_NFT_OP_GEN_REGISTER, AUDIT_NFT_OP_OBJ_REGISTER, AUDIT_NFT_OP_OBJ_UNREGISTER, AUDIT_NFT_OP_OBJ_RESET, AUDIT_NFT_OP_FLOWTABLE_REGISTER, AUDIT_NFT_OP_FLOWTABLE_UNREGISTER, AUDIT_NFT_OP_INVALID, }; extern int is_audit_feature_set(int which); extern int __init audit_register_class(int class, unsigned *list); extern int audit_classify_syscall(int abi, unsigned syscall); extern int audit_classify_arch(int arch); /* only for compat system calls */ extern unsigned compat_write_class[]; extern unsigned compat_read_class[]; extern unsigned compat_dir_class[]; extern unsigned compat_chattr_class[]; extern unsigned compat_signal_class[]; extern int audit_classify_compat_syscall(int abi, unsigned syscall); /* audit_names->type values */ #define AUDIT_TYPE_UNKNOWN 0 /* we don't know yet */ #define AUDIT_TYPE_NORMAL 1 /* a "normal" audit record */ #define AUDIT_TYPE_PARENT 2 /* a parent audit record */ #define AUDIT_TYPE_CHILD_DELETE 3 /* a child being deleted */ #define AUDIT_TYPE_CHILD_CREATE 4 /* a child being created */ /* maximized args number that audit_socketcall can process */ #define AUDITSC_ARGS 6 /* bit values for ->signal->audit_tty */ #define AUDIT_TTY_ENABLE BIT(0) #define AUDIT_TTY_LOG_PASSWD BIT(1) struct filename; #define AUDIT_OFF 0 #define AUDIT_ON 1 #define AUDIT_LOCKED 2 #ifdef CONFIG_AUDIT /* These are defined in audit.c */ /* Public API */ extern __printf(4, 5) void audit_log(struct audit_context *ctx, gfp_t gfp_mask, int type, const char *fmt, ...); extern struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, int type); extern __printf(2, 3) void audit_log_format(struct audit_buffer *ab, const char *fmt, ...); extern void audit_log_end(struct audit_buffer *ab); extern bool audit_string_contains_control(const char *string, size_t len); extern void audit_log_n_hex(struct audit_buffer *ab, const unsigned char *buf, size_t len); extern void audit_log_n_string(struct audit_buffer *ab, const char *buf, size_t n); extern void audit_log_n_untrustedstring(struct audit_buffer *ab, const char *string, size_t n); extern void audit_log_untrustedstring(struct audit_buffer *ab, const char *string); extern void audit_log_d_path(struct audit_buffer *ab, const char *prefix, const struct path *path); extern void audit_log_key(struct audit_buffer *ab, char *key); extern void audit_log_path_denied(int type, const char *operation); extern void audit_log_lost(const char *message); extern int audit_log_task_context(struct audit_buffer *ab); extern void audit_log_task_info(struct audit_buffer *ab); extern int audit_update_lsm_rules(void); /* Private API (for audit.c only) */ extern int audit_rule_change(int type, int seq, void *data, size_t datasz); extern int audit_list_rules_send(struct sk_buff *request_skb, int seq); extern int audit_set_loginuid(kuid_t loginuid); static inline kuid_t audit_get_loginuid(struct task_struct *tsk) { return tsk->loginuid; } static inline unsigned int audit_get_sessionid(struct task_struct *tsk) { return tsk->sessionid; } extern u32 audit_enabled; extern int audit_signal_info(int sig, struct task_struct *t); #else /* CONFIG_AUDIT */ static inline __printf(4, 5) void audit_log(struct audit_context *ctx, gfp_t gfp_mask, int type, const char *fmt, ...) { } static inline struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, int type) { return NULL; } static inline __printf(2, 3) void audit_log_format(struct audit_buffer *ab, const char *fmt, ...) { } static inline void audit_log_end(struct audit_buffer *ab) { } static inline void audit_log_n_hex(struct audit_buffer *ab, const unsigned char *buf, size_t len) { } static inline void audit_log_n_string(struct audit_buffer *ab, const char *buf, size_t n) { } static inline void audit_log_n_untrustedstring(struct audit_buffer *ab, const char *string, size_t n) { } static inline void audit_log_untrustedstring(struct audit_buffer *ab, const char *string) { } static inline void audit_log_d_path(struct audit_buffer *ab, const char *prefix, const struct path *path) { } static inline void audit_log_key(struct audit_buffer *ab, char *key) { } static inline void audit_log_path_denied(int type, const char *operation) { } static inline int audit_log_task_context(struct audit_buffer *ab) { return 0; } static inline void audit_log_task_info(struct audit_buffer *ab) { } static inline kuid_t audit_get_loginuid(struct task_struct *tsk) { return INVALID_UID; } static inline unsigned int audit_get_sessionid(struct task_struct *tsk) { return AUDIT_SID_UNSET; } #define audit_enabled AUDIT_OFF static inline int audit_signal_info(int sig, struct task_struct *t) { return 0; } #endif /* CONFIG_AUDIT */ #ifdef CONFIG_AUDIT_COMPAT_GENERIC #define audit_is_compat(arch) (!((arch) & __AUDIT_ARCH_64BIT)) #else #define audit_is_compat(arch) false #endif #define AUDIT_INODE_PARENT 1 /* dentry represents the parent */ #define AUDIT_INODE_HIDDEN 2 /* audit record should be hidden */ #define AUDIT_INODE_NOEVAL 4 /* audit record incomplete */ #ifdef CONFIG_AUDITSYSCALL #include <asm/syscall.h> /* for syscall_get_arch() */ /* These are defined in auditsc.c */ /* Public API */ extern int audit_alloc(struct task_struct *task); extern void __audit_free(struct task_struct *task); extern void __audit_syscall_entry(int major, unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3); extern void __audit_syscall_exit(int ret_success, long ret_value); extern struct filename *__audit_reusename(const __user char *uptr); extern void __audit_getname(struct filename *name); extern void __audit_getcwd(void); extern void __audit_inode(struct filename *name, const struct dentry *dentry, unsigned int flags); extern void __audit_file(const struct file *); extern void __audit_inode_child(struct inode *parent, const struct dentry *dentry, const unsigned char type); extern void audit_seccomp(unsigned long syscall, long signr, int code); extern void audit_seccomp_actions_logged(const char *names, const char *old_names, int res); extern void __audit_ptrace(struct task_struct *t); static inline void audit_set_context(struct task_struct *task, struct audit_context *ctx) { task->audit_context = ctx; } static inline struct audit_context *audit_context(void) { return current->audit_context; } static inline bool audit_dummy_context(void) { void *p = audit_context(); return !p || *(int *)p; } static inline void audit_free(struct task_struct *task) { if (unlikely(task->audit_context)) __audit_free(task); } static inline void audit_syscall_entry(int major, unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3) { if (unlikely(audit_context())) __audit_syscall_entry(major, a0, a1, a2, a3); } static inline void audit_syscall_exit(void *pt_regs) { if (unlikely(audit_context())) { int success = is_syscall_success(pt_regs); long return_code = regs_return_value(pt_regs); __audit_syscall_exit(success, return_code); } } static inline struct filename *audit_reusename(const __user char *name) { if (unlikely(!audit_dummy_context())) return __audit_reusename(name); return NULL; } static inline void audit_getname(struct filename *name) { if (unlikely(!audit_dummy_context())) __audit_getname(name); } static inline void audit_getcwd(void) { if (unlikely(audit_context())) __audit_getcwd(); } static inline void audit_inode(struct filename *name, const struct dentry *dentry, unsigned int aflags) { if (unlikely(!audit_dummy_context())) __audit_inode(name, dentry, aflags); } static inline void audit_file(struct file *file) { if (unlikely(!audit_dummy_context())) __audit_file(file); } static inline void audit_inode_parent_hidden(struct filename *name, const struct dentry *dentry) { if (unlikely(!audit_dummy_context())) __audit_inode(name, dentry, AUDIT_INODE_PARENT | AUDIT_INODE_HIDDEN); } static inline void audit_inode_child(struct inode *parent, const struct dentry *dentry, const unsigned char type) { if (unlikely(!audit_dummy_context())) __audit_inode_child(parent, dentry, type); } void audit_core_dumps(long signr); static inline void audit_ptrace(struct task_struct *t) { if (unlikely(!audit_dummy_context())) __audit_ptrace(t); } /* Private API (for audit.c only) */ extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp); extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode); extern void __audit_bprm(struct linux_binprm *bprm); extern int __audit_socketcall(int nargs, unsigned long *args); extern int __audit_sockaddr(int len, void *addr); extern void __audit_fd_pair(int fd1, int fd2); extern void __audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr); extern void __audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec64 *abs_timeout); extern void __audit_mq_notify(mqd_t mqdes, const struct sigevent *notification); extern void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat); extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm, const struct cred *new, const struct cred *old); extern void __audit_log_capset(const struct cred *new, const struct cred *old); extern void __audit_mmap_fd(int fd, int flags); extern void __audit_log_kern_module(char *name); extern void __audit_fanotify(unsigned int response); extern void __audit_tk_injoffset(struct timespec64 offset); extern void __audit_ntp_log(const struct audit_ntp_data *ad); extern void __audit_log_nfcfg(const char *name, u8 af, unsigned int nentries, enum audit_nfcfgop op, gfp_t gfp); static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp) { if (unlikely(!audit_dummy_context())) __audit_ipc_obj(ipcp); } static inline void audit_fd_pair(int fd1, int fd2) { if (unlikely(!audit_dummy_context())) __audit_fd_pair(fd1, fd2); } static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode) { if (unlikely(!audit_dummy_context())) __audit_ipc_set_perm(qbytes, uid, gid, mode); } static inline void audit_bprm(struct linux_binprm *bprm) { if (unlikely(!audit_dummy_context())) __audit_bprm(bprm); } static inline int audit_socketcall(int nargs, unsigned long *args) { if (unlikely(!audit_dummy_context())) return __audit_socketcall(nargs, args); return 0; } static inline int audit_socketcall_compat(int nargs, u32 *args) { unsigned long a[AUDITSC_ARGS]; int i; if (audit_dummy_context()) return 0; for (i = 0; i < nargs; i++) a[i] = (unsigned long)args[i]; return __audit_socketcall(nargs, a); } static inline int audit_sockaddr(int len, void *addr) { if (unlikely(!audit_dummy_context())) return __audit_sockaddr(len, addr); return 0; } static inline void audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr) { if (unlikely(!audit_dummy_context())) __audit_mq_open(oflag, mode, attr); } static inline void audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec64 *abs_timeout) { if (unlikely(!audit_dummy_context())) __audit_mq_sendrecv(mqdes, msg_len, msg_prio, abs_timeout); } static inline void audit_mq_notify(mqd_t mqdes, const struct sigevent *notification) { if (unlikely(!audit_dummy_context())) __audit_mq_notify(mqdes, notification); } static inline void audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) { if (unlikely(!audit_dummy_context())) __audit_mq_getsetattr(mqdes, mqstat); } static inline int audit_log_bprm_fcaps(struct linux_binprm *bprm, const struct cred *new, const struct cred *old) { if (unlikely(!audit_dummy_context())) return __audit_log_bprm_fcaps(bprm, new, old); return 0; } static inline void audit_log_capset(const struct cred *new, const struct cred *old) { if (unlikely(!audit_dummy_context())) __audit_log_capset(new, old); } static inline void audit_mmap_fd(int fd, int flags) { if (unlikely(!audit_dummy_context())) __audit_mmap_fd(fd, flags); } static inline void audit_log_kern_module(char *name) { if (!audit_dummy_context()) __audit_log_kern_module(name); } static inline void audit_fanotify(unsigned int response) { if (!audit_dummy_context()) __audit_fanotify(response); } static inline void audit_tk_injoffset(struct timespec64 offset) { /* ignore no-op events */ if (offset.tv_sec == 0 && offset.tv_nsec == 0) return; if (!audit_dummy_context()) __audit_tk_injoffset(offset); } static inline void audit_ntp_init(struct audit_ntp_data *ad) { memset(ad, 0, sizeof(*ad)); } static inline void audit_ntp_set_old(struct audit_ntp_data *ad, enum audit_ntp_type type, long long val) { ad->vals[type].oldval = val; } static inline void audit_ntp_set_new(struct audit_ntp_data *ad, enum audit_ntp_type type, long long val) { ad->vals[type].newval = val; } static inline void audit_ntp_log(const struct audit_ntp_data *ad) { if (!audit_dummy_context()) __audit_ntp_log(ad); } static inline void audit_log_nfcfg(const char *name, u8 af, unsigned int nentries, enum audit_nfcfgop op, gfp_t gfp) { if (audit_enabled) __audit_log_nfcfg(name, af, nentries, op, gfp); } extern int audit_n_rules; extern int audit_signals; #else /* CONFIG_AUDITSYSCALL */ static inline int audit_alloc(struct task_struct *task) { return 0; } static inline void audit_free(struct task_struct *task) { } static inline void audit_syscall_entry(int major, unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3) { } static inline void audit_syscall_exit(void *pt_regs) { } static inline bool audit_dummy_context(void) { return true; } static inline void audit_set_context(struct task_struct *task, struct audit_context *ctx) { } static inline struct audit_context *audit_context(void) { return NULL; } static inline struct filename *audit_reusename(const __user char *name) { return NULL; } static inline void audit_getname(struct filename *name) { } static inline void audit_getcwd(void) { } static inline void audit_inode(struct filename *name, const struct dentry *dentry, unsigned int aflags) { } static inline void audit_file(struct file *file) { } static inline void audit_inode_parent_hidden(struct filename *name, const struct dentry *dentry) { } static inline void audit_inode_child(struct inode *parent, const struct dentry *dentry, const unsigned char type) { } static inline void audit_core_dumps(long signr) { } static inline void audit_seccomp(unsigned long syscall, long signr, int code) { } static inline void audit_seccomp_actions_logged(const char *names, const char *old_names, int res) { } static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp) { } static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode) { } static inline void audit_bprm(struct linux_binprm *bprm) { } static inline int audit_socketcall(int nargs, unsigned long *args) { return 0; } static inline int audit_socketcall_compat(int nargs, u32 *args) { return 0; } static inline void audit_fd_pair(int fd1, int fd2) { } static inline int audit_sockaddr(int len, void *addr) { return 0; } static inline void audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr) { } static inline void audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec64 *abs_timeout) { } static inline void audit_mq_notify(mqd_t mqdes, const struct sigevent *notification) { } static inline void audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) { } static inline int audit_log_bprm_fcaps(struct linux_binprm *bprm, const struct cred *new, const struct cred *old) { return 0; } static inline void audit_log_capset(const struct cred *new, const struct cred *old) { } static inline void audit_mmap_fd(int fd, int flags) { } static inline void audit_log_kern_module(char *name) { } static inline void audit_fanotify(unsigned int response) { } static inline void audit_tk_injoffset(struct timespec64 offset) { } static inline void audit_ntp_init(struct audit_ntp_data *ad) { } static inline void audit_ntp_set_old(struct audit_ntp_data *ad, enum audit_ntp_type type, long long val) { } static inline void audit_ntp_set_new(struct audit_ntp_data *ad, enum audit_ntp_type type, long long val) { } static inline void audit_ntp_log(const struct audit_ntp_data *ad) { } static inline void audit_ptrace(struct task_struct *t) { } static inline void audit_log_nfcfg(const char *name, u8 af, unsigned int nentries, enum audit_nfcfgop op, gfp_t gfp) { } #define audit_n_rules 0 #define audit_signals 0 #endif /* CONFIG_AUDITSYSCALL */ static inline bool audit_loginuid_set(struct task_struct *tsk) { return uid_valid(audit_get_loginuid(tsk)); } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 /* SPDX-License-Identifier: GPL-2.0 */ /* * linux/include/linux/nmi.h */ #ifndef LINUX_NMI_H #define LINUX_NMI_H #include <linux/sched.h> #include <asm/irq.h> #if defined(CONFIG_HAVE_NMI_WATCHDOG) #include <asm/nmi.h> #endif #ifdef CONFIG_LOCKUP_DETECTOR void lockup_detector_init(void); void lockup_detector_soft_poweroff(void); void lockup_detector_cleanup(void); bool is_hardlockup(void); extern int watchdog_user_enabled; extern int nmi_watchdog_user_enabled; extern int soft_watchdog_user_enabled; extern int watchdog_thresh; extern unsigned long watchdog_enabled; extern struct cpumask watchdog_cpumask; extern unsigned long *watchdog_cpumask_bits; #ifdef CONFIG_SMP extern int sysctl_softlockup_all_cpu_backtrace; extern int sysctl_hardlockup_all_cpu_backtrace; #else #define sysctl_softlockup_all_cpu_backtrace 0 #define sysctl_hardlockup_all_cpu_backtrace 0 #endif /* !CONFIG_SMP */ #else /* CONFIG_LOCKUP_DETECTOR */ static inline void lockup_detector_init(void) { } static inline void lockup_detector_soft_poweroff(void) { } static inline void lockup_detector_cleanup(void) { } #endif /* !CONFIG_LOCKUP_DETECTOR */ #ifdef CONFIG_SOFTLOCKUP_DETECTOR extern void touch_softlockup_watchdog_sched(void); extern void touch_softlockup_watchdog(void); extern void touch_softlockup_watchdog_sync(void); extern void touch_all_softlockup_watchdogs(void); extern unsigned int softlockup_panic; extern int lockup_detector_online_cpu(unsigned int cpu); extern int lockup_detector_offline_cpu(unsigned int cpu); #else /* CONFIG_SOFTLOCKUP_DETECTOR */ static inline void touch_softlockup_watchdog_sched(void) { } static inline void touch_softlockup_watchdog(void) { } static inline void touch_softlockup_watchdog_sync(void) { } static inline void touch_all_softlockup_watchdogs(void) { } #define lockup_detector_online_cpu NULL #define lockup_detector_offline_cpu NULL #endif /* CONFIG_SOFTLOCKUP_DETECTOR */ #ifdef CONFIG_DETECT_HUNG_TASK void reset_hung_task_detector(void); #else static inline void reset_hung_task_detector(void) { } #endif /* * The run state of the lockup detectors is controlled by the content of the * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit - * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector. * * 'watchdog_user_enabled', 'nmi_watchdog_user_enabled' and * 'soft_watchdog_user_enabled' are variables that are only used as an * 'interface' between the parameters in /proc/sys/kernel and the internal * state bits in 'watchdog_enabled'. The 'watchdog_thresh' variable is * handled differently because its value is not boolean, and the lockup * detectors are 'suspended' while 'watchdog_thresh' is equal zero. */ #define NMI_WATCHDOG_ENABLED_BIT 0 #define SOFT_WATCHDOG_ENABLED_BIT 1 #define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT) #define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT) #if defined(CONFIG_HARDLOCKUP_DETECTOR) extern void hardlockup_detector_disable(void); extern unsigned int hardlockup_panic; #else static inline void hardlockup_detector_disable(void) {} #endif #if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) # define NMI_WATCHDOG_SYSCTL_PERM 0644 #else # define NMI_WATCHDOG_SYSCTL_PERM 0444 #endif #if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) extern void arch_touch_nmi_watchdog(void); extern void hardlockup_detector_perf_stop(void); extern void hardlockup_detector_perf_restart(void); extern void hardlockup_detector_perf_disable(void); extern void hardlockup_detector_perf_enable(void); extern void hardlockup_detector_perf_cleanup(void); extern int hardlockup_detector_perf_init(void); #else static inline void hardlockup_detector_perf_stop(void) { } static inline void hardlockup_detector_perf_restart(void) { } static inline void hardlockup_detector_perf_disable(void) { } static inline void hardlockup_detector_perf_enable(void) { } static inline void hardlockup_detector_perf_cleanup(void) { } # if !defined(CONFIG_HAVE_NMI_WATCHDOG) static inline int hardlockup_detector_perf_init(void) { return -ENODEV; } static inline void arch_touch_nmi_watchdog(void) {} # else static inline int hardlockup_detector_perf_init(void) { return 0; } # endif #endif void watchdog_nmi_stop(void); void watchdog_nmi_start(void); int watchdog_nmi_probe(void); int watchdog_nmi_enable(unsigned int cpu); void watchdog_nmi_disable(unsigned int cpu); /** * touch_nmi_watchdog - restart NMI watchdog timeout. * * If the architecture supports the NMI watchdog, touch_nmi_watchdog() * may be used to reset the timeout - for code which intentionally * disables interrupts for a long time. This call is stateless. */ static inline void touch_nmi_watchdog(void) { arch_touch_nmi_watchdog(); touch_softlockup_watchdog(); } /* * Create trigger_all_cpu_backtrace() out of the arch-provided * base function. Return whether such support was available, * to allow calling code to fall back to some other mechanism: */ #ifdef arch_trigger_cpumask_backtrace static inline bool trigger_all_cpu_backtrace(void) { arch_trigger_cpumask_backtrace(cpu_online_mask, false); return true; } static inline bool trigger_allbutself_cpu_backtrace(void) { arch_trigger_cpumask_backtrace(cpu_online_mask, true); return true; } static inline bool trigger_cpumask_backtrace(struct cpumask *mask) { arch_trigger_cpumask_backtrace(mask, false); return true; } static inline bool trigger_single_cpu_backtrace(int cpu) { arch_trigger_cpumask_backtrace(cpumask_of(cpu), false); return true; } /* generic implementation */ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self, void (*raise)(cpumask_t *mask)); bool nmi_cpu_backtrace(struct pt_regs *regs); #else static inline bool trigger_all_cpu_backtrace(void) { return false; } static inline bool trigger_allbutself_cpu_backtrace(void) { return false; } static inline bool trigger_cpumask_backtrace(struct cpumask *mask) { return false; } static inline bool trigger_single_cpu_backtrace(int cpu) { return false; } #endif #ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF u64 hw_nmi_get_sample_period(int watchdog_thresh); #endif #if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \ defined(CONFIG_HARDLOCKUP_DETECTOR) void watchdog_update_hrtimer_threshold(u64 period); #else static inline void watchdog_update_hrtimer_threshold(u64 period) { } #endif struct ctl_table; int proc_watchdog(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_nmi_watchdog(struct ctl_table *, int , void *, size_t *, loff_t *); int proc_soft_watchdog(struct ctl_table *, int , void *, size_t *, loff_t *); int proc_watchdog_thresh(struct ctl_table *, int , void *, size_t *, loff_t *); int proc_watchdog_cpumask(struct ctl_table *, int, void *, size_t *, loff_t *); #ifdef CONFIG_HAVE_ACPI_APEI_NMI #include <asm/nmi.h> #endif #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM io_uring #if !defined(_TRACE_IO_URING_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_IO_URING_H #include <linux/tracepoint.h> struct io_wq_work; /** * io_uring_create - called after a new io_uring context was prepared * * @fd: corresponding file descriptor * @ctx: pointer to a ring context structure * @sq_entries: actual SQ size * @cq_entries: actual CQ size * @flags: SQ ring flags, provided to io_uring_setup(2) * * Allows to trace io_uring creation and provide pointer to a context, that can * be used later to find correlated events. */ TRACE_EVENT(io_uring_create, TP_PROTO(int fd, void *ctx, u32 sq_entries, u32 cq_entries, u32 flags), TP_ARGS(fd, ctx, sq_entries, cq_entries, flags), TP_STRUCT__entry ( __field( int, fd ) __field( void *, ctx ) __field( u32, sq_entries ) __field( u32, cq_entries ) __field( u32, flags ) ), TP_fast_assign( __entry->fd = fd; __entry->ctx = ctx; __entry->sq_entries = sq_entries; __entry->cq_entries = cq_entries; __entry->flags = flags; ), TP_printk("ring %p, fd %d sq size %d, cq size %d, flags %d", __entry->ctx, __entry->fd, __entry->sq_entries, __entry->cq_entries, __entry->flags) ); /** * io_uring_register - called after a buffer/file/eventfd was succesfully * registered for a ring * * @ctx: pointer to a ring context structure * @opcode: describes which operation to perform * @nr_user_files: number of registered files * @nr_user_bufs: number of registered buffers * @cq_ev_fd: whether eventfs registered or not * @ret: return code * * Allows to trace fixed files/buffers/eventfds, that could be registered to * avoid an overhead of getting references to them for every operation. This * event, together with io_uring_file_get, can provide a full picture of how * much overhead one can reduce via fixing. */ TRACE_EVENT(io_uring_register, TP_PROTO(void *ctx, unsigned opcode, unsigned nr_files, unsigned nr_bufs, bool eventfd, long ret), TP_ARGS(ctx, opcode, nr_files, nr_bufs, eventfd, ret), TP_STRUCT__entry ( __field( void *, ctx ) __field( unsigned, opcode ) __field( unsigned, nr_files ) __field( unsigned, nr_bufs ) __field( bool, eventfd ) __field( long, ret ) ), TP_fast_assign( __entry->ctx = ctx; __entry->opcode = opcode; __entry->nr_files = nr_files; __entry->nr_bufs = nr_bufs; __entry->eventfd = eventfd; __entry->ret = ret; ), TP_printk("ring %p, opcode %d, nr_user_files %d, nr_user_bufs %d, " "eventfd %d, ret %ld", __entry->ctx, __entry->opcode, __entry->nr_files, __entry->nr_bufs, __entry->eventfd, __entry->ret) ); /** * io_uring_file_get - called before getting references to an SQE file * * @ctx: pointer to a ring context structure * @fd: SQE file descriptor * * Allows to trace out how often an SQE file reference is obtained, which can * help figuring out if it makes sense to use fixed files, or check that fixed * files are used correctly. */ TRACE_EVENT(io_uring_file_get, TP_PROTO(void *ctx, int fd), TP_ARGS(ctx, fd), TP_STRUCT__entry ( __field( void *, ctx ) __field( int, fd ) ), TP_fast_assign( __entry->ctx = ctx; __entry->fd = fd; ), TP_printk("ring %p, fd %d", __entry->ctx, __entry->fd) ); /** * io_uring_queue_async_work - called before submitting a new async work * * @ctx: pointer to a ring context structure * @hashed: type of workqueue, hashed or normal * @req: pointer to a submitted request * @work: pointer to a submitted io_wq_work * * Allows to trace asynchronous work submission. */ TRACE_EVENT(io_uring_queue_async_work, TP_PROTO(void *ctx, int rw, void * req, struct io_wq_work *work, unsigned int flags), TP_ARGS(ctx, rw, req, work, flags), TP_STRUCT__entry ( __field( void *, ctx ) __field( int, rw ) __field( void *, req ) __field( struct io_wq_work *, work ) __field( unsigned int, flags ) ), TP_fast_assign( __entry->ctx = ctx; __entry->rw = rw; __entry->req = req; __entry->work = work; __entry->flags = flags; ), TP_printk("ring %p, request %p, flags %d, %s queue, work %p", __entry->ctx, __entry->req, __entry->flags, __entry->rw ? "hashed" : "normal", __entry->work) ); /** * io_uring_defer - called when an io_uring request is deferred * * @ctx: pointer to a ring context structure * @req: pointer to a deferred request * @user_data: user data associated with the request * * Allows to track deferred requests, to get an insight about what requests are * not started immediately. */ TRACE_EVENT(io_uring_defer, TP_PROTO(void *ctx, void *req, unsigned long long user_data), TP_ARGS(ctx, req, user_data), TP_STRUCT__entry ( __field( void *, ctx ) __field( void *, req ) __field( unsigned long long, data ) ), TP_fast_assign( __entry->ctx = ctx; __entry->req = req; __entry->data = user_data; ), TP_printk("ring %p, request %p user_data %llu", __entry->ctx, __entry->req, __entry->data) ); /** * io_uring_link - called before the io_uring request added into link_list of * another request * * @ctx: pointer to a ring context structure * @req: pointer to a linked request * @target_req: pointer to a previous request, that would contain @req * * Allows to track linked requests, to understand dependencies between requests * and how does it influence their execution flow. */ TRACE_EVENT(io_uring_link, TP_PROTO(void *ctx, void *req, void *target_req), TP_ARGS(ctx, req, target_req), TP_STRUCT__entry ( __field( void *, ctx ) __field( void *, req ) __field( void *, target_req ) ), TP_fast_assign( __entry->ctx = ctx; __entry->req = req; __entry->target_req = target_req; ), TP_printk("ring %p, request %p linked after %p", __entry->ctx, __entry->req, __entry->target_req) ); /** * io_uring_cqring_wait - called before start waiting for an available CQE * * @ctx: pointer to a ring context structure * @min_events: minimal number of events to wait for * * Allows to track waiting for CQE, so that we can e.g. troubleshoot * situations, when an application wants to wait for an event, that never * comes. */ TRACE_EVENT(io_uring_cqring_wait, TP_PROTO(void *ctx, int min_events), TP_ARGS(ctx, min_events), TP_STRUCT__entry ( __field( void *, ctx ) __field( int, min_events ) ), TP_fast_assign( __entry->ctx = ctx; __entry->min_events = min_events; ), TP_printk("ring %p, min_events %d", __entry->ctx, __entry->min_events) ); /** * io_uring_fail_link - called before failing a linked request * * @req: request, which links were cancelled * @link: cancelled link * * Allows to track linked requests cancellation, to see not only that some work * was cancelled, but also which request was the reason. */ TRACE_EVENT(io_uring_fail_link, TP_PROTO(void *req, void *link), TP_ARGS(req, link), TP_STRUCT__entry ( __field( void *, req ) __field( void *, link ) ), TP_fast_assign( __entry->req = req; __entry->link = link; ), TP_printk("request %p, link %p", __entry->req, __entry->link) ); /** * io_uring_complete - called when completing an SQE * * @ctx: pointer to a ring context structure * @user_data: user data associated with the request * @res: result of the request * */ TRACE_EVENT(io_uring_complete, TP_PROTO(void *ctx, u64 user_data, long res), TP_ARGS(ctx, user_data, res), TP_STRUCT__entry ( __field( void *, ctx ) __field( u64, user_data ) __field( long, res ) ), TP_fast_assign( __entry->ctx = ctx; __entry->user_data = user_data; __entry->res = res; ), TP_printk("ring %p, user_data 0x%llx, result %ld", __entry->ctx, (unsigned long long)__entry->user_data, __entry->res) ); /** * io_uring_submit_sqe - called before submitting one SQE * * @ctx: pointer to a ring context structure * @opcode: opcode of request * @user_data: user data associated with the request * @force_nonblock: whether a context blocking or not * @sq_thread: true if sq_thread has submitted this SQE * * Allows to track SQE submitting, to understand what was the source of it, SQ * thread or io_uring_enter call. */ TRACE_EVENT(io_uring_submit_sqe, TP_PROTO(void *ctx, u8 opcode, u64 user_data, bool force_nonblock, bool sq_thread), TP_ARGS(ctx, opcode, user_data, force_nonblock, sq_thread), TP_STRUCT__entry ( __field( void *, ctx ) __field( u8, opcode ) __field( u64, user_data ) __field( bool, force_nonblock ) __field( bool, sq_thread ) ), TP_fast_assign( __entry->ctx = ctx; __entry->opcode = opcode; __entry->user_data = user_data; __entry->force_nonblock = force_nonblock; __entry->sq_thread = sq_thread; ), TP_printk("ring %p, op %d, data 0x%llx, non block %d, sq_thread %d", __entry->ctx, __entry->opcode, (unsigned long long) __entry->user_data, __entry->force_nonblock, __entry->sq_thread) ); TRACE_EVENT(io_uring_poll_arm, TP_PROTO(void *ctx, u8 opcode, u64 user_data, int mask, int events), TP_ARGS(ctx, opcode, user_data, mask, events), TP_STRUCT__entry ( __field( void *, ctx ) __field( u8, opcode ) __field( u64, user_data ) __field( int, mask ) __field( int, events ) ), TP_fast_assign( __entry->ctx = ctx; __entry->opcode = opcode; __entry->user_data = user_data; __entry->mask = mask; __entry->events = events; ), TP_printk("ring %p, op %d, data 0x%llx, mask 0x%x, events 0x%x", __entry->ctx, __entry->opcode, (unsigned long long) __entry->user_data, __entry->mask, __entry->events) ); TRACE_EVENT(io_uring_poll_wake, TP_PROTO(void *ctx, u8 opcode, u64 user_data, int mask), TP_ARGS(ctx, opcode, user_data, mask), TP_STRUCT__entry ( __field( void *, ctx ) __field( u8, opcode ) __field( u64, user_data ) __field( int, mask ) ), TP_fast_assign( __entry->ctx = ctx; __entry->opcode = opcode; __entry->user_data = user_data; __entry->mask = mask; ), TP_printk("ring %p, op %d, data 0x%llx, mask 0x%x", __entry->ctx, __entry->opcode, (unsigned long long) __entry->user_data, __entry->mask) ); TRACE_EVENT(io_uring_task_add, TP_PROTO(void *ctx, u8 opcode, u64 user_data, int mask), TP_ARGS(ctx, opcode, user_data, mask), TP_STRUCT__entry ( __field( void *, ctx ) __field( u8, opcode ) __field( u64, user_data ) __field( int, mask ) ), TP_fast_assign( __entry->ctx = ctx; __entry->opcode = opcode; __entry->user_data = user_data; __entry->mask = mask; ), TP_printk("ring %p, op %d, data 0x%llx, mask %x", __entry->ctx, __entry->opcode, (unsigned long long) __entry->user_data, __entry->mask) ); TRACE_EVENT(io_uring_task_run, TP_PROTO(void *ctx, u8 opcode, u64 user_data), TP_ARGS(ctx, opcode, user_data), TP_STRUCT__entry ( __field( void *, ctx ) __field( u8, opcode ) __field( u64, user_data ) ), TP_fast_assign( __entry->ctx = ctx; __entry->opcode = opcode; __entry->user_data = user_data; ), TP_printk("ring %p, op %d, data 0x%llx", __entry->ctx, __entry->opcode, (unsigned long long) __entry->user_data) ); #endif /* _TRACE_IO_URING_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* user-type.h: User-defined key type * * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #ifndef _KEYS_USER_TYPE_H #define _KEYS_USER_TYPE_H #include <linux/key.h> #include <linux/rcupdate.h> #ifdef CONFIG_KEYS /*****************************************************************************/ /* * the payload for a key of type "user" or "logon" * - once filled in and attached to a key: * - the payload struct is invariant may not be changed, only replaced * - the payload must be read with RCU procedures or with the key semaphore * held * - the payload may only be replaced with the key semaphore write-locked * - the key's data length is the size of the actual data, not including the * payload wrapper */ struct user_key_payload { struct rcu_head rcu; /* RCU destructor */ unsigned short datalen; /* length of this data */ char data[] __aligned(__alignof__(u64)); /* actual data */ }; extern struct key_type key_type_user; extern struct key_type key_type_logon; struct key_preparsed_payload; extern int user_preparse(struct key_preparsed_payload *prep); extern void user_free_preparse(struct key_preparsed_payload *prep); extern int user_update(struct key *key, struct key_preparsed_payload *prep); extern void user_revoke(struct key *key); extern void user_destroy(struct key *key); extern void user_describe(const struct key *user, struct seq_file *m); extern long user_read(const struct key *key, char *buffer, size_t buflen); static inline const struct user_key_payload *user_key_payload_rcu(const struct key *key) { return (struct user_key_payload *)dereference_key_rcu(key); } static inline struct user_key_payload *user_key_payload_locked(const struct key *key) { return (struct user_key_payload *)dereference_key_locked((struct key *)key); } #endif /* CONFIG_KEYS */ #endif /* _KEYS_USER_TYPE_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2001 Jens Axboe <axboe@suse.de> */ #ifndef __LINUX_BIO_H #define __LINUX_BIO_H #include <linux/highmem.h> #include <linux/mempool.h> #include <linux/ioprio.h> /* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */ #include <linux/blk_types.h> #define BIO_DEBUG #ifdef BIO_DEBUG #define BIO_BUG_ON BUG_ON #else #define BIO_BUG_ON #endif #define BIO_MAX_PAGES 256 #define bio_prio(bio) (bio)->bi_ioprio #define bio_set_prio(bio, prio) ((bio)->bi_ioprio = prio) #define bio_iter_iovec(bio, iter) \ bvec_iter_bvec((bio)->bi_io_vec, (iter)) #define bio_iter_page(bio, iter) \ bvec_iter_page((bio)->bi_io_vec, (iter)) #define bio_iter_len(bio, iter) \ bvec_iter_len((bio)->bi_io_vec, (iter)) #define bio_iter_offset(bio, iter) \ bvec_iter_offset((bio)->bi_io_vec, (iter)) #define bio_page(bio) bio_iter_page((bio), (bio)->bi_iter) #define bio_offset(bio) bio_iter_offset((bio), (bio)->bi_iter) #define bio_iovec(bio) bio_iter_iovec((bio), (bio)->bi_iter) #define bvec_iter_sectors(iter) ((iter).bi_size >> 9) #define bvec_iter_end_sector(iter) ((iter).bi_sector + bvec_iter_sectors((iter))) #define bio_sectors(bio) bvec_iter_sectors((bio)->bi_iter) #define bio_end_sector(bio) bvec_iter_end_sector((bio)->bi_iter) /* * Return the data direction, READ or WRITE. */ #define bio_data_dir(bio) \ (op_is_write(bio_op(bio)) ? WRITE : READ) /* * Check whether this bio carries any data or not. A NULL bio is allowed. */ static inline bool bio_has_data(struct bio *bio) { if (bio && bio->bi_iter.bi_size && bio_op(bio) != REQ_OP_DISCARD && bio_op(bio) != REQ_OP_SECURE_ERASE && bio_op(bio) != REQ_OP_WRITE_ZEROES) return true; return false; } static inline bool bio_no_advance_iter(const struct bio *bio) { return bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_SECURE_ERASE || bio_op(bio) == REQ_OP_WRITE_SAME || bio_op(bio) == REQ_OP_WRITE_ZEROES; } static inline bool bio_mergeable(struct bio *bio) { if (bio->bi_opf & REQ_NOMERGE_FLAGS) return false; return true; } static inline unsigned int bio_cur_bytes(struct bio *bio) { if (bio_has_data(bio)) return bio_iovec(bio).bv_len; else /* dataless requests such as discard */ return bio->bi_iter.bi_size; } static inline void *bio_data(struct bio *bio) { if (bio_has_data(bio)) return page_address(bio_page(bio)) + bio_offset(bio); return NULL; } /** * bio_full - check if the bio is full * @bio: bio to check * @len: length of one segment to be added * * Return true if @bio is full and one segment with @len bytes can't be * added to the bio, otherwise return false */ static inline bool bio_full(struct bio *bio, unsigned len) { if (bio->bi_vcnt >= bio->bi_max_vecs) return true; if (bio->bi_iter.bi_size > UINT_MAX - len) return true; return false; } static inline bool bio_next_segment(const struct bio *bio, struct bvec_iter_all *iter) { if (iter->idx >= bio->bi_vcnt) return false; bvec_advance(&bio->bi_io_vec[iter->idx], iter); return true; } /* * drivers should _never_ use the all version - the bio may have been split * before it got to the driver and the driver won't own all of it */ #define bio_for_each_segment_all(bvl, bio, iter) \ for (bvl = bvec_init_iter_all(&iter); bio_next_segment((bio), &iter); ) static inline void bio_advance_iter(const struct bio *bio, struct bvec_iter *iter, unsigned int bytes) { iter->bi_sector += bytes >> 9; if (bio_no_advance_iter(bio)) iter->bi_size -= bytes; else bvec_iter_advance(bio->bi_io_vec, iter, bytes); /* TODO: It is reasonable to complete bio with error here. */ } #define __bio_for_each_segment(bvl, bio, iter, start) \ for (iter = (start); \ (iter).bi_size && \ ((bvl = bio_iter_iovec((bio), (iter))), 1); \ bio_advance_iter((bio), &(iter), (bvl).bv_len)) #define bio_for_each_segment(bvl, bio, iter) \ __bio_for_each_segment(bvl, bio, iter, (bio)->bi_iter) #define __bio_for_each_bvec(bvl, bio, iter, start) \ for (iter = (start); \ (iter).bi_size && \ ((bvl = mp_bvec_iter_bvec((bio)->bi_io_vec, (iter))), 1); \ bio_advance_iter((bio), &(iter), (bvl).bv_len)) /* iterate over multi-page bvec */ #define bio_for_each_bvec(bvl, bio, iter) \ __bio_for_each_bvec(bvl, bio, iter, (bio)->bi_iter) /* * Iterate over all multi-page bvecs. Drivers shouldn't use this version for the * same reasons as bio_for_each_segment_all(). */ #define bio_for_each_bvec_all(bvl, bio, i) \ for (i = 0, bvl = bio_first_bvec_all(bio); \ i < (bio)->bi_vcnt; i++, bvl++) \ #define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len) static inline unsigned bio_segments(struct bio *bio) { unsigned segs = 0; struct bio_vec bv; struct bvec_iter iter; /* * We special case discard/write same/write zeroes, because they * interpret bi_size differently: */ switch (bio_op(bio)) { case REQ_OP_DISCARD: case REQ_OP_SECURE_ERASE: case REQ_OP_WRITE_ZEROES: return 0; case REQ_OP_WRITE_SAME: return 1; default: break; } bio_for_each_segment(bv, bio, iter) segs++; return segs; } /* * get a reference to a bio, so it won't disappear. the intended use is * something like: * * bio_get(bio); * submit_bio(rw, bio); * if (bio->bi_flags ...) * do_something * bio_put(bio); * * without the bio_get(), it could potentially complete I/O before submit_bio * returns. and then bio would be freed memory when if (bio->bi_flags ...) * runs */ static inline void bio_get(struct bio *bio) { bio->bi_flags |= (1 << BIO_REFFED); smp_mb__before_atomic(); atomic_inc(&bio->__bi_cnt); } static inline void bio_cnt_set(struct bio *bio, unsigned int count) { if (count != 1) { bio->bi_flags |= (1 << BIO_REFFED); smp_mb(); } atomic_set(&bio->__bi_cnt, count); } static inline bool bio_flagged(struct bio *bio, unsigned int bit) { return (bio->bi_flags & (1U << bit)) != 0; } static inline void bio_set_flag(struct bio *bio, unsigned int bit) { bio->bi_flags |= (1U << bit); } static inline void bio_clear_flag(struct bio *bio, unsigned int bit) { bio->bi_flags &= ~(1U << bit); } static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv) { *bv = mp_bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter); } static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv) { struct bvec_iter iter = bio->bi_iter; int idx; bio_get_first_bvec(bio, bv); if (bv->bv_len == bio->bi_iter.bi_size) return; /* this bio only has a single bvec */ bio_advance_iter(bio, &iter, iter.bi_size); if (!iter.bi_bvec_done) idx = iter.bi_idx - 1; else /* in the middle of bvec */ idx = iter.bi_idx; *bv = bio->bi_io_vec[idx]; /* * iter.bi_bvec_done records actual length of the last bvec * if this bio ends in the middle of one io vector */ if (iter.bi_bvec_done) bv->bv_len = iter.bi_bvec_done; } static inline struct bio_vec *bio_first_bvec_all(struct bio *bio) { WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)); return bio->bi_io_vec; } static inline struct page *bio_first_page_all(struct bio *bio) { return bio_first_bvec_all(bio)->bv_page; } static inline struct bio_vec *bio_last_bvec_all(struct bio *bio) { WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)); return &bio->bi_io_vec[bio->bi_vcnt - 1]; } enum bip_flags { BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */ BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */ BIP_CTRL_NOCHECK = 1 << 2, /* disable HBA integrity checking */ BIP_DISK_NOCHECK = 1 << 3, /* disable disk integrity checking */ BIP_IP_CHECKSUM = 1 << 4, /* IP checksum */ }; /* * bio integrity payload */ struct bio_integrity_payload { struct bio *bip_bio; /* parent bio */ struct bvec_iter bip_iter; unsigned short bip_slab; /* slab the bip came from */ unsigned short bip_vcnt; /* # of integrity bio_vecs */ unsigned short bip_max_vcnt; /* integrity bio_vec slots */ unsigned short bip_flags; /* control flags */ struct bvec_iter bio_iter; /* for rewinding parent bio */ struct work_struct bip_work; /* I/O completion */ struct bio_vec *bip_vec; struct bio_vec bip_inline_vecs[];/* embedded bvec array */ }; #if defined(CONFIG_BLK_DEV_INTEGRITY) static inline struct bio_integrity_payload *bio_integrity(struct bio *bio) { if (bio->bi_opf & REQ_INTEGRITY) return bio->bi_integrity; return NULL; } static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag) { struct bio_integrity_payload *bip = bio_integrity(bio); if (bip) return bip->bip_flags & flag; return false; } static inline sector_t bip_get_seed(struct bio_integrity_payload *bip) { return bip->bip_iter.bi_sector; } static inline void bip_set_seed(struct bio_integrity_payload *bip, sector_t seed) { bip->bip_iter.bi_sector = seed; } #endif /* CONFIG_BLK_DEV_INTEGRITY */ extern void bio_trim(struct bio *bio, int offset, int size); extern struct bio *bio_split(struct bio *bio, int sectors, gfp_t gfp, struct bio_set *bs); /** * bio_next_split - get next @sectors from a bio, splitting if necessary * @bio: bio to split * @sectors: number of sectors to split from the front of @bio * @gfp: gfp mask * @bs: bio set to allocate from * * Returns a bio representing the next @sectors of @bio - if the bio is smaller * than @sectors, returns the original bio unchanged. */ static inline struct bio *bio_next_split(struct bio *bio, int sectors, gfp_t gfp, struct bio_set *bs) { if (sectors >= bio_sectors(bio)) return bio; return bio_split(bio, sectors, gfp, bs); } enum { BIOSET_NEED_BVECS = BIT(0), BIOSET_NEED_RESCUER = BIT(1), }; extern int bioset_init(struct bio_set *, unsigned int, unsigned int, int flags); extern void bioset_exit(struct bio_set *); extern int biovec_init_pool(mempool_t *pool, int pool_entries); extern int bioset_init_from_src(struct bio_set *bs, struct bio_set *src); extern struct bio *bio_alloc_bioset(gfp_t, unsigned int, struct bio_set *); extern void bio_put(struct bio *); extern void __bio_clone_fast(struct bio *, struct bio *); extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *); extern struct bio_set fs_bio_set; static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) { return bio_alloc_bioset(gfp_mask, nr_iovecs, &fs_bio_set); } static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs) { return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL); } extern blk_qc_t submit_bio(struct bio *); extern void bio_endio(struct bio *); static inline void bio_io_error(struct bio *bio) { bio->bi_status = BLK_STS_IOERR; bio_endio(bio); } static inline void bio_wouldblock_error(struct bio *bio) { bio_set_flag(bio, BIO_QUIET); bio->bi_status = BLK_STS_AGAIN; bio_endio(bio); } struct request_queue; extern int submit_bio_wait(struct bio *bio); extern void bio_advance(struct bio *, unsigned); extern void bio_init(struct bio *bio, struct bio_vec *table, unsigned short max_vecs); extern void bio_uninit(struct bio *); extern void bio_reset(struct bio *); void bio_chain(struct bio *, struct bio *); extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, unsigned int, unsigned int); bool __bio_try_merge_page(struct bio *bio, struct page *page, unsigned int len, unsigned int off, bool *same_page); void __bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int off); int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); void bio_release_pages(struct bio *bio, bool mark_dirty); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, struct bio *src, struct bvec_iter *src_iter); extern void bio_copy_data(struct bio *dst, struct bio *src); extern void bio_list_copy_data(struct bio *dst, struct bio *src); extern void bio_free_pages(struct bio *bio); void zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter); void bio_truncate(struct bio *bio, unsigned new_size); void guard_bio_eod(struct bio *bio); static inline void zero_fill_bio(struct bio *bio) { zero_fill_bio_iter(bio, bio->bi_iter); } extern struct bio_vec *bvec_alloc(gfp_t, int, unsigned long *, mempool_t *); extern void bvec_free(mempool_t *, struct bio_vec *, unsigned int); extern unsigned int bvec_nr_vecs(unsigned short idx); extern const char *bio_devname(struct bio *bio, char *buffer); #define bio_set_dev(bio, bdev) \ do { \ if ((bio)->bi_disk != (bdev)->bd_disk) \ bio_clear_flag(bio, BIO_THROTTLED);\ (bio)->bi_disk = (bdev)->bd_disk; \ (bio)->bi_partno = (bdev)->bd_partno; \ bio_associate_blkg(bio); \ } while (0) #define bio_copy_dev(dst, src) \ do { \ (dst)->bi_disk = (src)->bi_disk; \ (dst)->bi_partno = (src)->bi_partno; \ bio_clone_blkg_association(dst, src); \ } while (0) #define bio_dev(bio) \ disk_devt((bio)->bi_disk) #ifdef CONFIG_BLK_CGROUP void bio_associate_blkg(struct bio *bio); void bio_associate_blkg_from_css(struct bio *bio, struct cgroup_subsys_state *css); void bio_clone_blkg_association(struct bio *dst, struct bio *src); #else /* CONFIG_BLK_CGROUP */ static inline void bio_associate_blkg(struct bio *bio) { } static inline void bio_associate_blkg_from_css(struct bio *bio, struct cgroup_subsys_state *css) { } static inline void bio_clone_blkg_association(struct bio *dst, struct bio *src) { } #endif /* CONFIG_BLK_CGROUP */ #ifdef CONFIG_HIGHMEM /* * remember never ever reenable interrupts between a bvec_kmap_irq and * bvec_kunmap_irq! */ static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags) { unsigned long addr; /* * might not be a highmem page, but the preempt/irq count * balancing is a lot nicer this way */ local_irq_save(*flags); addr = (unsigned long) kmap_atomic(bvec->bv_page); BUG_ON(addr & ~PAGE_MASK); return (char *) addr + bvec->bv_offset; } static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags) { unsigned long ptr = (unsigned long) buffer & PAGE_MASK; kunmap_atomic((void *) ptr); local_irq_restore(*flags); } #else static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags) { return page_address(bvec->bv_page) + bvec->bv_offset; } static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags) { *flags = 0; } #endif /* * BIO list management for use by remapping drivers (e.g. DM or MD) and loop. * * A bio_list anchors a singly-linked list of bios chained through the bi_next * member of the bio. The bio_list also caches the last list member to allow * fast access to the tail. */ struct bio_list { struct bio *head; struct bio *tail; }; static inline int bio_list_empty(const struct bio_list *bl) { return bl->head == NULL; } static inline void bio_list_init(struct bio_list *bl) { bl->head = bl->tail = NULL; } #define BIO_EMPTY_LIST { NULL, NULL } #define bio_list_for_each(bio, bl) \ for (bio = (bl)->head; bio; bio = bio->bi_next) static inline unsigned bio_list_size(const struct bio_list *bl) { unsigned sz = 0; struct bio *bio; bio_list_for_each(bio, bl) sz++; return sz; } static inline void bio_list_add(struct bio_list *bl, struct bio *bio) { bio->bi_next = NULL; if (bl->tail) bl->tail->bi_next = bio; else bl->head = bio; bl->tail = bio; } static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio) { bio->bi_next = bl->head; bl->head = bio; if (!bl->tail) bl->tail = bio; } static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2) { if (!bl2->head) return; if (bl->tail) bl->tail->bi_next = bl2->head; else bl->head = bl2->head; bl->tail = bl2->tail; } static inline void bio_list_merge_head(struct bio_list *bl, struct bio_list *bl2) { if (!bl2->head) return; if (bl->head) bl2->tail->bi_next = bl->head; else bl->tail = bl2->tail; bl->head = bl2->head; } static inline struct bio *bio_list_peek(struct bio_list *bl) { return bl->head; } static inline struct bio *bio_list_pop(struct bio_list *bl) { struct bio *bio = bl->head; if (bio) { bl->head = bl->head->bi_next; if (!bl->head) bl->tail = NULL; bio->bi_next = NULL; } return bio; } static inline struct bio *bio_list_get(struct bio_list *bl) { struct bio *bio = bl->head; bl->head = bl->tail = NULL; return bio; } /* * Increment chain count for the bio. Make sure the CHAIN flag update * is visible before the raised count. */ static inline void bio_inc_remaining(struct bio *bio) { bio_set_flag(bio, BIO_CHAIN); smp_mb__before_atomic(); atomic_inc(&bio->__bi_remaining); } /* * bio_set is used to allow other portions of the IO system to * allocate their own private memory pools for bio and iovec structures. * These memory pools in turn all allocate from the bio_slab * and the bvec_slabs[]. */ #define BIO_POOL_SIZE 2 struct bio_set { struct kmem_cache *bio_slab; unsigned int front_pad; mempool_t bio_pool; mempool_t bvec_pool; #if defined(CONFIG_BLK_DEV_INTEGRITY) mempool_t bio_integrity_pool; mempool_t bvec_integrity_pool; #endif /* * Deadlock avoidance for stacking block drivers: see comments in * bio_alloc_bioset() for details */ spinlock_t rescue_lock; struct bio_list rescue_list; struct work_struct rescue_work; struct workqueue_struct *rescue_workqueue; }; struct biovec_slab { int nr_vecs; char *name; struct kmem_cache *slab; }; static inline bool bioset_initialized(struct bio_set *bs) { return bs->bio_slab != NULL; } /* * a small number of entries is fine, not going to be performance critical. * basically we just need to survive */ #define BIO_SPLIT_ENTRIES 2 #if defined(CONFIG_BLK_DEV_INTEGRITY) #define bip_for_each_vec(bvl, bip, iter) \ for_each_bvec(bvl, (bip)->bip_vec, iter, (bip)->bip_iter) #define bio_for_each_integrity_vec(_bvl, _bio, _iter) \ for_each_bio(_bio) \ bip_for_each_vec(_bvl, _bio->bi_integrity, _iter) extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); extern bool bio_integrity_prep(struct bio *); extern void bio_integrity_advance(struct bio *, unsigned int); extern void bio_integrity_trim(struct bio *); extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t); extern int bioset_integrity_create(struct bio_set *, int); extern void bioset_integrity_free(struct bio_set *); extern void bio_integrity_init(void); #else /* CONFIG_BLK_DEV_INTEGRITY */ static inline void *bio_integrity(struct bio *bio) { return NULL; } static inline int bioset_integrity_create(struct bio_set *bs, int pool_size) { return 0; } static inline void bioset_integrity_free (struct bio_set *bs) { return; } static inline bool bio_integrity_prep(struct bio *bio) { return true; } static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask) { return 0; } static inline void bio_integrity_advance(struct bio *bio, unsigned int bytes_done) { return; } static inline void bio_integrity_trim(struct bio *bio) { return; } static inline void bio_integrity_init(void) { return; } static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag) { return false; } static inline void *bio_integrity_alloc(struct bio * bio, gfp_t gfp, unsigned int nr) { return ERR_PTR(-EINVAL); } static inline int bio_integrity_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int offset) { return 0; } #endif /* CONFIG_BLK_DEV_INTEGRITY */ /* * Mark a bio as polled. Note that for async polled IO, the caller must * expect -EWOULDBLOCK if we cannot allocate a request (or other resources). * We cannot block waiting for requests on polled IO, as those completions * must be found by the caller. This is different than IRQ driven IO, where * it's safe to wait for IO to complete. */ static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb) { bio->bi_opf |= REQ_HIPRI; if (!is_sync_kiocb(kiocb)) bio->bi_opf |= REQ_NOWAIT; } #endif /* __LINUX_BIO_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * PTP 1588 clock support - private declarations for the core module. * * Copyright (C) 2010 OMICRON electronics GmbH */ #ifndef _PTP_PRIVATE_H_ #define _PTP_PRIVATE_H_ #include <linux/cdev.h> #include <linux/device.h> #include <linux/kthread.h> #include <linux/mutex.h> #include <linux/posix-clock.h> #include <linux/ptp_clock.h> #include <linux/ptp_clock_kernel.h> #include <linux/time.h> #define PTP_MAX_TIMESTAMPS 128 #define PTP_BUF_TIMESTAMPS 30 struct timestamp_event_queue { struct ptp_extts_event buf[PTP_MAX_TIMESTAMPS]; int head; int tail; spinlock_t lock; }; struct ptp_clock { struct posix_clock clock; struct device dev; struct ptp_clock_info *info; dev_t devid; int index; /* index into clocks.map */ struct pps_device *pps_source; long dialed_frequency; /* remembers the frequency adjustment */ struct timestamp_event_queue tsevq; /* simple fifo for time stamps */ struct mutex tsevq_mux; /* one process at a time reading the fifo */ struct mutex pincfg_mux; /* protect concurrent info->pin_config access */ wait_queue_head_t tsev_wq; int defunct; /* tells readers to go away when clock is being removed */ struct device_attribute *pin_dev_attr; struct attribute **pin_attr; struct attribute_group pin_attr_group; /* 1st entry is a pointer to the real group, 2nd is NULL terminator */ const struct attribute_group *pin_attr_groups[2]; struct kthread_worker *kworker; struct kthread_delayed_work aux_work; }; /* * The function queue_cnt() is safe for readers to call without * holding q->lock. Readers use this function to verify that the queue * is nonempty before proceeding with a dequeue operation. The fact * that a writer might concurrently increment the tail does not * matter, since the queue remains nonempty nonetheless. */ static inline int queue_cnt(struct timestamp_event_queue *q) { int cnt = q->tail - q->head; return cnt < 0 ? PTP_MAX_TIMESTAMPS + cnt : cnt; } /* * see ptp_chardev.c */ /* caller must hold pincfg_mux */ int ptp_set_pinfunc(struct ptp_clock *ptp, unsigned int pin, enum ptp_pin_function func, unsigned int chan); long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg); int ptp_open(struct posix_clock *pc, fmode_t fmode); ssize_t ptp_read(struct posix_clock *pc, uint flags, char __user *buf, size_t cnt); __poll_t ptp_poll(struct posix_clock *pc, struct file *fp, poll_table *wait); /* * see ptp_sysfs.c */ extern const struct attribute_group *ptp_groups[]; int ptp_populate_pin_groups(struct ptp_clock *ptp); void ptp_cleanup_pin_groups(struct ptp_clock *ptp); #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 /* SPDX-License-Identifier: GPL-2.0 */ /* * A hash table (hashtab) maintains associations between * key values and datum values. The type of the key values * and the type of the datum values is arbitrary. The * functions for hash computation and key comparison are * provided by the creator of the table. * * Author : Stephen Smalley, <sds@tycho.nsa.gov> */ #ifndef _SS_HASHTAB_H_ #define _SS_HASHTAB_H_ #include <linux/types.h> #include <linux/errno.h> #include <linux/sched.h> #define HASHTAB_MAX_NODES U32_MAX struct hashtab_key_params { u32 (*hash)(const void *key); /* hash function */ int (*cmp)(const void *key1, const void *key2); /* key comparison function */ }; struct hashtab_node { void *key; void *datum; struct hashtab_node *next; }; struct hashtab { struct hashtab_node **htable; /* hash table */ u32 size; /* number of slots in hash table */ u32 nel; /* number of elements in hash table */ }; struct hashtab_info { u32 slots_used; u32 max_chain_len; }; /* * Initializes a new hash table with the specified characteristics. * * Returns -ENOMEM if insufficient space is available or 0 otherwise. */ int hashtab_init(struct hashtab *h, u32 nel_hint); int __hashtab_insert(struct hashtab *h, struct hashtab_node **dst, void *key, void *datum); /* * Inserts the specified (key, datum) pair into the specified hash table. * * Returns -ENOMEM on memory allocation error, * -EEXIST if there is already an entry with the same key, * -EINVAL for general errors or 0 otherwise. */ static inline int hashtab_insert(struct hashtab *h, void *key, void *datum, struct hashtab_key_params key_params) { u32 hvalue; struct hashtab_node *prev, *cur; cond_resched(); if (!h->size || h->nel == HASHTAB_MAX_NODES) return -EINVAL; hvalue = key_params.hash(key) & (h->size - 1); prev = NULL; cur = h->htable[hvalue]; while (cur) { int cmp = key_params.cmp(key, cur->key); if (cmp == 0) return -EEXIST; if (cmp < 0) break; prev = cur; cur = cur->next; } return __hashtab_insert(h, prev ? &prev->next : &h->htable[hvalue], key, datum); } /* * Searches for the entry with the specified key in the hash table. * * Returns NULL if no entry has the specified key or * the datum of the entry otherwise. */ static inline void *hashtab_search(struct hashtab *h, const void *key, struct hashtab_key_params key_params) { u32 hvalue; struct hashtab_node *cur; if (!h->size) return NULL; hvalue = key_params.hash(key) & (h->size - 1); cur = h->htable[hvalue]; while (cur) { int cmp = key_params.cmp(key, cur->key); if (cmp == 0) return cur->datum; if (cmp < 0) break; cur = cur->next; } return NULL; } /* * Destroys the specified hash table. */ void hashtab_destroy(struct hashtab *h); /* * Applies the specified apply function to (key,datum,args) * for each entry in the specified hash table. * * The order in which the function is applied to the entries * is dependent upon the internal structure of the hash table. * * If apply returns a non-zero status, then hashtab_map will cease * iterating through the hash table and will propagate the error * return to its caller. */ int hashtab_map(struct hashtab *h, int (*apply)(void *k, void *d, void *args), void *args); int hashtab_duplicate(struct hashtab *new, struct hashtab *orig, int (*copy)(struct hashtab_node *new, struct hashtab_node *orig, void *args), int (*destroy)(void *k, void *d, void *args), void *args); /* Fill info with some hash table statistics */ void hashtab_stat(struct hashtab *h, struct hashtab_info *info); #endif /* _SS_HASHTAB_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __VDSO_HELPERS_H #define __VDSO_HELPERS_H #ifndef __ASSEMBLY__ #include <vdso/datapage.h> static __always_inline u32 vdso_read_begin(const struct vdso_data *vd) { u32 seq; while (unlikely((seq = READ_ONCE(vd->seq)) & 1)) cpu_relax(); smp_rmb(); return seq; } static __always_inline u32 vdso_read_retry(const struct vdso_data *vd, u32 start) { u32 seq; smp_rmb(); seq = READ_ONCE(vd->seq); return seq != start; } static __always_inline void vdso_write_begin(struct vdso_data *vd) { /* * WRITE_ONCE it is required otherwise the compiler can validly tear * updates to vd[x].seq and it is possible that the value seen by the * reader it is inconsistent. */ WRITE_ONCE(vd[CS_HRES_COARSE].seq, vd[CS_HRES_COARSE].seq + 1); WRITE_ONCE(vd[CS_RAW].seq, vd[CS_RAW].seq + 1); smp_wmb(); } static __always_inline void vdso_write_end(struct vdso_data *vd) { smp_wmb(); /* * WRITE_ONCE it is required otherwise the compiler can validly tear * updates to vd[x].seq and it is possible that the value seen by the * reader it is inconsistent. */ WRITE_ONCE(vd[CS_HRES_COARSE].seq, vd[CS_HRES_COARSE].seq + 1); WRITE_ONCE(vd[CS_RAW].seq, vd[CS_RAW].seq + 1); } #endif /* !__ASSEMBLY__ */ #endif /* __VDSO_HELPERS_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef DRIVERS_PCI_H #define DRIVERS_PCI_H #include <linux/pci.h> /* Number of possible devfns: 0.0 to 1f.7 inclusive */ #define MAX_NR_DEVFNS 256 #define PCI_FIND_CAP_TTL 48 #define PCI_VSEC_ID_INTEL_TBT 0x1234 /* Thunderbolt */ extern const unsigned char pcie_link_speed[]; extern bool pci_early_dump; bool pcie_cap_has_lnkctl(const struct pci_dev *dev); bool pcie_cap_has_rtctl(const struct pci_dev *dev); /* Functions internal to the PCI core code */ int pci_create_sysfs_dev_files(struct pci_dev *pdev); void pci_remove_sysfs_dev_files(struct pci_dev *pdev); #if !defined(CONFIG_DMI) && !defined(CONFIG_ACPI) static inline void pci_create_firmware_label_files(struct pci_dev *pdev) { return; } static inline void pci_remove_firmware_label_files(struct pci_dev *pdev) { return; } #else void pci_create_firmware_label_files(struct pci_dev *pdev); void pci_remove_firmware_label_files(struct pci_dev *pdev); #endif void pci_cleanup_rom(struct pci_dev *dev); enum pci_mmap_api { PCI_MMAP_SYSFS, /* mmap on /sys/bus/pci/devices/<BDF>/resource<N> */ PCI_MMAP_PROCFS /* mmap on /proc/bus/pci/<BDF> */ }; int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vmai, enum pci_mmap_api mmap_api); int pci_probe_reset_function(struct pci_dev *dev); int pci_bridge_secondary_bus_reset(struct pci_dev *dev); int pci_bus_error_reset(struct pci_dev *dev); #define PCI_PM_D2_DELAY 200 /* usec; see PCIe r4.0, sec 5.9.1 */ #define PCI_PM_D3HOT_WAIT 10 /* msec */ #define PCI_PM_D3COLD_WAIT 100 /* msec */ /** * struct pci_platform_pm_ops - Firmware PM callbacks * * @bridge_d3: Does the bridge allow entering into D3 * * @is_manageable: returns 'true' if given device is power manageable by the * platform firmware * * @set_state: invokes the platform firmware to set the device's power state * * @get_state: queries the platform firmware for a device's current power state * * @refresh_state: asks the platform to refresh the device's power state data * * @choose_state: returns PCI power state of given device preferred by the * platform; to be used during system-wide transitions from a * sleeping state to the working state and vice versa * * @set_wakeup: enables/disables wakeup capability for the device * * @need_resume: returns 'true' if the given device (which is currently * suspended) needs to be resumed to be configured for system * wakeup. * * If given platform is generally capable of power managing PCI devices, all of * these callbacks are mandatory. */ struct pci_platform_pm_ops { bool (*bridge_d3)(struct pci_dev *dev); bool (*is_manageable)(struct pci_dev *dev); int (*set_state)(struct pci_dev *dev, pci_power_t state); pci_power_t (*get_state)(struct pci_dev *dev); void (*refresh_state)(struct pci_dev *dev); pci_power_t (*choose_state)(struct pci_dev *dev); int (*set_wakeup)(struct pci_dev *dev, bool enable); bool (*need_resume)(struct pci_dev *dev); }; int pci_set_platform_pm(const struct pci_platform_pm_ops *ops); void pci_update_current_state(struct pci_dev *dev, pci_power_t state); void pci_refresh_power_state(struct pci_dev *dev); int pci_power_up(struct pci_dev *dev); void pci_disable_enabled_device(struct pci_dev *dev); int pci_finish_runtime_suspend(struct pci_dev *dev); void pcie_clear_device_status(struct pci_dev *dev); void pcie_clear_root_pme_status(struct pci_dev *dev); bool pci_check_pme_status(struct pci_dev *dev); void pci_pme_wakeup_bus(struct pci_bus *bus); int __pci_pme_wakeup(struct pci_dev *dev, void *ign); void pci_pme_restore(struct pci_dev *dev); bool pci_dev_need_resume(struct pci_dev *dev); void pci_dev_adjust_pme(struct pci_dev *dev); void pci_dev_complete_resume(struct pci_dev *pci_dev); void pci_config_pm_runtime_get(struct pci_dev *dev); void pci_config_pm_runtime_put(struct pci_dev *dev); void pci_pm_init(struct pci_dev *dev); void pci_ea_init(struct pci_dev *dev); void pci_allocate_cap_save_buffers(struct pci_dev *dev); void pci_free_cap_save_buffers(struct pci_dev *dev); bool pci_bridge_d3_possible(struct pci_dev *dev); void pci_bridge_d3_update(struct pci_dev *dev); void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev); static inline void pci_wakeup_event(struct pci_dev *dev) { /* Wait 100 ms before the system can be put into a sleep state. */ pm_wakeup_event(&dev->dev, 100); } static inline bool pci_has_subordinate(struct pci_dev *pci_dev) { return !!(pci_dev->subordinate); } static inline bool pci_power_manageable(struct pci_dev *pci_dev) { /* * Currently we allow normal PCI devices and PCI bridges transition * into D3 if their bridge_d3 is set. */ return !pci_has_subordinate(pci_dev) || pci_dev->bridge_d3; } static inline bool pcie_downstream_port(const struct pci_dev *dev) { int type = pci_pcie_type(dev); return type == PCI_EXP_TYPE_ROOT_PORT || type == PCI_EXP_TYPE_DOWNSTREAM || type == PCI_EXP_TYPE_PCIE_BRIDGE; } int pci_vpd_init(struct pci_dev *dev); void pci_vpd_release(struct pci_dev *dev); void pcie_vpd_create_sysfs_dev_files(struct pci_dev *dev); void pcie_vpd_remove_sysfs_dev_files(struct pci_dev *dev); /* PCI Virtual Channel */ int pci_save_vc_state(struct pci_dev *dev); void pci_restore_vc_state(struct pci_dev *dev); void pci_allocate_vc_save_buffers(struct pci_dev *dev); /* PCI /proc functions */ #ifdef CONFIG_PROC_FS int pci_proc_attach_device(struct pci_dev *dev); int pci_proc_detach_device(struct pci_dev *dev); int pci_proc_detach_bus(struct pci_bus *bus); #else static inline int pci_proc_attach_device(struct pci_dev *dev) { return 0; } static inline int pci_proc_detach_device(struct pci_dev *dev) { return 0; } static inline int pci_proc_detach_bus(struct pci_bus *bus) { return 0; } #endif /* Functions for PCI Hotplug drivers to use */ int pci_hp_add_bridge(struct pci_dev *dev); #ifdef HAVE_PCI_LEGACY void pci_create_legacy_files(struct pci_bus *bus); void pci_remove_legacy_files(struct pci_bus *bus); #else static inline void pci_create_legacy_files(struct pci_bus *bus) { return; } static inline void pci_remove_legacy_files(struct pci_bus *bus) { return; } #endif /* Lock for read/write access to pci device and bus lists */ extern struct rw_semaphore pci_bus_sem; extern struct mutex pci_slot_mutex; extern raw_spinlock_t pci_lock; extern unsigned int pci_pm_d3hot_delay; #ifdef CONFIG_PCI_MSI void pci_no_msi(void); #else static inline void pci_no_msi(void) { } #endif static inline void pci_msi_set_enable(struct pci_dev *dev, int enable) { u16 control; pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control); control &= ~PCI_MSI_FLAGS_ENABLE; if (enable) control |= PCI_MSI_FLAGS_ENABLE; pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control); } static inline void pci_msix_clear_and_set_ctrl(struct pci_dev *dev, u16 clear, u16 set) { u16 ctrl; pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &ctrl); ctrl &= ~clear; ctrl |= set; pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, ctrl); } void pci_realloc_get_opt(char *); static inline int pci_no_d1d2(struct pci_dev *dev) { unsigned int parent_dstates = 0; if (dev->bus->self) parent_dstates = dev->bus->self->no_d1d2; return (dev->no_d1d2 || parent_dstates); } extern const struct attribute_group *pci_dev_groups[]; extern const struct attribute_group *pcibus_groups[]; extern const struct device_type pci_dev_type; extern const struct attribute_group *pci_bus_groups[]; extern unsigned long pci_hotplug_io_size; extern unsigned long pci_hotplug_mmio_size; extern unsigned long pci_hotplug_mmio_pref_size; extern unsigned long pci_hotplug_bus_size; /** * pci_match_one_device - Tell if a PCI device structure has a matching * PCI device id structure * @id: single PCI device id structure to match * @dev: the PCI device structure to match against * * Returns the matching pci_device_id structure or %NULL if there is no match. */ static inline const struct pci_device_id * pci_match_one_device(const struct pci_device_id *id, const struct pci_dev *dev) { if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) && (id->device == PCI_ANY_ID || id->device == dev->device) && (id->subvendor == PCI_ANY_ID || id->subvendor == dev->subsystem_vendor) && (id->subdevice == PCI_ANY_ID || id->subdevice == dev->subsystem_device) && !((id->class ^ dev->class) & id->class_mask)) return id; return NULL; } /* PCI slot sysfs helper code */ #define to_pci_slot(s) container_of(s, struct pci_slot, kobj) extern struct kset *pci_slots_kset; struct pci_slot_attribute { struct attribute attr; ssize_t (*show)(struct pci_slot *, char *); ssize_t (*store)(struct pci_slot *, const char *, size_t); }; #define to_pci_slot_attr(s) container_of(s, struct pci_slot_attribute, attr) enum pci_bar_type { pci_bar_unknown, /* Standard PCI BAR probe */ pci_bar_io, /* An I/O port BAR */ pci_bar_mem32, /* A 32-bit memory BAR */ pci_bar_mem64, /* A 64-bit memory BAR */ }; struct device *pci_get_host_bridge_device(struct pci_dev *dev); void pci_put_host_bridge_device(struct device *dev); int pci_configure_extended_tags(struct pci_dev *dev, void *ign); bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *pl, int crs_timeout); bool pci_bus_generic_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *pl, int crs_timeout); int pci_idt_bus_quirk(struct pci_bus *bus, int devfn, u32 *pl, int crs_timeout); int pci_setup_device(struct pci_dev *dev); int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, struct resource *res, unsigned int reg); void pci_configure_ari(struct pci_dev *dev); void __pci_bus_size_bridges(struct pci_bus *bus, struct list_head *realloc_head); void __pci_bus_assign_resources(const struct pci_bus *bus, struct list_head *realloc_head, struct list_head *fail_head); bool pci_bus_clip_resource(struct pci_dev *dev, int idx); void pci_reassigndev_resource_alignment(struct pci_dev *dev); void pci_disable_bridge_window(struct pci_dev *dev); struct pci_bus *pci_bus_get(struct pci_bus *bus); void pci_bus_put(struct pci_bus *bus); /* PCIe link information from Link Capabilities 2 */ #define PCIE_LNKCAP2_SLS2SPEED(lnkcap2) \ ((lnkcap2) & PCI_EXP_LNKCAP2_SLS_32_0GB ? PCIE_SPEED_32_0GT : \ (lnkcap2) & PCI_EXP_LNKCAP2_SLS_16_0GB ? PCIE_SPEED_16_0GT : \ (lnkcap2) & PCI_EXP_LNKCAP2_SLS_8_0GB ? PCIE_SPEED_8_0GT : \ (lnkcap2) & PCI_EXP_LNKCAP2_SLS_5_0GB ? PCIE_SPEED_5_0GT : \ (lnkcap2) & PCI_EXP_LNKCAP2_SLS_2_5GB ? PCIE_SPEED_2_5GT : \ PCI_SPEED_UNKNOWN) /* PCIe speed to Mb/s reduced by encoding overhead */ #define PCIE_SPEED2MBS_ENC(speed) \ ((speed) == PCIE_SPEED_32_0GT ? 32000*128/130 : \ (speed) == PCIE_SPEED_16_0GT ? 16000*128/130 : \ (speed) == PCIE_SPEED_8_0GT ? 8000*128/130 : \ (speed) == PCIE_SPEED_5_0GT ? 5000*8/10 : \ (speed) == PCIE_SPEED_2_5GT ? 2500*8/10 : \ 0) const char *pci_speed_string(enum pci_bus_speed speed); enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev); enum pcie_link_width pcie_get_width_cap(struct pci_dev *dev); u32 pcie_bandwidth_capable(struct pci_dev *dev, enum pci_bus_speed *speed, enum pcie_link_width *width); void __pcie_print_link_status(struct pci_dev *dev, bool verbose); void pcie_report_downtraining(struct pci_dev *dev); void pcie_update_link_speed(struct pci_bus *bus, u16 link_status); /* Single Root I/O Virtualization */ struct pci_sriov { int pos; /* Capability position */ int nres; /* Number of resources */ u32 cap; /* SR-IOV Capabilities */ u16 ctrl; /* SR-IOV Control */ u16 total_VFs; /* Total VFs associated with the PF */ u16 initial_VFs; /* Initial VFs associated with the PF */ u16 num_VFs; /* Number of VFs available */ u16 offset; /* First VF Routing ID offset */ u16 stride; /* Following VF stride */ u16 vf_device; /* VF device ID */ u32 pgsz; /* Page size for BAR alignment */ u8 link; /* Function Dependency Link */ u8 max_VF_buses; /* Max buses consumed by VFs */ u16 driver_max_VFs; /* Max num VFs driver supports */ struct pci_dev *dev; /* Lowest numbered PF */ struct pci_dev *self; /* This PF */ u32 class; /* VF device */ u8 hdr_type; /* VF header type */ u16 subsystem_vendor; /* VF subsystem vendor */ u16 subsystem_device; /* VF subsystem device */ resource_size_t barsz[PCI_SRIOV_NUM_BARS]; /* VF BAR size */ bool drivers_autoprobe; /* Auto probing of VFs by driver */ }; /** * pci_dev_set_io_state - Set the new error state if possible. * * @dev - pci device to set new error_state * @new - the state we want dev to be in * * Must be called with device_lock held. * * Returns true if state has been changed to the requested state. */ static inline bool pci_dev_set_io_state(struct pci_dev *dev, pci_channel_state_t new) { bool changed = false; device_lock_assert(&dev->dev); switch (new) { case pci_channel_io_perm_failure: switch (dev->error_state) { case pci_channel_io_frozen: case pci_channel_io_normal: case pci_channel_io_perm_failure: changed = true; break; } break; case pci_channel_io_frozen: switch (dev->error_state) { case pci_channel_io_frozen: case pci_channel_io_normal: changed = true; break; } break; case pci_channel_io_normal: switch (dev->error_state) { case pci_channel_io_frozen: case pci_channel_io_normal: changed = true; break; } break; } if (changed) dev->error_state = new; return changed; } static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused) { device_lock(&dev->dev); pci_dev_set_io_state(dev, pci_channel_io_perm_failure); device_unlock(&dev->dev); return 0; } static inline bool pci_dev_is_disconnected(const struct pci_dev *dev) { return dev->error_state == pci_channel_io_perm_failure; } /* pci_dev priv_flags */ #define PCI_DEV_ADDED 0 #define PCI_DPC_RECOVERED 1 #define PCI_DPC_RECOVERING 2 static inline void pci_dev_assign_added(struct pci_dev *dev, bool added) { assign_bit(PCI_DEV_ADDED, &dev->priv_flags, added); } static inline bool pci_dev_is_added(const struct pci_dev *dev) { return test_bit(PCI_DEV_ADDED, &dev->priv_flags); } #ifdef CONFIG_PCIEAER #include <linux/aer.h> #define AER_MAX_MULTI_ERR_DEVICES 5 /* Not likely to have more */ struct aer_err_info { struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES]; int error_dev_num; unsigned int id:16; unsigned int severity:2; /* 0:NONFATAL | 1:FATAL | 2:COR */ unsigned int __pad1:5; unsigned int multi_error_valid:1; unsigned int first_error:5; unsigned int __pad2:2; unsigned int tlp_header_valid:1; unsigned int status; /* COR/UNCOR Error Status */ unsigned int mask; /* COR/UNCOR Error Mask */ struct aer_header_log_regs tlp; /* TLP Header */ }; int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info); void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); #endif /* CONFIG_PCIEAER */ #ifdef CONFIG_PCIE_DPC void pci_save_dpc_state(struct pci_dev *dev); void pci_restore_dpc_state(struct pci_dev *dev); void pci_dpc_init(struct pci_dev *pdev); void dpc_process_error(struct pci_dev *pdev); pci_ers_result_t dpc_reset_link(struct pci_dev *pdev); bool pci_dpc_recovered(struct pci_dev *pdev); #else static inline void pci_save_dpc_state(struct pci_dev *dev) {} static inline void pci_restore_dpc_state(struct pci_dev *dev) {} static inline void pci_dpc_init(struct pci_dev *pdev) {} static inline bool pci_dpc_recovered(struct pci_dev *pdev) { return false; } #endif #ifdef CONFIG_PCI_ATS /* Address Translation Service */ void pci_ats_init(struct pci_dev *dev); void pci_restore_ats_state(struct pci_dev *dev); #else static inline void pci_ats_init(struct pci_dev *d) { } static inline void pci_restore_ats_state(struct pci_dev *dev) { } #endif /* CONFIG_PCI_ATS */ #ifdef CONFIG_PCI_PRI void pci_pri_init(struct pci_dev *dev); void pci_restore_pri_state(struct pci_dev *pdev); #else static inline void pci_pri_init(struct pci_dev *dev) { } static inline void pci_restore_pri_state(struct pci_dev *pdev) { } #endif #ifdef CONFIG_PCI_PASID void pci_pasid_init(struct pci_dev *dev); void pci_restore_pasid_state(struct pci_dev *pdev); #else static inline void pci_pasid_init(struct pci_dev *dev) { } static inline void pci_restore_pasid_state(struct pci_dev *pdev) { } #endif #ifdef CONFIG_PCI_IOV int pci_iov_init(struct pci_dev *dev); void pci_iov_release(struct pci_dev *dev); void pci_iov_remove(struct pci_dev *dev); void pci_iov_update_resource(struct pci_dev *dev, int resno); resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno); void pci_restore_iov_state(struct pci_dev *dev); int pci_iov_bus_range(struct pci_bus *bus); extern const struct attribute_group sriov_dev_attr_group; #else static inline int pci_iov_init(struct pci_dev *dev) { return -ENODEV; } static inline void pci_iov_release(struct pci_dev *dev) { } static inline void pci_iov_remove(struct pci_dev *dev) { } static inline void pci_restore_iov_state(struct pci_dev *dev) { } static inline int pci_iov_bus_range(struct pci_bus *bus) { return 0; } #endif /* CONFIG_PCI_IOV */ unsigned long pci_cardbus_resource_alignment(struct resource *); static inline resource_size_t pci_resource_alignment(struct pci_dev *dev, struct resource *res) { #ifdef CONFIG_PCI_IOV int resno = res - dev->resource; if (resno >= PCI_IOV_RESOURCES && resno <= PCI_IOV_RESOURCE_END) return pci_sriov_resource_alignment(dev, resno); #endif if (dev->class >> 8 == PCI_CLASS_BRIDGE_CARDBUS) return pci_cardbus_resource_alignment(res); return resource_alignment(res); } void pci_acs_init(struct pci_dev *dev); #ifdef CONFIG_PCI_QUIRKS int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags); int pci_dev_specific_enable_acs(struct pci_dev *dev); int pci_dev_specific_disable_acs_redir(struct pci_dev *dev); #else static inline int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags) { return -ENOTTY; } static inline int pci_dev_specific_enable_acs(struct pci_dev *dev) { return -ENOTTY; } static inline int pci_dev_specific_disable_acs_redir(struct pci_dev *dev) { return -ENOTTY; } #endif /* PCI error reporting and recovery */ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, pci_channel_state_t state, pci_ers_result_t (*reset_link)(struct pci_dev *pdev)); bool pcie_wait_for_link(struct pci_dev *pdev, bool active); #ifdef CONFIG_PCIEASPM void pcie_aspm_init_link_state(struct pci_dev *pdev); void pcie_aspm_exit_link_state(struct pci_dev *pdev); void pcie_aspm_pm_state_change(struct pci_dev *pdev); void pcie_aspm_powersave_config_link(struct pci_dev *pdev); #else static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { } static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { } static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev) { } static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { } #endif #ifdef CONFIG_PCIE_ECRC void pcie_set_ecrc_checking(struct pci_dev *dev); void pcie_ecrc_get_policy(char *str); #else static inline void pcie_set_ecrc_checking(struct pci_dev *dev) { } static inline void pcie_ecrc_get_policy(char *str) { } #endif #ifdef CONFIG_PCIE_PTM void pci_ptm_init(struct pci_dev *dev); int pci_enable_ptm(struct pci_dev *dev, u8 *granularity); #else static inline void pci_ptm_init(struct pci_dev *dev) { } static inline int pci_enable_ptm(struct pci_dev *dev, u8 *granularity) { return -EINVAL; } #endif struct pci_dev_reset_methods { u16 vendor; u16 device; int (*reset)(struct pci_dev *dev, int probe); }; #ifdef CONFIG_PCI_QUIRKS int pci_dev_specific_reset(struct pci_dev *dev, int probe); #else static inline int pci_dev_specific_reset(struct pci_dev *dev, int probe) { return -ENOTTY; } #endif #if defined(CONFIG_PCI_QUIRKS) && defined(CONFIG_ARM64) int acpi_get_rc_resources(struct device *dev, const char *hid, u16 segment, struct resource *res); #else static inline int acpi_get_rc_resources(struct device *dev, const char *hid, u16 segment, struct resource *res) { return -ENODEV; } #endif u32 pci_rebar_get_possible_sizes(struct pci_dev *pdev, int bar); int pci_rebar_get_current_size(struct pci_dev *pdev, int bar); int pci_rebar_set_size(struct pci_dev *pdev, int bar, int size); static inline u64 pci_rebar_size_to_bytes(int size) { return 1ULL << (size + 20); } struct device_node; #ifdef CONFIG_OF int of_pci_parse_bus_range(struct device_node *node, struct resource *res); int of_get_pci_domain_nr(struct device_node *node); int of_pci_get_max_link_speed(struct device_node *node); void pci_set_of_node(struct pci_dev *dev); void pci_release_of_node(struct pci_dev *dev); void pci_set_bus_of_node(struct pci_bus *bus); void pci_release_bus_of_node(struct pci_bus *bus); int devm_of_pci_bridge_init(struct device *dev, struct pci_host_bridge *bridge); #else static inline int of_pci_parse_bus_range(struct device_node *node, struct resource *res) { return -EINVAL; } static inline int of_get_pci_domain_nr(struct device_node *node) { return -1; } static inline int of_pci_get_max_link_speed(struct device_node *node) { return -EINVAL; } static inline void pci_set_of_node(struct pci_dev *dev) { } static inline void pci_release_of_node(struct pci_dev *dev) { } static inline void pci_set_bus_of_node(struct pci_bus *bus) { } static inline void pci_release_bus_of_node(struct pci_bus *bus) { } static inline int devm_of_pci_bridge_init(struct device *dev, struct pci_host_bridge *bridge) { return 0; } #endif /* CONFIG_OF */ #ifdef CONFIG_PCIEAER void pci_no_aer(void); void pci_aer_init(struct pci_dev *dev); void pci_aer_exit(struct pci_dev *dev); extern const struct attribute_group aer_stats_attr_group; void pci_aer_clear_fatal_status(struct pci_dev *dev); int pci_aer_clear_status(struct pci_dev *dev); int pci_aer_raw_clear_status(struct pci_dev *dev); #else static inline void pci_no_aer(void) { } static inline void pci_aer_init(struct pci_dev *d) { } static inline void pci_aer_exit(struct pci_dev *d) { } static inline void pci_aer_clear_fatal_status(struct pci_dev *dev) { } static inline int pci_aer_clear_status(struct pci_dev *dev) { return -EINVAL; } static inline int pci_aer_raw_clear_status(struct pci_dev *dev) { return -EINVAL; } #endif #ifdef CONFIG_ACPI int pci_acpi_program_hp_params(struct pci_dev *dev); #else static inline int pci_acpi_program_hp_params(struct pci_dev *dev) { return -ENODEV; } #endif #ifdef CONFIG_PCIEASPM extern const struct attribute_group aspm_ctrl_attr_group; #endif #endif /* DRIVERS_PCI_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __SHMEM_FS_H #define __SHMEM_FS_H #include <linux/file.h> #include <linux/swap.h> #include <linux/mempolicy.h> #include <linux/pagemap.h> #include <linux/percpu_counter.h> #include <linux/xattr.h> #include <linux/fs_parser.h> /* inode in-kernel data */ struct shmem_inode_info { spinlock_t lock; unsigned int seals; /* shmem seals */ unsigned long flags; unsigned long alloced; /* data pages alloced to file */ unsigned long swapped; /* subtotal assigned to swap */ struct list_head shrinklist; /* shrinkable hpage inodes */ struct list_head swaplist; /* chain of maybes on swap */ struct shared_policy policy; /* NUMA memory alloc policy */ struct simple_xattrs xattrs; /* list of xattrs */ atomic_t stop_eviction; /* hold when working on inode */ struct inode vfs_inode; }; struct shmem_sb_info { unsigned long max_blocks; /* How many blocks are allowed */ struct percpu_counter used_blocks; /* How many are allocated */ unsigned long max_inodes; /* How many inodes are allowed */ unsigned long free_inodes; /* How many are left for allocation */ spinlock_t stat_lock; /* Serialize shmem_sb_info changes */ umode_t mode; /* Mount mode for root directory */ unsigned char huge; /* Whether to try for hugepages */ kuid_t uid; /* Mount uid for root directory */ kgid_t gid; /* Mount gid for root directory */ bool full_inums; /* If i_ino should be uint or ino_t */ ino_t next_ino; /* The next per-sb inode number to use */ ino_t __percpu *ino_batch; /* The next per-cpu inode number to use */ struct mempolicy *mpol; /* default memory policy for mappings */ spinlock_t shrinklist_lock; /* Protects shrinklist */ struct list_head shrinklist; /* List of shinkable inodes */ unsigned long shrinklist_len; /* Length of shrinklist */ }; static inline struct shmem_inode_info *SHMEM_I(struct inode *inode) { return container_of(inode, struct shmem_inode_info, vfs_inode); } /* * Functions in mm/shmem.c called directly from elsewhere: */ extern const struct fs_parameter_spec shmem_fs_parameters[]; extern int shmem_init(void); extern int shmem_init_fs_context(struct fs_context *fc); extern struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags); extern struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags); extern struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt, const char *name, loff_t size, unsigned long flags); extern int shmem_zero_setup(struct vm_area_struct *); extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); extern int shmem_lock(struct file *file, int lock, struct user_struct *user); #ifdef CONFIG_SHMEM extern bool shmem_mapping(struct address_space *mapping); #else static inline bool shmem_mapping(struct address_space *mapping) { return false; } #endif /* CONFIG_SHMEM */ extern void shmem_unlock_mapping(struct address_space *mapping); extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); extern int shmem_unuse(unsigned int type, bool frontswap, unsigned long *fs_pages_to_unuse); extern bool shmem_huge_enabled(struct vm_area_struct *vma); extern unsigned long shmem_swap_usage(struct vm_area_struct *vma); extern unsigned long shmem_partial_swap_usage(struct address_space *mapping, pgoff_t start, pgoff_t end); /* Flag allocation requirements to shmem_getpage */ enum sgp_type { SGP_READ, /* don't exceed i_size, don't allocate page */ SGP_CACHE, /* don't exceed i_size, may allocate page */ SGP_NOHUGE, /* like SGP_CACHE, but no huge pages */ SGP_HUGE, /* like SGP_CACHE, huge pages preferred */ SGP_WRITE, /* may exceed i_size, may allocate !Uptodate page */ SGP_FALLOC, /* like SGP_WRITE, but make existing page Uptodate */ }; extern int shmem_getpage(struct inode *inode, pgoff_t index, struct page **pagep, enum sgp_type sgp); static inline struct page *shmem_read_mapping_page( struct address_space *mapping, pgoff_t index) { return shmem_read_mapping_page_gfp(mapping, index, mapping_gfp_mask(mapping)); } static inline bool shmem_file(struct file *file) { if (!IS_ENABLED(CONFIG_SHMEM)) return false; if (!file || !file->f_mapping) return false; return shmem_mapping(file->f_mapping); } extern bool shmem_charge(struct inode *inode, long pages); extern void shmem_uncharge(struct inode *inode, long pages); #ifdef CONFIG_SHMEM extern int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, struct vm_area_struct *dst_vma, unsigned long dst_addr, unsigned long src_addr, struct page **pagep); extern int shmem_mfill_zeropage_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, struct vm_area_struct *dst_vma, unsigned long dst_addr); #else #define shmem_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \ src_addr, pagep) ({ BUG(); 0; }) #define shmem_mfill_zeropage_pte(dst_mm, dst_pmd, dst_vma, \ dst_addr) ({ BUG(); 0; }) #endif #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_IP_TUNNELS_H #define __NET_IP_TUNNELS_H 1 #include <linux/if_tunnel.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/socket.h> #include <linux/types.h> #include <linux/u64_stats_sync.h> #include <linux/bitops.h> #include <net/dsfield.h> #include <net/gro_cells.h> #include <net/inet_ecn.h> #include <net/netns/generic.h> #include <net/rtnetlink.h> #include <net/lwtunnel.h> #include <net/dst_cache.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> #include <net/ip6_fib.h> #include <net/ip6_route.h> #endif /* Keep error state on tunnel for 30 sec */ #define IPTUNNEL_ERR_TIMEO (30*HZ) /* Used to memset ip_tunnel padding. */ #define IP_TUNNEL_KEY_SIZE offsetofend(struct ip_tunnel_key, tp_dst) /* Used to memset ipv4 address padding. */ #define IP_TUNNEL_KEY_IPV4_PAD offsetofend(struct ip_tunnel_key, u.ipv4.dst) #define IP_TUNNEL_KEY_IPV4_PAD_LEN \ (sizeof_field(struct ip_tunnel_key, u) - \ sizeof_field(struct ip_tunnel_key, u.ipv4)) struct ip_tunnel_key { __be64 tun_id; union { struct { __be32 src; __be32 dst; } ipv4; struct { struct in6_addr src; struct in6_addr dst; } ipv6; } u; __be16 tun_flags; u8 tos; /* TOS for IPv4, TC for IPv6 */ u8 ttl; /* TTL for IPv4, HL for IPv6 */ __be32 label; /* Flow Label for IPv6 */ __be16 tp_src; __be16 tp_dst; }; /* Flags for ip_tunnel_info mode. */ #define IP_TUNNEL_INFO_TX 0x01 /* represents tx tunnel parameters */ #define IP_TUNNEL_INFO_IPV6 0x02 /* key contains IPv6 addresses */ #define IP_TUNNEL_INFO_BRIDGE 0x04 /* represents a bridged tunnel id */ /* Maximum tunnel options length. */ #define IP_TUNNEL_OPTS_MAX \ GENMASK((sizeof_field(struct ip_tunnel_info, \ options_len) * BITS_PER_BYTE) - 1, 0) struct ip_tunnel_info { struct ip_tunnel_key key; #ifdef CONFIG_DST_CACHE struct dst_cache dst_cache; #endif u8 options_len; u8 mode; }; /* 6rd prefix/relay information */ #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel_6rd_parm { struct in6_addr prefix; __be32 relay_prefix; u16 prefixlen; u16 relay_prefixlen; }; #endif struct ip_tunnel_encap { u16 type; u16 flags; __be16 sport; __be16 dport; }; struct ip_tunnel_prl_entry { struct ip_tunnel_prl_entry __rcu *next; __be32 addr; u16 flags; struct rcu_head rcu_head; }; struct metadata_dst; struct ip_tunnel { struct ip_tunnel __rcu *next; struct hlist_node hash_node; struct net_device *dev; struct net *net; /* netns for packet i/o */ unsigned long err_time; /* Time when the last ICMP error * arrived */ int err_count; /* Number of arrived ICMP errors */ /* These four fields used only by GRE */ u32 i_seqno; /* The last seen seqno */ u32 o_seqno; /* The last output seqno */ int tun_hlen; /* Precalculated header length */ /* These four fields used only by ERSPAN */ u32 index; /* ERSPAN type II index */ u8 erspan_ver; /* ERSPAN version */ u8 dir; /* ERSPAN direction */ u16 hwid; /* ERSPAN hardware ID */ struct dst_cache dst_cache; struct ip_tunnel_parm parms; int mlink; int encap_hlen; /* Encap header length (FOU,GUE) */ int hlen; /* tun_hlen + encap_hlen */ struct ip_tunnel_encap encap; /* for SIT */ #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel_6rd_parm ip6rd; #endif struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */ unsigned int prl_count; /* # of entries in PRL */ unsigned int ip_tnl_net_id; struct gro_cells gro_cells; __u32 fwmark; bool collect_md; bool ignore_df; }; struct tnl_ptk_info { __be16 flags; __be16 proto; __be32 key; __be32 seq; int hdr_len; }; #define PACKET_RCVD 0 #define PACKET_REJECT 1 #define PACKET_NEXT 2 #define IP_TNL_HASH_BITS 7 #define IP_TNL_HASH_SIZE (1 << IP_TNL_HASH_BITS) struct ip_tunnel_net { struct net_device *fb_tunnel_dev; struct rtnl_link_ops *rtnl_link_ops; struct hlist_head tunnels[IP_TNL_HASH_SIZE]; struct ip_tunnel __rcu *collect_md_tun; int type; }; static inline void ip_tunnel_key_init(struct ip_tunnel_key *key, __be32 saddr, __be32 daddr, u8 tos, u8 ttl, __be32 label, __be16 tp_src, __be16 tp_dst, __be64 tun_id, __be16 tun_flags) { key->tun_id = tun_id; key->u.ipv4.src = saddr; key->u.ipv4.dst = daddr; memset((unsigned char *)key + IP_TUNNEL_KEY_IPV4_PAD, 0, IP_TUNNEL_KEY_IPV4_PAD_LEN); key->tos = tos; key->ttl = ttl; key->label = label; key->tun_flags = tun_flags; /* For the tunnel types on the top of IPsec, the tp_src and tp_dst of * the upper tunnel are used. * E.g: GRE over IPSEC, the tp_src and tp_port are zero. */ key->tp_src = tp_src; key->tp_dst = tp_dst; /* Clear struct padding. */ if (sizeof(*key) != IP_TUNNEL_KEY_SIZE) memset((unsigned char *)key + IP_TUNNEL_KEY_SIZE, 0, sizeof(*key) - IP_TUNNEL_KEY_SIZE); } static inline bool ip_tunnel_dst_cache_usable(const struct sk_buff *skb, const struct ip_tunnel_info *info) { if (skb->mark) return false; if (!info) return true; if (info->key.tun_flags & TUNNEL_NOCACHE) return false; return true; } static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info *tun_info) { return tun_info->mode & IP_TUNNEL_INFO_IPV6 ? AF_INET6 : AF_INET; } static inline __be64 key32_to_tunnel_id(__be32 key) { #ifdef __BIG_ENDIAN return (__force __be64)key; #else return (__force __be64)((__force u64)key << 32); #endif } /* Returns the least-significant 32 bits of a __be64. */ static inline __be32 tunnel_id_to_key32(__be64 tun_id) { #ifdef __BIG_ENDIAN return (__force __be32)tun_id; #else return (__force __be32)((__force u64)tun_id >> 32); #endif } #ifdef CONFIG_INET static inline void ip_tunnel_init_flow(struct flowi4 *fl4, int proto, __be32 daddr, __be32 saddr, __be32 key, __u8 tos, int oif, __u32 mark, __u32 tun_inner_hash) { memset(fl4, 0, sizeof(*fl4)); fl4->flowi4_oif = oif; fl4->daddr = daddr; fl4->saddr = saddr; fl4->flowi4_tos = tos; fl4->flowi4_proto = proto; fl4->fl4_gre_key = key; fl4->flowi4_mark = mark; fl4->flowi4_multipath_hash = tun_inner_hash; } int ip_tunnel_init(struct net_device *dev); void ip_tunnel_uninit(struct net_device *dev); void ip_tunnel_dellink(struct net_device *dev, struct list_head *head); struct net *ip_tunnel_get_link_net(const struct net_device *dev); int ip_tunnel_get_iflink(const struct net_device *dev); int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, struct rtnl_link_ops *ops, char *devname); void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id, struct rtnl_link_ops *ops); void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, const u8 protocol); void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const u8 proto, int tunnel_hlen); int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); void ip_tunnel_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *tot); struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, int link, __be16 flags, __be32 remote, __be32 local, __be32 key); int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, bool log_ecn_error); int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm *p, __u32 fwmark); int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm *p, __u32 fwmark); void ip_tunnel_setup(struct net_device *dev, unsigned int net_id); extern const struct header_ops ip_tunnel_header_ops; __be16 ip_tunnel_parse_protocol(const struct sk_buff *skb); struct ip_tunnel_encap_ops { size_t (*encap_hlen)(struct ip_tunnel_encap *e); int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e, u8 *protocol, struct flowi4 *fl4); int (*err_handler)(struct sk_buff *skb, u32 info); }; #define MAX_IPTUN_ENCAP_OPS 8 extern const struct ip_tunnel_encap_ops __rcu * iptun_encaps[MAX_IPTUN_ENCAP_OPS]; int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *op, unsigned int num); int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op, unsigned int num); int ip_tunnel_encap_setup(struct ip_tunnel *t, struct ip_tunnel_encap *ipencap); static inline bool pskb_inet_may_pull(struct sk_buff *skb) { int nhlen; switch (skb->protocol) { #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): nhlen = sizeof(struct ipv6hdr); break; #endif case htons(ETH_P_IP): nhlen = sizeof(struct iphdr); break; default: nhlen = 0; } return pskb_network_may_pull(skb, nhlen); } static inline int ip_encap_hlen(struct ip_tunnel_encap *e) { const struct ip_tunnel_encap_ops *ops; int hlen = -EINVAL; if (e->type == TUNNEL_ENCAP_NONE) return 0; if (e->type >= MAX_IPTUN_ENCAP_OPS) return -EINVAL; rcu_read_lock(); ops = rcu_dereference(iptun_encaps[e->type]); if (likely(ops && ops->encap_hlen)) hlen = ops->encap_hlen(e); rcu_read_unlock(); return hlen; } static inline int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, u8 *protocol, struct flowi4 *fl4) { const struct ip_tunnel_encap_ops *ops; int ret = -EINVAL; if (t->encap.type == TUNNEL_ENCAP_NONE) return 0; if (t->encap.type >= MAX_IPTUN_ENCAP_OPS) return -EINVAL; rcu_read_lock(); ops = rcu_dereference(iptun_encaps[t->encap.type]); if (likely(ops && ops->build_header)) ret = ops->build_header(skb, &t->encap, protocol, fl4); rcu_read_unlock(); return ret; } /* Extract dsfield from inner protocol */ static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph, const struct sk_buff *skb) { if (skb->protocol == htons(ETH_P_IP)) return iph->tos; else if (skb->protocol == htons(ETH_P_IPV6)) return ipv6_get_dsfield((const struct ipv6hdr *)iph); else return 0; } static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph, const struct sk_buff *skb) { if (skb->protocol == htons(ETH_P_IP)) return iph->ttl; else if (skb->protocol == htons(ETH_P_IPV6)) return ((const struct ipv6hdr *)iph)->hop_limit; else return 0; } /* Propogate ECN bits out */ static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph, const struct sk_buff *skb) { u8 inner = ip_tunnel_get_dsfield(iph, skb); return INET_ECN_encapsulate(tos, inner); } int __iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto, bool raw_proto, bool xnet); static inline int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto, bool xnet) { return __iptunnel_pull_header(skb, hdr_len, inner_proto, false, xnet); } void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, u8 proto, u8 tos, u8 ttl, __be16 df, bool xnet); struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, gfp_t flags); int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst, int headroom, bool reply); int iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask); static inline int iptunnel_pull_offloads(struct sk_buff *skb) { if (skb_is_gso(skb)) { int err; err = skb_unclone(skb, GFP_ATOMIC); if (unlikely(err)) return err; skb_shinfo(skb)->gso_type &= ~(NETIF_F_GSO_ENCAP_ALL >> NETIF_F_GSO_SHIFT); } skb->encapsulation = 0; return 0; } static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len) { if (pkt_len > 0) { struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats); u64_stats_update_begin(&tstats->syncp); tstats->tx_bytes += pkt_len; tstats->tx_packets++; u64_stats_update_end(&tstats->syncp); put_cpu_ptr(tstats); } else { struct net_device_stats *err_stats = &dev->stats; if (pkt_len < 0) { err_stats->tx_errors++; err_stats->tx_aborted_errors++; } else { err_stats->tx_dropped++; } } } static inline void *ip_tunnel_info_opts(struct ip_tunnel_info *info) { return info + 1; } static inline void ip_tunnel_info_opts_get(void *to, const struct ip_tunnel_info *info) { memcpy(to, info + 1, info->options_len); } static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, const void *from, int len, __be16 flags) { info->options_len = len; if (len > 0) { memcpy(ip_tunnel_info_opts(info), from, len); info->key.tun_flags |= flags; } } static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate) { return (struct ip_tunnel_info *)lwtstate->data; } DECLARE_STATIC_KEY_FALSE(ip_tunnel_metadata_cnt); /* Returns > 0 if metadata should be collected */ static inline int ip_tunnel_collect_metadata(void) { return static_branch_unlikely(&ip_tunnel_metadata_cnt); } void __init ip_tunnel_core_init(void); void ip_tunnel_need_metadata(void); void ip_tunnel_unneed_metadata(void); #else /* CONFIG_INET */ static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate) { return NULL; } static inline void ip_tunnel_need_metadata(void) { } static inline void ip_tunnel_unneed_metadata(void) { } static inline void ip_tunnel_info_opts_get(void *to, const struct ip_tunnel_info *info) { } static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, const void *from, int len, __be16 flags) { info->options_len = 0; } #endif /* CONFIG_INET */ #endif /* __NET_IP_TUNNELS_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 // SPDX-License-Identifier: GPL-2.0 /* * The class-specific portions of the driver model * * Copyright (c) 2001-2003 Patrick Mochel <mochel@osdl.org> * Copyright (c) 2004-2009 Greg Kroah-Hartman <gregkh@suse.de> * Copyright (c) 2008-2009 Novell Inc. * Copyright (c) 2012-2019 Greg Kroah-Hartman <gregkh@linuxfoundation.org> * Copyright (c) 2012-2019 Linux Foundation * * See Documentation/driver-api/driver-model/ for more information. */ #ifndef _DEVICE_CLASS_H_ #define _DEVICE_CLASS_H_ #include <linux/kobject.h> #include <linux/klist.h> #include <linux/pm.h> #include <linux/device/bus.h> struct device; struct fwnode_handle; /** * struct class - device classes * @name: Name of the class. * @owner: The module owner. * @class_groups: Default attributes of this class. * @dev_groups: Default attributes of the devices that belong to the class. * @dev_kobj: The kobject that represents this class and links it into the hierarchy. * @dev_uevent: Called when a device is added, removed from this class, or a * few other things that generate uevents to add the environment * variables. * @devnode: Callback to provide the devtmpfs. * @class_release: Called to release this class. * @dev_release: Called to release the device. * @shutdown_pre: Called at shut-down time before driver shutdown. * @ns_type: Callbacks so sysfs can detemine namespaces. * @namespace: Namespace of the device belongs to this class. * @get_ownership: Allows class to specify uid/gid of the sysfs directories * for the devices belonging to the class. Usually tied to * device's namespace. * @pm: The default device power management operations of this class. * @p: The private data of the driver core, no one other than the * driver core can touch this. * * A class is a higher-level view of a device that abstracts out low-level * implementation details. Drivers may see a SCSI disk or an ATA disk, but, * at the class level, they are all simply disks. Classes allow user space * to work with devices based on what they do, rather than how they are * connected or how they work. */ struct class { const char *name; struct module *owner; const struct attribute_group **class_groups; const struct attribute_group **dev_groups; struct kobject *dev_kobj; int (*dev_uevent)(struct device *dev, struct kobj_uevent_env *env); char *(*devnode)(struct device *dev, umode_t *mode); void (*class_release)(struct class *class); void (*dev_release)(struct device *dev); int (*shutdown_pre)(struct device *dev); const struct kobj_ns_type_operations *ns_type; const void *(*namespace)(struct device *dev); void (*get_ownership)(struct device *dev, kuid_t *uid, kgid_t *gid); const struct dev_pm_ops *pm; struct subsys_private *p; }; struct class_dev_iter { struct klist_iter ki; const struct device_type *type; }; extern struct kobject *sysfs_dev_block_kobj; extern struct kobject *sysfs_dev_char_kobj; extern int __must_check __class_register(struct class *class, struct lock_class_key *key); extern void class_unregister(struct class *class); /* This is a #define to keep the compiler from merging different * instances of the __key variable */ #define class_register(class) \ ({ \ static struct lock_class_key __key; \ __class_register(class, &__key); \ }) struct class_compat; struct class_compat *class_compat_register(const char *name); void class_compat_unregister(struct class_compat *cls); int class_compat_create_link(struct class_compat *cls, struct device *dev, struct device *device_link); void class_compat_remove_link(struct class_compat *cls, struct device *dev, struct device *device_link); extern void class_dev_iter_init(struct class_dev_iter *iter, struct class *class, struct device *start, const struct device_type *type); extern struct device *class_dev_iter_next(struct class_dev_iter *iter); extern void class_dev_iter_exit(struct class_dev_iter *iter); extern int class_for_each_device(struct class *class, struct device *start, void *data, int (*fn)(struct device *dev, void *data)); extern struct device *class_find_device(struct class *class, struct device *start, const void *data, int (*match)(struct device *, const void *)); /** * class_find_device_by_name - device iterator for locating a particular device * of a specific name. * @class: class type * @name: name of the device to match */ static inline struct device *class_find_device_by_name(struct class *class, const char *name) { return class_find_device(class, NULL, name, device_match_name); } /** * class_find_device_by_of_node : device iterator for locating a particular device * matching the of_node. * @class: class type * @np: of_node of the device to match. */ static inline struct device * class_find_device_by_of_node(struct class *class, const struct device_node *np) { return class_find_device(class, NULL, np, device_match_of_node); } /** * class_find_device_by_fwnode : device iterator for locating a particular device * matching the fwnode. * @class: class type * @fwnode: fwnode of the device to match. */ static inline struct device * class_find_device_by_fwnode(struct class *class, const struct fwnode_handle *fwnode) { return class_find_device(class, NULL, fwnode, device_match_fwnode); } /** * class_find_device_by_devt : device iterator for locating a particular device * matching the device type. * @class: class type * @devt: device type of the device to match. */ static inline struct device *class_find_device_by_devt(struct class *class, dev_t devt) { return class_find_device(class, NULL, &devt, device_match_devt); } #ifdef CONFIG_ACPI struct acpi_device; /** * class_find_device_by_acpi_dev : device iterator for locating a particular * device matching the ACPI_COMPANION device. * @class: class type * @adev: ACPI_COMPANION device to match. */ static inline struct device * class_find_device_by_acpi_dev(struct class *class, const struct acpi_device *adev) { return class_find_device(class, NULL, adev, device_match_acpi_dev); } #else static inline struct device * class_find_device_by_acpi_dev(struct class *class, const void *adev) { return NULL; } #endif struct class_attribute { struct attribute attr; ssize_t (*show)(struct class *class, struct class_attribute *attr, char *buf); ssize_t (*store)(struct class *class, struct class_attribute *attr, const char *buf, size_t count); }; #define CLASS_ATTR_RW(_name) \ struct class_attribute class_attr_##_name = __ATTR_RW(_name) #define CLASS_ATTR_RO(_name) \ struct class_attribute class_attr_##_name = __ATTR_RO(_name) #define CLASS_ATTR_WO(_name) \ struct class_attribute class_attr_##_name = __ATTR_WO(_name) extern int __must_check class_create_file_ns(struct class *class, const struct class_attribute *attr, const void *ns); extern void class_remove_file_ns(struct class *class, const struct class_attribute *attr, const void *ns); static inline int __must_check class_create_file(struct class *class, const struct class_attribute *attr) { return class_create_file_ns(class, attr, NULL); } static inline void class_remove_file(struct class *class, const struct class_attribute *attr) { return class_remove_file_ns(class, attr, NULL); } /* Simple class attribute that is just a static string */ struct class_attribute_string { struct class_attribute attr; char *str; }; /* Currently read-only only */ #define _CLASS_ATTR_STRING(_name, _mode, _str) \ { __ATTR(_name, _mode, show_class_attr_string, NULL), _str } #define CLASS_ATTR_STRING(_name, _mode, _str) \ struct class_attribute_string class_attr_##_name = \ _CLASS_ATTR_STRING(_name, _mode, _str) extern ssize_t show_class_attr_string(struct class *class, struct class_attribute *attr, char *buf); struct class_interface { struct list_head node; struct class *class; int (*add_dev) (struct device *, struct class_interface *); void (*remove_dev) (struct device *, struct class_interface *); }; extern int __must_check class_interface_register(struct class_interface *); extern void class_interface_unregister(struct class_interface *); extern struct class * __must_check __class_create(struct module *owner, const char *name, struct lock_class_key *key); extern void class_destroy(struct class *cls); /* This is a #define to keep the compiler from merging different * instances of the __key variable */ #define class_create(owner, name) \ ({ \ static struct lock_class_key __key; \ __class_create(owner, name, &__key); \ }) #endif /* _DEVICE_CLASS_H_ */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 // SPDX-License-Identifier: GPL-2.0 /* * hrtimers - High-resolution kernel timers * * Copyright(C) 2005, Thomas Gleixner <tglx@linutronix.de> * Copyright(C) 2005, Red Hat, Inc., Ingo Molnar * * data type definitions, declarations, prototypes * * Started by: Thomas Gleixner and Ingo Molnar */ #ifndef _LINUX_HRTIMER_H #define _LINUX_HRTIMER_H #include <linux/hrtimer_defs.h> #include <linux/rbtree.h> #include <linux/init.h> #include <linux/list.h> #include <linux/percpu.h> #include <linux/seqlock.h> #include <linux/timer.h> #include <linux/timerqueue.h> struct hrtimer_clock_base; struct hrtimer_cpu_base; /* * Mode arguments of xxx_hrtimer functions: * * HRTIMER_MODE_ABS - Time value is absolute * HRTIMER_MODE_REL - Time value is relative to now * HRTIMER_MODE_PINNED - Timer is bound to CPU (is only considered * when starting the timer) * HRTIMER_MODE_SOFT - Timer callback function will be executed in * soft irq context * HRTIMER_MODE_HARD - Timer callback function will be executed in * hard irq context even on PREEMPT_RT. */ enum hrtimer_mode { HRTIMER_MODE_ABS = 0x00, HRTIMER_MODE_REL = 0x01, HRTIMER_MODE_PINNED = 0x02, HRTIMER_MODE_SOFT = 0x04, HRTIMER_MODE_HARD = 0x08, HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED, HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED, HRTIMER_MODE_ABS_SOFT = HRTIMER_MODE_ABS | HRTIMER_MODE_SOFT, HRTIMER_MODE_REL_SOFT = HRTIMER_MODE_REL | HRTIMER_MODE_SOFT, HRTIMER_MODE_ABS_PINNED_SOFT = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_SOFT, HRTIMER_MODE_REL_PINNED_SOFT = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_SOFT, HRTIMER_MODE_ABS_HARD = HRTIMER_MODE_ABS | HRTIMER_MODE_HARD, HRTIMER_MODE_REL_HARD = HRTIMER_MODE_REL | HRTIMER_MODE_HARD, HRTIMER_MODE_ABS_PINNED_HARD = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_HARD, HRTIMER_MODE_REL_PINNED_HARD = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_HARD, }; /* * Return values for the callback function */ enum hrtimer_restart { HRTIMER_NORESTART, /* Timer is not restarted */ HRTIMER_RESTART, /* Timer must be restarted */ }; /* * Values to track state of the timer * * Possible states: * * 0x00 inactive * 0x01 enqueued into rbtree * * The callback state is not part of the timer->state because clearing it would * mean touching the timer after the callback, this makes it impossible to free * the timer from the callback function. * * Therefore we track the callback state in: * * timer->base->cpu_base->running == timer * * On SMP it is possible to have a "callback function running and enqueued" * status. It happens for example when a posix timer expired and the callback * queued a signal. Between dropping the lock which protects the posix timer * and reacquiring the base lock of the hrtimer, another CPU can deliver the * signal and rearm the timer. * * All state transitions are protected by cpu_base->lock. */ #define HRTIMER_STATE_INACTIVE 0x00 #define HRTIMER_STATE_ENQUEUED 0x01 /** * struct hrtimer - the basic hrtimer structure * @node: timerqueue node, which also manages node.expires, * the absolute expiry time in the hrtimers internal * representation. The time is related to the clock on * which the timer is based. Is setup by adding * slack to the _softexpires value. For non range timers * identical to _softexpires. * @_softexpires: the absolute earliest expiry time of the hrtimer. * The time which was given as expiry time when the timer * was armed. * @function: timer expiry callback function * @base: pointer to the timer base (per cpu and per clock) * @state: state information (See bit values above) * @is_rel: Set if the timer was armed relative * @is_soft: Set if hrtimer will be expired in soft interrupt context. * @is_hard: Set if hrtimer will be expired in hard interrupt context * even on RT. * * The hrtimer structure must be initialized by hrtimer_init() */ struct hrtimer { struct timerqueue_node node; ktime_t _softexpires; enum hrtimer_restart (*function)(struct hrtimer *); struct hrtimer_clock_base *base; u8 state; u8 is_rel; u8 is_soft; u8 is_hard; }; /** * struct hrtimer_sleeper - simple sleeper structure * @timer: embedded timer structure * @task: task to wake up * * task is set to NULL, when the timer expires. */ struct hrtimer_sleeper { struct hrtimer timer; struct task_struct *task; }; #ifdef CONFIG_64BIT # define __hrtimer_clock_base_align ____cacheline_aligned #else # define __hrtimer_clock_base_align #endif /** * struct hrtimer_clock_base - the timer base for a specific clock * @cpu_base: per cpu clock base * @index: clock type index for per_cpu support when moving a * timer to a base on another cpu. * @clockid: clock id for per_cpu support * @seq: seqcount around __run_hrtimer * @running: pointer to the currently running hrtimer * @active: red black tree root node for the active timers * @get_time: function to retrieve the current time of the clock * @offset: offset of this clock to the monotonic base */ struct hrtimer_clock_base { struct hrtimer_cpu_base *cpu_base; unsigned int index; clockid_t clockid; seqcount_raw_spinlock_t seq; struct hrtimer *running; struct timerqueue_head active; ktime_t (*get_time)(void); ktime_t offset; } __hrtimer_clock_base_align; enum hrtimer_base_type { HRTIMER_BASE_MONOTONIC, HRTIMER_BASE_REALTIME, HRTIMER_BASE_BOOTTIME, HRTIMER_BASE_TAI, HRTIMER_BASE_MONOTONIC_SOFT, HRTIMER_BASE_REALTIME_SOFT, HRTIMER_BASE_BOOTTIME_SOFT, HRTIMER_BASE_TAI_SOFT, HRTIMER_MAX_CLOCK_BASES, }; /** * struct hrtimer_cpu_base - the per cpu clock bases * @lock: lock protecting the base and associated clock bases * and timers * @cpu: cpu number * @active_bases: Bitfield to mark bases with active timers * @clock_was_set_seq: Sequence counter of clock was set events * @hres_active: State of high resolution mode * @in_hrtirq: hrtimer_interrupt() is currently executing * @hang_detected: The last hrtimer interrupt detected a hang * @softirq_activated: displays, if the softirq is raised - update of softirq * related settings is not required then. * @nr_events: Total number of hrtimer interrupt events * @nr_retries: Total number of hrtimer interrupt retries * @nr_hangs: Total number of hrtimer interrupt hangs * @max_hang_time: Maximum time spent in hrtimer_interrupt * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are * expired * @timer_waiters: A hrtimer_cancel() invocation waits for the timer * callback to finish. * @expires_next: absolute time of the next event, is required for remote * hrtimer enqueue; it is the total first expiry time (hard * and soft hrtimer are taken into account) * @next_timer: Pointer to the first expiring timer * @softirq_expires_next: Time to check, if soft queues needs also to be expired * @softirq_next_timer: Pointer to the first expiring softirq based timer * @clock_base: array of clock bases for this cpu * * Note: next_timer is just an optimization for __remove_hrtimer(). * Do not dereference the pointer because it is not reliable on * cross cpu removals. */ struct hrtimer_cpu_base { raw_spinlock_t lock; unsigned int cpu; unsigned int active_bases; unsigned int clock_was_set_seq; unsigned int hres_active : 1, in_hrtirq : 1, hang_detected : 1, softirq_activated : 1; #ifdef CONFIG_HIGH_RES_TIMERS unsigned int nr_events; unsigned short nr_retries; unsigned short nr_hangs; unsigned int max_hang_time; #endif #ifdef CONFIG_PREEMPT_RT spinlock_t softirq_expiry_lock; atomic_t timer_waiters; #endif ktime_t expires_next; struct hrtimer *next_timer; ktime_t softirq_expires_next; struct hrtimer *softirq_next_timer; struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; } ____cacheline_aligned; static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) { timer->node.expires = time; timer->_softexpires = time; } static inline void hrtimer_set_expires_range(struct hrtimer *timer, ktime_t time, ktime_t delta) { timer->_softexpires = time; timer->node.expires = ktime_add_safe(time, delta); } static inline void hrtimer_set_expires_range_ns(struct hrtimer *timer, ktime_t time, u64 delta) { timer->_softexpires = time; timer->node.expires = ktime_add_safe(time, ns_to_ktime(delta)); } static inline void hrtimer_set_expires_tv64(struct hrtimer *timer, s64 tv64) { timer->node.expires = tv64; timer->_softexpires = tv64; } static inline void hrtimer_add_expires(struct hrtimer *timer, ktime_t time) { timer->node.expires = ktime_add_safe(timer->node.expires, time); timer->_softexpires = ktime_add_safe(timer->_softexpires, time); } static inline void hrtimer_add_expires_ns(struct hrtimer *timer, u64 ns) { timer->node.expires = ktime_add_ns(timer->node.expires, ns); timer->_softexpires = ktime_add_ns(timer->_softexpires, ns); } static inline ktime_t hrtimer_get_expires(const struct hrtimer *timer) { return timer->node.expires; } static inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer) { return timer->_softexpires; } static inline s64 hrtimer_get_expires_tv64(const struct hrtimer *timer) { return timer->node.expires; } static inline s64 hrtimer_get_softexpires_tv64(const struct hrtimer *timer) { return timer->_softexpires; } static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer) { return ktime_to_ns(timer->node.expires); } static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) { return ktime_sub(timer->node.expires, timer->base->get_time()); } static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) { return timer->base->get_time(); } static inline int hrtimer_is_hres_active(struct hrtimer *timer) { return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ? timer->base->cpu_base->hres_active : 0; } #ifdef CONFIG_HIGH_RES_TIMERS struct clock_event_device; extern void hrtimer_interrupt(struct clock_event_device *dev); extern unsigned int hrtimer_resolution; #else #define hrtimer_resolution (unsigned int)LOW_RES_NSEC #endif static inline ktime_t __hrtimer_expires_remaining_adjusted(const struct hrtimer *timer, ktime_t now) { ktime_t rem = ktime_sub(timer->node.expires, now); /* * Adjust relative timers for the extra we added in * hrtimer_start_range_ns() to prevent short timeouts. */ if (IS_ENABLED(CONFIG_TIME_LOW_RES) && timer->is_rel) rem -= hrtimer_resolution; return rem; } static inline ktime_t hrtimer_expires_remaining_adjusted(const struct hrtimer *timer) { return __hrtimer_expires_remaining_adjusted(timer, timer->base->get_time()); } #ifdef CONFIG_TIMERFD extern void timerfd_clock_was_set(void); #else static inline void timerfd_clock_was_set(void) { } #endif extern void hrtimers_resume(void); DECLARE_PER_CPU(struct tick_device, tick_cpu_device); #ifdef CONFIG_PREEMPT_RT void hrtimer_cancel_wait_running(const struct hrtimer *timer); #else static inline void hrtimer_cancel_wait_running(struct hrtimer *timer) { cpu_relax(); } #endif /* Exported timer functions: */ /* Initialize timers: */ extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock, enum hrtimer_mode mode); extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id, enum hrtimer_mode mode); #ifdef CONFIG_DEBUG_OBJECTS_TIMERS extern void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock, enum hrtimer_mode mode); extern void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, clockid_t clock_id, enum hrtimer_mode mode); extern void destroy_hrtimer_on_stack(struct hrtimer *timer); #else static inline void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock, enum hrtimer_mode mode) { hrtimer_init(timer, which_clock, mode); } static inline void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, clockid_t clock_id, enum hrtimer_mode mode) { hrtimer_init_sleeper(sl, clock_id, mode); } static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { } #endif /* Basic timer operations: */ extern void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 range_ns, const enum hrtimer_mode mode); /** * hrtimer_start - (re)start an hrtimer * @timer: the timer to be added * @tim: expiry time * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED); * softirq based mode is considered for debug purpose only! */ static inline void hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) { hrtimer_start_range_ns(timer, tim, 0, mode); } extern int hrtimer_cancel(struct hrtimer *timer); extern int hrtimer_try_to_cancel(struct hrtimer *timer); static inline void hrtimer_start_expires(struct hrtimer *timer, enum hrtimer_mode mode) { u64 delta; ktime_t soft, hard; soft = hrtimer_get_softexpires(timer); hard = hrtimer_get_expires(timer); delta = ktime_to_ns(ktime_sub(hard, soft)); hrtimer_start_range_ns(timer, soft, delta, mode); } void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl, enum hrtimer_mode mode); static inline void hrtimer_restart(struct hrtimer *timer) { hrtimer_start_expires(timer, HRTIMER_MODE_ABS); } /* Query timers: */ extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust); static inline ktime_t hrtimer_get_remaining(const struct hrtimer *timer) { return __hrtimer_get_remaining(timer, false); } extern u64 hrtimer_get_next_event(void); extern u64 hrtimer_next_event_without(const struct hrtimer *exclude); extern bool hrtimer_active(const struct hrtimer *timer); /** * hrtimer_is_queued = check, whether the timer is on one of the queues * @timer: Timer to check * * Returns: True if the timer is queued, false otherwise * * The function can be used lockless, but it gives only a current snapshot. */ static inline bool hrtimer_is_queued(struct hrtimer *timer) { /* The READ_ONCE pairs with the update functions of timer->state */ return !!(READ_ONCE(timer->state) & HRTIMER_STATE_ENQUEUED); } /* * Helper function to check, whether the timer is running the callback * function */ static inline int hrtimer_callback_running(struct hrtimer *timer) { return timer->base->running == timer; } /* Forward a hrtimer so it expires after now: */ extern u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval); /** * hrtimer_forward_now - forward the timer expiry so it expires after now * @timer: hrtimer to forward * @interval: the interval to forward * * Forward the timer expiry so it will expire after the current time * of the hrtimer clock base. Returns the number of overruns. * * Can be safely called from the callback function of @timer. If * called from other contexts @timer must neither be enqueued nor * running the callback and the caller needs to take care of * serialization. * * Note: This only updates the timer expiry value and does not requeue * the timer. */ static inline u64 hrtimer_forward_now(struct hrtimer *timer, ktime_t interval) { return hrtimer_forward(timer, timer->base->get_time(), interval); } /* Precise sleep: */ extern int nanosleep_copyout(struct restart_block *, struct timespec64 *); extern long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode, const clockid_t clockid); extern int schedule_hrtimeout_range(ktime_t *expires, u64 delta, const enum hrtimer_mode mode); extern int schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, const enum hrtimer_mode mode, clockid_t clock_id); extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode); /* Soft interrupt function to run the hrtimer queues: */ extern void hrtimer_run_queues(void); /* Bootup initialization: */ extern void __init hrtimers_init(void); /* Show pending timers: */ extern void sysrq_timer_list_show(void); int hrtimers_prepare_cpu(unsigned int cpu); #ifdef CONFIG_HOTPLUG_CPU int hrtimers_dead_cpu(unsigned int cpu); #else #define hrtimers_dead_cpu NULL #endif #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Definitions for inet_sock * * Authors: Many, reorganised here by * Arnaldo Carvalho de Melo <acme@mandriva.com> */ #ifndef _INET_SOCK_H #define _INET_SOCK_H #include <linux/bitops.h> #include <linux/string.h> #include <linux/types.h> #include <linux/jhash.h> #include <linux/netdevice.h> #include <net/flow.h> #include <net/sock.h> #include <net/request_sock.h> #include <net/netns/hash.h> #include <net/tcp_states.h> #include <net/l3mdev.h> /** struct ip_options - IP Options * * @faddr - Saved first hop address * @nexthop - Saved nexthop address in LSRR and SSRR * @is_strictroute - Strict source route * @srr_is_hit - Packet destination addr was our one * @is_changed - IP checksum more not valid * @rr_needaddr - Need to record addr of outgoing dev * @ts_needtime - Need to record timestamp * @ts_needaddr - Need to record addr of outgoing dev */ struct ip_options { __be32 faddr; __be32 nexthop; unsigned char optlen; unsigned char srr; unsigned char rr; unsigned char ts; unsigned char is_strictroute:1, srr_is_hit:1, is_changed:1, rr_needaddr:1, ts_needtime:1, ts_needaddr:1; unsigned char router_alert; unsigned char cipso; unsigned char __pad2; unsigned char __data[]; }; struct ip_options_rcu { struct rcu_head rcu; struct ip_options opt; }; struct ip_options_data { struct ip_options_rcu opt; char data[40]; }; struct inet_request_sock { struct request_sock req; #define ir_loc_addr req.__req_common.skc_rcv_saddr #define ir_rmt_addr req.__req_common.skc_daddr #define ir_num req.__req_common.skc_num #define ir_rmt_port req.__req_common.skc_dport #define ir_v6_rmt_addr req.__req_common.skc_v6_daddr #define ir_v6_loc_addr req.__req_common.skc_v6_rcv_saddr #define ir_iif req.__req_common.skc_bound_dev_if #define ir_cookie req.__req_common.skc_cookie #define ireq_net req.__req_common.skc_net #define ireq_state req.__req_common.skc_state #define ireq_family req.__req_common.skc_family u16 snd_wscale : 4, rcv_wscale : 4, tstamp_ok : 1, sack_ok : 1, wscale_ok : 1, ecn_ok : 1, acked : 1, no_srccheck: 1, smc_ok : 1; u32 ir_mark; union { struct ip_options_rcu __rcu *ireq_opt; #if IS_ENABLED(CONFIG_IPV6) struct { struct ipv6_txoptions *ipv6_opt; struct sk_buff *pktopts; }; #endif }; }; static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) { return (struct inet_request_sock *)sk; } static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb) { if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept) return skb->mark; return sk->sk_mark; } static inline int inet_request_bound_dev_if(const struct sock *sk, struct sk_buff *skb) { #ifdef CONFIG_NET_L3_MASTER_DEV struct net *net = sock_net(sk); if (!sk->sk_bound_dev_if && net->ipv4.sysctl_tcp_l3mdev_accept) return l3mdev_master_ifindex_by_index(net, skb->skb_iif); #endif return sk->sk_bound_dev_if; } static inline int inet_sk_bound_l3mdev(const struct sock *sk) { #ifdef CONFIG_NET_L3_MASTER_DEV struct net *net = sock_net(sk); if (!net->ipv4.sysctl_tcp_l3mdev_accept) return l3mdev_master_ifindex_by_index(net, sk->sk_bound_dev_if); #endif return 0; } static inline bool inet_bound_dev_eq(bool l3mdev_accept, int bound_dev_if, int dif, int sdif) { if (!bound_dev_if) return !sdif || l3mdev_accept; return bound_dev_if == dif || bound_dev_if == sdif; } struct inet_cork { unsigned int flags; __be32 addr; struct ip_options *opt; unsigned int fragsize; int length; /* Total length of all frames */ struct dst_entry *dst; u8 tx_flags; __u8 ttl; __s16 tos; char priority; __u16 gso_size; u64 transmit_time; u32 mark; }; struct inet_cork_full { struct inet_cork base; struct flowi fl; }; struct ip_mc_socklist; struct ipv6_pinfo; struct rtable; /** struct inet_sock - representation of INET sockets * * @sk - ancestor class * @pinet6 - pointer to IPv6 control block * @inet_daddr - Foreign IPv4 addr * @inet_rcv_saddr - Bound local IPv4 addr * @inet_dport - Destination port * @inet_num - Local port * @inet_saddr - Sending source * @uc_ttl - Unicast TTL * @inet_sport - Source port * @inet_id - ID counter for DF pkts * @tos - TOS * @mc_ttl - Multicasting TTL * @is_icsk - is this an inet_connection_sock? * @uc_index - Unicast outgoing device index * @mc_index - Multicast device index * @mc_list - Group array * @cork - info to build ip hdr on each ip frag while socket is corked */ struct inet_sock { /* sk and pinet6 has to be the first two members of inet_sock */ struct sock sk; #if IS_ENABLED(CONFIG_IPV6) struct ipv6_pinfo *pinet6; #endif /* Socket demultiplex comparisons on incoming packets. */ #define inet_daddr sk.__sk_common.skc_daddr #define inet_rcv_saddr sk.__sk_common.skc_rcv_saddr #define inet_dport sk.__sk_common.skc_dport #define inet_num sk.__sk_common.skc_num __be32 inet_saddr; __s16 uc_ttl; __u16 cmsg_flags; __be16 inet_sport; __u16 inet_id; struct ip_options_rcu __rcu *inet_opt; int rx_dst_ifindex; __u8 tos; __u8 min_ttl; __u8 mc_ttl; __u8 pmtudisc; __u8 recverr:1, is_icsk:1, freebind:1, hdrincl:1, mc_loop:1, transparent:1, mc_all:1, nodefrag:1; __u8 bind_address_no_port:1, recverr_rfc4884:1, defer_connect:1; /* Indicates that fastopen_connect is set * and cookie exists so we defer connect * until first data frame is written */ __u8 rcv_tos; __u8 convert_csum; int uc_index; int mc_index; __be32 mc_addr; struct ip_mc_socklist __rcu *mc_list; struct inet_cork_full cork; }; #define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ #define IPCORK_ALLFRAG 2 /* always fragment (for ipv6 for now) */ /* cmsg flags for inet */ #define IP_CMSG_PKTINFO BIT(0) #define IP_CMSG_TTL BIT(1) #define IP_CMSG_TOS BIT(2) #define IP_CMSG_RECVOPTS BIT(3) #define IP_CMSG_RETOPTS BIT(4) #define IP_CMSG_PASSSEC BIT(5) #define IP_CMSG_ORIGDSTADDR BIT(6) #define IP_CMSG_CHECKSUM BIT(7) #define IP_CMSG_RECVFRAGSIZE BIT(8) /** * sk_to_full_sk - Access to a full socket * @sk: pointer to a socket * * SYNACK messages might be attached to request sockets. * Some places want to reach the listener in this case. */ static inline struct sock *sk_to_full_sk(struct sock *sk) { #ifdef CONFIG_INET if (sk && sk->sk_state == TCP_NEW_SYN_RECV) sk = inet_reqsk(sk)->rsk_listener; #endif return sk; } /* sk_to_full_sk() variant with a const argument */ static inline const struct sock *sk_const_to_full_sk(const struct sock *sk) { #ifdef CONFIG_INET if (sk && sk->sk_state == TCP_NEW_SYN_RECV) sk = ((const struct request_sock *)sk)->rsk_listener; #endif return sk; } static inline struct sock *skb_to_full_sk(const struct sk_buff *skb) { return sk_to_full_sk(skb->sk); } static inline struct inet_sock *inet_sk(const struct sock *sk) { return (struct inet_sock *)sk; } static inline void __inet_sk_copy_descendant(struct sock *sk_to, const struct sock *sk_from, const int ancestor_size) { memcpy(inet_sk(sk_to) + 1, inet_sk(sk_from) + 1, sk_from->sk_prot->obj_size - ancestor_size); } int inet_sk_rebuild_header(struct sock *sk); /** * inet_sk_state_load - read sk->sk_state for lockless contexts * @sk: socket pointer * * Paired with inet_sk_state_store(). Used in places we don't hold socket lock: * tcp_diag_get_info(), tcp_get_info(), tcp_poll(), get_tcp4_sock() ... */ static inline int inet_sk_state_load(const struct sock *sk) { /* state change might impact lockless readers. */ return smp_load_acquire(&sk->sk_state); } /** * inet_sk_state_store - update sk->sk_state * @sk: socket pointer * @newstate: new state * * Paired with inet_sk_state_load(). Should be used in contexts where * state change might impact lockless readers. */ void inet_sk_state_store(struct sock *sk, int newstate); void inet_sk_set_state(struct sock *sk, int state); static inline unsigned int __inet_ehashfn(const __be32 laddr, const __u16 lport, const __be32 faddr, const __be16 fport, u32 initval) { return jhash_3words((__force __u32) laddr, (__force __u32) faddr, ((__u32) lport) << 16 | (__force __u32)fport, initval); } struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, bool attach_listener); static inline __u8 inet_sk_flowi_flags(const struct sock *sk) { __u8 flags = 0; if (inet_sk(sk)->transparent || inet_sk(sk)->hdrincl) flags |= FLOWI_FLAG_ANYSRC; return flags; } static inline void inet_inc_convert_csum(struct sock *sk) { inet_sk(sk)->convert_csum++; } static inline void inet_dec_convert_csum(struct sock *sk) { if (inet_sk(sk)->convert_csum > 0) inet_sk(sk)->convert_csum--; } static inline bool inet_get_convert_csum(struct sock *sk) { return !!inet_sk(sk)->convert_csum; } static inline bool inet_can_nonlocal_bind(struct net *net, struct inet_sock *inet) { return net->ipv4.sysctl_ip_nonlocal_bind || inet->freebind || inet->transparent; } #endif /* _INET_SOCK_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2016 Qualcomm Atheros, Inc * * Based on net/sched/sch_fq_codel.c */ #ifndef __NET_SCHED_FQ_IMPL_H #define __NET_SCHED_FQ_IMPL_H #include <net/fq.h> /* functions that are embedded into includer */ static void fq_adjust_removal(struct fq *fq, struct fq_flow *flow, struct sk_buff *skb) { struct fq_tin *tin = flow->tin; tin->backlog_bytes -= skb->len; tin->backlog_packets--; flow->backlog -= skb->len; fq->backlog--; fq->memory_usage -= skb->truesize; } static void fq_rejigger_backlog(struct fq *fq, struct fq_flow *flow) { struct fq_flow *i; if (flow->backlog == 0) { list_del_init(&flow->backlogchain); } else { i = flow; list_for_each_entry_continue(i, &fq->backlogs, backlogchain) if (i->backlog < flow->backlog) break; list_move_tail(&flow->backlogchain, &i->backlogchain); } } static struct sk_buff *fq_flow_dequeue(struct fq *fq, struct fq_flow *flow) { struct sk_buff *skb; lockdep_assert_held(&fq->lock); skb = __skb_dequeue(&flow->queue); if (!skb) return NULL; fq_adjust_removal(fq, flow, skb); fq_rejigger_backlog(fq, flow); return skb; } static struct sk_buff *fq_tin_dequeue(struct fq *fq, struct fq_tin *tin, fq_tin_dequeue_t dequeue_func) { struct fq_flow *flow; struct list_head *head; struct sk_buff *skb; lockdep_assert_held(&fq->lock); begin: head = &tin->new_flows; if (list_empty(head)) { head = &tin->old_flows; if (list_empty(head)) return NULL; } flow = list_first_entry(head, struct fq_flow, flowchain); if (flow->deficit <= 0) { flow->deficit += fq->quantum; list_move_tail(&flow->flowchain, &tin->old_flows); goto begin; } skb = dequeue_func(fq, tin, flow); if (!skb) { /* force a pass through old_flows to prevent starvation */ if ((head == &tin->new_flows) && !list_empty(&tin->old_flows)) { list_move_tail(&flow->flowchain, &tin->old_flows); } else { list_del_init(&flow->flowchain); flow->tin = NULL; } goto begin; } flow->deficit -= skb->len; tin->tx_bytes += skb->len; tin->tx_packets++; return skb; } static u32 fq_flow_idx(struct fq *fq, struct sk_buff *skb) { u32 hash = skb_get_hash(skb); return reciprocal_scale(hash, fq->flows_cnt); } static struct fq_flow *fq_flow_classify(struct fq *fq, struct fq_tin *tin, u32 idx, struct sk_buff *skb, fq_flow_get_default_t get_default_func) { struct fq_flow *flow; lockdep_assert_held(&fq->lock); flow = &fq->flows[idx]; if (flow->tin && flow->tin != tin) { flow = get_default_func(fq, tin, idx, skb); tin->collisions++; fq->collisions++; } if (!flow->tin) tin->flows++; return flow; } static void fq_recalc_backlog(struct fq *fq, struct fq_tin *tin, struct fq_flow *flow) { struct fq_flow *i; if (list_empty(&flow->backlogchain)) list_add_tail(&flow->backlogchain, &fq->backlogs); i = flow; list_for_each_entry_continue_reverse(i, &fq->backlogs, backlogchain) if (i->backlog > flow->backlog) break; list_move(&flow->backlogchain, &i->backlogchain); } static void fq_tin_enqueue(struct fq *fq, struct fq_tin *tin, u32 idx, struct sk_buff *skb, fq_skb_free_t free_func, fq_flow_get_default_t get_default_func) { struct fq_flow *flow; bool oom; lockdep_assert_held(&fq->lock); flow = fq_flow_classify(fq, tin, idx, skb, get_default_func); flow->tin = tin; flow->backlog += skb->len; tin->backlog_bytes += skb->len; tin->backlog_packets++; fq->memory_usage += skb->truesize; fq->backlog++; fq_recalc_backlog(fq, tin, flow); if (list_empty(&flow->flowchain)) { flow->deficit = fq->quantum; list_add_tail(&flow->flowchain, &tin->new_flows); } __skb_queue_tail(&flow->queue, skb); oom = (fq->memory_usage > fq->memory_limit); while (fq->backlog > fq->limit || oom) { flow = list_first_entry_or_null(&fq->backlogs, struct fq_flow, backlogchain); if (!flow) return; skb = fq_flow_dequeue(fq, flow); if (!skb) return; free_func(fq, flow->tin, flow, skb); flow->tin->overlimit++; fq->overlimit++; if (oom) { fq->overmemory++; oom = (fq->memory_usage > fq->memory_limit); } } } static void fq_flow_filter(struct fq *fq, struct fq_flow *flow, fq_skb_filter_t filter_func, void *filter_data, fq_skb_free_t free_func) { struct fq_tin *tin = flow->tin; struct sk_buff *skb, *tmp; lockdep_assert_held(&fq->lock); skb_queue_walk_safe(&flow->queue, skb, tmp) { if (!filter_func(fq, tin, flow, skb, filter_data)) continue; __skb_unlink(skb, &flow->queue); fq_adjust_removal(fq, flow, skb); free_func(fq, tin, flow, skb); } fq_rejigger_backlog(fq, flow); } static void fq_tin_filter(struct fq *fq, struct fq_tin *tin, fq_skb_filter_t filter_func, void *filter_data, fq_skb_free_t free_func) { struct fq_flow *flow; lockdep_assert_held(&fq->lock); list_for_each_entry(flow, &tin->new_flows, flowchain) fq_flow_filter(fq, flow, filter_func, filter_data, free_func); list_for_each_entry(flow, &tin->old_flows, flowchain) fq_flow_filter(fq, flow, filter_func, filter_data, free_func); } static void fq_flow_reset(struct fq *fq, struct fq_flow *flow, fq_skb_free_t free_func) { struct sk_buff *skb; while ((skb = fq_flow_dequeue(fq, flow))) free_func(fq, flow->tin, flow, skb); if (!list_empty(&flow->flowchain)) list_del_init(&flow->flowchain); if (!list_empty(&flow->backlogchain)) list_del_init(&flow->backlogchain); flow->tin = NULL; WARN_ON_ONCE(flow->backlog); } static void fq_tin_reset(struct fq *fq, struct fq_tin *tin, fq_skb_free_t free_func) { struct list_head *head; struct fq_flow *flow; for (;;) { head = &tin->new_flows; if (list_empty(head)) { head = &tin->old_flows; if (list_empty(head)) break; } flow = list_first_entry(head, struct fq_flow, flowchain); fq_flow_reset(fq, flow, free_func); } WARN_ON_ONCE(tin->backlog_bytes); WARN_ON_ONCE(tin->backlog_packets); } static void fq_flow_init(struct fq_flow *flow) { INIT_LIST_HEAD(&flow->flowchain); INIT_LIST_HEAD(&flow->backlogchain); __skb_queue_head_init(&flow->queue); } static void fq_tin_init(struct fq_tin *tin) { INIT_LIST_HEAD(&tin->new_flows); INIT_LIST_HEAD(&tin->old_flows); } static int fq_init(struct fq *fq, int flows_cnt) { int i; memset(fq, 0, sizeof(fq[0])); INIT_LIST_HEAD(&fq->backlogs); spin_lock_init(&fq->lock); fq->flows_cnt = max_t(u32, flows_cnt, 1); fq->quantum = 300; fq->limit = 8192; fq->memory_limit = 16 << 20; /* 16 MBytes */ fq->flows = kvcalloc(fq->flows_cnt, sizeof(fq->flows[0]), GFP_KERNEL); if (!fq->flows) return -ENOMEM; for (i = 0; i < fq->flows_cnt; i++) fq_flow_init(&fq->flows[i]); return 0; } static void fq_reset(struct fq *fq, fq_skb_free_t free_func) { int i; for (i = 0; i < fq->flows_cnt; i++) fq_flow_reset(fq, &fq->flows[i], free_func); kvfree(fq->flows); fq->flows = NULL; } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* Internal procfs definitions * * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/proc_fs.h> #include <linux/proc_ns.h> #include <linux/refcount.h> #include <linux/spinlock.h> #include <linux/atomic.h> #include <linux/binfmts.h> #include <linux/sched/coredump.h> #include <linux/sched/task.h> struct ctl_table_header; struct mempolicy; /* * This is not completely implemented yet. The idea is to * create an in-memory tree (like the actual /proc filesystem * tree) of these proc_dir_entries, so that we can dynamically * add new files to /proc. * * parent/subdir are used for the directory structure (every /proc file has a * parent, but "subdir" is empty for all non-directory entries). * subdir_node is used to build the rb tree "subdir" of the parent. */ struct proc_dir_entry { /* * number of callers into module in progress; * negative -> it's going away RSN */ atomic_t in_use; refcount_t refcnt; struct list_head pde_openers; /* who did ->open, but not ->release */ /* protects ->pde_openers and all struct pde_opener instances */ spinlock_t pde_unload_lock; struct completion *pde_unload_completion; const struct inode_operations *proc_iops; union { const struct proc_ops *proc_ops; const struct file_operations *proc_dir_ops; }; const struct dentry_operations *proc_dops; union { const struct seq_operations *seq_ops; int (*single_show)(struct seq_file *, void *); }; proc_write_t write; void *data; unsigned int state_size; unsigned int low_ino; nlink_t nlink; kuid_t uid; kgid_t gid; loff_t size; struct proc_dir_entry *parent; struct rb_root subdir; struct rb_node subdir_node; char *name; umode_t mode; u8 flags; u8 namelen; char inline_name[]; } __randomize_layout; #define SIZEOF_PDE ( \ sizeof(struct proc_dir_entry) < 128 ? 128 : \ sizeof(struct proc_dir_entry) < 192 ? 192 : \ sizeof(struct proc_dir_entry) < 256 ? 256 : \ sizeof(struct proc_dir_entry) < 512 ? 512 : \ 0) #define SIZEOF_PDE_INLINE_NAME (SIZEOF_PDE - sizeof(struct proc_dir_entry)) static inline bool pde_is_permanent(const struct proc_dir_entry *pde) { return pde->flags & PROC_ENTRY_PERMANENT; } extern struct kmem_cache *proc_dir_entry_cache; void pde_free(struct proc_dir_entry *pde); union proc_op { int (*proc_get_link)(struct dentry *, struct path *); int (*proc_show)(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task); const char *lsm; }; struct proc_inode { struct pid *pid; unsigned int fd; union proc_op op; struct proc_dir_entry *pde; struct ctl_table_header *sysctl; struct ctl_table *sysctl_entry; struct hlist_node sibling_inodes; const struct proc_ns_operations *ns_ops; struct inode vfs_inode; } __randomize_layout; /* * General functions */ static inline struct proc_inode *PROC_I(const struct inode *inode) { return container_of(inode, struct proc_inode, vfs_inode); } static inline struct proc_dir_entry *PDE(const struct inode *inode) { return PROC_I(inode)->pde; } static inline void *__PDE_DATA(const struct inode *inode) { return PDE(inode)->data; } static inline struct pid *proc_pid(const struct inode *inode) { return PROC_I(inode)->pid; } static inline struct task_struct *get_proc_task(const struct inode *inode) { return get_pid_task(proc_pid(inode), PIDTYPE_PID); } void task_dump_owner(struct task_struct *task, umode_t mode, kuid_t *ruid, kgid_t *rgid); unsigned name_to_int(const struct qstr *qstr); /* * Offset of the first process in the /proc root directory.. */ #define FIRST_PROCESS_ENTRY 256 /* Worst case buffer size needed for holding an integer. */ #define PROC_NUMBUF 13 /* * array.c */ extern const struct file_operations proc_tid_children_operations; extern void proc_task_name(struct seq_file *m, struct task_struct *p, bool escape); extern int proc_tid_stat(struct seq_file *, struct pid_namespace *, struct pid *, struct task_struct *); extern int proc_tgid_stat(struct seq_file *, struct pid_namespace *, struct pid *, struct task_struct *); extern int proc_pid_status(struct seq_file *, struct pid_namespace *, struct pid *, struct task_struct *); extern int proc_pid_statm(struct seq_file *, struct pid_namespace *, struct pid *, struct task_struct *); /* * base.c */ extern const struct dentry_operations pid_dentry_operations; extern int pid_getattr(const struct path *, struct kstat *, u32, unsigned int); extern int proc_setattr(struct dentry *, struct iattr *); extern void proc_pid_evict_inode(struct proc_inode *); extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *, umode_t); extern void pid_update_inode(struct task_struct *, struct inode *); extern int pid_delete_dentry(const struct dentry *); extern int proc_pid_readdir(struct file *, struct dir_context *); struct dentry *proc_pid_lookup(struct dentry *, unsigned int); extern loff_t mem_lseek(struct file *, loff_t, int); /* Lookups */ typedef struct dentry *instantiate_t(struct dentry *, struct task_struct *, const void *); bool proc_fill_cache(struct file *, struct dir_context *, const char *, unsigned int, instantiate_t, struct task_struct *, const void *); /* * generic.c */ struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode, struct proc_dir_entry **parent, void *data); struct proc_dir_entry *proc_register(struct proc_dir_entry *dir, struct proc_dir_entry *dp); extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int); struct dentry *proc_lookup_de(struct inode *, struct dentry *, struct proc_dir_entry *); extern int proc_readdir(struct file *, struct dir_context *); int proc_readdir_de(struct file *, struct dir_context *, struct proc_dir_entry *); static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) { refcount_inc(&pde->refcnt); return pde; } extern void pde_put(struct proc_dir_entry *); static inline bool is_empty_pde(const struct proc_dir_entry *pde) { return S_ISDIR(pde->mode) && !pde->proc_iops; } extern ssize_t proc_simple_write(struct file *, const char __user *, size_t, loff_t *); /* * inode.c */ struct pde_opener { struct list_head lh; struct file *file; bool closing; struct completion *c; } __randomize_layout; extern const struct inode_operations proc_link_inode_operations; extern const struct inode_operations proc_pid_link_inode_operations; extern const struct super_operations proc_sops; void proc_init_kmemcache(void); void proc_invalidate_siblings_dcache(struct hlist_head *inodes, spinlock_t *lock); void set_proc_pid_nlink(void); extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); extern void proc_entry_rundown(struct proc_dir_entry *); /* * proc_namespaces.c */ extern const struct inode_operations proc_ns_dir_inode_operations; extern const struct file_operations proc_ns_dir_operations; /* * proc_net.c */ extern const struct file_operations proc_net_operations; extern const struct inode_operations proc_net_inode_operations; #ifdef CONFIG_NET extern int proc_net_init(void); #else static inline int proc_net_init(void) { return 0; } #endif /* * proc_self.c */ extern int proc_setup_self(struct super_block *); /* * proc_thread_self.c */ extern int proc_setup_thread_self(struct super_block *); extern void proc_thread_self_init(void); /* * proc_sysctl.c */ #ifdef CONFIG_PROC_SYSCTL extern int proc_sys_init(void); extern void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head); #else static inline void proc_sys_init(void) { } static inline void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head) { } #endif /* * proc_tty.c */ #ifdef CONFIG_TTY extern void proc_tty_init(void); #else static inline void proc_tty_init(void) {} #endif /* * root.c */ extern struct proc_dir_entry proc_root; extern void proc_self_init(void); /* * task_[no]mmu.c */ struct mem_size_stats; struct proc_maps_private { struct inode *inode; struct task_struct *task; struct mm_struct *mm; #ifdef CONFIG_MMU struct vm_area_struct *tail_vma; #endif #ifdef CONFIG_NUMA struct mempolicy *task_mempolicy; #endif } __randomize_layout; struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode); extern const struct file_operations proc_pid_maps_operations; extern const struct file_operations proc_pid_numa_maps_operations; extern const struct file_operations proc_pid_smaps_operations; extern const struct file_operations proc_pid_smaps_rollup_operations; extern const struct file_operations proc_clear_refs_operations; extern const struct file_operations proc_pagemap_operations; extern unsigned long task_vsize(struct mm_struct *); extern unsigned long task_statm(struct mm_struct *, unsigned long *, unsigned long *, unsigned long *, unsigned long *); extern void task_mem(struct seq_file *, struct mm_struct *); extern const struct dentry_operations proc_net_dentry_ops; static inline void pde_force_lookup(struct proc_dir_entry *pde) { /* /proc/net/ entries can be changed under us by setns(CLONE_NEWNET) */ pde->proc_dops = &proc_net_dentry_ops; }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2018 Christoph Hellwig. * * DMA operations that map physical memory directly without using an IOMMU. */ #ifndef _KERNEL_DMA_DIRECT_H #define _KERNEL_DMA_DIRECT_H #include <linux/dma-direct.h> int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); bool dma_direct_can_mmap(struct device *dev); int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr); int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs); size_t dma_direct_max_mapping_size(struct device *dev); #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ defined(CONFIG_SWIOTLB) void dma_direct_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir); #else static inline void dma_direct_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir) { } #endif #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \ defined(CONFIG_SWIOTLB) void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs); void dma_direct_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir); #else static inline void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs) { } static inline void dma_direct_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir) { } #endif static inline void dma_direct_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { phys_addr_t paddr = dma_to_phys(dev, addr); if (unlikely(is_swiotlb_buffer(paddr))) swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE); if (!dev_is_dma_coherent(dev)) arch_sync_dma_for_device(paddr, size, dir); } static inline void dma_direct_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { phys_addr_t paddr = dma_to_phys(dev, addr); if (!dev_is_dma_coherent(dev)) { arch_sync_dma_for_cpu(paddr, size, dir); arch_sync_dma_for_cpu_all(); } if (unlikely(is_swiotlb_buffer(paddr))) swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU); if (dir == DMA_FROM_DEVICE) arch_dma_mark_clean(paddr, size); } static inline dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs) { phys_addr_t phys = page_to_phys(page) + offset; dma_addr_t dma_addr = phys_to_dma(dev, phys); if (unlikely(swiotlb_force == SWIOTLB_FORCE)) return swiotlb_map(dev, phys, size, dir, attrs); if (unlikely(!dma_capable(dev, dma_addr, size, true))) { if (swiotlb_force != SWIOTLB_NO_FORCE) return swiotlb_map(dev, phys, size, dir, attrs); dev_WARN_ONCE(dev, 1, "DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); return DMA_MAPPING_ERROR; } if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) arch_sync_dma_for_device(phys, size, dir); return dma_addr; } static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { phys_addr_t phys = dma_to_phys(dev, addr); if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) dma_direct_sync_single_for_cpu(dev, addr, size, dir); if (unlikely(is_swiotlb_buffer(phys))) swiotlb_tbl_unmap_single(dev, phys, size, size, dir, attrs); } #endif /* _KERNEL_DMA_DIRECT_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_PGTABLE_INVERT_H #define _ASM_PGTABLE_INVERT_H 1 #ifndef __ASSEMBLY__ /* * A clear pte value is special, and doesn't get inverted. * * Note that even users that only pass a pgprot_t (rather * than a full pte) won't trigger the special zero case, * because even PAGE_NONE has _PAGE_PROTNONE | _PAGE_ACCESSED * set. So the all zero case really is limited to just the * cleared page table entry case. */ static inline bool __pte_needs_invert(u64 val) { return val && !(val & _PAGE_PRESENT); } /* Get a mask to xor with the page table entry to get the correct pfn. */ static inline u64 protnone_mask(u64 val) { return __pte_needs_invert(val) ? ~0ull : 0; } static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask) { /* * When a PTE transitions from NONE to !NONE or vice-versa * invert the PFN part to stop speculation. * pte_pfn undoes this when needed. */ if (__pte_needs_invert(oldval) != __pte_needs_invert(val)) val = (val & ~mask) | (~val & mask); return val; } #endif /* __ASSEMBLY__ */ #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef BLK_INTERNAL_H #define BLK_INTERNAL_H #include <linux/idr.h> #include <linux/blk-mq.h> #include <linux/part_stat.h> #include <linux/blk-crypto.h> #include <xen/xen.h> #include "blk-crypto-internal.h" #include "blk-mq.h" #include "blk-mq-sched.h" /* Max future timer expiry for timeouts */ #define BLK_MAX_TIMEOUT (5 * HZ) extern struct dentry *blk_debugfs_root; struct blk_flush_queue { unsigned int flush_pending_idx:1; unsigned int flush_running_idx:1; blk_status_t rq_status; unsigned long flush_pending_since; struct list_head flush_queue[2]; struct list_head flush_data_in_flight; struct request *flush_rq; struct lock_class_key key; spinlock_t mq_flush_lock; }; extern struct kmem_cache *blk_requestq_cachep; extern struct kobj_type blk_queue_ktype; extern struct ida blk_queue_ida; static inline struct blk_flush_queue * blk_get_flush_queue(struct request_queue *q, struct blk_mq_ctx *ctx) { return blk_mq_map_queue(q, REQ_OP_FLUSH, ctx)->fq; } static inline void __blk_get_queue(struct request_queue *q) { kobject_get(&q->kobj); } bool is_flush_rq(struct request *req); struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size, gfp_t flags); void blk_free_flush_queue(struct blk_flush_queue *q); void blk_freeze_queue(struct request_queue *q); static inline bool biovec_phys_mergeable(struct request_queue *q, struct bio_vec *vec1, struct bio_vec *vec2) { unsigned long mask = queue_segment_boundary(q); phys_addr_t addr1 = page_to_phys(vec1->bv_page) + vec1->bv_offset; phys_addr_t addr2 = page_to_phys(vec2->bv_page) + vec2->bv_offset; if (addr1 + vec1->bv_len != addr2) return false; if (xen_domain() && !xen_biovec_phys_mergeable(vec1, vec2->bv_page)) return false; if ((addr1 | mask) != ((addr2 + vec2->bv_len - 1) | mask)) return false; return true; } static inline bool __bvec_gap_to_prev(struct request_queue *q, struct bio_vec *bprv, unsigned int offset) { return (offset & queue_virt_boundary(q)) || ((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q)); } /* * Check if adding a bio_vec after bprv with offset would create a gap in * the SG list. Most drivers don't care about this, but some do. */ static inline bool bvec_gap_to_prev(struct request_queue *q, struct bio_vec *bprv, unsigned int offset) { if (!queue_virt_boundary(q)) return false; return __bvec_gap_to_prev(q, bprv, offset); } static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio, unsigned int nr_segs) { rq->nr_phys_segments = nr_segs; rq->__data_len = bio->bi_iter.bi_size; rq->bio = rq->biotail = bio; rq->ioprio = bio_prio(bio); if (bio->bi_disk) rq->rq_disk = bio->bi_disk; } #ifdef CONFIG_BLK_DEV_INTEGRITY void blk_flush_integrity(void); bool __bio_integrity_endio(struct bio *); void bio_integrity_free(struct bio *bio); static inline bool bio_integrity_endio(struct bio *bio) { if (bio_integrity(bio)) return __bio_integrity_endio(bio); return true; } bool blk_integrity_merge_rq(struct request_queue *, struct request *, struct request *); bool blk_integrity_merge_bio(struct request_queue *, struct request *, struct bio *); static inline bool integrity_req_gap_back_merge(struct request *req, struct bio *next) { struct bio_integrity_payload *bip = bio_integrity(req->bio); struct bio_integrity_payload *bip_next = bio_integrity(next); return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1], bip_next->bip_vec[0].bv_offset); } static inline bool integrity_req_gap_front_merge(struct request *req, struct bio *bio) { struct bio_integrity_payload *bip = bio_integrity(bio); struct bio_integrity_payload *bip_next = bio_integrity(req->bio); return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1], bip_next->bip_vec[0].bv_offset); } void blk_integrity_add(struct gendisk *); void blk_integrity_del(struct gendisk *); #else /* CONFIG_BLK_DEV_INTEGRITY */ static inline bool blk_integrity_merge_rq(struct request_queue *rq, struct request *r1, struct request *r2) { return true; } static inline bool blk_integrity_merge_bio(struct request_queue *rq, struct request *r, struct bio *b) { return true; } static inline bool integrity_req_gap_back_merge(struct request *req, struct bio *next) { return false; } static inline bool integrity_req_gap_front_merge(struct request *req, struct bio *bio) { return false; } static inline void blk_flush_integrity(void) { } static inline bool bio_integrity_endio(struct bio *bio) { return true; } static inline void bio_integrity_free(struct bio *bio) { } static inline void blk_integrity_add(struct gendisk *disk) { } static inline void blk_integrity_del(struct gendisk *disk) { } #endif /* CONFIG_BLK_DEV_INTEGRITY */ unsigned long blk_rq_timeout(unsigned long timeout); void blk_add_timer(struct request *req); bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs, struct request **same_queue_rq); bool blk_bio_list_merge(struct request_queue *q, struct list_head *list, struct bio *bio, unsigned int nr_segs); void blk_account_io_start(struct request *req); void blk_account_io_done(struct request *req, u64 now); /* * Internal elevator interface */ #define ELV_ON_HASH(rq) ((rq)->rq_flags & RQF_HASHED) void blk_insert_flush(struct request *rq); void elevator_init_mq(struct request_queue *q); int elevator_switch_mq(struct request_queue *q, struct elevator_type *new_e); void __elevator_exit(struct request_queue *, struct elevator_queue *); int elv_register_queue(struct request_queue *q, bool uevent); void elv_unregister_queue(struct request_queue *q); static inline void elevator_exit(struct request_queue *q, struct elevator_queue *e) { lockdep_assert_held(&q->sysfs_lock); blk_mq_sched_free_requests(q); __elevator_exit(q, e); } struct hd_struct *__disk_get_part(struct gendisk *disk, int partno); ssize_t part_size_show(struct device *dev, struct device_attribute *attr, char *buf); ssize_t part_stat_show(struct device *dev, struct device_attribute *attr, char *buf); ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr, char *buf); ssize_t part_fail_show(struct device *dev, struct device_attribute *attr, char *buf); ssize_t part_fail_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count); ssize_t part_timeout_show(struct device *, struct device_attribute *, char *); ssize_t part_timeout_store(struct device *, struct device_attribute *, const char *, size_t); void __blk_queue_split(struct bio **bio, unsigned int *nr_segs); int ll_back_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs); int blk_attempt_req_merge(struct request_queue *q, struct request *rq, struct request *next); unsigned int blk_recalc_rq_segments(struct request *rq); void blk_rq_set_mixed_merge(struct request *rq); bool blk_rq_merge_ok(struct request *rq, struct bio *bio); enum elv_merge blk_try_merge(struct request *rq, struct bio *bio); int blk_dev_init(void); /* * Contribute to IO statistics IFF: * * a) it's attached to a gendisk, and * b) the queue had IO stats enabled when this request was started */ static inline bool blk_do_io_stat(struct request *rq) { return rq->rq_disk && (rq->rq_flags & RQF_IO_STAT); } static inline void req_set_nomerge(struct request_queue *q, struct request *req) { req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; } /* * The max size one bio can handle is UINT_MAX becasue bvec_iter.bi_size * is defined as 'unsigned int', meantime it has to aligned to with logical * block size which is the minimum accepted unit by hardware. */ static inline unsigned int bio_allowed_max_sectors(struct request_queue *q) { return round_down(UINT_MAX, queue_logical_block_size(q)) >> 9; } /* * The max bio size which is aligned to q->limits.discard_granularity. This * is a hint to split large discard bio in generic block layer, then if device * driver needs to split the discard bio into smaller ones, their bi_size can * be very probably and easily aligned to discard_granularity of the device's * queue. */ static inline unsigned int bio_aligned_discard_max_sectors( struct request_queue *q) { return round_down(UINT_MAX, q->limits.discard_granularity) >> SECTOR_SHIFT; } /* * Internal io_context interface */ void get_io_context(struct io_context *ioc); struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct request_queue *q); struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q, gfp_t gfp_mask); void ioc_clear_queue(struct request_queue *q); int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node); /* * Internal throttling interface */ #ifdef CONFIG_BLK_DEV_THROTTLING extern int blk_throtl_init(struct request_queue *q); extern void blk_throtl_exit(struct request_queue *q); extern void blk_throtl_register_queue(struct request_queue *q); extern void blk_throtl_charge_bio_split(struct bio *bio); bool blk_throtl_bio(struct bio *bio); #else /* CONFIG_BLK_DEV_THROTTLING */ static inline int blk_throtl_init(struct request_queue *q) { return 0; } static inline void blk_throtl_exit(struct request_queue *q) { } static inline void blk_throtl_register_queue(struct request_queue *q) { } static inline void blk_throtl_charge_bio_split(struct bio *bio) { } static inline bool blk_throtl_bio(struct bio *bio) { return false; } #endif /* CONFIG_BLK_DEV_THROTTLING */ #ifdef CONFIG_BLK_DEV_THROTTLING_LOW extern ssize_t blk_throtl_sample_time_show(struct request_queue *q, char *page); extern ssize_t blk_throtl_sample_time_store(struct request_queue *q, const char *page, size_t count); extern void blk_throtl_bio_endio(struct bio *bio); extern void blk_throtl_stat_add(struct request *rq, u64 time); #else static inline void blk_throtl_bio_endio(struct bio *bio) { } static inline void blk_throtl_stat_add(struct request *rq, u64 time) { } #endif #ifdef CONFIG_BOUNCE extern int init_emergency_isa_pool(void); extern void blk_queue_bounce(struct request_queue *q, struct bio **bio); #else static inline int init_emergency_isa_pool(void) { return 0; } static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio) { } #endif /* CONFIG_BOUNCE */ #ifdef CONFIG_BLK_CGROUP_IOLATENCY extern int blk_iolatency_init(struct request_queue *q); #else static inline int blk_iolatency_init(struct request_queue *q) { return 0; } #endif struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp); #ifdef CONFIG_BLK_DEV_ZONED void blk_queue_free_zone_bitmaps(struct request_queue *q); #else static inline void blk_queue_free_zone_bitmaps(struct request_queue *q) {} #endif struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector); int blk_alloc_devt(struct hd_struct *part, dev_t *devt); void blk_free_devt(dev_t devt); void blk_invalidate_devt(dev_t devt); char *disk_name(struct gendisk *hd, int partno, char *buf); #define ADDPART_FLAG_NONE 0 #define ADDPART_FLAG_RAID 1 #define ADDPART_FLAG_WHOLEDISK 2 void delete_partition(struct hd_struct *part); int bdev_add_partition(struct block_device *bdev, int partno, sector_t start, sector_t length); int bdev_del_partition(struct block_device *bdev, int partno); int bdev_resize_partition(struct block_device *bdev, int partno, sector_t start, sector_t length); int disk_expand_part_tbl(struct gendisk *disk, int target); int hd_ref_init(struct hd_struct *part); /* no need to get/put refcount of part0 */ static inline int hd_struct_try_get(struct hd_struct *part) { if (part->partno) return percpu_ref_tryget_live(&part->ref); return 1; } static inline void hd_struct_put(struct hd_struct *part) { if (part->partno) percpu_ref_put(&part->ref); } static inline void hd_free_part(struct hd_struct *part) { free_percpu(part->dkstats); kfree(part->info); percpu_ref_exit(&part->ref); } /* * Any access of part->nr_sects which is not protected by partition * bd_mutex or gendisk bdev bd_mutex, should be done using this * accessor function. * * Code written along the lines of i_size_read() and i_size_write(). * CONFIG_PREEMPTION case optimizes the case of UP kernel with preemption * on. */ static inline sector_t part_nr_sects_read(struct hd_struct *part) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) sector_t nr_sects; unsigned seq; do { seq = read_seqcount_begin(&part->nr_sects_seq); nr_sects = part->nr_sects; } while (read_seqcount_retry(&part->nr_sects_seq, seq)); return nr_sects; #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) sector_t nr_sects; preempt_disable(); nr_sects = part->nr_sects; preempt_enable(); return nr_sects; #else return part->nr_sects; #endif } /* * Should be called with mutex lock held (typically bd_mutex) of partition * to provide mutual exlusion among writers otherwise seqcount might be * left in wrong state leaving the readers spinning infinitely. */ static inline void part_nr_sects_write(struct hd_struct *part, sector_t size) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) preempt_disable(); write_seqcount_begin(&part->nr_sects_seq); part->nr_sects = size; write_seqcount_end(&part->nr_sects_seq); preempt_enable(); #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) preempt_disable(); part->nr_sects = size; preempt_enable(); #else part->nr_sects = size; #endif } int bio_add_hw_page(struct request_queue *q, struct bio *bio, struct page *page, unsigned int len, unsigned int offset, unsigned int max_sectors, bool *same_page); #endif /* BLK_INTERNAL_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_MROUTE_H #define __LINUX_MROUTE_H #include <linux/in.h> #include <linux/pim.h> #include <net/fib_rules.h> #include <net/fib_notifier.h> #include <uapi/linux/mroute.h> #include <linux/mroute_base.h> #include <linux/sockptr.h> #ifdef CONFIG_IP_MROUTE static inline int ip_mroute_opt(int opt) { return opt >= MRT_BASE && opt <= MRT_MAX; } int ip_mroute_setsockopt(struct sock *, int, sockptr_t, unsigned int); int ip_mroute_getsockopt(struct sock *, int, char __user *, int __user *); int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg); int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); int ip_mr_init(void); bool ipmr_rule_default(const struct fib_rule *rule); #else static inline int ip_mroute_setsockopt(struct sock *sock, int optname, sockptr_t optval, unsigned int optlen) { return -ENOPROTOOPT; } static inline int ip_mroute_getsockopt(struct sock *sock, int optname, char __user *optval, int __user *optlen) { return -ENOPROTOOPT; } static inline int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) { return -ENOIOCTLCMD; } static inline int ip_mr_init(void) { return 0; } static inline int ip_mroute_opt(int opt) { return 0; } static inline bool ipmr_rule_default(const struct fib_rule *rule) { return true; } #endif #define VIFF_STATIC 0x8000 struct mfc_cache_cmp_arg { __be32 mfc_mcastgrp; __be32 mfc_origin; }; /** * struct mfc_cache - multicast routing entries * @_c: Common multicast routing information; has to be first [for casting] * @mfc_mcastgrp: destination multicast group address * @mfc_origin: source address * @cmparg: used for rhashtable comparisons */ struct mfc_cache { struct mr_mfc _c; union { struct { __be32 mfc_mcastgrp; __be32 mfc_origin; }; struct mfc_cache_cmp_arg cmparg; }; }; struct rtmsg; int ipmr_get_route(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr, struct rtmsg *rtm, u32 portid); #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 /* SPDX-License-Identifier: GPL-2.0 */ /* * Traceprobe fetch helper inlines */ static nokprobe_inline void fetch_store_raw(unsigned long val, struct fetch_insn *code, void *buf) { switch (code->size) { case 1: *(u8 *)buf = (u8)val; break; case 2: *(u16 *)buf = (u16)val; break; case 4: *(u32 *)buf = (u32)val; break; case 8: //TBD: 32bit signed *(u64 *)buf = (u64)val; break; default: *(unsigned long *)buf = val; } } static nokprobe_inline void fetch_apply_bitfield(struct fetch_insn *code, void *buf) { switch (code->basesize) { case 1: *(u8 *)buf <<= code->lshift; *(u8 *)buf >>= code->rshift; break; case 2: *(u16 *)buf <<= code->lshift; *(u16 *)buf >>= code->rshift; break; case 4: *(u32 *)buf <<= code->lshift; *(u32 *)buf >>= code->rshift; break; case 8: *(u64 *)buf <<= code->lshift; *(u64 *)buf >>= code->rshift; break; } } /* * These functions must be defined for each callsite. * Return consumed dynamic data size (>= 0), or error (< 0). * If dest is NULL, don't store result and return required dynamic data size. */ static int process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, void *base); static nokprobe_inline int fetch_store_strlen(unsigned long addr); static nokprobe_inline int fetch_store_string(unsigned long addr, void *dest, void *base); static nokprobe_inline int fetch_store_strlen_user(unsigned long addr); static nokprobe_inline int fetch_store_string_user(unsigned long addr, void *dest, void *base); static nokprobe_inline int probe_mem_read(void *dest, void *src, size_t size); static nokprobe_inline int probe_mem_read_user(void *dest, void *src, size_t size); /* From the 2nd stage, routine is same */ static nokprobe_inline int process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val, void *dest, void *base) { struct fetch_insn *s3 = NULL; int total = 0, ret = 0, i = 0; u32 loc = 0; unsigned long lval = val; stage2: /* 2nd stage: dereference memory if needed */ do { if (code->op == FETCH_OP_DEREF) { lval = val; ret = probe_mem_read(&val, (void *)val + code->offset, sizeof(val)); } else if (code->op == FETCH_OP_UDEREF) { lval = val; ret = probe_mem_read_user(&val, (void *)val + code->offset, sizeof(val)); } else break; if (ret) return ret; code++; } while (1); s3 = code; stage3: /* 3rd stage: store value to buffer */ if (unlikely(!dest)) { if (code->op == FETCH_OP_ST_STRING) { ret = fetch_store_strlen(val + code->offset); code++; goto array; } else if (code->op == FETCH_OP_ST_USTRING) { ret += fetch_store_strlen_user(val + code->offset); code++; goto array; } else return -EILSEQ; } switch (code->op) { case FETCH_OP_ST_RAW: fetch_store_raw(val, code, dest); break; case FETCH_OP_ST_MEM: probe_mem_read(dest, (void *)val + code->offset, code->size); break; case FETCH_OP_ST_UMEM: probe_mem_read_user(dest, (void *)val + code->offset, code->size); break; case FETCH_OP_ST_STRING: loc = *(u32 *)dest; ret = fetch_store_string(val + code->offset, dest, base); break; case FETCH_OP_ST_USTRING: loc = *(u32 *)dest; ret = fetch_store_string_user(val + code->offset, dest, base); break; default: return -EILSEQ; } code++; /* 4th stage: modify stored value if needed */ if (code->op == FETCH_OP_MOD_BF) { fetch_apply_bitfield(code, dest); code++; } array: /* the last stage: Loop on array */ if (code->op == FETCH_OP_LP_ARRAY) { total += ret; if (++i < code->param) { code = s3; if (s3->op != FETCH_OP_ST_STRING && s3->op != FETCH_OP_ST_USTRING) { dest += s3->size; val += s3->size; goto stage3; } code--; val = lval + sizeof(char *); if (dest) { dest += sizeof(u32); *(u32 *)dest = update_data_loc(loc, ret); } goto stage2; } code++; ret = total; } return code->op == FETCH_OP_END ? ret : -EILSEQ; } /* Sum up total data length for dynamic arraies (strings) */ static nokprobe_inline int __get_data_size(struct trace_probe *tp, struct pt_regs *regs) { struct probe_arg *arg; int i, len, ret = 0; for (i = 0; i < tp->nr_args; i++) { arg = tp->args + i; if (unlikely(arg->dynamic)) { len = process_fetch_insn(arg->code, regs, NULL, NULL); if (len > 0) ret += len; } } return ret; } /* Store the value of each argument */ static nokprobe_inline void store_trace_args(void *data, struct trace_probe *tp, struct pt_regs *regs, int header_size, int maxlen) { struct probe_arg *arg; void *base = data - header_size; void *dyndata = data + tp->size; u32 *dl; /* Data location */ int ret, i; for (i = 0; i < tp->nr_args; i++) { arg = tp->args + i; dl = data + arg->offset; /* Point the dynamic data area if needed */ if (unlikely(arg->dynamic)) *dl = make_data_loc(maxlen, dyndata - base); ret = process_fetch_insn(arg->code, regs, dl, base); if (unlikely(ret < 0 && arg->dynamic)) { *dl = make_data_loc(0, dyndata - base); } else { dyndata += ret; maxlen -= ret; } } } static inline int print_probe_args(struct trace_seq *s, struct probe_arg *args, int nr_args, u8 *data, void *field) { void *p; int i, j; for (i = 0; i < nr_args; i++) { struct probe_arg *a = args + i; trace_seq_printf(s, " %s=", a->name); if (likely(!a->count)) { if (!a->type->print(s, data + a->offset, field)) return -ENOMEM; continue; } trace_seq_putc(s, '{'); p = data + a->offset; for (j = 0; j < a->count; j++) { if (!a->type->print(s, p, field)) return -ENOMEM; trace_seq_putc(s, j == a->count - 1 ? '}' : ','); p += a->type->size; } } return 0; }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * NET Generic infrastructure for INET connection oriented protocols. * * Definitions for inet_connection_sock * * Authors: Many people, see the TCP sources * * From code originally in TCP */ #ifndef _INET_CONNECTION_SOCK_H #define _INET_CONNECTION_SOCK_H #include <linux/compiler.h> #include <linux/string.h> #include <linux/timer.h> #include <linux/poll.h> #include <linux/kernel.h> #include <linux/sockptr.h> #include <net/inet_sock.h> #include <net/request_sock.h> /* Cancel timers, when they are not required. */ #undef INET_CSK_CLEAR_TIMERS struct inet_bind_bucket; struct tcp_congestion_ops; /* * Pointers to address related TCP functions * (i.e. things that depend on the address family) */ struct inet_connection_sock_af_ops { int (*queue_xmit)(struct sock *sk, struct sk_buff *skb, struct flowi *fl); void (*send_check)(struct sock *sk, struct sk_buff *skb); int (*rebuild_header)(struct sock *sk); void (*sk_rx_dst_set)(struct sock *sk, const struct sk_buff *skb); int (*conn_request)(struct sock *sk, struct sk_buff *skb); struct sock *(*syn_recv_sock)(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst, struct request_sock *req_unhash, bool *own_req); u16 net_header_len; u16 net_frag_header_len; u16 sockaddr_len; int (*setsockopt)(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int (*getsockopt)(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); void (*addr2sockaddr)(struct sock *sk, struct sockaddr *); void (*mtu_reduced)(struct sock *sk); }; /** inet_connection_sock - INET connection oriented sock * * @icsk_accept_queue: FIFO of established children * @icsk_bind_hash: Bind node * @icsk_timeout: Timeout * @icsk_retransmit_timer: Resend (no ack) * @icsk_rto: Retransmit timeout * @icsk_pmtu_cookie Last pmtu seen by socket * @icsk_ca_ops Pluggable congestion control hook * @icsk_af_ops Operations which are AF_INET{4,6} specific * @icsk_ulp_ops Pluggable ULP control hook * @icsk_ulp_data ULP private data * @icsk_clean_acked Clean acked data hook * @icsk_listen_portaddr_node hash to the portaddr listener hashtable * @icsk_ca_state: Congestion control state * @icsk_retransmits: Number of unrecovered [RTO] timeouts * @icsk_pending: Scheduled timer event * @icsk_backoff: Backoff * @icsk_syn_retries: Number of allowed SYN (or equivalent) retries * @icsk_probes_out: unanswered 0 window probes * @icsk_ext_hdr_len: Network protocol overhead (IP/IPv6 options) * @icsk_ack: Delayed ACK control data * @icsk_mtup; MTU probing control data * @icsk_probes_tstamp: Probe timestamp (cleared by non-zero window ack) * @icsk_user_timeout: TCP_USER_TIMEOUT value */ struct inet_connection_sock { /* inet_sock has to be the first member! */ struct inet_sock icsk_inet; struct request_sock_queue icsk_accept_queue; struct inet_bind_bucket *icsk_bind_hash; unsigned long icsk_timeout; struct timer_list icsk_retransmit_timer; struct timer_list icsk_delack_timer; __u32 icsk_rto; __u32 icsk_rto_min; __u32 icsk_delack_max; __u32 icsk_pmtu_cookie; const struct tcp_congestion_ops *icsk_ca_ops; const struct inet_connection_sock_af_ops *icsk_af_ops; const struct tcp_ulp_ops *icsk_ulp_ops; void __rcu *icsk_ulp_data; void (*icsk_clean_acked)(struct sock *sk, u32 acked_seq); struct hlist_node icsk_listen_portaddr_node; unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); __u8 icsk_ca_state:5, icsk_ca_initialized:1, icsk_ca_setsockopt:1, icsk_ca_dst_locked:1; __u8 icsk_retransmits; __u8 icsk_pending; __u8 icsk_backoff; __u8 icsk_syn_retries; __u8 icsk_probes_out; __u16 icsk_ext_hdr_len; struct { __u8 pending; /* ACK is pending */ __u8 quick; /* Scheduled number of quick acks */ __u8 pingpong; /* The session is interactive */ __u8 retry; /* Number of attempts */ __u32 ato; /* Predicted tick of soft clock */ unsigned long timeout; /* Currently scheduled timeout */ __u32 lrcvtime; /* timestamp of last received data packet */ __u16 last_seg_size; /* Size of last incoming segment */ __u16 rcv_mss; /* MSS used for delayed ACK decisions */ } icsk_ack; struct { int enabled; /* Range of MTUs to search */ int search_high; int search_low; /* Information on the current probe. */ int probe_size; u32 probe_timestamp; } icsk_mtup; u32 icsk_probes_tstamp; u32 icsk_user_timeout; u64 icsk_ca_priv[104 / sizeof(u64)]; #define ICSK_CA_PRIV_SIZE (13 * sizeof(u64)) }; #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ #define ICSK_TIME_DACK 2 /* Delayed ack timer */ #define ICSK_TIME_PROBE0 3 /* Zero window probe timer */ #define ICSK_TIME_EARLY_RETRANS 4 /* Early retransmit timer */ #define ICSK_TIME_LOSS_PROBE 5 /* Tail loss probe timer */ #define ICSK_TIME_REO_TIMEOUT 6 /* Reordering timer */ static inline struct inet_connection_sock *inet_csk(const struct sock *sk) { return (struct inet_connection_sock *)sk; } static inline void *inet_csk_ca(const struct sock *sk) { return (void *)inet_csk(sk)->icsk_ca_priv; } struct sock *inet_csk_clone_lock(const struct sock *sk, const struct request_sock *req, const gfp_t priority); enum inet_csk_ack_state_t { ICSK_ACK_SCHED = 1, ICSK_ACK_TIMER = 2, ICSK_ACK_PUSHED = 4, ICSK_ACK_PUSHED2 = 8, ICSK_ACK_NOW = 16 /* Send the next ACK immediately (once) */ }; void inet_csk_init_xmit_timers(struct sock *sk, void (*retransmit_handler)(struct timer_list *), void (*delack_handler)(struct timer_list *), void (*keepalive_handler)(struct timer_list *)); void inet_csk_clear_xmit_timers(struct sock *sk); static inline void inet_csk_schedule_ack(struct sock *sk) { inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_SCHED; } static inline int inet_csk_ack_scheduled(const struct sock *sk) { return inet_csk(sk)->icsk_ack.pending & ICSK_ACK_SCHED; } static inline void inet_csk_delack_init(struct sock *sk) { memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack)); } void inet_csk_delete_keepalive_timer(struct sock *sk); void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout); static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what) { struct inet_connection_sock *icsk = inet_csk(sk); if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { icsk->icsk_pending = 0; #ifdef INET_CSK_CLEAR_TIMERS sk_stop_timer(sk, &icsk->icsk_retransmit_timer); #endif } else if (what == ICSK_TIME_DACK) { icsk->icsk_ack.pending = 0; icsk->icsk_ack.retry = 0; #ifdef INET_CSK_CLEAR_TIMERS sk_stop_timer(sk, &icsk->icsk_delack_timer); #endif } else { pr_debug("inet_csk BUG: unknown timer value\n"); } } /* * Reset the retransmission timer */ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, unsigned long when, const unsigned long max_when) { struct inet_connection_sock *icsk = inet_csk(sk); if (when > max_when) { pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n", sk, what, when, (void *)_THIS_IP_); when = max_when; } if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0 || what == ICSK_TIME_EARLY_RETRANS || what == ICSK_TIME_LOSS_PROBE || what == ICSK_TIME_REO_TIMEOUT) { icsk->icsk_pending = what; icsk->icsk_timeout = jiffies + when; sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); } else if (what == ICSK_TIME_DACK) { icsk->icsk_ack.pending |= ICSK_ACK_TIMER; icsk->icsk_ack.timeout = jiffies + when; sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); } else { pr_debug("inet_csk BUG: unknown timer value\n"); } } static inline unsigned long inet_csk_rto_backoff(const struct inet_connection_sock *icsk, unsigned long max_when) { u64 when = (u64)icsk->icsk_rto << icsk->icsk_backoff; return (unsigned long)min_t(u64, when, max_when); } struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern); int inet_csk_get_port(struct sock *sk, unsigned short snum); struct dst_entry *inet_csk_route_req(const struct sock *sk, struct flowi4 *fl4, const struct request_sock *req); struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, struct sock *newsk, const struct request_sock *req); struct sock *inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req, struct sock *child); void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, unsigned long timeout); struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child, struct request_sock *req, bool own_req); static inline void inet_csk_reqsk_queue_added(struct sock *sk) { reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue); } static inline int inet_csk_reqsk_queue_len(const struct sock *sk) { return reqsk_queue_len(&inet_csk(sk)->icsk_accept_queue); } static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) { return inet_csk_reqsk_queue_len(sk) >= sk->sk_max_ack_backlog; } bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req); void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req); static inline void inet_csk_prepare_for_destroy_sock(struct sock *sk) { /* The below has to be done to allow calling inet_csk_destroy_sock */ sock_set_flag(sk, SOCK_DEAD); percpu_counter_inc(sk->sk_prot->orphan_count); } void inet_csk_destroy_sock(struct sock *sk); void inet_csk_prepare_forced_close(struct sock *sk); /* * LISTEN is a special case for poll.. */ static inline __poll_t inet_csk_listen_poll(const struct sock *sk) { return !reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue) ? (EPOLLIN | EPOLLRDNORM) : 0; } int inet_csk_listen_start(struct sock *sk, int backlog); void inet_csk_listen_stop(struct sock *sk); void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr); /* update the fast reuse flag when adding a socket */ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, struct sock *sk); struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); #define TCP_PINGPONG_THRESH 3 static inline void inet_csk_enter_pingpong_mode(struct sock *sk) { inet_csk(sk)->icsk_ack.pingpong = TCP_PINGPONG_THRESH; } static inline void inet_csk_exit_pingpong_mode(struct sock *sk) { inet_csk(sk)->icsk_ack.pingpong = 0; } static inline bool inet_csk_in_pingpong_mode(struct sock *sk) { return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH; } static inline void inet_csk_inc_pingpong_cnt(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); if (icsk->icsk_ack.pingpong < U8_MAX) icsk->icsk_ack.pingpong++; } static inline bool inet_csk_has_ulp(struct sock *sk) { return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops; } #endif /* _INET_CONNECTION_SOCK_H */
1 2 3 4 5 6 7 8 9 10 11 /* SPDX-License-Identifier: GPL-2.0 */ #include <asm/processor.h> static inline int phys_addr_valid(resource_size_t addr) { #ifdef CONFIG_PHYS_ADDR_T_64BIT return !(addr >> boot_cpu_data.x86_phys_bits); #else return 1; #endif }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 // SPDX-License-Identifier: GPL-2.0-only /* * This implements the various checks for CONFIG_HARDENED_USERCOPY*, * which are designed to protect kernel memory from needless exposure * and overwrite under many unintended conditions. This code is based * on PAX_USERCOPY, which is: * * Copyright (C) 2001-2016 PaX Team, Bradley Spengler, Open Source * Security Inc. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/mm.h> #include <linux/highmem.h> #include <linux/slab.h> #include <linux/sched.h> #include <linux/sched/task.h> #include <linux/sched/task_stack.h> #include <linux/thread_info.h> #include <linux/atomic.h> #include <linux/jump_label.h> #include <asm/sections.h> /* * Checks if a given pointer and length is contained by the current * stack frame (if possible). * * Returns: * NOT_STACK: not at all on the stack * GOOD_FRAME: fully within a valid stack frame * GOOD_STACK: fully on the stack (when can't do frame-checking) * BAD_STACK: error condition (invalid stack position or bad stack frame) */ static noinline int check_stack_object(const void *obj, unsigned long len) { const void * const stack = task_stack_page(current); const void * const stackend = stack + THREAD_SIZE; int ret; /* Object is not on the stack at all. */ if (obj + len <= stack || stackend <= obj) return NOT_STACK; /* * Reject: object partially overlaps the stack (passing the * check above means at least one end is within the stack, * so if this check fails, the other end is outside the stack). */ if (obj < stack || stackend < obj + len) return BAD_STACK; /* Check if object is safely within a valid frame. */ ret = arch_within_stack_frames(stack, stackend, obj, len); if (ret) return ret; return GOOD_STACK; } /* * If these functions are reached, then CONFIG_HARDENED_USERCOPY has found * an unexpected state during a copy_from_user() or copy_to_user() call. * There are several checks being performed on the buffer by the * __check_object_size() function. Normal stack buffer usage should never * trip the checks, and kernel text addressing will always trip the check. * For cache objects, it is checking that only the whitelisted range of * bytes for a given cache is being accessed (via the cache's usersize and * useroffset fields). To adjust a cache whitelist, use the usercopy-aware * kmem_cache_create_usercopy() function to create the cache (and * carefully audit the whitelist range). */ void usercopy_warn(const char *name, const char *detail, bool to_user, unsigned long offset, unsigned long len) { WARN_ONCE(1, "Bad or missing usercopy whitelist? Kernel memory %s attempt detected %s %s%s%s%s (offset %lu, size %lu)!\n", to_user ? "exposure" : "overwrite", to_user ? "from" : "to", name ? : "unknown?!", detail ? " '" : "", detail ? : "", detail ? "'" : "", offset, len); } void __noreturn usercopy_abort(const char *name, const char *detail, bool to_user, unsigned long offset, unsigned long len) { pr_emerg("Kernel memory %s attempt detected %s %s%s%s%s (offset %lu, size %lu)!\n", to_user ? "exposure" : "overwrite", to_user ? "from" : "to", name ? : "unknown?!", detail ? " '" : "", detail ? : "", detail ? "'" : "", offset, len); /* * For greater effect, it would be nice to do do_group_exit(), * but BUG() actually hooks all the lock-breaking and per-arch * Oops code, so that is used here instead. */ BUG(); } /* Returns true if any portion of [ptr,ptr+n) over laps with [low,high). */ static bool overlaps(const unsigned long ptr, unsigned long n, unsigned long low, unsigned long high) { const unsigned long check_low = ptr; unsigned long check_high = check_low + n; /* Does not overlap if entirely above or entirely below. */ if (check_low >= high || check_high <= low) return false; return true; } /* Is this address range in the kernel text area? */ static inline void check_kernel_text_object(const unsigned long ptr, unsigned long n, bool to_user) { unsigned long textlow = (unsigned long)_stext; unsigned long texthigh = (unsigned long)_etext; unsigned long textlow_linear, texthigh_linear; if (overlaps(ptr, n, textlow, texthigh)) usercopy_abort("kernel text", NULL, to_user, ptr - textlow, n); /* * Some architectures have virtual memory mappings with a secondary * mapping of the kernel text, i.e. there is more than one virtual * kernel address that points to the kernel image. It is usually * when there is a separate linear physical memory mapping, in that * __pa() is not just the reverse of __va(). This can be detected * and checked: */ textlow_linear = (unsigned long)lm_alias(textlow); /* No different mapping: we're done. */ if (textlow_linear == textlow) return; /* Check the secondary mapping... */ texthigh_linear = (unsigned long)lm_alias(texthigh); if (overlaps(ptr, n, textlow_linear, texthigh_linear)) usercopy_abort("linear kernel text", NULL, to_user, ptr - textlow_linear, n); } static inline void check_bogus_address(const unsigned long ptr, unsigned long n, bool to_user) { /* Reject if object wraps past end of memory. */ if (ptr + (n - 1) < ptr) usercopy_abort("wrapped address", NULL, to_user, 0, ptr + n); /* Reject if NULL or ZERO-allocation. */ if (ZERO_OR_NULL_PTR(ptr)) usercopy_abort("null address", NULL, to_user, ptr, n); } /* Checks for allocs that are marked in some way as spanning multiple pages. */ static inline void check_page_span(const void *ptr, unsigned long n, struct page *page, bool to_user) { #ifdef CONFIG_HARDENED_USERCOPY_PAGESPAN const void *end = ptr + n - 1; struct page *endpage; bool is_reserved, is_cma; /* * Sometimes the kernel data regions are not marked Reserved (see * check below). And sometimes [_sdata,_edata) does not cover * rodata and/or bss, so check each range explicitly. */ /* Allow reads of kernel rodata region (if not marked as Reserved). */ if (ptr >= (const void *)__start_rodata && end <= (const void *)__end_rodata) { if (!to_user) usercopy_abort("rodata", NULL, to_user, 0, n); return; } /* Allow kernel data region (if not marked as Reserved). */ if (ptr >= (const void *)_sdata && end <= (const void *)_edata) return; /* Allow kernel bss region (if not marked as Reserved). */ if (ptr >= (const void *)__bss_start && end <= (const void *)__bss_stop) return; /* Is the object wholly within one base page? */ if (likely(((unsigned long)ptr & (unsigned long)PAGE_MASK) == ((unsigned long)end & (unsigned long)PAGE_MASK))) return; /* Allow if fully inside the same compound (__GFP_COMP) page. */ endpage = virt_to_head_page(end); if (likely(endpage == page)) return; /* * Reject if range is entirely either Reserved (i.e. special or * device memory), or CMA. Otherwise, reject since the object spans * several independently allocated pages. */ is_reserved = PageReserved(page); is_cma = is_migrate_cma_page(page); if (!is_reserved && !is_cma) usercopy_abort("spans multiple pages", NULL, to_user, 0, n); for (ptr += PAGE_SIZE; ptr <= end; ptr += PAGE_SIZE) { page = virt_to_head_page(ptr); if (is_reserved && !PageReserved(page)) usercopy_abort("spans Reserved and non-Reserved pages", NULL, to_user, 0, n); if (is_cma && !is_migrate_cma_page(page)) usercopy_abort("spans CMA and non-CMA pages", NULL, to_user, 0, n); } #endif } static inline void check_heap_object(const void *ptr, unsigned long n, bool to_user) { struct page *page; if (!virt_addr_valid(ptr)) return; /* * When CONFIG_HIGHMEM=y, kmap_to_page() will give either the * highmem page or fallback to virt_to_page(). The following * is effectively a highmem-aware virt_to_head_page(). */ page = compound_head(kmap_to_page((void *)ptr)); if (PageSlab(page)) { /* Check slab allocator for flags and size. */ __check_heap_object(ptr, n, page, to_user); } else { /* Verify object does not incorrectly span multiple pages. */ check_page_span(ptr, n, page, to_user); } } static DEFINE_STATIC_KEY_FALSE_RO(bypass_usercopy_checks); /* * Validates that the given object is: * - not bogus address * - fully contained by stack (or stack frame, when available) * - fully within SLAB object (or object whitelist area, when available) * - not in kernel text */ void __check_object_size(const void *ptr, unsigned long n, bool to_user) { if (static_branch_unlikely(&bypass_usercopy_checks)) return; /* Skip all tests if size is zero. */ if (!n) return; /* Check for invalid addresses. */ check_bogus_address((const unsigned long)ptr, n, to_user); /* Check for bad stack object. */ switch (check_stack_object(ptr, n)) { case NOT_STACK: /* Object is not touching the current process stack. */ break; case GOOD_FRAME: case GOOD_STACK: /* * Object is either in the correct frame (when it * is possible to check) or just generally on the * process stack (when frame checking not available). */ return; default: usercopy_abort("process stack", NULL, to_user, 0, n); } /* Check for bad heap object. */ check_heap_object(ptr, n, to_user); /* Check for object in kernel to avoid text exposure. */ check_kernel_text_object((const unsigned long)ptr, n, to_user); } EXPORT_SYMBOL(__check_object_size); static bool enable_checks __initdata = true; static int __init parse_hardened_usercopy(char *str) { return strtobool(str, &enable_checks); } __setup("hardened_usercopy=", parse_hardened_usercopy); static int __init set_hardened_usercopy(void) { if (enable_checks == false) static_branch_enable(&bypass_usercopy_checks); return 1; } late_initcall(set_hardened_usercopy);
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Cryptographic API. * * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> * Copyright (c) 2005 Herbert Xu <herbert@gondor.apana.org.au> */ #ifndef _CRYPTO_INTERNAL_H #define _CRYPTO_INTERNAL_H #include <crypto/algapi.h> #include <linux/completion.h> #include <linux/list.h> #include <linux/module.h> #include <linux/notifier.h> #include <linux/numa.h> #include <linux/refcount.h> #include <linux/rwsem.h> #include <linux/sched.h> #include <linux/types.h> struct crypto_instance; struct crypto_template; struct crypto_larval { struct crypto_alg alg; struct crypto_alg *adult; struct completion completion; u32 mask; }; extern struct list_head crypto_alg_list; extern struct rw_semaphore crypto_alg_sem; extern struct blocking_notifier_head crypto_chain; #ifdef CONFIG_PROC_FS void __init crypto_init_proc(void); void __exit crypto_exit_proc(void); #else static inline void crypto_init_proc(void) { } static inline void crypto_exit_proc(void) { } #endif static inline unsigned int crypto_cipher_ctxsize(struct crypto_alg *alg) { return alg->cra_ctxsize; } static inline unsigned int crypto_compress_ctxsize(struct crypto_alg *alg) { return alg->cra_ctxsize; } struct crypto_alg *crypto_mod_get(struct crypto_alg *alg); struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask); struct crypto_larval *crypto_larval_alloc(const char *name, u32 type, u32 mask); void crypto_larval_kill(struct crypto_alg *alg); void crypto_alg_tested(const char *name, int err); void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list, struct crypto_alg *nalg); void crypto_remove_final(struct list_head *list); void crypto_shoot_alg(struct crypto_alg *alg); struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type, u32 mask); void *crypto_create_tfm_node(struct crypto_alg *alg, const struct crypto_type *frontend, int node); static inline void *crypto_create_tfm(struct crypto_alg *alg, const struct crypto_type *frontend) { return crypto_create_tfm_node(alg, frontend, NUMA_NO_NODE); } struct crypto_alg *crypto_find_alg(const char *alg_name, const struct crypto_type *frontend, u32 type, u32 mask); void *crypto_alloc_tfm_node(const char *alg_name, const struct crypto_type *frontend, u32 type, u32 mask, int node); static inline void *crypto_alloc_tfm(const char *alg_name, const struct crypto_type *frontend, u32 type, u32 mask) { return crypto_alloc_tfm_node(alg_name, frontend, type, mask, NUMA_NO_NODE); } int crypto_probing_notify(unsigned long val, void *v); unsigned int crypto_alg_extsize(struct crypto_alg *alg); int crypto_type_has_alg(const char *name, const struct crypto_type *frontend, u32 type, u32 mask); static inline struct crypto_alg *crypto_alg_get(struct crypto_alg *alg) { refcount_inc(&alg->cra_refcnt); return alg; } static inline void crypto_alg_put(struct crypto_alg *alg) { if (refcount_dec_and_test(&alg->cra_refcnt) && alg->cra_destroy) alg->cra_destroy(alg); } static inline int crypto_tmpl_get(struct crypto_template *tmpl) { return try_module_get(tmpl->module); } static inline void crypto_tmpl_put(struct crypto_template *tmpl) { module_put(tmpl->module); } static inline int crypto_is_larval(struct crypto_alg *alg) { return alg->cra_flags & CRYPTO_ALG_LARVAL; } static inline int crypto_is_dead(struct crypto_alg *alg) { return alg->cra_flags & CRYPTO_ALG_DEAD; } static inline int crypto_is_moribund(struct crypto_alg *alg) { return alg->cra_flags & (CRYPTO_ALG_DEAD | CRYPTO_ALG_DYING); } static inline void crypto_notify(unsigned long val, void *v) { blocking_notifier_call_chain(&crypto_chain, val, v); } static inline void crypto_yield(u32 flags) { if (flags & CRYPTO_TFM_REQ_MAY_SLEEP) cond_resched(); } #endif /* _CRYPTO_INTERNAL_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 /* SPDX-License-Identifier: GPL-2.0-only */ /* * pm_runtime.h - Device run-time power management helper functions. * * Copyright (C) 2009 Rafael J. Wysocki <rjw@sisk.pl> */ #ifndef _LINUX_PM_RUNTIME_H #define _LINUX_PM_RUNTIME_H #include <linux/device.h> #include <linux/notifier.h> #include <linux/pm.h> #include <linux/jiffies.h> /* Runtime PM flag argument bits */ #define RPM_ASYNC 0x01 /* Request is asynchronous */ #define RPM_NOWAIT 0x02 /* Don't wait for concurrent state change */ #define RPM_GET_PUT 0x04 /* Increment/decrement the usage_count */ #define RPM_AUTO 0x08 /* Use autosuspend_delay */ #ifdef CONFIG_PM extern struct workqueue_struct *pm_wq; static inline bool queue_pm_work(struct work_struct *work) { return queue_work(pm_wq, work); } extern int pm_generic_runtime_suspend(struct device *dev); extern int pm_generic_runtime_resume(struct device *dev); extern int pm_runtime_force_suspend(struct device *dev); extern int pm_runtime_force_resume(struct device *dev); extern int __pm_runtime_idle(struct device *dev, int rpmflags); extern int __pm_runtime_suspend(struct device *dev, int rpmflags); extern int __pm_runtime_resume(struct device *dev, int rpmflags); extern int pm_runtime_get_if_active(struct device *dev, bool ign_usage_count); extern int pm_schedule_suspend(struct device *dev, unsigned int delay); extern int __pm_runtime_set_status(struct device *dev, unsigned int status); extern int pm_runtime_barrier(struct device *dev); extern void pm_runtime_enable(struct device *dev); extern void __pm_runtime_disable(struct device *dev, bool check_resume); extern void pm_runtime_allow(struct device *dev); extern void pm_runtime_forbid(struct device *dev); extern void pm_runtime_no_callbacks(struct device *dev); extern void pm_runtime_irq_safe(struct device *dev); extern void __pm_runtime_use_autosuspend(struct device *dev, bool use); extern void pm_runtime_set_autosuspend_delay(struct device *dev, int delay); extern u64 pm_runtime_autosuspend_expiration(struct device *dev); extern void pm_runtime_update_max_time_suspended(struct device *dev, s64 delta_ns); extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable); extern void pm_runtime_get_suppliers(struct device *dev); extern void pm_runtime_put_suppliers(struct device *dev); extern void pm_runtime_new_link(struct device *dev); extern void pm_runtime_drop_link(struct device_link *link); /** * pm_runtime_get_if_in_use - Conditionally bump up runtime PM usage counter. * @dev: Target device. * * Increment the runtime PM usage counter of @dev if its runtime PM status is * %RPM_ACTIVE and its runtime PM usage counter is greater than 0. */ static inline int pm_runtime_get_if_in_use(struct device *dev) { return pm_runtime_get_if_active(dev, false); } /** * pm_suspend_ignore_children - Set runtime PM behavior regarding children. * @dev: Target device. * @enable: Whether or not to ignore possible dependencies on children. * * The dependencies of @dev on its children will not be taken into account by * the runtime PM framework going forward if @enable is %true, or they will * be taken into account otherwise. */ static inline void pm_suspend_ignore_children(struct device *dev, bool enable) { dev->power.ignore_children = enable; } /** * pm_runtime_get_noresume - Bump up runtime PM usage counter of a device. * @dev: Target device. */ static inline void pm_runtime_get_noresume(struct device *dev) { atomic_inc(&dev->power.usage_count); } /** * pm_runtime_put_noidle - Drop runtime PM usage counter of a device. * @dev: Target device. * * Decrement the runtime PM usage counter of @dev unless it is 0 already. */ static inline void pm_runtime_put_noidle(struct device *dev) { atomic_add_unless(&dev->power.usage_count, -1, 0); } /** * pm_runtime_suspended - Check whether or not a device is runtime-suspended. * @dev: Target device. * * Return %true if runtime PM is enabled for @dev and its runtime PM status is * %RPM_SUSPENDED, or %false otherwise. * * Note that the return value of this function can only be trusted if it is * called under the runtime PM lock of @dev or under conditions in which * runtime PM cannot be either disabled or enabled for @dev and its runtime PM * status cannot change. */ static inline bool pm_runtime_suspended(struct device *dev) { return dev->power.runtime_status == RPM_SUSPENDED && !dev->power.disable_depth; } /** * pm_runtime_active - Check whether or not a device is runtime-active. * @dev: Target device. * * Return %true if runtime PM is enabled for @dev and its runtime PM status is * %RPM_ACTIVE, or %false otherwise. * * Note that the return value of this function can only be trusted if it is * called under the runtime PM lock of @dev or under conditions in which * runtime PM cannot be either disabled or enabled for @dev and its runtime PM * status cannot change. */ static inline bool pm_runtime_active(struct device *dev) { return dev->power.runtime_status == RPM_ACTIVE || dev->power.disable_depth; } /** * pm_runtime_status_suspended - Check if runtime PM status is "suspended". * @dev: Target device. * * Return %true if the runtime PM status of @dev is %RPM_SUSPENDED, or %false * otherwise, regardless of whether or not runtime PM has been enabled for @dev. * * Note that the return value of this function can only be trusted if it is * called under the runtime PM lock of @dev or under conditions in which the * runtime PM status of @dev cannot change. */ static inline bool pm_runtime_status_suspended(struct device *dev) { return dev->power.runtime_status == RPM_SUSPENDED; } /** * pm_runtime_enabled - Check if runtime PM is enabled. * @dev: Target device. * * Return %true if runtime PM is enabled for @dev or %false otherwise. * * Note that the return value of this function can only be trusted if it is * called under the runtime PM lock of @dev or under conditions in which * runtime PM cannot be either disabled or enabled for @dev. */ static inline bool pm_runtime_enabled(struct device *dev) { return !dev->power.disable_depth; } /** * pm_runtime_has_no_callbacks - Check if runtime PM callbacks may be present. * @dev: Target device. * * Return %true if @dev is a special device without runtime PM callbacks or * %false otherwise. */ static inline bool pm_runtime_has_no_callbacks(struct device *dev) { return dev->power.no_callbacks; } /** * pm_runtime_mark_last_busy - Update the last access time of a device. * @dev: Target device. * * Update the last access time of @dev used by the runtime PM autosuspend * mechanism to the current time as returned by ktime_get_mono_fast_ns(). */ static inline void pm_runtime_mark_last_busy(struct device *dev) { WRITE_ONCE(dev->power.last_busy, ktime_get_mono_fast_ns()); } /** * pm_runtime_is_irq_safe - Check if runtime PM can work in interrupt context. * @dev: Target device. * * Return %true if @dev has been marked as an "IRQ-safe" device (with respect * to runtime PM), in which case its runtime PM callabcks can be expected to * work correctly when invoked from interrupt handlers. */ static inline bool pm_runtime_is_irq_safe(struct device *dev) { return dev->power.irq_safe; } extern u64 pm_runtime_suspended_time(struct device *dev); #else /* !CONFIG_PM */ static inline bool queue_pm_work(struct work_struct *work) { return false; } static inline int pm_generic_runtime_suspend(struct device *dev) { return 0; } static inline int pm_generic_runtime_resume(struct device *dev) { return 0; } static inline int pm_runtime_force_suspend(struct device *dev) { return 0; } static inline int pm_runtime_force_resume(struct device *dev) { return 0; } static inline int __pm_runtime_idle(struct device *dev, int rpmflags) { return -ENOSYS; } static inline int __pm_runtime_suspend(struct device *dev, int rpmflags) { return -ENOSYS; } static inline int __pm_runtime_resume(struct device *dev, int rpmflags) { return 1; } static inline int pm_schedule_suspend(struct device *dev, unsigned int delay) { return -ENOSYS; } static inline int pm_runtime_get_if_in_use(struct device *dev) { return -EINVAL; } static inline int pm_runtime_get_if_active(struct device *dev, bool ign_usage_count) { return -EINVAL; } static inline int __pm_runtime_set_status(struct device *dev, unsigned int status) { return 0; } static inline int pm_runtime_barrier(struct device *dev) { return 0; } static inline void pm_runtime_enable(struct device *dev) {} static inline void __pm_runtime_disable(struct device *dev, bool c) {} static inline void pm_runtime_allow(struct device *dev) {} static inline void pm_runtime_forbid(struct device *dev) {} static inline void pm_suspend_ignore_children(struct device *dev, bool enable) {} static inline void pm_runtime_get_noresume(struct device *dev) {} static inline void pm_runtime_put_noidle(struct device *dev) {} static inline bool pm_runtime_suspended(struct device *dev) { return false; } static inline bool pm_runtime_active(struct device *dev) { return true; } static inline bool pm_runtime_status_suspended(struct device *dev) { return false; } static inline bool pm_runtime_enabled(struct device *dev) { return false; } static inline void pm_runtime_no_callbacks(struct device *dev) {} static inline void pm_runtime_irq_safe(struct device *dev) {} static inline bool pm_runtime_is_irq_safe(struct device *dev) { return false; } static inline bool pm_runtime_has_no_callbacks(struct device *dev) { return false; } static inline void pm_runtime_mark_last_busy(struct device *dev) {} static inline void __pm_runtime_use_autosuspend(struct device *dev, bool use) {} static inline void pm_runtime_set_autosuspend_delay(struct device *dev, int delay) {} static inline u64 pm_runtime_autosuspend_expiration( struct device *dev) { return 0; } static inline void pm_runtime_set_memalloc_noio(struct device *dev, bool enable){} static inline void pm_runtime_get_suppliers(struct device *dev) {} static inline void pm_runtime_put_suppliers(struct device *dev) {} static inline void pm_runtime_new_link(struct device *dev) {} static inline void pm_runtime_drop_link(struct device_link *link) {} #endif /* !CONFIG_PM */ /** * pm_runtime_idle - Conditionally set up autosuspend of a device or suspend it. * @dev: Target device. * * Invoke the "idle check" callback of @dev and, depending on its return value, * set up autosuspend of @dev or suspend it (depending on whether or not * autosuspend has been enabled for it). */ static inline int pm_runtime_idle(struct device *dev) { return __pm_runtime_idle(dev, 0); } /** * pm_runtime_suspend - Suspend a device synchronously. * @dev: Target device. */ static inline int pm_runtime_suspend(struct device *dev) { return __pm_runtime_suspend(dev, 0); } /** * pm_runtime_autosuspend - Set up autosuspend of a device or suspend it. * @dev: Target device. * * Set up autosuspend of @dev or suspend it (depending on whether or not * autosuspend is enabled for it) without engaging its "idle check" callback. */ static inline int pm_runtime_autosuspend(struct device *dev) { return __pm_runtime_suspend(dev, RPM_AUTO); } /** * pm_runtime_resume - Resume a device synchronously. * @dev: Target device. */ static inline int pm_runtime_resume(struct device *dev) { return __pm_runtime_resume(dev, 0); } /** * pm_request_idle - Queue up "idle check" execution for a device. * @dev: Target device. * * Queue up a work item to run an equivalent of pm_runtime_idle() for @dev * asynchronously. */ static inline int pm_request_idle(struct device *dev) { return __pm_runtime_idle(dev, RPM_ASYNC); } /** * pm_request_resume - Queue up runtime-resume of a device. * @dev: Target device. */ static inline int pm_request_resume(struct device *dev) { return __pm_runtime_resume(dev, RPM_ASYNC); } /** * pm_request_autosuspend - Queue up autosuspend of a device. * @dev: Target device. * * Queue up a work item to run an equivalent pm_runtime_autosuspend() for @dev * asynchronously. */ static inline int pm_request_autosuspend(struct device *dev) { return __pm_runtime_suspend(dev, RPM_ASYNC | RPM_AUTO); } /** * pm_runtime_get - Bump up usage counter and queue up resume of a device. * @dev: Target device. * * Bump up the runtime PM usage counter of @dev and queue up a work item to * carry out runtime-resume of it. */ static inline int pm_runtime_get(struct device *dev) { return __pm_runtime_resume(dev, RPM_GET_PUT | RPM_ASYNC); } /** * pm_runtime_get_sync - Bump up usage counter of a device and resume it. * @dev: Target device. * * Bump up the runtime PM usage counter of @dev and carry out runtime-resume of * it synchronously. * * The possible return values of this function are the same as for * pm_runtime_resume() and the runtime PM usage counter of @dev remains * incremented in all cases, even if it returns an error code. */ static inline int pm_runtime_get_sync(struct device *dev) { return __pm_runtime_resume(dev, RPM_GET_PUT); } /** * pm_runtime_resume_and_get - Bump up usage counter of a device and resume it. * @dev: Target device. * * Resume @dev synchronously and if that is successful, increment its runtime * PM usage counter. Return 0 if the runtime PM usage counter of @dev has been * incremented or a negative error code otherwise. */ static inline int pm_runtime_resume_and_get(struct device *dev) { int ret; ret = __pm_runtime_resume(dev, RPM_GET_PUT); if (ret < 0) { pm_runtime_put_noidle(dev); return ret; } return 0; } /** * pm_runtime_put - Drop device usage counter and queue up "idle check" if 0. * @dev: Target device. * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, queue up a work item for @dev like in pm_request_idle(). */ static inline int pm_runtime_put(struct device *dev) { return __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC); } /** * pm_runtime_put_autosuspend - Drop device usage counter and queue autosuspend if 0. * @dev: Target device. * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, queue up a work item for @dev like in pm_request_autosuspend(). */ static inline int pm_runtime_put_autosuspend(struct device *dev) { return __pm_runtime_suspend(dev, RPM_GET_PUT | RPM_ASYNC | RPM_AUTO); } /** * pm_runtime_put_sync - Drop device usage counter and run "idle check" if 0. * @dev: Target device. * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, invoke the "idle check" callback of @dev and, depending on its * return value, set up autosuspend of @dev or suspend it (depending on whether * or not autosuspend has been enabled for it). * * The possible return values of this function are the same as for * pm_runtime_idle() and the runtime PM usage counter of @dev remains * decremented in all cases, even if it returns an error code. */ static inline int pm_runtime_put_sync(struct device *dev) { return __pm_runtime_idle(dev, RPM_GET_PUT); } /** * pm_runtime_put_sync_suspend - Drop device usage counter and suspend if 0. * @dev: Target device. * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, carry out runtime-suspend of @dev synchronously. * * The possible return values of this function are the same as for * pm_runtime_suspend() and the runtime PM usage counter of @dev remains * decremented in all cases, even if it returns an error code. */ static inline int pm_runtime_put_sync_suspend(struct device *dev) { return __pm_runtime_suspend(dev, RPM_GET_PUT); } /** * pm_runtime_put_sync_autosuspend - Drop device usage counter and autosuspend if 0. * @dev: Target device. * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, set up autosuspend of @dev or suspend it synchronously (depending * on whether or not autosuspend has been enabled for it). * * The possible return values of this function are the same as for * pm_runtime_autosuspend() and the runtime PM usage counter of @dev remains * decremented in all cases, even if it returns an error code. */ static inline int pm_runtime_put_sync_autosuspend(struct device *dev) { return __pm_runtime_suspend(dev, RPM_GET_PUT | RPM_AUTO); } /** * pm_runtime_set_active - Set runtime PM status to "active". * @dev: Target device. * * Set the runtime PM status of @dev to %RPM_ACTIVE and ensure that dependencies * of it will be taken into account. * * It is not valid to call this function for devices with runtime PM enabled. */ static inline int pm_runtime_set_active(struct device *dev) { return __pm_runtime_set_status(dev, RPM_ACTIVE); } /** * pm_runtime_set_suspended - Set runtime PM status to "suspended". * @dev: Target device. * * Set the runtime PM status of @dev to %RPM_SUSPENDED and ensure that * dependencies of it will be taken into account. * * It is not valid to call this function for devices with runtime PM enabled. */ static inline int pm_runtime_set_suspended(struct device *dev) { return __pm_runtime_set_status(dev, RPM_SUSPENDED); } /** * pm_runtime_disable - Disable runtime PM for a device. * @dev: Target device. * * Prevent the runtime PM framework from working with @dev (by incrementing its * "blocking" counter). * * For each invocation of this function for @dev there must be a matching * pm_runtime_enable() call in order for runtime PM to be enabled for it. */ static inline void pm_runtime_disable(struct device *dev) { __pm_runtime_disable(dev, true); } /** * pm_runtime_use_autosuspend - Allow autosuspend to be used for a device. * @dev: Target device. * * Allow the runtime PM autosuspend mechanism to be used for @dev whenever * requested (or "autosuspend" will be handled as direct runtime-suspend for * it). */ static inline void pm_runtime_use_autosuspend(struct device *dev) { __pm_runtime_use_autosuspend(dev, true); } /** * pm_runtime_dont_use_autosuspend - Prevent autosuspend from being used. * @dev: Target device. * * Prevent the runtime PM autosuspend mechanism from being used for @dev which * means that "autosuspend" will be handled as direct runtime-suspend for it * going forward. */ static inline void pm_runtime_dont_use_autosuspend(struct device *dev) { __pm_runtime_use_autosuspend(dev, false); } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __MAC802154_DRIVER_OPS #define __MAC802154_DRIVER_OPS #include <linux/types.h> #include <linux/rtnetlink.h> #include <net/mac802154.h> #include "ieee802154_i.h" #include "trace.h" static inline int drv_xmit_async(struct ieee802154_local *local, struct sk_buff *skb) { return local->ops->xmit_async(&local->hw, skb); } static inline int drv_xmit_sync(struct ieee802154_local *local, struct sk_buff *skb) { might_sleep(); return local->ops->xmit_sync(&local->hw, skb); } static inline int drv_start(struct ieee802154_local *local) { int ret; might_sleep(); trace_802154_drv_start(local); local->started = true; smp_mb(); ret = local->ops->start(&local->hw); trace_802154_drv_return_int(local, ret); return ret; } static inline void drv_stop(struct ieee802154_local *local) { might_sleep(); trace_802154_drv_stop(local); local->ops->stop(&local->hw); trace_802154_drv_return_void(local); /* sync away all work on the tasklet before clearing started */ tasklet_disable(&local->tasklet); tasklet_enable(&local->tasklet); barrier(); local->started = false; } static inline int drv_set_channel(struct ieee802154_local *local, u8 page, u8 channel) { int ret; might_sleep(); trace_802154_drv_set_channel(local, page, channel); ret = local->ops->set_channel(&local->hw, page, channel); trace_802154_drv_return_int(local, ret); return ret; } static inline int drv_set_tx_power(struct ieee802154_local *local, s32 mbm) { int ret; might_sleep(); if (!local->ops->set_txpower) { WARN_ON(1); return -EOPNOTSUPP; } trace_802154_drv_set_tx_power(local, mbm); ret = local->ops->set_txpower(&local->hw, mbm); trace_802154_drv_return_int(local, ret); return ret; } static inline int drv_set_cca_mode(struct ieee802154_local *local, const struct wpan_phy_cca *cca) { int ret; might_sleep(); if (!local->ops->set_cca_mode) { WARN_ON(1); return -EOPNOTSUPP; } trace_802154_drv_set_cca_mode(local, cca); ret = local->ops->set_cca_mode(&local->hw, cca); trace_802154_drv_return_int(local, ret); return ret; } static inline int drv_set_lbt_mode(struct ieee802154_local *local, bool mode) { int ret; might_sleep(); if (!local->ops->set_lbt) { WARN_ON(1); return -EOPNOTSUPP; } trace_802154_drv_set_lbt_mode(local, mode); ret = local->ops->set_lbt(&local->hw, mode); trace_802154_drv_return_int(local, ret); return ret; } static inline int drv_set_cca_ed_level(struct ieee802154_local *local, s32 mbm) { int ret; might_sleep(); if (!local->ops->set_cca_ed_level) { WARN_ON(1); return -EOPNOTSUPP; } trace_802154_drv_set_cca_ed_level(local, mbm); ret = local->ops->set_cca_ed_level(&local->hw, mbm); trace_802154_drv_return_int(local, ret); return ret; } static inline int drv_set_pan_id(struct ieee802154_local *local, __le16 pan_id) { struct ieee802154_hw_addr_filt filt; int ret; might_sleep(); if (!local->ops->set_hw_addr_filt) { WARN_ON(1); return -EOPNOTSUPP; } filt.pan_id = pan_id; trace_802154_drv_set_pan_id(local, pan_id); ret = local->ops->set_hw_addr_filt(&local->hw, &filt, IEEE802154_AFILT_PANID_CHANGED); trace_802154_drv_return_int(local, ret); return ret; } static inline int drv_set_extended_addr(struct ieee802154_local *local, __le64 extended_addr) { struct ieee802154_hw_addr_filt filt; int ret; might_sleep(); if (!local->ops->set_hw_addr_filt) { WARN_ON(1); return -EOPNOTSUPP; } filt.ieee_addr = extended_addr; trace_802154_drv_set_extended_addr(local, extended_addr); ret = local->ops->set_hw_addr_filt(&local->hw, &filt, IEEE802154_AFILT_IEEEADDR_CHANGED); trace_802154_drv_return_int(local, ret); return ret; } static inline int drv_set_short_addr(struct ieee802154_local *local, __le16 short_addr) { struct ieee802154_hw_addr_filt filt; int ret; might_sleep(); if (!local->ops->set_hw_addr_filt) { WARN_ON(1); return -EOPNOTSUPP; } filt.short_addr = short_addr; trace_802154_drv_set_short_addr(local, short_addr); ret = local->ops->set_hw_addr_filt(&local->hw, &filt, IEEE802154_AFILT_SADDR_CHANGED); trace_802154_drv_return_int(local, ret); return ret; } static inline int drv_set_pan_coord(struct ieee802154_local *local, bool is_coord) { struct ieee802154_hw_addr_filt filt; int ret; might_sleep(); if (!local->ops->set_hw_addr_filt) { WARN_ON(1); return -EOPNOTSUPP; } filt.pan_coord = is_coord; trace_802154_drv_set_pan_coord(local, is_coord); ret = local->ops->set_hw_addr_filt(&local->hw, &filt, IEEE802154_AFILT_PANC_CHANGED); trace_802154_drv_return_int(local, ret); return ret; } static inline int drv_set_csma_params(struct ieee802154_local *local, u8 min_be, u8 max_be, u8 max_csma_backoffs) { int ret; might_sleep(); if (!local->ops->set_csma_params) { WARN_ON(1); return -EOPNOTSUPP; } trace_802154_drv_set_csma_params(local, min_be, max_be, max_csma_backoffs); ret = local->ops->set_csma_params(&local->hw, min_be, max_be, max_csma_backoffs); trace_802154_drv_return_int(local, ret); return ret; } static inline int drv_set_max_frame_retries(struct ieee802154_local *local, s8 max_frame_retries) { int ret; might_sleep(); if (!local->ops->set_frame_retries) { WARN_ON(1); return -EOPNOTSUPP; } trace_802154_drv_set_max_frame_retries(local, max_frame_retries); ret = local->ops->set_frame_retries(&local->hw, max_frame_retries); trace_802154_drv_return_int(local, ret); return ret; } static inline int drv_set_promiscuous_mode(struct ieee802154_local *local, bool on) { int ret; might_sleep(); if (!local->ops->set_promiscuous_mode) { WARN_ON(1); return -EOPNOTSUPP; } trace_802154_drv_set_promiscuous_mode(local, on); ret = local->ops->set_promiscuous_mode(&local->hw, on); trace_802154_drv_return_int(local, ret); return ret; } #endif /* __MAC802154_DRIVER_OPS */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 /* SPDX-License-Identifier: GPL-2.0 */ /* * memory buffer pool support */ #ifndef _LINUX_MEMPOOL_H #define _LINUX_MEMPOOL_H #include <linux/wait.h> #include <linux/compiler.h> struct kmem_cache; typedef void * (mempool_alloc_t)(gfp_t gfp_mask, void *pool_data); typedef void (mempool_free_t)(void *element, void *pool_data); typedef struct mempool_s { spinlock_t lock; int min_nr; /* nr of elements at *elements */ int curr_nr; /* Current nr of elements at *elements */ void **elements; void *pool_data; mempool_alloc_t *alloc; mempool_free_t *free; wait_queue_head_t wait; } mempool_t; static inline bool mempool_initialized(mempool_t *pool) { return pool->elements != NULL; } void mempool_exit(mempool_t *pool); int mempool_init_node(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data, gfp_t gfp_mask, int node_id); int mempool_init(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data); extern mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data); extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data, gfp_t gfp_mask, int nid); extern int mempool_resize(mempool_t *pool, int new_min_nr); extern void mempool_destroy(mempool_t *pool); extern void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask) __malloc; extern void mempool_free(void *element, mempool_t *pool); /* * A mempool_alloc_t and mempool_free_t that get the memory from * a slab cache that is passed in through pool_data. * Note: the slab cache may not have a ctor function. */ void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data); void mempool_free_slab(void *element, void *pool_data); static inline int mempool_init_slab_pool(mempool_t *pool, int min_nr, struct kmem_cache *kc) { return mempool_init(pool, min_nr, mempool_alloc_slab, mempool_free_slab, (void *) kc); } static inline mempool_t * mempool_create_slab_pool(int min_nr, struct kmem_cache *kc) { return mempool_create(min_nr, mempool_alloc_slab, mempool_free_slab, (void *) kc); } /* * a mempool_alloc_t and a mempool_free_t to kmalloc and kfree the * amount of memory specified by pool_data */ void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data); void mempool_kfree(void *element, void *pool_data); static inline int mempool_init_kmalloc_pool(mempool_t *pool, int min_nr, size_t size) { return mempool_init(pool, min_nr, mempool_kmalloc, mempool_kfree, (void *) size); } static inline mempool_t *mempool_create_kmalloc_pool(int min_nr, size_t size) { return mempool_create(min_nr, mempool_kmalloc, mempool_kfree, (void *) size); } /* * A mempool_alloc_t and mempool_free_t for a simple page allocator that * allocates pages of the order specified by pool_data */ void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data); void mempool_free_pages(void *element, void *pool_data); static inline int mempool_init_page_pool(mempool_t *pool, int min_nr, int order) { return mempool_init(pool, min_nr, mempool_alloc_pages, mempool_free_pages, (void *)(long)order); } static inline mempool_t *mempool_create_page_pool(int min_nr, int order) { return mempool_create(min_nr, mempool_alloc_pages, mempool_free_pages, (void *)(long)order); } #endif /* _LINUX_MEMPOOL_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _linux_POSIX_TIMERS_H #define _linux_POSIX_TIMERS_H #include <linux/spinlock.h> #include <linux/list.h> #include <linux/alarmtimer.h> #include <linux/timerqueue.h> #include <linux/task_work.h> struct kernel_siginfo; struct task_struct; /* * Bit fields within a clockid: * * The most significant 29 bits hold either a pid or a file descriptor. * * Bit 2 indicates whether a cpu clock refers to a thread or a process. * * Bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or FD=3. * * A clockid is invalid if bits 2, 1, and 0 are all set. */ #define CPUCLOCK_PID(clock) ((pid_t) ~((clock) >> 3)) #define CPUCLOCK_PERTHREAD(clock) \ (((clock) & (clockid_t) CPUCLOCK_PERTHREAD_MASK) != 0) #define CPUCLOCK_PERTHREAD_MASK 4 #define CPUCLOCK_WHICH(clock) ((clock) & (clockid_t) CPUCLOCK_CLOCK_MASK) #define CPUCLOCK_CLOCK_MASK 3 #define CPUCLOCK_PROF 0 #define CPUCLOCK_VIRT 1 #define CPUCLOCK_SCHED 2 #define CPUCLOCK_MAX 3 #define CLOCKFD CPUCLOCK_MAX #define CLOCKFD_MASK (CPUCLOCK_PERTHREAD_MASK|CPUCLOCK_CLOCK_MASK) static inline clockid_t make_process_cpuclock(const unsigned int pid, const clockid_t clock) { return ((~pid) << 3) | clock; } static inline clockid_t make_thread_cpuclock(const unsigned int tid, const clockid_t clock) { return make_process_cpuclock(tid, clock | CPUCLOCK_PERTHREAD_MASK); } static inline clockid_t fd_to_clockid(const int fd) { return make_process_cpuclock((unsigned int) fd, CLOCKFD); } static inline int clockid_to_fd(const clockid_t clk) { return ~(clk >> 3); } #ifdef CONFIG_POSIX_TIMERS /** * cpu_timer - Posix CPU timer representation for k_itimer * @node: timerqueue node to queue in the task/sig * @head: timerqueue head on which this timer is queued * @task: Pointer to target task * @elist: List head for the expiry list * @firing: Timer is currently firing */ struct cpu_timer { struct timerqueue_node node; struct timerqueue_head *head; struct pid *pid; struct list_head elist; int firing; }; static inline bool cpu_timer_enqueue(struct timerqueue_head *head, struct cpu_timer *ctmr) { ctmr->head = head; return timerqueue_add(head, &ctmr->node); } static inline void cpu_timer_dequeue(struct cpu_timer *ctmr) { if (ctmr->head) { timerqueue_del(ctmr->head, &ctmr->node); ctmr->head = NULL; } } static inline u64 cpu_timer_getexpires(struct cpu_timer *ctmr) { return ctmr->node.expires; } static inline void cpu_timer_setexpires(struct cpu_timer *ctmr, u64 exp) { ctmr->node.expires = exp; } /** * posix_cputimer_base - Container per posix CPU clock * @nextevt: Earliest-expiration cache * @tqhead: timerqueue head for cpu_timers */ struct posix_cputimer_base { u64 nextevt; struct timerqueue_head tqhead; }; /** * posix_cputimers - Container for posix CPU timer related data * @bases: Base container for posix CPU clocks * @timers_active: Timers are queued. * @expiry_active: Timer expiry is active. Used for * process wide timers to avoid multiple * task trying to handle expiry concurrently * * Used in task_struct and signal_struct */ struct posix_cputimers { struct posix_cputimer_base bases[CPUCLOCK_MAX]; unsigned int timers_active; unsigned int expiry_active; }; /** * posix_cputimers_work - Container for task work based posix CPU timer expiry * @work: The task work to be scheduled * @scheduled: @work has been scheduled already, no further processing */ struct posix_cputimers_work { struct callback_head work; unsigned int scheduled; }; static inline void posix_cputimers_init(struct posix_cputimers *pct) { memset(pct, 0, sizeof(*pct)); pct->bases[0].nextevt = U64_MAX; pct->bases[1].nextevt = U64_MAX; pct->bases[2].nextevt = U64_MAX; } void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit); static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct, u64 runtime) { pct->bases[CPUCLOCK_SCHED].nextevt = runtime; } /* Init task static initializer */ #define INIT_CPU_TIMERBASE(b) { \ .nextevt = U64_MAX, \ } #define INIT_CPU_TIMERBASES(b) { \ INIT_CPU_TIMERBASE(b[0]), \ INIT_CPU_TIMERBASE(b[1]), \ INIT_CPU_TIMERBASE(b[2]), \ } #define INIT_CPU_TIMERS(s) \ .posix_cputimers = { \ .bases = INIT_CPU_TIMERBASES(s.posix_cputimers.bases), \ }, #else struct posix_cputimers { }; struct cpu_timer { }; #define INIT_CPU_TIMERS(s) static inline void posix_cputimers_init(struct posix_cputimers *pct) { } static inline void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit) { } #endif #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK void posix_cputimers_init_work(void); #else static inline void posix_cputimers_init_work(void) { } #endif #define REQUEUE_PENDING 1 /** * struct k_itimer - POSIX.1b interval timer structure. * @list: List head for binding the timer to signals->posix_timers * @t_hash: Entry in the posix timer hash table * @it_lock: Lock protecting the timer * @kclock: Pointer to the k_clock struct handling this timer * @it_clock: The posix timer clock id * @it_id: The posix timer id for identifying the timer * @it_active: Marker that timer is active * @it_overrun: The overrun counter for pending signals * @it_overrun_last: The overrun at the time of the last delivered signal * @it_requeue_pending: Indicator that timer waits for being requeued on * signal delivery * @it_sigev_notify: The notify word of sigevent struct for signal delivery * @it_interval: The interval for periodic timers * @it_signal: Pointer to the creators signal struct * @it_pid: The pid of the process/task targeted by the signal * @it_process: The task to wakeup on clock_nanosleep (CPU timers) * @sigq: Pointer to preallocated sigqueue * @it: Union representing the various posix timer type * internals. * @rcu: RCU head for freeing the timer. */ struct k_itimer { struct list_head list; struct hlist_node t_hash; spinlock_t it_lock; const struct k_clock *kclock; clockid_t it_clock; timer_t it_id; int it_active; s64 it_overrun; s64 it_overrun_last; int it_requeue_pending; int it_sigev_notify; ktime_t it_interval; struct signal_struct *it_signal; union { struct pid *it_pid; struct task_struct *it_process; }; struct sigqueue *sigq; union { struct { struct hrtimer timer; } real; struct cpu_timer cpu; struct { struct alarm alarmtimer; } alarm; } it; struct rcu_head rcu; }; void run_posix_cpu_timers(void); void posix_cpu_timers_exit(struct task_struct *task); void posix_cpu_timers_exit_group(struct task_struct *task); void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx, u64 *newval, u64 *oldval); void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new); void posixtimer_rearm(struct kernel_siginfo *info); #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 /* SPDX-License-Identifier: GPL-2.0 */ /* File: linux/posix_acl_xattr.h Extended attribute system call representation of Access Control Lists. Copyright (C) 2000 by Andreas Gruenbacher <a.gruenbacher@computer.org> Copyright (C) 2002 SGI - Silicon Graphics, Inc <linux-xfs@oss.sgi.com> */ #ifndef _POSIX_ACL_XATTR_H #define _POSIX_ACL_XATTR_H #include <uapi/linux/xattr.h> #include <uapi/linux/posix_acl_xattr.h> #include <linux/posix_acl.h> static inline size_t posix_acl_xattr_size(int count) { return (sizeof(struct posix_acl_xattr_header) + (count * sizeof(struct posix_acl_xattr_entry))); } static inline int posix_acl_xattr_count(size_t size) { if (size < sizeof(struct posix_acl_xattr_header)) return -1; size -= sizeof(struct posix_acl_xattr_header); if (size % sizeof(struct posix_acl_xattr_entry)) return -1; return size / sizeof(struct posix_acl_xattr_entry); } #ifdef CONFIG_FS_POSIX_ACL void posix_acl_fix_xattr_from_user(void *value, size_t size); void posix_acl_fix_xattr_to_user(void *value, size_t size); #else static inline void posix_acl_fix_xattr_from_user(void *value, size_t size) { } static inline void posix_acl_fix_xattr_to_user(void *value, size_t size) { } #endif struct posix_acl *posix_acl_from_xattr(struct user_namespace *user_ns, const void *value, size_t size); int posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl, void *buffer, size_t size); extern const struct xattr_handler posix_acl_access_xattr_handler; extern const struct xattr_handler posix_acl_default_xattr_handler; #endif /* _POSIX_ACL_XATTR_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM random #if !defined(_TRACE_RANDOM_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_RANDOM_H #include <linux/writeback.h> #include <linux/tracepoint.h> TRACE_EVENT(add_device_randomness, TP_PROTO(int bytes, unsigned long IP), TP_ARGS(bytes, IP), TP_STRUCT__entry( __field( int, bytes ) __field(unsigned long, IP ) ), TP_fast_assign( __entry->bytes = bytes; __entry->IP = IP; ), TP_printk("bytes %d caller %pS", __entry->bytes, (void *)__entry->IP) ); DECLARE_EVENT_CLASS(random__mix_pool_bytes, TP_PROTO(const char *pool_name, int bytes, unsigned long IP), TP_ARGS(pool_name, bytes, IP), TP_STRUCT__entry( __field( const char *, pool_name ) __field( int, bytes ) __field(unsigned long, IP ) ), TP_fast_assign( __entry->pool_name = pool_name; __entry->bytes = bytes; __entry->IP = IP; ), TP_printk("%s pool: bytes %d caller %pS", __entry->pool_name, __entry->bytes, (void *)__entry->IP) ); DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes, TP_PROTO(const char *pool_name, int bytes, unsigned long IP), TP_ARGS(pool_name, bytes, IP) ); DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes_nolock, TP_PROTO(const char *pool_name, int bytes, unsigned long IP), TP_ARGS(pool_name, bytes, IP) ); TRACE_EVENT(credit_entropy_bits, TP_PROTO(const char *pool_name, int bits, int entropy_count, unsigned long IP), TP_ARGS(pool_name, bits, entropy_count, IP), TP_STRUCT__entry( __field( const char *, pool_name ) __field( int, bits ) __field( int, entropy_count ) __field(unsigned long, IP ) ), TP_fast_assign( __entry->pool_name = pool_name; __entry->bits = bits; __entry->entropy_count = entropy_count; __entry->IP = IP; ), TP_printk("%s pool: bits %d entropy_count %d caller %pS", __entry->pool_name, __entry->bits, __entry->entropy_count, (void *)__entry->IP) ); TRACE_EVENT(push_to_pool, TP_PROTO(const char *pool_name, int pool_bits, int input_bits), TP_ARGS(pool_name, pool_bits, input_bits), TP_STRUCT__entry( __field( const char *, pool_name ) __field( int, pool_bits ) __field( int, input_bits ) ), TP_fast_assign( __entry->pool_name = pool_name; __entry->pool_bits = pool_bits; __entry->input_bits = input_bits; ), TP_printk("%s: pool_bits %d input_pool_bits %d", __entry->pool_name, __entry->pool_bits, __entry->input_bits) ); TRACE_EVENT(debit_entropy, TP_PROTO(const char *pool_name, int debit_bits), TP_ARGS(pool_name, debit_bits), TP_STRUCT__entry( __field( const char *, pool_name ) __field( int, debit_bits ) ), TP_fast_assign( __entry->pool_name = pool_name; __entry->debit_bits = debit_bits; ), TP_printk("%s: debit_bits %d", __entry->pool_name, __entry->debit_bits) ); TRACE_EVENT(add_input_randomness, TP_PROTO(int input_bits), TP_ARGS(input_bits), TP_STRUCT__entry( __field( int, input_bits ) ), TP_fast_assign( __entry->input_bits = input_bits; ), TP_printk("input_pool_bits %d", __entry->input_bits) ); TRACE_EVENT(add_disk_randomness, TP_PROTO(dev_t dev, int input_bits), TP_ARGS(dev, input_bits), TP_STRUCT__entry( __field( dev_t, dev ) __field( int, input_bits ) ), TP_fast_assign( __entry->dev = dev; __entry->input_bits = input_bits; ), TP_printk("dev %d,%d input_pool_bits %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->input_bits) ); TRACE_EVENT(xfer_secondary_pool, TP_PROTO(const char *pool_name, int xfer_bits, int request_bits, int pool_entropy, int input_entropy), TP_ARGS(pool_name, xfer_bits, request_bits, pool_entropy, input_entropy), TP_STRUCT__entry( __field( const char *, pool_name ) __field( int, xfer_bits ) __field( int, request_bits ) __field( int, pool_entropy ) __field( int, input_entropy ) ), TP_fast_assign( __entry->pool_name = pool_name; __entry->xfer_bits = xfer_bits; __entry->request_bits = request_bits; __entry->pool_entropy = pool_entropy; __entry->input_entropy = input_entropy; ), TP_printk("pool %s xfer_bits %d request_bits %d pool_entropy %d " "input_entropy %d", __entry->pool_name, __entry->xfer_bits, __entry->request_bits, __entry->pool_entropy, __entry->input_entropy) ); DECLARE_EVENT_CLASS(random__get_random_bytes, TP_PROTO(int nbytes, unsigned long IP), TP_ARGS(nbytes, IP), TP_STRUCT__entry( __field( int, nbytes ) __field(unsigned long, IP ) ), TP_fast_assign( __entry->nbytes = nbytes; __entry->IP = IP; ), TP_printk("nbytes %d caller %pS", __entry->nbytes, (void *)__entry->IP) ); DEFINE_EVENT(random__get_random_bytes, get_random_bytes, TP_PROTO(int nbytes, unsigned long IP), TP_ARGS(nbytes, IP) ); DEFINE_EVENT(random__get_random_bytes, get_random_bytes_arch, TP_PROTO(int nbytes, unsigned long IP), TP_ARGS(nbytes, IP) ); DECLARE_EVENT_CLASS(random__extract_entropy, TP_PROTO(const char *pool_name, int nbytes, int entropy_count, unsigned long IP), TP_ARGS(pool_name, nbytes, entropy_count, IP), TP_STRUCT__entry( __field( const char *, pool_name ) __field( int, nbytes ) __field( int, entropy_count ) __field(unsigned long, IP ) ), TP_fast_assign( __entry->pool_name = pool_name; __entry->nbytes = nbytes; __entry->entropy_count = entropy_count; __entry->IP = IP; ), TP_printk("%s pool: nbytes %d entropy_count %d caller %pS", __entry->pool_name, __entry->nbytes, __entry->entropy_count, (void *)__entry->IP) ); DEFINE_EVENT(random__extract_entropy, extract_entropy, TP_PROTO(const char *pool_name, int nbytes, int entropy_count, unsigned long IP), TP_ARGS(pool_name, nbytes, entropy_count, IP) ); DEFINE_EVENT(random__extract_entropy, extract_entropy_user, TP_PROTO(const char *pool_name, int nbytes, int entropy_count, unsigned long IP), TP_ARGS(pool_name, nbytes, entropy_count, IP) ); TRACE_EVENT(random_read, TP_PROTO(int got_bits, int need_bits, int pool_left, int input_left), TP_ARGS(got_bits, need_bits, pool_left, input_left), TP_STRUCT__entry( __field( int, got_bits ) __field( int, need_bits ) __field( int, pool_left ) __field( int, input_left ) ), TP_fast_assign( __entry->got_bits = got_bits; __entry->need_bits = need_bits; __entry->pool_left = pool_left; __entry->input_left = input_left; ), TP_printk("got_bits %d still_needed_bits %d " "blocking_pool_entropy_left %d input_entropy_left %d", __entry->got_bits, __entry->got_bits, __entry->pool_left, __entry->input_left) ); TRACE_EVENT(urandom_read, TP_PROTO(int got_bits, int pool_left, int input_left), TP_ARGS(got_bits, pool_left, input_left), TP_STRUCT__entry( __field( int, got_bits ) __field( int, pool_left ) __field( int, input_left ) ), TP_fast_assign( __entry->got_bits = got_bits; __entry->pool_left = pool_left; __entry->input_left = input_left; ), TP_printk("got_bits %d nonblocking_pool_entropy_left %d " "input_entropy_left %d", __entry->got_bits, __entry->pool_left, __entry->input_left) ); TRACE_EVENT(prandom_u32, TP_PROTO(unsigned int ret), TP_ARGS(ret), TP_STRUCT__entry( __field( unsigned int, ret) ), TP_fast_assign( __entry->ret = ret; ), TP_printk("ret=%u" , __entry->ret) ); #endif /* _TRACE_RANDOM_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_ATOMIC_H #define _ASM_X86_ATOMIC_H #include <linux/compiler.h> #include <linux/types.h> #include <asm/alternative.h> #include <asm/cmpxchg.h> #include <asm/rmwcc.h> #include <asm/barrier.h> /* * Atomic operations that C can't guarantee us. Useful for * resource counting etc.. */ /** * arch_atomic_read - read atomic variable * @v: pointer of type atomic_t * * Atomically reads the value of @v. */ static __always_inline int arch_atomic_read(const atomic_t *v) { /* * Note for KASAN: we deliberately don't use READ_ONCE_NOCHECK() here, * it's non-inlined function that increases binary size and stack usage. */ return __READ_ONCE((v)->counter); } /** * arch_atomic_set - set atomic variable * @v: pointer of type atomic_t * @i: required value * * Atomically sets the value of @v to @i. */ static __always_inline void arch_atomic_set(atomic_t *v, int i) { __WRITE_ONCE(v->counter, i); } /** * arch_atomic_add - add integer to atomic variable * @i: integer value to add * @v: pointer of type atomic_t * * Atomically adds @i to @v. */ static __always_inline void arch_atomic_add(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "addl %1,%0" : "+m" (v->counter) : "ir" (i) : "memory"); } /** * arch_atomic_sub - subtract integer from atomic variable * @i: integer value to subtract * @v: pointer of type atomic_t * * Atomically subtracts @i from @v. */ static __always_inline void arch_atomic_sub(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "subl %1,%0" : "+m" (v->counter) : "ir" (i) : "memory"); } /** * arch_atomic_sub_and_test - subtract value from variable and test result * @i: integer value to subtract * @v: pointer of type atomic_t * * Atomically subtracts @i from @v and returns * true if the result is zero, or false for all * other cases. */ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, e, "er", i); } #define arch_atomic_sub_and_test arch_atomic_sub_and_test /** * arch_atomic_inc - increment atomic variable * @v: pointer of type atomic_t * * Atomically increments @v by 1. */ static __always_inline void arch_atomic_inc(atomic_t *v) { asm volatile(LOCK_PREFIX "incl %0" : "+m" (v->counter) :: "memory"); } #define arch_atomic_inc arch_atomic_inc /** * arch_atomic_dec - decrement atomic variable * @v: pointer of type atomic_t * * Atomically decrements @v by 1. */ static __always_inline void arch_atomic_dec(atomic_t *v) { asm volatile(LOCK_PREFIX "decl %0" : "+m" (v->counter) :: "memory"); } #define arch_atomic_dec arch_atomic_dec /** * arch_atomic_dec_and_test - decrement and test * @v: pointer of type atomic_t * * Atomically decrements @v by 1 and * returns true if the result is 0, or false for all other * cases. */ static __always_inline bool arch_atomic_dec_and_test(atomic_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, e); } #define arch_atomic_dec_and_test arch_atomic_dec_and_test /** * arch_atomic_inc_and_test - increment and test * @v: pointer of type atomic_t * * Atomically increments @v by 1 * and returns true if the result is zero, or false for all * other cases. */ static __always_inline bool arch_atomic_inc_and_test(atomic_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, e); } #define arch_atomic_inc_and_test arch_atomic_inc_and_test /** * arch_atomic_add_negative - add and test if negative * @i: integer value to add * @v: pointer of type atomic_t * * Atomically adds @i to @v and returns true * if the result is negative, or false when * result is greater than or equal to zero. */ static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, s, "er", i); } #define arch_atomic_add_negative arch_atomic_add_negative /** * arch_atomic_add_return - add integer and return * @i: integer value to add * @v: pointer of type atomic_t * * Atomically adds @i to @v and returns @i + @v */ static __always_inline int arch_atomic_add_return(int i, atomic_t *v) { return i + xadd(&v->counter, i); } #define arch_atomic_add_return arch_atomic_add_return /** * arch_atomic_sub_return - subtract integer and return * @v: pointer of type atomic_t * @i: integer value to subtract * * Atomically subtracts @i from @v and returns @v - @i */ static __always_inline int arch_atomic_sub_return(int i, atomic_t *v) { return arch_atomic_add_return(-i, v); } #define arch_atomic_sub_return arch_atomic_sub_return static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v) { return xadd(&v->counter, i); } #define arch_atomic_fetch_add arch_atomic_fetch_add static __always_inline int arch_atomic_fetch_sub(int i, atomic_t *v) { return xadd(&v->counter, -i); } #define arch_atomic_fetch_sub arch_atomic_fetch_sub static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) { return arch_cmpxchg(&v->counter, old, new); } #define arch_atomic_cmpxchg arch_atomic_cmpxchg static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int new) { return try_cmpxchg(&v->counter, old, new); } #define arch_atomic_try_cmpxchg arch_atomic_try_cmpxchg static __always_inline int arch_atomic_xchg(atomic_t *v, int new) { return arch_xchg(&v->counter, new); } #define arch_atomic_xchg arch_atomic_xchg static __always_inline void arch_atomic_and(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "andl %1,%0" : "+m" (v->counter) : "ir" (i) : "memory"); } static __always_inline int arch_atomic_fetch_and(int i, atomic_t *v) { int val = arch_atomic_read(v); do { } while (!arch_atomic_try_cmpxchg(v, &val, val & i)); return val; } #define arch_atomic_fetch_and arch_atomic_fetch_and static __always_inline void arch_atomic_or(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "orl %1,%0" : "+m" (v->counter) : "ir" (i) : "memory"); } static __always_inline int arch_atomic_fetch_or(int i, atomic_t *v) { int val = arch_atomic_read(v); do { } while (!arch_atomic_try_cmpxchg(v, &val, val | i)); return val; } #define arch_atomic_fetch_or arch_atomic_fetch_or static __always_inline void arch_atomic_xor(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "xorl %1,%0" : "+m" (v->counter) : "ir" (i) : "memory"); } static __always_inline int arch_atomic_fetch_xor(int i, atomic_t *v) { int val = arch_atomic_read(v); do { } while (!arch_atomic_try_cmpxchg(v, &val, val ^ i)); return val; } #define arch_atomic_fetch_xor arch_atomic_fetch_xor #ifdef CONFIG_X86_32 # include <asm/atomic64_32.h> #else # include <asm/atomic64_64.h> #endif #define ARCH_ATOMIC #endif /* _ASM_X86_ATOMIC_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_SCHED_GENERIC_H #define __NET_SCHED_GENERIC_H #include <linux/netdevice.h> #include <linux/types.h> #include <linux/rcupdate.h> #include <linux/pkt_sched.h> #include <linux/pkt_cls.h> #include <linux/percpu.h> #include <linux/dynamic_queue_limits.h> #include <linux/list.h> #include <linux/refcount.h> #include <linux/workqueue.h> #include <linux/mutex.h> #include <linux/rwsem.h> #include <linux/atomic.h> #include <linux/hashtable.h> #include <net/gen_stats.h> #include <net/rtnetlink.h> #include <net/flow_offload.h> struct Qdisc_ops; struct qdisc_walker; struct tcf_walker; struct module; struct bpf_flow_keys; struct qdisc_rate_table { struct tc_ratespec rate; u32 data[256]; struct qdisc_rate_table *next; int refcnt; }; enum qdisc_state_t { __QDISC_STATE_SCHED, __QDISC_STATE_DEACTIVATED, __QDISC_STATE_MISSED, }; struct qdisc_size_table { struct rcu_head rcu; struct list_head list; struct tc_sizespec szopts; int refcnt; u16 data[]; }; /* similar to sk_buff_head, but skb->prev pointer is undefined. */ struct qdisc_skb_head { struct sk_buff *head; struct sk_buff *tail; __u32 qlen; spinlock_t lock; }; struct Qdisc { int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free); struct sk_buff * (*dequeue)(struct Qdisc *sch); unsigned int flags; #define TCQ_F_BUILTIN 1 #define TCQ_F_INGRESS 2 #define TCQ_F_CAN_BYPASS 4 #define TCQ_F_MQROOT 8 #define TCQ_F_ONETXQUEUE 0x10 /* dequeue_skb() can assume all skbs are for * q->dev_queue : It can test * netif_xmit_frozen_or_stopped() before * dequeueing next packet. * Its true for MQ/MQPRIO slaves, or non * multiqueue device. */ #define TCQ_F_WARN_NONWC (1 << 16) #define TCQ_F_CPUSTATS 0x20 /* run using percpu statistics */ #define TCQ_F_NOPARENT 0x40 /* root of its hierarchy : * qdisc_tree_decrease_qlen() should stop. */ #define TCQ_F_INVISIBLE 0x80 /* invisible by default in dump */ #define TCQ_F_NOLOCK 0x100 /* qdisc does not require locking */ #define TCQ_F_OFFLOADED 0x200 /* qdisc is offloaded to HW */ u32 limit; const struct Qdisc_ops *ops; struct qdisc_size_table __rcu *stab; struct hlist_node hash; u32 handle; u32 parent; struct netdev_queue *dev_queue; struct net_rate_estimator __rcu *rate_est; struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; int pad; refcount_t refcnt; /* * For performance sake on SMP, we put highly modified fields at the end */ struct sk_buff_head gso_skb ____cacheline_aligned_in_smp; struct qdisc_skb_head q; struct gnet_stats_basic_packed bstats; seqcount_t running; struct gnet_stats_queue qstats; unsigned long state; struct Qdisc *next_sched; struct sk_buff_head skb_bad_txq; spinlock_t busylock ____cacheline_aligned_in_smp; spinlock_t seqlock; /* for NOLOCK qdisc, true if there are no enqueued skbs */ bool empty; struct rcu_head rcu; /* private data */ long privdata[] ____cacheline_aligned; }; static inline void qdisc_refcount_inc(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_BUILTIN) return; refcount_inc(&qdisc->refcnt); } /* Intended to be used by unlocked users, when concurrent qdisc release is * possible. */ static inline struct Qdisc *qdisc_refcount_inc_nz(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_BUILTIN) return qdisc; if (refcount_inc_not_zero(&qdisc->refcnt)) return qdisc; return NULL; } static inline bool qdisc_is_running(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) return spin_is_locked(&qdisc->seqlock); return (raw_read_seqcount(&qdisc->running) & 1) ? true : false; } static inline bool qdisc_is_percpu_stats(const struct Qdisc *q) { return q->flags & TCQ_F_CPUSTATS; } static inline bool qdisc_is_empty(const struct Qdisc *qdisc) { if (qdisc_is_percpu_stats(qdisc)) return READ_ONCE(qdisc->empty); return !READ_ONCE(qdisc->q.qlen); } static inline bool qdisc_run_begin(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) { if (spin_trylock(&qdisc->seqlock)) goto nolock_empty; /* Paired with smp_mb__after_atomic() to make sure * STATE_MISSED checking is synchronized with clearing * in pfifo_fast_dequeue(). */ smp_mb__before_atomic(); /* If the MISSED flag is set, it means other thread has * set the MISSED flag before second spin_trylock(), so * we can return false here to avoid multi cpus doing * the set_bit() and second spin_trylock() concurrently. */ if (test_bit(__QDISC_STATE_MISSED, &qdisc->state)) return false; /* Set the MISSED flag before the second spin_trylock(), * if the second spin_trylock() return false, it means * other cpu holding the lock will do dequeuing for us * or it will see the MISSED flag set after releasing * lock and reschedule the net_tx_action() to do the * dequeuing. */ set_bit(__QDISC_STATE_MISSED, &qdisc->state); /* spin_trylock() only has load-acquire semantic, so use * smp_mb__after_atomic() to ensure STATE_MISSED is set * before doing the second spin_trylock(). */ smp_mb__after_atomic(); /* Retry again in case other CPU may not see the new flag * after it releases the lock at the end of qdisc_run_end(). */ if (!spin_trylock(&qdisc->seqlock)) return false; nolock_empty: WRITE_ONCE(qdisc->empty, false); } else if (qdisc_is_running(qdisc)) { return false; } /* Variant of write_seqcount_begin() telling lockdep a trylock * was attempted. */ raw_write_seqcount_begin(&qdisc->running); seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_); return true; } static inline void qdisc_run_end(struct Qdisc *qdisc) { write_seqcount_end(&qdisc->running); if (qdisc->flags & TCQ_F_NOLOCK) { spin_unlock(&qdisc->seqlock); if (unlikely(test_bit(__QDISC_STATE_MISSED, &qdisc->state))) { clear_bit(__QDISC_STATE_MISSED, &qdisc->state); __netif_schedule(qdisc); } } } static inline bool qdisc_may_bulk(const struct Qdisc *qdisc) { return qdisc->flags & TCQ_F_ONETXQUEUE; } static inline int qdisc_avail_bulklimit(const struct netdev_queue *txq) { #ifdef CONFIG_BQL /* Non-BQL migrated drivers will return 0, too. */ return dql_avail(&txq->dql); #else return 0; #endif } struct Qdisc_class_ops { unsigned int flags; /* Child qdisc manipulation */ struct netdev_queue * (*select_queue)(struct Qdisc *, struct tcmsg *); int (*graft)(struct Qdisc *, unsigned long cl,